MFEM  v4.3.0
Finite element discretization library
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Pages
bilininteg_hcurl.cpp
Go to the documentation of this file.
1 // Copyright (c) 2010-2021, Lawrence Livermore National Security, LLC. Produced
2 // at the Lawrence Livermore National Laboratory. All Rights reserved. See files
3 // LICENSE and NOTICE for details. LLNL-CODE-806117.
4 //
5 // This file is part of the MFEM library. For more information and source code
6 // availability visit https://mfem.org.
7 //
8 // MFEM is free software; you can redistribute it and/or modify it under the
9 // terms of the BSD-3 license. We welcome feedback and contributions, see file
10 // CONTRIBUTING.md for details.
11 
12 #include "../general/forall.hpp"
13 #include "bilininteg.hpp"
14 #include "gridfunc.hpp"
15 
16 using namespace std;
17 
18 namespace mfem
19 {
20 
21 void PAHcurlMassApply2D(const int D1D,
22  const int Q1D,
23  const int NE,
24  const bool symmetric,
25  const Array<double> &bo,
26  const Array<double> &bc,
27  const Array<double> &bot,
28  const Array<double> &bct,
29  const Vector &pa_data,
30  const Vector &x,
31  Vector &y)
32 {
33  constexpr static int VDIM = 2;
34  constexpr static int MAX_D1D = HCURL_MAX_D1D;
35  constexpr static int MAX_Q1D = HCURL_MAX_Q1D;
36 
37  auto Bo = Reshape(bo.Read(), Q1D, D1D-1);
38  auto Bc = Reshape(bc.Read(), Q1D, D1D);
39  auto Bot = Reshape(bot.Read(), D1D-1, Q1D);
40  auto Bct = Reshape(bct.Read(), D1D, Q1D);
41  auto op = Reshape(pa_data.Read(), Q1D, Q1D, symmetric ? 3 : 4, NE);
42  auto X = Reshape(x.Read(), 2*(D1D-1)*D1D, NE);
43  auto Y = Reshape(y.ReadWrite(), 2*(D1D-1)*D1D, NE);
44 
45  MFEM_FORALL(e, NE,
46  {
47  double mass[MAX_Q1D][MAX_Q1D][VDIM];
48 
49  for (int qy = 0; qy < Q1D; ++qy)
50  {
51  for (int qx = 0; qx < Q1D; ++qx)
52  {
53  for (int c = 0; c < VDIM; ++c)
54  {
55  mass[qy][qx][c] = 0.0;
56  }
57  }
58  }
59 
60  int osc = 0;
61 
62  for (int c = 0; c < VDIM; ++c) // loop over x, y components
63  {
64  const int D1Dy = (c == 1) ? D1D - 1 : D1D;
65  const int D1Dx = (c == 0) ? D1D - 1 : D1D;
66 
67  for (int dy = 0; dy < D1Dy; ++dy)
68  {
69  double massX[MAX_Q1D];
70  for (int qx = 0; qx < Q1D; ++qx)
71  {
72  massX[qx] = 0.0;
73  }
74 
75  for (int dx = 0; dx < D1Dx; ++dx)
76  {
77  const double t = X(dx + (dy * D1Dx) + osc, e);
78  for (int qx = 0; qx < Q1D; ++qx)
79  {
80  massX[qx] += t * ((c == 0) ? Bo(qx,dx) : Bc(qx,dx));
81  }
82  }
83 
84  for (int qy = 0; qy < Q1D; ++qy)
85  {
86  const double wy = (c == 1) ? Bo(qy,dy) : Bc(qy,dy);
87  for (int qx = 0; qx < Q1D; ++qx)
88  {
89  mass[qy][qx][c] += massX[qx] * wy;
90  }
91  }
92  }
93 
94  osc += D1Dx * D1Dy;
95  } // loop (c) over components
96 
97  // Apply D operator.
98  for (int qy = 0; qy < Q1D; ++qy)
99  {
100  for (int qx = 0; qx < Q1D; ++qx)
101  {
102  const double O11 = op(qx,qy,0,e);
103  const double O21 = op(qx,qy,1,e);
104  const double O12 = symmetric ? O21 : op(qx,qy,2,e);
105  const double O22 = symmetric ? op(qx,qy,2,e) : op(qx,qy,3,e);
106  const double massX = mass[qy][qx][0];
107  const double massY = mass[qy][qx][1];
108  mass[qy][qx][0] = (O11*massX)+(O12*massY);
109  mass[qy][qx][1] = (O21*massX)+(O22*massY);
110  }
111  }
112 
113  for (int qy = 0; qy < Q1D; ++qy)
114  {
115  osc = 0;
116 
117  for (int c = 0; c < VDIM; ++c) // loop over x, y components
118  {
119  const int D1Dy = (c == 1) ? D1D - 1 : D1D;
120  const int D1Dx = (c == 0) ? D1D - 1 : D1D;
121 
122  double massX[MAX_D1D];
123  for (int dx = 0; dx < D1Dx; ++dx)
124  {
125  massX[dx] = 0.0;
126  }
127  for (int qx = 0; qx < Q1D; ++qx)
128  {
129  for (int dx = 0; dx < D1Dx; ++dx)
130  {
131  massX[dx] += mass[qy][qx][c] * ((c == 0) ? Bot(dx,qx) : Bct(dx,qx));
132  }
133  }
134 
135  for (int dy = 0; dy < D1Dy; ++dy)
136  {
137  const double wy = (c == 1) ? Bot(dy,qy) : Bct(dy,qy);
138 
139  for (int dx = 0; dx < D1Dx; ++dx)
140  {
141  Y(dx + (dy * D1Dx) + osc, e) += massX[dx] * wy;
142  }
143  }
144 
145  osc += D1Dx * D1Dy;
146  } // loop c
147  } // loop qy
148  }); // end of element loop
149 }
150 
151 void PAHcurlMassAssembleDiagonal2D(const int D1D,
152  const int Q1D,
153  const int NE,
154  const bool symmetric,
155  const Array<double> &bo,
156  const Array<double> &bc,
157  const Vector &pa_data,
158  Vector &diag)
159 {
160  constexpr static int VDIM = 2;
161  constexpr static int MAX_Q1D = HCURL_MAX_Q1D;
162 
163  auto Bo = Reshape(bo.Read(), Q1D, D1D-1);
164  auto Bc = Reshape(bc.Read(), Q1D, D1D);
165  auto op = Reshape(pa_data.Read(), Q1D, Q1D, symmetric ? 3 : 4, NE);
166  auto D = Reshape(diag.ReadWrite(), 2*(D1D-1)*D1D, NE);
167 
168  MFEM_FORALL(e, NE,
169  {
170  int osc = 0;
171 
172  for (int c = 0; c < VDIM; ++c) // loop over x, y components
173  {
174  const int D1Dy = (c == 1) ? D1D - 1 : D1D;
175  const int D1Dx = (c == 0) ? D1D - 1 : D1D;
176 
177  double mass[MAX_Q1D];
178 
179  for (int dy = 0; dy < D1Dy; ++dy)
180  {
181  for (int qx = 0; qx < Q1D; ++qx)
182  {
183  mass[qx] = 0.0;
184  for (int qy = 0; qy < Q1D; ++qy)
185  {
186  const double wy = (c == 1) ? Bo(qy,dy) : Bc(qy,dy);
187 
188  mass[qx] += wy * wy * ((c == 0) ? op(qx,qy,0,e) :
189  op(qx,qy,symmetric ? 2 : 3, e));
190  }
191  }
192 
193  for (int dx = 0; dx < D1Dx; ++dx)
194  {
195  for (int qx = 0; qx < Q1D; ++qx)
196  {
197  const double wx = ((c == 0) ? Bo(qx,dx) : Bc(qx,dx));
198  D(dx + (dy * D1Dx) + osc, e) += mass[qx] * wx * wx;
199  }
200  }
201  }
202 
203  osc += D1Dx * D1Dy;
204  } // loop c
205  }); // end of element loop
206 }
207 
208 void PAHcurlMassAssembleDiagonal3D(const int D1D,
209  const int Q1D,
210  const int NE,
211  const bool symmetric,
212  const Array<double> &bo,
213  const Array<double> &bc,
214  const Vector &pa_data,
215  Vector &diag)
216 {
217  constexpr static int MAX_D1D = HCURL_MAX_D1D;
218  constexpr static int MAX_Q1D = HCURL_MAX_Q1D;
219 
220  MFEM_VERIFY(D1D <= MAX_D1D, "Error: D1D > MAX_D1D");
221  MFEM_VERIFY(Q1D <= MAX_Q1D, "Error: Q1D > MAX_Q1D");
222  constexpr static int VDIM = 3;
223 
224  auto Bo = Reshape(bo.Read(), Q1D, D1D-1);
225  auto Bc = Reshape(bc.Read(), Q1D, D1D);
226  auto op = Reshape(pa_data.Read(), Q1D, Q1D, Q1D, symmetric ? 6 : 9, NE);
227  auto D = Reshape(diag.ReadWrite(), 3*(D1D-1)*D1D*D1D, NE);
228 
229  MFEM_FORALL(e, NE,
230  {
231  int osc = 0;
232 
233  for (int c = 0; c < VDIM; ++c) // loop over x, y, z components
234  {
235  const int D1Dz = (c == 2) ? D1D - 1 : D1D;
236  const int D1Dy = (c == 1) ? D1D - 1 : D1D;
237  const int D1Dx = (c == 0) ? D1D - 1 : D1D;
238 
239  const int opc = (c == 0) ? 0 : ((c == 1) ? (symmetric ? 3 : 4) :
240  (symmetric ? 5 : 8));
241 
242  double mass[MAX_Q1D];
243 
244  for (int dz = 0; dz < D1Dz; ++dz)
245  {
246  for (int dy = 0; dy < D1Dy; ++dy)
247  {
248  for (int qx = 0; qx < Q1D; ++qx)
249  {
250  mass[qx] = 0.0;
251  for (int qy = 0; qy < Q1D; ++qy)
252  {
253  const double wy = (c == 1) ? Bo(qy,dy) : Bc(qy,dy);
254 
255  for (int qz = 0; qz < Q1D; ++qz)
256  {
257  const double wz = (c == 2) ? Bo(qz,dz) : Bc(qz,dz);
258 
259  mass[qx] += wy * wy * wz * wz * op(qx,qy,qz,opc,e);
260  }
261  }
262  }
263 
264  for (int dx = 0; dx < D1Dx; ++dx)
265  {
266  for (int qx = 0; qx < Q1D; ++qx)
267  {
268  const double wx = ((c == 0) ? Bo(qx,dx) : Bc(qx,dx));
269  D(dx + ((dy + (dz * D1Dy)) * D1Dx) + osc, e) += mass[qx] * wx * wx;
270  }
271  }
272  }
273  }
274 
275  osc += D1Dx * D1Dy * D1Dz;
276  } // loop c
277  }); // end of element loop
278 }
279 
280 template<int T_D1D, int T_Q1D>
281 void SmemPAHcurlMassAssembleDiagonal3D(const int D1D,
282  const int Q1D,
283  const int NE,
284  const bool symmetric,
285  const Array<double> &bo,
286  const Array<double> &bc,
287  const Vector &pa_data,
288  Vector &diag)
289 {
290  MFEM_VERIFY(D1D <= HCURL_MAX_D1D, "Error: D1D > MAX_D1D");
291  MFEM_VERIFY(Q1D <= HCURL_MAX_Q1D, "Error: Q1D > MAX_Q1D");
292 
293  auto Bo = Reshape(bo.Read(), Q1D, D1D-1);
294  auto Bc = Reshape(bc.Read(), Q1D, D1D);
295  auto op = Reshape(pa_data.Read(), Q1D, Q1D, Q1D, symmetric ? 6 : 9, NE);
296  auto D = Reshape(diag.ReadWrite(), 3*(D1D-1)*D1D*D1D, NE);
297 
298  MFEM_FORALL_3D(e, NE, Q1D, Q1D, Q1D,
299  {
300  constexpr int VDIM = 3;
301  constexpr int tD1D = T_D1D ? T_D1D : HCURL_MAX_D1D;
302  constexpr int tQ1D = T_Q1D ? T_Q1D : HCURL_MAX_Q1D;
303 
304  MFEM_SHARED double sBo[tQ1D][tD1D];
305  MFEM_SHARED double sBc[tQ1D][tD1D];
306 
307  double op3[3];
308  MFEM_SHARED double sop[3][tQ1D][tQ1D];
309 
310  MFEM_FOREACH_THREAD(qx,x,Q1D)
311  {
312  MFEM_FOREACH_THREAD(qy,y,Q1D)
313  {
314  MFEM_FOREACH_THREAD(qz,z,Q1D)
315  {
316  op3[0] = op(qx,qy,qz,0,e);
317  op3[1] = op(qx,qy,qz,symmetric ? 3 : 4,e);
318  op3[2] = op(qx,qy,qz,symmetric ? 5 : 8,e);
319  }
320  }
321  }
322 
323  const int tidx = MFEM_THREAD_ID(x);
324  const int tidy = MFEM_THREAD_ID(y);
325  const int tidz = MFEM_THREAD_ID(z);
326 
327  if (tidz == 0)
328  {
329  MFEM_FOREACH_THREAD(d,y,D1D)
330  {
331  MFEM_FOREACH_THREAD(q,x,Q1D)
332  {
333  sBc[q][d] = Bc(q,d);
334  if (d < D1D-1)
335  {
336  sBo[q][d] = Bo(q,d);
337  }
338  }
339  }
340  }
341  MFEM_SYNC_THREAD;
342 
343  int osc = 0;
344  for (int c = 0; c < VDIM; ++c) // loop over x, y, z components
345  {
346  const int D1Dz = (c == 2) ? D1D - 1 : D1D;
347  const int D1Dy = (c == 1) ? D1D - 1 : D1D;
348  const int D1Dx = (c == 0) ? D1D - 1 : D1D;
349 
350  double dxyz = 0.0;
351 
352  for (int qz=0; qz < Q1D; ++qz)
353  {
354  if (tidz == qz)
355  {
356  for (int i=0; i<3; ++i)
357  {
358  sop[i][tidx][tidy] = op3[i];
359  }
360  }
361 
362  MFEM_SYNC_THREAD;
363 
364  MFEM_FOREACH_THREAD(dz,z,D1Dz)
365  {
366  const double wz = ((c == 2) ? sBo[qz][dz] : sBc[qz][dz]);
367 
368  MFEM_FOREACH_THREAD(dy,y,D1Dy)
369  {
370  MFEM_FOREACH_THREAD(dx,x,D1Dx)
371  {
372  for (int qy = 0; qy < Q1D; ++qy)
373  {
374  const double wy = ((c == 1) ? sBo[qy][dy] : sBc[qy][dy]);
375 
376  for (int qx = 0; qx < Q1D; ++qx)
377  {
378  const double wx = ((c == 0) ? sBo[qx][dx] : sBc[qx][dx]);
379  dxyz += sop[c][qx][qy] * wx * wx * wy * wy * wz * wz;
380  }
381  }
382  }
383  }
384  }
385 
386  MFEM_SYNC_THREAD;
387  } // qz loop
388 
389  MFEM_FOREACH_THREAD(dz,z,D1Dz)
390  {
391  MFEM_FOREACH_THREAD(dy,y,D1Dy)
392  {
393  MFEM_FOREACH_THREAD(dx,x,D1Dx)
394  {
395  D(dx + ((dy + (dz * D1Dy)) * D1Dx) + osc, e) += dxyz;
396  }
397  }
398  }
399 
400  osc += D1Dx * D1Dy * D1Dz;
401  } // c loop
402  }); // end of element loop
403 }
404 
405 void PAHcurlMassApply3D(const int D1D,
406  const int Q1D,
407  const int NE,
408  const bool symmetric,
409  const Array<double> &bo,
410  const Array<double> &bc,
411  const Array<double> &bot,
412  const Array<double> &bct,
413  const Vector &pa_data,
414  const Vector &x,
415  Vector &y)
416 {
417  constexpr static int MAX_D1D = HCURL_MAX_D1D;
418  constexpr static int MAX_Q1D = HCURL_MAX_Q1D;
419 
420  MFEM_VERIFY(D1D <= MAX_D1D, "Error: D1D > MAX_D1D");
421  MFEM_VERIFY(Q1D <= MAX_Q1D, "Error: Q1D > MAX_Q1D");
422  constexpr static int VDIM = 3;
423 
424  auto Bo = Reshape(bo.Read(), Q1D, D1D-1);
425  auto Bc = Reshape(bc.Read(), Q1D, D1D);
426  auto Bot = Reshape(bot.Read(), D1D-1, Q1D);
427  auto Bct = Reshape(bct.Read(), D1D, Q1D);
428  auto op = Reshape(pa_data.Read(), Q1D, Q1D, Q1D, symmetric ? 6 : 9, NE);
429  auto X = Reshape(x.Read(), 3*(D1D-1)*D1D*D1D, NE);
430  auto Y = Reshape(y.ReadWrite(), 3*(D1D-1)*D1D*D1D, NE);
431 
432  MFEM_FORALL(e, NE,
433  {
434  double mass[MAX_Q1D][MAX_Q1D][MAX_Q1D][VDIM];
435 
436  for (int qz = 0; qz < Q1D; ++qz)
437  {
438  for (int qy = 0; qy < Q1D; ++qy)
439  {
440  for (int qx = 0; qx < Q1D; ++qx)
441  {
442  for (int c = 0; c < VDIM; ++c)
443  {
444  mass[qz][qy][qx][c] = 0.0;
445  }
446  }
447  }
448  }
449 
450  int osc = 0;
451 
452  for (int c = 0; c < VDIM; ++c) // loop over x, y, z components
453  {
454  const int D1Dz = (c == 2) ? D1D - 1 : D1D;
455  const int D1Dy = (c == 1) ? D1D - 1 : D1D;
456  const int D1Dx = (c == 0) ? D1D - 1 : D1D;
457 
458  for (int dz = 0; dz < D1Dz; ++dz)
459  {
460  double massXY[MAX_Q1D][MAX_Q1D];
461  for (int qy = 0; qy < Q1D; ++qy)
462  {
463  for (int qx = 0; qx < Q1D; ++qx)
464  {
465  massXY[qy][qx] = 0.0;
466  }
467  }
468 
469  for (int dy = 0; dy < D1Dy; ++dy)
470  {
471  double massX[MAX_Q1D];
472  for (int qx = 0; qx < Q1D; ++qx)
473  {
474  massX[qx] = 0.0;
475  }
476 
477  for (int dx = 0; dx < D1Dx; ++dx)
478  {
479  const double t = X(dx + ((dy + (dz * D1Dy)) * D1Dx) + osc, e);
480  for (int qx = 0; qx < Q1D; ++qx)
481  {
482  massX[qx] += t * ((c == 0) ? Bo(qx,dx) : Bc(qx,dx));
483  }
484  }
485 
486  for (int qy = 0; qy < Q1D; ++qy)
487  {
488  const double wy = (c == 1) ? Bo(qy,dy) : Bc(qy,dy);
489  for (int qx = 0; qx < Q1D; ++qx)
490  {
491  const double wx = massX[qx];
492  massXY[qy][qx] += wx * wy;
493  }
494  }
495  }
496 
497  for (int qz = 0; qz < Q1D; ++qz)
498  {
499  const double wz = (c == 2) ? Bo(qz,dz) : Bc(qz,dz);
500  for (int qy = 0; qy < Q1D; ++qy)
501  {
502  for (int qx = 0; qx < Q1D; ++qx)
503  {
504  mass[qz][qy][qx][c] += massXY[qy][qx] * wz;
505  }
506  }
507  }
508  }
509 
510  osc += D1Dx * D1Dy * D1Dz;
511  } // loop (c) over components
512 
513  // Apply D operator.
514  for (int qz = 0; qz < Q1D; ++qz)
515  {
516  for (int qy = 0; qy < Q1D; ++qy)
517  {
518  for (int qx = 0; qx < Q1D; ++qx)
519  {
520  const double O11 = op(qx,qy,qz,0,e);
521  const double O12 = op(qx,qy,qz,1,e);
522  const double O13 = op(qx,qy,qz,2,e);
523  const double O21 = symmetric ? O12 : op(qx,qy,qz,3,e);
524  const double O22 = symmetric ? op(qx,qy,qz,3,e) : op(qx,qy,qz,4,e);
525  const double O23 = symmetric ? op(qx,qy,qz,4,e) : op(qx,qy,qz,5,e);
526  const double O31 = symmetric ? O13 : op(qx,qy,qz,6,e);
527  const double O32 = symmetric ? O23 : op(qx,qy,qz,7,e);
528  const double O33 = symmetric ? op(qx,qy,qz,5,e) : op(qx,qy,qz,8,e);
529  const double massX = mass[qz][qy][qx][0];
530  const double massY = mass[qz][qy][qx][1];
531  const double massZ = mass[qz][qy][qx][2];
532  mass[qz][qy][qx][0] = (O11*massX)+(O12*massY)+(O13*massZ);
533  mass[qz][qy][qx][1] = (O21*massX)+(O22*massY)+(O23*massZ);
534  mass[qz][qy][qx][2] = (O31*massX)+(O32*massY)+(O33*massZ);
535  }
536  }
537  }
538 
539  for (int qz = 0; qz < Q1D; ++qz)
540  {
541  double massXY[MAX_D1D][MAX_D1D];
542 
543  osc = 0;
544 
545  for (int c = 0; c < VDIM; ++c) // loop over x, y, z components
546  {
547  const int D1Dz = (c == 2) ? D1D - 1 : D1D;
548  const int D1Dy = (c == 1) ? D1D - 1 : D1D;
549  const int D1Dx = (c == 0) ? D1D - 1 : D1D;
550 
551  for (int dy = 0; dy < D1Dy; ++dy)
552  {
553  for (int dx = 0; dx < D1Dx; ++dx)
554  {
555  massXY[dy][dx] = 0.0;
556  }
557  }
558  for (int qy = 0; qy < Q1D; ++qy)
559  {
560  double massX[MAX_D1D];
561  for (int dx = 0; dx < D1Dx; ++dx)
562  {
563  massX[dx] = 0;
564  }
565  for (int qx = 0; qx < Q1D; ++qx)
566  {
567  for (int dx = 0; dx < D1Dx; ++dx)
568  {
569  massX[dx] += mass[qz][qy][qx][c] * ((c == 0) ? Bot(dx,qx) : Bct(dx,qx));
570  }
571  }
572  for (int dy = 0; dy < D1Dy; ++dy)
573  {
574  const double wy = (c == 1) ? Bot(dy,qy) : Bct(dy,qy);
575  for (int dx = 0; dx < D1Dx; ++dx)
576  {
577  massXY[dy][dx] += massX[dx] * wy;
578  }
579  }
580  }
581 
582  for (int dz = 0; dz < D1Dz; ++dz)
583  {
584  const double wz = (c == 2) ? Bot(dz,qz) : Bct(dz,qz);
585  for (int dy = 0; dy < D1Dy; ++dy)
586  {
587  for (int dx = 0; dx < D1Dx; ++dx)
588  {
589  Y(dx + ((dy + (dz * D1Dy)) * D1Dx) + osc, e) += massXY[dy][dx] * wz;
590  }
591  }
592  }
593 
594  osc += D1Dx * D1Dy * D1Dz;
595  } // loop c
596  } // loop qz
597  }); // end of element loop
598 }
599 
600 template<int T_D1D, int T_Q1D>
601 void SmemPAHcurlMassApply3D(const int D1D,
602  const int Q1D,
603  const int NE,
604  const bool symmetric,
605  const Array<double> &bo,
606  const Array<double> &bc,
607  const Array<double> &bot,
608  const Array<double> &bct,
609  const Vector &pa_data,
610  const Vector &x,
611  Vector &y)
612 {
613  MFEM_VERIFY(D1D <= HCURL_MAX_D1D, "Error: D1D > MAX_D1D");
614  MFEM_VERIFY(Q1D <= HCURL_MAX_Q1D, "Error: Q1D > MAX_Q1D");
615 
616  const int dataSize = symmetric ? 6 : 9;
617 
618  auto Bo = Reshape(bo.Read(), Q1D, D1D-1);
619  auto Bc = Reshape(bc.Read(), Q1D, D1D);
620  auto op = Reshape(pa_data.Read(), Q1D, Q1D, Q1D, dataSize, NE);
621  auto X = Reshape(x.Read(), 3*(D1D-1)*D1D*D1D, NE);
622  auto Y = Reshape(y.ReadWrite(), 3*(D1D-1)*D1D*D1D, NE);
623 
624  MFEM_FORALL_3D(e, NE, Q1D, Q1D, Q1D,
625  {
626  constexpr int VDIM = 3;
627  constexpr int tD1D = T_D1D ? T_D1D : HCURL_MAX_D1D;
628  constexpr int tQ1D = T_Q1D ? T_Q1D : HCURL_MAX_Q1D;
629 
630  MFEM_SHARED double sBo[tQ1D][tD1D];
631  MFEM_SHARED double sBc[tQ1D][tD1D];
632 
633  double op9[9];
634  MFEM_SHARED double sop[9*tQ1D*tQ1D];
635  MFEM_SHARED double mass[tQ1D][tQ1D][3];
636 
637  MFEM_SHARED double sX[tD1D][tD1D][tD1D];
638 
639  MFEM_FOREACH_THREAD(qx,x,Q1D)
640  {
641  MFEM_FOREACH_THREAD(qy,y,Q1D)
642  {
643  MFEM_FOREACH_THREAD(qz,z,Q1D)
644  {
645  for (int i=0; i<dataSize; ++i)
646  {
647  op9[i] = op(qx,qy,qz,i,e);
648  }
649  }
650  }
651  }
652 
653  const int tidx = MFEM_THREAD_ID(x);
654  const int tidy = MFEM_THREAD_ID(y);
655  const int tidz = MFEM_THREAD_ID(z);
656 
657  if (tidz == 0)
658  {
659  MFEM_FOREACH_THREAD(d,y,D1D)
660  {
661  MFEM_FOREACH_THREAD(q,x,Q1D)
662  {
663  sBc[q][d] = Bc(q,d);
664  if (d < D1D-1)
665  {
666  sBo[q][d] = Bo(q,d);
667  }
668  }
669  }
670  }
671  MFEM_SYNC_THREAD;
672 
673  for (int qz=0; qz < Q1D; ++qz)
674  {
675  int osc = 0;
676  for (int c = 0; c < VDIM; ++c) // loop over x, y, z components
677  {
678  const int D1Dz = (c == 2) ? D1D - 1 : D1D;
679  const int D1Dy = (c == 1) ? D1D - 1 : D1D;
680  const int D1Dx = (c == 0) ? D1D - 1 : D1D;
681 
682  MFEM_FOREACH_THREAD(dz,z,D1Dz)
683  {
684  MFEM_FOREACH_THREAD(dy,y,D1Dy)
685  {
686  MFEM_FOREACH_THREAD(dx,x,D1Dx)
687  {
688  sX[dz][dy][dx] = X(dx + ((dy + (dz * D1Dy)) * D1Dx) + osc, e);
689  }
690  }
691  }
692  MFEM_SYNC_THREAD;
693 
694  if (tidz == qz)
695  {
696  for (int i=0; i<dataSize; ++i)
697  {
698  sop[i + (dataSize*tidx) + (dataSize*Q1D*tidy)] = op9[i];
699  }
700 
701  MFEM_FOREACH_THREAD(qy,y,Q1D)
702  {
703  MFEM_FOREACH_THREAD(qx,x,Q1D)
704  {
705  double u = 0.0;
706 
707  for (int dz = 0; dz < D1Dz; ++dz)
708  {
709  const double wz = (c == 2) ? sBo[qz][dz] : sBc[qz][dz];
710  for (int dy = 0; dy < D1Dy; ++dy)
711  {
712  const double wy = (c == 1) ? sBo[qy][dy] : sBc[qy][dy];
713  for (int dx = 0; dx < D1Dx; ++dx)
714  {
715  const double t = sX[dz][dy][dx];
716  const double wx = (c == 0) ? sBo[qx][dx] : sBc[qx][dx];
717  u += t * wx * wy * wz;
718  }
719  }
720  }
721 
722  mass[qy][qx][c] = u;
723  } // qx
724  } // qy
725  } // tidz == qz
726 
727  osc += D1Dx * D1Dy * D1Dz;
728  MFEM_SYNC_THREAD;
729  } // c
730 
731  MFEM_SYNC_THREAD; // Sync mass[qy][qx][d] and sop
732 
733  osc = 0;
734  for (int c = 0; c < VDIM; ++c) // loop over x, y, z components
735  {
736  const int D1Dz = (c == 2) ? D1D - 1 : D1D;
737  const int D1Dy = (c == 1) ? D1D - 1 : D1D;
738  const int D1Dx = (c == 0) ? D1D - 1 : D1D;
739 
740  double dxyz = 0.0;
741 
742  MFEM_FOREACH_THREAD(dz,z,D1Dz)
743  {
744  const double wz = (c == 2) ? sBo[qz][dz] : sBc[qz][dz];
745 
746  MFEM_FOREACH_THREAD(dy,y,D1Dy)
747  {
748  MFEM_FOREACH_THREAD(dx,x,D1Dx)
749  {
750  for (int qy = 0; qy < Q1D; ++qy)
751  {
752  const double wy = (c == 1) ? sBo[qy][dy] : sBc[qy][dy];
753  for (int qx = 0; qx < Q1D; ++qx)
754  {
755  const int os = (dataSize*qx) + (dataSize*Q1D*qy);
756  const int id1 = os + ((c == 0) ? 0 : ((c == 1) ? (symmetric ? 1 : 3) :
757  (symmetric ? 2 : 6))); // O11, O21, O31
758  const int id2 = os + ((c == 0) ? 1 : ((c == 1) ? (symmetric ? 3 : 4) :
759  (symmetric ? 4 : 7))); // O12, O22, O32
760  const int id3 = os + ((c == 0) ? 2 : ((c == 1) ? (symmetric ? 4 : 5) :
761  (symmetric ? 5 : 8))); // O13, O23, O33
762 
763  const double m_c = (sop[id1] * mass[qy][qx][0]) + (sop[id2] * mass[qy][qx][1]) +
764  (sop[id3] * mass[qy][qx][2]);
765 
766  const double wx = (c == 0) ? sBo[qx][dx] : sBc[qx][dx];
767  dxyz += m_c * wx * wy * wz;
768  }
769  }
770  }
771  }
772  }
773 
774  MFEM_SYNC_THREAD;
775 
776  MFEM_FOREACH_THREAD(dz,z,D1Dz)
777  {
778  MFEM_FOREACH_THREAD(dy,y,D1Dy)
779  {
780  MFEM_FOREACH_THREAD(dx,x,D1Dx)
781  {
782  Y(dx + ((dy + (dz * D1Dy)) * D1Dx) + osc, e) += dxyz;
783  }
784  }
785  }
786 
787  osc += D1Dx * D1Dy * D1Dz;
788  } // c loop
789  } // qz
790  }); // end of element loop
791 }
792 
793 // PA H(curl) curl-curl assemble 2D kernel
794 static void PACurlCurlSetup2D(const int Q1D,
795  const int NE,
796  const Array<double> &w,
797  const Vector &j,
798  Vector &coeff,
799  Vector &op)
800 {
801  const int NQ = Q1D*Q1D;
802  auto W = w.Read();
803  auto J = Reshape(j.Read(), NQ, 2, 2, NE);
804  auto C = Reshape(coeff.Read(), NQ, NE);
805  auto y = Reshape(op.Write(), NQ, NE);
806  MFEM_FORALL(e, NE,
807  {
808  for (int q = 0; q < NQ; ++q)
809  {
810  const double J11 = J(q,0,0,e);
811  const double J21 = J(q,1,0,e);
812  const double J12 = J(q,0,1,e);
813  const double J22 = J(q,1,1,e);
814  const double detJ = (J11*J22)-(J21*J12);
815  y(q,e) = W[q] * C(q,e) / detJ;
816  }
817  });
818 }
819 
820 // PA H(curl) curl-curl assemble 3D kernel
821 static void PACurlCurlSetup3D(const int Q1D,
822  const int coeffDim,
823  const int NE,
824  const Array<double> &w,
825  const Vector &j,
826  Vector &coeff,
827  Vector &op)
828 {
829  const int NQ = Q1D*Q1D*Q1D;
830  const bool symmetric = (coeffDim != 9);
831  auto W = w.Read();
832  auto J = Reshape(j.Read(), NQ, 3, 3, NE);
833  auto C = Reshape(coeff.Read(), coeffDim, NQ, NE);
834  auto y = Reshape(op.Write(), NQ, symmetric ? 6 : 9, NE);
835 
836  MFEM_FORALL(e, NE,
837  {
838  for (int q = 0; q < NQ; ++q)
839  {
840  const double J11 = J(q,0,0,e);
841  const double J21 = J(q,1,0,e);
842  const double J31 = J(q,2,0,e);
843  const double J12 = J(q,0,1,e);
844  const double J22 = J(q,1,1,e);
845  const double J32 = J(q,2,1,e);
846  const double J13 = J(q,0,2,e);
847  const double J23 = J(q,1,2,e);
848  const double J33 = J(q,2,2,e);
849  const double detJ = J11 * (J22 * J33 - J32 * J23) -
850  /* */ J21 * (J12 * J33 - J32 * J13) +
851  /* */ J31 * (J12 * J23 - J22 * J13);
852 
853  const double c_detJ = W[q] / detJ;
854 
855  if (coeffDim == 6 || coeffDim == 9) // Matrix coefficient version
856  {
857  // Set y to the 6 or 9 entries of J^T M J / det
858  const double M11 = C(0, q, e);
859  const double M12 = C(1, q, e);
860  const double M13 = C(2, q, e);
861  const double M21 = (!symmetric) ? C(3, q, e) : M12;
862  const double M22 = (!symmetric) ? C(4, q, e) : C(3, q, e);
863  const double M23 = (!symmetric) ? C(5, q, e) : C(4, q, e);
864  const double M31 = (!symmetric) ? C(6, q, e) : M13;
865  const double M32 = (!symmetric) ? C(7, q, e) : M23;
866  const double M33 = (!symmetric) ? C(8, q, e) : C(5, q, e);
867 
868  // First compute R = MJ
869  const double R11 = M11*J11 + M12*J21 + M13*J31;
870  const double R12 = M11*J12 + M12*J22 + M13*J32;
871  const double R13 = M11*J13 + M12*J23 + M13*J33;
872  const double R21 = M21*J11 + M22*J21 + M23*J31;
873  const double R22 = M21*J12 + M22*J22 + M23*J32;
874  const double R23 = M21*J13 + M22*J23 + M23*J33;
875  const double R31 = M31*J11 + M32*J21 + M33*J31;
876  const double R32 = M31*J12 + M32*J22 + M33*J32;
877  const double R33 = M31*J13 + M32*J23 + M33*J33;
878 
879  // Now set y to J^T R / det
880  y(q,0,e) = c_detJ * (J11*R11 + J21*R21 + J31*R31); // 1,1
881  const double Y12 = c_detJ * (J11*R12 + J21*R22 + J31*R32);
882  y(q,1,e) = Y12; // 1,2
883  y(q,2,e) = c_detJ * (J11*R13 + J21*R23 + J31*R33); // 1,3
884 
885  const double Y21 = c_detJ * (J12*R11 + J22*R21 + J32*R31);
886  const double Y22 = c_detJ * (J12*R12 + J22*R22 + J32*R32);
887  const double Y23 = c_detJ * (J12*R13 + J22*R23 + J32*R33);
888 
889  const double Y33 = c_detJ * (J13*R13 + J23*R23 + J33*R33);
890 
891  y(q,3,e) = symmetric ? Y22 : Y21; // 2,2 or 2,1
892  y(q,4,e) = symmetric ? Y23 : Y22; // 2,3 or 2,2
893  y(q,5,e) = symmetric ? Y33 : Y23; // 3,3 or 2,3
894 
895  if (!symmetric)
896  {
897  y(q,6,e) = c_detJ * (J13*R11 + J23*R21 + J33*R31); // 3,1
898  y(q,7,e) = c_detJ * (J13*R12 + J23*R22 + J33*R32); // 3,2
899  y(q,8,e) = Y33; // 3,3
900  }
901  }
902  else // Vector or scalar coefficient version
903  {
904  // Set y to the 6 entries of J^T D J / det^2
905  const double D1 = C(0, q, e);
906  const double D2 = coeffDim == 3 ? C(1, q, e) : D1;
907  const double D3 = coeffDim == 3 ? C(2, q, e) : D1;
908 
909  y(q,0,e) = c_detJ * (D1*J11*J11 + D2*J21*J21 + D3*J31*J31); // 1,1
910  y(q,1,e) = c_detJ * (D1*J11*J12 + D2*J21*J22 + D3*J31*J32); // 1,2
911  y(q,2,e) = c_detJ * (D1*J11*J13 + D2*J21*J23 + D3*J31*J33); // 1,3
912  y(q,3,e) = c_detJ * (D1*J12*J12 + D2*J22*J22 + D3*J32*J32); // 2,2
913  y(q,4,e) = c_detJ * (D1*J12*J13 + D2*J22*J23 + D3*J32*J33); // 2,3
914  y(q,5,e) = c_detJ * (D1*J13*J13 + D2*J23*J23 + D3*J33*J33); // 3,3
915  }
916  }
917  });
918 }
919 
920 void CurlCurlIntegrator::AssemblePA(const FiniteElementSpace &fes)
921 {
922  // Assumes tensor-product elements
923  Mesh *mesh = fes.GetMesh();
924  const FiniteElement *fel = fes.GetFE(0);
925 
926  const VectorTensorFiniteElement *el =
927  dynamic_cast<const VectorTensorFiniteElement*>(fel);
928  MFEM_VERIFY(el != NULL, "Only VectorTensorFiniteElement is supported!");
929 
930  const IntegrationRule *ir
931  = IntRule ? IntRule : &MassIntegrator::GetRule(*el, *el,
932  *mesh->GetElementTransformation(0));
933 
934  const int dims = el->GetDim();
935  MFEM_VERIFY(dims == 2 || dims == 3, "");
936 
937  const int nq = ir->GetNPoints();
938  dim = mesh->Dimension();
939  MFEM_VERIFY(dim == 2 || dim == 3, "");
940 
941  const int dimc = (dim == 3) ? 3 : 1;
942 
943  ne = fes.GetNE();
944  geom = mesh->GetGeometricFactors(*ir, GeometricFactors::JACOBIANS);
945  mapsC = &el->GetDofToQuad(*ir, DofToQuad::TENSOR);
946  mapsO = &el->GetDofToQuadOpen(*ir, DofToQuad::TENSOR);
947  dofs1D = mapsC->ndof;
948  quad1D = mapsC->nqpt;
949 
950  MFEM_VERIFY(dofs1D == mapsO->ndof + 1 && quad1D == mapsO->nqpt, "");
951 
952  const int MQsymmDim = SMQ ? (SMQ->GetSize() * (SMQ->GetSize() + 1)) / 2 : 0;
953  const int MQfullDim = MQ ? (MQ->GetHeight() * MQ->GetWidth()) : 0;
954  const int MQdim = MQ ? MQfullDim : MQsymmDim;
955  const int coeffDim = (MQ || SMQ) ? MQdim : (DQ ? DQ->GetVDim() : 1);
956 
957  symmetric = (MQ == NULL);
958 
959  const int symmDims = (dims * (dims + 1)) / 2; // 1x1: 1, 2x2: 3, 3x3: 6
960  const int ndata = (dim == 2) ? 1 : (symmetric ? symmDims : MQfullDim);
961  pa_data.SetSize(ndata * nq * ne, Device::GetMemoryType());
962 
963  Vector coeff(coeffDim * ne * nq);
964  coeff = 1.0;
965  auto coeffh = Reshape(coeff.HostWrite(), coeffDim, nq, ne);
966  if (Q || DQ || MQ || SMQ)
967  {
968  Vector D(DQ ? coeffDim : 0);
969  DenseMatrix M;
970  DenseSymmetricMatrix SM;
971 
972  if (DQ)
973  {
974  MFEM_VERIFY(coeffDim == dimc, "");
975  }
976  if (MQ)
977  {
978  M.SetSize(dimc);
979  MFEM_VERIFY(coeffDim == MQdim, "");
980  MFEM_VERIFY(MQ->GetHeight() == dimc && MQ->GetWidth() == dimc, "");
981  }
982  if (SMQ)
983  {
984  SM.SetSize(dimc);
985  MFEM_VERIFY(SMQ->GetSize() == dimc, "");
986  }
987 
988  for (int e=0; e<ne; ++e)
989  {
990  ElementTransformation *tr = mesh->GetElementTransformation(e);
991  for (int p=0; p<nq; ++p)
992  {
993  if (MQ)
994  {
995  MQ->Eval(M, *tr, ir->IntPoint(p));
996 
997  for (int i=0; i<dimc; ++i)
998  for (int j=0; j<dimc; ++j)
999  {
1000  coeffh(j+(i*dimc), p, e) = M(i,j);
1001  }
1002 
1003  }
1004  else if (SMQ)
1005  {
1006  SMQ->Eval(SM, *tr, ir->IntPoint(p));
1007 
1008  int cnt = 0;
1009  for (int i=0; i<dimc; ++i)
1010  for (int j=i; j<dimc; ++j, ++cnt)
1011  {
1012  coeffh(cnt, p, e) = SM(i,j);
1013  }
1014 
1015  }
1016  else if (DQ)
1017  {
1018  DQ->Eval(D, *tr, ir->IntPoint(p));
1019  for (int i=0; i<coeffDim; ++i)
1020  {
1021  coeffh(i, p, e) = D[i];
1022  }
1023  }
1024  else
1025  {
1026  coeffh(0, p, e) = Q->Eval(*tr, ir->IntPoint(p));
1027  }
1028  }
1029  }
1030  }
1031 
1032  if (el->GetDerivType() != mfem::FiniteElement::CURL)
1033  {
1034  MFEM_ABORT("Unknown kernel.");
1035  }
1036 
1037  if (dim == 3)
1038  {
1039  PACurlCurlSetup3D(quad1D, coeffDim, ne, ir->GetWeights(), geom->J, coeff,
1040  pa_data);
1041  }
1042  else
1043  {
1044  PACurlCurlSetup2D(quad1D, ne, ir->GetWeights(), geom->J, coeff, pa_data);
1045  }
1046 }
1047 
1048 static void PACurlCurlApply2D(const int D1D,
1049  const int Q1D,
1050  const int NE,
1051  const Array<double> &bo,
1052  const Array<double> &bot,
1053  const Array<double> &gc,
1054  const Array<double> &gct,
1055  const Vector &pa_data,
1056  const Vector &x,
1057  Vector &y)
1058 {
1059  constexpr static int VDIM = 2;
1060  constexpr static int MAX_D1D = HCURL_MAX_D1D;
1061  constexpr static int MAX_Q1D = HCURL_MAX_Q1D;
1062 
1063  auto Bo = Reshape(bo.Read(), Q1D, D1D-1);
1064  auto Bot = Reshape(bot.Read(), D1D-1, Q1D);
1065  auto Gc = Reshape(gc.Read(), Q1D, D1D);
1066  auto Gct = Reshape(gct.Read(), D1D, Q1D);
1067  auto op = Reshape(pa_data.Read(), Q1D, Q1D, NE);
1068  auto X = Reshape(x.Read(), 2*(D1D-1)*D1D, NE);
1069  auto Y = Reshape(y.ReadWrite(), 2*(D1D-1)*D1D, NE);
1070 
1071  MFEM_FORALL(e, NE,
1072  {
1073  double curl[MAX_Q1D][MAX_Q1D];
1074 
1075  // curl[qy][qx] will be computed as du_y/dx - du_x/dy
1076 
1077  for (int qy = 0; qy < Q1D; ++qy)
1078  {
1079  for (int qx = 0; qx < Q1D; ++qx)
1080  {
1081  curl[qy][qx] = 0.0;
1082  }
1083  }
1084 
1085  int osc = 0;
1086 
1087  for (int c = 0; c < VDIM; ++c) // loop over x, y components
1088  {
1089  const int D1Dy = (c == 1) ? D1D - 1 : D1D;
1090  const int D1Dx = (c == 0) ? D1D - 1 : D1D;
1091 
1092  for (int dy = 0; dy < D1Dy; ++dy)
1093  {
1094  double gradX[MAX_Q1D];
1095  for (int qx = 0; qx < Q1D; ++qx)
1096  {
1097  gradX[qx] = 0;
1098  }
1099 
1100  for (int dx = 0; dx < D1Dx; ++dx)
1101  {
1102  const double t = X(dx + (dy * D1Dx) + osc, e);
1103  for (int qx = 0; qx < Q1D; ++qx)
1104  {
1105  gradX[qx] += t * ((c == 0) ? Bo(qx,dx) : Gc(qx,dx));
1106  }
1107  }
1108 
1109  for (int qy = 0; qy < Q1D; ++qy)
1110  {
1111  const double wy = (c == 0) ? -Gc(qy,dy) : Bo(qy,dy);
1112  for (int qx = 0; qx < Q1D; ++qx)
1113  {
1114  curl[qy][qx] += gradX[qx] * wy;
1115  }
1116  }
1117  }
1118 
1119  osc += D1Dx * D1Dy;
1120  } // loop (c) over components
1121 
1122  // Apply D operator.
1123  for (int qy = 0; qy < Q1D; ++qy)
1124  {
1125  for (int qx = 0; qx < Q1D; ++qx)
1126  {
1127  curl[qy][qx] *= op(qx,qy,e);
1128  }
1129  }
1130 
1131  for (int qy = 0; qy < Q1D; ++qy)
1132  {
1133  osc = 0;
1134 
1135  for (int c = 0; c < VDIM; ++c) // loop over x, y components
1136  {
1137  const int D1Dy = (c == 1) ? D1D - 1 : D1D;
1138  const int D1Dx = (c == 0) ? D1D - 1 : D1D;
1139 
1140  double gradX[MAX_D1D];
1141  for (int dx = 0; dx < D1Dx; ++dx)
1142  {
1143  gradX[dx] = 0.0;
1144  }
1145  for (int qx = 0; qx < Q1D; ++qx)
1146  {
1147  for (int dx = 0; dx < D1Dx; ++dx)
1148  {
1149  gradX[dx] += curl[qy][qx] * ((c == 0) ? Bot(dx,qx) : Gct(dx,qx));
1150  }
1151  }
1152  for (int dy = 0; dy < D1Dy; ++dy)
1153  {
1154  const double wy = (c == 0) ? -Gct(dy,qy) : Bot(dy,qy);
1155 
1156  for (int dx = 0; dx < D1Dx; ++dx)
1157  {
1158  Y(dx + (dy * D1Dx) + osc, e) += gradX[dx] * wy;
1159  }
1160  }
1161 
1162  osc += D1Dx * D1Dy;
1163  } // loop c
1164  } // loop qy
1165  }); // end of element loop
1166 }
1167 
1168 template<int MAX_D1D = HCURL_MAX_D1D, int MAX_Q1D = HCURL_MAX_Q1D>
1169 static void PACurlCurlApply3D(const int D1D,
1170  const int Q1D,
1171  const bool symmetric,
1172  const int NE,
1173  const Array<double> &bo,
1174  const Array<double> &bc,
1175  const Array<double> &bot,
1176  const Array<double> &bct,
1177  const Array<double> &gc,
1178  const Array<double> &gct,
1179  const Vector &pa_data,
1180  const Vector &x,
1181  Vector &y)
1182 {
1183  MFEM_VERIFY(D1D <= MAX_D1D, "Error: D1D > MAX_D1D");
1184  MFEM_VERIFY(Q1D <= MAX_Q1D, "Error: Q1D > MAX_Q1D");
1185  // Using (\nabla\times u) F = 1/det(dF) dF \hat{\nabla}\times\hat{u} (p. 78 of Monk), we get
1186  // (\nabla\times u) \cdot (\nabla\times v) = 1/det(dF)^2 \hat{\nabla}\times\hat{u}^T dF^T dF \hat{\nabla}\times\hat{v}
1187  // If c = 0, \hat{\nabla}\times\hat{u} reduces to [0, (u_0)_{x_2}, -(u_0)_{x_1}]
1188  // If c = 1, \hat{\nabla}\times\hat{u} reduces to [-(u_1)_{x_2}, 0, (u_1)_{x_0}]
1189  // If c = 2, \hat{\nabla}\times\hat{u} reduces to [(u_2)_{x_1}, -(u_2)_{x_0}, 0]
1190 
1191  constexpr static int VDIM = 3;
1192 
1193  auto Bo = Reshape(bo.Read(), Q1D, D1D-1);
1194  auto Bc = Reshape(bc.Read(), Q1D, D1D);
1195  auto Bot = Reshape(bot.Read(), D1D-1, Q1D);
1196  auto Bct = Reshape(bct.Read(), D1D, Q1D);
1197  auto Gc = Reshape(gc.Read(), Q1D, D1D);
1198  auto Gct = Reshape(gct.Read(), D1D, Q1D);
1199  auto op = Reshape(pa_data.Read(), Q1D, Q1D, Q1D, (symmetric ? 6 : 9), NE);
1200  auto X = Reshape(x.Read(), 3*(D1D-1)*D1D*D1D, NE);
1201  auto Y = Reshape(y.ReadWrite(), 3*(D1D-1)*D1D*D1D, NE);
1202 
1203  MFEM_FORALL(e, NE,
1204  {
1205  double curl[MAX_Q1D][MAX_Q1D][MAX_Q1D][VDIM];
1206  // curl[qz][qy][qx] will be computed as the vector curl at each quadrature point.
1207 
1208  for (int qz = 0; qz < Q1D; ++qz)
1209  {
1210  for (int qy = 0; qy < Q1D; ++qy)
1211  {
1212  for (int qx = 0; qx < Q1D; ++qx)
1213  {
1214  for (int c = 0; c < VDIM; ++c)
1215  {
1216  curl[qz][qy][qx][c] = 0.0;
1217  }
1218  }
1219  }
1220  }
1221 
1222  // We treat x, y, z components separately for optimization specific to each.
1223 
1224  int osc = 0;
1225 
1226  {
1227  // x component
1228  const int D1Dz = D1D;
1229  const int D1Dy = D1D;
1230  const int D1Dx = D1D - 1;
1231 
1232  for (int dz = 0; dz < D1Dz; ++dz)
1233  {
1234  double gradXY[MAX_Q1D][MAX_Q1D][2];
1235  for (int qy = 0; qy < Q1D; ++qy)
1236  {
1237  for (int qx = 0; qx < Q1D; ++qx)
1238  {
1239  for (int d = 0; d < 2; ++d)
1240  {
1241  gradXY[qy][qx][d] = 0.0;
1242  }
1243  }
1244  }
1245 
1246  for (int dy = 0; dy < D1Dy; ++dy)
1247  {
1248  double massX[MAX_Q1D];
1249  for (int qx = 0; qx < Q1D; ++qx)
1250  {
1251  massX[qx] = 0.0;
1252  }
1253 
1254  for (int dx = 0; dx < D1Dx; ++dx)
1255  {
1256  const double t = X(dx + ((dy + (dz * D1Dy)) * D1Dx) + osc, e);
1257  for (int qx = 0; qx < Q1D; ++qx)
1258  {
1259  massX[qx] += t * Bo(qx,dx);
1260  }
1261  }
1262 
1263  for (int qy = 0; qy < Q1D; ++qy)
1264  {
1265  const double wy = Bc(qy,dy);
1266  const double wDy = Gc(qy,dy);
1267  for (int qx = 0; qx < Q1D; ++qx)
1268  {
1269  const double wx = massX[qx];
1270  gradXY[qy][qx][0] += wx * wDy;
1271  gradXY[qy][qx][1] += wx * wy;
1272  }
1273  }
1274  }
1275 
1276  for (int qz = 0; qz < Q1D; ++qz)
1277  {
1278  const double wz = Bc(qz,dz);
1279  const double wDz = Gc(qz,dz);
1280  for (int qy = 0; qy < Q1D; ++qy)
1281  {
1282  for (int qx = 0; qx < Q1D; ++qx)
1283  {
1284  // \hat{\nabla}\times\hat{u} is [0, (u_0)_{x_2}, -(u_0)_{x_1}]
1285  curl[qz][qy][qx][1] += gradXY[qy][qx][1] * wDz; // (u_0)_{x_2}
1286  curl[qz][qy][qx][2] -= gradXY[qy][qx][0] * wz; // -(u_0)_{x_1}
1287  }
1288  }
1289  }
1290  }
1291 
1292  osc += D1Dx * D1Dy * D1Dz;
1293  }
1294 
1295  {
1296  // y component
1297  const int D1Dz = D1D;
1298  const int D1Dy = D1D - 1;
1299  const int D1Dx = D1D;
1300 
1301  for (int dz = 0; dz < D1Dz; ++dz)
1302  {
1303  double gradXY[MAX_Q1D][MAX_Q1D][2];
1304  for (int qy = 0; qy < Q1D; ++qy)
1305  {
1306  for (int qx = 0; qx < Q1D; ++qx)
1307  {
1308  for (int d = 0; d < 2; ++d)
1309  {
1310  gradXY[qy][qx][d] = 0.0;
1311  }
1312  }
1313  }
1314 
1315  for (int dx = 0; dx < D1Dx; ++dx)
1316  {
1317  double massY[MAX_Q1D];
1318  for (int qy = 0; qy < Q1D; ++qy)
1319  {
1320  massY[qy] = 0.0;
1321  }
1322 
1323  for (int dy = 0; dy < D1Dy; ++dy)
1324  {
1325  const double t = X(dx + ((dy + (dz * D1Dy)) * D1Dx) + osc, e);
1326  for (int qy = 0; qy < Q1D; ++qy)
1327  {
1328  massY[qy] += t * Bo(qy,dy);
1329  }
1330  }
1331 
1332  for (int qx = 0; qx < Q1D; ++qx)
1333  {
1334  const double wx = Bc(qx,dx);
1335  const double wDx = Gc(qx,dx);
1336  for (int qy = 0; qy < Q1D; ++qy)
1337  {
1338  const double wy = massY[qy];
1339  gradXY[qy][qx][0] += wDx * wy;
1340  gradXY[qy][qx][1] += wx * wy;
1341  }
1342  }
1343  }
1344 
1345  for (int qz = 0; qz < Q1D; ++qz)
1346  {
1347  const double wz = Bc(qz,dz);
1348  const double wDz = Gc(qz,dz);
1349  for (int qy = 0; qy < Q1D; ++qy)
1350  {
1351  for (int qx = 0; qx < Q1D; ++qx)
1352  {
1353  // \hat{\nabla}\times\hat{u} is [-(u_1)_{x_2}, 0, (u_1)_{x_0}]
1354  curl[qz][qy][qx][0] -= gradXY[qy][qx][1] * wDz; // -(u_1)_{x_2}
1355  curl[qz][qy][qx][2] += gradXY[qy][qx][0] * wz; // (u_1)_{x_0}
1356  }
1357  }
1358  }
1359  }
1360 
1361  osc += D1Dx * D1Dy * D1Dz;
1362  }
1363 
1364  {
1365  // z component
1366  const int D1Dz = D1D - 1;
1367  const int D1Dy = D1D;
1368  const int D1Dx = D1D;
1369 
1370  for (int dx = 0; dx < D1Dx; ++dx)
1371  {
1372  double gradYZ[MAX_Q1D][MAX_Q1D][2];
1373  for (int qz = 0; qz < Q1D; ++qz)
1374  {
1375  for (int qy = 0; qy < Q1D; ++qy)
1376  {
1377  for (int d = 0; d < 2; ++d)
1378  {
1379  gradYZ[qz][qy][d] = 0.0;
1380  }
1381  }
1382  }
1383 
1384  for (int dy = 0; dy < D1Dy; ++dy)
1385  {
1386  double massZ[MAX_Q1D];
1387  for (int qz = 0; qz < Q1D; ++qz)
1388  {
1389  massZ[qz] = 0.0;
1390  }
1391 
1392  for (int dz = 0; dz < D1Dz; ++dz)
1393  {
1394  const double t = X(dx + ((dy + (dz * D1Dy)) * D1Dx) + osc, e);
1395  for (int qz = 0; qz < Q1D; ++qz)
1396  {
1397  massZ[qz] += t * Bo(qz,dz);
1398  }
1399  }
1400 
1401  for (int qy = 0; qy < Q1D; ++qy)
1402  {
1403  const double wy = Bc(qy,dy);
1404  const double wDy = Gc(qy,dy);
1405  for (int qz = 0; qz < Q1D; ++qz)
1406  {
1407  const double wz = massZ[qz];
1408  gradYZ[qz][qy][0] += wz * wy;
1409  gradYZ[qz][qy][1] += wz * wDy;
1410  }
1411  }
1412  }
1413 
1414  for (int qx = 0; qx < Q1D; ++qx)
1415  {
1416  const double wx = Bc(qx,dx);
1417  const double wDx = Gc(qx,dx);
1418 
1419  for (int qy = 0; qy < Q1D; ++qy)
1420  {
1421  for (int qz = 0; qz < Q1D; ++qz)
1422  {
1423  // \hat{\nabla}\times\hat{u} is [(u_2)_{x_1}, -(u_2)_{x_0}, 0]
1424  curl[qz][qy][qx][0] += gradYZ[qz][qy][1] * wx; // (u_2)_{x_1}
1425  curl[qz][qy][qx][1] -= gradYZ[qz][qy][0] * wDx; // -(u_2)_{x_0}
1426  }
1427  }
1428  }
1429  }
1430  }
1431 
1432  // Apply D operator.
1433  for (int qz = 0; qz < Q1D; ++qz)
1434  {
1435  for (int qy = 0; qy < Q1D; ++qy)
1436  {
1437  for (int qx = 0; qx < Q1D; ++qx)
1438  {
1439  const double O11 = op(qx,qy,qz,0,e);
1440  const double O12 = op(qx,qy,qz,1,e);
1441  const double O13 = op(qx,qy,qz,2,e);
1442  const double O21 = symmetric ? O12 : op(qx,qy,qz,3,e);
1443  const double O22 = symmetric ? op(qx,qy,qz,3,e) : op(qx,qy,qz,4,e);
1444  const double O23 = symmetric ? op(qx,qy,qz,4,e) : op(qx,qy,qz,5,e);
1445  const double O31 = symmetric ? O13 : op(qx,qy,qz,6,e);
1446  const double O32 = symmetric ? O23 : op(qx,qy,qz,7,e);
1447  const double O33 = symmetric ? op(qx,qy,qz,5,e) : op(qx,qy,qz,8,e);
1448 
1449  const double c1 = (O11 * curl[qz][qy][qx][0]) + (O12 * curl[qz][qy][qx][1]) +
1450  (O13 * curl[qz][qy][qx][2]);
1451  const double c2 = (O21 * curl[qz][qy][qx][0]) + (O22 * curl[qz][qy][qx][1]) +
1452  (O23 * curl[qz][qy][qx][2]);
1453  const double c3 = (O31 * curl[qz][qy][qx][0]) + (O32 * curl[qz][qy][qx][1]) +
1454  (O33 * curl[qz][qy][qx][2]);
1455 
1456  curl[qz][qy][qx][0] = c1;
1457  curl[qz][qy][qx][1] = c2;
1458  curl[qz][qy][qx][2] = c3;
1459  }
1460  }
1461  }
1462 
1463  // x component
1464  osc = 0;
1465  {
1466  const int D1Dz = D1D;
1467  const int D1Dy = D1D;
1468  const int D1Dx = D1D - 1;
1469 
1470  for (int qz = 0; qz < Q1D; ++qz)
1471  {
1472  double gradXY12[MAX_D1D][MAX_D1D];
1473  double gradXY21[MAX_D1D][MAX_D1D];
1474 
1475  for (int dy = 0; dy < D1Dy; ++dy)
1476  {
1477  for (int dx = 0; dx < D1Dx; ++dx)
1478  {
1479  gradXY12[dy][dx] = 0.0;
1480  gradXY21[dy][dx] = 0.0;
1481  }
1482  }
1483  for (int qy = 0; qy < Q1D; ++qy)
1484  {
1485  double massX[MAX_D1D][2];
1486  for (int dx = 0; dx < D1Dx; ++dx)
1487  {
1488  for (int n = 0; n < 2; ++n)
1489  {
1490  massX[dx][n] = 0.0;
1491  }
1492  }
1493  for (int qx = 0; qx < Q1D; ++qx)
1494  {
1495  for (int dx = 0; dx < D1Dx; ++dx)
1496  {
1497  const double wx = Bot(dx,qx);
1498 
1499  massX[dx][0] += wx * curl[qz][qy][qx][1];
1500  massX[dx][1] += wx * curl[qz][qy][qx][2];
1501  }
1502  }
1503  for (int dy = 0; dy < D1Dy; ++dy)
1504  {
1505  const double wy = Bct(dy,qy);
1506  const double wDy = Gct(dy,qy);
1507 
1508  for (int dx = 0; dx < D1Dx; ++dx)
1509  {
1510  gradXY21[dy][dx] += massX[dx][0] * wy;
1511  gradXY12[dy][dx] += massX[dx][1] * wDy;
1512  }
1513  }
1514  }
1515 
1516  for (int dz = 0; dz < D1Dz; ++dz)
1517  {
1518  const double wz = Bct(dz,qz);
1519  const double wDz = Gct(dz,qz);
1520  for (int dy = 0; dy < D1Dy; ++dy)
1521  {
1522  for (int dx = 0; dx < D1Dx; ++dx)
1523  {
1524  // \hat{\nabla}\times\hat{u} is [0, (u_0)_{x_2}, -(u_0)_{x_1}]
1525  // (u_0)_{x_2} * (op * curl)_1 - (u_0)_{x_1} * (op * curl)_2
1526  Y(dx + ((dy + (dz * D1Dy)) * D1Dx) + osc,
1527  e) += (gradXY21[dy][dx] * wDz) - (gradXY12[dy][dx] * wz);
1528  }
1529  }
1530  }
1531  } // loop qz
1532 
1533  osc += D1Dx * D1Dy * D1Dz;
1534  }
1535 
1536  // y component
1537  {
1538  const int D1Dz = D1D;
1539  const int D1Dy = D1D - 1;
1540  const int D1Dx = D1D;
1541 
1542  for (int qz = 0; qz < Q1D; ++qz)
1543  {
1544  double gradXY02[MAX_D1D][MAX_D1D];
1545  double gradXY20[MAX_D1D][MAX_D1D];
1546 
1547  for (int dy = 0; dy < D1Dy; ++dy)
1548  {
1549  for (int dx = 0; dx < D1Dx; ++dx)
1550  {
1551  gradXY02[dy][dx] = 0.0;
1552  gradXY20[dy][dx] = 0.0;
1553  }
1554  }
1555  for (int qx = 0; qx < Q1D; ++qx)
1556  {
1557  double massY[MAX_D1D][2];
1558  for (int dy = 0; dy < D1Dy; ++dy)
1559  {
1560  massY[dy][0] = 0.0;
1561  massY[dy][1] = 0.0;
1562  }
1563  for (int qy = 0; qy < Q1D; ++qy)
1564  {
1565  for (int dy = 0; dy < D1Dy; ++dy)
1566  {
1567  const double wy = Bot(dy,qy);
1568 
1569  massY[dy][0] += wy * curl[qz][qy][qx][2];
1570  massY[dy][1] += wy * curl[qz][qy][qx][0];
1571  }
1572  }
1573  for (int dx = 0; dx < D1Dx; ++dx)
1574  {
1575  const double wx = Bct(dx,qx);
1576  const double wDx = Gct(dx,qx);
1577 
1578  for (int dy = 0; dy < D1Dy; ++dy)
1579  {
1580  gradXY02[dy][dx] += massY[dy][0] * wDx;
1581  gradXY20[dy][dx] += massY[dy][1] * wx;
1582  }
1583  }
1584  }
1585 
1586  for (int dz = 0; dz < D1Dz; ++dz)
1587  {
1588  const double wz = Bct(dz,qz);
1589  const double wDz = Gct(dz,qz);
1590  for (int dy = 0; dy < D1Dy; ++dy)
1591  {
1592  for (int dx = 0; dx < D1Dx; ++dx)
1593  {
1594  // \hat{\nabla}\times\hat{u} is [-(u_1)_{x_2}, 0, (u_1)_{x_0}]
1595  // -(u_1)_{x_2} * (op * curl)_0 + (u_1)_{x_0} * (op * curl)_2
1596  Y(dx + ((dy + (dz * D1Dy)) * D1Dx) + osc,
1597  e) += (-gradXY20[dy][dx] * wDz) + (gradXY02[dy][dx] * wz);
1598  }
1599  }
1600  }
1601  } // loop qz
1602 
1603  osc += D1Dx * D1Dy * D1Dz;
1604  }
1605 
1606  // z component
1607  {
1608  const int D1Dz = D1D - 1;
1609  const int D1Dy = D1D;
1610  const int D1Dx = D1D;
1611 
1612  for (int qx = 0; qx < Q1D; ++qx)
1613  {
1614  double gradYZ01[MAX_D1D][MAX_D1D];
1615  double gradYZ10[MAX_D1D][MAX_D1D];
1616 
1617  for (int dy = 0; dy < D1Dy; ++dy)
1618  {
1619  for (int dz = 0; dz < D1Dz; ++dz)
1620  {
1621  gradYZ01[dz][dy] = 0.0;
1622  gradYZ10[dz][dy] = 0.0;
1623  }
1624  }
1625  for (int qy = 0; qy < Q1D; ++qy)
1626  {
1627  double massZ[MAX_D1D][2];
1628  for (int dz = 0; dz < D1Dz; ++dz)
1629  {
1630  for (int n = 0; n < 2; ++n)
1631  {
1632  massZ[dz][n] = 0.0;
1633  }
1634  }
1635  for (int qz = 0; qz < Q1D; ++qz)
1636  {
1637  for (int dz = 0; dz < D1Dz; ++dz)
1638  {
1639  const double wz = Bot(dz,qz);
1640 
1641  massZ[dz][0] += wz * curl[qz][qy][qx][0];
1642  massZ[dz][1] += wz * curl[qz][qy][qx][1];
1643  }
1644  }
1645  for (int dy = 0; dy < D1Dy; ++dy)
1646  {
1647  const double wy = Bct(dy,qy);
1648  const double wDy = Gct(dy,qy);
1649 
1650  for (int dz = 0; dz < D1Dz; ++dz)
1651  {
1652  gradYZ01[dz][dy] += wy * massZ[dz][1];
1653  gradYZ10[dz][dy] += wDy * massZ[dz][0];
1654  }
1655  }
1656  }
1657 
1658  for (int dx = 0; dx < D1Dx; ++dx)
1659  {
1660  const double wx = Bct(dx,qx);
1661  const double wDx = Gct(dx,qx);
1662 
1663  for (int dy = 0; dy < D1Dy; ++dy)
1664  {
1665  for (int dz = 0; dz < D1Dz; ++dz)
1666  {
1667  // \hat{\nabla}\times\hat{u} is [(u_2)_{x_1}, -(u_2)_{x_0}, 0]
1668  // (u_2)_{x_1} * (op * curl)_0 - (u_2)_{x_0} * (op * curl)_1
1669  Y(dx + ((dy + (dz * D1Dy)) * D1Dx) + osc,
1670  e) += (gradYZ10[dz][dy] * wx) - (gradYZ01[dz][dy] * wDx);
1671  }
1672  }
1673  }
1674  } // loop qx
1675  }
1676  }); // end of element loop
1677 }
1678 
1679 template<int MAX_D1D = HCURL_MAX_D1D, int MAX_Q1D = HCURL_MAX_Q1D>
1680 static void SmemPACurlCurlApply3D(const int D1D,
1681  const int Q1D,
1682  const bool symmetric,
1683  const int NE,
1684  const Array<double> &bo,
1685  const Array<double> &bc,
1686  const Array<double> &bot,
1687  const Array<double> &bct,
1688  const Array<double> &gc,
1689  const Array<double> &gct,
1690  const Vector &pa_data,
1691  const Vector &x,
1692  Vector &y)
1693 {
1694  MFEM_VERIFY(D1D <= MAX_D1D, "Error: D1D > MAX_D1D");
1695  MFEM_VERIFY(Q1D <= MAX_Q1D, "Error: Q1D > MAX_Q1D");
1696  // Using (\nabla\times u) F = 1/det(dF) dF \hat{\nabla}\times\hat{u} (p. 78 of Monk), we get
1697  // (\nabla\times u) \cdot (\nabla\times v) = 1/det(dF)^2 \hat{\nabla}\times\hat{u}^T dF^T dF \hat{\nabla}\times\hat{v}
1698  // If c = 0, \hat{\nabla}\times\hat{u} reduces to [0, (u_0)_{x_2}, -(u_0)_{x_1}]
1699  // If c = 1, \hat{\nabla}\times\hat{u} reduces to [-(u_1)_{x_2}, 0, (u_1)_{x_0}]
1700  // If c = 2, \hat{\nabla}\times\hat{u} reduces to [(u_2)_{x_1}, -(u_2)_{x_0}, 0]
1701 
1702  auto Bo = Reshape(bo.Read(), Q1D, D1D-1);
1703  auto Bc = Reshape(bc.Read(), Q1D, D1D);
1704  auto Gc = Reshape(gc.Read(), Q1D, D1D);
1705  auto op = Reshape(pa_data.Read(), Q1D, Q1D, Q1D, symmetric ? 6 : 9, NE);
1706  auto X = Reshape(x.Read(), 3*(D1D-1)*D1D*D1D, NE);
1707  auto Y = Reshape(y.ReadWrite(), 3*(D1D-1)*D1D*D1D, NE);
1708 
1709  const int s = symmetric ? 6 : 9;
1710 
1711  MFEM_FORALL_3D(e, NE, Q1D, Q1D, Q1D,
1712  {
1713  constexpr int VDIM = 3;
1714 
1715  MFEM_SHARED double sBo[MAX_D1D][MAX_Q1D];
1716  MFEM_SHARED double sBc[MAX_D1D][MAX_Q1D];
1717  MFEM_SHARED double sGc[MAX_D1D][MAX_Q1D];
1718 
1719  double ope[9];
1720  MFEM_SHARED double sop[9][MAX_Q1D][MAX_Q1D];
1721  MFEM_SHARED double curl[MAX_Q1D][MAX_Q1D][3];
1722 
1723  MFEM_SHARED double sX[MAX_D1D][MAX_D1D][MAX_D1D];
1724 
1725  MFEM_FOREACH_THREAD(qx,x,Q1D)
1726  {
1727  MFEM_FOREACH_THREAD(qy,y,Q1D)
1728  {
1729  MFEM_FOREACH_THREAD(qz,z,Q1D)
1730  {
1731  for (int i=0; i<s; ++i)
1732  {
1733  ope[i] = op(qx,qy,qz,i,e);
1734  }
1735  }
1736  }
1737  }
1738 
1739  const int tidx = MFEM_THREAD_ID(x);
1740  const int tidy = MFEM_THREAD_ID(y);
1741  const int tidz = MFEM_THREAD_ID(z);
1742 
1743  if (tidz == 0)
1744  {
1745  MFEM_FOREACH_THREAD(d,y,D1D)
1746  {
1747  MFEM_FOREACH_THREAD(q,x,Q1D)
1748  {
1749  sBc[d][q] = Bc(q,d);
1750  sGc[d][q] = Gc(q,d);
1751  if (d < D1D-1)
1752  {
1753  sBo[d][q] = Bo(q,d);
1754  }
1755  }
1756  }
1757  }
1758  MFEM_SYNC_THREAD;
1759 
1760  for (int qz=0; qz < Q1D; ++qz)
1761  {
1762  if (tidz == qz)
1763  {
1764  MFEM_FOREACH_THREAD(qy,y,Q1D)
1765  {
1766  MFEM_FOREACH_THREAD(qx,x,Q1D)
1767  {
1768  for (int i=0; i<3; ++i)
1769  {
1770  curl[qy][qx][i] = 0.0;
1771  }
1772  }
1773  }
1774  }
1775 
1776  int osc = 0;
1777  for (int c = 0; c < VDIM; ++c) // loop over x, y, z components
1778  {
1779  const int D1Dz = (c == 2) ? D1D - 1 : D1D;
1780  const int D1Dy = (c == 1) ? D1D - 1 : D1D;
1781  const int D1Dx = (c == 0) ? D1D - 1 : D1D;
1782 
1783  MFEM_FOREACH_THREAD(dz,z,D1Dz)
1784  {
1785  MFEM_FOREACH_THREAD(dy,y,D1Dy)
1786  {
1787  MFEM_FOREACH_THREAD(dx,x,D1Dx)
1788  {
1789  sX[dz][dy][dx] = X(dx + ((dy + (dz * D1Dy)) * D1Dx) + osc, e);
1790  }
1791  }
1792  }
1793  MFEM_SYNC_THREAD;
1794 
1795  if (tidz == qz)
1796  {
1797  if (c == 0)
1798  {
1799  for (int i=0; i<s; ++i)
1800  {
1801  sop[i][tidx][tidy] = ope[i];
1802  }
1803  }
1804 
1805  MFEM_FOREACH_THREAD(qy,y,Q1D)
1806  {
1807  MFEM_FOREACH_THREAD(qx,x,Q1D)
1808  {
1809  double u = 0.0;
1810  double v = 0.0;
1811 
1812  // We treat x, y, z components separately for optimization specific to each.
1813  if (c == 0) // x component
1814  {
1815  // \hat{\nabla}\times\hat{u} is [0, (u_0)_{x_2}, -(u_0)_{x_1}]
1816 
1817  for (int dz = 0; dz < D1Dz; ++dz)
1818  {
1819  const double wz = sBc[dz][qz];
1820  const double wDz = sGc[dz][qz];
1821 
1822  for (int dy = 0; dy < D1Dy; ++dy)
1823  {
1824  const double wy = sBc[dy][qy];
1825  const double wDy = sGc[dy][qy];
1826 
1827  for (int dx = 0; dx < D1Dx; ++dx)
1828  {
1829  const double wx = sX[dz][dy][dx] * sBo[dx][qx];
1830  u += wx * wDy * wz;
1831  v += wx * wy * wDz;
1832  }
1833  }
1834  }
1835 
1836  curl[qy][qx][1] += v; // (u_0)_{x_2}
1837  curl[qy][qx][2] -= u; // -(u_0)_{x_1}
1838  }
1839  else if (c == 1) // y component
1840  {
1841  // \hat{\nabla}\times\hat{u} is [-(u_1)_{x_2}, 0, (u_1)_{x_0}]
1842 
1843  for (int dz = 0; dz < D1Dz; ++dz)
1844  {
1845  const double wz = sBc[dz][qz];
1846  const double wDz = sGc[dz][qz];
1847 
1848  for (int dy = 0; dy < D1Dy; ++dy)
1849  {
1850  const double wy = sBo[dy][qy];
1851 
1852  for (int dx = 0; dx < D1Dx; ++dx)
1853  {
1854  const double t = sX[dz][dy][dx];
1855  const double wx = t * sBc[dx][qx];
1856  const double wDx = t * sGc[dx][qx];
1857 
1858  u += wDx * wy * wz;
1859  v += wx * wy * wDz;
1860  }
1861  }
1862  }
1863 
1864  curl[qy][qx][0] -= v; // -(u_1)_{x_2}
1865  curl[qy][qx][2] += u; // (u_1)_{x_0}
1866  }
1867  else // z component
1868  {
1869  // \hat{\nabla}\times\hat{u} is [(u_2)_{x_1}, -(u_2)_{x_0}, 0]
1870 
1871  for (int dz = 0; dz < D1Dz; ++dz)
1872  {
1873  const double wz = sBo[dz][qz];
1874 
1875  for (int dy = 0; dy < D1Dy; ++dy)
1876  {
1877  const double wy = sBc[dy][qy];
1878  const double wDy = sGc[dy][qy];
1879 
1880  for (int dx = 0; dx < D1Dx; ++dx)
1881  {
1882  const double t = sX[dz][dy][dx];
1883  const double wx = t * sBc[dx][qx];
1884  const double wDx = t * sGc[dx][qx];
1885 
1886  u += wDx * wy * wz;
1887  v += wx * wDy * wz;
1888  }
1889  }
1890  }
1891 
1892  curl[qy][qx][0] += v; // (u_2)_{x_1}
1893  curl[qy][qx][1] -= u; // -(u_2)_{x_0}
1894  }
1895  } // qx
1896  } // qy
1897  } // tidz == qz
1898 
1899  osc += D1Dx * D1Dy * D1Dz;
1900  MFEM_SYNC_THREAD;
1901  } // c
1902 
1903  double dxyz1 = 0.0;
1904  double dxyz2 = 0.0;
1905  double dxyz3 = 0.0;
1906 
1907  MFEM_FOREACH_THREAD(dz,z,D1D)
1908  {
1909  const double wcz = sBc[dz][qz];
1910  const double wcDz = sGc[dz][qz];
1911  const double wz = (dz < D1D-1) ? sBo[dz][qz] : 0.0;
1912 
1913  MFEM_FOREACH_THREAD(dy,y,D1D)
1914  {
1915  MFEM_FOREACH_THREAD(dx,x,D1D)
1916  {
1917  for (int qy = 0; qy < Q1D; ++qy)
1918  {
1919  const double wcy = sBc[dy][qy];
1920  const double wcDy = sGc[dy][qy];
1921  const double wy = (dy < D1D-1) ? sBo[dy][qy] : 0.0;
1922 
1923  for (int qx = 0; qx < Q1D; ++qx)
1924  {
1925  const double O11 = sop[0][qx][qy];
1926  const double O12 = sop[1][qx][qy];
1927  const double O13 = sop[2][qx][qy];
1928  const double O21 = symmetric ? O12 : sop[3][qx][qy];
1929  const double O22 = symmetric ? sop[3][qx][qy] : sop[4][qx][qy];
1930  const double O23 = symmetric ? sop[4][qx][qy] : sop[5][qx][qy];
1931  const double O31 = symmetric ? O13 : sop[6][qx][qy];
1932  const double O32 = symmetric ? O23 : sop[7][qx][qy];
1933  const double O33 = symmetric ? sop[5][qx][qy] : sop[8][qx][qy];
1934 
1935  const double c1 = (O11 * curl[qy][qx][0]) + (O12 * curl[qy][qx][1]) +
1936  (O13 * curl[qy][qx][2]);
1937  const double c2 = (O21 * curl[qy][qx][0]) + (O22 * curl[qy][qx][1]) +
1938  (O23 * curl[qy][qx][2]);
1939  const double c3 = (O31 * curl[qy][qx][0]) + (O32 * curl[qy][qx][1]) +
1940  (O33 * curl[qy][qx][2]);
1941 
1942  const double wcx = sBc[dx][qx];
1943  const double wDx = sGc[dx][qx];
1944 
1945  if (dx < D1D-1)
1946  {
1947  // \hat{\nabla}\times\hat{u} is [0, (u_0)_{x_2}, -(u_0)_{x_1}]
1948  // (u_0)_{x_2} * (op * curl)_1 - (u_0)_{x_1} * (op * curl)_2
1949  const double wx = sBo[dx][qx];
1950  dxyz1 += (wx * c2 * wcy * wcDz) - (wx * c3 * wcDy * wcz);
1951  }
1952 
1953  // \hat{\nabla}\times\hat{u} is [-(u_1)_{x_2}, 0, (u_1)_{x_0}]
1954  // -(u_1)_{x_2} * (op * curl)_0 + (u_1)_{x_0} * (op * curl)_2
1955  dxyz2 += (-wy * c1 * wcx * wcDz) + (wy * c3 * wDx * wcz);
1956 
1957  // \hat{\nabla}\times\hat{u} is [(u_2)_{x_1}, -(u_2)_{x_0}, 0]
1958  // (u_2)_{x_1} * (op * curl)_0 - (u_2)_{x_0} * (op * curl)_1
1959  dxyz3 += (wcDy * wz * c1 * wcx) - (wcy * wz * c2 * wDx);
1960  } // qx
1961  } // qy
1962  } // dx
1963  } // dy
1964  } // dz
1965 
1966  MFEM_SYNC_THREAD;
1967 
1968  MFEM_FOREACH_THREAD(dz,z,D1D)
1969  {
1970  MFEM_FOREACH_THREAD(dy,y,D1D)
1971  {
1972  MFEM_FOREACH_THREAD(dx,x,D1D)
1973  {
1974  if (dx < D1D-1)
1975  {
1976  Y(dx + ((dy + (dz * D1D)) * (D1D-1)), e) += dxyz1;
1977  }
1978  if (dy < D1D-1)
1979  {
1980  Y(dx + ((dy + (dz * (D1D-1))) * D1D) + ((D1D-1)*D1D*D1D), e) += dxyz2;
1981  }
1982  if (dz < D1D-1)
1983  {
1984  Y(dx + ((dy + (dz * D1D)) * D1D) + (2*(D1D-1)*D1D*D1D), e) += dxyz3;
1985  }
1986  }
1987  }
1988  }
1989  } // qz
1990  }); // end of element loop
1991 }
1992 
1993 void CurlCurlIntegrator::AddMultPA(const Vector &x, Vector &y) const
1994 {
1995  if (dim == 3)
1996  {
1997  if (Device::Allows(Backend::DEVICE_MASK))
1998  {
1999  const int ID = (dofs1D << 4) | quad1D;
2000  switch (ID)
2001  {
2002  case 0x23: return SmemPACurlCurlApply3D<2,3>(dofs1D, quad1D, symmetric, ne,
2003  mapsO->B, mapsC->B, mapsO->Bt,
2004  mapsC->Bt, mapsC->G, mapsC->Gt, pa_data, x, y);
2005  case 0x34: return SmemPACurlCurlApply3D<3,4>(dofs1D, quad1D, symmetric, ne,
2006  mapsO->B, mapsC->B, mapsO->Bt,
2007  mapsC->Bt, mapsC->G, mapsC->Gt, pa_data, x, y);
2008  case 0x45: return SmemPACurlCurlApply3D<4,5>(dofs1D, quad1D, symmetric, ne,
2009  mapsO->B,
2010  mapsC->B, mapsO->Bt,
2011  mapsC->Bt, mapsC->G, mapsC->Gt, pa_data, x, y);
2012  case 0x56: return SmemPACurlCurlApply3D<5,6>(dofs1D, quad1D, symmetric, ne,
2013  mapsO->B, mapsC->B, mapsO->Bt,
2014  mapsC->Bt, mapsC->G, mapsC->Gt, pa_data, x, y);
2015  default: return SmemPACurlCurlApply3D(dofs1D, quad1D, symmetric, ne, mapsO->B,
2016  mapsC->B, mapsO->Bt, mapsC->Bt,
2017  mapsC->G, mapsC->Gt, pa_data, x, y);
2018  }
2019  }
2020  else
2021  PACurlCurlApply3D(dofs1D, quad1D, symmetric, ne, mapsO->B, mapsC->B, mapsO->Bt,
2022  mapsC->Bt, mapsC->G, mapsC->Gt, pa_data, x, y);
2023  }
2024  else if (dim == 2)
2025  {
2026  PACurlCurlApply2D(dofs1D, quad1D, ne, mapsO->B, mapsO->Bt,
2027  mapsC->G, mapsC->Gt, pa_data, x, y);
2028  }
2029  else
2030  {
2031  MFEM_ABORT("Unsupported dimension!");
2032  }
2033 }
2034 
2035 static void PACurlCurlAssembleDiagonal2D(const int D1D,
2036  const int Q1D,
2037  const int NE,
2038  const Array<double> &bo,
2039  const Array<double> &gc,
2040  const Vector &pa_data,
2041  Vector &diag)
2042 {
2043  constexpr static int VDIM = 2;
2044  constexpr static int MAX_Q1D = HCURL_MAX_Q1D;
2045 
2046  auto Bo = Reshape(bo.Read(), Q1D, D1D-1);
2047  auto Gc = Reshape(gc.Read(), Q1D, D1D);
2048  auto op = Reshape(pa_data.Read(), Q1D, Q1D, NE);
2049  auto D = Reshape(diag.ReadWrite(), 2*(D1D-1)*D1D, NE);
2050 
2051  MFEM_FORALL(e, NE,
2052  {
2053  int osc = 0;
2054 
2055  for (int c = 0; c < VDIM; ++c) // loop over x, y components
2056  {
2057  const int D1Dy = (c == 1) ? D1D - 1 : D1D;
2058  const int D1Dx = (c == 0) ? D1D - 1 : D1D;
2059 
2060  double t[MAX_Q1D];
2061 
2062  for (int dy = 0; dy < D1Dy; ++dy)
2063  {
2064  for (int qx = 0; qx < Q1D; ++qx)
2065  {
2066  t[qx] = 0.0;
2067  for (int qy = 0; qy < Q1D; ++qy)
2068  {
2069  const double wy = (c == 1) ? Bo(qy,dy) : -Gc(qy,dy);
2070  t[qx] += wy * wy * op(qx,qy,e);
2071  }
2072  }
2073 
2074  for (int dx = 0; dx < D1Dx; ++dx)
2075  {
2076  for (int qx = 0; qx < Q1D; ++qx)
2077  {
2078  const double wx = ((c == 0) ? Bo(qx,dx) : Gc(qx,dx));
2079  D(dx + (dy * D1Dx) + osc, e) += t[qx] * wx * wx;
2080  }
2081  }
2082  }
2083 
2084  osc += D1Dx * D1Dy;
2085  } // loop c
2086  }); // end of element loop
2087 }
2088 
2089 template<int MAX_D1D = HCURL_MAX_D1D, int MAX_Q1D = HCURL_MAX_Q1D>
2090 static void PACurlCurlAssembleDiagonal3D(const int D1D,
2091  const int Q1D,
2092  const bool symmetric,
2093  const int NE,
2094  const Array<double> &bo,
2095  const Array<double> &bc,
2096  const Array<double> &go,
2097  const Array<double> &gc,
2098  const Vector &pa_data,
2099  Vector &diag)
2100 {
2101  constexpr static int VDIM = 3;
2102  MFEM_VERIFY(D1D <= MAX_D1D, "Error: D1D > MAX_D1D");
2103  MFEM_VERIFY(Q1D <= MAX_Q1D, "Error: Q1D > MAX_Q1D");
2104 
2105  auto Bo = Reshape(bo.Read(), Q1D, D1D-1);
2106  auto Bc = Reshape(bc.Read(), Q1D, D1D);
2107  auto Go = Reshape(go.Read(), Q1D, D1D-1);
2108  auto Gc = Reshape(gc.Read(), Q1D, D1D);
2109  auto op = Reshape(pa_data.Read(), Q1D, Q1D, Q1D, (symmetric ? 6 : 9), NE);
2110  auto D = Reshape(diag.ReadWrite(), 3*(D1D-1)*D1D*D1D, NE);
2111 
2112  const int s = symmetric ? 6 : 9;
2113  const int i11 = 0;
2114  const int i12 = 1;
2115  const int i13 = 2;
2116  const int i21 = symmetric ? i12 : 3;
2117  const int i22 = symmetric ? 3 : 4;
2118  const int i23 = symmetric ? 4 : 5;
2119  const int i31 = symmetric ? i13 : 6;
2120  const int i32 = symmetric ? i23 : 7;
2121  const int i33 = symmetric ? 5 : 8;
2122 
2123  MFEM_FORALL(e, NE,
2124  {
2125  // Using (\nabla\times u) F = 1/det(dF) dF \hat{\nabla}\times\hat{u} (p. 78 of Monk), we get
2126  // (\nabla\times u) \cdot (\nabla\times u) = 1/det(dF)^2 \hat{\nabla}\times\hat{u}^T dF^T dF \hat{\nabla}\times\hat{u}
2127  // If c = 0, \hat{\nabla}\times\hat{u} reduces to [0, (u_0)_{x_2}, -(u_0)_{x_1}]
2128  // If c = 1, \hat{\nabla}\times\hat{u} reduces to [-(u_1)_{x_2}, 0, (u_1)_{x_0}]
2129  // If c = 2, \hat{\nabla}\times\hat{u} reduces to [(u_2)_{x_1}, -(u_2)_{x_0}, 0]
2130 
2131  // For each c, we will keep 9 arrays for derivatives multiplied by the 9 entries of the 3x3 matrix (dF^T C dF),
2132  // which may be non-symmetric depending on a possibly non-symmetric matrix coefficient.
2133 
2134  int osc = 0;
2135 
2136  for (int c = 0; c < VDIM; ++c) // loop over x, y, z components
2137  {
2138  const int D1Dz = (c == 2) ? D1D - 1 : D1D;
2139  const int D1Dy = (c == 1) ? D1D - 1 : D1D;
2140  const int D1Dx = (c == 0) ? D1D - 1 : D1D;
2141 
2142  double zt[MAX_Q1D][MAX_Q1D][MAX_D1D][9][3];
2143 
2144  // z contraction
2145  for (int qx = 0; qx < Q1D; ++qx)
2146  {
2147  for (int qy = 0; qy < Q1D; ++qy)
2148  {
2149  for (int dz = 0; dz < D1Dz; ++dz)
2150  {
2151  for (int i=0; i<s; ++i)
2152  {
2153  for (int d=0; d<3; ++d)
2154  {
2155  zt[qx][qy][dz][i][d] = 0.0;
2156  }
2157  }
2158 
2159  for (int qz = 0; qz < Q1D; ++qz)
2160  {
2161  const double wz = ((c == 2) ? Bo(qz,dz) : Bc(qz,dz));
2162  const double wDz = ((c == 2) ? Go(qz,dz) : Gc(qz,dz));
2163 
2164  for (int i=0; i<s; ++i)
2165  {
2166  zt[qx][qy][dz][i][0] += wz * wz * op(qx,qy,qz,i,e);
2167  zt[qx][qy][dz][i][1] += wDz * wz * op(qx,qy,qz,i,e);
2168  zt[qx][qy][dz][i][2] += wDz * wDz * op(qx,qy,qz,i,e);
2169  }
2170  }
2171  }
2172  }
2173  } // end of z contraction
2174 
2175  double yt[MAX_Q1D][MAX_D1D][MAX_D1D][9][3][3];
2176 
2177  // y contraction
2178  for (int qx = 0; qx < Q1D; ++qx)
2179  {
2180  for (int dz = 0; dz < D1Dz; ++dz)
2181  {
2182  for (int dy = 0; dy < D1Dy; ++dy)
2183  {
2184  for (int i=0; i<s; ++i)
2185  {
2186  for (int d=0; d<3; ++d)
2187  for (int j=0; j<3; ++j)
2188  {
2189  yt[qx][dy][dz][i][d][j] = 0.0;
2190  }
2191  }
2192 
2193  for (int qy = 0; qy < Q1D; ++qy)
2194  {
2195  const double wy = ((c == 1) ? Bo(qy,dy) : Bc(qy,dy));
2196  const double wDy = ((c == 1) ? Go(qy,dy) : Gc(qy,dy));
2197 
2198  for (int i=0; i<s; ++i)
2199  {
2200  for (int d=0; d<3; ++d)
2201  {
2202  yt[qx][dy][dz][i][d][0] += wy * wy * zt[qx][qy][dz][i][d];
2203  yt[qx][dy][dz][i][d][1] += wDy * wy * zt[qx][qy][dz][i][d];
2204  yt[qx][dy][dz][i][d][2] += wDy * wDy * zt[qx][qy][dz][i][d];
2205  }
2206  }
2207  }
2208  }
2209  }
2210  } // end of y contraction
2211 
2212  // x contraction
2213  for (int dz = 0; dz < D1Dz; ++dz)
2214  {
2215  for (int dy = 0; dy < D1Dy; ++dy)
2216  {
2217  for (int dx = 0; dx < D1Dx; ++dx)
2218  {
2219  for (int qx = 0; qx < Q1D; ++qx)
2220  {
2221  const double wx = ((c == 0) ? Bo(qx,dx) : Bc(qx,dx));
2222  const double wDx = ((c == 0) ? Go(qx,dx) : Gc(qx,dx));
2223 
2224  // Using (\nabla\times u) F = 1/det(dF) dF \hat{\nabla}\times\hat{u} (p. 78 of Monk), we get
2225  // (\nabla\times u) \cdot (\nabla\times u) = 1/det(dF)^2 \hat{\nabla}\times\hat{u}^T dF^T dF \hat{\nabla}\times\hat{u}
2226  // If c = 0, \hat{\nabla}\times\hat{u} reduces to [0, (u_0)_{x_2}, -(u_0)_{x_1}]
2227  // If c = 1, \hat{\nabla}\times\hat{u} reduces to [-(u_1)_{x_2}, 0, (u_1)_{x_0}]
2228  // If c = 2, \hat{\nabla}\times\hat{u} reduces to [(u_2)_{x_1}, -(u_2)_{x_0}, 0]
2229 
2230  /*
2231  const double O11 = op(q,0,e);
2232  const double O12 = op(q,1,e);
2233  const double O13 = op(q,2,e);
2234  const double O22 = op(q,3,e);
2235  const double O23 = op(q,4,e);
2236  const double O33 = op(q,5,e);
2237  */
2238 
2239  if (c == 0)
2240  {
2241  // (u_0)_{x_2} (O22 (u_0)_{x_2} - O23 (u_0)_{x_1}) - (u_0)_{x_1} (O32 (u_0)_{x_2} - O33 (u_0)_{x_1})
2242  const double sumy = yt[qx][dy][dz][i22][2][0] - yt[qx][dy][dz][i23][1][1]
2243  - yt[qx][dy][dz][i32][1][1] + yt[qx][dy][dz][i33][0][2];
2244 
2245  D(dx + ((dy + (dz * D1Dy)) * D1Dx) + osc, e) += sumy * wx * wx;
2246  }
2247  else if (c == 1)
2248  {
2249  // (u_1)_{x_2} (O11 (u_1)_{x_2} - O13 (u_1)_{x_0}) + (u_1)_{x_0} (-O31 (u_1)_{x_2} + O33 (u_1)_{x_0})
2250  const double d = (yt[qx][dy][dz][i11][2][0] * wx * wx)
2251  - ((yt[qx][dy][dz][i13][1][0] + yt[qx][dy][dz][i31][1][0]) * wDx * wx)
2252  + (yt[qx][dy][dz][i33][0][0] * wDx * wDx);
2253 
2254  D(dx + ((dy + (dz * D1Dy)) * D1Dx) + osc, e) += d;
2255  }
2256  else
2257  {
2258  // (u_2)_{x_1} (O11 (u_2)_{x_1} - O12 (u_2)_{x_0}) - (u_2)_{x_0} (O21 (u_2)_{x_1} - O22 (u_2)_{x_0})
2259  const double d = (yt[qx][dy][dz][i11][0][2] * wx * wx)
2260  - ((yt[qx][dy][dz][i12][0][1] + yt[qx][dy][dz][i21][0][1]) * wDx * wx)
2261  + (yt[qx][dy][dz][i22][0][0] * wDx * wDx);
2262 
2263  D(dx + ((dy + (dz * D1Dy)) * D1Dx) + osc, e) += d;
2264  }
2265  }
2266  }
2267  }
2268  } // end of x contraction
2269 
2270  osc += D1Dx * D1Dy * D1Dz;
2271  } // loop c
2272  }); // end of element loop
2273 }
2274 
2275 template<int MAX_D1D = HCURL_MAX_D1D, int MAX_Q1D = HCURL_MAX_Q1D>
2276 static void SmemPACurlCurlAssembleDiagonal3D(const int D1D,
2277  const int Q1D,
2278  const bool symmetric,
2279  const int NE,
2280  const Array<double> &bo,
2281  const Array<double> &bc,
2282  const Array<double> &go,
2283  const Array<double> &gc,
2284  const Vector &pa_data,
2285  Vector &diag)
2286 {
2287  MFEM_VERIFY(D1D <= MAX_D1D, "Error: D1D > MAX_D1D");
2288  MFEM_VERIFY(Q1D <= MAX_Q1D, "Error: Q1D > MAX_Q1D");
2289 
2290  auto Bo = Reshape(bo.Read(), Q1D, D1D-1);
2291  auto Bc = Reshape(bc.Read(), Q1D, D1D);
2292  auto Go = Reshape(go.Read(), Q1D, D1D-1);
2293  auto Gc = Reshape(gc.Read(), Q1D, D1D);
2294  auto op = Reshape(pa_data.Read(), Q1D, Q1D, Q1D, (symmetric ? 6 : 9), NE);
2295  auto D = Reshape(diag.ReadWrite(), 3*(D1D-1)*D1D*D1D, NE);
2296 
2297  const int s = symmetric ? 6 : 9;
2298  const int i11 = 0;
2299  const int i12 = 1;
2300  const int i13 = 2;
2301  const int i21 = symmetric ? i12 : 3;
2302  const int i22 = symmetric ? 3 : 4;
2303  const int i23 = symmetric ? 4 : 5;
2304  const int i31 = symmetric ? i13 : 6;
2305  const int i32 = symmetric ? i23 : 7;
2306  const int i33 = symmetric ? 5 : 8;
2307 
2308  MFEM_FORALL_3D(e, NE, Q1D, Q1D, Q1D,
2309  {
2310  // Using (\nabla\times u) F = 1/det(dF) dF \hat{\nabla}\times\hat{u} (p. 78 of Monk), we get
2311  // (\nabla\times u) \cdot (\nabla\times u) = 1/det(dF)^2 \hat{\nabla}\times\hat{u}^T dF^T dF \hat{\nabla}\times\hat{u}
2312  // If c = 0, \hat{\nabla}\times\hat{u} reduces to [0, (u_0)_{x_2}, -(u_0)_{x_1}]
2313  // If c = 1, \hat{\nabla}\times\hat{u} reduces to [-(u_1)_{x_2}, 0, (u_1)_{x_0}]
2314  // If c = 2, \hat{\nabla}\times\hat{u} reduces to [(u_2)_{x_1}, -(u_2)_{x_0}, 0]
2315 
2316  constexpr int VDIM = 3;
2317 
2318  MFEM_SHARED double sBo[MAX_Q1D][MAX_D1D];
2319  MFEM_SHARED double sBc[MAX_Q1D][MAX_D1D];
2320  MFEM_SHARED double sGo[MAX_Q1D][MAX_D1D];
2321  MFEM_SHARED double sGc[MAX_Q1D][MAX_D1D];
2322 
2323  double ope[9];
2324  MFEM_SHARED double sop[9][MAX_Q1D][MAX_Q1D];
2325 
2326  MFEM_FOREACH_THREAD(qx,x,Q1D)
2327  {
2328  MFEM_FOREACH_THREAD(qy,y,Q1D)
2329  {
2330  MFEM_FOREACH_THREAD(qz,z,Q1D)
2331  {
2332  for (int i=0; i<s; ++i)
2333  {
2334  ope[i] = op(qx,qy,qz,i,e);
2335  }
2336  }
2337  }
2338  }
2339 
2340  const int tidx = MFEM_THREAD_ID(x);
2341  const int tidy = MFEM_THREAD_ID(y);
2342  const int tidz = MFEM_THREAD_ID(z);
2343 
2344  if (tidz == 0)
2345  {
2346  MFEM_FOREACH_THREAD(d,y,D1D)
2347  {
2348  MFEM_FOREACH_THREAD(q,x,Q1D)
2349  {
2350  sBc[q][d] = Bc(q,d);
2351  sGc[q][d] = Gc(q,d);
2352  if (d < D1D-1)
2353  {
2354  sBo[q][d] = Bo(q,d);
2355  sGo[q][d] = Go(q,d);
2356  }
2357  }
2358  }
2359  }
2360  MFEM_SYNC_THREAD;
2361 
2362  int osc = 0;
2363  for (int c = 0; c < VDIM; ++c) // loop over x, y, z components
2364  {
2365  const int D1Dz = (c == 2) ? D1D - 1 : D1D;
2366  const int D1Dy = (c == 1) ? D1D - 1 : D1D;
2367  const int D1Dx = (c == 0) ? D1D - 1 : D1D;
2368 
2369  double dxyz = 0.0;
2370 
2371  for (int qz=0; qz < Q1D; ++qz)
2372  {
2373  if (tidz == qz)
2374  {
2375  for (int i=0; i<s; ++i)
2376  {
2377  sop[i][tidx][tidy] = ope[i];
2378  }
2379  }
2380 
2381  MFEM_SYNC_THREAD;
2382 
2383  MFEM_FOREACH_THREAD(dz,z,D1Dz)
2384  {
2385  const double wz = ((c == 2) ? sBo[qz][dz] : sBc[qz][dz]);
2386  const double wDz = ((c == 2) ? sGo[qz][dz] : sGc[qz][dz]);
2387 
2388  MFEM_FOREACH_THREAD(dy,y,D1Dy)
2389  {
2390  MFEM_FOREACH_THREAD(dx,x,D1Dx)
2391  {
2392  for (int qy = 0; qy < Q1D; ++qy)
2393  {
2394  const double wy = ((c == 1) ? sBo[qy][dy] : sBc[qy][dy]);
2395  const double wDy = ((c == 1) ? sGo[qy][dy] : sGc[qy][dy]);
2396 
2397  for (int qx = 0; qx < Q1D; ++qx)
2398  {
2399  const double wx = ((c == 0) ? sBo[qx][dx] : sBc[qx][dx]);
2400  const double wDx = ((c == 0) ? sGo[qx][dx] : sGc[qx][dx]);
2401 
2402  if (c == 0)
2403  {
2404  // (u_0)_{x_2} (O22 (u_0)_{x_2} - O23 (u_0)_{x_1}) - (u_0)_{x_1} (O32 (u_0)_{x_2} - O33 (u_0)_{x_1})
2405 
2406  // (u_0)_{x_2} O22 (u_0)_{x_2}
2407  dxyz += sop[i22][qx][qy] * wx * wx * wy * wy * wDz * wDz;
2408 
2409  // -(u_0)_{x_2} O23 (u_0)_{x_1} - (u_0)_{x_1} O32 (u_0)_{x_2}
2410  dxyz += -(sop[i23][qx][qy] + sop[i32][qx][qy]) * wx * wx * wDy * wy * wDz * wz;
2411 
2412  // (u_0)_{x_1} O33 (u_0)_{x_1}
2413  dxyz += sop[i33][qx][qy] * wx * wx * wDy * wDy * wz * wz;
2414  }
2415  else if (c == 1)
2416  {
2417  // (u_1)_{x_2} (O11 (u_1)_{x_2} - O13 (u_1)_{x_0}) + (u_1)_{x_0} (-O31 (u_1)_{x_2} + O33 (u_1)_{x_0})
2418 
2419  // (u_1)_{x_2} O11 (u_1)_{x_2}
2420  dxyz += sop[i11][qx][qy] * wx * wx * wy * wy * wDz * wDz;
2421 
2422  // -(u_1)_{x_2} O13 (u_1)_{x_0} - (u_1)_{x_0} O31 (u_1)_{x_2}
2423  dxyz += -(sop[i13][qx][qy] + sop[i31][qx][qy]) * wDx * wx * wy * wy * wDz * wz;
2424 
2425  // (u_1)_{x_0} O33 (u_1)_{x_0})
2426  dxyz += sop[i33][qx][qy] * wDx * wDx * wy * wy * wz * wz;
2427  }
2428  else
2429  {
2430  // (u_2)_{x_1} (O11 (u_2)_{x_1} - O12 (u_2)_{x_0}) - (u_2)_{x_0} (O21 (u_2)_{x_1} - O22 (u_2)_{x_0})
2431 
2432  // (u_2)_{x_1} O11 (u_2)_{x_1}
2433  dxyz += sop[i11][qx][qy] * wx * wx * wDy * wDy * wz * wz;
2434 
2435  // -(u_2)_{x_1} O12 (u_2)_{x_0} - (u_2)_{x_0} O21 (u_2)_{x_1}
2436  dxyz += -(sop[i12][qx][qy] + sop[i21][qx][qy]) * wDx * wx * wDy * wy * wz * wz;
2437 
2438  // (u_2)_{x_0} O22 (u_2)_{x_0}
2439  dxyz += sop[i22][qx][qy] * wDx * wDx * wy * wy * wz * wz;
2440  }
2441  }
2442  }
2443  }
2444  }
2445  }
2446 
2447  MFEM_SYNC_THREAD;
2448  } // qz loop
2449 
2450  MFEM_FOREACH_THREAD(dz,z,D1Dz)
2451  {
2452  MFEM_FOREACH_THREAD(dy,y,D1Dy)
2453  {
2454  MFEM_FOREACH_THREAD(dx,x,D1Dx)
2455  {
2456  D(dx + ((dy + (dz * D1Dy)) * D1Dx) + osc, e) += dxyz;
2457  }
2458  }
2459  }
2460 
2461  osc += D1Dx * D1Dy * D1Dz;
2462  } // c loop
2463  }); // end of element loop
2464 }
2465 
2466 void CurlCurlIntegrator::AssembleDiagonalPA(Vector& diag)
2467 {
2468  if (dim == 3)
2469  {
2470  if (Device::Allows(Backend::DEVICE_MASK))
2471  {
2472  const int ID = (dofs1D << 4) | quad1D;
2473  switch (ID)
2474  {
2475  case 0x23: return SmemPACurlCurlAssembleDiagonal3D<2,3>(dofs1D, quad1D,
2476  symmetric, ne,
2477  mapsO->B, mapsC->B,
2478  mapsO->G, mapsC->G,
2479  pa_data, diag);
2480  case 0x34: return SmemPACurlCurlAssembleDiagonal3D<3,4>(dofs1D, quad1D,
2481  symmetric, ne,
2482  mapsO->B, mapsC->B,
2483  mapsO->G, mapsC->G,
2484  pa_data, diag);
2485  case 0x45: return SmemPACurlCurlAssembleDiagonal3D<4,5>(dofs1D, quad1D,
2486  symmetric, ne,
2487  mapsO->B, mapsC->B,
2488  mapsO->G, mapsC->G,
2489  pa_data, diag);
2490  case 0x56: return SmemPACurlCurlAssembleDiagonal3D<5,6>(dofs1D, quad1D,
2491  symmetric, ne,
2492  mapsO->B, mapsC->B,
2493  mapsO->G, mapsC->G,
2494  pa_data, diag);
2495  default: return SmemPACurlCurlAssembleDiagonal3D(dofs1D, quad1D, symmetric, ne,
2496  mapsO->B, mapsC->B,
2497  mapsO->G, mapsC->G,
2498  pa_data, diag);
2499  }
2500  }
2501  else
2502  PACurlCurlAssembleDiagonal3D(dofs1D, quad1D, symmetric, ne,
2503  mapsO->B, mapsC->B,
2504  mapsO->G, mapsC->G,
2505  pa_data, diag);
2506  }
2507  else if (dim == 2)
2508  {
2509  PACurlCurlAssembleDiagonal2D(dofs1D, quad1D, ne,
2510  mapsO->B, mapsC->G, pa_data, diag);
2511  }
2512  else
2513  {
2514  MFEM_ABORT("Unsupported dimension!");
2515  }
2516 }
2517 
2518 // Apply to x corresponding to DOF's in H^1 (trial), whose gradients are
2519 // integrated against H(curl) test functions corresponding to y.
2520 void PAHcurlH1Apply3D(const int D1D,
2521  const int Q1D,
2522  const int NE,
2523  const Array<double> &bc,
2524  const Array<double> &gc,
2525  const Array<double> &bot,
2526  const Array<double> &bct,
2527  const Vector &pa_data,
2528  const Vector &x,
2529  Vector &y)
2530 {
2531  constexpr static int MAX_D1D = HCURL_MAX_D1D;
2532  constexpr static int MAX_Q1D = HCURL_MAX_Q1D;
2533 
2534  MFEM_VERIFY(D1D <= MAX_D1D, "Error: D1D > MAX_D1D");
2535  MFEM_VERIFY(Q1D <= MAX_Q1D, "Error: Q1D > MAX_Q1D");
2536 
2537  constexpr static int VDIM = 3;
2538 
2539  auto Bc = Reshape(bc.Read(), Q1D, D1D);
2540  auto Gc = Reshape(gc.Read(), Q1D, D1D);
2541  auto Bot = Reshape(bot.Read(), D1D-1, Q1D);
2542  auto Bct = Reshape(bct.Read(), D1D, Q1D);
2543  auto op = Reshape(pa_data.Read(), Q1D, Q1D, Q1D, 6, NE);
2544  auto X = Reshape(x.Read(), D1D, D1D, D1D, NE);
2545  auto Y = Reshape(y.ReadWrite(), 3*(D1D-1)*D1D*D1D, NE);
2546 
2547  MFEM_FORALL(e, NE,
2548  {
2549  double mass[MAX_Q1D][MAX_Q1D][MAX_Q1D][VDIM];
2550 
2551  for (int qz = 0; qz < Q1D; ++qz)
2552  {
2553  for (int qy = 0; qy < Q1D; ++qy)
2554  {
2555  for (int qx = 0; qx < Q1D; ++qx)
2556  {
2557  for (int c = 0; c < VDIM; ++c)
2558  {
2559  mass[qz][qy][qx][c] = 0.0;
2560  }
2561  }
2562  }
2563  }
2564 
2565  for (int dz = 0; dz < D1D; ++dz)
2566  {
2567  double gradXY[MAX_Q1D][MAX_Q1D][3];
2568  for (int qy = 0; qy < Q1D; ++qy)
2569  {
2570  for (int qx = 0; qx < Q1D; ++qx)
2571  {
2572  gradXY[qy][qx][0] = 0.0;
2573  gradXY[qy][qx][1] = 0.0;
2574  gradXY[qy][qx][2] = 0.0;
2575  }
2576  }
2577  for (int dy = 0; dy < D1D; ++dy)
2578  {
2579  double gradX[MAX_Q1D][2];
2580  for (int qx = 0; qx < Q1D; ++qx)
2581  {
2582  gradX[qx][0] = 0.0;
2583  gradX[qx][1] = 0.0;
2584  }
2585  for (int dx = 0; dx < D1D; ++dx)
2586  {
2587  const double s = X(dx,dy,dz,e);
2588  for (int qx = 0; qx < Q1D; ++qx)
2589  {
2590  gradX[qx][0] += s * Bc(qx,dx);
2591  gradX[qx][1] += s * Gc(qx,dx);
2592  }
2593  }
2594  for (int qy = 0; qy < Q1D; ++qy)
2595  {
2596  const double wy = Bc(qy,dy);
2597  const double wDy = Gc(qy,dy);
2598  for (int qx = 0; qx < Q1D; ++qx)
2599  {
2600  const double wx = gradX[qx][0];
2601  const double wDx = gradX[qx][1];
2602  gradXY[qy][qx][0] += wDx * wy;
2603  gradXY[qy][qx][1] += wx * wDy;
2604  gradXY[qy][qx][2] += wx * wy;
2605  }
2606  }
2607  }
2608  for (int qz = 0; qz < Q1D; ++qz)
2609  {
2610  const double wz = Bc(qz,dz);
2611  const double wDz = Gc(qz,dz);
2612  for (int qy = 0; qy < Q1D; ++qy)
2613  {
2614  for (int qx = 0; qx < Q1D; ++qx)
2615  {
2616  mass[qz][qy][qx][0] += gradXY[qy][qx][0] * wz;
2617  mass[qz][qy][qx][1] += gradXY[qy][qx][1] * wz;
2618  mass[qz][qy][qx][2] += gradXY[qy][qx][2] * wDz;
2619  }
2620  }
2621  }
2622  }
2623 
2624  // Apply D operator.
2625  for (int qz = 0; qz < Q1D; ++qz)
2626  {
2627  for (int qy = 0; qy < Q1D; ++qy)
2628  {
2629  for (int qx = 0; qx < Q1D; ++qx)
2630  {
2631  const double O11 = op(qx,qy,qz,0,e);
2632  const double O12 = op(qx,qy,qz,1,e);
2633  const double O13 = op(qx,qy,qz,2,e);
2634  const double O22 = op(qx,qy,qz,3,e);
2635  const double O23 = op(qx,qy,qz,4,e);
2636  const double O33 = op(qx,qy,qz,5,e);
2637  const double massX = mass[qz][qy][qx][0];
2638  const double massY = mass[qz][qy][qx][1];
2639  const double massZ = mass[qz][qy][qx][2];
2640  mass[qz][qy][qx][0] = (O11*massX)+(O12*massY)+(O13*massZ);
2641  mass[qz][qy][qx][1] = (O12*massX)+(O22*massY)+(O23*massZ);
2642  mass[qz][qy][qx][2] = (O13*massX)+(O23*massY)+(O33*massZ);
2643  }
2644  }
2645  }
2646 
2647  for (int qz = 0; qz < Q1D; ++qz)
2648  {
2649  double massXY[MAX_D1D][MAX_D1D];
2650 
2651  int osc = 0;
2652 
2653  for (int c = 0; c < VDIM; ++c) // loop over x, y, z components
2654  {
2655  const int D1Dz = (c == 2) ? D1D - 1 : D1D;
2656  const int D1Dy = (c == 1) ? D1D - 1 : D1D;
2657  const int D1Dx = (c == 0) ? D1D - 1 : D1D;
2658 
2659  for (int dy = 0; dy < D1Dy; ++dy)
2660  {
2661  for (int dx = 0; dx < D1Dx; ++dx)
2662  {
2663  massXY[dy][dx] = 0.0;
2664  }
2665  }
2666  for (int qy = 0; qy < Q1D; ++qy)
2667  {
2668  double massX[MAX_D1D];
2669  for (int dx = 0; dx < D1Dx; ++dx)
2670  {
2671  massX[dx] = 0;
2672  }
2673  for (int qx = 0; qx < Q1D; ++qx)
2674  {
2675  for (int dx = 0; dx < D1Dx; ++dx)
2676  {
2677  massX[dx] += mass[qz][qy][qx][c] * ((c == 0) ? Bot(dx,qx) : Bct(dx,qx));
2678  }
2679  }
2680  for (int dy = 0; dy < D1Dy; ++dy)
2681  {
2682  const double wy = (c == 1) ? Bot(dy,qy) : Bct(dy,qy);
2683  for (int dx = 0; dx < D1Dx; ++dx)
2684  {
2685  massXY[dy][dx] += massX[dx] * wy;
2686  }
2687  }
2688  }
2689 
2690  for (int dz = 0; dz < D1Dz; ++dz)
2691  {
2692  const double wz = (c == 2) ? Bot(dz,qz) : Bct(dz,qz);
2693  for (int dy = 0; dy < D1Dy; ++dy)
2694  {
2695  for (int dx = 0; dx < D1Dx; ++dx)
2696  {
2697  Y(dx + ((dy + (dz * D1Dy)) * D1Dx) + osc, e) += massXY[dy][dx] * wz;
2698  }
2699  }
2700  }
2701 
2702  osc += D1Dx * D1Dy * D1Dz;
2703  } // loop c
2704  } // loop qz
2705  }); // end of element loop
2706 }
2707 
2708 // Apply to x corresponding to DOF's in H^1 (trial), whose gradients are
2709 // integrated against H(curl) test functions corresponding to y.
2710 void PAHcurlH1Apply2D(const int D1D,
2711  const int Q1D,
2712  const int NE,
2713  const Array<double> &bc,
2714  const Array<double> &gc,
2715  const Array<double> &bot,
2716  const Array<double> &bct,
2717  const Vector &pa_data,
2718  const Vector &x,
2719  Vector &y)
2720 {
2721  constexpr static int VDIM = 2;
2722  constexpr static int MAX_D1D = HCURL_MAX_D1D;
2723  constexpr static int MAX_Q1D = HCURL_MAX_Q1D;
2724 
2725  auto Bc = Reshape(bc.Read(), Q1D, D1D);
2726  auto Gc = Reshape(gc.Read(), Q1D, D1D);
2727  auto Bot = Reshape(bot.Read(), D1D-1, Q1D);
2728  auto Bct = Reshape(bct.Read(), D1D, Q1D);
2729  auto op = Reshape(pa_data.Read(), Q1D, Q1D, 3, NE);
2730  auto X = Reshape(x.Read(), D1D, D1D, NE);
2731  auto Y = Reshape(y.ReadWrite(), 2*(D1D-1)*D1D, NE);
2732 
2733  MFEM_FORALL(e, NE,
2734  {
2735  double mass[MAX_Q1D][MAX_Q1D][VDIM];
2736 
2737  for (int qy = 0; qy < Q1D; ++qy)
2738  {
2739  for (int qx = 0; qx < Q1D; ++qx)
2740  {
2741  for (int c = 0; c < VDIM; ++c)
2742  {
2743  mass[qy][qx][c] = 0.0;
2744  }
2745  }
2746  }
2747 
2748  for (int dy = 0; dy < D1D; ++dy)
2749  {
2750  double gradX[MAX_Q1D][2];
2751  for (int qx = 0; qx < Q1D; ++qx)
2752  {
2753  gradX[qx][0] = 0.0;
2754  gradX[qx][1] = 0.0;
2755  }
2756  for (int dx = 0; dx < D1D; ++dx)
2757  {
2758  const double s = X(dx,dy,e);
2759  for (int qx = 0; qx < Q1D; ++qx)
2760  {
2761  gradX[qx][0] += s * Bc(qx,dx);
2762  gradX[qx][1] += s * Gc(qx,dx);
2763  }
2764  }
2765  for (int qy = 0; qy < Q1D; ++qy)
2766  {
2767  const double wy = Bc(qy,dy);
2768  const double wDy = Gc(qy,dy);
2769  for (int qx = 0; qx < Q1D; ++qx)
2770  {
2771  const double wx = gradX[qx][0];
2772  const double wDx = gradX[qx][1];
2773  mass[qy][qx][0] += wDx * wy;
2774  mass[qy][qx][1] += wx * wDy;
2775  }
2776  }
2777  }
2778 
2779  // Apply D operator.
2780  for (int qy = 0; qy < Q1D; ++qy)
2781  {
2782  for (int qx = 0; qx < Q1D; ++qx)
2783  {
2784  const double O11 = op(qx,qy,0,e);
2785  const double O12 = op(qx,qy,1,e);
2786  const double O22 = op(qx,qy,2,e);
2787  const double massX = mass[qy][qx][0];
2788  const double massY = mass[qy][qx][1];
2789  mass[qy][qx][0] = (O11*massX)+(O12*massY);
2790  mass[qy][qx][1] = (O12*massX)+(O22*massY);
2791  }
2792  }
2793 
2794  for (int qy = 0; qy < Q1D; ++qy)
2795  {
2796  int osc = 0;
2797 
2798  for (int c = 0; c < VDIM; ++c) // loop over x, y components
2799  {
2800  const int D1Dy = (c == 1) ? D1D - 1 : D1D;
2801  const int D1Dx = (c == 0) ? D1D - 1 : D1D;
2802 
2803  double massX[MAX_D1D];
2804  for (int dx = 0; dx < D1Dx; ++dx)
2805  {
2806  massX[dx] = 0;
2807  }
2808  for (int qx = 0; qx < Q1D; ++qx)
2809  {
2810  for (int dx = 0; dx < D1Dx; ++dx)
2811  {
2812  massX[dx] += mass[qy][qx][c] * ((c == 0) ? Bot(dx,qx) : Bct(dx,qx));
2813  }
2814  }
2815 
2816  for (int dy = 0; dy < D1Dy; ++dy)
2817  {
2818  const double wy = (c == 1) ? Bot(dy,qy) : Bct(dy,qy);
2819 
2820  for (int dx = 0; dx < D1Dx; ++dx)
2821  {
2822  Y(dx + (dy * D1Dx) + osc, e) += massX[dx] * wy;
2823  }
2824  }
2825 
2826  osc += D1Dx * D1Dy;
2827  } // loop c
2828  }
2829  }); // end of element loop
2830 }
2831 
2832 // PA H(curl) Mass Assemble 3D kernel
2833 void PAHcurlL2Setup(const int NQ,
2834  const int coeffDim,
2835  const int NE,
2836  const Array<double> &w,
2837  Vector &coeff,
2838  Vector &op)
2839 {
2840  auto W = w.Read();
2841  auto C = Reshape(coeff.Read(), coeffDim, NQ, NE);
2842  auto y = Reshape(op.Write(), coeffDim, NQ, NE);
2843 
2844  MFEM_FORALL(e, NE,
2845  {
2846  for (int q = 0; q < NQ; ++q)
2847  {
2848  for (int c=0; c<coeffDim; ++c)
2849  {
2850  y(c,q,e) = W[q] * C(c,q,e);
2851  }
2852  }
2853  });
2854 }
2855 
2856 void MixedVectorCurlIntegrator::AssemblePA(const FiniteElementSpace &trial_fes,
2857  const FiniteElementSpace &test_fes)
2858 {
2859  // Assumes tensor-product elements, with vector test and trial spaces.
2860  Mesh *mesh = trial_fes.GetMesh();
2861  const FiniteElement *trial_fel = trial_fes.GetFE(0);
2862  const FiniteElement *test_fel = test_fes.GetFE(0);
2863 
2864  const VectorTensorFiniteElement *trial_el =
2865  dynamic_cast<const VectorTensorFiniteElement*>(trial_fel);
2866  MFEM_VERIFY(trial_el != NULL, "Only VectorTensorFiniteElement is supported!");
2867 
2868  const VectorTensorFiniteElement *test_el =
2869  dynamic_cast<const VectorTensorFiniteElement*>(test_fel);
2870  MFEM_VERIFY(test_el != NULL, "Only VectorTensorFiniteElement is supported!");
2871 
2872  const IntegrationRule *ir
2873  = IntRule ? IntRule : &MassIntegrator::GetRule(*trial_el, *trial_el,
2874  *mesh->GetElementTransformation(0));
2875  const int dims = trial_el->GetDim();
2876  MFEM_VERIFY(dims == 3, "");
2877 
2878  const int nq = ir->GetNPoints();
2879  dim = mesh->Dimension();
2880  MFEM_VERIFY(dim == 3, "");
2881 
2882  MFEM_VERIFY(trial_el->GetOrder() == test_el->GetOrder(), "");
2883 
2884  ne = trial_fes.GetNE();
2885  geom = mesh->GetGeometricFactors(*ir, GeometricFactors::JACOBIANS);
2886  mapsC = &trial_el->GetDofToQuad(*ir, DofToQuad::TENSOR);
2887  mapsO = &trial_el->GetDofToQuadOpen(*ir, DofToQuad::TENSOR);
2888  mapsCtest = &test_el->GetDofToQuad(*ir, DofToQuad::TENSOR);
2889  mapsOtest = &test_el->GetDofToQuadOpen(*ir, DofToQuad::TENSOR);
2890  dofs1D = mapsC->ndof;
2891  quad1D = mapsC->nqpt;
2892  dofs1Dtest = mapsCtest->ndof;
2893 
2894  MFEM_VERIFY(dofs1D == mapsO->ndof + 1 && quad1D == mapsO->nqpt, "");
2895 
2896  testType = test_el->GetDerivType();
2897  trialType = trial_el->GetDerivType();
2898 
2899  const int symmDims = (dims * (dims + 1)) / 2; // 1x1: 1, 2x2: 3, 3x3: 6
2900  coeffDim = (DQ ? 3 : 1);
2901 
2902  pa_data.SetSize(symmDims * nq * ne, Device::GetMemoryType());
2903 
2904  Vector coeff(coeffDim * nq * ne);
2905  coeff = 1.0;
2906  auto coeffh = Reshape(coeff.HostWrite(), coeffDim, nq, ne);
2907  if (Q || DQ)
2908  {
2909  Vector V(coeffDim);
2910  if (DQ)
2911  {
2912  MFEM_VERIFY(DQ->GetVDim() == coeffDim, "");
2913  }
2914 
2915  for (int e=0; e<ne; ++e)
2916  {
2917  ElementTransformation *tr = mesh->GetElementTransformation(e);
2918 
2919  for (int p=0; p<nq; ++p)
2920  {
2921  if (DQ)
2922  {
2923  DQ->Eval(V, *tr, ir->IntPoint(p));
2924  for (int i=0; i<coeffDim; ++i)
2925  {
2926  coeffh(i, p, e) = V[i];
2927  }
2928  }
2929  else
2930  {
2931  coeffh(0, p, e) = Q->Eval(*tr, ir->IntPoint(p));
2932  }
2933  }
2934  }
2935  }
2936 
2937  if (testType == mfem::FiniteElement::CURL &&
2938  trialType == mfem::FiniteElement::CURL && dim == 3)
2939  {
2940  PAHcurlL2Setup(nq, coeffDim, ne, ir->GetWeights(), coeff, pa_data);
2941  }
2942  else if (testType == mfem::FiniteElement::DIV &&
2943  trialType == mfem::FiniteElement::CURL && dim == 3 &&
2944  test_fel->GetOrder() == trial_fel->GetOrder())
2945  {
2946  PACurlCurlSetup3D(quad1D, coeffDim, ne, ir->GetWeights(), geom->J, coeff,
2947  pa_data);
2948  }
2949  else
2950  {
2951  MFEM_ABORT("Unknown kernel.");
2952  }
2953 }
2954 
2955 // Apply to x corresponding to DOF's in H(curl) (trial), whose curl is
2956 // integrated against H(curl) test functions corresponding to y.
2957 template<int MAX_D1D = HCURL_MAX_D1D, int MAX_Q1D = HCURL_MAX_Q1D>
2958 static void PAHcurlL2Apply3D(const int D1D,
2959  const int Q1D,
2960  const int coeffDim,
2961  const int NE,
2962  const Array<double> &bo,
2963  const Array<double> &bc,
2964  const Array<double> &bot,
2965  const Array<double> &bct,
2966  const Array<double> &gc,
2967  const Vector &pa_data,
2968  const Vector &x,
2969  Vector &y)
2970 {
2971  MFEM_VERIFY(D1D <= MAX_D1D, "Error: D1D > MAX_D1D");
2972  MFEM_VERIFY(Q1D <= MAX_Q1D, "Error: Q1D > MAX_Q1D");
2973  // Using u = dF^{-T} \hat{u} and (\nabla\times u) F = 1/det(dF) dF \hat{\nabla}\times\hat{u} (p. 78 of Monk), we get
2974  // (\nabla\times u) \cdot v = 1/det(dF) \hat{\nabla}\times\hat{u}^T dF^T dF^{-T} \hat{v}
2975  // = 1/det(dF) \hat{\nabla}\times\hat{u}^T \hat{v}
2976  // If c = 0, \hat{\nabla}\times\hat{u} reduces to [0, (u_0)_{x_2}, -(u_0)_{x_1}]
2977  // If c = 1, \hat{\nabla}\times\hat{u} reduces to [-(u_1)_{x_2}, 0, (u_1)_{x_0}]
2978  // If c = 2, \hat{\nabla}\times\hat{u} reduces to [(u_2)_{x_1}, -(u_2)_{x_0}, 0]
2979 
2980  constexpr static int VDIM = 3;
2981 
2982  auto Bo = Reshape(bo.Read(), Q1D, D1D-1);
2983  auto Bc = Reshape(bc.Read(), Q1D, D1D);
2984  auto Bot = Reshape(bot.Read(), D1D-1, Q1D);
2985  auto Bct = Reshape(bct.Read(), D1D, Q1D);
2986  auto Gc = Reshape(gc.Read(), Q1D, D1D);
2987  auto op = Reshape(pa_data.Read(), coeffDim, Q1D, Q1D, Q1D, NE);
2988  auto X = Reshape(x.Read(), 3*(D1D-1)*D1D*D1D, NE);
2989  auto Y = Reshape(y.ReadWrite(), 3*(D1D-1)*D1D*D1D, NE);
2990 
2991  MFEM_FORALL(e, NE,
2992  {
2993  double curl[MAX_Q1D][MAX_Q1D][MAX_Q1D][VDIM];
2994  // curl[qz][qy][qx] will be computed as the vector curl at each quadrature point.
2995 
2996  for (int qz = 0; qz < Q1D; ++qz)
2997  {
2998  for (int qy = 0; qy < Q1D; ++qy)
2999  {
3000  for (int qx = 0; qx < Q1D; ++qx)
3001  {
3002  for (int c = 0; c < VDIM; ++c)
3003  {
3004  curl[qz][qy][qx][c] = 0.0;
3005  }
3006  }
3007  }
3008  }
3009 
3010  // We treat x, y, z components separately for optimization specific to each.
3011 
3012  int osc = 0;
3013 
3014  {
3015  // x component
3016  const int D1Dz = D1D;
3017  const int D1Dy = D1D;
3018  const int D1Dx = D1D - 1;
3019 
3020  for (int dz = 0; dz < D1Dz; ++dz)
3021  {
3022  double gradXY[MAX_Q1D][MAX_Q1D][2];
3023  for (int qy = 0; qy < Q1D; ++qy)
3024  {
3025  for (int qx = 0; qx < Q1D; ++qx)
3026  {
3027  for (int d = 0; d < 2; ++d)
3028  {
3029  gradXY[qy][qx][d] = 0.0;
3030  }
3031  }
3032  }
3033 
3034  for (int dy = 0; dy < D1Dy; ++dy)
3035  {
3036  double massX[MAX_Q1D];
3037  for (int qx = 0; qx < Q1D; ++qx)
3038  {
3039  massX[qx] = 0.0;
3040  }
3041 
3042  for (int dx = 0; dx < D1Dx; ++dx)
3043  {
3044  const double t = X(dx + ((dy + (dz * D1Dy)) * D1Dx) + osc, e);
3045  for (int qx = 0; qx < Q1D; ++qx)
3046  {
3047  massX[qx] += t * Bo(qx,dx);
3048  }
3049  }
3050 
3051  for (int qy = 0; qy < Q1D; ++qy)
3052  {
3053  const double wy = Bc(qy,dy);
3054  const double wDy = Gc(qy,dy);
3055  for (int qx = 0; qx < Q1D; ++qx)
3056  {
3057  const double wx = massX[qx];
3058  gradXY[qy][qx][0] += wx * wDy;
3059  gradXY[qy][qx][1] += wx * wy;
3060  }
3061  }
3062  }
3063 
3064  for (int qz = 0; qz < Q1D; ++qz)
3065  {
3066  const double wz = Bc(qz,dz);
3067  const double wDz = Gc(qz,dz);
3068  for (int qy = 0; qy < Q1D; ++qy)
3069  {
3070  for (int qx = 0; qx < Q1D; ++qx)
3071  {
3072  // \hat{\nabla}\times\hat{u} is [0, (u_0)_{x_2}, -(u_0)_{x_1}]
3073  curl[qz][qy][qx][1] += gradXY[qy][qx][1] * wDz; // (u_0)_{x_2}
3074  curl[qz][qy][qx][2] -= gradXY[qy][qx][0] * wz; // -(u_0)_{x_1}
3075  }
3076  }
3077  }
3078  }
3079 
3080  osc += D1Dx * D1Dy * D1Dz;
3081  }
3082 
3083  {
3084  // y component
3085  const int D1Dz = D1D;
3086  const int D1Dy = D1D - 1;
3087  const int D1Dx = D1D;
3088 
3089  for (int dz = 0; dz < D1Dz; ++dz)
3090  {
3091  double gradXY[MAX_Q1D][MAX_Q1D][2];
3092  for (int qy = 0; qy < Q1D; ++qy)
3093  {
3094  for (int qx = 0; qx < Q1D; ++qx)
3095  {
3096  for (int d = 0; d < 2; ++d)
3097  {
3098  gradXY[qy][qx][d] = 0.0;
3099  }
3100  }
3101  }
3102 
3103  for (int dx = 0; dx < D1Dx; ++dx)
3104  {
3105  double massY[MAX_Q1D];
3106  for (int qy = 0; qy < Q1D; ++qy)
3107  {
3108  massY[qy] = 0.0;
3109  }
3110 
3111  for (int dy = 0; dy < D1Dy; ++dy)
3112  {
3113  const double t = X(dx + ((dy + (dz * D1Dy)) * D1Dx) + osc, e);
3114  for (int qy = 0; qy < Q1D; ++qy)
3115  {
3116  massY[qy] += t * Bo(qy,dy);
3117  }
3118  }
3119 
3120  for (int qx = 0; qx < Q1D; ++qx)
3121  {
3122  const double wx = Bc(qx,dx);
3123  const double wDx = Gc(qx,dx);
3124  for (int qy = 0; qy < Q1D; ++qy)
3125  {
3126  const double wy = massY[qy];
3127  gradXY[qy][qx][0] += wDx * wy;
3128  gradXY[qy][qx][1] += wx * wy;
3129  }
3130  }
3131  }
3132 
3133  for (int qz = 0; qz < Q1D; ++qz)
3134  {
3135  const double wz = Bc(qz,dz);
3136  const double wDz = Gc(qz,dz);
3137  for (int qy = 0; qy < Q1D; ++qy)
3138  {
3139  for (int qx = 0; qx < Q1D; ++qx)
3140  {
3141  // \hat{\nabla}\times\hat{u} is [-(u_1)_{x_2}, 0, (u_1)_{x_0}]
3142  curl[qz][qy][qx][0] -= gradXY[qy][qx][1] * wDz; // -(u_1)_{x_2}
3143  curl[qz][qy][qx][2] += gradXY[qy][qx][0] * wz; // (u_1)_{x_0}
3144  }
3145  }
3146  }
3147  }
3148 
3149  osc += D1Dx * D1Dy * D1Dz;
3150  }
3151 
3152  {
3153  // z component
3154  const int D1Dz = D1D - 1;
3155  const int D1Dy = D1D;
3156  const int D1Dx = D1D;
3157 
3158  for (int dx = 0; dx < D1Dx; ++dx)
3159  {
3160  double gradYZ[MAX_Q1D][MAX_Q1D][2];
3161  for (int qz = 0; qz < Q1D; ++qz)
3162  {
3163  for (int qy = 0; qy < Q1D; ++qy)
3164  {
3165  for (int d = 0; d < 2; ++d)
3166  {
3167  gradYZ[qz][qy][d] = 0.0;
3168  }
3169  }
3170  }
3171 
3172  for (int dy = 0; dy < D1Dy; ++dy)
3173  {
3174  double massZ[MAX_Q1D];
3175  for (int qz = 0; qz < Q1D; ++qz)
3176  {
3177  massZ[qz] = 0.0;
3178  }
3179 
3180  for (int dz = 0; dz < D1Dz; ++dz)
3181  {
3182  const double t = X(dx + ((dy + (dz * D1Dy)) * D1Dx) + osc, e);
3183  for (int qz = 0; qz < Q1D; ++qz)
3184  {
3185  massZ[qz] += t * Bo(qz,dz);
3186  }
3187  }
3188 
3189  for (int qy = 0; qy < Q1D; ++qy)
3190  {
3191  const double wy = Bc(qy,dy);
3192  const double wDy = Gc(qy,dy);
3193  for (int qz = 0; qz < Q1D; ++qz)
3194  {
3195  const double wz = massZ[qz];
3196  gradYZ[qz][qy][0] += wz * wy;
3197  gradYZ[qz][qy][1] += wz * wDy;
3198  }
3199  }
3200  }
3201 
3202  for (int qx = 0; qx < Q1D; ++qx)
3203  {
3204  const double wx = Bc(qx,dx);
3205  const double wDx = Gc(qx,dx);
3206 
3207  for (int qy = 0; qy < Q1D; ++qy)
3208  {
3209  for (int qz = 0; qz < Q1D; ++qz)
3210  {
3211  // \hat{\nabla}\times\hat{u} is [(u_2)_{x_1}, -(u_2)_{x_0}, 0]
3212  curl[qz][qy][qx][0] += gradYZ[qz][qy][1] * wx; // (u_2)_{x_1}
3213  curl[qz][qy][qx][1] -= gradYZ[qz][qy][0] * wDx; // -(u_2)_{x_0}
3214  }
3215  }
3216  }
3217  }
3218  }
3219 
3220  // Apply D operator.
3221  for (int qz = 0; qz < Q1D; ++qz)
3222  {
3223  for (int qy = 0; qy < Q1D; ++qy)
3224  {
3225  for (int qx = 0; qx < Q1D; ++qx)
3226  {
3227  for (int c = 0; c < VDIM; ++c)
3228  {
3229  curl[qz][qy][qx][c] *= op(coeffDim == 3 ? c : 0, qx,qy,qz,e);
3230  }
3231  }
3232  }
3233  }
3234 
3235  for (int qz = 0; qz < Q1D; ++qz)
3236  {
3237  double massXY[MAX_D1D][MAX_D1D];
3238 
3239  osc = 0;
3240 
3241  for (int c = 0; c < VDIM; ++c) // loop over x, y, z components
3242  {
3243  const int D1Dz = (c == 2) ? D1D - 1 : D1D;
3244  const int D1Dy = (c == 1) ? D1D - 1 : D1D;
3245  const int D1Dx = (c == 0) ? D1D - 1 : D1D;
3246 
3247  for (int dy = 0; dy < D1Dy; ++dy)
3248  {
3249  for (int dx = 0; dx < D1Dx; ++dx)
3250  {
3251  massXY[dy][dx] = 0;
3252  }
3253  }
3254  for (int qy = 0; qy < Q1D; ++qy)
3255  {
3256  double massX[MAX_D1D];
3257  for (int dx = 0; dx < D1Dx; ++dx)
3258  {
3259  massX[dx] = 0.0;
3260  }
3261  for (int qx = 0; qx < Q1D; ++qx)
3262  {
3263  for (int dx = 0; dx < D1Dx; ++dx)
3264  {
3265  massX[dx] += curl[qz][qy][qx][c] * ((c == 0) ? Bot(dx,qx) : Bct(dx,qx));
3266  }
3267  }
3268 
3269  for (int dy = 0; dy < D1Dy; ++dy)
3270  {
3271  const double wy = (c == 1) ? Bot(dy,qy) : Bct(dy,qy);
3272  for (int dx = 0; dx < D1Dx; ++dx)
3273  {
3274  massXY[dy][dx] += massX[dx] * wy;
3275  }
3276  }
3277  }
3278 
3279  for (int dz = 0; dz < D1Dz; ++dz)
3280  {
3281  const double wz = (c == 2) ? Bot(dz,qz) : Bct(dz,qz);
3282  for (int dy = 0; dy < D1Dy; ++dy)
3283  {
3284  for (int dx = 0; dx < D1Dx; ++dx)
3285  {
3286  Y(dx + ((dy + (dz * D1Dy)) * D1Dx) + osc, e) += massXY[dy][dx] * wz;
3287  }
3288  }
3289  }
3290 
3291  osc += D1Dx * D1Dy * D1Dz;
3292  } // loop c
3293  } // loop qz
3294  }); // end of element loop
3295 }
3296 
3297 // Apply to x corresponding to DOF's in H(curl) (trial), whose curl is
3298 // integrated against H(curl) test functions corresponding to y.
3299 template<int MAX_D1D = HCURL_MAX_D1D, int MAX_Q1D = HCURL_MAX_Q1D>
3300 static void SmemPAHcurlL2Apply3D(const int D1D,
3301  const int Q1D,
3302  const int coeffDim,
3303  const int NE,
3304  const Array<double> &bo,
3305  const Array<double> &bc,
3306  const Array<double> &gc,
3307  const Vector &pa_data,
3308  const Vector &x,
3309  Vector &y)
3310 {
3311  MFEM_VERIFY(D1D <= MAX_D1D, "Error: D1D > MAX_D1D");
3312  MFEM_VERIFY(Q1D <= MAX_Q1D, "Error: Q1D > MAX_Q1D");
3313 
3314  auto Bo = Reshape(bo.Read(), Q1D, D1D-1);
3315  auto Bc = Reshape(bc.Read(), Q1D, D1D);
3316  auto Gc = Reshape(gc.Read(), Q1D, D1D);
3317  auto op = Reshape(pa_data.Read(), coeffDim, Q1D, Q1D, Q1D, NE);
3318  auto X = Reshape(x.Read(), 3*(D1D-1)*D1D*D1D, NE);
3319  auto Y = Reshape(y.ReadWrite(), 3*(D1D-1)*D1D*D1D, NE);
3320 
3321  MFEM_FORALL_3D(e, NE, Q1D, Q1D, Q1D,
3322  {
3323  constexpr int VDIM = 3;
3324  constexpr int maxCoeffDim = 3;
3325 
3326  MFEM_SHARED double sBo[MAX_D1D][MAX_Q1D];
3327  MFEM_SHARED double sBc[MAX_D1D][MAX_Q1D];
3328  MFEM_SHARED double sGc[MAX_D1D][MAX_Q1D];
3329 
3330  double opc[maxCoeffDim];
3331  MFEM_SHARED double sop[maxCoeffDim][MAX_Q1D][MAX_Q1D];
3332  MFEM_SHARED double curl[MAX_Q1D][MAX_Q1D][3];
3333 
3334  MFEM_SHARED double sX[MAX_D1D][MAX_D1D][MAX_D1D];
3335 
3336  MFEM_FOREACH_THREAD(qx,x,Q1D)
3337  {
3338  MFEM_FOREACH_THREAD(qy,y,Q1D)
3339  {
3340  MFEM_FOREACH_THREAD(qz,z,Q1D)
3341  {
3342  for (int i=0; i<coeffDim; ++i)
3343  {
3344  opc[i] = op(i,qx,qy,qz,e);
3345  }
3346  }
3347  }
3348  }
3349 
3350  const int tidx = MFEM_THREAD_ID(x);
3351  const int tidy = MFEM_THREAD_ID(y);
3352  const int tidz = MFEM_THREAD_ID(z);
3353 
3354  if (tidz == 0)
3355  {
3356  MFEM_FOREACH_THREAD(d,y,D1D)
3357  {
3358  MFEM_FOREACH_THREAD(q,x,Q1D)
3359  {
3360  sBc[d][q] = Bc(q,d);
3361  sGc[d][q] = Gc(q,d);
3362  if (d < D1D-1)
3363  {
3364  sBo[d][q] = Bo(q,d);
3365  }
3366  }
3367  }
3368  }
3369  MFEM_SYNC_THREAD;
3370 
3371  for (int qz=0; qz < Q1D; ++qz)
3372  {
3373  if (tidz == qz)
3374  {
3375  MFEM_FOREACH_THREAD(qy,y,Q1D)
3376  {
3377  MFEM_FOREACH_THREAD(qx,x,Q1D)
3378  {
3379  for (int i=0; i<3; ++i)
3380  {
3381  curl[qy][qx][i] = 0.0;
3382  }
3383  }
3384  }
3385  }
3386 
3387  int osc = 0;
3388  for (int c = 0; c < VDIM; ++c) // loop over x, y, z components
3389  {
3390  const int D1Dz = (c == 2) ? D1D - 1 : D1D;
3391  const int D1Dy = (c == 1) ? D1D - 1 : D1D;
3392  const int D1Dx = (c == 0) ? D1D - 1 : D1D;
3393 
3394  MFEM_FOREACH_THREAD(dz,z,D1Dz)
3395  {
3396  MFEM_FOREACH_THREAD(dy,y,D1Dy)
3397  {
3398  MFEM_FOREACH_THREAD(dx,x,D1Dx)
3399  {
3400  sX[dz][dy][dx] = X(dx + ((dy + (dz * D1Dy)) * D1Dx) + osc, e);
3401  }
3402  }
3403  }
3404  MFEM_SYNC_THREAD;
3405 
3406  if (tidz == qz)
3407  {
3408  if (c == 0)
3409  {
3410  for (int i=0; i<coeffDim; ++i)
3411  {
3412  sop[i][tidx][tidy] = opc[i];
3413  }
3414  }
3415 
3416  MFEM_FOREACH_THREAD(qy,y,Q1D)
3417  {
3418  MFEM_FOREACH_THREAD(qx,x,Q1D)
3419  {
3420  double u = 0.0;
3421  double v = 0.0;
3422 
3423  // We treat x, y, z components separately for optimization specific to each.
3424  if (c == 0) // x component
3425  {
3426  // \hat{\nabla}\times\hat{u} is [0, (u_0)_{x_2}, -(u_0)_{x_1}]
3427 
3428  for (int dz = 0; dz < D1Dz; ++dz)
3429  {
3430  const double wz = sBc[dz][qz];
3431  const double wDz = sGc[dz][qz];
3432 
3433  for (int dy = 0; dy < D1Dy; ++dy)
3434  {
3435  const double wy = sBc[dy][qy];
3436  const double wDy = sGc[dy][qy];
3437 
3438  for (int dx = 0; dx < D1Dx; ++dx)
3439  {
3440  const double wx = sX[dz][dy][dx] * sBo[dx][qx];
3441  u += wx * wDy * wz;
3442  v += wx * wy * wDz;
3443  }
3444  }
3445  }
3446 
3447  curl[qy][qx][1] += v; // (u_0)_{x_2}
3448  curl[qy][qx][2] -= u; // -(u_0)_{x_1}
3449  }
3450  else if (c == 1) // y component
3451  {
3452  // \hat{\nabla}\times\hat{u} is [-(u_1)_{x_2}, 0, (u_1)_{x_0}]
3453 
3454  for (int dz = 0; dz < D1Dz; ++dz)
3455  {
3456  const double wz = sBc[dz][qz];
3457  const double wDz = sGc[dz][qz];
3458 
3459  for (int dy = 0; dy < D1Dy; ++dy)
3460  {
3461  const double wy = sBo[dy][qy];
3462 
3463  for (int dx = 0; dx < D1Dx; ++dx)
3464  {
3465  const double t = sX[dz][dy][dx];
3466  const double wx = t * sBc[dx][qx];
3467  const double wDx = t * sGc[dx][qx];
3468 
3469  u += wDx * wy * wz;
3470  v += wx * wy * wDz;
3471  }
3472  }
3473  }
3474 
3475  curl[qy][qx][0] -= v; // -(u_1)_{x_2}
3476  curl[qy][qx][2] += u; // (u_1)_{x_0}
3477  }
3478  else // z component
3479  {
3480  // \hat{\nabla}\times\hat{u} is [(u_2)_{x_1}, -(u_2)_{x_0}, 0]
3481 
3482  for (int dz = 0; dz < D1Dz; ++dz)
3483  {
3484  const double wz = sBo[dz][qz];
3485 
3486  for (int dy = 0; dy < D1Dy; ++dy)
3487  {
3488  const double wy = sBc[dy][qy];
3489  const double wDy = sGc[dy][qy];
3490 
3491  for (int dx = 0; dx < D1Dx; ++dx)
3492  {
3493  const double t = sX[dz][dy][dx];
3494  const double wx = t * sBc[dx][qx];
3495  const double wDx = t * sGc[dx][qx];
3496 
3497  u += wDx * wy * wz;
3498  v += wx * wDy * wz;
3499  }
3500  }
3501  }
3502 
3503  curl[qy][qx][0] += v; // (u_2)_{x_1}
3504  curl[qy][qx][1] -= u; // -(u_2)_{x_0}
3505  }
3506  } // qx
3507  } // qy
3508  } // tidz == qz
3509 
3510  osc += D1Dx * D1Dy * D1Dz;
3511  MFEM_SYNC_THREAD;
3512  } // c
3513 
3514  double dxyz1 = 0.0;
3515  double dxyz2 = 0.0;
3516  double dxyz3 = 0.0;
3517 
3518  MFEM_FOREACH_THREAD(dz,z,D1D)
3519  {
3520  const double wcz = sBc[dz][qz];
3521  const double wz = (dz < D1D-1) ? sBo[dz][qz] : 0.0;
3522 
3523  MFEM_FOREACH_THREAD(dy,y,D1D)
3524  {
3525  MFEM_FOREACH_THREAD(dx,x,D1D)
3526  {
3527  for (int qy = 0; qy < Q1D; ++qy)
3528  {
3529  const double wcy = sBc[dy][qy];
3530  const double wy = (dy < D1D-1) ? sBo[dy][qy] : 0.0;
3531 
3532  for (int qx = 0; qx < Q1D; ++qx)
3533  {
3534  const double O1 = sop[0][qx][qy];
3535  const double O2 = (coeffDim == 3) ? sop[1][qx][qy] : O1;
3536  const double O3 = (coeffDim == 3) ? sop[2][qx][qy] : O1;
3537 
3538  const double c1 = O1 * curl[qy][qx][0];
3539  const double c2 = O2 * curl[qy][qx][1];
3540  const double c3 = O3 * curl[qy][qx][2];
3541 
3542  const double wcx = sBc[dx][qx];
3543 
3544  if (dx < D1D-1)
3545  {
3546  const double wx = sBo[dx][qx];
3547  dxyz1 += c1 * wx * wcy * wcz;
3548  }
3549 
3550  dxyz2 += c2 * wcx * wy * wcz;
3551  dxyz3 += c3 * wcx * wcy * wz;
3552  } // qx
3553  } // qy
3554  } // dx
3555  } // dy
3556  } // dz
3557 
3558  MFEM_SYNC_THREAD;
3559 
3560  MFEM_FOREACH_THREAD(dz,z,D1D)
3561  {
3562  MFEM_FOREACH_THREAD(dy,y,D1D)
3563  {
3564  MFEM_FOREACH_THREAD(dx,x,D1D)
3565  {
3566  if (dx < D1D-1)
3567  {
3568  Y(dx + ((dy + (dz * D1D)) * (D1D-1)), e) += dxyz1;
3569  }
3570  if (dy < D1D-1)
3571  {
3572  Y(dx + ((dy + (dz * (D1D-1))) * D1D) + ((D1D-1)*D1D*D1D), e) += dxyz2;
3573  }
3574  if (dz < D1D-1)
3575  {
3576  Y(dx + ((dy + (dz * D1D)) * D1D) + (2*(D1D-1)*D1D*D1D), e) += dxyz3;
3577  }
3578  }
3579  }
3580  }
3581  } // qz
3582  }); // end of element loop
3583 }
3584 
3585 // Apply to x corresponding to DOF's in H(curl) (trial), whose curl is
3586 // integrated against H(div) test functions corresponding to y.
3587 template<int MAX_D1D = HCURL_MAX_D1D, int MAX_Q1D = HCURL_MAX_Q1D>
3588 static void PAHcurlHdivApply3D(const int D1D,
3589  const int D1Dtest,
3590  const int Q1D,
3591  const int NE,
3592  const Array<double> &bo,
3593  const Array<double> &bc,
3594  const Array<double> &bot,
3595  const Array<double> &bct,
3596  const Array<double> &gc,
3597  const Vector &pa_data,
3598  const Vector &x,
3599  Vector &y)
3600 {
3601  MFEM_VERIFY(D1D <= MAX_D1D, "Error: D1D > MAX_D1D");
3602  MFEM_VERIFY(Q1D <= MAX_Q1D, "Error: Q1D > MAX_Q1D");
3603  // Using Piola transformations (\nabla\times u) F = 1/det(dF) dF \hat{\nabla}\times\hat{u}
3604  // for u in H(curl) and w = (1 / det (dF)) dF \hat{w} for w in H(div), we get
3605  // (\nabla\times u) \cdot w = 1/det(dF)^2 \hat{\nabla}\times\hat{u}^T dF^T dF \hat{w}
3606  // If c = 0, \hat{\nabla}\times\hat{u} reduces to [0, (u_0)_{x_2}, -(u_0)_{x_1}]
3607  // If c = 1, \hat{\nabla}\times\hat{u} reduces to [-(u_1)_{x_2}, 0, (u_1)_{x_0}]
3608  // If c = 2, \hat{\nabla}\times\hat{u} reduces to [(u_2)_{x_1}, -(u_2)_{x_0}, 0]
3609 
3610  constexpr static int VDIM = 3;
3611 
3612  auto Bo = Reshape(bo.Read(), Q1D, D1D-1);
3613  auto Bc = Reshape(bc.Read(), Q1D, D1D);
3614  auto Bot = Reshape(bot.Read(), D1Dtest-1, Q1D);
3615  auto Bct = Reshape(bct.Read(), D1Dtest, Q1D);
3616  auto Gc = Reshape(gc.Read(), Q1D, D1D);
3617  auto op = Reshape(pa_data.Read(), Q1D, Q1D, Q1D, 6, NE);
3618  auto X = Reshape(x.Read(), 3*(D1D-1)*D1D*D1D, NE);
3619  auto Y = Reshape(y.ReadWrite(), 3*(D1Dtest-1)*(D1Dtest-1)*D1D, NE);
3620 
3621  MFEM_FORALL(e, NE,
3622  {
3623  double curl[MAX_Q1D][MAX_Q1D][MAX_Q1D][VDIM];
3624  // curl[qz][qy][qx] will be computed as the vector curl at each quadrature point.
3625 
3626  for (int qz = 0; qz < Q1D; ++qz)
3627  {
3628  for (int qy = 0; qy < Q1D; ++qy)
3629  {
3630  for (int qx = 0; qx < Q1D; ++qx)
3631  {
3632  for (int c = 0; c < VDIM; ++c)
3633  {
3634  curl[qz][qy][qx][c] = 0.0;
3635  }
3636  }
3637  }
3638  }
3639 
3640  // We treat x, y, z components separately for optimization specific to each.
3641 
3642  int osc = 0;
3643 
3644  {
3645  // x component
3646  const int D1Dz = D1D;
3647  const int D1Dy = D1D;
3648  const int D1Dx = D1D - 1;
3649 
3650  for (int dz = 0; dz < D1Dz; ++dz)
3651  {
3652  double gradXY[MAX_Q1D][MAX_Q1D][2];
3653  for (int qy = 0; qy < Q1D; ++qy)
3654  {
3655  for (int qx = 0; qx < Q1D; ++qx)
3656  {
3657  for (int d = 0; d < 2; ++d)
3658  {
3659  gradXY[qy][qx][d] = 0.0;
3660  }
3661  }
3662  }
3663 
3664  for (int dy = 0; dy < D1Dy; ++dy)
3665  {
3666  double massX[MAX_Q1D];
3667  for (int qx = 0; qx < Q1D; ++qx)
3668  {
3669  massX[qx] = 0.0;
3670  }
3671 
3672  for (int dx = 0; dx < D1Dx; ++dx)
3673  {
3674  const double t = X(dx + ((dy + (dz * D1Dy)) * D1Dx) + osc, e);
3675  for (int qx = 0; qx < Q1D; ++qx)
3676  {
3677  massX[qx] += t * Bo(qx,dx);
3678  }
3679  }
3680 
3681  for (int qy = 0; qy < Q1D; ++qy)
3682  {
3683  const double wy = Bc(qy,dy);
3684  const double wDy = Gc(qy,dy);
3685  for (int qx = 0; qx < Q1D; ++qx)
3686  {
3687  const double wx = massX[qx];
3688  gradXY[qy][qx][0] += wx * wDy;
3689  gradXY[qy][qx][1] += wx * wy;
3690  }
3691  }
3692  }
3693 
3694  for (int qz = 0; qz < Q1D; ++qz)
3695  {
3696  const double wz = Bc(qz,dz);
3697  const double wDz = Gc(qz,dz);
3698  for (int qy = 0; qy < Q1D; ++qy)
3699  {
3700  for (int qx = 0; qx < Q1D; ++qx)
3701  {
3702  // \hat{\nabla}\times\hat{u} is [0, (u_0)_{x_2}, -(u_0)_{x_1}]
3703  curl[qz][qy][qx][1] += gradXY[qy][qx][1] * wDz; // (u_0)_{x_2}
3704  curl[qz][qy][qx][2] -= gradXY[qy][qx][0] * wz; // -(u_0)_{x_1}
3705  }
3706  }
3707  }
3708  }
3709 
3710  osc += D1Dx * D1Dy * D1Dz;
3711  }
3712 
3713  {
3714  // y component
3715  const int D1Dz = D1D;
3716  const int D1Dy = D1D - 1;
3717  const int D1Dx = D1D;
3718 
3719  for (int dz = 0; dz < D1Dz; ++dz)
3720  {
3721  double gradXY[MAX_Q1D][MAX_Q1D][2];
3722  for (int qy = 0; qy < Q1D; ++qy)
3723  {
3724  for (int qx = 0; qx < Q1D; ++qx)
3725  {
3726  for (int d = 0; d < 2; ++d)
3727  {
3728  gradXY[qy][qx][d] = 0.0;
3729  }
3730  }
3731  }
3732 
3733  for (int dx = 0; dx < D1Dx; ++dx)
3734  {
3735  double massY[MAX_Q1D];
3736  for (int qy = 0; qy < Q1D; ++qy)
3737  {
3738  massY[qy] = 0.0;
3739  }
3740 
3741  for (int dy = 0; dy < D1Dy; ++dy)
3742  {
3743  const double t = X(dx + ((dy + (dz * D1Dy)) * D1Dx) + osc, e);
3744  for (int qy = 0; qy < Q1D; ++qy)
3745  {
3746  massY[qy] += t * Bo(qy,dy);
3747  }
3748  }
3749 
3750  for (int qx = 0; qx < Q1D; ++qx)
3751  {
3752  const double wx = Bc(qx,dx);
3753  const double wDx = Gc(qx,dx);
3754  for (int qy = 0; qy < Q1D; ++qy)
3755  {
3756  const double wy = massY[qy];
3757  gradXY[qy][qx][0] += wDx * wy;
3758  gradXY[qy][qx][1] += wx * wy;
3759  }
3760  }
3761  }
3762 
3763  for (int qz = 0; qz < Q1D; ++qz)
3764  {
3765  const double wz = Bc(qz,dz);
3766  const double wDz = Gc(qz,dz);
3767  for (int qy = 0; qy < Q1D; ++qy)
3768  {
3769  for (int qx = 0; qx < Q1D; ++qx)
3770  {
3771  // \hat{\nabla}\times\hat{u} is [-(u_1)_{x_2}, 0, (u_1)_{x_0}]
3772  curl[qz][qy][qx][0] -= gradXY[qy][qx][1] * wDz; // -(u_1)_{x_2}
3773  curl[qz][qy][qx][2] += gradXY[qy][qx][0] * wz; // (u_1)_{x_0}
3774  }
3775  }
3776  }
3777  }
3778 
3779  osc += D1Dx * D1Dy * D1Dz;
3780  }
3781 
3782  {
3783  // z component
3784  const int D1Dz = D1D - 1;
3785  const int D1Dy = D1D;
3786  const int D1Dx = D1D;
3787 
3788  for (int dx = 0; dx < D1Dx; ++dx)
3789  {
3790  double gradYZ[MAX_Q1D][MAX_Q1D][2];
3791  for (int qz = 0; qz < Q1D; ++qz)
3792  {
3793  for (int qy = 0; qy < Q1D; ++qy)
3794  {
3795  for (int d = 0; d < 2; ++d)
3796  {
3797  gradYZ[qz][qy][d] = 0.0;
3798  }
3799  }
3800  }
3801 
3802  for (int dy = 0; dy < D1Dy; ++dy)
3803  {
3804  double massZ[MAX_Q1D];
3805  for (int qz = 0; qz < Q1D; ++qz)
3806  {
3807  massZ[qz] = 0.0;
3808  }
3809 
3810  for (int dz = 0; dz < D1Dz; ++dz)
3811  {
3812  const double t = X(dx + ((dy + (dz * D1Dy)) * D1Dx) + osc, e);
3813  for (int qz = 0; qz < Q1D; ++qz)
3814  {
3815  massZ[qz] += t * Bo(qz,dz);
3816  }
3817  }
3818 
3819  for (int qy = 0; qy < Q1D; ++qy)
3820  {
3821  const double wy = Bc(qy,dy);
3822  const double wDy = Gc(qy,dy);
3823  for (int qz = 0; qz < Q1D; ++qz)
3824  {
3825  const double wz = massZ[qz];
3826  gradYZ[qz][qy][0] += wz * wy;
3827  gradYZ[qz][qy][1] += wz * wDy;
3828  }
3829  }
3830  }
3831 
3832  for (int qx = 0; qx < Q1D; ++qx)
3833  {
3834  const double wx = Bc(qx,dx);
3835  const double wDx = Gc(qx,dx);
3836 
3837  for (int qy = 0; qy < Q1D; ++qy)
3838  {
3839  for (int qz = 0; qz < Q1D; ++qz)
3840  {
3841  // \hat{\nabla}\times\hat{u} is [(u_2)_{x_1}, -(u_2)_{x_0}, 0]
3842  curl[qz][qy][qx][0] += gradYZ[qz][qy][1] * wx; // (u_2)_{x_1}
3843  curl[qz][qy][qx][1] -= gradYZ[qz][qy][0] * wDx; // -(u_2)_{x_0}
3844  }
3845  }
3846  }
3847  }
3848  }
3849 
3850  // Apply D operator.
3851  for (int qz = 0; qz < Q1D; ++qz)
3852  {
3853  for (int qy = 0; qy < Q1D; ++qy)
3854  {
3855  for (int qx = 0; qx < Q1D; ++qx)
3856  {
3857  const double O11 = op(qx,qy,qz,0,e);
3858  const double O12 = op(qx,qy,qz,1,e);
3859  const double O13 = op(qx,qy,qz,2,e);
3860  const double O22 = op(qx,qy,qz,3,e);
3861  const double O23 = op(qx,qy,qz,4,e);
3862  const double O33 = op(qx,qy,qz,5,e);
3863 
3864  const double c1 = (O11 * curl[qz][qy][qx][0]) + (O12 * curl[qz][qy][qx][1]) +
3865  (O13 * curl[qz][qy][qx][2]);
3866  const double c2 = (O12 * curl[qz][qy][qx][0]) + (O22 * curl[qz][qy][qx][1]) +
3867  (O23 * curl[qz][qy][qx][2]);
3868  const double c3 = (O13 * curl[qz][qy][qx][0]) + (O23 * curl[qz][qy][qx][1]) +
3869  (O33 * curl[qz][qy][qx][2]);
3870 
3871  curl[qz][qy][qx][0] = c1;
3872  curl[qz][qy][qx][1] = c2;
3873  curl[qz][qy][qx][2] = c3;
3874  }
3875  }
3876  }
3877 
3878  for (int qz = 0; qz < Q1D; ++qz)
3879  {
3880  double massXY[HCURL_MAX_D1D][HCURL_MAX_D1D]; // Assuming HDIV_MAX_D1D <= HCURL_MAX_D1D
3881 
3882  osc = 0;
3883 
3884  for (int c = 0; c < VDIM; ++c) // loop over x, y, z components
3885  {
3886  const int D1Dz = (c == 2) ? D1Dtest : D1Dtest - 1;
3887  const int D1Dy = (c == 1) ? D1Dtest : D1Dtest - 1;
3888  const int D1Dx = (c == 0) ? D1Dtest : D1Dtest - 1;
3889 
3890  for (int dy = 0; dy < D1Dy; ++dy)
3891  {
3892  for (int dx = 0; dx < D1Dx; ++dx)
3893  {
3894  massXY[dy][dx] = 0;
3895  }
3896  }
3897  for (int qy = 0; qy < Q1D; ++qy)
3898  {
3899  double massX[HCURL_MAX_D1D];
3900  for (int dx = 0; dx < D1Dx; ++dx)
3901  {
3902  massX[dx] = 0;
3903  }
3904  for (int qx = 0; qx < Q1D; ++qx)
3905  {
3906  for (int dx = 0; dx < D1Dx; ++dx)
3907  {
3908  massX[dx] += curl[qz][qy][qx][c] *
3909  ((c == 0) ? Bct(dx,qx) : Bot(dx,qx));
3910  }
3911  }
3912  for (int dy = 0; dy < D1Dy; ++dy)
3913  {
3914  const double wy = (c == 1) ? Bct(dy,qy) : Bot(dy,qy);
3915  for (int dx = 0; dx < D1Dx; ++dx)
3916  {
3917  massXY[dy][dx] += massX[dx] * wy;
3918  }
3919  }
3920  }
3921 
3922  for (int dz = 0; dz < D1Dz; ++dz)
3923  {
3924  const double wz = (c == 2) ? Bct(dz,qz) : Bot(dz,qz);
3925  for (int dy = 0; dy < D1Dy; ++dy)
3926  {
3927  for (int dx = 0; dx < D1Dx; ++dx)
3928  {
3929  Y(dx + ((dy + (dz * D1Dy)) * D1Dx) + osc, e) +=
3930  massXY[dy][dx] * wz;
3931  }
3932  }
3933  }
3934 
3935  osc += D1Dx * D1Dy * D1Dz;
3936  } // loop c
3937  } // loop qz
3938  }); // end of element loop
3939 }
3940 
3941 void MixedVectorCurlIntegrator::AddMultPA(const Vector &x, Vector &y) const
3942 {
3943  if (testType == mfem::FiniteElement::CURL &&
3944  trialType == mfem::FiniteElement::CURL && dim == 3)
3945  {
3946  if (Device::Allows(Backend::DEVICE_MASK))
3947  {
3948  const int ID = (dofs1D << 4) | quad1D;
3949  switch (ID)
3950  {
3951  case 0x23: return SmemPAHcurlL2Apply3D<2,3>(dofs1D, quad1D, coeffDim, ne,
3952  mapsO->B, mapsC->B,
3953  mapsC->G, pa_data, x, y);
3954  case 0x34: return SmemPAHcurlL2Apply3D<3,4>(dofs1D, quad1D, coeffDim, ne,
3955  mapsO->B, mapsC->B,
3956  mapsC->G, pa_data, x, y);
3957  case 0x45: return SmemPAHcurlL2Apply3D<4,5>(dofs1D, quad1D, coeffDim, ne,
3958  mapsO->B, mapsC->B,
3959  mapsC->G, pa_data, x, y);
3960  case 0x56: return SmemPAHcurlL2Apply3D<5,6>(dofs1D, quad1D, coeffDim, ne,
3961  mapsO->B, mapsC->B,
3962  mapsC->G, pa_data, x, y);
3963  default: return SmemPAHcurlL2Apply3D(dofs1D, quad1D, coeffDim, ne, mapsO->B,
3964  mapsC->B,
3965  mapsC->G, pa_data, x, y);
3966  }
3967  }
3968  else
3969  PAHcurlL2Apply3D(dofs1D, quad1D, coeffDim, ne, mapsO->B, mapsC->B,
3970  mapsO->Bt, mapsC->Bt, mapsC->G, pa_data, x, y);
3971  }
3972  else if (testType == mfem::FiniteElement::DIV &&
3973  trialType == mfem::FiniteElement::CURL && dim == 3)
3974  PAHcurlHdivApply3D(dofs1D, dofs1Dtest, quad1D, ne, mapsO->B,
3975  mapsC->B, mapsOtest->Bt, mapsCtest->Bt, mapsC->G,
3976  pa_data, x, y);
3977  else
3978  {
3979  MFEM_ABORT("Unsupported dimension or space!");
3980  }
3981 }
3982 
3983 void MixedVectorWeakCurlIntegrator::AssemblePA(const FiniteElementSpace
3984  &trial_fes,
3985  const FiniteElementSpace &test_fes)
3986 {
3987  // Assumes tensor-product elements, with vector test and trial spaces.
3988  Mesh *mesh = trial_fes.GetMesh();
3989  const FiniteElement *trial_fel = trial_fes.GetFE(0);
3990  const FiniteElement *test_fel = test_fes.GetFE(0);
3991 
3992  const VectorTensorFiniteElement *trial_el =
3993  dynamic_cast<const VectorTensorFiniteElement*>(trial_fel);
3994  MFEM_VERIFY(trial_el != NULL, "Only VectorTensorFiniteElement is supported!");
3995 
3996  const VectorTensorFiniteElement *test_el =
3997  dynamic_cast<const VectorTensorFiniteElement*>(test_fel);
3998  MFEM_VERIFY(test_el != NULL, "Only VectorTensorFiniteElement is supported!");
3999 
4000  const IntegrationRule *ir
4001  = IntRule ? IntRule : &MassIntegrator::GetRule(*trial_el, *trial_el,
4002  *mesh->GetElementTransformation(0));
4003  const int dims = trial_el->GetDim();
4004  MFEM_VERIFY(dims == 3, "");
4005 
4006  const int nq = ir->GetNPoints();
4007  dim = mesh->Dimension();
4008  MFEM_VERIFY(dim == 3, "");
4009 
4010  MFEM_VERIFY(trial_el->GetOrder() == test_el->GetOrder(), "");
4011 
4012  ne = trial_fes.GetNE();
4013  geom = mesh->GetGeometricFactors(*ir, GeometricFactors::JACOBIANS);
4014  mapsC = &test_el->GetDofToQuad(*ir, DofToQuad::TENSOR);
4015  mapsO = &test_el->GetDofToQuadOpen(*ir, DofToQuad::TENSOR);
4016  dofs1D = mapsC->ndof;
4017  quad1D = mapsC->nqpt;
4018 
4019  MFEM_VERIFY(dofs1D == mapsO->ndof + 1 && quad1D == mapsO->nqpt, "");
4020 
4021  coeffDim = DQ ? 3 : 1;
4022 
4023  pa_data.SetSize(coeffDim * nq * ne, Device::GetMemoryType());
4024 
4025  Vector coeff(coeffDim * nq * ne);
4026  coeff = 1.0;
4027  auto coeffh = Reshape(coeff.HostWrite(), coeffDim, nq, ne);
4028  if (Q || DQ)
4029  {
4030  Vector V(coeffDim);
4031  if (DQ)
4032  {
4033  MFEM_VERIFY(DQ->GetVDim() == coeffDim, "");
4034  }
4035 
4036  for (int e=0; e<ne; ++e)
4037  {
4038  ElementTransformation *tr = mesh->GetElementTransformation(e);
4039 
4040  for (int p=0; p<nq; ++p)
4041  {
4042  if (DQ)
4043  {
4044  DQ->Eval(V, *tr, ir->IntPoint(p));
4045  for (int i=0; i<coeffDim; ++i)
4046  {
4047  coeffh(i, p, e) = V[i];
4048  }
4049  }
4050  else
4051  {
4052  coeffh(0, p, e) = Q->Eval(*tr, ir->IntPoint(p));
4053  }
4054  }
4055  }
4056  }
4057 
4058  testType = test_el->GetDerivType();
4059  trialType = trial_el->GetDerivType();
4060 
4061  if (trialType == mfem::FiniteElement::CURL && dim == 3)
4062  {
4063  PAHcurlL2Setup(nq, coeffDim, ne, ir->GetWeights(), coeff, pa_data);
4064  }
4065  else
4066  {
4067  MFEM_ABORT("Unknown kernel.");
4068  }
4069 }
4070 
4071 // Apply to x corresponding to DOF's in H(curl) (trial), integrated against curl
4072 // of H(curl) test functions corresponding to y.
4073 template<int MAX_D1D = HCURL_MAX_D1D, int MAX_Q1D = HCURL_MAX_Q1D>
4074 static void PAHcurlL2Apply3DTranspose(const int D1D,
4075  const int Q1D,
4076  const int coeffDim,
4077  const int NE,
4078  const Array<double> &bo,
4079  const Array<double> &bc,
4080  const Array<double> &bot,
4081  const Array<double> &bct,
4082  const Array<double> &gct,
4083  const Vector &pa_data,
4084  const Vector &x,
4085  Vector &y)
4086 {
4087  // See PAHcurlL2Apply3D for comments.
4088 
4089  MFEM_VERIFY(D1D <= MAX_D1D, "Error: D1D > MAX_D1D");
4090  MFEM_VERIFY(Q1D <= MAX_Q1D, "Error: Q1D > MAX_Q1D");
4091 
4092  constexpr static int VDIM = 3;
4093 
4094  auto Bo = Reshape(bo.Read(), Q1D, D1D-1);
4095  auto Bc = Reshape(bc.Read(), Q1D, D1D);
4096  auto Bot = Reshape(bot.Read(), D1D-1, Q1D);
4097  auto Bct = Reshape(bct.Read(), D1D, Q1D);
4098  auto Gct = Reshape(gct.Read(), D1D, Q1D);
4099  auto op = Reshape(pa_data.Read(), coeffDim, Q1D, Q1D, Q1D, NE);
4100  auto X = Reshape(x.Read(), 3*(D1D-1)*D1D*D1D, NE);
4101  auto Y = Reshape(y.ReadWrite(), 3*(D1D-1)*D1D*D1D, NE);
4102 
4103  MFEM_FORALL(e, NE,
4104  {
4105  double mass[MAX_Q1D][MAX_Q1D][MAX_Q1D][VDIM];
4106 
4107  for (int qz = 0; qz < Q1D; ++qz)
4108  {
4109  for (int qy = 0; qy < Q1D; ++qy)
4110  {
4111  for (int qx = 0; qx < Q1D; ++qx)
4112  {
4113  for (int c = 0; c < VDIM; ++c)
4114  {
4115  mass[qz][qy][qx][c] = 0.0;
4116  }
4117  }
4118  }
4119  }
4120 
4121  int osc = 0;
4122 
4123  for (int c = 0; c < VDIM; ++c) // loop over x, y, z components
4124  {
4125  const int D1Dz = (c == 2) ? D1D - 1 : D1D;
4126  const int D1Dy = (c == 1) ? D1D - 1 : D1D;
4127  const int D1Dx = (c == 0) ? D1D - 1 : D1D;
4128 
4129  for (int dz = 0; dz < D1Dz; ++dz)
4130  {
4131  double massXY[MAX_Q1D][MAX_Q1D];
4132  for (int qy = 0; qy < Q1D; ++qy)
4133  {
4134  for (int qx = 0; qx < Q1D; ++qx)
4135  {
4136  massXY[qy][qx] = 0.0;
4137  }
4138  }
4139 
4140  for (int dy = 0; dy < D1Dy; ++dy)
4141  {
4142  double massX[MAX_Q1D];
4143  for (int qx = 0; qx < Q1D; ++qx)
4144  {
4145  massX[qx] = 0.0;
4146  }
4147 
4148  for (int dx = 0; dx < D1Dx; ++dx)
4149  {
4150  const double t = X(dx + ((dy + (dz * D1Dy)) * D1Dx) + osc, e);
4151  for (int qx = 0; qx < Q1D; ++qx)
4152  {
4153  massX[qx] += t * ((c == 0) ? Bo(qx,dx) : Bc(qx,dx));
4154  }
4155  }
4156 
4157  for (int qy = 0; qy < Q1D; ++qy)
4158  {
4159  const double wy = (c == 1) ? Bo(qy,dy) : Bc(qy,dy);
4160  for (int qx = 0; qx < Q1D; ++qx)
4161  {
4162  const double wx = massX[qx];
4163  massXY[qy][qx] += wx * wy;
4164  }
4165  }
4166  }
4167 
4168  for (int qz = 0; qz < Q1D; ++qz)
4169  {
4170  const double wz = (c == 2) ? Bo(qz,dz) : Bc(qz,dz);
4171  for (int qy = 0; qy < Q1D; ++qy)
4172  {
4173  for (int qx = 0; qx < Q1D; ++qx)
4174  {
4175  mass[qz][qy][qx][c] += massXY[qy][qx] * wz;
4176  }
4177  }
4178  }
4179  }
4180 
4181  osc += D1Dx * D1Dy * D1Dz;
4182  } // loop (c) over components
4183 
4184  // Apply D operator.
4185  for (int qz = 0; qz < Q1D; ++qz)
4186  {
4187  for (int qy = 0; qy < Q1D; ++qy)
4188  {
4189  for (int qx = 0; qx < Q1D; ++qx)
4190  {
4191  for (int c=0; c<VDIM; ++c)
4192  {
4193  mass[qz][qy][qx][c] *= op(coeffDim == 3 ? c : 0, qx,qy,qz,e);
4194  }
4195  }
4196  }
4197  }
4198 
4199  // x component
4200  osc = 0;
4201  {
4202  const int D1Dz = D1D;
4203  const int D1Dy = D1D;
4204  const int D1Dx = D1D - 1;
4205 
4206  for (int qz = 0; qz < Q1D; ++qz)
4207  {
4208  double gradXY12[MAX_D1D][MAX_D1D];
4209  double gradXY21[MAX_D1D][MAX_D1D];
4210 
4211  for (int dy = 0; dy < D1Dy; ++dy)
4212  {
4213  for (int dx = 0; dx < D1Dx; ++dx)
4214  {
4215  gradXY12[dy][dx] = 0.0;
4216  gradXY21[dy][dx] = 0.0;
4217  }
4218  }
4219  for (int qy = 0; qy < Q1D; ++qy)
4220  {
4221  double massX[MAX_D1D][2];
4222  for (int dx = 0; dx < D1Dx; ++dx)
4223  {
4224  for (int n = 0; n < 2; ++n)
4225  {
4226  massX[dx][n] = 0.0;
4227  }
4228  }
4229  for (int qx = 0; qx < Q1D; ++qx)
4230  {
4231  for (int dx = 0; dx < D1Dx; ++dx)
4232  {
4233  const double wx = Bot(dx,qx);
4234 
4235  massX[dx][0] += wx * mass[qz][qy][qx][1];
4236  massX[dx][1] += wx * mass[qz][qy][qx][2];
4237  }
4238  }
4239  for (int dy = 0; dy < D1Dy; ++dy)
4240  {
4241  const double wy = Bct(dy,qy);
4242  const double wDy = Gct(dy,qy);
4243 
4244  for (int dx = 0; dx < D1Dx; ++dx)
4245  {
4246  gradXY21[dy][dx] += massX[dx][0] * wy;
4247  gradXY12[dy][dx] += massX[dx][1] * wDy;
4248  }
4249  }
4250  }
4251 
4252  for (int dz = 0; dz < D1Dz; ++dz)
4253  {
4254  const double wz = Bct(dz,qz);
4255  const double wDz = Gct(dz,qz);
4256  for (int dy = 0; dy < D1Dy; ++dy)
4257  {
4258  for (int dx = 0; dx < D1Dx; ++dx)
4259  {
4260  // \hat{\nabla}\times\hat{u} is [0, (u_0)_{x_2}, -(u_0)_{x_1}]
4261  // (u_0)_{x_2} * (op * curl)_1 - (u_0)_{x_1} * (op * curl)_2
4262  Y(dx + ((dy + (dz * D1Dy)) * D1Dx) + osc,
4263  e) += (gradXY21[dy][dx] * wDz) - (gradXY12[dy][dx] * wz);
4264  }
4265  }
4266  }
4267  } // loop qz
4268 
4269  osc += D1Dx * D1Dy * D1Dz;
4270  }
4271 
4272  // y component
4273  {
4274  const int D1Dz = D1D;
4275  const int D1Dy = D1D - 1;
4276  const int D1Dx = D1D;
4277 
4278  for (int qz = 0; qz < Q1D; ++qz)
4279  {
4280  double gradXY02[MAX_D1D][MAX_D1D];
4281  double gradXY20[MAX_D1D][MAX_D1D];
4282 
4283  for (int dy = 0; dy < D1Dy; ++dy)
4284  {
4285  for (int dx = 0; dx < D1Dx; ++dx)
4286  {
4287  gradXY02[dy][dx] = 0.0;
4288  gradXY20[dy][dx] = 0.0;
4289  }
4290  }
4291  for (int qx = 0; qx < Q1D; ++qx)
4292  {
4293  double massY[MAX_D1D][2];
4294  for (int dy = 0; dy < D1Dy; ++dy)
4295  {
4296  massY[dy][0] = 0.0;
4297  massY[dy][1] = 0.0;
4298  }
4299  for (int qy = 0; qy < Q1D; ++qy)
4300  {
4301  for (int dy = 0; dy < D1Dy; ++dy)
4302  {
4303  const double wy = Bot(dy,qy);
4304 
4305  massY[dy][0] += wy * mass[qz][qy][qx][2];
4306  massY[dy][1] += wy * mass[qz][qy][qx][0];
4307  }
4308  }
4309  for (int dx = 0; dx < D1Dx; ++dx)
4310  {
4311  const double wx = Bct(dx,qx);
4312  const double wDx = Gct(dx,qx);
4313 
4314  for (int dy = 0; dy < D1Dy; ++dy)
4315  {
4316  gradXY02[dy][dx] += massY[dy][0] * wDx;
4317  gradXY20[dy][dx] += massY[dy][1] * wx;
4318  }
4319  }
4320  }
4321 
4322  for (int dz = 0; dz < D1Dz; ++dz)
4323  {
4324  const double wz = Bct(dz,qz);
4325  const double wDz = Gct(dz,qz);
4326  for (int dy = 0; dy < D1Dy; ++dy)
4327  {
4328  for (int dx = 0; dx < D1Dx; ++dx)
4329  {
4330  // \hat{\nabla}\times\hat{u} is [-(u_1)_{x_2}, 0, (u_1)_{x_0}]
4331  // -(u_1)_{x_2} * (op * curl)_0 + (u_1)_{x_0} * (op * curl)_2
4332  Y(dx + ((dy + (dz * D1Dy)) * D1Dx) + osc,
4333  e) += (-gradXY20[dy][dx] * wDz) + (gradXY02[dy][dx] * wz);
4334  }
4335  }
4336  }
4337  } // loop qz
4338 
4339  osc += D1Dx * D1Dy * D1Dz;
4340  }
4341 
4342  // z component
4343  {
4344  const int D1Dz = D1D - 1;
4345  const int D1Dy = D1D;
4346  const int D1Dx = D1D;
4347 
4348  for (int qx = 0; qx < Q1D; ++qx)
4349  {
4350  double gradYZ01[MAX_D1D][MAX_D1D];
4351  double gradYZ10[MAX_D1D][MAX_D1D];
4352 
4353  for (int dy = 0; dy < D1Dy; ++dy)
4354  {
4355  for (int dz = 0; dz < D1Dz; ++dz)
4356  {
4357  gradYZ01[dz][dy] = 0.0;
4358  gradYZ10[dz][dy] = 0.0;
4359  }
4360  }
4361  for (int qy = 0; qy < Q1D; ++qy)
4362  {
4363  double massZ[MAX_D1D][2];
4364  for (int dz = 0; dz < D1Dz; ++dz)
4365  {
4366  for (int n = 0; n < 2; ++n)
4367  {
4368  massZ[dz][n] = 0.0;
4369  }
4370  }
4371  for (int qz = 0; qz < Q1D; ++qz)
4372  {
4373  for (int dz = 0; dz < D1Dz; ++dz)
4374  {
4375  const double wz = Bot(dz,qz);
4376 
4377  massZ[dz][0] += wz * mass[qz][qy][qx][0];
4378  massZ[dz][1] += wz * mass[qz][qy][qx][1];
4379  }
4380  }
4381  for (int dy = 0; dy < D1Dy; ++dy)
4382  {
4383  const double wy = Bct(dy,qy);
4384  const double wDy = Gct(dy,qy);
4385 
4386  for (int dz = 0; dz < D1Dz; ++dz)
4387  {
4388  gradYZ01[dz][dy] += wy * massZ[dz][1];
4389  gradYZ10[dz][dy] += wDy * massZ[dz][0];
4390  }
4391  }
4392  }
4393 
4394  for (int dx = 0; dx < D1Dx; ++dx)
4395  {
4396  const double wx = Bct(dx,qx);
4397  const double wDx = Gct(dx,qx);
4398 
4399  for (int dy = 0; dy < D1Dy; ++dy)
4400  {
4401  for (int dz = 0; dz < D1Dz; ++dz)
4402  {
4403  // \hat{\nabla}\times\hat{u} is [(u_2)_{x_1}, -(u_2)_{x_0}, 0]
4404  // (u_2)_{x_1} * (op * curl)_0 - (u_2)_{x_0} * (op * curl)_1
4405  Y(dx + ((dy + (dz * D1Dy)) * D1Dx) + osc,
4406  e) += (gradYZ10[dz][dy] * wx) - (gradYZ01[dz][dy] * wDx);
4407  }
4408  }
4409  }
4410  } // loop qx
4411  }
4412  });
4413 }
4414 
4415 template<int MAX_D1D = HCURL_MAX_D1D, int MAX_Q1D = HCURL_MAX_Q1D>
4416 static void SmemPAHcurlL2Apply3DTranspose(const int D1D,
4417  const int Q1D,
4418  const int coeffDim,
4419  const int NE,
4420  const Array<double> &bo,
4421  const Array<double> &bc,
4422  const Array<double> &gc,
4423  const Vector &pa_data,
4424  const Vector &x,
4425  Vector &y)
4426 {
4427  MFEM_VERIFY(D1D <= MAX_D1D, "Error: D1D > MAX_D1D");
4428  MFEM_VERIFY(Q1D <= MAX_Q1D, "Error: Q1D > MAX_Q1D");
4429 
4430  auto Bo = Reshape(bo.Read(), Q1D, D1D-1);
4431  auto Bc = Reshape(bc.Read(), Q1D, D1D);
4432  auto Gc = Reshape(gc.Read(), Q1D, D1D);
4433  auto op = Reshape(pa_data.Read(), coeffDim, Q1D, Q1D, Q1D, NE);
4434  auto X = Reshape(x.Read(), 3*(D1D-1)*D1D*D1D, NE);
4435  auto Y = Reshape(y.ReadWrite(), 3*(D1D-1)*D1D*D1D, NE);
4436 
4437  MFEM_FORALL_3D(e, NE, Q1D, Q1D, Q1D,
4438  {
4439  constexpr int VDIM = 3;
4440  constexpr int maxCoeffDim = 3;
4441 
4442  MFEM_SHARED double sBo[MAX_D1D][MAX_Q1D];
4443  MFEM_SHARED double sBc[MAX_D1D][MAX_Q1D];
4444  MFEM_SHARED double sGc[MAX_D1D][MAX_Q1D];
4445 
4446  double opc[maxCoeffDim];
4447  MFEM_SHARED double sop[maxCoeffDim][MAX_Q1D][MAX_Q1D];
4448  MFEM_SHARED double mass[MAX_Q1D][MAX_Q1D][3];
4449 
4450  MFEM_SHARED double sX[MAX_D1D][MAX_D1D][MAX_D1D];
4451 
4452  MFEM_FOREACH_THREAD(qx,x,Q1D)
4453  {
4454  MFEM_FOREACH_THREAD(qy,y,Q1D)
4455  {
4456  MFEM_FOREACH_THREAD(qz,z,Q1D)
4457  {
4458  for (int i=0; i<coeffDim; ++i)
4459  {
4460  opc[i] = op(i,qx,qy,qz,e);
4461  }
4462  }
4463  }
4464  }
4465 
4466  const int tidx = MFEM_THREAD_ID(x);
4467  const int tidy = MFEM_THREAD_ID(y);
4468  const int tidz = MFEM_THREAD_ID(z);
4469 
4470  if (tidz == 0)
4471  {
4472  MFEM_FOREACH_THREAD(d,y,D1D)
4473  {
4474  MFEM_FOREACH_THREAD(q,x,Q1D)
4475  {
4476  sBc[d][q] = Bc(q,d);
4477  sGc[d][q] = Gc(q,d);
4478  if (d < D1D-1)
4479  {
4480  sBo[d][q] = Bo(q,d);
4481  }
4482  }
4483  }
4484  }
4485  MFEM_SYNC_THREAD;
4486 
4487  for (int qz=0; qz < Q1D; ++qz)
4488  {
4489  if (tidz == qz)
4490  {
4491  MFEM_FOREACH_THREAD(qy,y,Q1D)
4492  {
4493  MFEM_FOREACH_THREAD(qx,x,Q1D)
4494  {
4495  for (int i=0; i<3; ++i)
4496  {
4497  mass[qy][qx][i] = 0.0;
4498  }
4499  }
4500  }
4501  }
4502 
4503  int osc = 0;
4504  for (int c = 0; c < VDIM; ++c) // loop over x, y, z components
4505  {
4506  const int D1Dz = (c == 2) ? D1D - 1 : D1D;
4507  const int D1Dy = (c == 1) ? D1D - 1 : D1D;
4508  const int D1Dx = (c == 0) ? D1D - 1 : D1D;
4509 
4510  MFEM_FOREACH_THREAD(dz,z,D1Dz)
4511  {
4512  MFEM_FOREACH_THREAD(dy,y,D1Dy)
4513  {
4514  MFEM_FOREACH_THREAD(dx,x,D1Dx)
4515  {
4516  sX[dz][dy][dx] = X(dx + ((dy + (dz * D1Dy)) * D1Dx) + osc, e);
4517  }
4518  }
4519  }
4520  MFEM_SYNC_THREAD;
4521 
4522  if (tidz == qz)
4523  {
4524  if (c == 0)
4525  {
4526  for (int i=0; i<coeffDim; ++i)
4527  {
4528  sop[i][tidx][tidy] = opc[i];
4529  }
4530  }
4531 
4532  MFEM_FOREACH_THREAD(qy,y,Q1D)
4533  {
4534  MFEM_FOREACH_THREAD(qx,x,Q1D)
4535  {
4536  double u = 0.0;
4537 
4538  for (int dz = 0; dz < D1Dz; ++dz)
4539  {
4540  const double wz = (c == 2) ? sBo[dz][qz] : sBc[dz][qz];
4541 
4542  for (int dy = 0; dy < D1Dy; ++dy)
4543  {
4544  const double wy = (c == 1) ? sBo[dy][qy] : sBc[dy][qy];
4545 
4546  for (int dx = 0; dx < D1Dx; ++dx)
4547  {
4548  const double wx = sX[dz][dy][dx] * ((c == 0) ? sBo[dx][qx] : sBc[dx][qx]);
4549  u += wx * wy * wz;
4550  }
4551  }
4552  }
4553 
4554  mass[qy][qx][c] += u;
4555  } // qx
4556  } // qy
4557  } // tidz == qz
4558 
4559  osc += D1Dx * D1Dy * D1Dz;
4560  MFEM_SYNC_THREAD;
4561  } // c
4562 
4563  double dxyz1 = 0.0;
4564  double dxyz2 = 0.0;
4565  double dxyz3 = 0.0;
4566 
4567  MFEM_FOREACH_THREAD(dz,z,D1D)
4568  {
4569  const double wcz = sBc[dz][qz];
4570  const double wcDz = sGc[dz][qz];
4571  const double wz = (dz < D1D-1) ? sBo[dz][qz] : 0.0;
4572 
4573  MFEM_FOREACH_THREAD(dy,y,D1D)
4574  {
4575  MFEM_FOREACH_THREAD(dx,x,D1D)
4576  {
4577  for (int qy = 0; qy < Q1D; ++qy)
4578  {
4579  const double wcy = sBc[dy][qy];
4580  const double wcDy = sGc[dy][qy];
4581  const double wy = (dy < D1D-1) ? sBo[dy][qy] : 0.0;
4582 
4583  for (int qx = 0; qx < Q1D; ++qx)
4584  {
4585  const double O1 = sop[0][qx][qy];
4586  const double O2 = (coeffDim == 3) ? sop[1][qx][qy] : O1;
4587  const double O3 = (coeffDim == 3) ? sop[2][qx][qy] : O1;
4588 
4589  const double c1 = O1 * mass[qy][qx][0];
4590  const double c2 = O2 * mass[qy][qx][1];
4591  const double c3 = O3 * mass[qy][qx][2];
4592 
4593  const double wcx = sBc[dx][qx];
4594  const double wDx = sGc[dx][qx];
4595 
4596  if (dx < D1D-1)
4597  {
4598  const double wx = sBo[dx][qx];
4599  dxyz1 += (wx * c2 * wcy * wcDz) - (wx * c3 * wcDy * wcz);
4600  }
4601 
4602  dxyz2 += (-wy * c1 * wcx * wcDz) + (wy * c3 * wDx * wcz);
4603 
4604  dxyz3 += (wcDy * wz * c1 * wcx) - (wcy * wz * c2 * wDx);
4605  } // qx
4606  } // qy
4607  } // dx
4608  } // dy
4609  } // dz
4610 
4611  MFEM_SYNC_THREAD;
4612 
4613  MFEM_FOREACH_THREAD(dz,z,D1D)
4614  {
4615  MFEM_FOREACH_THREAD(dy,y,D1D)
4616  {
4617  MFEM_FOREACH_THREAD(dx,x,D1D)
4618  {
4619  if (dx < D1D-1)
4620  {
4621  Y(dx + ((dy + (dz * D1D)) * (D1D-1)), e) += dxyz1;
4622  }
4623  if (dy < D1D-1)
4624  {
4625  Y(dx + ((dy + (dz * (D1D-1))) * D1D) + ((D1D-1)*D1D*D1D), e) += dxyz2;
4626  }
4627  if (dz < D1D-1)
4628  {
4629  Y(dx + ((dy + (dz * D1D)) * D1D) + (2*(D1D-1)*D1D*D1D), e) += dxyz3;
4630  }
4631  }
4632  }
4633  }
4634  } // qz
4635  }); // end of element loop
4636 }
4637 
4638 void MixedVectorWeakCurlIntegrator::AddMultPA(const Vector &x, Vector &y) const
4639 {
4640  if (testType == mfem::FiniteElement::CURL &&
4641  trialType == mfem::FiniteElement::CURL && dim == 3)
4642  {
4643  if (Device::Allows(Backend::DEVICE_MASK))
4644  {
4645  const int ID = (dofs1D << 4) | quad1D;
4646  switch (ID)
4647  {
4648  case 0x23: return SmemPAHcurlL2Apply3DTranspose<2,3>(dofs1D, quad1D, coeffDim,
4649  ne, mapsO->B, mapsC->B,
4650  mapsC->G, pa_data, x, y);
4651  case 0x34: return SmemPAHcurlL2Apply3DTranspose<3,4>(dofs1D, quad1D, coeffDim,
4652  ne, mapsO->B, mapsC->B,
4653  mapsC->G, pa_data, x, y);
4654  case 0x45: return SmemPAHcurlL2Apply3DTranspose<4,5>(dofs1D, quad1D, coeffDim,
4655  ne, mapsO->B, mapsC->B,
4656  mapsC->G, pa_data, x, y);
4657  case 0x56: return SmemPAHcurlL2Apply3DTranspose<5,6>(dofs1D, quad1D, coeffDim,
4658  ne, mapsO->B, mapsC->B,
4659  mapsC->G, pa_data, x, y);
4660  default: return SmemPAHcurlL2Apply3DTranspose(dofs1D, quad1D, coeffDim, ne,
4661  mapsO->B, mapsC->B,
4662  mapsC->G, pa_data, x, y);
4663  }
4664  }
4665  else
4666  PAHcurlL2Apply3DTranspose(dofs1D, quad1D, coeffDim, ne, mapsO->B, mapsC->B,
4667  mapsO->Bt, mapsC->Bt, mapsC->Gt, pa_data, x, y);
4668  }
4669  else
4670  {
4671  MFEM_ABORT("Unsupported dimension or space!");
4672  }
4673 }
4674 
4675 // Apply to x corresponding to DOFs in H^1 (domain) the (topological) gradient
4676 // to get a dof in H(curl) (range). You can think of the range as the "test" space
4677 // and the domain as the "trial" space, but there's no integration.
4678 static void PAHcurlApplyGradient2D(const int c_dofs1D,
4679  const int o_dofs1D,
4680  const int NE,
4681  const Array<double> &B_,
4682  const Array<double> &G_,
4683  const Vector &x_,
4684  Vector &y_)
4685 {
4686  auto B = Reshape(B_.Read(), c_dofs1D, c_dofs1D);
4687  auto G = Reshape(G_.Read(), o_dofs1D, c_dofs1D);
4688 
4689  auto x = Reshape(x_.Read(), c_dofs1D, c_dofs1D, NE);
4690  auto y = Reshape(y_.ReadWrite(), 2 * c_dofs1D * o_dofs1D, NE);
4691 
4692  constexpr static int MAX_D1D = HCURL_MAX_D1D;
4693  MFEM_VERIFY(c_dofs1D <= MAX_D1D && o_dofs1D <= c_dofs1D, "");
4694 
4695  MFEM_FORALL(e, NE,
4696  {
4697  double w[MAX_D1D][MAX_D1D];
4698 
4699  // horizontal part
4700  for (int dx = 0; dx < c_dofs1D; ++dx)
4701  {
4702  for (int ey = 0; ey < c_dofs1D; ++ey)
4703  {
4704  w[dx][ey] = 0.0;
4705  for (int dy = 0; dy < c_dofs1D; ++dy)
4706  {
4707  w[dx][ey] += B(ey, dy) * x(dx, dy, e);
4708  }
4709  }
4710  }
4711 
4712  for (int ey = 0; ey < c_dofs1D; ++ey)
4713  {
4714  for (int ex = 0; ex < o_dofs1D; ++ex)
4715  {
4716  double s = 0.0;
4717  for (int dx = 0; dx < c_dofs1D; ++dx)
4718  {
4719  s += G(ex, dx) * w[dx][ey];
4720  }
4721  const int local_index = ey*o_dofs1D + ex;
4722  y(local_index, e) += s;
4723  }
4724  }
4725 
4726  // vertical part
4727  for (int dx = 0; dx < c_dofs1D; ++dx)
4728  {
4729  for (int ey = 0; ey < o_dofs1D; ++ey)
4730  {
4731  w[dx][ey] = 0.0;
4732  for (int dy = 0; dy < c_dofs1D; ++dy)
4733  {
4734  w[dx][ey] += G(ey, dy) * x(dx, dy, e);
4735  }
4736  }
4737  }
4738 
4739  for (int ey = 0; ey < o_dofs1D; ++ey)
4740  {
4741  for (int ex = 0; ex < c_dofs1D; ++ex)
4742  {
4743  double s = 0.0;
4744  for (int dx = 0; dx < c_dofs1D; ++dx)
4745  {
4746  s += B(ex, dx) * w[dx][ey];
4747  }
4748  const int local_index = c_dofs1D * o_dofs1D + ey*c_dofs1D + ex;
4749  y(local_index, e) += s;
4750  }
4751  }
4752  });
4753 }
4754 
4755 // Specialization of PAHcurlApplyGradient2D to the case where B is identity
4756 static void PAHcurlApplyGradient2DBId(const int c_dofs1D,
4757  const int o_dofs1D,
4758  const int NE,
4759  const Array<double> &G_,
4760  const Vector &x_,
4761  Vector &y_)
4762 {
4763  auto G = Reshape(G_.Read(), o_dofs1D, c_dofs1D);
4764 
4765  auto x = Reshape(x_.Read(), c_dofs1D, c_dofs1D, NE);
4766  auto y = Reshape(y_.ReadWrite(), 2 * c_dofs1D * o_dofs1D, NE);
4767 
4768  constexpr static int MAX_D1D = HCURL_MAX_D1D;
4769  MFEM_VERIFY(c_dofs1D <= MAX_D1D && o_dofs1D <= c_dofs1D, "");
4770 
4771  MFEM_FORALL(e, NE,
4772  {
4773  double w[MAX_D1D][MAX_D1D];
4774 
4775  // horizontal part
4776  for (int dx = 0; dx < c_dofs1D; ++dx)
4777  {
4778  for (int ey = 0; ey < c_dofs1D; ++ey)
4779  {
4780  const int dy = ey;
4781  w[dx][ey] = x(dx, dy, e);
4782  }
4783  }
4784 
4785  for (int ey = 0; ey < c_dofs1D; ++ey)
4786  {
4787  for (int ex = 0; ex < o_dofs1D; ++ex)
4788  {
4789  double s = 0.0;
4790  for (int dx = 0; dx < c_dofs1D; ++dx)
4791  {
4792  s += G(ex, dx) * w[dx][ey];
4793  }
4794  const int local_index = ey*o_dofs1D + ex;
4795  y(local_index, e) += s;
4796  }
4797  }
4798 
4799  // vertical part
4800  for (int dx = 0; dx < c_dofs1D; ++dx)
4801  {
4802  for (int ey = 0; ey < o_dofs1D; ++ey)
4803  {
4804  w[dx][ey] = 0.0;
4805  for (int dy = 0; dy < c_dofs1D; ++dy)
4806  {
4807  w[dx][ey] += G(ey, dy) * x(dx, dy, e);
4808  }
4809  }
4810  }
4811 
4812  for (int ey = 0; ey < o_dofs1D; ++ey)
4813  {
4814  for (int ex = 0; ex < c_dofs1D; ++ex)
4815  {
4816  const int dx = ex;
4817  const double s = w[dx][ey];
4818  const int local_index = c_dofs1D * o_dofs1D + ey*c_dofs1D + ex;
4819  y(local_index, e) += s;
4820  }
4821  }
4822  });
4823 }
4824 
4825 static void PAHcurlApplyGradientTranspose2D(
4826  const int c_dofs1D, const int o_dofs1D, const int NE,
4827  const Array<double> &B_, const Array<double> &G_,
4828  const Vector &x_, Vector &y_)
4829 {
4830  auto B = Reshape(B_.Read(), c_dofs1D, c_dofs1D);
4831  auto G = Reshape(G_.Read(), o_dofs1D, c_dofs1D);
4832 
4833  auto x = Reshape(x_.Read(), 2 * c_dofs1D * o_dofs1D, NE);
4834  auto y = Reshape(y_.ReadWrite(), c_dofs1D, c_dofs1D, NE);
4835 
4836  constexpr static int MAX_D1D = HCURL_MAX_D1D;
4837  MFEM_VERIFY(c_dofs1D <= MAX_D1D && o_dofs1D <= c_dofs1D, "");
4838 
4839  MFEM_FORALL(e, NE,
4840  {
4841  double w[MAX_D1D][MAX_D1D];
4842 
4843  // horizontal part (open x, closed y)
4844  for (int dy = 0; dy < c_dofs1D; ++dy)
4845  {
4846  for (int ex = 0; ex < o_dofs1D; ++ex)
4847  {
4848  w[dy][ex] = 0.0;
4849  for (int ey = 0; ey < c_dofs1D; ++ey)
4850  {
4851  const int local_index = ey*o_dofs1D + ex;
4852  w[dy][ex] += B(ey, dy) * x(local_index, e);
4853  }
4854  }
4855  }
4856 
4857  for (int dy = 0; dy < c_dofs1D; ++dy)
4858  {
4859  for (int dx = 0; dx < c_dofs1D; ++dx)
4860  {
4861  double s = 0.0;
4862  for (int ex = 0; ex < o_dofs1D; ++ex)
4863  {
4864  s += G(ex, dx) * w[dy][ex];
4865  }
4866  y(dx, dy, e) += s;
4867  }
4868  }
4869 
4870  // vertical part (open y, closed x)
4871  for (int dy = 0; dy < c_dofs1D; ++dy)
4872  {
4873  for (int ex = 0; ex < c_dofs1D; ++ex)
4874  {
4875  w[dy][ex] = 0.0;
4876  for (int ey = 0; ey < o_dofs1D; ++ey)
4877  {
4878  const int local_index = c_dofs1D * o_dofs1D + ey*c_dofs1D + ex;
4879  w[dy][ex] += G(ey, dy) * x(local_index, e);
4880  }
4881  }
4882  }
4883 
4884  for (int dy = 0; dy < c_dofs1D; ++dy)
4885  {
4886  for (int dx = 0; dx < c_dofs1D; ++dx)
4887  {
4888  double s = 0.0;
4889  for (int ex = 0; ex < c_dofs1D; ++ex)
4890  {
4891  s += B(ex, dx) * w[dy][ex];
4892  }
4893  y(dx, dy, e) += s;
4894  }
4895  }
4896  });
4897 }
4898 
4899 // Specialization of PAHcurlApplyGradientTranspose2D to the case where
4900 // B is identity
4901 static void PAHcurlApplyGradientTranspose2DBId(
4902  const int c_dofs1D, const int o_dofs1D, const int NE,
4903  const Array<double> &G_,
4904  const Vector &x_, Vector &y_)
4905 {
4906  auto G = Reshape(G_.Read(), o_dofs1D, c_dofs1D);
4907 
4908  auto x = Reshape(x_.Read(), 2 * c_dofs1D * o_dofs1D, NE);
4909  auto y = Reshape(y_.ReadWrite(), c_dofs1D, c_dofs1D, NE);
4910 
4911  constexpr static int MAX_D1D = HCURL_MAX_D1D;
4912  MFEM_VERIFY(c_dofs1D <= MAX_D1D && o_dofs1D <= c_dofs1D, "");
4913 
4914  MFEM_FORALL(e, NE,
4915  {
4916  double w[MAX_D1D][MAX_D1D];
4917 
4918  // horizontal part (open x, closed y)
4919  for (int dy = 0; dy < c_dofs1D; ++dy)
4920  {
4921  for (int ex = 0; ex < o_dofs1D; ++ex)
4922  {
4923  const int ey = dy;
4924  const int local_index = ey*o_dofs1D + ex;
4925  w[dy][ex] = x(local_index, e);
4926  }
4927  }
4928 
4929  for (int dy = 0; dy < c_dofs1D; ++dy)
4930  {
4931  for (int dx = 0; dx < c_dofs1D; ++dx)
4932  {
4933  double s = 0.0;
4934  for (int ex = 0; ex < o_dofs1D; ++ex)
4935  {
4936  s += G(ex, dx) * w[dy][ex];
4937  }
4938  y(dx, dy, e) += s;
4939  }
4940  }
4941 
4942  // vertical part (open y, closed x)
4943  for (int dy = 0; dy < c_dofs1D; ++dy)
4944  {
4945  for (int ex = 0; ex < c_dofs1D; ++ex)
4946  {
4947  w[dy][ex] = 0.0;
4948  for (int ey = 0; ey < o_dofs1D; ++ey)
4949  {
4950  const int local_index = c_dofs1D * o_dofs1D + ey*c_dofs1D + ex;
4951  w[dy][ex] += G(ey, dy) * x(local_index, e);
4952  }
4953  }
4954  }
4955 
4956  for (int dy = 0; dy < c_dofs1D; ++dy)
4957  {
4958  for (int dx = 0; dx < c_dofs1D; ++dx)
4959  {
4960  const int ex = dx;
4961  const double s = w[dy][ex];
4962  y(dx, dy, e) += s;
4963  }
4964  }
4965  });
4966 }
4967 
4968 static void PAHcurlApplyGradient3D(const int c_dofs1D,
4969  const int o_dofs1D,
4970  const int NE,
4971  const Array<double> &B_,
4972  const Array<double> &G_,
4973  const Vector &x_,
4974  Vector &y_)
4975 {
4976  auto B = Reshape(B_.Read(), c_dofs1D, c_dofs1D);
4977  auto G = Reshape(G_.Read(), o_dofs1D, c_dofs1D);
4978 
4979  auto x = Reshape(x_.Read(), c_dofs1D, c_dofs1D, c_dofs1D, NE);
4980  auto y = Reshape(y_.ReadWrite(), (3 * c_dofs1D * c_dofs1D * o_dofs1D), NE);
4981 
4982  constexpr static int MAX_D1D = HCURL_MAX_D1D;
4983  MFEM_VERIFY(c_dofs1D <= MAX_D1D && o_dofs1D <= c_dofs1D, "");
4984 
4985  MFEM_FORALL(e, NE,
4986  {
4987  double w1[MAX_D1D][MAX_D1D][MAX_D1D];
4988  double w2[MAX_D1D][MAX_D1D][MAX_D1D];
4989 
4990  // ---
4991  // dofs that point parallel to x-axis (open in x, closed in y, z)
4992  // ---
4993 
4994  // contract in z
4995  for (int ez = 0; ez < c_dofs1D; ++ez)
4996  {
4997  for (int dx = 0; dx < c_dofs1D; ++dx)
4998  {
4999  for (int dy = 0; dy < c_dofs1D; ++dy)
5000  {
5001  w1[dx][dy][ez] = 0.0;
5002  for (int dz = 0; dz < c_dofs1D; ++dz)
5003  {
5004  w1[dx][dy][ez] += B(ez, dz) * x(dx, dy, dz, e);
5005  }
5006  }
5007  }
5008  }
5009 
5010  // contract in y
5011  for (int ez = 0; ez < c_dofs1D; ++ez)
5012  {
5013  for (int ey = 0; ey < c_dofs1D; ++ey)
5014  {
5015  for (int dx = 0; dx < c_dofs1D; ++dx)
5016  {
5017  w2[dx][ey][ez] = 0.0;
5018  for (int dy = 0; dy < c_dofs1D; ++dy)
5019  {
5020  w2[dx][ey][ez] += B(ey, dy) * w1[dx][dy][ez];
5021  }
5022  }
5023  }
5024  }
5025 
5026  // contract in x
5027  for (int ez = 0; ez < c_dofs1D; ++ez)
5028  {
5029  for (int ey = 0; ey < c_dofs1D; ++ey)
5030  {
5031  for (int ex = 0; ex < o_dofs1D; ++ex)
5032  {
5033  double s = 0.0;
5034  for (int dx = 0; dx < c_dofs1D; ++dx)
5035  {
5036  s += G(ex, dx) * w2[dx][ey][ez];
5037  }
5038  const int local_index = ez*c_dofs1D*o_dofs1D + ey*o_dofs1D + ex;
5039  y(local_index, e) += s;
5040  }
5041  }
5042  }
5043 
5044  // ---
5045  // dofs that point parallel to y-axis (open in y, closed in x, z)
5046  // ---
5047 
5048  // contract in z
5049  for (int ez = 0; ez < c_dofs1D; ++ez)
5050  {
5051  for (int dx = 0; dx < c_dofs1D; ++dx)
5052  {
5053  for (int dy = 0; dy < c_dofs1D; ++dy)
5054  {
5055  w1[dx][dy][ez] = 0.0;
5056  for (int dz = 0; dz < c_dofs1D; ++dz)
5057  {
5058  w1[dx][dy][ez] += B(ez, dz) * x(dx, dy, dz, e);
5059  }
5060  }
5061  }
5062  }
5063 
5064  // contract in y
5065  for (int ez = 0; ez < c_dofs1D; ++ez)
5066  {
5067  for (int ey = 0; ey < o_dofs1D; ++ey)
5068  {
5069  for (int dx = 0; dx < c_dofs1D; ++dx)
5070  {
5071  w2[dx][ey][ez] = 0.0;
5072  for (int dy = 0; dy < c_dofs1D; ++dy)
5073  {
5074  w2[dx][ey][ez] += G(ey, dy) * w1[dx][dy][ez];
5075  }
5076  }
5077  }
5078  }
5079 
5080  // contract in x
5081  for (int ez = 0; ez < c_dofs1D; ++ez)
5082  {
5083  for (int ey = 0; ey < o_dofs1D; ++ey)
5084  {
5085  for (int ex = 0; ex < c_dofs1D; ++ex)
5086  {
5087  double s = 0.0;
5088  for (int dx = 0; dx < c_dofs1D; ++dx)
5089  {
5090  s += B(ex, dx) * w2[dx][ey][ez];
5091  }
5092  const int local_index = c_dofs1D*c_dofs1D*o_dofs1D +
5093  ez*c_dofs1D*o_dofs1D + ey*c_dofs1D + ex;
5094  y(local_index, e) += s;
5095  }
5096  }
5097  }
5098 
5099  // ---
5100  // dofs that point parallel to z-axis (open in z, closed in x, y)
5101  // ---
5102 
5103  // contract in z
5104  for (int ez = 0; ez < o_dofs1D; ++ez)
5105  {
5106  for (int dx = 0; dx < c_dofs1D; ++dx)
5107  {
5108  for (int dy = 0; dy < c_dofs1D; ++dy)
5109  {
5110  w1[dx][dy][ez] = 0.0;
5111  for (int dz = 0; dz < c_dofs1D; ++dz)
5112  {
5113  w1[dx][dy][ez] += G(ez, dz) * x(dx, dy, dz, e);
5114  }
5115  }
5116  }
5117  }
5118 
5119  // contract in y
5120  for (int ez = 0; ez < o_dofs1D; ++ez)
5121  {
5122  for (int ey = 0; ey < c_dofs1D; ++ey)
5123  {
5124  for (int dx = 0; dx < c_dofs1D; ++dx)
5125  {
5126  w2[dx][ey][ez] = 0.0;
5127  for (int dy = 0; dy < c_dofs1D; ++dy)
5128  {
5129  w2[dx][ey][ez] += B(ey, dy) * w1[dx][dy][ez];
5130  }
5131  }
5132  }
5133  }
5134 
5135  // contract in x
5136  for (int ez = 0; ez < o_dofs1D; ++ez)
5137  {
5138  for (int ey = 0; ey < c_dofs1D; ++ey)
5139  {
5140  for (int ex = 0; ex < c_dofs1D; ++ex)
5141  {
5142  double s = 0.0;
5143  for (int dx = 0; dx < c_dofs1D; ++dx)
5144  {
5145  s += B(ex, dx) * w2[dx][ey][ez];
5146  }
5147  const int local_index = 2*c_dofs1D*c_dofs1D*o_dofs1D +
5148  ez*c_dofs1D*c_dofs1D + ey*c_dofs1D + ex;
5149  y(local_index, e) += s;
5150  }
5151  }
5152  }
5153  });
5154 }
5155 
5156 // Specialization of PAHcurlApplyGradient3D to the case where
5157 static void PAHcurlApplyGradient3DBId(const int c_dofs1D,
5158  const int o_dofs1D,
5159  const int NE,
5160  const Array<double> &G_,
5161  const Vector &x_,
5162  Vector &y_)
5163 {
5164  auto G = Reshape(G_.Read(), o_dofs1D, c_dofs1D);
5165 
5166  auto x = Reshape(x_.Read(), c_dofs1D, c_dofs1D, c_dofs1D, NE);
5167  auto y = Reshape(y_.ReadWrite(), (3 * c_dofs1D * c_dofs1D * o_dofs1D), NE);
5168 
5169  constexpr static int MAX_D1D = HCURL_MAX_D1D;
5170  MFEM_VERIFY(c_dofs1D <= MAX_D1D && o_dofs1D <= c_dofs1D, "");
5171 
5172  MFEM_FORALL(e, NE,
5173  {
5174  double w1[MAX_D1D][MAX_D1D][MAX_D1D];
5175  double w2[MAX_D1D][MAX_D1D][MAX_D1D];
5176 
5177  // ---
5178  // dofs that point parallel to x-axis (open in x, closed in y, z)
5179  // ---
5180 
5181  // contract in z
5182  for (int ez = 0; ez < c_dofs1D; ++ez)
5183  {
5184  for (int dx = 0; dx < c_dofs1D; ++dx)
5185  {
5186  for (int dy = 0; dy < c_dofs1D; ++dy)
5187  {
5188  const int dz = ez;
5189  w1[dx][dy][ez] = x(dx, dy, dz, e);
5190  }
5191  }
5192  }
5193 
5194  // contract in y
5195  for (int ez = 0; ez < c_dofs1D; ++ez)
5196  {
5197  for (int ey = 0; ey < c_dofs1D; ++ey)
5198  {
5199  for (int dx = 0; dx < c_dofs1D; ++dx)
5200  {
5201  const int dy = ey;
5202  w2[dx][ey][ez] = w1[dx][dy][ez];
5203  }
5204  }
5205  }
5206 
5207  // contract in x
5208  for (int ez = 0; ez < c_dofs1D; ++ez)
5209  {
5210  for (int ey = 0; ey < c_dofs1D; ++ey)
5211  {
5212  for (int ex = 0; ex < o_dofs1D; ++ex)
5213  {
5214  double s = 0.0;
5215  for (int dx = 0; dx < c_dofs1D; ++dx)
5216  {
5217  s += G(ex, dx) * w2[dx][ey][ez];
5218  }
5219  const int local_index = ez*c_dofs1D*o_dofs1D + ey*o_dofs1D + ex;
5220  y(local_index, e) += s;
5221  }
5222  }
5223  }
5224 
5225  // ---
5226  // dofs that point parallel to y-axis (open in y, closed in x, z)
5227  // ---
5228 
5229  // contract in z
5230  for (int ez = 0; ez < c_dofs1D; ++ez)
5231  {
5232  for (int dx = 0; dx < c_dofs1D; ++dx)
5233  {
5234  for (int dy = 0; dy < c_dofs1D; ++dy)
5235  {
5236  const int dz = ez;
5237  w1[dx][dy][ez] = x(dx, dy, dz, e);
5238  }
5239  }
5240  }
5241 
5242  // contract in y
5243  for (int ez = 0; ez < c_dofs1D; ++ez)
5244  {
5245  for (int ey = 0; ey < o_dofs1D; ++ey)
5246  {
5247  for (int dx = 0; dx < c_dofs1D; ++dx)
5248  {
5249  w2[dx][ey][ez] = 0.0;
5250  for (int dy = 0; dy < c_dofs1D; ++dy)
5251  {
5252  w2[dx][ey][ez] += G(ey, dy) * w1[dx][dy][ez];
5253  }
5254  }
5255  }
5256  }
5257 
5258  // contract in x
5259  for (int ez = 0; ez < c_dofs1D; ++ez)
5260  {
5261  for (int ey = 0; ey < o_dofs1D; ++ey)
5262  {
5263  for (int ex = 0; ex < c_dofs1D; ++ex)
5264  {
5265  const int dx = ex;
5266  const double s = w2[dx][ey][ez];
5267  const int local_index = c_dofs1D*c_dofs1D*o_dofs1D +
5268  ez*c_dofs1D*o_dofs1D + ey*c_dofs1D + ex;
5269  y(local_index, e) += s;
5270  }
5271  }
5272  }
5273 
5274  // ---
5275  // dofs that point parallel to z-axis (open in z, closed in x, y)
5276  // ---
5277 
5278  // contract in z
5279  for (int ez = 0; ez < o_dofs1D; ++ez)
5280  {
5281  for (int dx = 0; dx < c_dofs1D; ++dx)
5282  {
5283  for (int dy = 0; dy < c_dofs1D; ++dy)
5284  {
5285  w1[dx][dy][ez] = 0.0;
5286  for (int dz = 0; dz < c_dofs1D; ++dz)
5287  {
5288  w1[dx][dy][ez] += G(ez, dz) * x(dx, dy, dz, e);
5289  }
5290  }
5291  }
5292  }
5293 
5294  // contract in y
5295  for (int ez = 0; ez < o_dofs1D; ++ez)
5296  {
5297  for (int ey = 0; ey < c_dofs1D; ++ey)
5298  {
5299  for (int dx = 0; dx < c_dofs1D; ++dx)
5300  {
5301  const int dy = ey;
5302  w2[dx][ey][ez] = w1[dx][dy][ez];
5303  }
5304  }
5305  }
5306 
5307  // contract in x
5308  for (int ez = 0; ez < o_dofs1D; ++ez)
5309  {
5310  for (int ey = 0; ey < c_dofs1D; ++ey)
5311  {
5312  for (int ex = 0; ex < c_dofs1D; ++ex)
5313  {
5314  const int dx = ex;
5315  const double s = w2[dx][ey][ez];
5316  const int local_index = 2*c_dofs1D*c_dofs1D*o_dofs1D +
5317  ez*c_dofs1D*c_dofs1D + ey*c_dofs1D + ex;
5318  y(local_index, e) += s;
5319  }
5320  }
5321  }
5322  });
5323 }
5324 
5325 static void PAHcurlApplyGradientTranspose3D(
5326  const int c_dofs1D, const int o_dofs1D, const int NE,
5327  const Array<double> &B_, const Array<double> &G_,
5328  const Vector &x_, Vector &y_)
5329 {
5330  auto B = Reshape(B_.Read(), c_dofs1D, c_dofs1D);
5331  auto G = Reshape(G_.Read(), o_dofs1D, c_dofs1D);
5332 
5333  auto x = Reshape(x_.Read(), (3 * c_dofs1D * c_dofs1D * o_dofs1D), NE);
5334  auto y = Reshape(y_.ReadWrite(), c_dofs1D, c_dofs1D, c_dofs1D, NE);
5335 
5336  constexpr static int MAX_D1D = HCURL_MAX_D1D;
5337  MFEM_VERIFY(c_dofs1D <= MAX_D1D && o_dofs1D <= c_dofs1D, "");
5338 
5339  MFEM_FORALL(e, NE,
5340  {
5341  double w1[MAX_D1D][MAX_D1D][MAX_D1D];
5342  double w2[MAX_D1D][MAX_D1D][MAX_D1D];
5343  // ---
5344  // dofs that point parallel to x-axis (open in x, closed in y, z)
5345  // ---
5346 
5347  // contract in z
5348  for (int dz = 0; dz < c_dofs1D; ++dz)
5349  {
5350  for (int ex = 0; ex < o_dofs1D; ++ex)
5351  {
5352  for (int ey = 0; ey < c_dofs1D; ++ey)
5353  {
5354  w1[ex][ey][dz] = 0.0;
5355  for (int ez = 0; ez < c_dofs1D; ++ez)
5356  {
5357  const int local_index = ez*c_dofs1D*o_dofs1D + ey*o_dofs1D + ex;
5358  w1[ex][ey][dz] += B(ez, dz) * x(local_index, e);
5359  }
5360  }
5361  }
5362  }
5363 
5364  // contract in y
5365  for (int dz = 0; dz < c_dofs1D; ++dz)
5366  {
5367  for (int dy = 0; dy < c_dofs1D; ++dy)
5368  {
5369  for (int ex = 0; ex < o_dofs1D; ++ex)
5370  {
5371  w2[ex][dy][dz] = 0.0;
5372  for (int ey = 0; ey < c_dofs1D; ++ey)
5373  {
5374  w2[ex][dy][dz] += B(ey, dy) * w1[ex][ey][dz];
5375  }
5376  }
5377  }
5378  }
5379 
5380  // contract in x
5381  for (int dz = 0; dz < c_dofs1D; ++dz)
5382  {
5383  for (int dy = 0; dy < c_dofs1D; ++dy)
5384  {
5385  for (int dx = 0; dx < c_dofs1D; ++dx)
5386  {
5387  double s = 0.0;
5388  for (int ex = 0; ex < o_dofs1D; ++ex)
5389  {
5390  s += G(ex, dx) * w2[ex][dy][dz];
5391  }
5392  y(dx, dy, dz, e) += s;
5393  }
5394  }
5395  }
5396 
5397  // ---
5398  // dofs that point parallel to y-axis (open in y, closed in x, z)
5399  // ---
5400 
5401  // contract in z
5402  for (int dz = 0; dz < c_dofs1D; ++dz)
5403  {
5404  for (int ex = 0; ex < c_dofs1D; ++ex)
5405  {
5406  for (int ey = 0; ey < o_dofs1D; ++ey)
5407  {
5408  w1[ex][ey][dz] = 0.0;
5409  for (int ez = 0; ez < c_dofs1D; ++ez)
5410  {
5411  const int local_index = c_dofs1D*c_dofs1D*o_dofs1D +
5412  ez*c_dofs1D*o_dofs1D + ey*c_dofs1D + ex;
5413  w1[ex][ey][dz] += B(ez, dz) * x(local_index, e);
5414  }
5415  }
5416  }
5417  }
5418 
5419  // contract in y
5420  for (int dz = 0; dz < c_dofs1D; ++dz)
5421  {
5422  for (int dy = 0; dy < c_dofs1D; ++dy)
5423  {
5424  for (int ex = 0; ex < c_dofs1D; ++ex)
5425  {
5426  w2[ex][dy][dz] = 0.0;
5427  for (int ey = 0; ey < o_dofs1D; ++ey)
5428  {
5429  w2[ex][dy][dz] += G(ey, dy) * w1[ex][ey][dz];
5430  }
5431  }
5432  }
5433  }
5434 
5435  // contract in x
5436  for (int dz = 0; dz < c_dofs1D; ++dz)
5437  {
5438  for (int dy = 0; dy < c_dofs1D; ++dy)
5439  {
5440  for (int dx = 0; dx < c_dofs1D; ++dx)
5441  {
5442  double s = 0.0;
5443  for (int ex = 0; ex < c_dofs1D; ++ex)
5444  {
5445  s += B(ex, dx) * w2[ex][dy][dz];
5446  }
5447  y(dx, dy, dz, e) += s;
5448  }
5449  }
5450  }
5451 
5452  // ---
5453  // dofs that point parallel to z-axis (open in z, closed in x, y)
5454  // ---
5455 
5456  // contract in z
5457  for (int dz = 0; dz < c_dofs1D; ++dz)
5458  {
5459  for (int ex = 0; ex < c_dofs1D; ++ex)
5460  {
5461  for (int ey = 0; ey < c_dofs1D; ++ey)
5462  {
5463  w1[ex][ey][dz] = 0.0;
5464  for (int ez = 0; ez < o_dofs1D; ++ez)
5465  {
5466  const int local_index = 2*c_dofs1D*c_dofs1D*o_dofs1D +
5467  ez*c_dofs1D*c_dofs1D + ey*c_dofs1D + ex;
5468  w1[ex][ey][dz] += G(ez, dz) * x(local_index, e);
5469  }
5470  }
5471  }
5472  }
5473 
5474  // contract in y
5475  for (int dz = 0; dz < c_dofs1D; ++dz)
5476  {
5477  for (int dy = 0; dy < c_dofs1D; ++dy)
5478  {
5479  for (int ex = 0; ex < c_dofs1D; ++ex)
5480  {
5481  w2[ex][dy][dz] = 0.0;
5482  for (int ey = 0; ey < c_dofs1D; ++ey)
5483  {
5484  w2[ex][dy][dz] += B(ey, dy) * w1[ex][ey][dz];
5485  }
5486  }
5487  }
5488  }
5489 
5490  // contract in x
5491  for (int dz = 0; dz < c_dofs1D; ++dz)
5492  {
5493  for (int dy = 0; dy < c_dofs1D; ++dy)
5494  {
5495  for (int dx = 0; dx < c_dofs1D; ++dx)
5496  {
5497  double s = 0.0;
5498  for (int ex = 0; ex < c_dofs1D; ++ex)
5499  {
5500  s += B(ex, dx) * w2[ex][dy][dz];
5501  }
5502  y(dx, dy, dz, e) += s;
5503  }
5504  }
5505  }
5506  });
5507 }
5508 
5509 // Specialization of PAHcurlApplyGradientTranspose3D to the case where
5510 static void PAHcurlApplyGradientTranspose3DBId(
5511  const int c_dofs1D, const int o_dofs1D, const int NE,
5512  const Array<double> &G_,
5513  const Vector &x_, Vector &y_)
5514 {
5515  auto G = Reshape(G_.Read(), o_dofs1D, c_dofs1D);
5516 
5517  auto x = Reshape(x_.Read(), (3 * c_dofs1D * c_dofs1D * o_dofs1D), NE);
5518  auto y = Reshape(y_.ReadWrite(), c_dofs1D, c_dofs1D, c_dofs1D, NE);
5519 
5520  constexpr static int MAX_D1D = HCURL_MAX_D1D;
5521  MFEM_VERIFY(c_dofs1D <= MAX_D1D && o_dofs1D <= c_dofs1D, "");
5522 
5523  MFEM_FORALL(e, NE,
5524  {
5525  double w1[MAX_D1D][MAX_D1D][MAX_D1D];
5526  double w2[MAX_D1D][MAX_D1D][MAX_D1D];
5527  // ---
5528  // dofs that point parallel to x-axis (open in x, closed in y, z)
5529  // ---
5530 
5531  // contract in z
5532  for (int dz = 0; dz < c_dofs1D; ++dz)
5533  {
5534  for (int ex = 0; ex < o_dofs1D; ++ex)
5535  {
5536  for (int ey = 0; ey < c_dofs1D; ++ey)
5537  {
5538  const int ez = dz;
5539  const int local_index = ez*c_dofs1D*o_dofs1D + ey*o_dofs1D + ex;
5540  w1[ex][ey][dz] = x(local_index, e);
5541  }
5542  }
5543  }
5544 
5545  // contract in y
5546  for (int dz = 0; dz < c_dofs1D; ++dz)
5547  {
5548  for (int dy = 0; dy < c_dofs1D; ++dy)
5549  {
5550  for (int ex = 0; ex < o_dofs1D; ++ex)
5551  {
5552  const int ey = dy;
5553  w2[ex][dy][dz] = w1[ex][ey][dz];
5554  }
5555  }
5556  }
5557 
5558  // contract in x
5559  for (int dz = 0; dz < c_dofs1D; ++dz)
5560  {
5561  for (int dy = 0; dy < c_dofs1D; ++dy)
5562  {
5563  for (int dx = 0; dx < c_dofs1D; ++dx)
5564  {
5565  double s = 0.0;
5566  for (int ex = 0; ex < o_dofs1D; ++ex)
5567  {
5568  s += G(ex, dx) * w2[ex][dy][dz];
5569  }
5570  y(dx, dy, dz, e) += s;
5571  }
5572  }
5573  }
5574 
5575  // ---
5576  // dofs that point parallel to y-axis (open in y, closed in x, z)
5577  // ---
5578 
5579  // contract in z
5580  for (int dz = 0; dz < c_dofs1D; ++dz)
5581  {
5582  for (int ex = 0; ex < c_dofs1D; ++ex)
5583  {
5584  for (int ey = 0; ey < o_dofs1D; ++ey)
5585  {
5586  const int ez = dz;
5587  const int local_index = c_dofs1D*c_dofs1D*o_dofs1D +
5588  ez*c_dofs1D*o_dofs1D + ey*c_dofs1D + ex;
5589  w1[ex][ey][dz] = x(local_index, e);
5590  }
5591  }
5592  }
5593 
5594  // contract in y
5595  for (int dz = 0; dz < c_dofs1D; ++dz)
5596  {
5597  for (int dy = 0; dy < c_dofs1D; ++dy)
5598  {
5599  for (int ex = 0; ex < c_dofs1D; ++ex)
5600  {
5601  w2[ex][dy][dz] = 0.0;
5602  for (int ey = 0; ey < o_dofs1D; ++ey)
5603  {
5604  w2[ex][dy][dz] += G(ey, dy) * w1[ex][ey][dz];
5605  }
5606  }
5607  }
5608  }
5609 
5610  // contract in x
5611  for (int dz = 0; dz < c_dofs1D; ++dz)
5612  {
5613  for (int dy = 0; dy < c_dofs1D; ++dy)
5614  {
5615  for (int dx = 0; dx < c_dofs1D; ++dx)
5616  {
5617  const int ex = dx;
5618  double s = w2[ex][dy][dz];
5619  y(dx, dy, dz, e) += s;
5620  }
5621  }
5622  }
5623 
5624  // ---
5625  // dofs that point parallel to z-axis (open in z, closed in x, y)
5626  // ---
5627 
5628  // contract in z
5629  for (int dz = 0; dz < c_dofs1D; ++dz)
5630  {
5631  for (int ex = 0; ex < c_dofs1D; ++ex)
5632  {
5633  for (int ey = 0; ey < c_dofs1D; ++ey)
5634  {
5635  w1[ex][ey][dz] = 0.0;
5636  for (int ez = 0; ez < o_dofs1D; ++ez)
5637  {
5638  const int local_index = 2*c_dofs1D*c_dofs1D*o_dofs1D +
5639  ez*c_dofs1D*c_dofs1D + ey*c_dofs1D + ex;
5640  w1[ex][ey][dz] += G(ez, dz) * x(local_index, e);
5641  }
5642  }
5643  }
5644  }
5645 
5646  // contract in y
5647  for (int dz = 0; dz < c_dofs1D; ++dz)
5648  {
5649  for (int dy = 0; dy < c_dofs1D; ++dy)
5650  {
5651  for (int ex = 0; ex < c_dofs1D; ++ex)
5652  {
5653  const int ey = dy;
5654  w2[ex][dy][dz] = w1[ex][ey][dz];
5655  }
5656  }
5657  }
5658 
5659  // contract in x
5660  for (int dz = 0; dz < c_dofs1D; ++dz)
5661  {
5662  for (int dy = 0; dy < c_dofs1D; ++dy)
5663  {
5664  for (int dx = 0; dx < c_dofs1D; ++dx)
5665  {
5666  const int ex = dx;
5667  double s = w2[ex][dy][dz];
5668  y(dx, dy, dz, e) += s;
5669  }
5670  }
5671  }
5672  });
5673 }
5674 
5675 void GradientInterpolator::AssemblePA(const FiniteElementSpace &trial_fes,
5676  const FiniteElementSpace &test_fes)
5677 {
5678  // Assumes tensor-product elements, with a vector test space and H^1 trial space.
5679  Mesh *mesh = trial_fes.GetMesh();
5680  const FiniteElement *trial_fel = trial_fes.GetFE(0);
5681  const FiniteElement *test_fel = test_fes.GetFE(0);
5682 
5683  const NodalTensorFiniteElement *trial_el =
5684  dynamic_cast<const NodalTensorFiniteElement*>(trial_fel);
5685  MFEM_VERIFY(trial_el != NULL, "Only NodalTensorFiniteElement is supported!");
5686 
5687  const VectorTensorFiniteElement *test_el =
5688  dynamic_cast<const VectorTensorFiniteElement*>(test_fel);
5689  MFEM_VERIFY(test_el != NULL, "Only VectorTensorFiniteElement is supported!");
5690 
5691  const int dims = trial_el->GetDim();
5692  MFEM_VERIFY(dims == 2 || dims == 3, "Bad dimension!");
5693  dim = mesh->Dimension();
5694  MFEM_VERIFY(dim == 2 || dim == 3, "Bad dimension!");
5695  MFEM_VERIFY(trial_el->GetOrder() == test_el->GetOrder(),
5696  "Orders do not match!");
5697  ne = trial_fes.GetNE();
5698 
5699  const int order = trial_el->GetOrder();
5700  dofquad_fe = new H1_SegmentElement(order, trial_el->GetBasisType());
5701  mfem::QuadratureFunctions1D qf1d;
5702  mfem::IntegrationRule closed_ir;
5703  closed_ir.SetSize(order + 1);
5704  qf1d.GaussLobatto(order + 1, &closed_ir);
5705  mfem::IntegrationRule open_ir;
5706  open_ir.SetSize(order);
5707  qf1d.GaussLegendre(order, &open_ir);
5708 
5709  maps_O_C = &dofquad_fe->GetDofToQuad(open_ir, DofToQuad::TENSOR);
5710  o_dofs1D = maps_O_C->nqpt;
5711  if (trial_el->GetBasisType() == BasisType::GaussLobatto)
5712  {
5713  B_id = true;
5714  c_dofs1D = maps_O_C->ndof;
5715  }
5716  else
5717  {
5718  B_id = false;
5719  maps_C_C = &dofquad_fe->GetDofToQuad(closed_ir, DofToQuad::TENSOR);
5720  c_dofs1D = maps_C_C->nqpt;
5721  }
5722 }
5723 
5724 void GradientInterpolator::AddMultPA(const Vector &x, Vector &y) const
5725 {
5726  if (dim == 3)
5727  {
5728  if (B_id)
5729  {
5730  PAHcurlApplyGradient3DBId(c_dofs1D, o_dofs1D, ne,
5731  maps_O_C->G, x, y);
5732  }
5733  else
5734  {
5735  PAHcurlApplyGradient3D(c_dofs1D, o_dofs1D, ne, maps_C_C->B,
5736  maps_O_C->G, x, y);
5737  }
5738  }
5739  else if (dim == 2)
5740  {
5741  if (B_id)
5742  {
5743  PAHcurlApplyGradient2DBId(c_dofs1D, o_dofs1D, ne,
5744  maps_O_C->G, x, y);
5745  }
5746  else
5747  {
5748  PAHcurlApplyGradient2D(c_dofs1D, o_dofs1D, ne, maps_C_C->B, maps_O_C->G,
5749  x, y);
5750  }
5751  }
5752  else
5753  {
5754  mfem_error("Bad dimension!");
5755  }
5756 }
5757 
5758 void GradientInterpolator::AddMultTransposePA(const Vector &x, Vector &y) const
5759 {
5760  if (dim == 3)
5761  {
5762  if (B_id)
5763  {
5764  PAHcurlApplyGradientTranspose3DBId(c_dofs1D, o_dofs1D, ne,
5765  maps_O_C->G, x, y);
5766  }
5767  else
5768  {
5769  PAHcurlApplyGradientTranspose3D(c_dofs1D, o_dofs1D, ne, maps_C_C->B,
5770  maps_O_C->G, x, y);
5771  }
5772  }
5773  else if (dim == 2)
5774  {
5775  if (B_id)
5776  {
5777  PAHcurlApplyGradientTranspose2DBId(c_dofs1D, o_dofs1D, ne,
5778  maps_O_C->G, x, y);
5779  }
5780  else
5781  {
5782  PAHcurlApplyGradientTranspose2D(c_dofs1D, o_dofs1D, ne, maps_C_C->B,
5783  maps_O_C->G, x, y);
5784  }
5785  }
5786  else
5787  {
5788  mfem_error("Bad dimension!");
5789  }
5790 }
5791 
5792 static void PAHcurlVecH1IdentityApply3D(const int c_dofs1D,
5793  const int o_dofs1D,
5794  const int NE,
5795  const Array<double> &Bclosed,
5796  const Array<double> &Bopen,
5797  const Vector &pa_data,
5798  const Vector &x_,
5799  Vector &y_)
5800 {
5801  auto Bc = Reshape(Bclosed.Read(), c_dofs1D, c_dofs1D);
5802  auto Bo = Reshape(Bopen.Read(), o_dofs1D, c_dofs1D);
5803 
5804  auto x = Reshape(x_.Read(), c_dofs1D, c_dofs1D, c_dofs1D, 3, NE);
5805  auto y = Reshape(y_.ReadWrite(), (3 * c_dofs1D * c_dofs1D * o_dofs1D), NE);
5806 
5807  auto vk = Reshape(pa_data.Read(), 3, (3 * c_dofs1D * c_dofs1D * o_dofs1D),
5808  NE);
5809 
5810  constexpr static int MAX_D1D = HCURL_MAX_D1D;
5811  MFEM_VERIFY(c_dofs1D <= MAX_D1D && o_dofs1D <= c_dofs1D, "");
5812 
5813  MFEM_FORALL(e, NE,
5814  {
5815  double w1[3][MAX_D1D][MAX_D1D][MAX_D1D];
5816  double w2[3][MAX_D1D][MAX_D1D][MAX_D1D];
5817 
5818  // dofs that point parallel to x-axis (open in x, closed in y, z)
5819 
5820  // contract in z
5821  for (int ez = 0; ez < c_dofs1D; ++ez)
5822  {
5823  for (int dx = 0; dx < c_dofs1D; ++dx)
5824  {
5825  for (int dy = 0; dy < c_dofs1D; ++dy)
5826  {
5827  for (int j=0; j<3; ++j)
5828  {
5829  w1[j][dx][dy][ez] = 0.0;
5830  for (int dz = 0; dz < c_dofs1D; ++dz)
5831  {
5832  w1[j][dx][dy][ez] += Bc(ez, dz) * x(dx, dy, dz, j, e);
5833  }
5834  }
5835  }
5836  }
5837  }
5838 
5839  // contract in y
5840  for (int ez = 0; ez < c_dofs1D; ++ez)
5841  {
5842  for (int ey = 0; ey < c_dofs1D; ++ey)
5843  {
5844  for (int dx = 0; dx < c_dofs1D; ++dx)
5845  {
5846  for (int j=0; j<3; ++j)
5847  {
5848  w2[j][dx][ey][ez] = 0.0;
5849  for (int dy = 0; dy < c_dofs1D; ++dy)
5850  {
5851  w2[j][dx][ey][ez] += Bc(ey, dy) * w1[j][dx][dy][ez];
5852  }
5853  }
5854  }
5855  }
5856  }
5857 
5858  // contract in x
5859  for (int ez = 0; ez < c_dofs1D; ++ez)
5860  {
5861  for (int ey = 0; ey < c_dofs1D; ++ey)
5862  {
5863  for (int ex = 0; ex < o_dofs1D; ++ex)
5864  {
5865  for (int j=0; j<3; ++j)
5866  {
5867  double s = 0.0;
5868  for (int dx = 0; dx < c_dofs1D; ++dx)
5869  {
5870  s += Bo(ex, dx) * w2[j][dx][ey][ez];
5871  }
5872  const int local_index = ez*c_dofs1D*o_dofs1D + ey*o_dofs1D + ex;
5873  y(local_index, e) += s * vk(j, local_index, e);
5874  }
5875  }
5876  }
5877  }
5878 
5879  // dofs that point parallel to y-axis (open in y, closed in x, z)
5880 
5881  // contract in z
5882  for (int ez = 0; ez < c_dofs1D; ++ez)
5883  {
5884  for (int dx = 0; dx < c_dofs1D; ++dx)
5885  {
5886  for (int dy = 0; dy < c_dofs1D; ++dy)
5887  {
5888  for (int j=0; j<3; ++j)
5889  {
5890  w1[j][dx][dy][ez] = 0.0;
5891  for (int dz = 0; dz < c_dofs1D; ++dz)
5892  {
5893  w1[j][dx][dy][ez] += Bc(ez, dz) * x(dx, dy, dz, j, e);
5894  }
5895  }
5896  }
5897  }
5898  }
5899 
5900  // contract in y
5901  for (int ez = 0; ez < c_dofs1D; ++ez)
5902  {
5903  for (int ey = 0; ey < o_dofs1D; ++ey)
5904  {
5905  for (int dx = 0; dx < c_dofs1D; ++dx)
5906  {
5907  for (int j=0; j<3; ++j)
5908  {
5909  w2[j][dx][ey][ez] = 0.0;
5910  for (int dy = 0; dy < c_dofs1D; ++dy)
5911  {
5912  w2[j][dx][ey][ez] += Bo(ey, dy) * w1[j][dx][dy][ez];
5913  }
5914  }
5915  }
5916  }
5917  }
5918 
5919  // contract in x
5920  for (int ez = 0; ez < c_dofs1D; ++ez)
5921  {
5922  for (int ey = 0; ey < o_dofs1D; ++ey)
5923  {
5924  for (int ex = 0; ex < c_dofs1D; ++ex)
5925  {
5926  for (int j=0; j<3; ++j)
5927  {
5928  double s = 0.0;
5929  for (int dx = 0; dx < c_dofs1D; ++dx)
5930  {
5931  s += Bc(ex, dx) * w2[j][dx][ey][ez];
5932  }
5933  const int local_index = c_dofs1D*c_dofs1D*o_dofs1D +
5934  ez*c_dofs1D*o_dofs1D + ey*c_dofs1D + ex;
5935  y(local_index, e) += s * vk(j, local_index, e);
5936  }
5937  }
5938  }
5939  }
5940 
5941  // dofs that point parallel to z-axis (open in z, closed in x, y)
5942 
5943  // contract in z
5944  for (int ez = 0; ez < o_dofs1D; ++ez)
5945  {
5946  for (int dx = 0; dx < c_dofs1D; ++dx)
5947  {
5948  for (int dy = 0; dy < c_dofs1D; ++dy)
5949  {
5950  for (int j=0; j<3; ++j)
5951  {
5952  w1[j][dx][dy][ez] = 0.0;
5953  for (int dz = 0; dz < c_dofs1D; ++dz)
5954  {
5955  w1[j][dx][dy][ez] += Bo(ez, dz) * x(dx, dy, dz, j, e);
5956  }
5957  }
5958  }
5959  }
5960  }
5961 
5962  // contract in y
5963  for (int ez = 0; ez < o_dofs1D; ++ez)
5964  {
5965  for (int ey = 0; ey < c_dofs1D; ++ey)
5966  {
5967  for (int dx = 0; dx < c_dofs1D; ++dx)
5968  {
5969  for (int j=0; j<3; ++j)
5970  {
5971  w2[j][dx][ey][ez] = 0.0;
5972  for (int dy = 0; dy < c_dofs1D; ++dy)
5973  {
5974  w2[j][dx][ey][ez] += Bc(ey, dy) * w1[j][dx][dy][ez];
5975  }
5976  }
5977  }
5978  }
5979  }
5980 
5981  // contract in x
5982  for (int ez = 0; ez < o_dofs1D; ++ez)
5983  {
5984  for (int ey = 0; ey < c_dofs1D; ++ey)
5985  {
5986  for (int ex = 0; ex < c_dofs1D; ++ex)
5987  {
5988  for (int j=0; j<3; ++j)
5989  {
5990  double s = 0.0;
5991  for (int dx = 0; dx < c_dofs1D; ++dx)
5992  {
5993  s += Bc(ex, dx) * w2[j][dx][ey][ez];
5994  }
5995  const int local_index = 2*c_dofs1D*c_dofs1D*o_dofs1D +
5996  ez*c_dofs1D*c_dofs1D + ey*c_dofs1D + ex;
5997  y(local_index, e) += s * vk(j, local_index, e);
5998  }
5999  }
6000  }
6001  }
6002  });
6003 }
6004 
6005 static void PAHcurlVecH1IdentityApplyTranspose3D(const int c_dofs1D,
6006  const int o_dofs1D,
6007  const int NE,
6008  const Array<double> &Bclosed,
6009  const Array<double> &Bopen,
6010  const Vector &pa_data,
6011  const Vector &x_,
6012  Vector &y_)
6013 {
6014  auto Bc = Reshape(Bclosed.Read(), c_dofs1D, c_dofs1D);
6015  auto Bo = Reshape(Bopen.Read(), o_dofs1D, c_dofs1D);
6016 
6017  auto x = Reshape(x_.Read(), (3 * c_dofs1D * c_dofs1D * o_dofs1D), NE);
6018  auto y = Reshape(y_.ReadWrite(), c_dofs1D, c_dofs1D, c_dofs1D, 3, NE);
6019 
6020  auto vk = Reshape(pa_data.Read(), 3, (3 * c_dofs1D * c_dofs1D * o_dofs1D),
6021  NE);
6022 
6023  constexpr static int MAX_D1D = HCURL_MAX_D1D;
6024 
6025  MFEM_VERIFY(c_dofs1D <= MAX_D1D && o_dofs1D <= c_dofs1D, "");
6026 
6027  MFEM_FORALL(e, NE,
6028  {
6029  double w1[3][MAX_D1D][MAX_D1D][MAX_D1D];
6030  double w2[3][MAX_D1D][MAX_D1D][MAX_D1D];
6031 
6032  // dofs that point parallel to x-axis (open in x, closed in y, z)
6033 
6034  // contract in x
6035  for (int ez = 0; ez < c_dofs1D; ++ez)
6036  {
6037  for (int ey = 0; ey < c_dofs1D; ++ey)
6038  {
6039  for (int j=0; j<3; ++j)
6040  {
6041  for (int dx = 0; dx < c_dofs1D; ++dx)
6042  {
6043  w2[j][dx][ey][ez] = 0.0;
6044  }
6045  for (int ex = 0; ex < o_dofs1D; ++ex)
6046  {
6047  const int local_index = ez*c_dofs1D*o_dofs1D + ey*o_dofs1D + ex;
6048  const double xv = x(local_index, e) * vk(j, local_index, e);
6049  for (int dx = 0; dx < c_dofs1D; ++dx)
6050  {
6051  w2[j][dx][ey][ez] += xv * Bo(ex, dx);
6052  }
6053  }
6054  }
6055  }
6056  }
6057 
6058  // contract in y
6059  for (int ez = 0; ez < c_dofs1D; ++ez)
6060  {
6061  for (int dx = 0; dx < c_dofs1D; ++dx)
6062  {
6063  for (int dy = 0; dy < c_dofs1D; ++dy)
6064  {
6065  for (int j=0; j<3; ++j)
6066  {
6067  w1[j][dx][dy][ez] = 0.0;
6068  for (int ey = 0; ey < c_dofs1D; ++ey)
6069  {
6070  w1[j][dx][dy][ez] += w2[j][dx][ey][ez] * Bc(ey, dy);
6071  }
6072  }
6073  }
6074  }
6075  }
6076 
6077  // contract in z
6078  for (int dx = 0; dx < c_dofs1D; ++dx)
6079  {
6080  for (int dy = 0; dy < c_dofs1D; ++dy)
6081  {
6082  for (int dz = 0; dz < c_dofs1D; ++dz)
6083  {
6084  for (int j=0; j<3; ++j)
6085  {
6086  double s = 0.0;
6087  for (int ez = 0; ez < c_dofs1D; ++ez)
6088  {
6089  s += w1[j][dx][dy][ez] * Bc(ez, dz);
6090  }
6091  y(dx, dy, dz, j, e) += s;
6092  }
6093  }
6094  }
6095  }
6096 
6097  // dofs that point parallel to y-axis (open in y, closed in x, z)
6098 
6099  // contract in x
6100  for (int ez = 0; ez < c_dofs1D; ++ez)
6101  {
6102  for (int ey = 0; ey < o_dofs1D; ++ey)
6103  {
6104  for (int j=0; j<3; ++j)
6105  {
6106  for (int dx = 0; dx < c_dofs1D; ++dx)
6107  {
6108  w2[j][dx][ey][ez] = 0.0;
6109  }
6110  for (int ex = 0; ex < c_dofs1D; ++ex)
6111  {
6112  const int local_index = c_dofs1D*c_dofs1D*o_dofs1D +
6113  ez*c_dofs1D*o_dofs1D + ey*c_dofs1D + ex;
6114  const double xv = x(local_index, e) * vk(j, local_index, e);
6115  for (int dx = 0; dx < c_dofs1D; ++dx)
6116  {
6117  w2[j][dx][ey][ez] += xv * Bc(ex, dx);
6118  }
6119  }
6120  }
6121  }
6122  }
6123 
6124  // contract in y
6125  for (int ez = 0; ez < c_dofs1D; ++ez)
6126  {
6127  for (int dx = 0; dx < c_dofs1D; ++dx)
6128  {
6129  for (int dy = 0; dy < c_dofs1D; ++dy)
6130  {
6131  for (int j=0; j<3; ++j)
6132  {
6133  w1[j][dx][dy][ez] = 0.0;
6134  for (int ey = 0; ey < o_dofs1D; ++ey)
6135  {
6136  w1[j][dx][dy][ez] += w2[j][dx][ey][ez] * Bo(ey, dy);
6137  }
6138  }
6139  }
6140  }
6141  }
6142 
6143  // contract in z
6144  for (int dx = 0; dx < c_dofs1D; ++dx)
6145  {
6146  for (int dy = 0; dy < c_dofs1D; ++dy)
6147  {
6148  for (int dz = 0; dz < c_dofs1D; ++dz)
6149  {
6150  for (int j=0; j<3; ++j)
6151  {
6152  double s = 0.0;
6153  for (int ez = 0; ez < c_dofs1D; ++ez)
6154  {
6155  s += w1[j][dx][dy][ez] * Bc(ez, dz);
6156  }
6157  y(dx, dy, dz, j, e) += s;
6158  }
6159  }
6160  }
6161  }
6162 
6163  // dofs that point parallel to z-axis (open in z, closed in x, y)
6164 
6165  // contract in x
6166  for (int ez = 0; ez < o_dofs1D; ++ez)
6167  {
6168  for (int ey = 0; ey < c_dofs1D; ++ey)
6169  {
6170  for (int j=0; j<3; ++j)
6171  {
6172  for (int dx = 0; dx < c_dofs1D; ++dx)
6173  {
6174  w2[j][dx][ey][ez] = 0.0;
6175  }
6176  for (int ex = 0; ex < c_dofs1D; ++ex)
6177  {
6178  const int local_index = 2*c_dofs1D*c_dofs1D*o_dofs1D +
6179  ez*c_dofs1D*c_dofs1D + ey*c_dofs1D + ex;
6180  const double xv = x(local_index, e) * vk(j, local_index, e);
6181  for (int dx = 0; dx < c_dofs1D; ++dx)
6182  {
6183  w2[j][dx][ey][ez] += xv * Bc(ex, dx);
6184  }
6185  }
6186  }
6187  }
6188  }
6189 
6190  // contract in y
6191  for (int ez = 0; ez < o_dofs1D; ++ez)
6192  {
6193  for (int dx = 0; dx < c_dofs1D; ++dx)
6194  {
6195  for (int dy = 0; dy < c_dofs1D; ++dy)
6196  {
6197  for (int j=0; j<3; ++j)
6198  {
6199  w1[j][dx][dy][ez] = 0.0;
6200  for (int ey = 0; ey < c_dofs1D; ++ey)
6201  {
6202  w1[j][dx][dy][ez] += w2[j][dx][ey][ez] * Bc(ey, dy);
6203  }
6204  }
6205  }
6206  }
6207  }
6208 
6209  // contract in z
6210  for (int dx = 0; dx < c_dofs1D; ++dx)
6211  {
6212  for (int dy = 0; dy < c_dofs1D; ++dy)
6213  {
6214  for (int dz = 0; dz < c_dofs1D; ++dz)
6215  {
6216  for (int j=0; j<3; ++j)
6217  {
6218  double s = 0.0;
6219  for (int ez = 0; ez < o_dofs1D; ++ez)
6220  {
6221  s += w1[j][dx][dy][ez] * Bo(ez, dz);
6222  }
6223  y(dx, dy, dz, j, e) += s;
6224  }
6225  }
6226  }
6227  }
6228  });
6229 }
6230 
6231 static void PAHcurlVecH1IdentityApply2D(const int c_dofs1D,
6232  const int o_dofs1D,
6233  const int NE,
6234  const Array<double> &Bclosed,
6235  const Array<double> &Bopen,
6236  const Vector &pa_data,
6237  const Vector &x_,
6238  Vector &y_)
6239 {
6240  auto Bc = Reshape(Bclosed.Read(), c_dofs1D, c_dofs1D);
6241  auto Bo = Reshape(Bopen.Read(), o_dofs1D, c_dofs1D);
6242 
6243  auto x = Reshape(x_.Read(), c_dofs1D, c_dofs1D, 2, NE);
6244  auto y = Reshape(y_.ReadWrite(), (2 * c_dofs1D * o_dofs1D), NE);
6245 
6246  auto vk = Reshape(pa_data.Read(), 2, (2 * c_dofs1D * o_dofs1D), NE);
6247 
6248  constexpr static int MAX_D1D = HCURL_MAX_D1D;
6249 
6250  MFEM_VERIFY(c_dofs1D <= MAX_D1D && o_dofs1D <= c_dofs1D, "");
6251 
6252  MFEM_FORALL(e, NE,
6253  {
6254  double w[2][MAX_D1D][MAX_D1D];
6255 
6256  // dofs that point parallel to x-axis (open in x, closed in y)
6257 
6258  // contract in y
6259  for (int ey = 0; ey < c_dofs1D; ++ey)
6260  {
6261  for (int dx = 0; dx < c_dofs1D; ++dx)
6262  {
6263  for (int j=0; j<2; ++j)
6264  {
6265  w[j][dx][ey] = 0.0;
6266  for (int dy = 0; dy < c_dofs1D; ++dy)
6267  {
6268  w[j][dx][ey] += Bc(ey, dy) * x(dx, dy, j, e);
6269  }
6270  }
6271  }
6272  }
6273 
6274  // contract in x
6275  for (int ey = 0; ey < c_dofs1D; ++ey)
6276  {
6277  for (int ex = 0; ex < o_dofs1D; ++ex)
6278  {
6279  for (int j=0; j<2; ++j)
6280  {
6281  double s = 0.0;
6282  for (int dx = 0; dx < c_dofs1D; ++dx)
6283  {
6284  s += Bo(ex, dx) * w[j][dx][ey];
6285  }
6286  const int local_index = ey*o_dofs1D + ex;
6287  y(local_index, e) += s * vk(j, local_index, e);
6288  }
6289  }
6290  }
6291 
6292  // dofs that point parallel to y-axis (open in y, closed in x)
6293 
6294  // contract in y
6295  for (int ey = 0; ey < o_dofs1D; ++ey)
6296  {
6297  for (int dx = 0; dx < c_dofs1D; ++dx)
6298  {
6299  for (int j=0; j<2; ++j)
6300  {
6301  w[j][dx][ey] = 0.0;
6302  for (int dy = 0; dy < c_dofs1D; ++dy)
6303  {
6304  w[j][dx][ey] += Bo(ey, dy) * x(dx, dy, j, e);
6305  }
6306  }
6307  }
6308  }
6309 
6310  // contract in x
6311  for (int ey = 0; ey < o_dofs1D; ++ey)
6312  {
6313  for (int ex = 0; ex < c_dofs1D; ++ex)
6314  {
6315  for (int j=0; j<2; ++j)
6316  {
6317  double s = 0.0;
6318  for (int dx = 0; dx < c_dofs1D; ++dx)
6319  {
6320  s += Bc(ex, dx) * w[j][dx][ey];
6321  }
6322  const int local_index = c_dofs1D*o_dofs1D + ey*c_dofs1D + ex;
6323  y(local_index, e) += s * vk(j, local_index, e);
6324  }
6325  }
6326  }
6327  });
6328 }
6329 
6330 static void PAHcurlVecH1IdentityApplyTranspose2D(const int c_dofs1D,
6331  const int o_dofs1D,
6332  const int NE,
6333  const Array<double> &Bclosed,
6334  const Array<double> &Bopen,
6335  const Vector &pa_data,
6336  const Vector &x_,
6337  Vector &y_)
6338 {
6339  auto Bc = Reshape(Bclosed.Read(), c_dofs1D, c_dofs1D);
6340  auto Bo = Reshape(Bopen.Read(), o_dofs1D, c_dofs1D);
6341 
6342  auto x = Reshape(x_.Read(), (2 * c_dofs1D * o_dofs1D), NE);
6343  auto y = Reshape(y_.ReadWrite(), c_dofs1D, c_dofs1D, 2, NE);
6344 
6345  auto vk = Reshape(pa_data.Read(), 2, (2 * c_dofs1D * o_dofs1D), NE);
6346 
6347  constexpr static int MAX_D1D = HCURL_MAX_D1D;
6348  //constexpr static int MAX_Q1D = HCURL_MAX_Q1D;
6349 
6350  MFEM_VERIFY(c_dofs1D <= MAX_D1D && o_dofs1D <= c_dofs1D, "");
6351 
6352  MFEM_FORALL(e, NE,
6353  {
6354  double w[2][MAX_D1D][MAX_D1D];
6355 
6356  // dofs that point parallel to x-axis (open in x, closed in y)
6357 
6358  // contract in x
6359  for (int ey = 0; ey < c_dofs1D; ++ey)
6360  {
6361  for (int dx = 0; dx < c_dofs1D; ++dx)
6362  {
6363  for (int j=0; j<2; ++j) { w[j][dx][ey] = 0.0; }
6364  }
6365  for (int ex = 0; ex < o_dofs1D; ++ex)
6366  {
6367  const int local_index = ey*o_dofs1D + ex;
6368  const double xd = x(local_index, e);
6369 
6370  for (int dx = 0; dx < c_dofs1D; ++dx)
6371  {
6372  for (int j=0; j<2; ++j)
6373  {
6374  w[j][dx][ey] += Bo(ex, dx) * xd * vk(j, local_index, e);
6375  }
6376  }
6377  }
6378  }
6379 
6380  // contract in y
6381  for (int dx = 0; dx < c_dofs1D; ++dx)
6382  {
6383  for (int dy = 0; dy < c_dofs1D; ++dy)
6384  {
6385  for (int j=0; j<2; ++j)
6386  {
6387  double s = 0.0;
6388  for (int ey = 0; ey < c_dofs1D; ++ey)
6389  {
6390  s += w[j][dx][ey] * Bc(ey, dy);
6391  }
6392  y(dx, dy, j, e) += s;
6393  }
6394  }
6395  }
6396 
6397  // dofs that point parallel to y-axis (open in y, closed in x)
6398 
6399  // contract in x
6400  for (int ey = 0; ey < o_dofs1D; ++ey)
6401  {
6402  for (int dx = 0; dx < c_dofs1D; ++dx)
6403  {
6404  for (int j=0; j<2; ++j) { w[j][dx][ey] = 0.0; }
6405  }
6406  for (int ex = 0; ex < c_dofs1D; ++ex)
6407  {
6408  const int local_index = c_dofs1D*o_dofs1D + ey*c_dofs1D + ex;
6409  const double xd = x(local_index, e);
6410  for (int dx = 0; dx < c_dofs1D; ++dx)
6411  {
6412  for (int j=0; j<2; ++j)
6413  {
6414  w[j][dx][ey] += Bc(ex, dx) * xd * vk(j, local_index, e);
6415  }
6416  }
6417  }
6418  }
6419 
6420  // contract in y
6421  for (int dx = 0; dx < c_dofs1D; ++dx)
6422  {
6423  for (int dy = 0; dy < c_dofs1D; ++dy)
6424  {
6425  for (int j=0; j<2; ++j)
6426  {
6427  double s = 0.0;
6428  for (int ey = 0; ey < o_dofs1D; ++ey)
6429  {
6430  s += w[j][dx][ey] * Bo(ey, dy);
6431  }
6432  y(dx, dy, j, e) += s;
6433  }
6434  }
6435  }
6436  });
6437 }
6438 
6439 void IdentityInterpolator::AssemblePA(const FiniteElementSpace &trial_fes,
6440  const FiniteElementSpace &test_fes)
6441 {
6442  // Assumes tensor-product elements, with a vector test space and H^1 trial space.
6443  Mesh *mesh = trial_fes.GetMesh();
6444  const FiniteElement *trial_fel = trial_fes.GetFE(0);
6445  const FiniteElement *test_fel = test_fes.GetFE(0);
6446 
6447  const NodalTensorFiniteElement *trial_el =
6448  dynamic_cast<const NodalTensorFiniteElement*>(trial_fel);
6449  MFEM_VERIFY(trial_el != NULL, "Only NodalTensorFiniteElement is supported!");
6450 
6451  const VectorTensorFiniteElement *test_el =
6452  dynamic_cast<const VectorTensorFiniteElement*>(test_fel);
6453  MFEM_VERIFY(test_el != NULL, "Only VectorTensorFiniteElement is supported!");
6454 
6455  const int dims = trial_el->GetDim();
6456  MFEM_VERIFY(dims == 2 || dims == 3, "");
6457 
6458  dim = mesh->Dimension();
6459  MFEM_VERIFY(dim == 2 || dim == 3, "");
6460 
6461  MFEM_VERIFY(trial_el->GetOrder() == test_el->GetOrder(), "");
6462 
6463  ne = trial_fes.GetNE();
6464 
6465  const int order = trial_el->GetOrder();
6466  dofquad_fe = new H1_SegmentElement(order);
6467  mfem::QuadratureFunctions1D qf1d;
6468  mfem::IntegrationRule closed_ir;
6469  closed_ir.SetSize(order + 1);
6470  qf1d.GaussLobatto(order + 1, &closed_ir);
6471  mfem::IntegrationRule open_ir;
6472  open_ir.SetSize(order);
6473  qf1d.GaussLegendre(order, &open_ir);
6474 
6475  maps_C_C = &dofquad_fe->GetDofToQuad(closed_ir, DofToQuad::TENSOR);
6476  maps_O_C = &dofquad_fe->GetDofToQuad(open_ir, DofToQuad::TENSOR);
6477 
6478  o_dofs1D = maps_O_C->nqpt;
6479  c_dofs1D = maps_C_C->nqpt;
6480  MFEM_VERIFY(maps_O_C->ndof == c_dofs1D &&
6481  maps_C_C->ndof == c_dofs1D, "Discrepancy in the number of DOFs");
6482 
6483  const int ndof_test = (dim == 3) ? 3 * c_dofs1D * c_dofs1D * o_dofs1D
6484  : 2 * c_dofs1D * o_dofs1D;
6485 
6486  const IntegrationRule & Nodes = test_el->GetNodes();
6487 
6488  pa_data.SetSize(dim * ndof_test * ne, Device::GetMemoryType());
6489  auto op = Reshape(pa_data.HostWrite(), dim, ndof_test, ne);
6490 
6491  const Array<int> &dofmap = test_el->GetDofMap();
6492 
6493  if (dim == 3)
6494  {
6495  // Note that ND_HexahedronElement uses 6 vectors in tk rather than 3, with
6496  // the last 3 having negative signs. Here the signs are all positive, as
6497  // signs are applied in ElementRestriction.
6498 
6499  const double tk[9] = { 1.,0.,0., 0.,1.,0., 0.,0.,1. };
6500 
6501  for (int c=0; c<3; ++c)
6502  {
6503  for (int i=0; i<ndof_test/3; ++i)
6504  {
6505  const int d = (c*ndof_test/3) + i;
6506  // ND_HexahedronElement sets dof2tk = (dofmap < 0) ? 3+c : c, but here
6507  // no signs should be applied due to ElementRestriction.
6508  const int dof2tk = c;
6509  const int id = (dofmap[d] >= 0) ? dofmap[d] : -1 - dofmap[d];
6510 
6511  for (int e=0; e<ne; ++e)
6512  {
6513  double v[3];
6514  ElementTransformation *tr = mesh->GetElementTransformation(e);
6515  tr->SetIntPoint(&Nodes.IntPoint(id));
6516  tr->Jacobian().Mult(tk + dof2tk*dim, v);
6517 
6518  for (int j=0; j<3; ++j)
6519  {
6520  op(j,d,e) = v[j];
6521  }
6522  }
6523  }
6524  }
6525  }
6526  else // 2D case
6527  {
6528  const double tk[4] = { 1.,0., 0.,1. };
6529  for (int c=0; c<2; ++c)
6530  {
6531  for (int i=0; i<ndof_test/2; ++i)
6532  {
6533  const int d = (c*ndof_test/2) + i;
6534  // ND_QuadrilateralElement sets dof2tk = (dofmap < 0) ? 2+c : c, but here
6535  // no signs should be applied due to ElementRestriction.
6536  const int dof2tk = c;
6537  const int id = (dofmap[d] >= 0) ? dofmap[d] : -1 - dofmap[d];
6538 
6539  for (int e=0; e<ne; ++e)
6540  {
6541  double v[2];
6542  ElementTransformation *tr = mesh->GetElementTransformation(e);
6543  tr->SetIntPoint(&Nodes.IntPoint(id));
6544  tr->Jacobian().Mult(tk + dof2tk*dim, v);
6545 
6546  for (int j=0; j<2; ++j)
6547  {
6548  op(j,d,e) = v[j];
6549  }
6550  }
6551  }
6552  }
6553  }
6554 }
6555 
6556 void IdentityInterpolator::AddMultPA(const Vector &x, Vector &y) const
6557 {
6558  if (dim == 3)
6559  {
6560  PAHcurlVecH1IdentityApply3D(c_dofs1D, o_dofs1D, ne, maps_C_C->B, maps_O_C->B,
6561  pa_data, x, y);
6562  }
6563  else if (dim == 2)
6564  {
6565  PAHcurlVecH1IdentityApply2D(c_dofs1D, o_dofs1D, ne, maps_C_C->B, maps_O_C->B,
6566  pa_data, x, y);
6567  }
6568  else
6569  {
6570  mfem_error("Bad dimension!");
6571  }
6572 }
6573 
6574 void IdentityInterpolator::AddMultTransposePA(const Vector &x, Vector &y) const
6575 {
6576  if (dim == 3)
6577  {
6578  PAHcurlVecH1IdentityApplyTranspose3D(c_dofs1D, o_dofs1D, ne, maps_C_C->B,
6579  maps_O_C->B, pa_data, x, y);
6580  }
6581  else if (dim == 2)
6582  {
6583  PAHcurlVecH1IdentityApplyTranspose2D(c_dofs1D, o_dofs1D, ne, maps_C_C->B,
6584  maps_O_C->B, pa_data, x, y);
6585  }
6586  else
6587  {
6588  mfem_error("Bad dimension!");
6589  }
6590 }
6591 
6592 template void SmemPAHcurlMassAssembleDiagonal3D<0,0>(const int D1D,
6593  const int Q1D,
6594  const int NE,
6595  const bool symmetric,
6596  const Array<double> &bo,
6597  const Array<double> &bc,
6598  const Vector &pa_data,
6599  Vector &diag);
6600 
6601 template void SmemPAHcurlMassAssembleDiagonal3D<2,3>(const int D1D,
6602  const int Q1D,
6603  const int NE,
6604  const bool symmetric,
6605  const Array<double> &bo,
6606  const Array<double> &bc,
6607  const Vector &pa_data,
6608  Vector &diag);
6609 
6610 template void SmemPAHcurlMassAssembleDiagonal3D<3,4>(const int D1D,
6611  const int Q1D,
6612  const int NE,
6613  const bool symmetric,
6614  const Array<double> &bo,
6615  const Array<double> &bc,
6616  const Vector &pa_data,
6617  Vector &diag);
6618 
6619 template void SmemPAHcurlMassAssembleDiagonal3D<4,5>(const int D1D,
6620  const int Q1D,
6621  const int NE,
6622  const bool symmetric,
6623  const Array<double> &bo,
6624  const Array<double> &bc,
6625  const Vector &pa_data,
6626  Vector &diag);
6627 
6628 template void SmemPAHcurlMassAssembleDiagonal3D<5,6>(const int D1D,
6629  const int Q1D,
6630  const int NE,
6631  const bool symmetric,
6632  const Array<double> &bo,
6633  const Array<double> &bc,
6634  const Vector &pa_data,
6635  Vector &diag);
6636 
6637 template void SmemPAHcurlMassApply3D<0,0>(const int D1D,
6638  const int Q1D,
6639  const int NE,
6640  const bool symmetric,
6641  const Array<double> &bo,
6642  const Array<double> &bc,
6643  const Array<double> &bot,
6644  const Array<double> &bct,
6645  const Vector &pa_data,
6646  const Vector &x,
6647  Vector &y);
6648 
6649 template void SmemPAHcurlMassApply3D<2,3>(const int D1D,
6650  const int Q1D,
6651  const int NE,
6652  const bool symmetric,
6653  const Array<double> &bo,
6654  const Array<double> &bc,
6655  const Array<double> &bot,
6656  const Array<double> &bct,
6657  const Vector &pa_data,
6658  const Vector &x,
6659  Vector &y);
6660 
6661 template void SmemPAHcurlMassApply3D<3,4>(const int D1D,
6662  const int Q1D,
6663  const int NE,
6664  const bool symmetric,
6665  const Array<double> &bo,
6666  const Array<double> &bc,
6667  const Array<double> &bot,
6668  const Array<double> &bct,
6669  const Vector &pa_data,
6670  const Vector &x,
6671  Vector &y);
6672 
6673 template void SmemPAHcurlMassApply3D<4,5>(const int D1D,
6674  const int Q1D,
6675  const int NE,
6676  const bool symmetric,
6677  const Array<double> &bo,
6678  const Array<double> &bc,
6679  const Array<double> &bot,
6680  const Array<double> &bct,
6681  const Vector &pa_data,
6682  const Vector &x,
6683  Vector &y);
6684 
6685 template void SmemPAHcurlMassApply3D<5,6>(const int D1D,
6686  const int Q1D,
6687  const int NE,
6688  const bool symmetric,
6689  const Array<double> &bo,
6690  const Array<double> &bc,
6691  const Array<double> &bot,
6692  const Array<double> &bct,
6693  const Vector &pa_data,
6694  const Vector &x,
6695  Vector &y);
6696 
6697 } // namespace mfem
DeviceTensor< sizeof...(Dims), T > Reshape(T *ptr, Dims...dims)
Wrap a pointer as a DeviceTensor with automatically deduced template parameters.
Definition: dtensor.hpp:136
void PAHcurlMassAssembleDiagonal2D(const int D1D, const int Q1D, const int NE, const bool symmetric, const Array< double > &bo, const Array< double > &bc, const Vector &pa_data, Vector &diag)
const int MAX_Q1D
Definition: forall.hpp:28
constexpr int HCURL_MAX_D1D
Definition: bilininteg.hpp:23
const T * Read(bool on_dev=true) const
Shortcut for mfem::Read(a.GetMemory(), a.Size(), on_dev).
Definition: array.hpp:300
string space
void PAHcurlMassAssembleDiagonal3D(const int D1D, const int Q1D, const int NE, const bool symmetric, const Array< double > &bo, const Array< double > &bc, const Vector &pa_data, Vector &diag)
virtual double * ReadWrite(bool on_dev=true)
Shortcut for mfem::ReadWrite(vec.GetMemory(), vec.Size(), on_dev).
Definition: vector.hpp:442
const int MAX_D1D
Definition: forall.hpp:27
void PAHcurlMassApply2D(const int D1D, const int Q1D, const int NE, const bool symmetric, const Array< double > &bo, const Array< double > &bc, const Array< double > &bot, const Array< double > &bct, const Vector &pa_data, const Vector &x, Vector &y)
RefCoord t[3]
Vector data type.
Definition: vector.hpp:60
constexpr int HCURL_MAX_Q1D
Definition: bilininteg.hpp:25
virtual const double * Read(bool on_dev=true) const
Shortcut for mfem::Read(vec.GetMemory(), vec.Size(), on_dev).
Definition: vector.hpp:426