MFEM v4.8.0
Finite element discretization library
Loading...
Searching...
No Matches
bilininteg_gradient_pa.cpp
Go to the documentation of this file.
1// Copyright (c) 2010-2025, Lawrence Livermore National Security, LLC. Produced
2// at the Lawrence Livermore National Laboratory. All Rights reserved. See files
3// LICENSE and NOTICE for details. LLNL-CODE-806117.
4//
5// This file is part of the MFEM library. For more information and source code
6// availability visit https://mfem.org.
7//
8// MFEM is free software; you can redistribute it and/or modify it under the
9// terms of the BSD-3 license. We welcome feedback and contributions, see file
10// CONTRIBUTING.md for details.
11
13#include "../bilininteg.hpp"
14#include "../gridfunc.hpp"
15#include "../qfunction.hpp"
16
17namespace mfem
18{
19
20/* Description of the *SetupND functions
21 Inputs are as follows
22 \b Q1D number of quadrature points in one dimension.
23 \b NE number of elements.
24 \b MAP_TYPE map type of test fe.
25 \b w quadrature weights.
26 \b j element Jacobians.
27 \b detj element Jacobian determinants.
28 \b c coefficient at quadrature points.
29
30 The function is used to precompute data needed at quadrature points during
31 the action. */
32
33/* Description of the *ApplyND functions
34 The template parameters are
35 \b T_D1D number of degrees of freedom in one dimension,
36 \b T_Q1D number of quadrature points in one dimension,
37 and are necessary to allow for compiler optimizations inside the kernel.
38
39 Inputs are as follows
40 \b NE number of elements.
41 \b B matrix of basis functions.
42 \b G matrix of derivatives of the basis functions.
43 \b Bt transpose of matrix of basis functions.
44 \b Gt transpose matrix of derivatives of the basis functions.
45 \b op data used during action of the element matrix in the tensor
46 product application.
47
48 \b x input vector of degrees of freedom on the element.
49 \b y output vector of degrees of freedom on the element.
50
51 The function computes the kernel for one dimension that is suitable for
52 tensor product action to form ND operators.
53 Most of the ND inputs are reshaped as NQ*(ND*ND)*NE data structure, i.e.
54 to allow indexing such as op(qpt,i,j,el).
55
56 The output data structure is dependent on the kernel and layout of the
57 dimension ND and element number, but in general resembles the action of the
58 element matrix in the tensor product application. */
59
60/* Description of the Smem*ApplyND functions
61 The shared memory (Smem) versions of the kernels differ from the regular
62 versions in the following properties.
63
64 \b mfem::forall is using only one level of parallelism.
65 \b mfem::forall_ND uses an additional level of parallelism
66 \b MFEM_FOREACH_THREAD
67
68 These macros allow automatic mapping of manually defined blocks to
69 underlying hardware threads. These threads can share memory by using
70 the \b MFEM_SHARED keyword for local arrays. */
71
72// PA Gradient Assemble 2D kernel
73static void PAGradientSetup2D(const int Q1D,
74 const int NE,
75 const int MAP_TYPE,
76 const Array<real_t> &w,
77 const Vector &j,
78 const Vector &detj,
79 const Vector &c,
80 Vector &op)
81{
82 const bool by_val = (MAP_TYPE == FiniteElement::VALUE);
83 const int NQ = Q1D*Q1D;
84 auto W = w.Read();
85 auto J = Reshape(j.Read(), NQ, 2, 2, NE);
86 auto DETJ = Reshape(detj.Read(), NQ, NE);
87 auto y = Reshape(op.Write(), NQ, 2, 2, NE);
88
89 const bool const_c = c.Size() == 1;
90 const auto C = const_c ? Reshape(c.Read(), 1,1) :
91 Reshape(c.Read(), NQ, NE);
92
93 mfem::forall(NE, [=] MFEM_HOST_DEVICE (int e)
94 {
95 for (int q = 0; q < NQ; ++q)
96 {
97 const real_t J11 = J(q,0,0,e);
98 const real_t J12 = J(q,0,1,e);
99 const real_t J21 = J(q,1,0,e);
100 const real_t J22 = J(q,1,1,e);
101 // Coefficient and weight
102 const real_t Co = const_c ? C(0,0) : C(q,e);
103 const real_t cw = W[q] * Co * (by_val ? 1.0 : 1.0/DETJ(q,e));
104 // Store wq * Q * adj(J)
105 y(q,0,0,e) = cw * J22; // 1,1
106 y(q,0,1,e) = cw * -J12; // 1,2
107 y(q,1,0,e) = cw * -J21; // 2,1
108 y(q,1,1,e) = cw * J11; // 2,2
109 }
110 });
111}
112
113// PA Gradient Assemble 3D kernel
114static void PAGradientSetup3D(const int Q1D,
115 const int NE,
116 const int MAP_TYPE,
117 const Array<real_t> &w,
118 const Vector &j,
119 const Vector &detj,
120 const Vector &c,
121 Vector &op)
122{
123 const bool by_val = (MAP_TYPE == FiniteElement::VALUE);
124 const int NQ = Q1D*Q1D*Q1D;
125 auto W = w.Read();
126 auto J = Reshape(j.Read(), NQ, 3, 3, NE);
127 auto DETJ = Reshape(detj.Read(), NQ, NE);
128 auto y = Reshape(op.Write(), NQ, 3, 3, NE);
129
130 const bool const_c = c.Size() == 1;
131 const auto C = const_c ? Reshape(c.Read(), 1,1) :
132 Reshape(c.Read(), NQ,NE);
133
134 mfem::forall(NE, [=] MFEM_HOST_DEVICE (int e)
135 {
136 for (int q = 0; q < NQ; ++q)
137 {
138 const real_t J11 = J(q,0,0,e);
139 const real_t J21 = J(q,1,0,e);
140 const real_t J31 = J(q,2,0,e);
141 const real_t J12 = J(q,0,1,e);
142 const real_t J22 = J(q,1,1,e);
143 const real_t J32 = J(q,2,1,e);
144 const real_t J13 = J(q,0,2,e);
145 const real_t J23 = J(q,1,2,e);
146 const real_t J33 = J(q,2,2,e);
147 // adj(J)
148 const real_t A11 = (J22 * J33) - (J23 * J32);
149 const real_t A12 = (J32 * J13) - (J12 * J33);
150 const real_t A13 = (J12 * J23) - (J22 * J13);
151 const real_t A21 = (J31 * J23) - (J21 * J33);
152 const real_t A22 = (J11 * J33) - (J13 * J31);
153 const real_t A23 = (J21 * J13) - (J11 * J23);
154 const real_t A31 = (J21 * J32) - (J31 * J22);
155 const real_t A32 = (J31 * J12) - (J11 * J32);
156 const real_t A33 = (J11 * J22) - (J12 * J21);
157 // Coefficient and weight
158 const real_t Co = const_c ? C(0,0) : C(q,e);
159 const real_t cw = W[q] * Co * (by_val ? 1.0 : 1.0/DETJ(q,e));
160 // Store wq * Q * adj(J)
161 y(q,0,0,e) = cw * A11; // 1,1
162 y(q,0,1,e) = cw * A12; // 1,2
163 y(q,0,2,e) = cw * A13; // 1,3
164 y(q,1,0,e) = cw * A21; // 2,1
165 y(q,1,1,e) = cw * A22; // 2,2
166 y(q,1,2,e) = cw * A23; // 2,3
167 y(q,2,0,e) = cw * A31; // 3,1
168 y(q,2,1,e) = cw * A32; // 3,2
169 y(q,2,2,e) = cw * A33; // 3,3
170 }
171 });
172}
173
174static void PAGradientSetup(const int dim,
175 const int TR_D1D,
176 const int TE_D1D,
177 const int Q1D,
178 const int NE,
179 const int MAP_TYPE,
180 const Array<real_t> &W,
181 const Vector &J,
182 const Vector &DET_J,
183 const Vector &COEFF,
184 Vector &op)
185{
186 if (dim == 1) { MFEM_ABORT("dim==1 not supported in PAGradientSetup"); }
187 if (dim == 2)
188 {
189 PAGradientSetup2D(Q1D, NE, MAP_TYPE, W, J, DET_J, COEFF, op);
190 }
191 if (dim == 3)
192 {
193 PAGradientSetup3D(Q1D, NE, MAP_TYPE, W, J, DET_J, COEFF, op);
194 }
195}
196
198 const FiniteElementSpace &test_fes)
199{
200 // Assumes tensor-product elements ordered by nodes
201 MFEM_ASSERT(trial_fes.GetOrdering() == Ordering::byNODES,
202 "PA Only supports Ordering::byNODES!");
203 // Assuming the same element type
204 Mesh *mesh = trial_fes.GetMesh();
205 const FiniteElement &trial_fe = *trial_fes.GetTypicalFE(); // H1
206 const FiniteElement &test_fe = *test_fes.GetTypicalFE(); // H1^d or L2^d
208 const IntegrationRule *ir = IntRule ? IntRule : &GetRule(trial_fe, test_fe,
209 *trans);
210 const int dims = trial_fe.GetDim();
211 const int dimsToStore = dims * dims;
212 nq = ir->GetNPoints();
213 dim = mesh->Dimension();
214 ne = trial_fes.GetNE();
217 trial_maps = &trial_fe.GetDofToQuad(*ir, DofToQuad::TENSOR);
218 trial_dofs1D = trial_maps->ndof;
219 quad1D = trial_maps->nqpt;
220 test_maps = &test_fe.GetDofToQuad(*ir, DofToQuad::TENSOR);
221 test_dofs1D = test_maps->ndof;
222 MFEM_ASSERT(quad1D == test_maps->nqpt,
223 "PA requires test and trial space to have same number of quadrature points!");
224 pa_data.SetSize(nq * dimsToStore * ne, Device::GetMemoryType());
225
226 QuadratureSpace qs(*mesh, *ir);
228
229 PAGradientSetup(dim, trial_dofs1D, test_dofs1D, quad1D, ne,
230 test_fe.GetMapType(), ir->GetWeights(), geom->J, geom->detJ,
231 coeff, pa_data);
232}
233
234// PA Gradient Apply 2D kernel
235template<int T_TR_D1D = 0, int T_TE_D1D = 0, int T_Q1D = 0>
236static void PAGradientApply2D(const int NE,
237 const Array<real_t> &b,
238 const Array<real_t> &g,
239 const Array<real_t> &bt,
240 const Vector &op_,
241 const Vector &x_,
242 Vector &y_,
243 const int tr_d1d = 0,
244 const int te_d1d = 0,
245 const int q1d = 0)
246{
247 const int TR_D1D = T_TR_D1D ? T_TR_D1D : tr_d1d;
248 const int TE_D1D = T_TE_D1D ? T_TE_D1D : te_d1d;
249 const int Q1D = T_Q1D ? T_Q1D : q1d;
250 MFEM_VERIFY(TR_D1D <= DeviceDofQuadLimits::Get().MAX_D1D, "");
251 MFEM_VERIFY(TE_D1D <= DeviceDofQuadLimits::Get().MAX_D1D, "");
252 MFEM_VERIFY(Q1D <= DeviceDofQuadLimits::Get().MAX_Q1D, "");
253 auto B = Reshape(b.Read(), Q1D, TR_D1D);
254 auto G = Reshape(g.Read(), Q1D, TR_D1D);
255 auto Bt = Reshape(bt.Read(), TE_D1D, Q1D);
256 auto op = Reshape(op_.Read(), Q1D*Q1D, 2,2, NE);
257 auto x = Reshape(x_.Read(), TR_D1D, TR_D1D, NE);
258 auto y = Reshape(y_.ReadWrite(), TE_D1D, TE_D1D, 2, NE);
259 mfem::forall(NE, [=] MFEM_HOST_DEVICE (int e)
260 {
261 const int TR_D1D = T_TR_D1D ? T_TR_D1D : tr_d1d;
262 const int TE_D1D = T_TE_D1D ? T_TE_D1D : te_d1d;
263 const int Q1D = T_Q1D ? T_Q1D : q1d;
264 const int VDIM = 2;
265 // the following variables are evaluated at compile time
266 constexpr int max_TE_D1D = T_TE_D1D ? T_TE_D1D : DofQuadLimits::MAX_D1D;
267 constexpr int max_Q1D = T_Q1D ? T_Q1D : DofQuadLimits::MAX_Q1D;
268
269 real_t grad[max_Q1D][max_Q1D][VDIM];
270 for (int qy = 0; qy < Q1D; ++qy)
271 {
272 for (int qx = 0; qx < Q1D; ++qx)
273 {
274 grad[qy][qx][0] = 0.0;
275 grad[qy][qx][1] = 0.0;
276 }
277 }
278 for (int dy = 0; dy < TR_D1D; ++dy)
279 {
280 real_t gradX[max_Q1D][VDIM];
281 for (int qx = 0; qx < Q1D; ++qx)
282 {
283 gradX[qx][0] = 0.0;
284 gradX[qx][1] = 0.0;
285 }
286 for (int dx = 0; dx < TR_D1D; ++dx)
287 {
288 const real_t s = x(dx,dy,e);
289 for (int qx = 0; qx < Q1D; ++qx)
290 {
291 gradX[qx][0] += s * G(qx,dx);
292 gradX[qx][1] += s * B(qx,dx);
293 }
294 }
295 for (int qy = 0; qy < Q1D; ++qy)
296 {
297 const real_t wy = B(qy,dy);
298 const real_t wDy = G(qy,dy);
299 for (int qx = 0; qx < Q1D; ++qx)
300 {
301 grad[qy][qx][0] += gradX[qx][0] * wy;
302 grad[qy][qx][1] += gradX[qx][1] * wDy;
303 }
304 }
305 }
306 // We've now calculated grad(p) = [Dxy, xDy] in plane
307 for (int qy = 0; qy < Q1D; ++qy)
308 {
309 for (int qx = 0; qx < Q1D; ++qx)
310 {
311 const int q = qx + qy * Q1D;
312 const real_t gradX = grad[qy][qx][0];
313 const real_t gradY = grad[qy][qx][1];
314
315 grad[qy][qx][0] = gradX*op(q,0,0,e) + gradY*op(q,1,0,e);
316 grad[qy][qx][1] = gradX*op(q,0,1,e) + gradY*op(q,1,1,e);
317 }
318 }
319 // We've now calculated grad = grad p * op
320 for (int qy = 0; qy < Q1D; ++qy)
321 {
322 real_t opX[max_TE_D1D][VDIM];
323 for (int dx = 0; dx < TE_D1D; ++dx)
324 {
325 opX[dx][0] = 0.0;
326 opX[dx][1] = 0.0;
327 for (int qx = 0; qx < Q1D; ++qx)
328 {
329 opX[dx][0] += Bt(dx,qx)*grad[qy][qx][0];
330 opX[dx][1] += Bt(dx,qx)*grad[qy][qx][1];
331 }
332 }
333 for (int dy = 0; dy < TE_D1D; ++dy)
334 {
335 const real_t wy = Bt(dy,qy);
336 for (int dx = 0; dx < TE_D1D; ++dx)
337 {
338 y(dx,dy,0,e) += wy*opX[dx][0];
339 y(dx,dy,1,e) += wy*opX[dx][1];
340 }
341 }
342 }
343 // We've now calculated y = u * grad
344 });
345}
346
347// PA Gradient Apply 2D kernel transpose
348template<int T_TR_D1D = 0, int T_TE_D1D = 0, int T_Q1D = 0>
349static void PAGradientApplyTranspose2D(const int NE,
350 const Array<real_t> &bt,
351 const Array<real_t> &gt,
352 const Array<real_t> &b,
353 const Vector &op_,
354 const Vector &x_,
355 Vector &y_,
356 const int tr_d1d = 0,
357 const int te_d1d = 0,
358 const int q1d = 0)
359{
360 const int TR_D1D = T_TR_D1D ? T_TR_D1D : tr_d1d;
361 const int TE_D1D = T_TE_D1D ? T_TE_D1D : te_d1d;
362 const int Q1D = T_Q1D ? T_Q1D : q1d;
363 MFEM_VERIFY(TR_D1D <= DeviceDofQuadLimits::Get().MAX_D1D, "");
364 MFEM_VERIFY(TE_D1D <= DeviceDofQuadLimits::Get().MAX_D1D, "");
365 MFEM_VERIFY(Q1D <= DeviceDofQuadLimits::Get().MAX_Q1D, "");
366 auto Bt = Reshape(bt.Read(), TR_D1D, Q1D);
367 auto Gt = Reshape(gt.Read(), TR_D1D, Q1D);
368 auto B = Reshape(b.Read(), Q1D, TE_D1D);
369 auto op = Reshape(op_.Read(), Q1D*Q1D, 2,2, NE);
370 auto x = Reshape(x_.Read(), TE_D1D, TE_D1D, 2, NE);
371 auto y = Reshape(y_.ReadWrite(), TR_D1D, TR_D1D, NE);
372 mfem::forall(NE, [=] MFEM_HOST_DEVICE (int e)
373 {
374 const int TR_D1D = T_TR_D1D ? T_TR_D1D : tr_d1d;
375 const int TE_D1D = T_TE_D1D ? T_TE_D1D : te_d1d;
376 const int Q1D = T_Q1D ? T_Q1D : q1d;
377 const int VDIM = 2;
378 // the following variables are evaluated at compile time
379 constexpr int max_TR_D1D = T_TR_D1D ? T_TR_D1D : DofQuadLimits::MAX_D1D;
380 constexpr int max_Q1D = T_Q1D ? T_Q1D : DofQuadLimits::MAX_Q1D;
381
382 // B: testdof-to-quad
383 real_t Bxy[max_Q1D][max_Q1D][VDIM];
384 for (int qy = 0; qy < Q1D; ++qy)
385 {
386 for (int qx = 0; qx < Q1D; ++qx)
387 {
388 Bxy[qy][qx][0] = 0.0;
389 Bxy[qy][qx][1] = 0.0;
390 }
391 }
392 for (int dy = 0; dy < TE_D1D; ++dy)
393 {
394 real_t Bx[max_Q1D][VDIM];
395 for (int qx = 0; qx < Q1D; ++qx)
396 {
397 Bx[qx][0] = 0.0;
398 Bx[qx][1] = 0.0;
399 }
400 for (int dx = 0; dx < TE_D1D; ++dx)
401 {
402 const real_t s0 = x(dx,dy,0,e);
403 const real_t s1 = x(dx,dy,1,e);
404 for (int qx = 0; qx < Q1D; ++qx)
405 {
406 Bx[qx][0] += s0 * B(qx,dx);
407 Bx[qx][1] += s1 * B(qx,dx);
408 }
409 }
410 for (int qy = 0; qy < Q1D; ++qy)
411 {
412 const real_t wy = B(qy,dy);
413 for (int qx = 0; qx < Q1D; ++qx)
414 {
415 Bxy[qy][qx][0] += Bx[qx][0] * wy;
416 Bxy[qy][qx][1] += Bx[qx][1] * wy;
417 }
418 }
419 }
420 // D: quad-to-quad
421 for (int qy = 0; qy < Q1D; ++qy)
422 {
423 for (int qx = 0; qx < Q1D; ++qx)
424 {
425 const int q = qx + qy * Q1D;
426 const real_t Bxy0 = Bxy[qy][qx][0];
427 const real_t Bxy1 = Bxy[qy][qx][1];
428
429 Bxy[qy][qx][0] = Bxy0*op(q,0,0,e) + Bxy1*op(q,0,1,e);
430 Bxy[qy][qx][1] = Bxy0*op(q,1,0,e) + Bxy1*op(q,1,1,e);
431 }
432 }
433 // Bt: quad-to-trialdof
434 for (int qy = 0; qy < Q1D; ++qy)
435 {
436 real_t Btx[max_TR_D1D][VDIM];
437 for (int dx = 0; dx < TR_D1D; ++dx)
438 {
439 Btx[dx][0] = 0.0;
440 Btx[dx][1] = 0.0;
441 for (int qx = 0; qx < Q1D; ++qx)
442 {
443 Btx[dx][0] += Gt(dx,qx)*Bxy[qy][qx][0];
444 Btx[dx][1] += Bt(dx,qx)*Bxy[qy][qx][1];
445 }
446 }
447 for (int dy = 0; dy < TR_D1D; ++dy)
448 {
449 const real_t wy = Bt(dy,qy);
450 const real_t wDy = Gt(dy,qy);
451 for (int dx = 0; dx < TR_D1D; ++dx)
452 {
453 y(dx,dy,e) += wy*Btx[dx][0] + wDy*Btx[dx][1];
454 }
455 }
456 }
457 });
458}
459
460// PA Gradient Apply 3D kernel
461template<const int T_TR_D1D = 0, const int T_TE_D1D = 0, const int T_Q1D = 0>
462static void PAGradientApply3D(const int NE,
463 const Array<real_t> &b,
464 const Array<real_t> &g,
465 const Array<real_t> &bt,
466 const Vector &op_,
467 const Vector &x_,
468 Vector &y_,
469 int tr_d1d = 0,
470 int te_d1d = 0,
471 int q1d = 0)
472{
473 const int TR_D1D = T_TR_D1D ? T_TR_D1D : tr_d1d;
474 const int TE_D1D = T_TE_D1D ? T_TE_D1D : te_d1d;
475 const int Q1D = T_Q1D ? T_Q1D : q1d;
476 MFEM_VERIFY(TR_D1D <= DeviceDofQuadLimits::Get().MAX_D1D, "");
477 MFEM_VERIFY(TE_D1D <= DeviceDofQuadLimits::Get().MAX_D1D, "");
478 MFEM_VERIFY(Q1D <= DeviceDofQuadLimits::Get().MAX_Q1D, "");
479 auto B = Reshape(b.Read(), Q1D, TR_D1D);
480 auto G = Reshape(g.Read(), Q1D, TR_D1D);
481 auto Bt = Reshape(bt.Read(), TE_D1D, Q1D);
482 auto op = Reshape(op_.Read(), Q1D*Q1D*Q1D, 3,3, NE);
483 auto x = Reshape(x_.Read(), TR_D1D, TR_D1D, TR_D1D, NE);
484 auto y = Reshape(y_.ReadWrite(), TE_D1D, TE_D1D, TE_D1D, 3, NE);
485 mfem::forall(NE, [=] MFEM_HOST_DEVICE (int e)
486 {
487 const int TR_D1D = T_TR_D1D ? T_TR_D1D : tr_d1d;
488 const int TE_D1D = T_TE_D1D ? T_TE_D1D : te_d1d;
489 const int Q1D = T_Q1D ? T_Q1D : q1d;
490 const int VDIM = 3;
491 // the following variables are evaluated at compile time
492 constexpr int max_TE_D1D = T_TE_D1D ? T_TE_D1D : DofQuadLimits::MAX_D1D;
493 constexpr int max_Q1D = T_Q1D ? T_Q1D : DofQuadLimits::MAX_Q1D;
494
495 real_t grad[max_Q1D][max_Q1D][max_Q1D][VDIM];
496 for (int qz = 0; qz < Q1D; ++qz)
497 {
498 for (int qy = 0; qy < Q1D; ++qy)
499 {
500 for (int qx = 0; qx < Q1D; ++qx)
501 {
502 grad[qz][qy][qx][0] = 0.0;
503 grad[qz][qy][qx][1] = 0.0;
504 grad[qz][qy][qx][2] = 0.0;
505 }
506 }
507 }
508 for (int dz = 0; dz < TR_D1D; ++dz)
509 {
510 real_t gradXY[max_Q1D][max_Q1D][3];
511 for (int qy = 0; qy < Q1D; ++qy)
512 {
513 for (int qx = 0; qx < Q1D; ++qx)
514 {
515 gradXY[qy][qx][0] = 0.0;
516 gradXY[qy][qx][1] = 0.0;
517 gradXY[qy][qx][2] = 0.0;
518 }
519 }
520 for (int dy = 0; dy < TR_D1D; ++dy)
521 {
522 real_t gradX[max_Q1D][2];
523 for (int qx = 0; qx < Q1D; ++qx)
524 {
525 gradX[qx][0] = 0.0;
526 gradX[qx][1] = 0.0;
527 }
528 for (int dx = 0; dx < TR_D1D; ++dx)
529 {
530 const real_t s = x(dx,dy,dz,e);
531 for (int qx = 0; qx < Q1D; ++qx)
532 {
533 gradX[qx][0] += s * B(qx,dx);
534 gradX[qx][1] += s * G(qx,dx);
535 }
536 }
537 for (int qy = 0; qy < Q1D; ++qy)
538 {
539 const real_t wy = B(qy,dy);
540 const real_t wDy = G(qy,dy);
541 for (int qx = 0; qx < Q1D; ++qx)
542 {
543 const real_t wx = gradX[qx][0];
544 const real_t wDx = gradX[qx][1];
545 gradXY[qy][qx][0] += wDx * wy;
546 gradXY[qy][qx][1] += wx * wDy;
547 gradXY[qy][qx][2] += wx * wy;
548 }
549 }
550 }
551 for (int qz = 0; qz < Q1D; ++qz)
552 {
553 const real_t wz = B(qz,dz);
554 const real_t wDz = G(qz,dz);
555 for (int qy = 0; qy < Q1D; ++qy)
556 {
557 for (int qx = 0; qx < Q1D; ++qx)
558 {
559 grad[qz][qy][qx][0] += gradXY[qy][qx][0] * wz;
560 grad[qz][qy][qx][1] += gradXY[qy][qx][1] * wz;
561 grad[qz][qy][qx][2] += gradXY[qy][qx][2] * wDz;
562 }
563 }
564 }
565 }
566 // We've now calculated grad(p) = [Dxyz, xDyz, xyDz] in plane
567 for (int qz = 0; qz < Q1D; ++qz)
568 {
569 for (int qy = 0; qy < Q1D; ++qy)
570 {
571 for (int qx = 0; qx < Q1D; ++qx)
572 {
573 const int q = qx + (qy + qz * Q1D) * Q1D;
574 const real_t gradX = grad[qz][qy][qx][0];
575 const real_t gradY = grad[qz][qy][qx][1];
576 const real_t gradZ = grad[qz][qy][qx][2];
577
578 grad[qz][qy][qx][0] = gradX*op(q,0,0,e) + gradY*op(q,1,0,e) + gradZ*op(q,2,0,e);
579 grad[qz][qy][qx][1] = gradX*op(q,0,1,e) + gradY*op(q,1,1,e) + gradZ*op(q,2,1,e);
580 grad[qz][qy][qx][2] = gradX*op(q,0,2,e) + gradY*op(q,1,2,e) + gradZ*op(q,2,2,e);
581 }
582 }
583 }
584 // We've now calculated grad = grad p * op
585 for (int qz = 0; qz < Q1D; ++qz)
586 {
587 real_t opXY[max_TE_D1D][max_TE_D1D][VDIM];
588 for (int dy = 0; dy < TE_D1D; ++dy)
589 {
590 for (int dx = 0; dx < TE_D1D; ++dx)
591 {
592 opXY[dy][dx][0] = 0.0;
593 opXY[dy][dx][1] = 0.0;
594 opXY[dy][dx][2] = 0.0;
595 }
596 }
597 for (int qy = 0; qy < Q1D; ++qy)
598 {
599 real_t opX[max_TE_D1D][VDIM];
600 for (int dx = 0; dx < TE_D1D; ++dx)
601 {
602 opX[dx][0] = 0.0;
603 opX[dx][1] = 0.0;
604 opX[dx][2] = 0.0;
605 for (int qx = 0; qx < Q1D; ++qx)
606 {
607 opX[dx][0] += Bt(dx,qx)*grad[qz][qy][qx][0];
608 opX[dx][1] += Bt(dx,qx)*grad[qz][qy][qx][1];
609 opX[dx][2] += Bt(dx,qx)*grad[qz][qy][qx][2];
610 }
611 }
612 for (int dy = 0; dy < TE_D1D; ++dy)
613 {
614 for (int dx = 0; dx < TE_D1D; ++dx)
615 {
616 opXY[dy][dx][0] += Bt(dy,qy)*opX[dx][0];
617 opXY[dy][dx][1] += Bt(dy,qy)*opX[dx][1];
618 opXY[dy][dx][2] += Bt(dy,qy)*opX[dx][2];
619 }
620 }
621 }
622 for (int dz = 0; dz < TE_D1D; ++dz)
623 {
624 for (int dy = 0; dy < TE_D1D; ++dy)
625 {
626 for (int dx = 0; dx < TE_D1D; ++dx)
627 {
628 y(dx,dy,dz,0,e) += Bt(dz,qz)*opXY[dy][dx][0];
629 y(dx,dy,dz,1,e) += Bt(dz,qz)*opXY[dy][dx][1];
630 y(dx,dy,dz,2,e) += Bt(dz,qz)*opXY[dy][dx][2];
631 }
632 }
633 }
634 }
635 // We've now calculated y = u * grad
636 });
637}
638
639// PA Gradient Apply 3D kernel
640template<const int T_TR_D1D = 0, const int T_TE_D1D = 0, const int T_Q1D = 0>
641static void PAGradientApplyTranspose3D(const int NE,
642 const Array<real_t> &bt,
643 const Array<real_t> &gt,
644 const Array<real_t> &b,
645 const Vector &op_,
646 const Vector &x_,
647 Vector &y_,
648 int tr_d1d = 0,
649 int te_d1d = 0,
650 int q1d = 0)
651{
652 const int TR_D1D = T_TR_D1D ? T_TR_D1D : tr_d1d;
653 const int TE_D1D = T_TE_D1D ? T_TE_D1D : te_d1d;
654 const int Q1D = T_Q1D ? T_Q1D : q1d;
655 MFEM_VERIFY(TR_D1D <= DeviceDofQuadLimits::Get().MAX_D1D, "");
656 MFEM_VERIFY(TE_D1D <= DeviceDofQuadLimits::Get().MAX_D1D, "");
657 MFEM_VERIFY(Q1D <= DeviceDofQuadLimits::Get().MAX_Q1D, "");
658 auto Bt = Reshape(bt.Read(), TR_D1D, Q1D);
659 auto Gt = Reshape(gt.Read(), TR_D1D, Q1D);
660 auto B = Reshape(b.Read(), Q1D, TE_D1D);
661 auto op = Reshape(op_.Read(), Q1D*Q1D*Q1D, 3,3, NE);
662 auto x = Reshape(x_.Read(), TE_D1D, TE_D1D, TE_D1D, 3, NE);
663 auto y = Reshape(y_.ReadWrite(), TR_D1D, TR_D1D, TR_D1D, NE);
664 mfem::forall(NE, [=] MFEM_HOST_DEVICE (int e)
665 {
666 const int TR_D1D = T_TR_D1D ? T_TR_D1D : tr_d1d;
667 const int TE_D1D = T_TE_D1D ? T_TE_D1D : te_d1d;
668 const int Q1D = T_Q1D ? T_Q1D : q1d;
669 const int VDIM = 3;
670 // the following variables are evaluated at compile time
671 constexpr int max_TR_D1D = T_TR_D1D ? T_TR_D1D : DofQuadLimits::MAX_D1D;
672 constexpr int max_Q1D = T_Q1D ? T_Q1D : DofQuadLimits::MAX_Q1D;
673
674 // B: testdof-to-quad
675 real_t Bxyz[max_Q1D][max_Q1D][max_Q1D][VDIM];
676 for (int qz = 0; qz < Q1D; ++qz)
677 {
678 for (int qy = 0; qy < Q1D; ++qy)
679 {
680 for (int qx = 0; qx < Q1D; ++qx)
681 {
682 Bxyz[qz][qy][qx][0] = 0.0;
683 Bxyz[qz][qy][qx][1] = 0.0;
684 Bxyz[qz][qy][qx][2] = 0.0;
685 }
686 }
687 }
688 for (int dz = 0; dz < TE_D1D; ++dz)
689 {
690 real_t Bxy[max_Q1D][max_Q1D][3];
691 for (int qy = 0; qy < Q1D; ++qy)
692 {
693 for (int qx = 0; qx < Q1D; ++qx)
694 {
695 Bxy[qy][qx][0] = 0.0;
696 Bxy[qy][qx][1] = 0.0;
697 Bxy[qy][qx][2] = 0.0;
698 }
699 }
700 for (int dy = 0; dy < TE_D1D; ++dy)
701 {
702 real_t Bx[max_Q1D][3];
703 for (int qx = 0; qx < Q1D; ++qx)
704 {
705 Bx[qx][0] = 0.0;
706 Bx[qx][1] = 0.0;
707 Bx[qx][2] = 0.0;
708 }
709 for (int dx = 0; dx < TE_D1D; ++dx)
710 {
711 const real_t s0 = x(dx,dy,dz,0,e);
712 const real_t s1 = x(dx,dy,dz,1,e);
713 const real_t s2 = x(dx,dy,dz,2,e);
714 for (int qx = 0; qx < Q1D; ++qx)
715 {
716 Bx[qx][0] += s0 * B(qx,dx);
717 Bx[qx][1] += s1 * B(qx,dx);
718 Bx[qx][2] += s2 * B(qx,dx);
719 }
720 }
721 for (int qy = 0; qy < Q1D; ++qy)
722 {
723 const real_t wy = B(qy,dy);
724 for (int qx = 0; qx < Q1D; ++qx)
725 {
726 Bxy[qy][qx][0] += Bx[qx][0] * wy;
727 Bxy[qy][qx][1] += Bx[qx][1] * wy;
728 Bxy[qy][qx][2] += Bx[qx][2] * wy;
729 }
730 }
731 }
732 for (int qz = 0; qz < Q1D; ++qz)
733 {
734 const real_t wz = B(qz,dz);
735 for (int qy = 0; qy < Q1D; ++qy)
736 {
737 for (int qx = 0; qx < Q1D; ++qx)
738 {
739 Bxyz[qz][qy][qx][0] += Bxy[qy][qx][0] * wz;
740 Bxyz[qz][qy][qx][1] += Bxy[qy][qx][1] * wz;
741 Bxyz[qz][qy][qx][2] += Bxy[qy][qx][2] * wz;
742 }
743 }
744 }
745 }
746 // D: quad-to-quad
747 for (int qz = 0; qz < Q1D; ++qz)
748 {
749 for (int qy = 0; qy < Q1D; ++qy)
750 {
751 for (int qx = 0; qx < Q1D; ++qx)
752 {
753 const int q = qx + (qy + qz * Q1D) * Q1D;
754 const real_t Bxyz0 = Bxyz[qz][qy][qx][0];
755 const real_t Bxyz1 = Bxyz[qz][qy][qx][1];
756 const real_t Bxyz2 = Bxyz[qz][qy][qx][2];
757
758 Bxyz[qz][qy][qx][0] = Bxyz0*op(q,0,0,e) + Bxyz1*op(q,0,1,e) + Bxyz2*op(q,0,2,e);
759 Bxyz[qz][qy][qx][1] = Bxyz0*op(q,1,0,e) + Bxyz1*op(q,1,1,e) + Bxyz2*op(q,1,2,e);
760 Bxyz[qz][qy][qx][2] = Bxyz0*op(q,2,0,e) + Bxyz1*op(q,2,1,e) + Bxyz2*op(q,2,2,e);
761 }
762 }
763 }
764 // Bt: quad-to-trialdof
765 for (int qz = 0; qz < Q1D; ++qz)
766 {
767 real_t Btxy[max_TR_D1D][max_TR_D1D][VDIM];
768 for (int dy = 0; dy < TR_D1D; ++dy)
769 {
770 for (int dx = 0; dx < TR_D1D; ++dx)
771 {
772 Btxy[dy][dx][0] = 0.0;
773 Btxy[dy][dx][1] = 0.0;
774 Btxy[dy][dx][2] = 0.0;
775 }
776 }
777 for (int qy = 0; qy < Q1D; ++qy)
778 {
779 real_t Btx[max_TR_D1D][VDIM];
780 for (int dx = 0; dx < TR_D1D; ++dx)
781 {
782 Btx[dx][0] = 0.0;
783 Btx[dx][1] = 0.0;
784 Btx[dx][2] = 0.0;
785 for (int qx = 0; qx < Q1D; ++qx)
786 {
787 Btx[dx][0] += Gt(dx,qx)*Bxyz[qz][qy][qx][0];
788 Btx[dx][1] += Bt(dx,qx)*Bxyz[qz][qy][qx][1];
789 Btx[dx][2] += Bt(dx,qx)*Bxyz[qz][qy][qx][2];
790 }
791 }
792 for (int dy = 0; dy < TR_D1D; ++dy)
793 {
794 const real_t wy = Bt(dy,qy);
795 const real_t wDy = Gt(dy,qy);
796 for (int dx = 0; dx < TR_D1D; ++dx)
797 {
798 Btxy[dy][dx][0] += wy *Btx[dx][0];
799 Btxy[dy][dx][1] += wDy*Btx[dx][1];
800 Btxy[dy][dx][2] += wy *Btx[dx][2];
801 }
802 }
803 }
804 for (int dz = 0; dz < TR_D1D; ++dz)
805 {
806 const real_t wz = Bt(dz,qz);
807 const real_t wDz = Gt(dz,qz);
808 for (int dy = 0; dy < TR_D1D; ++dy)
809 {
810 for (int dx = 0; dx < TR_D1D; ++dx)
811 {
812 y(dx,dy,dz,e) += wz *Btxy[dy][dx][0] +
813 wz *Btxy[dy][dx][1] +
814 wDz*Btxy[dy][dx][2];
815 }
816 }
817 }
818 }
819 });
820}
821
822// Shared memory PA Gradient Apply 3D kernel
823template<const int T_TR_D1D = 0, const int T_TE_D1D = 0, const int T_Q1D = 0>
824static void SmemPAGradientApply3D(const int NE,
825 const Array<real_t> &b_,
826 const Array<real_t> &g_,
827 const Array<real_t> &bt_,
828 const Vector &d_,
829 const Vector &x_,
830 Vector &y_,
831 const int tr_d1d = 0,
832 const int te_d1d = 0,
833 const int q1d = 0)
834{
835 const int TR_D1D = T_TR_D1D ? T_TR_D1D : tr_d1d;
836 const int TE_D1D = T_TE_D1D ? T_TE_D1D : te_d1d;
837 const int Q1D = T_Q1D ? T_Q1D : q1d;
838
839 MFEM_VERIFY(TR_D1D <= DeviceDofQuadLimits::Get().MAX_D1D, "");
840 MFEM_VERIFY(TE_D1D <= DeviceDofQuadLimits::Get().MAX_D1D, "");
841 MFEM_VERIFY(TR_D1D <= Q1D, "");
842 MFEM_VERIFY(TE_D1D <= Q1D, "");
843 MFEM_VERIFY(Q1D <= DeviceDofQuadLimits::Get().MAX_Q1D, "");
844
845 auto b = Reshape(b_.Read(), Q1D, TR_D1D);
846 auto g = Reshape(g_.Read(), Q1D, TR_D1D);
847 auto bt = Reshape(bt_.Read(), TE_D1D, Q1D);
848 auto D = Reshape(d_.Read(), Q1D*Q1D*Q1D, 3, 3, NE);
849 auto x = Reshape(x_.Read(), TR_D1D, TR_D1D, TR_D1D, NE);
850 auto y = Reshape(y_.ReadWrite(), TE_D1D, TE_D1D, TE_D1D, 3, NE);
851
852 mfem::forall_3D(NE, (Q1D>8)?8:Q1D, (Q1D>8)?8:Q1D, (Q1D>8)?8:Q1D,
853 [=] MFEM_HOST_DEVICE (int e)
854 {
855 const int tidz = MFEM_THREAD_ID(z);
856 const int D1DR = T_TR_D1D ? T_TR_D1D : tr_d1d;
857 const int D1DE = T_TE_D1D ? T_TE_D1D : te_d1d;
858 const int Q1D = T_Q1D ? T_Q1D : q1d;
859 constexpr int MQ1 = T_Q1D ? T_Q1D : DofQuadLimits::MAX_Q1D;
860 constexpr int MD1R = T_TR_D1D ? T_TR_D1D : DofQuadLimits::MAX_D1D;
861 constexpr int MD1E = T_TE_D1D ? T_TE_D1D : DofQuadLimits::MAX_D1D;
862 constexpr int MD1 = MD1E > MD1R ? MD1E : MD1R;
863 constexpr int MDQ = MQ1 > MD1 ? MQ1 : MD1;
864 MFEM_SHARED real_t sBG[2][MQ1*MD1];
865 real_t (*B)[MD1] = (real_t (*)[MD1]) (sBG+0);
866 real_t (*G)[MD1] = (real_t (*)[MD1]) (sBG+1);
867 real_t (*Bt)[MQ1] = (real_t (*)[MQ1]) (sBG+0);
868 MFEM_SHARED real_t sm0[3][MDQ*MDQ*MDQ];
869 MFEM_SHARED real_t sm1[3][MDQ*MDQ*MDQ];
870 real_t (*X)[MD1][MD1] = (real_t (*)[MD1][MD1]) (sm0+2);
871 real_t (*DDQ0)[MD1][MQ1] = (real_t (*)[MD1][MQ1]) (sm0+0);
872 real_t (*DDQ1)[MD1][MQ1] = (real_t (*)[MD1][MQ1]) (sm0+1);
873
874 real_t (*DQQ0)[MQ1][MQ1] = (real_t (*)[MQ1][MQ1]) (sm1+0);
875 real_t (*DQQ1)[MQ1][MQ1] = (real_t (*)[MQ1][MQ1]) (sm1+1);
876 real_t (*DQQ2)[MQ1][MQ1] = (real_t (*)[MQ1][MQ1]) (sm1+2);
877
878 real_t (*QQQ0)[MQ1][MQ1] = (real_t (*)[MQ1][MQ1]) (sm0+0);
879 real_t (*QQQ1)[MQ1][MQ1] = (real_t (*)[MQ1][MQ1]) (sm0+1);
880 real_t (*QQQ2)[MQ1][MQ1] = (real_t (*)[MQ1][MQ1]) (sm0+2);
881
882 real_t (*QQD0)[MQ1][MD1] = (real_t (*)[MQ1][MD1]) (sm1+0);
883 real_t (*QQD1)[MQ1][MD1] = (real_t (*)[MQ1][MD1]) (sm1+1);
884 real_t (*QQD2)[MQ1][MD1] = (real_t (*)[MQ1][MD1]) (sm1+2);
885
886 real_t (*QDD0)[MD1][MD1] = (real_t (*)[MD1][MD1]) (sm0+0);
887 real_t (*QDD1)[MD1][MD1] = (real_t (*)[MD1][MD1]) (sm0+1);
888 real_t (*QDD2)[MD1][MD1] = (real_t (*)[MD1][MD1]) (sm0+2);
889 MFEM_FOREACH_THREAD(dz,z,D1DR)
890 {
891 MFEM_FOREACH_THREAD(dy,y,D1DR)
892 {
893 MFEM_FOREACH_THREAD(dx,x,D1DR)
894 {
895 X[dz][dy][dx] = x(dx,dy,dz,e);
896 }
897 }
898 }
899 if (tidz == 0)
900 {
901 MFEM_FOREACH_THREAD(d,y,D1DR)
902 {
903 MFEM_FOREACH_THREAD(q,x,Q1D)
904 {
905 B[q][d] = b(q,d);
906 G[q][d] = g(q,d);
907 }
908 }
909 }
910 MFEM_SYNC_THREAD;
911 MFEM_FOREACH_THREAD(dz,z,D1DR)
912 {
913 MFEM_FOREACH_THREAD(dy,y,D1DR)
914 {
915 MFEM_FOREACH_THREAD(qx,x,Q1D)
916 {
917 real_t u = 0.0;
918 real_t v = 0.0;
919 for (int dx = 0; dx < D1DR; ++dx)
920 {
921 const real_t coord = X[dz][dy][dx];
922 u += coord * B[qx][dx];
923 v += coord * G[qx][dx];
924 }
925 DDQ0[dz][dy][qx] = u;
926 DDQ1[dz][dy][qx] = v;
927 }
928 }
929 }
930 MFEM_SYNC_THREAD;
931 MFEM_FOREACH_THREAD(dz,z,D1DR)
932 {
933 MFEM_FOREACH_THREAD(qy,y,Q1D)
934 {
935 MFEM_FOREACH_THREAD(qx,x,Q1D)
936 {
937 real_t u = 0.0;
938 real_t v = 0.0;
939 real_t w = 0.0;
940 for (int dy = 0; dy < D1DR; ++dy)
941 {
942 u += DDQ1[dz][dy][qx] * B[qy][dy];
943 v += DDQ0[dz][dy][qx] * G[qy][dy];
944 w += DDQ0[dz][dy][qx] * B[qy][dy];
945 }
946 DQQ0[dz][qy][qx] = u;
947 DQQ1[dz][qy][qx] = v;
948 DQQ2[dz][qy][qx] = w;
949 }
950 }
951 }
952 MFEM_SYNC_THREAD;
953 MFEM_FOREACH_THREAD(qz,z,Q1D)
954 {
955 MFEM_FOREACH_THREAD(qy,y,Q1D)
956 {
957 MFEM_FOREACH_THREAD(qx,x,Q1D)
958 {
959 real_t u = 0.0;
960 real_t v = 0.0;
961 real_t w = 0.0;
962 for (int dz = 0; dz < D1DR; ++dz)
963 {
964 u += DQQ0[dz][qy][qx] * B[qz][dz];
965 v += DQQ1[dz][qy][qx] * B[qz][dz];
966 w += DQQ2[dz][qy][qx] * G[qz][dz];
967 }
968 QQQ0[qz][qy][qx] = u;
969 QQQ1[qz][qy][qx] = v;
970 QQQ2[qz][qy][qx] = w;
971 }
972 }
973 }
974 MFEM_SYNC_THREAD;
975 MFEM_FOREACH_THREAD(qz,z,Q1D)
976 {
977 MFEM_FOREACH_THREAD(qy,y,Q1D)
978 {
979 MFEM_FOREACH_THREAD(qx,x,Q1D)
980 {
981 const int q = qx + (qy + qz * Q1D) * Q1D;
982 const real_t gX = QQQ0[qz][qy][qx];
983 const real_t gY = QQQ1[qz][qy][qx];
984 const real_t gZ = QQQ2[qz][qy][qx];
985 QQQ0[qz][qy][qx] = (D(q,0,0,e)*gX) + (D(q,1,0,e)*gY) + (D(q,2,0,e)*gZ);
986 QQQ1[qz][qy][qx] = (D(q,0,1,e)*gX) + (D(q,1,1,e)*gY) + (D(q,2,1,e)*gZ);
987 QQQ2[qz][qy][qx] = (D(q,0,2,e)*gX) + (D(q,1,2,e)*gY) + (D(q,2,2,e)*gZ);
988 }
989 }
990 }
991 MFEM_SYNC_THREAD;
992 if (tidz == 0)
993 {
994 MFEM_FOREACH_THREAD(d,y,D1DE)
995 {
996 MFEM_FOREACH_THREAD(q,x,Q1D)
997 {
998 Bt[d][q] = bt(d,q);
999 }
1000 }
1001 }
1002 MFEM_SYNC_THREAD;
1003 MFEM_FOREACH_THREAD(qz,z,Q1D)
1004 {
1005 MFEM_FOREACH_THREAD(qy,y,Q1D)
1006 {
1007 MFEM_FOREACH_THREAD(dx,x,D1DE)
1008 {
1009 real_t u = 0.0;
1010 real_t v = 0.0;
1011 real_t w = 0.0;
1012 for (int qx = 0; qx < Q1D; ++qx)
1013 {
1014 u += QQQ0[qz][qy][qx] * Bt[dx][qx];
1015 v += QQQ1[qz][qy][qx] * Bt[dx][qx];
1016 w += QQQ2[qz][qy][qx] * Bt[dx][qx];
1017 }
1018 QQD0[qz][qy][dx] = u;
1019 QQD1[qz][qy][dx] = v;
1020 QQD2[qz][qy][dx] = w;
1021 }
1022 }
1023 }
1024 MFEM_SYNC_THREAD;
1025 MFEM_FOREACH_THREAD(qz,z,Q1D)
1026 {
1027 MFEM_FOREACH_THREAD(dy,y,D1DE)
1028 {
1029 MFEM_FOREACH_THREAD(dx,x,D1DE)
1030 {
1031 real_t u = 0.0;
1032 real_t v = 0.0;
1033 real_t w = 0.0;
1034 for (int qy = 0; qy < Q1D; ++qy)
1035 {
1036 u += QQD0[qz][qy][dx] * Bt[dy][qy];
1037 v += QQD1[qz][qy][dx] * Bt[dy][qy];
1038 w += QQD2[qz][qy][dx] * Bt[dy][qy];
1039 }
1040 QDD0[qz][dy][dx] = u;
1041 QDD1[qz][dy][dx] = v;
1042 QDD2[qz][dy][dx] = w;
1043 }
1044 }
1045 }
1046 MFEM_SYNC_THREAD;
1047 MFEM_FOREACH_THREAD(dz,z,D1DE)
1048 {
1049 MFEM_FOREACH_THREAD(dy,y,D1DE)
1050 {
1051 MFEM_FOREACH_THREAD(dx,x,D1DE)
1052 {
1053 real_t u = 0.0;
1054 real_t v = 0.0;
1055 real_t w = 0.0;
1056 for (int qz = 0; qz < Q1D; ++qz)
1057 {
1058 u += QDD0[qz][dy][dx] * Bt[dz][qz];
1059 v += QDD1[qz][dy][dx] * Bt[dz][qz];
1060 w += QDD2[qz][dy][dx] * Bt[dz][qz];
1061 }
1062 y(dx,dy,dz,0,e) += u;
1063 y(dx,dy,dz,1,e) += v;
1064 y(dx,dy,dz,2,e) += w;
1065 }
1066 }
1067 }
1068 });
1069}
1070
1071static void PAGradientApply(const int dim,
1072 const int TR_D1D,
1073 const int TE_D1D,
1074 const int Q1D,
1075 const int NE,
1076 const Array<real_t> &B,
1077 const Array<real_t> &G,
1078 const Array<real_t> &Bt,
1079 const Vector &op,
1080 const Vector &x,
1081 Vector &y)
1082{
1083 if (dim == 2)
1084 {
1085 return PAGradientApply2D(NE,B,G,Bt,op,x,y,TR_D1D,TE_D1D,Q1D);
1086 }
1087 if (dim == 3)
1088 {
1089 return PAGradientApply3D(NE,B,G,Bt,op,x,y,TR_D1D,TE_D1D,Q1D);
1090 }
1091 MFEM_ABORT("Unknown kernel.");
1092}
1093
1094static void PAGradientApplyTranspose(const int dim,
1095 const int TR_D1D,
1096 const int TE_D1D,
1097 const int Q1D,
1098 const int NE,
1099 const Array<real_t> &Bt,
1100 const Array<real_t> &Gt,
1101 const Array<real_t> &B,
1102 const Vector &op,
1103 const Vector &x,
1104 Vector &y)
1105{
1106 if (dim == 2)
1107 {
1108 return PAGradientApplyTranspose2D(NE,Bt,Gt,B,op,x,y,TR_D1D,TE_D1D,Q1D);
1109 }
1110 if (dim == 3)
1111 {
1112 return PAGradientApplyTranspose3D(NE,Bt,Gt,B,op,x,y,TR_D1D,TE_D1D,Q1D);
1113 }
1114 MFEM_ABORT("Unknown kernel.");
1115}
1116
1117// PA Gradient Apply kernel
1119{
1120 PAGradientApply(dim, trial_dofs1D, test_dofs1D, quad1D, ne,
1121 trial_maps->B, trial_maps->G, test_maps->Bt, pa_data,
1122 x, y);
1123}
1124
1125// PA Gradient Apply transpose kernel
1127{
1128 PAGradientApplyTranspose(dim, trial_dofs1D, test_dofs1D, quad1D, ne,
1129 trial_maps->Bt, trial_maps->Gt, test_maps->B, pa_data,
1130 x, y);
1131}
1132
1133} // namespace mfem
const T * Read(bool on_dev=true) const
Shortcut for mfem::Read(a.GetMemory(), a.Size(), on_dev).
Definition array.hpp:337
Class to represent a coefficient evaluated at quadrature points.
static MemoryType GetMemoryType()
(DEPRECATED) Equivalent to GetDeviceMemoryType().
Definition device.hpp:278
Array< real_t > G
Gradients/divergences/curls of basis functions evaluated at quadrature points.
Definition fe_base.hpp:214
@ TENSOR
Tensor product representation using 1D matrices/tensors with dimensions using 1D number of quadrature...
Definition fe_base.hpp:165
Array< real_t > B
Basis functions evaluated at quadrature points.
Definition fe_base.hpp:193
int ndof
Number of degrees of freedom = number of basis functions. When mode is TENSOR, this is the 1D number.
Definition fe_base.hpp:178
Array< real_t > Gt
Transpose of G.
Definition fe_base.hpp:221
int nqpt
Number of quadrature points. When mode is TENSOR, this is the 1D number.
Definition fe_base.hpp:182
Array< real_t > Bt
Transpose of B.
Definition fe_base.hpp:199
Class FiniteElementSpace - responsible for providing FEM view of the mesh, mainly managing the set of...
Definition fespace.hpp:244
Ordering::Type GetOrdering() const
Return the ordering method.
Definition fespace.hpp:876
int GetNE() const
Returns number of elements in the mesh.
Definition fespace.hpp:891
Mesh * GetMesh() const
Returns the mesh.
Definition fespace.hpp:679
const FiniteElement * GetTypicalFE() const
Return GetFE(0) if the local mesh is not empty; otherwise return a typical FE based on the Geometry t...
Definition fespace.cpp:3871
Abstract class for all finite elements.
Definition fe_base.hpp:244
virtual const DofToQuad & GetDofToQuad(const IntegrationRule &ir, DofToQuad::Mode mode) const
Return a DofToQuad structure corresponding to the given IntegrationRule using the given DofToQuad::Mo...
Definition fe_base.cpp:365
int GetDim() const
Returns the reference space dimension for the finite element.
Definition fe_base.hpp:321
int GetMapType() const
Returns the FiniteElement::MapType of the element describing how reference functions are mapped to ph...
Definition fe_base.hpp:360
Vector detJ
Determinants of the Jacobians at all quadrature points.
Definition mesh.hpp:2982
Vector J
Jacobians of the element transformations at all quadrature points.
Definition mesh.hpp:2976
static const IntegrationRule & GetRule(const FiniteElement &trial_fe, const FiniteElement &test_fe, const ElementTransformation &Trans)
void AddMultTransposePA(const Vector &x, Vector &y) const override
Method for partially assembled transposed action.
void AddMultPA(const Vector &x, Vector &y) const override
Method for partially assembled action.
void AssemblePA(const FiniteElementSpace &trial_fes, const FiniteElementSpace &test_fes) override
Class for an integration rule - an Array of IntegrationPoint.
Definition intrules.hpp:100
int GetNPoints() const
Returns the number of the points in the integration rule.
Definition intrules.hpp:256
const Array< real_t > & GetWeights() const
Return the quadrature weights in a contiguous array.
Definition intrules.cpp:86
const IntegrationRule * IntRule
Mesh data type.
Definition mesh.hpp:64
int Dimension() const
Dimension of the reference space used within the elements.
Definition mesh.hpp:1216
ElementTransformation * GetTypicalElementTransformation()
If the local mesh is not empty return GetElementTransformation(0); otherwise, return the identity tra...
Definition mesh.cpp:390
const GeometricFactors * GetGeometricFactors(const IntegrationRule &ir, const int flags, MemoryType d_mt=MemoryType::DEFAULT)
Return the mesh geometric factors corresponding to the given integration rule.
Definition mesh.cpp:880
Class representing the storage layout of a QuadratureFunction.
Definition qspace.hpp:120
Vector data type.
Definition vector.hpp:82
virtual const real_t * Read(bool on_dev=true) const
Shortcut for mfem::Read(vec.GetMemory(), vec.Size(), on_dev).
Definition vector.hpp:494
virtual real_t * ReadWrite(bool on_dev=true)
Shortcut for mfem::ReadWrite(vec.GetMemory(), vec.Size(), on_dev).
Definition vector.hpp:510
void SetSize(int s)
Resize the vector to size s.
Definition vector.hpp:558
int dim
Definition ex24.cpp:53
void trans(const Vector &u, Vector &x)
Definition ex27.cpp:412
real_t b
Definition lissajous.cpp:42
const T * Read(const Memory< T > &mem, int size, bool on_dev=true)
Get a pointer for read access to mem with the mfem::Device's DeviceMemoryClass, if on_dev = true,...
Definition device.hpp:341
real_t u(const Vector &xvec)
Definition lor_mms.hpp:22
MFEM_HOST_DEVICE DeviceTensor< sizeof...(Dims), T > Reshape(T *ptr, Dims... dims)
Wrap a pointer as a DeviceTensor with automatically deduced template parameters.
Definition dtensor.hpp:131
@ COMPRESSED
Enable all above compressions.
void forall_3D(int N, int X, int Y, int Z, lambda &&body)
Definition forall.hpp:774
float real_t
Definition config.hpp:43
void forall(int N, lambda &&body)
Definition forall.hpp:753
static const DeviceDofQuadLimits & Get()
Return a const reference to the DeviceDofQuadLimits singleton.
Definition forall.hpp:128