MFEM v4.8.0
Finite element discretization library
Loading...
Searching...
No Matches
bilininteg_vecdiv_pa.cpp
Go to the documentation of this file.
1// Copyright (c) 2010-2025, Lawrence Livermore National Security, LLC. Produced
2// at the Lawrence Livermore National Laboratory. All Rights reserved. See files
3// LICENSE and NOTICE for details. LLNL-CODE-806117.
4//
5// This file is part of the MFEM library. For more information and source code
6// availability visit https://mfem.org.
7//
8// MFEM is free software; you can redistribute it and/or modify it under the
9// terms of the BSD-3 license. We welcome feedback and contributions, see file
10// CONTRIBUTING.md for details.
11
13#include "../bilininteg.hpp"
14#include "../gridfunc.hpp"
15
16namespace mfem
17{
18
19// PA Divergence Assemble 2D kernel
20static void PADivergenceSetup2D(const int Q1D,
21 const int NE,
22 const Array<real_t> &w,
23 const Vector &j,
24 const real_t COEFF,
25 Vector &op)
26{
27 const int NQ = Q1D*Q1D;
28 auto W = w.Read();
29 auto J = Reshape(j.Read(), NQ, 2, 2, NE);
30 auto y = Reshape(op.Write(), NQ, 2, 2, NE);
31
32 mfem::forall(NE, [=] MFEM_HOST_DEVICE (int e)
33 {
34 for (int q = 0; q < NQ; ++q)
35 {
36 const real_t J11 = J(q,0,0,e);
37 const real_t J12 = J(q,0,1,e);
38 const real_t J21 = J(q,1,0,e);
39 const real_t J22 = J(q,1,1,e);
40 // Store wq * Q * adj(J)
41 y(q,0,0,e) = W[q] * COEFF * J22; // 1,1
42 y(q,0,1,e) = W[q] * COEFF * -J12; // 1,2
43 y(q,1,0,e) = W[q] * COEFF * -J21; // 2,1
44 y(q,1,1,e) = W[q] * COEFF * J11; // 2,2
45 }
46 });
47}
48
49// PA Divergence Assemble 3D kernel
50static void PADivergenceSetup3D(const int Q1D,
51 const int NE,
52 const Array<real_t> &w,
53 const Vector &j,
54 const real_t COEFF,
55 Vector &op)
56{
57 const int NQ = Q1D*Q1D*Q1D;
58 auto W = w.Read();
59 auto J = Reshape(j.Read(), NQ, 3, 3, NE);
60 auto y = Reshape(op.Write(), NQ, 3, 3, NE);
61 mfem::forall(NE, [=] MFEM_HOST_DEVICE (int e)
62 {
63 for (int q = 0; q < NQ; ++q)
64 {
65 const real_t J11 = J(q,0,0,e);
66 const real_t J21 = J(q,1,0,e);
67 const real_t J31 = J(q,2,0,e);
68 const real_t J12 = J(q,0,1,e);
69 const real_t J22 = J(q,1,1,e);
70 const real_t J32 = J(q,2,1,e);
71 const real_t J13 = J(q,0,2,e);
72 const real_t J23 = J(q,1,2,e);
73 const real_t J33 = J(q,2,2,e);
74 const real_t cw = W[q] * COEFF;
75 // adj(J)
76 const real_t A11 = (J22 * J33) - (J23 * J32);
77 const real_t A12 = (J32 * J13) - (J12 * J33);
78 const real_t A13 = (J12 * J23) - (J22 * J13);
79 const real_t A21 = (J31 * J23) - (J21 * J33);
80 const real_t A22 = (J11 * J33) - (J13 * J31);
81 const real_t A23 = (J21 * J13) - (J11 * J23);
82 const real_t A31 = (J21 * J32) - (J31 * J22);
83 const real_t A32 = (J31 * J12) - (J11 * J32);
84 const real_t A33 = (J11 * J22) - (J12 * J21);
85 // Store wq * Q * adj(J)
86 y(q,0,0,e) = cw * A11; // 1,1
87 y(q,0,1,e) = cw * A12; // 1,2
88 y(q,0,2,e) = cw * A13; // 1,3
89 y(q,1,0,e) = cw * A21; // 2,1
90 y(q,1,1,e) = cw * A22; // 2,2
91 y(q,1,2,e) = cw * A23; // 2,3
92 y(q,2,0,e) = cw * A31; // 3,1
93 y(q,2,1,e) = cw * A32; // 3,2
94 y(q,2,2,e) = cw * A33; // 3,3
95 }
96 });
97}
98
99static void PADivergenceSetup(const int dim,
100 const int TR_D1D,
101 const int TE_D1D,
102 const int Q1D,
103 const int NE,
104 const Array<real_t> &W,
105 const Vector &J,
106 const real_t COEFF,
107 Vector &op)
108{
109 if (dim == 1) { MFEM_ABORT("dim==1 not supported in PADivergenceSetup"); }
110 if (dim == 2)
111 {
112 PADivergenceSetup2D(Q1D, NE, W, J, COEFF, op);
113 }
114 if (dim == 3)
115 {
116 PADivergenceSetup3D(Q1D, NE, W, J, COEFF, op);
117 }
118}
119
121 const FiniteElementSpace &test_fes)
122{
123 // Assumes tensor-product elements ordered by nodes
124 MFEM_ASSERT(trial_fes.GetOrdering() == Ordering::byNODES,
125 "PA Only supports Ordering::byNODES!");
126 Mesh *mesh = trial_fes.GetMesh();
127 const FiniteElement &trial_fe = *trial_fes.GetTypicalFE();
128 const FiniteElement &test_fe = *test_fes.GetTypicalFE();
130 const IntegrationRule *ir = IntRule ? IntRule : &GetRule(trial_fe, test_fe,
131 *trans);
132 const int dims = trial_fe.GetDim();
133 const int dimsToStore = dims * dims;
134 nq = ir->GetNPoints();
135 dim = mesh->Dimension();
136 ne = trial_fes.GetNE();
138 trial_maps = &trial_fe.GetDofToQuad(*ir, DofToQuad::TENSOR);
139 trial_dofs1D = trial_maps->ndof;
140 quad1D = trial_maps->nqpt;
141 test_maps = &test_fe.GetDofToQuad(*ir, DofToQuad::TENSOR);
142 test_dofs1D = test_maps->ndof;
143 MFEM_ASSERT(quad1D == test_maps->nqpt,
144 "PA requires test and trial space to have same number of quadrature points!");
145 pa_data.SetSize(nq * dimsToStore * ne, Device::GetMemoryType());
146 real_t coeff = 1.0;
147 if (Q)
148 {
149 ConstantCoefficient *cQ = dynamic_cast<ConstantCoefficient*>(Q);
150 MFEM_VERIFY(cQ != NULL, "only ConstantCoefficient is supported!");
151 coeff = cQ->constant;
152 }
153 PADivergenceSetup(dim, trial_dofs1D, test_dofs1D, quad1D,
154 ne, ir->GetWeights(), geom->J, coeff, pa_data);
155}
156
157// PA Divergence Apply 2D kernel
158template<const int T_TR_D1D = 0, const int T_TE_D1D = 0, const int T_Q1D = 0>
159static void PADivergenceApply2D(const int NE,
160 const Array<real_t> &b,
161 const Array<real_t> &g,
162 const Array<real_t> &bt,
163 const Vector &op_,
164 const Vector &x_,
165 Vector &y_,
166 const int tr_d1d = 0,
167 const int te_d1d = 0,
168 const int q1d = 0)
169{
170 const int TR_D1D = T_TR_D1D ? T_TR_D1D : tr_d1d;
171 const int TE_D1D = T_TE_D1D ? T_TE_D1D : te_d1d;
172 const int Q1D = T_Q1D ? T_Q1D : q1d;
173 MFEM_VERIFY(TR_D1D <= DeviceDofQuadLimits::Get().MAX_D1D, "");
174 MFEM_VERIFY(TE_D1D <= DeviceDofQuadLimits::Get().MAX_D1D, "");
175 MFEM_VERIFY(Q1D <= DeviceDofQuadLimits::Get().MAX_Q1D, "");
176 auto B = Reshape(b.Read(), Q1D, TR_D1D);
177 auto G = Reshape(g.Read(), Q1D, TR_D1D);
178 auto Bt = Reshape(bt.Read(), TE_D1D, Q1D);
179 auto op = Reshape(op_.Read(), Q1D*Q1D, 2,2, NE);
180 auto x = Reshape(x_.Read(), TR_D1D, TR_D1D, 2, NE);
181 auto y = Reshape(y_.ReadWrite(), TE_D1D, TE_D1D, NE);
182 mfem::forall(NE, [=] MFEM_HOST_DEVICE (int e)
183 {
184 const int TR_D1D = T_TR_D1D ? T_TR_D1D : tr_d1d;
185 const int TE_D1D = T_TE_D1D ? T_TE_D1D : te_d1d;
186 const int Q1D = T_Q1D ? T_Q1D : q1d;
187 const int VDIM = 2;
188 // the following variables are evaluated at compile time
189 constexpr int max_TE_D1D = T_TE_D1D ? T_TE_D1D : DofQuadLimits::MAX_D1D;
190 constexpr int max_Q1D = T_Q1D ? T_Q1D : DofQuadLimits::MAX_Q1D;
191
192 real_t grad[max_Q1D][max_Q1D][VDIM];
193 real_t div[max_Q1D][max_Q1D];
194 for (int qy = 0; qy < Q1D; ++qy)
195 {
196 for (int qx = 0; qx < Q1D; ++qx)
197 {
198 div[qy][qx] = 0.0;
199 }
200 }
201
202 for (int c = 0; c < VDIM; ++c)
203 {
204 for (int qy = 0; qy < Q1D; ++qy)
205 {
206 for (int qx = 0; qx < Q1D; ++qx)
207 {
208 grad[qy][qx][0] = 0.0;
209 grad[qy][qx][1] = 0.0;
210 }
211 }
212 for (int dy = 0; dy < TR_D1D; ++dy)
213 {
214 real_t gradX[max_Q1D][VDIM];
215 for (int qx = 0; qx < Q1D; ++qx)
216 {
217 gradX[qx][0] = 0.0;
218 gradX[qx][1] = 0.0;
219 }
220 for (int dx = 0; dx < TR_D1D; ++dx)
221 {
222 const real_t s = x(dx,dy,c,e);
223 for (int qx = 0; qx < Q1D; ++qx)
224 {
225 gradX[qx][0] += s * G(qx,dx);
226 gradX[qx][1] += s * B(qx,dx);
227 }
228 }
229 for (int qy = 0; qy < Q1D; ++qy)
230 {
231 const real_t wy = B(qy,dy);
232 const real_t wDy = G(qy,dy);
233 for (int qx = 0; qx < Q1D; ++qx)
234 {
235 grad[qy][qx][0] += gradX[qx][0] * wy;
236 grad[qy][qx][1] += gradX[qx][1] * wDy;
237 }
238 }
239 }
240 // We've now calculated grad(u_c) = [Dxy_1, xDy_2] in plane
241 for (int qy = 0; qy < Q1D; ++qy)
242 {
243 for (int qx = 0; qx < Q1D; ++qx)
244 {
245 const int q = qx + qy * Q1D;
246 const real_t gradX = grad[qy][qx][0];
247 const real_t gradY = grad[qy][qx][1];
248
249 div[qy][qx] += gradX*op(q,0,c,e) + gradY*op(q,1,c,e);
250 }
251 }
252 }
253 // We've now calculated div = reshape(div phi * op) * u
254 for (int qy = 0; qy < Q1D; ++qy)
255 {
256 real_t opX[max_TE_D1D];
257 for (int dx = 0; dx < TE_D1D; ++dx)
258 {
259 opX[dx] = 0.0;
260 for (int qx = 0; qx < Q1D; ++qx)
261 {
262 opX[dx] += Bt(dx,qx)*div[qy][qx];
263 }
264 }
265 for (int dy = 0; dy < TE_D1D; ++dy)
266 {
267 for (int dx = 0; dx < TE_D1D; ++dx)
268 {
269 y(dx,dy,e) += Bt(dy,qy)*opX[dx];
270 }
271 }
272 }
273 // We've now calculated y = p * div
274 });
275}
276
277// Shared memory PA Divergence Apply 2D kernel
278template<const int T_TR_D1D = 0, const int T_TE_D1D = 0, const int T_Q1D = 0,
279 const int T_NBZ = 0>
280static void SmemPADivergenceApply2D(const int NE,
281 const Array<real_t> &b_,
282 const Array<real_t> &g_,
283 const Array<real_t> &bt_,
284 const Vector &op_,
285 const Vector &x_,
286 Vector &y_,
287 const int tr_d1d = 0,
288 const int te_d1d = 0,
289 const int q1d = 0)
290{
291 // TODO
292 MFEM_ASSERT(false, "SHARED MEM NOT PROGRAMMED YET");
293}
294
295// PA Divergence Apply 2D kernel transpose
296template<const int T_TR_D1D = 0, const int T_TE_D1D = 0, const int T_Q1D = 0>
297static void PADivergenceApplyTranspose2D(const int NE,
298 const Array<real_t> &bt,
299 const Array<real_t> &gt,
300 const Array<real_t> &b,
301 const Vector &op_,
302 const Vector &x_,
303 Vector &y_,
304 const int tr_d1d = 0,
305 const int te_d1d = 0,
306 const int q1d = 0)
307{
308 const int TR_D1D = T_TR_D1D ? T_TR_D1D : tr_d1d;
309 const int TE_D1D = T_TE_D1D ? T_TE_D1D : te_d1d;
310 const int Q1D = T_Q1D ? T_Q1D : q1d;
311 MFEM_VERIFY(TR_D1D <= DeviceDofQuadLimits::Get().MAX_D1D, "");
312 MFEM_VERIFY(TE_D1D <= DeviceDofQuadLimits::Get().MAX_D1D, "");
313 MFEM_VERIFY(Q1D <= DeviceDofQuadLimits::Get().MAX_Q1D, "");
314 auto Bt = Reshape(bt.Read(), TR_D1D, Q1D);
315 auto Gt = Reshape(gt.Read(), TR_D1D, Q1D);
316 auto B = Reshape(b.Read(), Q1D, TE_D1D);
317 auto op = Reshape(op_.Read(), Q1D*Q1D, 2,2, NE);
318 auto x = Reshape(x_.Read(), TE_D1D, TE_D1D, NE);
319 auto y = Reshape(y_.ReadWrite(), TR_D1D, TR_D1D, 2, NE);
320 mfem::forall(NE, [=] MFEM_HOST_DEVICE (int e)
321 {
322 const int TR_D1D = T_TR_D1D ? T_TR_D1D : tr_d1d;
323 const int TE_D1D = T_TE_D1D ? T_TE_D1D : te_d1d;
324 const int Q1D = T_Q1D ? T_Q1D : q1d;
325 const int VDIM = 2;
326 // the following variables are evaluated at compile time
327 constexpr int max_TR_D1D = T_TR_D1D ? T_TR_D1D : DofQuadLimits::MAX_D1D;
328 constexpr int max_Q1D = T_Q1D ? T_Q1D : DofQuadLimits::MAX_Q1D;
329
330 real_t quadTest[max_Q1D][max_Q1D];
331 real_t grad[max_Q1D][max_Q1D][VDIM];
332 for (int qy = 0; qy < Q1D; ++qy)
333 {
334 for (int qx = 0; qx < Q1D; ++qx)
335 {
336 quadTest[qy][qx] = 0.0;
337 }
338 }
339 for (int dy = 0; dy < TE_D1D; ++dy)
340 {
341 real_t quadTestX[max_Q1D];
342 for (int qx = 0; qx < Q1D; ++qx)
343 {
344 quadTestX[qx] = 0.0;
345 }
346 for (int dx = 0; dx < TE_D1D; ++dx)
347 {
348 const real_t s = x(dx,dy,e);
349 for (int qx = 0; qx < Q1D; ++qx)
350 {
351 quadTestX[qx] += s * B(qx,dx);
352 }
353 }
354 for (int qy = 0; qy < Q1D; ++qy)
355 {
356 const real_t wy = B(qy,dy);
357 for (int qx = 0; qx < Q1D; ++qx)
358 {
359 quadTest[qy][qx] += quadTestX[qx] * wy;
360 }
361 }
362 }
363 // We've now calculated x on the quads
364 for (int c = 0; c < VDIM; ++c)
365 {
366 for (int qy = 0; qy < Q1D; ++qy)
367 {
368 for (int qx = 0; qx < Q1D; ++qx)
369 {
370 const int q = qx + qy * Q1D;
371 grad[qy][qx][0] = quadTest[qy][qx]*op(q,0,c,e);
372 grad[qy][qx][1] = quadTest[qy][qx]*op(q,1,c,e);
373 }
374 }
375 // We've now calculated op_c^T * x
376 for (int qy = 0; qy < Q1D; ++qy)
377 {
378 real_t gradX[max_TR_D1D][VDIM];
379 for (int dx = 0; dx < TR_D1D; ++dx)
380 {
381 gradX[dx][0] = 0.0;
382 gradX[dx][1] = 0.0;
383 }
384 for (int qx = 0; qx < Q1D; ++qx)
385 {
386 const real_t gX = grad[qy][qx][0];
387 const real_t gY = grad[qy][qx][1];
388 for (int dx = 0; dx < TR_D1D; ++dx)
389 {
390 const real_t wx = Bt(dx,qx);
391 const real_t wDx = Gt(dx,qx);
392 gradX[dx][0] += gX * wDx;
393 gradX[dx][1] += gY * wx;
394 }
395 }
396 for (int dy = 0; dy < TR_D1D; ++dy)
397 {
398 const real_t wy = Bt(dy,qy);
399 const real_t wDy = Gt(dy,qy);
400 for (int dx = 0; dx < TR_D1D; ++dx)
401 {
402 y(dx,dy,c,e) += ((gradX[dx][0] * wy) + (gradX[dx][1] * wDy));
403 }
404 }
405 }
406 }
407 // We've now calculated y = reshape(div u * op^T) * x
408 });
409}
410
411// PA Vector Divergence Apply 3D kernel
412template<const int T_TR_D1D = 0, const int T_TE_D1D = 0, const int T_Q1D = 0>
413static void PADivergenceApply3D(const int NE,
414 const Array<real_t> &b,
415 const Array<real_t> &g,
416 const Array<real_t> &bt,
417 const Vector &op_,
418 const Vector &x_,
419 Vector &y_,
420 int tr_d1d = 0,
421 int te_d1d = 0,
422 int q1d = 0)
423{
424 const int TR_D1D = T_TR_D1D ? T_TR_D1D : tr_d1d;
425 const int TE_D1D = T_TE_D1D ? T_TE_D1D : te_d1d;
426 const int Q1D = T_Q1D ? T_Q1D : q1d;
427 MFEM_VERIFY(TR_D1D <= DeviceDofQuadLimits::Get().MAX_D1D, "");
428 MFEM_VERIFY(TE_D1D <= DeviceDofQuadLimits::Get().MAX_D1D, "");
429 MFEM_VERIFY(Q1D <= DeviceDofQuadLimits::Get().MAX_Q1D, "");
430 auto B = Reshape(b.Read(), Q1D, TR_D1D);
431 auto G = Reshape(g.Read(), Q1D, TR_D1D);
432 auto Bt = Reshape(bt.Read(), TE_D1D, Q1D);
433 auto op = Reshape(op_.Read(), Q1D*Q1D*Q1D, 3,3, NE);
434 auto x = Reshape(x_.Read(), TR_D1D, TR_D1D, TR_D1D, 3, NE);
435 auto y = Reshape(y_.ReadWrite(), TE_D1D, TE_D1D, TE_D1D, NE);
436 mfem::forall(NE, [=] MFEM_HOST_DEVICE (int e)
437 {
438 const int TR_D1D = T_TR_D1D ? T_TR_D1D : tr_d1d;
439 const int TE_D1D = T_TE_D1D ? T_TE_D1D : te_d1d;
440 const int Q1D = T_Q1D ? T_Q1D : q1d;
441 const int VDIM = 3;
442 // the following variables are evaluated at compile time
443 constexpr int max_TE_D1D = T_TE_D1D ? T_TE_D1D : DofQuadLimits::MAX_D1D;
444 constexpr int max_Q1D = T_Q1D ? T_Q1D : DofQuadLimits::MAX_Q1D;
445
446 real_t grad[max_Q1D][max_Q1D][max_Q1D][VDIM];
447 real_t div[max_Q1D][max_Q1D][max_Q1D];
448 for (int qz = 0; qz < Q1D; ++qz)
449 {
450 for (int qy = 0; qy < Q1D; ++qy)
451 {
452 for (int qx = 0; qx < Q1D; ++qx)
453 {
454 div[qz][qy][qx] = 0.0;
455 }
456 }
457 }
458
459 for (int c = 0; c < VDIM; ++c)
460 {
461 for (int qz = 0; qz < Q1D; ++qz)
462 {
463 for (int qy = 0; qy < Q1D; ++qy)
464 {
465 for (int qx = 0; qx < Q1D; ++qx)
466 {
467 grad[qz][qy][qx][0] = 0.0;
468 grad[qz][qy][qx][1] = 0.0;
469 grad[qz][qy][qx][2] = 0.0;
470 }
471 }
472 }
473 for (int dz = 0; dz < TR_D1D; ++dz)
474 {
475 real_t gradXY[max_Q1D][max_Q1D][VDIM];
476 for (int qy = 0; qy < Q1D; ++qy)
477 {
478 for (int qx = 0; qx < Q1D; ++qx)
479 {
480 gradXY[qy][qx][0] = 0.0;
481 gradXY[qy][qx][1] = 0.0;
482 gradXY[qy][qx][2] = 0.0;
483 }
484 }
485 for (int dy = 0; dy < TR_D1D; ++dy)
486 {
487 real_t gradX[max_Q1D][VDIM];
488 for (int qx = 0; qx < Q1D; ++qx)
489 {
490 gradX[qx][0] = 0.0;
491 gradX[qx][1] = 0.0;
492 gradX[qx][2] = 0.0;
493 }
494 for (int dx = 0; dx < TR_D1D; ++dx)
495 {
496 const real_t s = x(dx,dy,dz,c,e);
497 for (int qx = 0; qx < Q1D; ++qx)
498 {
499 gradX[qx][0] += s * G(qx,dx);
500 gradX[qx][1] += s * B(qx,dx);
501 gradX[qx][2] += s * B(qx,dx);
502 }
503 }
504 for (int qy = 0; qy < Q1D; ++qy)
505 {
506 const real_t wy = B(qy,dy);
507 const real_t wDy = G(qy,dy);
508 for (int qx = 0; qx < Q1D; ++qx)
509 {
510 gradXY[qy][qx][0] += gradX[qx][0] * wy;
511 gradXY[qy][qx][1] += gradX[qx][1] * wDy;
512 gradXY[qy][qx][2] += gradX[qx][2] * wy;
513 }
514 }
515 }
516 for (int qz = 0; qz < Q1D; ++qz)
517 {
518 const real_t wz = B(qz,dz);
519 const real_t wDz = G(qz,dz);
520 for (int qy = 0; qy < Q1D; ++qy)
521 {
522 for (int qx = 0; qx < Q1D; ++qx)
523 {
524 grad[qz][qy][qx][0] += gradXY[qy][qx][0] * wz;
525 grad[qz][qy][qx][1] += gradXY[qy][qx][1] * wz;
526 grad[qz][qy][qx][2] += gradXY[qy][qx][2] * wDz;
527 }
528 }
529 }
530 }
531 // We've now calculated grad(u_c) = [Dxyz_1, xDyz_2, xyDz_3] in plane
532 for (int qz = 0; qz < Q1D; ++qz)
533 {
534 for (int qy = 0; qy < Q1D; ++qy)
535 {
536 for (int qx = 0; qx < Q1D; ++qx)
537 {
538 const int q = qx + (qy + qz * Q1D) * Q1D;
539 const real_t gradX = grad[qz][qy][qx][0];
540 const real_t gradY = grad[qz][qy][qx][1];
541 const real_t gradZ = grad[qz][qy][qx][2];
542
543 div[qz][qy][qx] += gradX*op(q,0,c,e) + gradY*op(q,1,c,e) + gradZ*op(q,2,c,e);
544
545 }
546 }
547 }
548 }
549 // We've now calculated div = reshape(div phi * op) * u
550 for (int qz = 0; qz < Q1D; ++qz)
551 {
552 real_t opXY[max_TE_D1D][max_TE_D1D];
553 for (int dy = 0; dy < TE_D1D; ++dy)
554 {
555 for (int dx = 0; dx < TE_D1D; ++dx)
556 {
557 opXY[dy][dx] = 0.0;
558 }
559 }
560 for (int qy = 0; qy < Q1D; ++qy)
561 {
562 real_t opX[max_TE_D1D];
563 for (int dx = 0; dx < TE_D1D; ++dx)
564 {
565 opX[dx] = 0.0;
566 for (int qx = 0; qx < Q1D; ++qx)
567 {
568 opX[dx] += Bt(dx,qx)*div[qz][qy][qx];
569 }
570 }
571 for (int dy = 0; dy < TE_D1D; ++dy)
572 {
573 for (int dx = 0; dx < TE_D1D; ++dx)
574 {
575 opXY[dy][dx] += Bt(dy,qy)*opX[dx];
576 }
577 }
578 }
579 for (int dz = 0; dz < TE_D1D; ++dz)
580 {
581 for (int dy = 0; dy < TE_D1D; ++dy)
582 {
583 for (int dx = 0; dx < TE_D1D; ++dx)
584 {
585 y(dx,dy,dz,e) += Bt(dz,qz)*opXY[dy][dx];
586 }
587 }
588 }
589 }
590 // We've now calculated y = p * div
591 });
592}
593
594// PA Vector Divergence Apply 3D kernel
595template<const int T_TR_D1D = 0, const int T_TE_D1D = 0, const int T_Q1D = 0>
596static void PADivergenceApplyTranspose3D(const int NE,
597 const Array<real_t> &bt,
598 const Array<real_t> &gt,
599 const Array<real_t> &b,
600 const Vector &op_,
601 const Vector &x_,
602 Vector &y_,
603 int tr_d1d = 0,
604 int te_d1d = 0,
605 int q1d = 0)
606{
607 const int TR_D1D = T_TR_D1D ? T_TR_D1D : tr_d1d;
608 const int TE_D1D = T_TE_D1D ? T_TE_D1D : te_d1d;
609 const int Q1D = T_Q1D ? T_Q1D : q1d;
610 MFEM_VERIFY(TR_D1D <= DeviceDofQuadLimits::Get().MAX_D1D, "");
611 MFEM_VERIFY(TE_D1D <= DeviceDofQuadLimits::Get().MAX_D1D, "");
612 MFEM_VERIFY(Q1D <= DeviceDofQuadLimits::Get().MAX_Q1D, "");
613 auto Bt = Reshape(bt.Read(), TR_D1D, Q1D);
614 auto Gt = Reshape(gt.Read(), TR_D1D, Q1D);
615 auto B = Reshape(b.Read(), Q1D, TE_D1D);
616 auto op = Reshape(op_.Read(), Q1D*Q1D*Q1D, 3,3, NE);
617 auto x = Reshape(x_.Read(), TE_D1D, TE_D1D, TE_D1D, NE);
618 auto y = Reshape(y_.ReadWrite(), TR_D1D, TR_D1D, TR_D1D, 3, NE);
619 mfem::forall(NE, [=] MFEM_HOST_DEVICE (int e)
620 {
621 const int TR_D1D = T_TR_D1D ? T_TR_D1D : tr_d1d;
622 const int TE_D1D = T_TE_D1D ? T_TE_D1D : te_d1d;
623 const int Q1D = T_Q1D ? T_Q1D : q1d;
624 const int VDIM = 3;
625 // the following variables are evaluated at compile time
626 constexpr int max_TR_D1D = T_TR_D1D ? T_TR_D1D : DofQuadLimits::MAX_D1D;
627 constexpr int max_Q1D = T_Q1D ? T_Q1D : DofQuadLimits::MAX_Q1D;
628
629 real_t quadTest[max_Q1D][max_Q1D][max_Q1D];
630 real_t grad[max_Q1D][max_Q1D][max_Q1D][VDIM];
631 for (int qz = 0; qz < Q1D; ++qz)
632 {
633 for (int qy = 0; qy < Q1D; ++qy)
634 {
635 for (int qx = 0; qx < Q1D; ++qx)
636 {
637 quadTest[qz][qy][qx] = 0.0;
638 }
639 }
640 }
641 for (int dz = 0; dz < TE_D1D; ++dz)
642 {
643 real_t quadTestXY[max_Q1D][max_Q1D];
644 for (int qy = 0; qy < Q1D; ++qy)
645 {
646 for (int qx = 0; qx < Q1D; ++qx)
647 {
648 quadTestXY[qy][qx] = 0.0;
649 }
650 }
651 for (int dy = 0; dy < TE_D1D; ++dy)
652 {
653 real_t quadTestX[max_Q1D];
654 for (int qx = 0; qx < Q1D; ++qx)
655 {
656 quadTestX[qx] = 0.0;
657 }
658 for (int dx = 0; dx < TE_D1D; ++dx)
659 {
660 const real_t s = x(dx,dy,dz,e);
661 for (int qx = 0; qx < Q1D; ++qx)
662 {
663 quadTestX[qx] += s * B(qx,dx);
664 }
665 }
666 for (int qy = 0; qy < Q1D; ++qy)
667 {
668 const real_t wy = B(qy,dy);
669 for (int qx = 0; qx < Q1D; ++qx)
670 {
671 quadTestXY[qy][qx] += quadTestX[qx] * wy;
672 }
673 }
674 }
675 for (int qz = 0; qz < Q1D; ++qz)
676 {
677 const real_t wz = B(qz,dz);
678 for (int qy = 0; qy < Q1D; ++qy)
679 {
680 for (int qx = 0; qx < Q1D; ++qx)
681 {
682 quadTest[qz][qy][qx] += quadTestXY[qy][qx] * wz;
683 }
684 }
685 }
686 }
687 // We've now calculated x on the quads
688 for (int c = 0; c < VDIM; ++c)
689 {
690 for (int qz = 0; qz < Q1D; ++qz)
691 {
692 for (int qy = 0; qy < Q1D; ++qy)
693 {
694 for (int qx = 0; qx < Q1D; ++qx)
695 {
696 const int q = qx + (qy + qz * Q1D) * Q1D;
697 grad[qz][qy][qx][0] = quadTest[qz][qy][qx]*op(q,0,c,e);
698 grad[qz][qy][qx][1] = quadTest[qz][qy][qx]*op(q,1,c,e);
699 grad[qz][qy][qx][2] = quadTest[qz][qy][qx]*op(q,2,c,e);
700 }
701 }
702 }
703 // We've now calculated op_c^T * x
704 for (int qz = 0; qz < Q1D; ++qz)
705 {
706 real_t gradXY[max_TR_D1D][max_TR_D1D][VDIM];
707 for (int dy = 0; dy < TR_D1D; ++dy)
708 {
709 for (int dx = 0; dx < TR_D1D; ++dx)
710 {
711 gradXY[dy][dx][0] = 0.0;
712 gradXY[dy][dx][1] = 0.0;
713 gradXY[dy][dx][2] = 0.0;
714 }
715 }
716 for (int qy = 0; qy < Q1D; ++qy)
717 {
718 real_t gradX[max_TR_D1D][VDIM];
719 for (int dx = 0; dx < TR_D1D; ++dx)
720 {
721 gradX[dx][0] = 0.0;
722 gradX[dx][1] = 0.0;
723 gradX[dx][2] = 0.0;
724 }
725 for (int qx = 0; qx < Q1D; ++qx)
726 {
727 const real_t gX = grad[qz][qy][qx][0];
728 const real_t gY = grad[qz][qy][qx][1];
729 const real_t gZ = grad[qz][qy][qx][2];
730 for (int dx = 0; dx < TR_D1D; ++dx)
731 {
732 const real_t wx = Bt(dx,qx);
733 const real_t wDx = Gt(dx,qx);
734 gradX[dx][0] += gX * wDx;
735 gradX[dx][1] += gY * wx;
736 gradX[dx][2] += gZ * wx;
737 }
738 }
739 for (int dy = 0; dy < TR_D1D; ++dy)
740 {
741 const real_t wy = Bt(dy,qy);
742 const real_t wDy = Gt(dy,qy);
743 for (int dx = 0; dx < TR_D1D; ++dx)
744 {
745 gradXY[dy][dx][0] += gradX[dx][0] * wy;
746 gradXY[dy][dx][1] += gradX[dx][1] * wDy;
747 gradXY[dy][dx][2] += gradX[dx][2] * wy;
748 }
749 }
750 }
751 for (int dz = 0; dz < TR_D1D; ++dz)
752 {
753 const real_t wz = Bt(dz,qz);
754 const real_t wDz = Gt(dz,qz);
755 for (int dy = 0; dy < TR_D1D; ++dy)
756 {
757 for (int dx = 0; dx < TR_D1D; ++dx)
758 {
759 y(dx,dy,dz,c,e) +=
760 ((gradXY[dy][dx][0] * wz) +
761 (gradXY[dy][dx][1] * wz) +
762 (gradXY[dy][dx][2] * wDz));
763 }
764 }
765 }
766 }
767 }
768 // We've now calculated y = reshape(div u * op^T) * x
769 });
770}
771
772// Shared memory PA Vector Divergence Apply 3D kernel
773template<const int T_TR_D1D = 0, const int T_TE_D1D = 0, const int T_Q1D = 0>
774static void SmemPADivergenceApply3D(const int NE,
775 const Array<real_t> &b_,
776 const Array<real_t> &g_,
777 const Array<real_t> &bt_,
778 const Vector &q_,
779 const Vector &x_,
780 Vector &y_,
781 const int tr_d1d = 0,
782 const int te_d1d = 0,
783 const int q1d = 0)
784{
785 const int TR_D1D = T_TR_D1D ? T_TR_D1D : tr_d1d;
786 const int TE_D1D = T_TE_D1D ? T_TE_D1D : te_d1d;
787 const int Q1D = T_Q1D ? T_Q1D : q1d;
788
789 MFEM_VERIFY(TR_D1D <= DeviceDofQuadLimits::Get().MAX_D1D, "");
790 MFEM_VERIFY(TE_D1D <= DeviceDofQuadLimits::Get().MAX_D1D, "");
791 MFEM_VERIFY(Q1D <= DeviceDofQuadLimits::Get().MAX_Q1D, "");
792
793 auto b = Reshape(b_.Read(), Q1D, TR_D1D);
794 auto g = Reshape(g_.Read(), Q1D, TR_D1D);
795 auto bt = Reshape(bt_.Read(), TE_D1D, Q1D);
796 auto Q = Reshape(q_.Read(), Q1D*Q1D*Q1D, 3,3, NE);
797 auto x = Reshape(x_.Read(), TR_D1D, TR_D1D, TR_D1D, 3, NE);
798 auto y = Reshape(y_.ReadWrite(), TE_D1D, TE_D1D, TE_D1D, NE);
799
800 mfem::forall_3D(NE, Q1D, Q1D, Q1D, [=] MFEM_HOST_DEVICE (int e)
801 {
802 constexpr int VDIM = 3;
803 const int tidz = MFEM_THREAD_ID(z);
804 const int D1DR = T_TR_D1D ? T_TR_D1D : tr_d1d;
805 const int D1DE = T_TE_D1D ? T_TE_D1D : te_d1d;
806 const int Q1D = T_Q1D ? T_Q1D : q1d;
807 constexpr int MQ1 = T_Q1D ? T_Q1D : DofQuadLimits::MAX_Q1D;
808 constexpr int MD1R = T_TR_D1D ? T_TR_D1D : DofQuadLimits::MAX_D1D;
809 constexpr int MD1E = T_TE_D1D ? T_TE_D1D : DofQuadLimits::MAX_D1D;
810 constexpr int MD1 = MD1E > MD1R ? MD1E : MD1R;
811 constexpr int MDQ = MQ1 > MD1 ? MQ1 : MD1;
812 MFEM_SHARED real_t sBG[2][MQ1*MD1];
813 real_t (*B)[MD1] = (real_t (*)[MD1]) (sBG+0);
814 real_t (*G)[MD1] = (real_t (*)[MD1]) (sBG+1);
815 real_t (*Bt)[MQ1] = (real_t (*)[MQ1]) (sBG+0);
816 MFEM_SHARED real_t sm0[3][MDQ*MDQ*MDQ];
817 MFEM_SHARED real_t sm1[3][MDQ*MDQ*MDQ];
818 real_t (*X)[MD1][MD1] = (real_t (*)[MD1][MD1]) (sm0+2);
819 real_t (*DDQ0)[MD1][MQ1] = (real_t (*)[MD1][MQ1]) (sm0+0);
820 real_t (*DDQ1)[MD1][MQ1] = (real_t (*)[MD1][MQ1]) (sm0+1);
821 real_t (*DQQ0)[MQ1][MQ1] = (real_t (*)[MQ1][MQ1]) (sm1+0);
822 real_t (*DQQ1)[MQ1][MQ1] = (real_t (*)[MQ1][MQ1]) (sm1+1);
823 real_t (*DQQ2)[MQ1][MQ1] = (real_t (*)[MQ1][MQ1]) (sm1+2);
824 real_t (*QQQ0)[MQ1][MQ1] = (real_t (*)[MQ1][MQ1]) (sm0+0);
825 real_t (*QQQ1)[MQ1][MQ1] = (real_t (*)[MQ1][MQ1]) (sm0+1);
826 real_t (*QQQ2)[MQ1][MQ1] = (real_t (*)[MQ1][MQ1]) (sm0+2);
827 real_t (*QQD0)[MQ1][MD1] = (real_t (*)[MQ1][MD1]) (sm1+0);
828 real_t (*QDD0)[MD1][MD1] = (real_t (*)[MD1][MD1]) (sm0+0);
829 MFEM_SHARED real_t div[MQ1][MQ1][MQ1];
830
831 if (tidz == 0)
832 {
833 MFEM_FOREACH_THREAD(d,y,D1DR)
834 {
835 MFEM_FOREACH_THREAD(q,x,Q1D)
836 {
837 B[q][d] = b(q,d);
838 G[q][d] = g(q,d);
839 }
840 }
841 }
842 MFEM_SYNC_THREAD;
843 MFEM_FOREACH_THREAD(qz,z,Q1D)
844 {
845 MFEM_FOREACH_THREAD(qy,y,Q1D)
846 {
847 MFEM_FOREACH_THREAD(qx,x,Q1D)
848 {
849 div[qz][qy][qx] = 0.0;
850 }
851 }
852 }
853 MFEM_SYNC_THREAD;
854
855 for (int c = 0; c < VDIM; ++c)
856 {
857 MFEM_FOREACH_THREAD(qz,z,Q1D)
858 {
859 MFEM_FOREACH_THREAD(qy,y,Q1D)
860 {
861 MFEM_FOREACH_THREAD(qx,x,Q1D)
862 {
863 QQQ0[qz][qy][qx] = 0.0;
864 QQQ1[qz][qy][qx] = 0.0;
865 QQQ2[qz][qy][qx] = 0.0;
866 }
867 }
868 }
869 MFEM_SYNC_THREAD;
870 MFEM_FOREACH_THREAD(dz,z,D1DR)
871 {
872 MFEM_FOREACH_THREAD(dy,y,D1DR)
873 {
874 MFEM_FOREACH_THREAD(dx,x,D1DR)
875 {
876 X[dz][dy][dx] = x(dx,dy,dz,c,e);
877 }
878 }
879 }
880 MFEM_SYNC_THREAD;
881 MFEM_FOREACH_THREAD(dz,z,D1DR)
882 {
883 MFEM_FOREACH_THREAD(dy,y,D1DR)
884 {
885 MFEM_FOREACH_THREAD(qx,x,Q1D)
886 {
887 real_t u = 0.0;
888 real_t v = 0.0;
889 for (int dx = 0; dx < D1DR; ++dx)
890 {
891 const real_t coord = X[dz][dy][dx];
892 u += coord * B[qx][dx];
893 v += coord * G[qx][dx];
894 }
895 DDQ0[dz][dy][qx] = u;
896 DDQ1[dz][dy][qx] = v;
897 }
898 }
899 }
900 MFEM_SYNC_THREAD;
901 MFEM_FOREACH_THREAD(dz,z,D1DR)
902 {
903 MFEM_FOREACH_THREAD(qy,y,Q1D)
904 {
905 MFEM_FOREACH_THREAD(qx,x,Q1D)
906 {
907 real_t u = 0.0;
908 real_t v = 0.0;
909 real_t w = 0.0;
910 for (int dy = 0; dy < D1DR; ++dy)
911 {
912 u += DDQ1[dz][dy][qx] * B[qy][dy];
913 v += DDQ0[dz][dy][qx] * G[qy][dy];
914 w += DDQ0[dz][dy][qx] * B[qy][dy];
915 }
916 DQQ0[dz][qy][qx] = u;
917 DQQ1[dz][qy][qx] = v;
918 DQQ2[dz][qy][qx] = w;
919 }
920 }
921 }
922 MFEM_SYNC_THREAD;
923 MFEM_FOREACH_THREAD(qz,z,Q1D)
924 {
925 MFEM_FOREACH_THREAD(qy,y,Q1D)
926 {
927 MFEM_FOREACH_THREAD(qx,x,Q1D)
928 {
929 real_t u = 0.0;
930 real_t v = 0.0;
931 real_t w = 0.0;
932 for (int dz = 0; dz < D1DR; ++dz)
933 {
934 u += DQQ0[dz][qy][qx] * B[qz][dz];
935 v += DQQ1[dz][qy][qx] * B[qz][dz];
936 w += DQQ2[dz][qy][qx] * G[qz][dz];
937 }
938 QQQ0[qz][qy][qx] = u;
939 QQQ1[qz][qy][qx] = v;
940 QQQ2[qz][qy][qx] = w;
941 }
942 }
943 }
944 MFEM_SYNC_THREAD;
945 MFEM_FOREACH_THREAD(qz,z,Q1D)
946 {
947 MFEM_FOREACH_THREAD(qy,y,Q1D)
948 {
949 MFEM_FOREACH_THREAD(qx,x,Q1D)
950 {
951 const int q = qx + (qy + qz * Q1D) * Q1D;
952 const real_t gX = QQQ0[qz][qy][qx];
953 const real_t gY = QQQ1[qz][qy][qx];
954 const real_t gZ = QQQ2[qz][qy][qx];
955 div[qz][qy][qx] += gX*Q(q,0,c,e) + gY*Q(q,1,c,e) + gZ*Q(q,2,c,e);
956 }
957 }
958 }
959 MFEM_SYNC_THREAD;
960 }
961
962 if (tidz == 0)
963 {
964 MFEM_FOREACH_THREAD(d,y,D1DE)
965 {
966 MFEM_FOREACH_THREAD(q,x,Q1D)
967 {
968 Bt[d][q] = bt(d,q);
969 }
970 }
971 }
972 MFEM_SYNC_THREAD;
973
974 MFEM_FOREACH_THREAD(qz,z,Q1D)
975 {
976 MFEM_FOREACH_THREAD(qy,y,Q1D)
977 {
978 MFEM_FOREACH_THREAD(dx,x,D1DE)
979 {
980 real_t u = 0.0;
981 for (int qx = 0; qx < Q1D; ++qx)
982 {
983 u += div[qz][qy][qx] * Bt[dx][qx];
984 }
985 QQD0[qz][qy][dx] = u;
986 }
987 }
988 }
989 MFEM_SYNC_THREAD;
990 MFEM_FOREACH_THREAD(qz,z,Q1D)
991 {
992 MFEM_FOREACH_THREAD(dy,y,D1DE)
993 {
994 MFEM_FOREACH_THREAD(dx,x,D1DE)
995 {
996 real_t u = 0.0;
997 for (int qy = 0; qy < Q1D; ++qy)
998 {
999 u += QQD0[qz][qy][dx] * Bt[dy][qy];
1000 }
1001 QDD0[qz][dy][dx] = u;
1002 }
1003 }
1004 }
1005 MFEM_SYNC_THREAD;
1006 MFEM_FOREACH_THREAD(dz,z,D1DE)
1007 {
1008 MFEM_FOREACH_THREAD(dy,y,D1DE)
1009 {
1010 MFEM_FOREACH_THREAD(dx,x,D1DE)
1011 {
1012 real_t u = 0.0;
1013 for (int qz = 0; qz < Q1D; ++qz)
1014 {
1015 u += QDD0[qz][dy][dx] * Bt[dz][qz];
1016 }
1017 y(dx,dy,dz,e) += u;
1018 }
1019 }
1020 }
1021 });
1022}
1023
1024static void PADivergenceApply(const int dim,
1025 const int TR_D1D,
1026 const int TE_D1D,
1027 const int Q1D,
1028 const int NE,
1029 const Array<real_t> &B,
1030 const Array<real_t> &G,
1031 const Array<real_t> &Bt,
1032 const Vector &op,
1033 const Vector &x,
1034 Vector &y,
1035 bool transpose=false)
1036{
1037 if (dim == 2)
1038 {
1039 if (transpose)
1040 {
1041 return PADivergenceApplyTranspose2D(NE,B,G,Bt,op,x,y,TR_D1D,TE_D1D,Q1D);
1042 }
1043 else
1044 {
1045 return PADivergenceApply2D(NE,B,G,Bt,op,x,y,TR_D1D,TE_D1D,Q1D);
1046 }
1047 }
1048 if (dim == 3)
1049 {
1050 if (transpose)
1051 {
1052 return PADivergenceApplyTranspose3D(NE,B,G,Bt,op,x,y,TR_D1D,TE_D1D,Q1D);
1053 }
1054 else
1055 {
1056 return PADivergenceApply3D(NE,B,G,Bt,op,x,y,TR_D1D,TE_D1D,Q1D);
1057 }
1058 }
1059 MFEM_ABORT("Unknown kernel.");
1060}
1061
1062// PA Divergence Apply kernel
1064{
1065 PADivergenceApply(dim, trial_dofs1D, test_dofs1D, quad1D, ne,
1066 trial_maps->B, trial_maps->G, test_maps->Bt, pa_data, x, y,
1067 false);
1068}
1069
1070// PA Divergence Apply kernel
1072 Vector &y) const
1073{
1074 PADivergenceApply(dim, trial_dofs1D, test_dofs1D, quad1D, ne,
1075 trial_maps->Bt, trial_maps->Gt, test_maps->B, pa_data, x, y,
1076 true);
1077}
1078
1079} // namespace mfem
const T * Read(bool on_dev=true) const
Shortcut for mfem::Read(a.GetMemory(), a.Size(), on_dev).
Definition array.hpp:337
A coefficient that is constant across space and time.
static MemoryType GetMemoryType()
(DEPRECATED) Equivalent to GetDeviceMemoryType().
Definition device.hpp:278
Array< real_t > G
Gradients/divergences/curls of basis functions evaluated at quadrature points.
Definition fe_base.hpp:214
@ TENSOR
Tensor product representation using 1D matrices/tensors with dimensions using 1D number of quadrature...
Definition fe_base.hpp:165
Array< real_t > B
Basis functions evaluated at quadrature points.
Definition fe_base.hpp:193
int ndof
Number of degrees of freedom = number of basis functions. When mode is TENSOR, this is the 1D number.
Definition fe_base.hpp:178
Array< real_t > Gt
Transpose of G.
Definition fe_base.hpp:221
int nqpt
Number of quadrature points. When mode is TENSOR, this is the 1D number.
Definition fe_base.hpp:182
Array< real_t > Bt
Transpose of B.
Definition fe_base.hpp:199
Class FiniteElementSpace - responsible for providing FEM view of the mesh, mainly managing the set of...
Definition fespace.hpp:244
Ordering::Type GetOrdering() const
Return the ordering method.
Definition fespace.hpp:876
int GetNE() const
Returns number of elements in the mesh.
Definition fespace.hpp:891
Mesh * GetMesh() const
Returns the mesh.
Definition fespace.hpp:679
const FiniteElement * GetTypicalFE() const
Return GetFE(0) if the local mesh is not empty; otherwise return a typical FE based on the Geometry t...
Definition fespace.cpp:3871
Abstract class for all finite elements.
Definition fe_base.hpp:244
virtual const DofToQuad & GetDofToQuad(const IntegrationRule &ir, DofToQuad::Mode mode) const
Return a DofToQuad structure corresponding to the given IntegrationRule using the given DofToQuad::Mo...
Definition fe_base.cpp:365
int GetDim() const
Returns the reference space dimension for the finite element.
Definition fe_base.hpp:321
Vector J
Jacobians of the element transformations at all quadrature points.
Definition mesh.hpp:2976
Class for an integration rule - an Array of IntegrationPoint.
Definition intrules.hpp:100
int GetNPoints() const
Returns the number of the points in the integration rule.
Definition intrules.hpp:256
const Array< real_t > & GetWeights() const
Return the quadrature weights in a contiguous array.
Definition intrules.cpp:86
const IntegrationRule * IntRule
Mesh data type.
Definition mesh.hpp:64
int Dimension() const
Dimension of the reference space used within the elements.
Definition mesh.hpp:1216
ElementTransformation * GetTypicalElementTransformation()
If the local mesh is not empty return GetElementTransformation(0); otherwise, return the identity tra...
Definition mesh.cpp:390
const GeometricFactors * GetGeometricFactors(const IntegrationRule &ir, const int flags, MemoryType d_mt=MemoryType::DEFAULT)
Return the mesh geometric factors corresponding to the given integration rule.
Definition mesh.cpp:880
void AssemblePA(const FiniteElementSpace &trial_fes, const FiniteElementSpace &test_fes) override
void AddMultTransposePA(const Vector &x, Vector &y) const override
Method for partially assembled transposed action.
static const IntegrationRule & GetRule(const FiniteElement &trial_fe, const FiniteElement &test_fe, const ElementTransformation &Trans)
void AddMultPA(const Vector &x, Vector &y) const override
Method for partially assembled action.
Vector data type.
Definition vector.hpp:82
virtual const real_t * Read(bool on_dev=true) const
Shortcut for mfem::Read(vec.GetMemory(), vec.Size(), on_dev).
Definition vector.hpp:494
virtual real_t * ReadWrite(bool on_dev=true)
Shortcut for mfem::ReadWrite(vec.GetMemory(), vec.Size(), on_dev).
Definition vector.hpp:510
void SetSize(int s)
Resize the vector to size s.
Definition vector.hpp:558
int dim
Definition ex24.cpp:53
void trans(const Vector &u, Vector &x)
Definition ex27.cpp:412
real_t b
Definition lissajous.cpp:42
real_t u(const Vector &xvec)
Definition lor_mms.hpp:22
MFEM_HOST_DEVICE DeviceTensor< sizeof...(Dims), T > Reshape(T *ptr, Dims... dims)
Wrap a pointer as a DeviceTensor with automatically deduced template parameters.
Definition dtensor.hpp:131
void forall_3D(int N, int X, int Y, int Z, lambda &&body)
Definition forall.hpp:774
float real_t
Definition config.hpp:43
void forall(int N, lambda &&body)
Definition forall.hpp:753
static const DeviceDofQuadLimits & Get()
Return a const reference to the DeviceDofQuadLimits singleton.
Definition forall.hpp:128