MFEM v4.8.0
Finite element discretization library
Loading...
Searching...
No Matches
bilininteg_dgtrace_pa.cpp
Go to the documentation of this file.
1// Copyright (c) 2010-2025, Lawrence Livermore National Security, LLC. Produced
2// at the Lawrence Livermore National Laboratory. All Rights reserved. See files
3// LICENSE and NOTICE for details. LLNL-CODE-806117.
4//
5// This file is part of the MFEM library. For more information and source code
6// availability visit https://mfem.org.
7//
8// MFEM is free software; you can redistribute it and/or modify it under the
9// terms of the BSD-3 license. We welcome feedback and contributions, see file
10// CONTRIBUTING.md for details.
11
13#include "../bilininteg.hpp"
14#include "../gridfunc.hpp"
15#include "../qfunction.hpp"
16#include "../restriction.hpp"
17
18namespace mfem
19{
20
21// PA DG Trace Integrator
22static void PADGTraceSetup2D(const int Q1D,
23 const int NF,
24 const Array<real_t> &w,
25 const Vector &det,
26 const Vector &nor,
27 const Vector &rho,
28 const Vector &vel,
29 const real_t alpha,
30 const real_t beta,
31 Vector &op)
32{
33 const int VDIM = 2;
34
35 auto d = Reshape(det.Read(), Q1D, NF);
36 auto n = Reshape(nor.Read(), Q1D, VDIM, NF);
37 const bool const_r = rho.Size() == 1;
38 auto R =
39 const_r ? Reshape(rho.Read(), 1,1) : Reshape(rho.Read(), Q1D,NF);
40 const bool const_v = vel.Size() == 2;
41 auto V =
42 const_v ? Reshape(vel.Read(), 2,1,1) : Reshape(vel.Read(), 2,Q1D,NF);
43 auto W = w.Read();
44 auto qd = Reshape(op.Write(), Q1D, 2, 2, NF);
45
46 mfem::forall(Q1D*NF, [=] MFEM_HOST_DEVICE (int tid)
47 {
48 const int f = tid / Q1D;
49 const int q = tid % Q1D;
50 {
51 const real_t r = const_r ? R(0,0) : R(q,f);
52 const real_t v0 = const_v ? V(0,0,0) : V(0,q,f);
53 const real_t v1 = const_v ? V(1,0,0) : V(1,q,f);
54 const real_t dot = n(q,0,f) * v0 + n(q,1,f) * v1;
55 const real_t abs = dot > 0_r ? dot : -dot;
56 const real_t w = W[q]*r*d(q,f);
57 qd(q,0,0,f) = w*( alpha/2 * dot + beta * abs );
58 qd(q,1,0,f) = w*( alpha/2 * dot - beta * abs );
59 qd(q,0,1,f) = w*(-alpha/2 * dot - beta * abs );
60 qd(q,1,1,f) = w*(-alpha/2 * dot + beta * abs );
61 }
62 });
63}
64
65static void PADGTraceSetup3D(const int Q1D,
66 const int NF,
67 const Array<real_t> &w,
68 const Vector &det,
69 const Vector &nor,
70 const Vector &rho,
71 const Vector &vel,
72 const real_t alpha,
73 const real_t beta,
74 Vector &op)
75{
76 const int VDIM = 3;
77
78 auto d = Reshape(det.Read(), Q1D, Q1D, NF);
79 auto n = Reshape(nor.Read(), Q1D, Q1D, VDIM, NF);
80 const bool const_r = rho.Size() == 1;
81 auto R =
82 const_r ? Reshape(rho.Read(), 1,1,1) : Reshape(rho.Read(), Q1D,Q1D,NF);
83 const bool const_v = vel.Size() == 3;
84 auto V =
85 const_v ? Reshape(vel.Read(), 3,1,1,1) : Reshape(vel.Read(), 3,Q1D,Q1D,NF);
86 auto W = w.Read();
87 auto qd = Reshape(op.Write(), Q1D, Q1D, 2, 2, NF);
88
89 mfem::forall(Q1D*Q1D*NF, [=] MFEM_HOST_DEVICE (int tid)
90 {
91 int f = tid / (Q1D * Q1D);
92 int q2 = (tid / Q1D) % Q1D;
93 int q1 = tid % Q1D;
94 {
95 {
96 const real_t r = const_r ? R(0,0,0) : R(q1,q2,f);
97 const real_t v0 = const_v ? V(0,0,0,0) : V(0,q1,q2,f);
98 const real_t v1 = const_v ? V(1,0,0,0) : V(1,q1,q2,f);
99 const real_t v2 = const_v ? V(2,0,0,0) : V(2,q1,q2,f);
100 const real_t dot = n(q1,q2,0,f) * v0 + n(q1,q2,1,f) * v1 +
101 n(q1,q2,2,f) * v2;
102 const real_t abs = dot > 0.0 ? dot : -dot;
103 const real_t w = W[q1+q2*Q1D]*r*d(q1,q2,f);
104 qd(q1,q2,0,0,f) = w*( alpha/2 * dot + beta * abs );
105 qd(q1,q2,1,0,f) = w*( alpha/2 * dot - beta * abs );
106 qd(q1,q2,0,1,f) = w*(-alpha/2 * dot - beta * abs );
107 qd(q1,q2,1,1,f) = w*(-alpha/2 * dot + beta * abs );
108 }
109 }
110 });
111}
112
113static void PADGTraceSetup(const int dim,
114 const int D1D,
115 const int Q1D,
116 const int NF,
117 const Array<real_t> &W,
118 const Vector &det,
119 const Vector &nor,
120 const Vector &rho,
121 const Vector &u,
122 const real_t alpha,
123 const real_t beta,
124 Vector &op)
125{
126 if (dim == 1) { MFEM_ABORT("dim==1 not supported in PADGTraceSetup"); }
127 if (dim == 2)
128 {
129 PADGTraceSetup2D(Q1D, NF, W, det, nor, rho, u, alpha, beta, op);
130 }
131 if (dim == 3)
132 {
133 PADGTraceSetup3D(Q1D, NF, W, det, nor, rho, u, alpha, beta, op);
134 }
135}
136
137void DGTraceIntegrator::SetupPA(const FiniteElementSpace &fes, FaceType type)
138{
139 const MemoryType mt = (pa_mt == MemoryType::DEFAULT) ?
141
142 nf = fes.GetNFbyType(type);
143 if (nf==0) { return; }
144 // Assumes tensor-product elements
145 Mesh *mesh = fes.GetMesh();
146 const FiniteElement &el = *fes.GetTypicalTraceElement();
147 const IntegrationRule *ir = IntRule?
148 IntRule:
149 &GetRule(el.GetGeomType(), el.GetOrder(),
150 *mesh->GetTypicalElementTransformation());
151 const int symmDims = 4;
152 nq = ir->GetNPoints();
153 dim = mesh->Dimension();
154 geom = mesh->GetFaceGeometricFactors(
155 *ir,
158 maps = &el.GetDofToQuad(*ir, DofToQuad::TENSOR);
159 dofs1D = maps->ndof;
160 quad1D = maps->nqpt;
161 pa_data.SetSize(symmDims * nq * nf, Device::GetMemoryType());
162
163 FaceQuadratureSpace qs(*mesh, *ir, type);
164 CoefficientVector vel(*u, qs, CoefficientStorage::COMPRESSED);
165
166 CoefficientVector r(qs, CoefficientStorage::COMPRESSED);
167 if (rho == nullptr)
168 {
169 r.SetConstant(1.0);
170 }
171 else if (ConstantCoefficient *const_rho = dynamic_cast<ConstantCoefficient*>
172 (rho))
173 {
174 r.SetConstant(const_rho->constant);
175 }
176 else if (QuadratureFunctionCoefficient* qf_rho =
177 dynamic_cast<QuadratureFunctionCoefficient*>(rho))
178 {
179 r.MakeRef(qf_rho->GetQuadFunction());
180 }
181 else
182 {
183 r.SetSize(nq * nf);
184 auto C_vel = Reshape(vel.HostRead(), dim, nq, nf);
185 auto n = Reshape(geom->normal.HostRead(), nq, dim, nf);
186 auto C = Reshape(r.HostWrite(), nq, nf);
187 int f_ind = 0;
188 for (int f = 0; f < mesh->GetNumFacesWithGhost(); ++f)
189 {
190 Mesh::FaceInformation face = mesh->GetFaceInformation(f);
191 if (face.IsNonconformingCoarse() || !face.IsOfFaceType(type))
192 {
193 // We skip nonconforming coarse faces as they are treated
194 // by the corresponding nonconforming fine faces.
195 continue;
196 }
197 FaceElementTransformations &T =
198 *fes.GetMesh()->GetFaceElementTransformations(f);
199 for (int q = 0; q < nq; ++q)
200 {
201 // Convert to lexicographic ordering
202 int iq = ToLexOrdering(dim, face.element[0].local_face_id,
203 quad1D, q);
204
205 T.SetAllIntPoints(&ir->IntPoint(q));
206 const IntegrationPoint &eip1 = T.GetElement1IntPoint();
207 const IntegrationPoint &eip2 = T.GetElement2IntPoint();
208 real_t rq;
209
210 if (face.IsBoundary())
211 {
212 rq = rho->Eval(*T.Elem1, eip1);
213 }
214 else
215 {
216 real_t udotn = 0.0;
217 for (int d=0; d<dim; ++d)
218 {
219 udotn += C_vel(d,iq,f_ind)*n(iq,d,f_ind);
220 }
221 if (udotn >= 0.0) { rq = rho->Eval(*T.Elem2, eip2); }
222 else { rq = rho->Eval(*T.Elem1, eip1); }
223 }
224 C(iq,f_ind) = rq;
225 }
226 f_ind++;
227 }
228 MFEM_VERIFY(f_ind==nf, "Incorrect number of faces.");
229 }
230 PADGTraceSetup(dim, dofs1D, quad1D, nf, ir->GetWeights(),
231 geom->detJ, geom->normal, r, vel,
232 alpha, beta, pa_data);
233}
234
239
244
245// PA DGTrace Apply 2D kernel for Gauss-Lobatto/Bernstein
246template<int T_D1D = 0, int T_Q1D = 0> static
247void PADGTraceApply2D(const int NF,
248 const Array<real_t> &b,
249 const Array<real_t> &bt,
250 const Vector &op_,
251 const Vector &x_,
252 Vector &y_,
253 const int d1d = 0,
254 const int q1d = 0)
255{
256 const int VDIM = 1;
257 const int D1D = T_D1D ? T_D1D : d1d;
258 const int Q1D = T_Q1D ? T_Q1D : q1d;
259 MFEM_VERIFY(D1D <= DeviceDofQuadLimits::Get().MAX_D1D, "");
260 MFEM_VERIFY(Q1D <= DeviceDofQuadLimits::Get().MAX_Q1D, "");
261 auto B = Reshape(b.Read(), Q1D, D1D);
262 auto Bt = Reshape(bt.Read(), D1D, Q1D);
263 auto op = Reshape(op_.Read(), Q1D, 2, 2, NF);
264 auto x = Reshape(x_.Read(), D1D, VDIM, 2, NF);
265 auto y = Reshape(y_.ReadWrite(), D1D, VDIM, 2, NF);
266
267 mfem::forall(NF, [=] MFEM_HOST_DEVICE (int f)
268 {
269 const int VDIM = 1;
270 const int D1D = T_D1D ? T_D1D : d1d;
271 const int Q1D = T_Q1D ? T_Q1D : q1d;
272 // the following variables are evaluated at compile time
273 constexpr int max_D1D = T_D1D ? T_D1D : DofQuadLimits::MAX_D1D;
274 constexpr int max_Q1D = T_Q1D ? T_Q1D : DofQuadLimits::MAX_Q1D;
275 real_t u0[max_D1D][VDIM];
276 real_t u1[max_D1D][VDIM];
277 for (int d = 0; d < D1D; d++)
278 {
279 for (int c = 0; c < VDIM; c++)
280 {
281 u0[d][c] = x(d,c,0,f);
282 u1[d][c] = x(d,c,1,f);
283 }
284 }
285 real_t Bu0[max_Q1D][VDIM];
286 real_t Bu1[max_Q1D][VDIM];
287 for (int q = 0; q < Q1D; ++q)
288 {
289 for (int c = 0; c < VDIM; c++)
290 {
291 Bu0[q][c] = 0.0;
292 Bu1[q][c] = 0.0;
293 }
294 for (int d = 0; d < D1D; ++d)
295 {
296 const real_t b = B(q,d);
297 for (int c = 0; c < VDIM; c++)
298 {
299 Bu0[q][c] += b*u0[d][c];
300 Bu1[q][c] += b*u1[d][c];
301 }
302 }
303 }
304 real_t DBu[max_Q1D][VDIM];
305 for (int q = 0; q < Q1D; ++q)
306 {
307 for (int c = 0; c < VDIM; c++)
308 {
309 DBu[q][c] = op(q,0,0,f)*Bu0[q][c] + op(q,1,0,f)*Bu1[q][c];
310 }
311 }
312 real_t BDBu[max_D1D][VDIM];
313 for (int d = 0; d < D1D; ++d)
314 {
315 for (int c = 0; c < VDIM; c++)
316 {
317 BDBu[d][c] = 0.0;
318 }
319 for (int q = 0; q < Q1D; ++q)
320 {
321 const real_t b = Bt(d,q);
322 for (int c = 0; c < VDIM; c++)
323 {
324 BDBu[d][c] += b*DBu[q][c];
325 }
326 }
327 for (int c = 0; c < VDIM; c++)
328 {
329 y(d,c,0,f) += BDBu[d][c];
330 y(d,c,1,f) += -BDBu[d][c];
331 }
332 }
333 });
334}
335
336// PA DGTrace Apply 3D kernel for Gauss-Lobatto/Bernstein
337template<int T_D1D = 0, int T_Q1D = 0> static
338void PADGTraceApply3D(const int NF,
339 const Array<real_t> &b,
340 const Array<real_t> &bt,
341 const Vector &op_,
342 const Vector &x_,
343 Vector &y_,
344 const int d1d = 0,
345 const int q1d = 0)
346{
347 const int VDIM = 1;
348 const int D1D = T_D1D ? T_D1D : d1d;
349 const int Q1D = T_Q1D ? T_Q1D : q1d;
350 MFEM_VERIFY(D1D <= DeviceDofQuadLimits::Get().MAX_D1D, "");
351 MFEM_VERIFY(Q1D <= DeviceDofQuadLimits::Get().MAX_Q1D, "");
352 auto B = Reshape(b.Read(), Q1D, D1D);
353 auto Bt = Reshape(bt.Read(), D1D, Q1D);
354 auto op = Reshape(op_.Read(), Q1D, Q1D, 2, 2, NF);
355 auto x = Reshape(x_.Read(), D1D, D1D, VDIM, 2, NF);
356 auto y = Reshape(y_.ReadWrite(), D1D, D1D, VDIM, 2, NF);
357
358 mfem::forall(NF, [=] MFEM_HOST_DEVICE (int f)
359 {
360 const int VDIM = 1;
361 const int D1D = T_D1D ? T_D1D : d1d;
362 const int Q1D = T_Q1D ? T_Q1D : q1d;
363 // the following variables are evaluated at compile time
364 constexpr int max_D1D = T_D1D ? T_D1D : DofQuadLimits::MAX_D1D;
365 constexpr int max_Q1D = T_Q1D ? T_Q1D : DofQuadLimits::MAX_Q1D;
366 real_t u0[max_D1D][max_D1D][VDIM];
367 real_t u1[max_D1D][max_D1D][VDIM];
368 for (int d1 = 0; d1 < D1D; d1++)
369 {
370 for (int d2 = 0; d2 < D1D; d2++)
371 {
372 for (int c = 0; c < VDIM; c++)
373 {
374 u0[d1][d2][c] = x(d1,d2,c,0,f);
375 u1[d1][d2][c] = x(d1,d2,c,1,f);
376 }
377 }
378 }
379 real_t Bu0[max_Q1D][max_D1D][VDIM];
380 real_t Bu1[max_Q1D][max_D1D][VDIM];
381 for (int q = 0; q < Q1D; ++q)
382 {
383 for (int d2 = 0; d2 < D1D; d2++)
384 {
385 for (int c = 0; c < VDIM; c++)
386 {
387 Bu0[q][d2][c] = 0.0;
388 Bu1[q][d2][c] = 0.0;
389 }
390 for (int d1 = 0; d1 < D1D; ++d1)
391 {
392 const real_t b = B(q,d1);
393 for (int c = 0; c < VDIM; c++)
394 {
395 Bu0[q][d2][c] += b*u0[d1][d2][c];
396 Bu1[q][d2][c] += b*u1[d1][d2][c];
397 }
398 }
399 }
400 }
401 real_t BBu0[max_Q1D][max_Q1D][VDIM];
402 real_t BBu1[max_Q1D][max_Q1D][VDIM];
403 for (int q1 = 0; q1 < Q1D; ++q1)
404 {
405 for (int q2 = 0; q2 < Q1D; q2++)
406 {
407 for (int c = 0; c < VDIM; c++)
408 {
409 BBu0[q1][q2][c] = 0.0;
410 BBu1[q1][q2][c] = 0.0;
411 }
412 for (int d2 = 0; d2 < D1D; ++d2)
413 {
414 const real_t b = B(q2,d2);
415 for (int c = 0; c < VDIM; c++)
416 {
417 BBu0[q1][q2][c] += b*Bu0[q1][d2][c];
418 BBu1[q1][q2][c] += b*Bu1[q1][d2][c];
419 }
420 }
421 }
422 }
423 real_t DBBu[max_Q1D][max_Q1D][VDIM];
424 for (int q1 = 0; q1 < Q1D; ++q1)
425 {
426 for (int q2 = 0; q2 < Q1D; q2++)
427 {
428 for (int c = 0; c < VDIM; c++)
429 {
430 DBBu[q1][q2][c] = op(q1,q2,0,0,f)*BBu0[q1][q2][c] +
431 op(q1,q2,1,0,f)*BBu1[q1][q2][c];
432 }
433 }
434 }
435 real_t BDBBu[max_Q1D][max_D1D][VDIM];
436 for (int q1 = 0; q1 < Q1D; ++q1)
437 {
438 for (int d2 = 0; d2 < D1D; d2++)
439 {
440 for (int c = 0; c < VDIM; c++)
441 {
442 BDBBu[q1][d2][c] = 0.0;
443 }
444 for (int q2 = 0; q2 < Q1D; ++q2)
445 {
446 const real_t b = Bt(d2,q2);
447 for (int c = 0; c < VDIM; c++)
448 {
449 BDBBu[q1][d2][c] += b*DBBu[q1][q2][c];
450 }
451 }
452 }
453 }
454 real_t BBDBBu[max_D1D][max_D1D][VDIM];
455 for (int d1 = 0; d1 < D1D; ++d1)
456 {
457 for (int d2 = 0; d2 < D1D; d2++)
458 {
459 for (int c = 0; c < VDIM; c++)
460 {
461 BBDBBu[d1][d2][c] = 0.0;
462 }
463 for (int q1 = 0; q1 < Q1D; ++q1)
464 {
465 const real_t b = Bt(d1,q1);
466 for (int c = 0; c < VDIM; c++)
467 {
468 BBDBBu[d1][d2][c] += b*BDBBu[q1][d2][c];
469 }
470 }
471 for (int c = 0; c < VDIM; c++)
472 {
473 y(d1,d2,c,0,f) += BBDBBu[d1][d2][c];
474 y(d1,d2,c,1,f) += -BBDBBu[d1][d2][c];
475 }
476 }
477 }
478 });
479}
480
481// Optimized PA DGTrace Apply 3D kernel for Gauss-Lobatto/Bernstein
482template<int T_D1D = 0, int T_Q1D = 0, int T_NBZ = 0> static
483void SmemPADGTraceApply3D(const int NF,
484 const Array<real_t> &b,
485 const Array<real_t> &bt,
486 const Vector &op_,
487 const Vector &x_,
488 Vector &y_,
489 const int d1d = 0,
490 const int q1d = 0)
491{
492 const int D1D = T_D1D ? T_D1D : d1d;
493 const int Q1D = T_Q1D ? T_Q1D : q1d;
494 constexpr int NBZ = T_NBZ ? T_NBZ : 1;
495 MFEM_VERIFY(D1D <= DeviceDofQuadLimits::Get().MAX_D1D, "");
496 MFEM_VERIFY(Q1D <= DeviceDofQuadLimits::Get().MAX_Q1D, "");
497 auto B = Reshape(b.Read(), Q1D, D1D);
498 auto Bt = Reshape(bt.Read(), D1D, Q1D);
499 auto op = Reshape(op_.Read(), Q1D, Q1D, 2, 2, NF);
500 auto x = Reshape(x_.Read(), D1D, D1D, 2, NF);
501 auto y = Reshape(y_.ReadWrite(), D1D, D1D, 2, NF);
502
503 mfem::forall_2D_batch(NF, Q1D, Q1D, NBZ, [=] MFEM_HOST_DEVICE (int f)
504 {
505 const int tidz = MFEM_THREAD_ID(z);
506 const int D1D = T_D1D ? T_D1D : d1d;
507 const int Q1D = T_Q1D ? T_Q1D : q1d;
508 // the following variables are evaluated at compile time
509 constexpr int NBZ = T_NBZ ? T_NBZ : 1;
510 constexpr int max_D1D = T_D1D ? T_D1D : DofQuadLimits::MAX_D1D;
511 constexpr int max_Q1D = T_Q1D ? T_Q1D : DofQuadLimits::MAX_Q1D;
512 MFEM_SHARED real_t u0[NBZ][max_D1D][max_D1D];
513 MFEM_SHARED real_t u1[NBZ][max_D1D][max_D1D];
514 MFEM_FOREACH_THREAD(d1,x,D1D)
515 {
516 MFEM_FOREACH_THREAD(d2,y,D1D)
517 {
518 u0[tidz][d1][d2] = x(d1,d2,0,f);
519 u1[tidz][d1][d2] = x(d1,d2,1,f);
520 }
521 }
522 MFEM_SYNC_THREAD;
523 MFEM_SHARED real_t Bu0[NBZ][max_Q1D][max_D1D];
524 MFEM_SHARED real_t Bu1[NBZ][max_Q1D][max_D1D];
525 MFEM_FOREACH_THREAD(q1,x,Q1D)
526 {
527 MFEM_FOREACH_THREAD(d2,y,D1D)
528 {
529 real_t Bu0_ = 0.0;
530 real_t Bu1_ = 0.0;
531 for (int d1 = 0; d1 < D1D; ++d1)
532 {
533 const real_t b = B(q1,d1);
534 Bu0_ += b*u0[tidz][d1][d2];
535 Bu1_ += b*u1[tidz][d1][d2];
536 }
537 Bu0[tidz][q1][d2] = Bu0_;
538 Bu1[tidz][q1][d2] = Bu1_;
539 }
540 }
541 MFEM_SYNC_THREAD;
542 MFEM_SHARED real_t BBu0[NBZ][max_Q1D][max_Q1D];
543 MFEM_SHARED real_t BBu1[NBZ][max_Q1D][max_Q1D];
544 MFEM_FOREACH_THREAD(q1,x,Q1D)
545 {
546 MFEM_FOREACH_THREAD(q2,y,Q1D)
547 {
548 real_t BBu0_ = 0.0;
549 real_t BBu1_ = 0.0;
550 for (int d2 = 0; d2 < D1D; ++d2)
551 {
552 const real_t b = B(q2,d2);
553 BBu0_ += b*Bu0[tidz][q1][d2];
554 BBu1_ += b*Bu1[tidz][q1][d2];
555 }
556 BBu0[tidz][q1][q2] = BBu0_;
557 BBu1[tidz][q1][q2] = BBu1_;
558 }
559 }
560 MFEM_SYNC_THREAD;
561 MFEM_SHARED real_t DBBu[NBZ][max_Q1D][max_Q1D];
562 MFEM_FOREACH_THREAD(q1,x,Q1D)
563 {
564 MFEM_FOREACH_THREAD(q2,y,Q1D)
565 {
566 DBBu[tidz][q1][q2] = op(q1,q2,0,0,f)*BBu0[tidz][q1][q2] +
567 op(q1,q2,1,0,f)*BBu1[tidz][q1][q2];
568 }
569 }
570 MFEM_SYNC_THREAD;
571 MFEM_SHARED real_t BDBBu[NBZ][max_Q1D][max_D1D];
572 MFEM_FOREACH_THREAD(q1,x,Q1D)
573 {
574 MFEM_FOREACH_THREAD(d2,y,D1D)
575 {
576 real_t BDBBu_ = 0.0;
577 for (int q2 = 0; q2 < Q1D; ++q2)
578 {
579 const real_t b = Bt(d2,q2);
580 BDBBu_ += b*DBBu[tidz][q1][q2];
581 }
582 BDBBu[tidz][q1][d2] = BDBBu_;
583 }
584 }
585 MFEM_SYNC_THREAD;
586 MFEM_FOREACH_THREAD(d1,x,D1D)
587 {
588 MFEM_FOREACH_THREAD(d2,y,D1D)
589 {
590 real_t BBDBBu_ = 0.0;
591 for (int q1 = 0; q1 < Q1D; ++q1)
592 {
593 const real_t b = Bt(d1,q1);
594 BBDBBu_ += b*BDBBu[tidz][q1][d2];
595 }
596 y(d1,d2,0,f) += BBDBBu_;
597 y(d1,d2,1,f) += -BBDBBu_;
598 }
599 }
600 });
601}
602
603static void PADGTraceApply(const int dim,
604 const int D1D,
605 const int Q1D,
606 const int NF,
607 const Array<real_t> &B,
608 const Array<real_t> &Bt,
609 const Vector &op,
610 const Vector &x,
611 Vector &y)
612{
613 if (dim == 2)
614 {
615 switch ((D1D << 4 ) | Q1D)
616 {
617 case 0x22: return PADGTraceApply2D<2,2>(NF,B,Bt,op,x,y);
618 case 0x33: return PADGTraceApply2D<3,3>(NF,B,Bt,op,x,y);
619 case 0x44: return PADGTraceApply2D<4,4>(NF,B,Bt,op,x,y);
620 case 0x55: return PADGTraceApply2D<5,5>(NF,B,Bt,op,x,y);
621 case 0x66: return PADGTraceApply2D<6,6>(NF,B,Bt,op,x,y);
622 case 0x77: return PADGTraceApply2D<7,7>(NF,B,Bt,op,x,y);
623 case 0x88: return PADGTraceApply2D<8,8>(NF,B,Bt,op,x,y);
624 case 0x99: return PADGTraceApply2D<9,9>(NF,B,Bt,op,x,y);
625 default: return PADGTraceApply2D(NF,B,Bt,op,x,y,D1D,Q1D);
626 }
627 }
628 else if (dim == 3)
629 {
630 switch ((D1D << 4 ) | Q1D)
631 {
632 case 0x22: return SmemPADGTraceApply3D<2,2,1>(NF,B,Bt,op,x,y);
633 case 0x23: return SmemPADGTraceApply3D<2,3,1>(NF,B,Bt,op,x,y);
634 case 0x34: return SmemPADGTraceApply3D<3,4,2>(NF,B,Bt,op,x,y);
635 case 0x45: return SmemPADGTraceApply3D<4,5,2>(NF,B,Bt,op,x,y);
636 case 0x56: return SmemPADGTraceApply3D<5,6,1>(NF,B,Bt,op,x,y);
637 case 0x67: return SmemPADGTraceApply3D<6,7,1>(NF,B,Bt,op,x,y);
638 case 0x78: return SmemPADGTraceApply3D<7,8,1>(NF,B,Bt,op,x,y);
639 case 0x89: return SmemPADGTraceApply3D<8,9,1>(NF,B,Bt,op,x,y);
640 default: return PADGTraceApply3D(NF,B,Bt,op,x,y,D1D,Q1D);
641 }
642 }
643 MFEM_ABORT("Unknown kernel.");
644}
645
646// PA DGTrace Apply 2D kernel for Gauss-Lobatto/Bernstein
647template<int T_D1D = 0, int T_Q1D = 0> static
648void PADGTraceApplyTranspose2D(const int NF,
649 const Array<real_t> &b,
650 const Array<real_t> &bt,
651 const Vector &op_,
652 const Vector &x_,
653 Vector &y_,
654 const int d1d = 0,
655 const int q1d = 0)
656{
657 const int VDIM = 1;
658 const int D1D = T_D1D ? T_D1D : d1d;
659 const int Q1D = T_Q1D ? T_Q1D : q1d;
660 MFEM_VERIFY(D1D <= DeviceDofQuadLimits::Get().MAX_D1D, "");
661 MFEM_VERIFY(Q1D <= DeviceDofQuadLimits::Get().MAX_Q1D, "");
662 auto B = Reshape(b.Read(), Q1D, D1D);
663 auto Bt = Reshape(bt.Read(), D1D, Q1D);
664 auto op = Reshape(op_.Read(), Q1D, 2, 2, NF);
665 auto x = Reshape(x_.Read(), D1D, VDIM, 2, NF);
666 auto y = Reshape(y_.ReadWrite(), D1D, VDIM, 2, NF);
667
668 mfem::forall(NF, [=] MFEM_HOST_DEVICE (int f)
669 {
670 const int VDIM = 1;
671 const int D1D = T_D1D ? T_D1D : d1d;
672 const int Q1D = T_Q1D ? T_Q1D : q1d;
673 // the following variables are evaluated at compile time
674 constexpr int max_D1D = T_D1D ? T_D1D : DofQuadLimits::MAX_D1D;
675 constexpr int max_Q1D = T_Q1D ? T_Q1D : DofQuadLimits::MAX_Q1D;
676 real_t u0[max_D1D][VDIM];
677 real_t u1[max_D1D][VDIM];
678 for (int d = 0; d < D1D; d++)
679 {
680 for (int c = 0; c < VDIM; c++)
681 {
682 u0[d][c] = x(d,c,0,f);
683 u1[d][c] = x(d,c,1,f);
684 }
685 }
686 real_t Bu0[max_Q1D][VDIM];
687 real_t Bu1[max_Q1D][VDIM];
688 for (int q = 0; q < Q1D; ++q)
689 {
690 for (int c = 0; c < VDIM; c++)
691 {
692 Bu0[q][c] = 0.0;
693 Bu1[q][c] = 0.0;
694 }
695 for (int d = 0; d < D1D; ++d)
696 {
697 const real_t b = B(q,d);
698 for (int c = 0; c < VDIM; c++)
699 {
700 Bu0[q][c] += b*u0[d][c];
701 Bu1[q][c] += b*u1[d][c];
702 }
703 }
704 }
705 real_t DBu0[max_Q1D][VDIM];
706 real_t DBu1[max_Q1D][VDIM];
707 for (int q = 0; q < Q1D; ++q)
708 {
709 for (int c = 0; c < VDIM; c++)
710 {
711 DBu0[q][c] = op(q,0,0,f)*Bu0[q][c] + op(q,0,1,f)*Bu1[q][c];
712 DBu1[q][c] = op(q,1,0,f)*Bu0[q][c] + op(q,1,1,f)*Bu1[q][c];
713 }
714 }
715 real_t BDBu0[max_D1D][VDIM];
716 real_t BDBu1[max_D1D][VDIM];
717 for (int d = 0; d < D1D; ++d)
718 {
719 for (int c = 0; c < VDIM; c++)
720 {
721 BDBu0[d][c] = 0.0;
722 BDBu1[d][c] = 0.0;
723 }
724 for (int q = 0; q < Q1D; ++q)
725 {
726 const real_t b = Bt(d,q);
727 for (int c = 0; c < VDIM; c++)
728 {
729 BDBu0[d][c] += b*DBu0[q][c];
730 BDBu1[d][c] += b*DBu1[q][c];
731 }
732 }
733 for (int c = 0; c < VDIM; c++)
734 {
735 y(d,c,0,f) += BDBu0[d][c];
736 y(d,c,1,f) += BDBu1[d][c];
737 }
738 }
739 });
740}
741
742// PA DGTrace Apply Transpose 3D kernel for Gauss-Lobatto/Bernstein
743template<int T_D1D = 0, int T_Q1D = 0> static
744void PADGTraceApplyTranspose3D(const int NF,
745 const Array<real_t> &b,
746 const Array<real_t> &bt,
747 const Vector &op_,
748 const Vector &x_,
749 Vector &y_,
750 const int d1d = 0,
751 const int q1d = 0)
752{
753 const int VDIM = 1;
754 const int D1D = T_D1D ? T_D1D : d1d;
755 const int Q1D = T_Q1D ? T_Q1D : q1d;
756 MFEM_VERIFY(D1D <= DeviceDofQuadLimits::Get().MAX_D1D, "");
757 MFEM_VERIFY(Q1D <= DeviceDofQuadLimits::Get().MAX_Q1D, "");
758 auto B = Reshape(b.Read(), Q1D, D1D);
759 auto Bt = Reshape(bt.Read(), D1D, Q1D);
760 auto op = Reshape(op_.Read(), Q1D, Q1D, 2, 2, NF);
761 auto x = Reshape(x_.Read(), D1D, D1D, VDIM, 2, NF);
762 auto y = Reshape(y_.ReadWrite(), D1D, D1D, VDIM, 2, NF);
763
764 mfem::forall(NF, [=] MFEM_HOST_DEVICE (int f)
765 {
766 const int VDIM = 1;
767 const int D1D = T_D1D ? T_D1D : d1d;
768 const int Q1D = T_Q1D ? T_Q1D : q1d;
769 // the following variables are evaluated at compile time
770 constexpr int max_D1D = T_D1D ? T_D1D : DofQuadLimits::MAX_D1D;
771 constexpr int max_Q1D = T_Q1D ? T_Q1D : DofQuadLimits::MAX_Q1D;
772 real_t u0[max_D1D][max_D1D][VDIM];
773 real_t u1[max_D1D][max_D1D][VDIM];
774 for (int d1 = 0; d1 < D1D; d1++)
775 {
776 for (int d2 = 0; d2 < D1D; d2++)
777 {
778 for (int c = 0; c < VDIM; c++)
779 {
780 u0[d1][d2][c] = x(d1,d2,c,0,f);
781 u1[d1][d2][c] = x(d1,d2,c,1,f);
782 }
783 }
784 }
785 real_t Bu0[max_Q1D][max_D1D][VDIM];
786 real_t Bu1[max_Q1D][max_D1D][VDIM];
787 for (int q1 = 0; q1 < Q1D; ++q1)
788 {
789 for (int d2 = 0; d2 < D1D; ++d2)
790 {
791 for (int c = 0; c < VDIM; c++)
792 {
793 Bu0[q1][d2][c] = 0.0;
794 Bu1[q1][d2][c] = 0.0;
795 }
796 for (int d1 = 0; d1 < D1D; ++d1)
797 {
798 const real_t b = B(q1,d1);
799 for (int c = 0; c < VDIM; c++)
800 {
801 Bu0[q1][d2][c] += b*u0[d1][d2][c];
802 Bu1[q1][d2][c] += b*u1[d1][d2][c];
803 }
804 }
805 }
806 }
807 real_t BBu0[max_Q1D][max_Q1D][VDIM];
808 real_t BBu1[max_Q1D][max_Q1D][VDIM];
809 for (int q1 = 0; q1 < Q1D; ++q1)
810 {
811 for (int q2 = 0; q2 < Q1D; ++q2)
812 {
813 for (int c = 0; c < VDIM; c++)
814 {
815 BBu0[q1][q2][c] = 0.0;
816 BBu1[q1][q2][c] = 0.0;
817 }
818 for (int d2 = 0; d2 < D1D; ++d2)
819 {
820 const real_t b = B(q2,d2);
821 for (int c = 0; c < VDIM; c++)
822 {
823 BBu0[q1][q2][c] += b*Bu0[q1][d2][c];
824 BBu1[q1][q2][c] += b*Bu1[q1][d2][c];
825 }
826 }
827 }
828 }
829 real_t DBu0[max_Q1D][max_Q1D][VDIM];
830 real_t DBu1[max_Q1D][max_Q1D][VDIM];
831 for (int q1 = 0; q1 < Q1D; ++q1)
832 {
833 for (int q2 = 0; q2 < Q1D; ++q2)
834 {
835 const real_t D00 = op(q1,q2,0,0,f);
836 const real_t D01 = op(q1,q2,0,1,f);
837 const real_t D10 = op(q1,q2,1,0,f);
838 const real_t D11 = op(q1,q2,1,1,f);
839 for (int c = 0; c < VDIM; c++)
840 {
841 DBu0[q1][q2][c] = D00*BBu0[q1][q2][c] + D01*BBu1[q1][q2][c];
842 DBu1[q1][q2][c] = D10*BBu0[q1][q2][c] + D11*BBu1[q1][q2][c];
843 }
844 }
845 }
846 real_t BDBu0[max_D1D][max_Q1D][VDIM];
847 real_t BDBu1[max_D1D][max_Q1D][VDIM];
848 for (int d1 = 0; d1 < D1D; ++d1)
849 {
850 for (int q2 = 0; q2 < Q1D; ++q2)
851 {
852 for (int c = 0; c < VDIM; c++)
853 {
854 BDBu0[d1][q2][c] = 0.0;
855 BDBu1[d1][q2][c] = 0.0;
856 }
857 for (int q1 = 0; q1 < Q1D; ++q1)
858 {
859 const real_t b = Bt(d1,q1);
860 for (int c = 0; c < VDIM; c++)
861 {
862 BDBu0[d1][q2][c] += b*DBu0[q1][q2][c];
863 BDBu1[d1][q2][c] += b*DBu1[q1][q2][c];
864 }
865 }
866 }
867 }
868 real_t BBDBu0[max_D1D][max_D1D][VDIM];
869 real_t BBDBu1[max_D1D][max_D1D][VDIM];
870 for (int d1 = 0; d1 < D1D; ++d1)
871 {
872 for (int d2 = 0; d2 < D1D; ++d2)
873 {
874 for (int c = 0; c < VDIM; c++)
875 {
876 BBDBu0[d1][d2][c] = 0.0;
877 BBDBu1[d1][d2][c] = 0.0;
878 }
879 for (int q2 = 0; q2 < Q1D; ++q2)
880 {
881 const real_t b = Bt(d2,q2);
882 for (int c = 0; c < VDIM; c++)
883 {
884 BBDBu0[d1][d2][c] += b*BDBu0[d1][q2][c];
885 BBDBu1[d1][d2][c] += b*BDBu1[d1][q2][c];
886 }
887 }
888 for (int c = 0; c < VDIM; c++)
889 {
890 y(d1,d2,c,0,f) += BBDBu0[d1][d2][c];
891 y(d1,d2,c,1,f) += BBDBu1[d1][d2][c];
892 }
893 }
894 }
895 });
896}
897
898// Optimized PA DGTrace Apply Transpose 3D kernel for Gauss-Lobatto/Bernstein
899template<int T_D1D = 0, int T_Q1D = 0, int T_NBZ = 0> static
900void SmemPADGTraceApplyTranspose3D(const int NF,
901 const Array<real_t> &b,
902 const Array<real_t> &bt,
903 const Vector &op_,
904 const Vector &x_,
905 Vector &y_,
906 const int d1d = 0,
907 const int q1d = 0)
908{
909 const int D1D = T_D1D ? T_D1D : d1d;
910 const int Q1D = T_Q1D ? T_Q1D : q1d;
911 constexpr int NBZ = T_NBZ ? T_NBZ : 1;
912 MFEM_VERIFY(D1D <= DeviceDofQuadLimits::Get().MAX_D1D, "");
913 MFEM_VERIFY(Q1D <= DeviceDofQuadLimits::Get().MAX_Q1D, "");
914 auto B = Reshape(b.Read(), Q1D, D1D);
915 auto Bt = Reshape(bt.Read(), D1D, Q1D);
916 auto op = Reshape(op_.Read(), Q1D, Q1D, 2, 2, NF);
917 auto x = Reshape(x_.Read(), D1D, D1D, 2, NF);
918 auto y = Reshape(y_.ReadWrite(), D1D, D1D, 2, NF);
919
920 mfem::forall_2D_batch(NF, Q1D, Q1D, NBZ, [=] MFEM_HOST_DEVICE (int f)
921 {
922 const int tidz = MFEM_THREAD_ID(z);
923 const int D1D = T_D1D ? T_D1D : d1d;
924 const int Q1D = T_Q1D ? T_Q1D : q1d;
925 // the following variables are evaluated at compile time
926 constexpr int NBZ = T_NBZ ? T_NBZ : 1;
927 constexpr int max_D1D = T_D1D ? T_D1D : DofQuadLimits::MAX_D1D;
928 constexpr int max_Q1D = T_Q1D ? T_Q1D : DofQuadLimits::MAX_Q1D;
929 MFEM_SHARED real_t u0[NBZ][max_D1D][max_D1D];
930 MFEM_SHARED real_t u1[NBZ][max_D1D][max_D1D];
931 MFEM_FOREACH_THREAD(d1,x,D1D)
932 {
933 MFEM_FOREACH_THREAD(d2,y,D1D)
934 {
935 u0[tidz][d1][d2] = x(d1,d2,0,f);
936 u1[tidz][d1][d2] = x(d1,d2,1,f);
937 }
938 }
939 MFEM_SYNC_THREAD;
940 MFEM_SHARED real_t Bu0[NBZ][max_Q1D][max_D1D];
941 MFEM_SHARED real_t Bu1[NBZ][max_Q1D][max_D1D];
942 MFEM_FOREACH_THREAD(q1,x,Q1D)
943 {
944 MFEM_FOREACH_THREAD(d2,y,D1D)
945 {
946 real_t Bu0_ = 0.0;
947 real_t Bu1_ = 0.0;
948 for (int d1 = 0; d1 < D1D; ++d1)
949 {
950 const real_t b = B(q1,d1);
951 Bu0_ += b*u0[tidz][d1][d2];
952 Bu1_ += b*u1[tidz][d1][d2];
953 }
954 Bu0[tidz][q1][d2] = Bu0_;
955 Bu1[tidz][q1][d2] = Bu1_;
956 }
957 }
958 MFEM_SYNC_THREAD;
959 MFEM_SHARED real_t BBu0[NBZ][max_Q1D][max_Q1D];
960 MFEM_SHARED real_t BBu1[NBZ][max_Q1D][max_Q1D];
961 MFEM_FOREACH_THREAD(q1,x,Q1D)
962 {
963 MFEM_FOREACH_THREAD(q2,y,Q1D)
964 {
965 real_t BBu0_ = 0.0;
966 real_t BBu1_ = 0.0;
967 for (int d2 = 0; d2 < D1D; ++d2)
968 {
969 const real_t b = B(q2,d2);
970 BBu0_ += b*Bu0[tidz][q1][d2];
971 BBu1_ += b*Bu1[tidz][q1][d2];
972 }
973 BBu0[tidz][q1][q2] = BBu0_;
974 BBu1[tidz][q1][q2] = BBu1_;
975 }
976 }
977 MFEM_SYNC_THREAD;
978 MFEM_SHARED real_t DBBu0[NBZ][max_Q1D][max_Q1D];
979 MFEM_SHARED real_t DBBu1[NBZ][max_Q1D][max_Q1D];
980 MFEM_FOREACH_THREAD(q1,x,Q1D)
981 {
982 MFEM_FOREACH_THREAD(q2,y,Q1D)
983 {
984 const real_t D00 = op(q1,q2,0,0,f);
985 const real_t D01 = op(q1,q2,0,1,f);
986 const real_t D10 = op(q1,q2,1,0,f);
987 const real_t D11 = op(q1,q2,1,1,f);
988 const real_t u0q = BBu0[tidz][q1][q2];
989 const real_t u1q = BBu1[tidz][q1][q2];
990 DBBu0[tidz][q1][q2] = D00*u0q + D01*u1q;
991 DBBu1[tidz][q1][q2] = D10*u0q + D11*u1q;
992 }
993 }
994 MFEM_SYNC_THREAD;
995 MFEM_SHARED real_t BDBBu0[NBZ][max_Q1D][max_D1D];
996 MFEM_SHARED real_t BDBBu1[NBZ][max_Q1D][max_D1D];
997 MFEM_FOREACH_THREAD(q1,x,Q1D)
998 {
999 MFEM_FOREACH_THREAD(d2,y,D1D)
1000 {
1001 real_t BDBBu0_ = 0.0;
1002 real_t BDBBu1_ = 0.0;
1003 for (int q2 = 0; q2 < Q1D; ++q2)
1004 {
1005 const real_t b = Bt(d2,q2);
1006 BDBBu0_ += b*DBBu0[tidz][q1][q2];
1007 BDBBu1_ += b*DBBu1[tidz][q1][q2];
1008 }
1009 BDBBu0[tidz][q1][d2] = BDBBu0_;
1010 BDBBu1[tidz][q1][d2] = BDBBu1_;
1011 }
1012 }
1013 MFEM_SYNC_THREAD;
1014 MFEM_FOREACH_THREAD(d1,x,D1D)
1015 {
1016 MFEM_FOREACH_THREAD(d2,y,D1D)
1017 {
1018 real_t BBDBBu0_ = 0.0;
1019 real_t BBDBBu1_ = 0.0;
1020 for (int q1 = 0; q1 < Q1D; ++q1)
1021 {
1022 const real_t b = Bt(d1,q1);
1023 BBDBBu0_ += b*BDBBu0[tidz][q1][d2];
1024 BBDBBu1_ += b*BDBBu1[tidz][q1][d2];
1025 }
1026 y(d1,d2,0,f) += BBDBBu0_;
1027 y(d1,d2,1,f) += BBDBBu1_;
1028 }
1029 }
1030 });
1031}
1032
1033static void PADGTraceApplyTranspose(const int dim,
1034 const int D1D,
1035 const int Q1D,
1036 const int NF,
1037 const Array<real_t> &B,
1038 const Array<real_t> &Bt,
1039 const Vector &op,
1040 const Vector &x,
1041 Vector &y)
1042{
1043 if (dim == 2)
1044 {
1045 switch ((D1D << 4 ) | Q1D)
1046 {
1047 case 0x22: return PADGTraceApplyTranspose2D<2,2>(NF,B,Bt,op,x,y);
1048 case 0x33: return PADGTraceApplyTranspose2D<3,3>(NF,B,Bt,op,x,y);
1049 case 0x44: return PADGTraceApplyTranspose2D<4,4>(NF,B,Bt,op,x,y);
1050 case 0x55: return PADGTraceApplyTranspose2D<5,5>(NF,B,Bt,op,x,y);
1051 case 0x66: return PADGTraceApplyTranspose2D<6,6>(NF,B,Bt,op,x,y);
1052 case 0x77: return PADGTraceApplyTranspose2D<7,7>(NF,B,Bt,op,x,y);
1053 case 0x88: return PADGTraceApplyTranspose2D<8,8>(NF,B,Bt,op,x,y);
1054 case 0x99: return PADGTraceApplyTranspose2D<9,9>(NF,B,Bt,op,x,y);
1055 default: return PADGTraceApplyTranspose2D(NF,B,Bt,op,x,y,D1D,Q1D);
1056 }
1057 }
1058 else if (dim == 3)
1059 {
1060 switch ((D1D << 4 ) | Q1D)
1061 {
1062 case 0x22: return SmemPADGTraceApplyTranspose3D<2,2>(NF,B,Bt,op,x,y);
1063 case 0x23: return SmemPADGTraceApplyTranspose3D<2,3>(NF,B,Bt,op,x,y);
1064 case 0x34: return SmemPADGTraceApplyTranspose3D<3,4>(NF,B,Bt,op,x,y);
1065 case 0x45: return SmemPADGTraceApplyTranspose3D<4,5>(NF,B,Bt,op,x,y);
1066 case 0x56: return SmemPADGTraceApplyTranspose3D<5,6>(NF,B,Bt,op,x,y);
1067 case 0x67: return SmemPADGTraceApplyTranspose3D<6,7>(NF,B,Bt,op,x,y);
1068 case 0x78: return SmemPADGTraceApplyTranspose3D<7,8>(NF,B,Bt,op,x,y);
1069 case 0x89: return SmemPADGTraceApplyTranspose3D<8,9>(NF,B,Bt,op,x,y);
1070 default: return PADGTraceApplyTranspose3D(NF,B,Bt,op,x,y,D1D,Q1D);
1071 }
1072 }
1073 MFEM_ABORT("Unknown kernel.");
1074}
1075
1076// PA DGTraceIntegrator Apply kernel
1078{
1079 PADGTraceApply(dim, dofs1D, quad1D, nf,
1080 maps->B, maps->Bt,
1081 pa_data, x, y);
1082}
1083
1085{
1086 PADGTraceApplyTranspose(dim, dofs1D, quad1D, nf,
1087 maps->B, maps->Bt,
1088 pa_data, x, y);
1089}
1090
1091} // namespace mfem
const T * Read(bool on_dev=true) const
Shortcut for mfem::Read(a.GetMemory(), a.Size(), on_dev).
Definition array.hpp:337
virtual real_t Eval(ElementTransformation &T, const IntegrationPoint &ip)=0
Evaluate the coefficient in the element described by T at the point ip.
void AddMultPA(const Vector &, Vector &) const override
Method for partially assembled action.
void AssemblePABoundaryFaces(const FiniteElementSpace &fes) override
void AddMultTransposePA(const Vector &x, Vector &y) const override
Method for partially assembled transposed action.
VectorCoefficient * u
void AssemblePAInteriorFaces(const FiniteElementSpace &fes) override
static const IntegrationRule & GetRule(Geometry::Type geom, int order, const FaceElementTransformations &T)
const FaceGeometricFactors * geom
Not owned.
const DofToQuad * maps
Not owned.
static MemoryType GetMemoryType()
(DEPRECATED) Equivalent to GetDeviceMemoryType().
Definition device.hpp:278
static MemoryType GetDeviceMemoryType()
Get the current Device MemoryType. This is the MemoryType used by most MFEM classes when allocating m...
Definition device.hpp:274
@ TENSOR
Tensor product representation using 1D matrices/tensors with dimensions using 1D number of quadrature...
Definition fe_base.hpp:165
Array< real_t > B
Basis functions evaluated at quadrature points.
Definition fe_base.hpp:193
int ndof
Number of degrees of freedom = number of basis functions. When mode is TENSOR, this is the 1D number.
Definition fe_base.hpp:178
int nqpt
Number of quadrature points. When mode is TENSOR, this is the 1D number.
Definition fe_base.hpp:182
Array< real_t > Bt
Transpose of B.
Definition fe_base.hpp:199
Vector normal
Normals at all quadrature points.
Definition mesh.hpp:3037
Vector detJ
Determinants of the Jacobians at all quadrature points.
Definition mesh.hpp:3030
Class FiniteElementSpace - responsible for providing FEM view of the mesh, mainly managing the set of...
Definition fespace.hpp:244
const IntegrationRule * IntRule
Vector data type.
Definition vector.hpp:82
virtual const real_t * HostRead() const
Shortcut for mfem::Read(vec.GetMemory(), vec.Size(), false).
Definition vector.hpp:498
virtual const real_t * Read(bool on_dev=true) const
Shortcut for mfem::Read(vec.GetMemory(), vec.Size(), on_dev).
Definition vector.hpp:494
virtual real_t * ReadWrite(bool on_dev=true)
Shortcut for mfem::ReadWrite(vec.GetMemory(), vec.Size(), on_dev).
Definition vector.hpp:510
void SetSize(int s)
Resize the vector to size s.
Definition vector.hpp:558
Vector beta
const real_t alpha
Definition ex15.cpp:369
int dim
Definition ex24.cpp:53
real_t b
Definition lissajous.cpp:42
const T * Read(const Memory< T > &mem, int size, bool on_dev=true)
Get a pointer for read access to mem with the mfem::Device's DeviceMemoryClass, if on_dev = true,...
Definition device.hpp:341
real_t u(const Vector &xvec)
Definition lor_mms.hpp:22
MFEM_HOST_DEVICE DeviceTensor< sizeof...(Dims), T > Reshape(T *ptr, Dims... dims)
Wrap a pointer as a DeviceTensor with automatically deduced template parameters.
Definition dtensor.hpp:131
void forall_2D_batch(int N, int X, int Y, int BZ, lambda &&body)
Definition forall.hpp:768
int ToLexOrdering(const int dim, const int face_id, const int size1d, const int index)
Convert a dof face index from Native ordering to lexicographic ordering for quads and hexes.
@ COMPRESSED
Enable all above compressions.
float real_t
Definition config.hpp:43
MemoryType
Memory types supported by MFEM.
std::function< real_t(const Vector &)> f(real_t mass_coeff)
Definition lor_mms.hpp:30
void forall(int N, lambda &&body)
Definition forall.hpp:753
FaceType
Definition mesh.hpp:48
void vel(const Vector &x, real_t t, Vector &u)
static const DeviceDofQuadLimits & Get()
Return a const reference to the DeviceDofQuadLimits singleton.
Definition forall.hpp:128