MFEM v4.8.0
Finite element discretization library
Loading...
Searching...
No Matches
bilininteg_hcurlhdiv_kernels.cpp
Go to the documentation of this file.
1// Copyright (c) 2010-2025, Lawrence Livermore National Security, LLC. Produced
2// at the Lawrence Livermore National Laboratory. All Rights reserved. See files
3// LICENSE and NOTICE for details. LLNL-CODE-806117.
4//
5// This file is part of the MFEM library. For more information and source code
6// availability visit https://mfem.org.
7//
8// MFEM is free software; you can redistribute it and/or modify it under the
9// terms of the BSD-3 license. We welcome feedback and contributions, see file
10// CONTRIBUTING.md for details.
11
13
14namespace mfem
15{
16
17namespace internal
18{
19
20// PA H(curl) x H(div) mass assemble 2D kernel, with factor
21// dF^{-1} C dF for a vector or matrix coefficient C.
22// If transpose, use dF^T C dF^{-T} for H(div) x H(curl).
23void PAHcurlHdivMassSetup2D(const int Q1D,
24 const int coeffDim,
25 const int NE,
26 const bool transpose,
27 const Array<real_t> &w_,
28 const Vector &j,
29 Vector &coeff_,
30 Vector &op)
31{
32 const bool symmetric = (coeffDim != 4);
33 auto W = Reshape(w_.Read(), Q1D, Q1D);
34 auto J = Reshape(j.Read(), Q1D, Q1D, 2, 2, NE);
35 auto coeff = Reshape(coeff_.Read(), coeffDim, Q1D, Q1D, NE);
36 auto y = Reshape(op.Write(), 4, Q1D, Q1D, NE);
37
38 const int i11 = 0;
39 const int i12 = transpose ? 2 : 1;
40 const int i21 = transpose ? 1 : 2;
41 const int i22 = 3;
42
43 mfem::forall_2D(NE, Q1D, Q1D, [=] MFEM_HOST_DEVICE (int e)
44 {
45 MFEM_FOREACH_THREAD(qx,x,Q1D)
46 {
47 MFEM_FOREACH_THREAD(qy,y,Q1D)
48 {
49 const real_t J11 = J(qx,qy,0,0,e);
50 const real_t J21 = J(qx,qy,1,0,e);
51 const real_t J12 = J(qx,qy,0,1,e);
52 const real_t J22 = J(qx,qy,1,1,e);
53 const real_t w_detJ = W(qx,qy) / ((J11*J22) - (J21*J12));
54
55 if (coeffDim == 3 || coeffDim == 4) // Matrix coefficient version
56 {
57 // First compute entries of R = MJ
58 const real_t M11 = coeff(i11,qx,qy,e);
59 const real_t M12 = (!symmetric) ? coeff(i12,qx,qy,e) : coeff(1,qx,qy,e);
60 const real_t M21 = (!symmetric) ? coeff(i21,qx,qy,e) : M12;
61 const real_t M22 = (!symmetric) ? coeff(i22,qx,qy,e) : coeff(2,qx,qy,e);
62
63 // J^{-1} M^T
64 const real_t R11 = ( J22*M11 - J12*M12); // 1,1
65 const real_t R12 = ( J22*M21 - J12*M22); // 1,2
66 const real_t R21 = (-J21*M11 + J11*M12); // 2,1
67 const real_t R22 = (-J21*M21 + J11*M22); // 2,2
68
69 // (RJ)^T
70 y(i11,qx,qy,e) = w_detJ * (R11*J11 + R12*J21); // 1,1
71 y(i21,qx,qy,e) = w_detJ * (R11*J12 + R12*J22); // 1,2 (transpose)
72 y(i12,qx,qy,e) = w_detJ * (R21*J11 + R22*J21); // 2,1 (transpose)
73 y(i22,qx,qy,e) = w_detJ * (R21*J12 + R22*J22); // 2,2
74 }
75 else if (coeffDim == 2) // Vector coefficient version
76 {
77 const real_t D1 = coeff(0,qx,qy,e);
78 const real_t D2 = coeff(1,qx,qy,e);
79 const real_t R11 = D1*J11;
80 const real_t R12 = D1*J12;
81 const real_t R21 = D2*J21;
82 const real_t R22 = D2*J22;
83 y(i11,qx,qy,e) = w_detJ * ( J22*R11 - J12*R21); // 1,1
84 y(i21,qx,qy,e) = w_detJ * ( J22*R12 - J12*R22); // 1,2 (transpose)
85 y(i12,qx,qy,e) = w_detJ * (-J21*R11 + J11*R21); // 2,1 (transpose)
86 y(i22,qx,qy,e) = w_detJ * (-J21*R12 + J11*R22); // 2,2
87 }
88 }
89 }
90 });
91}
92
93// PA H(curl) x H(div) mass assemble 3D kernel, with factor
94// dF^{-1} C dF for a vector or matrix coefficient C.
95// If transpose, use dF^T C dF^{-T} for H(div) x H(curl).
96void PAHcurlHdivMassSetup3D(const int Q1D,
97 const int coeffDim,
98 const int NE,
99 const bool transpose,
100 const Array<real_t> &w_,
101 const Vector &j,
102 Vector &coeff_,
103 Vector &op)
104{
105 const bool symmetric = (coeffDim != 9);
106 auto W = Reshape(w_.Read(), Q1D, Q1D, Q1D);
107 auto J = Reshape(j.Read(), Q1D, Q1D, Q1D, 3, 3, NE);
108 auto coeff = Reshape(coeff_.Read(), coeffDim, Q1D, Q1D, Q1D, NE);
109 auto y = Reshape(op.Write(), 9, Q1D, Q1D, Q1D, NE);
110
111 const int i11 = 0;
112 const int i12 = transpose ? 3 : 1;
113 const int i13 = transpose ? 6 : 2;
114 const int i21 = transpose ? 1 : 3;
115 const int i22 = 4;
116 const int i23 = transpose ? 7 : 5;
117 const int i31 = transpose ? 2 : 6;
118 const int i32 = transpose ? 5 : 7;
119 const int i33 = 8;
120
121 mfem::forall_3D(NE, Q1D, Q1D, Q1D, [=] MFEM_HOST_DEVICE (int e)
122 {
123 MFEM_FOREACH_THREAD(qx,x,Q1D)
124 {
125 MFEM_FOREACH_THREAD(qy,y,Q1D)
126 {
127 MFEM_FOREACH_THREAD(qz,z,Q1D)
128 {
129 const real_t J11 = J(qx,qy,qz,0,0,e);
130 const real_t J21 = J(qx,qy,qz,1,0,e);
131 const real_t J31 = J(qx,qy,qz,2,0,e);
132 const real_t J12 = J(qx,qy,qz,0,1,e);
133 const real_t J22 = J(qx,qy,qz,1,1,e);
134 const real_t J32 = J(qx,qy,qz,2,1,e);
135 const real_t J13 = J(qx,qy,qz,0,2,e);
136 const real_t J23 = J(qx,qy,qz,1,2,e);
137 const real_t J33 = J(qx,qy,qz,2,2,e);
138 const real_t detJ = J11 * (J22 * J33 - J32 * J23) -
139 J21 * (J12 * J33 - J32 * J13) +
140 J31 * (J12 * J23 - J22 * J13);
141 const real_t w_detJ = W(qx,qy,qz) / detJ;
142 // adj(J)
143 const real_t A11 = (J22 * J33) - (J23 * J32);
144 const real_t A12 = (J32 * J13) - (J12 * J33);
145 const real_t A13 = (J12 * J23) - (J22 * J13);
146 const real_t A21 = (J31 * J23) - (J21 * J33);
147 const real_t A22 = (J11 * J33) - (J13 * J31);
148 const real_t A23 = (J21 * J13) - (J11 * J23);
149 const real_t A31 = (J21 * J32) - (J31 * J22);
150 const real_t A32 = (J31 * J12) - (J11 * J32);
151 const real_t A33 = (J11 * J22) - (J12 * J21);
152
153 if (coeffDim == 6 || coeffDim == 9) // Matrix coefficient version
154 {
155 // First compute entries of R = M^T J
156 const real_t M11 = (!symmetric) ? coeff(i11,qx,qy,qz,e) : coeff(0,qx,qy,qz,e);
157 const real_t M12 = (!symmetric) ? coeff(i12,qx,qy,qz,e) : coeff(1,qx,qy,qz,e);
158 const real_t M13 = (!symmetric) ? coeff(i13,qx,qy,qz,e) : coeff(2,qx,qy,qz,e);
159 const real_t M21 = (!symmetric) ? coeff(i21,qx,qy,qz,e) : M12;
160 const real_t M22 = (!symmetric) ? coeff(i22,qx,qy,qz,e) : coeff(3,qx,qy,qz,e);
161 const real_t M23 = (!symmetric) ? coeff(i23,qx,qy,qz,e) : coeff(4,qx,qy,qz,e);
162 const real_t M31 = (!symmetric) ? coeff(i31,qx,qy,qz,e) : M13;
163 const real_t M32 = (!symmetric) ? coeff(i32,qx,qy,qz,e) : M23;
164 const real_t M33 = (!symmetric) ? coeff(i33,qx,qy,qz,e) : coeff(5,qx,qy,qz,e);
165
166 const real_t R11 = M11*J11 + M21*J21 + M31*J31;
167 const real_t R12 = M11*J12 + M21*J22 + M31*J32;
168 const real_t R13 = M11*J13 + M21*J23 + M31*J33;
169 const real_t R21 = M12*J11 + M22*J21 + M32*J31;
170 const real_t R22 = M12*J12 + M22*J22 + M32*J32;
171 const real_t R23 = M12*J13 + M22*J23 + M32*J33;
172 const real_t R31 = M13*J11 + M23*J21 + M33*J31;
173 const real_t R32 = M13*J12 + M23*J22 + M33*J32;
174 const real_t R33 = M13*J13 + M23*J23 + M33*J33;
175
176 // y = (J^{-1} M^T J)^T
177 y(i11,qx,qy,qz,e) = w_detJ * (A11*R11 + A12*R21 + A13*R31); // 1,1
178 y(i21,qx,qy,qz,e) = w_detJ * (A11*R12 + A12*R22 + A13*R32); // 1,2
179 y(i31,qx,qy,qz,e) = w_detJ * (A11*R13 + A12*R23 + A13*R33); // 1,3
180 y(i12,qx,qy,qz,e) = w_detJ * (A21*R11 + A22*R21 + A23*R31); // 2,1
181 y(i22,qx,qy,qz,e) = w_detJ * (A21*R12 + A22*R22 + A23*R32); // 2,2
182 y(i32,qx,qy,qz,e) = w_detJ * (A21*R13 + A22*R23 + A23*R33); // 2,3
183 y(i13,qx,qy,qz,e) = w_detJ * (A31*R11 + A32*R21 + A33*R31); // 3,1
184 y(i23,qx,qy,qz,e) = w_detJ * (A31*R12 + A32*R22 + A33*R32); // 3,2
185 y(i33,qx,qy,qz,e) = w_detJ * (A31*R13 + A32*R23 + A33*R33); // 3,3
186 }
187 else if (coeffDim == 3) // Vector coefficient version
188 {
189 const real_t D1 = coeff(0,qx,qy,qz,e);
190 const real_t D2 = coeff(1,qx,qy,qz,e);
191 const real_t D3 = coeff(2,qx,qy,qz,e);
192 // detJ J^{-1} DJ = adj(J) DJ
193 // transpose
194 y(i11,qx,qy,qz,e) = w_detJ * (D1*A11*J11 + D2*A12*J21 + D3*A13*J31); // 1,1
195 y(i21,qx,qy,qz,e) = w_detJ * (D1*A11*J12 + D2*A12*J22 + D3*A13*J32); // 1,2
196 y(i31,qx,qy,qz,e) = w_detJ * (D1*A11*J13 + D2*A12*J23 + D3*A13*J33); // 1,3
197 y(i12,qx,qy,qz,e) = w_detJ * (D1*A21*J11 + D2*A22*J21 + D3*A23*J31); // 2,1
198 y(i22,qx,qy,qz,e) = w_detJ * (D1*A21*J12 + D2*A22*J22 + D3*A23*J32); // 2,2
199 y(i32,qx,qy,qz,e) = w_detJ * (D1*A21*J13 + D2*A22*J23 + D3*A23*J33); // 2,3
200 y(i13,qx,qy,qz,e) = w_detJ * (D1*A31*J11 + D2*A32*J21 + D3*A33*J31); // 3,1
201 y(i23,qx,qy,qz,e) = w_detJ * (D1*A31*J12 + D2*A32*J22 + D3*A33*J32); // 3,2
202 y(i33,qx,qy,qz,e) = w_detJ * (D1*A31*J13 + D2*A32*J23 + D3*A33*J33); // 3,3
203 }
204 }
205 }
206 }
207 });
208}
209
210// Mass operator for H(curl) and H(div) functions, using Piola transformations
211// u = dF^{-T} \hat{u} in H(curl), v = (1 / det dF) dF \hat{v} in H(div).
212void PAHcurlHdivMassApply2D(const int D1D,
213 const int D1Dtest,
214 const int Q1D,
215 const int NE,
216 const bool scalarCoeff,
217 const bool trialHcurl,
218 const bool transpose,
219 const Array<real_t> &Bo_,
220 const Array<real_t> &Bc_,
221 const Array<real_t> &Bot_,
222 const Array<real_t> &Bct_,
223 const Vector &op_,
224 const Vector &x_,
225 Vector &y_)
226{
227 MFEM_VERIFY(D1D <= DeviceDofQuadLimits::Get().HCURL_MAX_D1D,
228 "Error: D1D > MAX_D1D");
229 MFEM_VERIFY(Q1D <= DeviceDofQuadLimits::Get().HCURL_MAX_Q1D,
230 "Error: Q1D > MAX_Q1D");
231 constexpr static int VDIM = 2;
232
233 auto Bo = Reshape(Bo_.Read(), Q1D, D1D-1);
234 auto Bc = Reshape(Bc_.Read(), Q1D, D1D);
235 auto Bot = Reshape(Bot_.Read(), D1Dtest-1, Q1D);
236 auto Bct = Reshape(Bct_.Read(), D1Dtest, Q1D);
237 auto op = Reshape(op_.Read(), scalarCoeff ? 1 : 4, Q1D, Q1D, NE);
238 auto x = Reshape(x_.Read(), 2*(D1D-1)*D1D, NE);
239 auto y = Reshape(y_.ReadWrite(), 2*(D1Dtest-1)*D1Dtest, NE);
240
241 const int i12 = transpose ? 2 : 1;
242 const int i21 = transpose ? 1 : 2;
243
244 mfem::forall(NE, [=] MFEM_HOST_DEVICE (int e)
245 {
246 constexpr static int MAX_Q1D = DofQuadLimits::HCURL_MAX_Q1D;
247
248 real_t mass[MAX_Q1D][MAX_Q1D][VDIM];
249
250 for (int qy = 0; qy < Q1D; ++qy)
251 {
252 for (int qx = 0; qx < Q1D; ++qx)
253 {
254 for (int c = 0; c < VDIM; ++c)
255 {
256 mass[qy][qx][c] = 0.0;
257 }
258 }
259 }
260
261 int osc = 0;
262 for (int c = 0; c < VDIM; ++c) // loop over x, y trial components
263 {
264 const int D1Dy = trialHcurl ? ((c == 1) ? D1D - 1 : D1D) :
265 ((c == 1) ? D1D : D1D - 1);
266 const int D1Dx = trialHcurl ? ((c == 0) ? D1D - 1 : D1D) :
267 ((c == 0) ? D1D : D1D - 1);
268
269 for (int dy = 0; dy < D1Dy; ++dy)
270 {
271 real_t massX[MAX_Q1D];
272 for (int qx = 0; qx < Q1D; ++qx)
273 {
274 massX[qx] = 0.0;
275 }
276
277 for (int dx = 0; dx < D1Dx; ++dx)
278 {
279 const real_t t = x(dx + (dy * D1Dx) + osc, e);
280 for (int qx = 0; qx < Q1D; ++qx)
281 {
282 massX[qx] += t * (trialHcurl ? ((c == 0) ? Bo(qx,dx) : Bc(qx,dx)) :
283 ((c == 0) ? Bc(qx,dx) : Bo(qx,dx)));
284 }
285 }
286
287 for (int qy = 0; qy < Q1D; ++qy)
288 {
289 const real_t wy = trialHcurl ? ((c == 1) ? Bo(qy,dy) : Bc(qy,dy)) :
290 ((c == 1) ? Bc(qy,dy) : Bo(qy,dy));
291 for (int qx = 0; qx < Q1D; ++qx)
292 {
293 mass[qy][qx][c] += massX[qx] * wy;
294 }
295 }
296 }
297
298 osc += D1Dx * D1Dy;
299 } // loop (c) over components
300
301 // Apply D operator.
302 for (int qy = 0; qy < Q1D; ++qy)
303 {
304 for (int qx = 0; qx < Q1D; ++qx)
305 {
306 const real_t O11 = op(0,qx,qy,e);
307 const real_t O12 = scalarCoeff ? 0.0 : op(i12,qx,qy,e);
308 const real_t O21 = scalarCoeff ? 0.0 : op(i21,qx,qy,e);
309 const real_t O22 = scalarCoeff ? O11 : op(3,qx,qy,e);
310 const real_t massX = mass[qy][qx][0];
311 const real_t massY = mass[qy][qx][1];
312 mass[qy][qx][0] = (O11*massX)+(O12*massY);
313 mass[qy][qx][1] = (O21*massX)+(O22*massY);
314 }
315 }
316
317 osc = 0;
318 for (int c = 0; c < VDIM; ++c) // loop over x, y test components
319 {
320 const int D1Dy = trialHcurl ? ((c == 1) ? D1Dtest : D1Dtest - 1) :
321 ((c == 1) ? D1Dtest - 1 : D1Dtest);
322 const int D1Dx = trialHcurl ? ((c == 0) ? D1Dtest : D1Dtest - 1) :
323 ((c == 0) ? D1Dtest - 1 : D1Dtest);
324
325 for (int qy = 0; qy < Q1D; ++qy)
326 {
327 real_t massX[DofQuadLimits::HDIV_MAX_D1D];
328 for (int dx = 0; dx < D1Dx; ++dx)
329 {
330 massX[dx] = 0.0;
331 }
332 for (int qx = 0; qx < Q1D; ++qx)
333 {
334 for (int dx = 0; dx < D1Dx; ++dx)
335 {
336 massX[dx] += mass[qy][qx][c] * (trialHcurl ?
337 ((c == 0) ? Bct(dx,qx) : Bot(dx,qx)) :
338 ((c == 0) ? Bot(dx,qx) : Bct(dx,qx)));
339 }
340 }
341 for (int dy = 0; dy < D1Dy; ++dy)
342 {
343 const real_t wy = trialHcurl ? ((c == 1) ? Bct(dy,qy) : Bot(dy,qy)) :
344 ((c == 1) ? Bot(dy,qy) : Bct(dy,qy));
345 for (int dx = 0; dx < D1Dx; ++dx)
346 {
347 y(dx + (dy * D1Dx) + osc, e) += massX[dx] * wy;
348 }
349 }
350 }
351
352 osc += D1Dx * D1Dy;
353 } // loop c
354 }); // end of element loop
355}
356
357// Mass operator for H(curl) and H(div) functions, using Piola transformations
358// u = dF^{-T} \hat{u} in H(curl), v = (1 / det dF) dF \hat{v} in H(div).
359void PAHcurlHdivMassApply3D(const int D1D,
360 const int D1Dtest,
361 const int Q1D,
362 const int NE,
363 const bool scalarCoeff,
364 const bool trialHcurl,
365 const bool transpose,
366 const Array<real_t> &Bo_,
367 const Array<real_t> &Bc_,
368 const Array<real_t> &Bot_,
369 const Array<real_t> &Bct_,
370 const Vector &op_,
371 const Vector &x_,
372 Vector &y_)
373{
374 MFEM_VERIFY(D1D <= DeviceDofQuadLimits::Get().HCURL_MAX_D1D,
375 "Error: D1D > MAX_D1D");
376 MFEM_VERIFY(Q1D <= DeviceDofQuadLimits::Get().HCURL_MAX_Q1D,
377 "Error: Q1D > MAX_Q1D");
378 constexpr static int VDIM = 3;
379
380 auto Bo = Reshape(Bo_.Read(), Q1D, D1D-1);
381 auto Bc = Reshape(Bc_.Read(), Q1D, D1D);
382 auto Bot = Reshape(Bot_.Read(), D1Dtest-1, Q1D);
383 auto Bct = Reshape(Bct_.Read(), D1Dtest, Q1D);
384 auto op = Reshape(op_.Read(), scalarCoeff ? 1 : 9, Q1D, Q1D, Q1D, NE);
385 auto x = Reshape(x_.Read(), 3*(D1D-1)*D1D*(trialHcurl ? D1D : D1D-1), NE);
386 auto y = Reshape(y_.ReadWrite(), 3*(D1Dtest-1)*D1Dtest*
387 (trialHcurl ? D1Dtest-1 : D1Dtest), NE);
388
389 const int i12 = transpose ? 3 : 1;
390 const int i13 = transpose ? 6 : 2;
391 const int i21 = transpose ? 1 : 3;
392 const int i23 = transpose ? 7 : 5;
393 const int i31 = transpose ? 2 : 6;
394 const int i32 = transpose ? 5 : 7;
395
396 mfem::forall(NE, [=] MFEM_HOST_DEVICE (int e)
397 {
398 constexpr static int MAX_Q1D = DofQuadLimits::HCURL_MAX_Q1D;
399
400 real_t mass[MAX_Q1D][MAX_Q1D][MAX_Q1D][VDIM];
401
402 for (int qz = 0; qz < Q1D; ++qz)
403 {
404 for (int qy = 0; qy < Q1D; ++qy)
405 {
406 for (int qx = 0; qx < Q1D; ++qx)
407 {
408 for (int c = 0; c < VDIM; ++c)
409 {
410 mass[qz][qy][qx][c] = 0.0;
411 }
412 }
413 }
414 }
415
416 int osc = 0;
417 for (int c = 0; c < VDIM; ++c) // loop over x, y, z trial components
418 {
419 const int D1Dz = trialHcurl ? ((c == 2) ? D1D - 1 : D1D) :
420 ((c == 2) ? D1D : D1D - 1);
421 const int D1Dy = trialHcurl ? ((c == 1) ? D1D - 1 : D1D) :
422 ((c == 1) ? D1D : D1D - 1);
423 const int D1Dx = trialHcurl ? ((c == 0) ? D1D - 1 : D1D) :
424 ((c == 0) ? D1D : D1D - 1);
425
426 for (int dz = 0; dz < D1Dz; ++dz)
427 {
428 real_t massXY[MAX_Q1D][MAX_Q1D];
429 for (int qy = 0; qy < Q1D; ++qy)
430 {
431 for (int qx = 0; qx < Q1D; ++qx)
432 {
433 massXY[qy][qx] = 0.0;
434 }
435 }
436
437 for (int dy = 0; dy < D1Dy; ++dy)
438 {
439 real_t massX[MAX_Q1D];
440 for (int qx = 0; qx < Q1D; ++qx)
441 {
442 massX[qx] = 0.0;
443 }
444
445 for (int dx = 0; dx < D1Dx; ++dx)
446 {
447 const real_t t = x(dx + ((dy + (dz * D1Dy)) * D1Dx) + osc, e);
448 for (int qx = 0; qx < Q1D; ++qx)
449 {
450 massX[qx] += t * (trialHcurl ? ((c == 0) ? Bo(qx,dx) : Bc(qx,dx)) :
451 ((c == 0) ? Bc(qx,dx) : Bo(qx,dx)));
452 }
453 }
454
455 for (int qy = 0; qy < Q1D; ++qy)
456 {
457 const real_t wy = trialHcurl ? ((c == 1) ? Bo(qy,dy) : Bc(qy,dy)) :
458 ((c == 1) ? Bc(qy,dy) : Bo(qy,dy));
459 for (int qx = 0; qx < Q1D; ++qx)
460 {
461 const real_t wx = massX[qx];
462 massXY[qy][qx] += wx * wy;
463 }
464 }
465 }
466
467 for (int qz = 0; qz < Q1D; ++qz)
468 {
469 const real_t wz = trialHcurl ? ((c == 2) ? Bo(qz,dz) : Bc(qz,dz)) :
470 ((c == 2) ? Bc(qz,dz) : Bo(qz,dz));
471 for (int qy = 0; qy < Q1D; ++qy)
472 {
473 for (int qx = 0; qx < Q1D; ++qx)
474 {
475 mass[qz][qy][qx][c] += massXY[qy][qx] * wz;
476 }
477 }
478 }
479 }
480
481 osc += D1Dx * D1Dy * D1Dz;
482 } // loop (c) over components
483
484 // Apply D operator.
485 for (int qz = 0; qz < Q1D; ++qz)
486 {
487 for (int qy = 0; qy < Q1D; ++qy)
488 {
489 for (int qx = 0; qx < Q1D; ++qx)
490 {
491 const real_t O11 = op(0,qx,qy,qz,e);
492 const real_t O12 = scalarCoeff ? 0.0 : op(i12,qx,qy,qz,e);
493 const real_t O13 = scalarCoeff ? 0.0 : op(i13,qx,qy,qz,e);
494 const real_t O21 = scalarCoeff ? 0.0 : op(i21,qx,qy,qz,e);
495 const real_t O22 = scalarCoeff ? O11 : op(4,qx,qy,qz,e);
496 const real_t O23 = scalarCoeff ? 0.0 : op(i23,qx,qy,qz,e);
497 const real_t O31 = scalarCoeff ? 0.0 : op(i31,qx,qy,qz,e);
498 const real_t O32 = scalarCoeff ? 0.0 : op(i32,qx,qy,qz,e);
499 const real_t O33 = scalarCoeff ? O11 : op(8,qx,qy,qz,e);
500 const real_t massX = mass[qz][qy][qx][0];
501 const real_t massY = mass[qz][qy][qx][1];
502 const real_t massZ = mass[qz][qy][qx][2];
503 mass[qz][qy][qx][0] = (O11*massX)+(O12*massY)+(O13*massZ);
504 mass[qz][qy][qx][1] = (O21*massX)+(O22*massY)+(O23*massZ);
505 mass[qz][qy][qx][2] = (O31*massX)+(O32*massY)+(O33*massZ);
506 }
507 }
508 }
509
510 for (int qz = 0; qz < Q1D; ++qz)
511 {
512 real_t massXY[DofQuadLimits::HDIV_MAX_D1D][DofQuadLimits::HDIV_MAX_D1D];
513
514 osc = 0;
515 for (int c = 0; c < VDIM; ++c) // loop over x, y, z test components
516 {
517 const int D1Dz = trialHcurl ? ((c == 2) ? D1Dtest : D1Dtest - 1) :
518 ((c == 2) ? D1Dtest - 1 : D1Dtest);
519 const int D1Dy = trialHcurl ? ((c == 1) ? D1Dtest : D1Dtest - 1) :
520 ((c == 1) ? D1Dtest - 1 : D1Dtest);
521 const int D1Dx = trialHcurl ? ((c == 0) ? D1Dtest : D1Dtest - 1) :
522 ((c == 0) ? D1Dtest - 1 : D1Dtest);
523
524 for (int dy = 0; dy < D1Dy; ++dy)
525 {
526 for (int dx = 0; dx < D1Dx; ++dx)
527 {
528 massXY[dy][dx] = 0.0;
529 }
530 }
531 for (int qy = 0; qy < Q1D; ++qy)
532 {
533 real_t massX[DofQuadLimits::HDIV_MAX_D1D];
534 for (int dx = 0; dx < D1Dx; ++dx)
535 {
536 massX[dx] = 0.0;
537 }
538 for (int qx = 0; qx < Q1D; ++qx)
539 {
540 for (int dx = 0; dx < D1Dx; ++dx)
541 {
542 massX[dx] += mass[qz][qy][qx][c] * (trialHcurl ?
543 ((c == 0) ? Bct(dx,qx) : Bot(dx,qx)) :
544 ((c == 0) ? Bot(dx,qx) : Bct(dx,qx)));
545 }
546 }
547 for (int dy = 0; dy < D1Dy; ++dy)
548 {
549 const real_t wy = trialHcurl ? ((c == 1) ? Bct(dy,qy) : Bot(dy,qy)) :
550 ((c == 1) ? Bot(dy,qy) : Bct(dy,qy));
551 for (int dx = 0; dx < D1Dx; ++dx)
552 {
553 massXY[dy][dx] += massX[dx] * wy;
554 }
555 }
556 }
557
558 for (int dz = 0; dz < D1Dz; ++dz)
559 {
560 const real_t wz = trialHcurl ? ((c == 2) ? Bct(dz,qz) : Bot(dz,qz)) :
561 ((c == 2) ? Bot(dz,qz) : Bct(dz,qz));
562 for (int dy = 0; dy < D1Dy; ++dy)
563 {
564 for (int dx = 0; dx < D1Dx; ++dx)
565 {
566 y(dx + ((dy + (dz * D1Dy)) * D1Dx) + osc, e) +=
567 massXY[dy][dx] * wz;
568 }
569 }
570 }
571
572 osc += D1Dx * D1Dy * D1Dz;
573 } // loop c
574 } // loop qz
575 }); // end of element loop
576}
577
578} // namespace internal
579
580} // namespace mfem
MFEM_HOST_DEVICE DeviceTensor< sizeof...(Dims), T > Reshape(T *ptr, Dims... dims)
Wrap a pointer as a DeviceTensor with automatically deduced template parameters.
Definition dtensor.hpp:131
void forall_2D(int N, int X, int Y, lambda &&body)
Definition forall.hpp:762
void forall_3D(int N, int X, int Y, int Z, lambda &&body)
Definition forall.hpp:774
float real_t
Definition config.hpp:43
void forall(int N, lambda &&body)
Definition forall.hpp:753
static const DeviceDofQuadLimits & Get()
Return a const reference to the DeviceDofQuadLimits singleton.
Definition forall.hpp:128