MFEM v4.7.0
Finite element discretization library
Loading...
Searching...
No Matches
lor_rt_impl.hpp
Go to the documentation of this file.
1// Copyright (c) 2010-2024, Lawrence Livermore National Security, LLC. Produced
2// at the Lawrence Livermore National Laboratory. All Rights reserved. See files
3// LICENSE and NOTICE for details. LLNL-CODE-806117.
4//
5// This file is part of the MFEM library. For more information and source code
6// availability visit https://mfem.org.
7//
8// MFEM is free software; you can redistribute it and/or modify it under the
9// terms of the BSD-3 license. We welcome feedback and contributions, see file
10// CONTRIBUTING.md for details.
11
12#include "lor_util.hpp"
15
16namespace mfem
17{
18
19template <int ORDER, int SDIM>
21{
22 const int nel_ho = fes_ho.GetNE();
23
24 static constexpr int nv = 4;
25 static constexpr int ne = 4;
26 static constexpr int dim = 2;
27 static constexpr int ddm2 = (dim*(dim+1))/2;
28 static constexpr int ngeom = ddm2 + 1;
29 static constexpr int o = ORDER;
30 static constexpr int op1 = ORDER + 1;
31 static constexpr int ndof_per_el = dim*o*op1;
32 static constexpr int nnz_per_row = 7;
33 static constexpr int sz_local_mat = ne*ne;
34
35 const bool const_mq = c1.Size() == 1;
36 const auto MQ = const_mq
37 ? Reshape(c1.Read(), 1, 1, 1)
38 : Reshape(c1.Read(), op1, op1, nel_ho);
39 const bool const_dq = c2.Size() == 1;
40 const auto DQ = const_dq
41 ? Reshape(c2.Read(), 1, 1, 1)
42 : Reshape(c2.Read(), op1, op1, nel_ho);
43
44 sparse_ij.SetSize(nnz_per_row*ndof_per_el*nel_ho);
45 auto V = Reshape(sparse_ij.Write(), nnz_per_row, o*op1, dim, nel_ho);
46
47 auto X = X_vert.Read();
48
49 mfem::forall_2D(nel_ho, ORDER, ORDER, [=] MFEM_HOST_DEVICE (int iel_ho)
50 {
51 MFEM_FOREACH_THREAD(iy,y,o)
52 {
53 MFEM_FOREACH_THREAD(ix,x,op1)
54 {
55 for (int c=0; c<2; ++c)
56 {
57 for (int j=0; j<nnz_per_row; ++j)
58 {
59 V(j,ix+iy*op1,c,iel_ho) = 0.0;
60 }
61 }
62 }
63 }
64 MFEM_SYNC_THREAD;
65
66 // Loop over the sub-elements
67 MFEM_FOREACH_THREAD(ky,y,ORDER)
68 {
69 MFEM_FOREACH_THREAD(kx,x,ORDER)
70 {
71 // Compute geometric factors at quadrature points
72 real_t Q_[ngeom*nv];
73 real_t local_mat_[sz_local_mat];
74
75 DeviceTensor<3> Q(Q_, ngeom, 2, 2);
76 DeviceTensor<2> local_mat(local_mat_, ne, ne);
77
78 // local_mat is the local (dense) stiffness matrix
79 for (int i=0; i<sz_local_mat; ++i) { local_mat[i] = 0.0; }
80
81 SetupLORQuadData2D<ORDER,SDIM,true,false>(X, iel_ho, kx, ky, Q, true);
82
83 for (int iqx=0; iqx<2; ++iqx)
84 {
85 for (int iqy=0; iqy<2; ++iqy)
86 {
87 const real_t mq = const_mq ? MQ(0,0,0) : MQ(kx+iqx, ky+iqy, iel_ho);
88 const real_t dq = const_dq ? DQ(0,0,0) : DQ(kx+iqx, ky+iqy, iel_ho);
89 // Loop over x,y components. c=0 => x, c=1 => y
90 for (int cj=0; cj<dim; ++cj)
91 {
92 for (int bj=0; bj<2; ++bj)
93 {
94 const real_t bxj = (cj == 0 && bj == iqx) ? 1 : 0;
95 const real_t byj = (cj == 1 && bj == iqy) ? 1 : 0;
96 const real_t div_j = (bj == 0) ? -1 : 1;
97
98 const real_t jj_loc = bj + 2*cj;
99
100 for (int ci=0; ci<dim; ++ci)
101 {
102 for (int bi=0; bi<2; ++bi)
103 {
104 const real_t bxi = (ci == 0 && bi == iqx) ? 1 : 0;
105 const real_t byi = (ci == 1 && bi == iqy) ? 1 : 0;
106 const real_t div_i = (bi == 0) ? -1 : 1;
107
108 const real_t ii_loc = bi + 2*ci;
109
110 // Only store the lower-triangular part of
111 // the matrix (by symmetry).
112 if (jj_loc > ii_loc) { continue; }
113
114 real_t val = 0.0;
115 val += bxi*bxj*Q(0,iqy,iqx);
116 val += byi*bxj*Q(1,iqy,iqx);
117 val += bxi*byj*Q(1,iqy,iqx);
118 val += byi*byj*Q(2,iqy,iqx);
119 val *= mq;
120 val += dq*div_j*div_i*Q(3,iqy,iqx);
121
122 local_mat(ii_loc, jj_loc) += val;
123 }
124 }
125 }
126 }
127 }
128 }
129 // Assemble the local matrix into the macro-element sparse matrix
130 // in a format similar to coordinate format. The (I,J) arrays
131 // are implicit (not stored explicitly).
132 for (int ii_loc=0; ii_loc<ne; ++ii_loc)
133 {
134 const int ci = ii_loc/2;
135 const int bi = ii_loc%2;
136 const int ix = (ci == 0) ? bi : 0;
137 const int iy = (ci == 1) ? bi : 0;
138
139 int ii = kx+ix + (ky+iy)*((ci == 0) ? op1 : o);
140
141 for (int jj_loc=0; jj_loc<ne; ++jj_loc)
142 {
143 const int cj = jj_loc/2;
144 const int bj = jj_loc%2;
145
146 const int jj_off = (ci == cj) ? (bj - bi + 1) : (3 + 1-bi + 2*bj);
147
148 // Symmetry
149 const real_t val = (jj_loc <= ii_loc)
150 ? local_mat(ii_loc, jj_loc)
151 : local_mat(jj_loc, ii_loc);
152 AtomicAdd(V(jj_off, ii, ci, iel_ho), val);
153 }
154 }
155 }
156 }
157 });
158
159 sparse_mapping.SetSize(nnz_per_row*ndof_per_el);
160 sparse_mapping = -1;
161 auto map = Reshape(sparse_mapping.HostReadWrite(), nnz_per_row, ndof_per_el);
162 for (int ci=0; ci<2; ++ci)
163 {
164 const int i_off = (ci == 0) ? 0 : o*op1;
165 const int id0 = ci;
166 const int id1 = (ci+1)%2;
167
168 const int nxi = (ci == 0) ? op1 : o;
169
170 for (int i0=0; i0<op1; ++i0)
171 {
172 for (int i1=0; i1<o; ++i1)
173 {
174 int ii_lex[2];
175 ii_lex[id0] = i0;
176 ii_lex[id1] = i1;
177 const int ii_el = i_off + ii_lex[0] + ii_lex[1]*nxi;
178
179 for (int cj_rel=0; cj_rel<2; ++cj_rel)
180 {
181 const int cj = (ci + cj_rel) % 2;
182 const int j_off = (cj == 0) ? 0 : o*op1;
183 const int nxj = (cj == 0) ? op1 : o;
184
185 const int j0_begin = (i0 > 0) ? i0-1 : i0;
186 const int j0_end = (cj_rel == 0)
187 ? ((i0 < o) ? i0+1 : i0)
188 : ((i0 < o) ? i0 : i0-1);
189 const int j1_begin = i1;
190 const int j1_end = (cj_rel == 0) ? i1 : i1+1;
191
192 for (int j0=j0_begin; j0<=j0_end; ++j0)
193 {
194 const int d0 = 1 + j0 - i0;
195 for (int j1=j1_begin; j1<=j1_end; ++j1)
196 {
197 const int d1 = j1 - i1;
198 int jj_lex[2];
199 jj_lex[id0] = j0;
200 jj_lex[id1] = j1;
201 const int jj_el = j_off + jj_lex[0] + jj_lex[1]*nxj;
202 const int jj_off = (cj_rel == 0) ? d0 : 3 + d0 + 2*d1;
203 map(jj_off, ii_el) = jj_el;
204 }
205 }
206 }
207 }
208 }
209 }
210}
211
212template <int ORDER>
214{
215 const int nel_ho = fes_ho.GetNE();
216
217 static constexpr int nv = 8; // number of vertices in hexahedron
218 static constexpr int nf = 6; // number of faces in hexahedron
219 static constexpr int dim = 3;
220 static constexpr int ddm2 = (dim*(dim+1))/2;
221 static constexpr int ngeom = ddm2 + 1; // number of geometric factors stored
222 static constexpr int o = ORDER;
223 static constexpr int op1 = ORDER + 1;
224 static constexpr int ndof_per_el = dim*o*o*op1;
225 static constexpr int nnz_per_row = 11;
226 static constexpr int sz_local_mat = nf*nf;
227
228 const bool const_mq = c1.Size() == 1;
229 const auto MQ = const_mq
230 ? Reshape(c1.Read(), 1, 1, 1, 1)
231 : Reshape(c1.Read(), op1, op1, op1, nel_ho);
232 const bool const_dq = c2.Size() == 1;
233 const auto DQ = const_dq
234 ? Reshape(c2.Read(), 1, 1, 1, 1)
235 : Reshape(c2.Read(), op1, op1, op1, nel_ho);
236
237 sparse_ij.SetSize(nnz_per_row*ndof_per_el*nel_ho);
238 auto V = Reshape(sparse_ij.Write(), nnz_per_row, o*o*op1, dim, nel_ho);
239
240 auto X = X_vert.Read();
241
242 // Last thread dimension is lowered to avoid "too many resources" error
243 mfem::forall_3D(nel_ho, ORDER, ORDER, (ORDER>6)?4:ORDER,
244 [=] MFEM_HOST_DEVICE (int iel_ho)
245 {
246 MFEM_FOREACH_THREAD(iz,z,o)
247 {
248 MFEM_FOREACH_THREAD(iy,y,o)
249 {
250 MFEM_FOREACH_THREAD(ix,x,op1)
251 {
252 for (int c=0; c<dim; ++c)
253 {
254 for (int j=0; j<nnz_per_row; ++j)
255 {
256 V(j,ix+iy*op1+iz*o*op1,c,iel_ho) = 0.0;
257 }
258 }
259 }
260 }
261 }
262 MFEM_SYNC_THREAD;
263
264 // Loop over the sub-elements
265 MFEM_FOREACH_THREAD(kz,z,ORDER)
266 {
267 MFEM_FOREACH_THREAD(ky,y,ORDER)
268 {
269 MFEM_FOREACH_THREAD(kx,x,ORDER)
270 {
271 // Geometric factors at quadrature points (element vertices)
272 real_t Q_[ngeom*nv];
273 DeviceTensor<4> Q(Q_, ngeom, 2, 2, 2);
274
275 real_t local_mat_[sz_local_mat];
276 DeviceTensor<2> local_mat(local_mat_, nf, nf);
277 for (int i=0; i<sz_local_mat; ++i) { local_mat[i] = 0.0; }
278
279 real_t vx[8], vy[8], vz[8];
280 LORVertexCoordinates3D<ORDER>(X, iel_ho, kx, ky, kz, vx, vy, vz);
281
282 for (int iqz=0; iqz<2; ++iqz)
283 {
284 for (int iqy=0; iqy<2; ++iqy)
285 {
286 for (int iqx=0; iqx<2; ++iqx)
287 {
288 const real_t x = iqx;
289 const real_t y = iqy;
290 const real_t z = iqz;
291 const real_t w = 1.0/8.0;
292
293 real_t J_[3*3];
294 DeviceTensor<2> J(J_, 3, 3);
295
296 Jacobian3D(x, y, z, vx, vy, vz, J);
297
298 const real_t detJ = Det3D(J);
299 const real_t w_detJ = w/detJ;
300
301 Q(0,iqz,iqy,iqx) = w_detJ*(J(0,0)*J(0,0)+J(1,0)*J(1,0)+J(2,0)*J(2,0)); // 1,1
302 Q(1,iqz,iqy,iqx) = w_detJ*(J(0,1)*J(0,0)+J(1,1)*J(1,0)+J(2,1)*J(2,0)); // 2,1
303 Q(2,iqz,iqy,iqx) = w_detJ*(J(0,2)*J(0,0)+J(1,2)*J(1,0)+J(2,2)*J(2,0)); // 3,1
304 Q(3,iqz,iqy,iqx) = w_detJ*(J(0,1)*J(0,1)+J(1,1)*J(1,1)+J(2,1)*J(2,1)); // 2,2
305 Q(4,iqz,iqy,iqx) = w_detJ*(J(0,2)*J(0,1)+J(1,2)*J(1,1)+J(2,2)*J(2,1)); // 3,2
306 Q(5,iqz,iqy,iqx) = w_detJ*(J(0,2)*J(0,2)+J(1,2)*J(1,2)+J(2,2)*J(2,2)); // 3,3
307 Q(6,iqz,iqy,iqx) = w_detJ;
308 }
309 }
310 }
311 for (int iqz=0; iqz<2; ++iqz)
312 {
313 for (int iqy=0; iqy<2; ++iqy)
314 {
315 for (int iqx=0; iqx<2; ++iqx)
316 {
317 const real_t mq = const_mq ? MQ(0,0,0,0) : MQ(kx+iqx, ky+iqy, kz+iqz, iel_ho);
318 const real_t dq = const_dq ? DQ(0,0,0,0) : DQ(kx+iqx, ky+iqy, kz+iqz, iel_ho);
319 // Loop over x,y,z components. 0 => x, 1 => y, 2 => z
320 for (int cj=0; cj<dim; ++cj)
321 {
322 const real_t jq0 = (cj == 0) ? iqx : ((cj == 1) ? iqy : iqz);
323
324 const int jd_0 = cj;
325 const int jd_1 = (cj + 1)%3;
326 const int jd_2 = (cj + 2)%3;
327
328 for (int bj=0; bj<2; ++bj) // 2 faces in each dim
329 {
330 const real_t div_j = (bj == 0) ? -1 : 1;
331
332 real_t basis_j[3];
333 basis_j[jd_0] = (bj == jq0) ? 1 : 0;
334 basis_j[jd_1] = 0.0;
335 basis_j[jd_2] = 0.0;
336
337 const int jj_loc = bj + 2*cj;
338
339 for (int ci=0; ci<dim; ++ci)
340 {
341 const real_t iq0 = (ci == 0) ? iqx : ((ci == 1) ? iqy : iqz);
342
343 const int id_0 = ci;
344 const int id_1 = (ci + 1)%3;
345 const int id_2 = (ci + 2)%3;
346
347 for (int bi=0; bi<2; ++bi)
348 {
349 const real_t div_i = (bi == 0) ? -1 : 1;
350
351 real_t basis_i[3];
352 basis_i[id_0] = (bi == iq0) ? 1 : 0;
353 basis_i[id_1] = 0.0;
354 basis_i[id_2] = 0.0;
355
356 const int ii_loc = bi + 2*ci;
357
358 // Only store the lower-triangular part of
359 // the matrix (by symmetry).
360 if (jj_loc > ii_loc) { continue; }
361
362 const real_t div_div = Q(6,iqz,iqy,iqx)*div_i*div_j;
363
364 real_t basis_basis = 0.0;
365 basis_basis += Q(0,iqz,iqy,iqx)*basis_i[0]*basis_j[0];
366 basis_basis += Q(1,iqz,iqy,iqx)*(basis_i[0]*basis_j[1] + basis_i[1]*basis_j[0]);
367 basis_basis += Q(2,iqz,iqy,iqx)*(basis_i[0]*basis_j[2] + basis_i[2]*basis_j[0]);
368 basis_basis += Q(3,iqz,iqy,iqx)*basis_i[1]*basis_j[1];
369 basis_basis += Q(4,iqz,iqy,iqx)*(basis_i[1]*basis_j[2] + basis_i[2]*basis_j[1]);
370 basis_basis += Q(5,iqz,iqy,iqx)*basis_i[2]*basis_j[2];
371
372 const real_t val = dq*div_div + mq*basis_basis;
373 // const double val = 1.0;
374
375 local_mat(ii_loc, jj_loc) += val;
376 }
377 }
378 }
379 }
380 }
381 }
382 }
383 // Assemble the local matrix into the macro-element sparse matrix
384 // The nonzeros of the macro-element sparse matrix are ordered as
385 // follows:
386 //
387 // The axes are ordered relative to the direction of the basis
388 // vector, e.g. for x-vectors, the axes are (x,y,z), for
389 // y-vectors the axes are (y,z,x), and for z-vectors the axes are
390 // (z,x,y).
391 //
392 // The nonzeros are then given in "rotated lexicographic"
393 // ordering, according to these axes.
394 for (int ii_loc=0; ii_loc<nf; ++ii_loc)
395 {
396 const int ci = ii_loc/2;
397 const int bi = ii_loc%2;
398
399 const int id0 = ci;
400 const int id1 = (ci+1)%3;
401 const int id2 = (ci+2)%3;
402
403 const int i0 = bi;
404 const int i1 = 0;
405 const int i2 = 0;
406
407 int ii_lex[3];
408 ii_lex[id0] = i0;
409 ii_lex[id1] = i1;
410 ii_lex[id2] = i2;
411
412 const int nx = (ci == 0) ? op1 : o;
413 const int ny = (ci == 1) ? op1 : o;
414
415 const int ii = kx+ii_lex[0] + (ky+ii_lex[1])*nx + (kz+ii_lex[2])*nx*ny;
416
417 for (int jj_loc=0; jj_loc<nf; ++jj_loc)
418 {
419 const int cj = jj_loc/2;
420 // add 3 to take modulus (rather than remainder) when
421 // (cj - ci) is negative
422 const int cj_rel = (3 + cj - ci)%3;
423
424 const int bj = jj_loc%2;
425
426 const int jd0 = cj_rel;
427 const int jd1 = (cj_rel+1)%3;
428 const int jd2 = (cj_rel+2)%3;
429
430 int jj_rel[3];
431 jj_rel[jd0] = bj;
432 jj_rel[jd1] = 0;
433 jj_rel[jd2] = 0;
434
435 const int d0 = jj_rel[0] - i0 + 1;
436 const int d1 = jj_rel[1] - i1;
437 const int d2 = jj_rel[2] - i2;
438 int jj_off;
439 if (cj_rel == 0) { jj_off = d0; }
440 else if (cj_rel == 1) { jj_off = 3 + d0 + 2*d1; }
441 else /* if (cj_rel == 2) */ { jj_off = 7 + d0 + 2*d2; }
442
443 // Symmetry
444 const real_t val = (jj_loc <= ii_loc)
445 ? local_mat(ii_loc, jj_loc)
446 : local_mat(jj_loc, ii_loc);
447 AtomicAdd(V(jj_off, ii, ci, iel_ho), val);
448 }
449 }
450 }
451 }
452 }
453 });
454
455 sparse_mapping.SetSize(nnz_per_row*ndof_per_el);
456 sparse_mapping = -1;
457 auto map = Reshape(sparse_mapping.HostReadWrite(), nnz_per_row, ndof_per_el);
458 for (int ci=0; ci<dim; ++ci)
459 {
460 const int i_off = ci*o*o*op1;
461 const int id0 = ci;
462 const int id1 = (ci+1)%3;
463 const int id2 = (ci+2)%3;
464
465 const int nxi = (ci == 0) ? op1 : o;
466 const int nyi = (ci == 1) ? op1 : o;
467
468 for (int i0=0; i0<op1; ++i0)
469 {
470 for (int i1=0; i1<o; ++i1)
471 {
472 for (int i2=0; i2<o; ++i2)
473 {
474 int ii_lex[3];
475 ii_lex[id0] = i0;
476 ii_lex[id1] = i1;
477 ii_lex[id2] = i2;
478 const int ii_el = i_off + ii_lex[0] + ii_lex[1]*nxi + ii_lex[2]*nxi*nyi;
479
480 for (int cj_rel=0; cj_rel<dim; ++cj_rel)
481 {
482 const int cj = (ci + cj_rel) % 3;
483 const int j_off = cj*o*o*op1;
484
485 const int nxj = (cj == 0) ? op1 : o;
486 const int nyj = (cj == 1) ? op1 : o;
487
488 const int j0_begin = (i0 > 0) ? i0-1 : i0;
489 const int j0_end = (cj_rel == 0)
490 ? ((i0 < o) ? i0+1 : i0)
491 : ((i0 < o) ? i0 : i0-1);
492 const int j1_begin = i1;
493 const int j1_end = (cj_rel == 1) ? i1+1 : i1;
494 const int j2_begin = i2;
495 const int j2_end = (cj_rel == 2) ? i2+1 : i2;
496
497 for (int j0=j0_begin; j0<=j0_end; ++j0)
498 {
499 const int d0 = 1 + j0 - i0;
500 for (int j1=j1_begin; j1<=j1_end; ++j1)
501 {
502 const int d1 = j1 - i1;
503 for (int j2=j2_begin; j2<=j2_end; ++j2)
504 {
505 const int d2 = j2 - i2;
506 int jj_lex[3];
507 jj_lex[id0] = j0;
508 jj_lex[id1] = j1;
509 jj_lex[id2] = j2;
510 const int jj_el = j_off + jj_lex[0] + jj_lex[1]*nxj + jj_lex[2]*nxj*nyj;
511 int jj_off;
512 if (cj_rel == 0) { jj_off = d0; }
513 else if (cj_rel == 1) { jj_off = 3 + d0 + 2*d1; }
514 else /* if (cj_rel == 2) */ { jj_off = 7 + d0 + 2*d2; }
515 map(jj_off, ii_el) = jj_el;
516 }
517 }
518 }
519 }
520 }
521 }
522 }
523 }
524}
525
526} // namespace mfem
MFEM_HOST_DEVICE T AtomicAdd(T &add, const T val)
Definition backends.hpp:92
void SetSize(int nsize)
Change the logical size of the array, keep existing entries.
Definition array.hpp:697
T * HostReadWrite()
Shortcut for mfem::ReadWrite(a.GetMemory(), a.Size(), false).
Definition array.hpp:337
FiniteElementSpace & fes_ho
The associated high-order space.
CoefficientVector c2
Coefficient of second integrator.
Vector & X_vert
Mesh coordinate vector.
Vector & sparse_ij
Local element sparsity matrix data.
CoefficientVector c1
Coefficient of first integrator.
Array< int > & sparse_mapping
Local element sparsity pattern.
A basic generic Tensor class, appropriate for use on the GPU.
Definition dtensor.hpp:82
int GetNE() const
Returns number of elements in the mesh.
Definition fespace.hpp:740
virtual const real_t * Read(bool on_dev=true) const
Shortcut for mfem::Read(vec.GetMemory(), vec.Size(), on_dev).
Definition vector.hpp:474
int Size() const
Returns the size of the vector.
Definition vector.hpp:218
void SetSize(int s)
Resize the vector to size s.
Definition vector.hpp:538
virtual real_t * Write(bool on_dev=true)
Shortcut for mfem::Write(vec.GetMemory(), vec.Size(), on_dev).
Definition vector.hpp:482
int dim
Definition ex24.cpp:53
MFEM_HOST_DEVICE void SetupLORQuadData2D(const real_t *X, int iel_ho, int kx, int ky, DeviceTensor< 3 > &Q, bool piola)
Definition lor_util.hpp:162
MFEM_HOST_DEVICE DeviceTensor< sizeof...(Dims), T > Reshape(T *ptr, Dims... dims)
Wrap a pointer as a DeviceTensor with automatically deduced template parameters.
Definition dtensor.hpp:131
MFEM_HOST_DEVICE void Jacobian3D(const real_t x, const real_t y, const real_t z, const real_t vx[8], const real_t vy[8], const real_t vz[8], DeviceMatrix &J)
Definition lor_util.hpp:210
void forall_2D(int N, int X, int Y, lambda &&body)
Definition forall.hpp:763
void forall_3D(int N, int X, int Y, int Z, lambda &&body)
Definition forall.hpp:775
MFEM_HOST_DEVICE real_t Det3D(DeviceMatrix &J)
Definition lor_util.hpp:28
float real_t
Definition config.hpp:43
MFEM_HOST_DEVICE void LORVertexCoordinates3D(const real_t *X, int iel_ho, int kx, int ky, int kz, real_t vx[8], real_t vy[8], real_t vz[8])
Definition lor_util.hpp:75