MFEM  v4.6.0
Finite element discretization library
lor_nd.cpp
Go to the documentation of this file.
1 // Copyright (c) 2010-2023, Lawrence Livermore National Security, LLC. Produced
2 // at the Lawrence Livermore National Laboratory. All Rights reserved. See files
3 // LICENSE and NOTICE for details. LLNL-CODE-806117.
4 //
5 // This file is part of the MFEM library. For more information and source code
6 // availability visit https://mfem.org.
7 //
8 // MFEM is free software; you can redistribute it and/or modify it under the
9 // terms of the BSD-3 license. We welcome feedback and contributions, see file
10 // CONTRIBUTING.md for details.
11 
12 #include "lor_nd.hpp"
13 #include "lor_util.hpp"
14 #include "../../linalg/dtensor.hpp"
15 #include "../../general/forall.hpp"
16 
17 namespace mfem
18 {
19 
20 template <int ORDER>
22 {
23  const int nel_ho = fes_ho.GetNE();
24 
25  static constexpr int nv = 4;
26  static constexpr int ne = 4;
27  static constexpr int dim = 2;
28  static constexpr int ddm2 = (dim*(dim+1))/2;
29  static constexpr int ngeom = ddm2 + 1;
30  static constexpr int o = ORDER;
31  static constexpr int op1 = ORDER + 1;
32  static constexpr int ndof_per_el = dim*o*op1;
33  static constexpr int nnz_per_row = 7;
34  static constexpr int sz_local_mat = ne*ne;
35 
36  const bool const_mq = c1.Size() == 1;
37  const auto MQ = const_mq
38  ? Reshape(c1.Read(), 1, 1, 1)
39  : Reshape(c1.Read(), op1, op1, nel_ho);
40  const bool const_dq = c2.Size() == 1;
41  const auto DQ = const_dq
42  ? Reshape(c2.Read(), 1, 1, 1)
43  : Reshape(c2.Read(), op1, op1, nel_ho);
44 
45  sparse_ij.SetSize(nnz_per_row*ndof_per_el*nel_ho);
46  auto V = Reshape(sparse_ij.Write(), nnz_per_row, o*op1, dim, nel_ho);
47 
48  auto X = X_vert.Read();
49 
50  mfem::forall_2D(nel_ho, ORDER, ORDER, [=] MFEM_HOST_DEVICE (int iel_ho)
51  {
52  // Assemble a sparse matrix over the macro-element by looping over each
53  // subelement.
54  // V(j,ix,iy) stores the jth nonzero in the row of the sparse matrix
55  // corresponding to local DOF (ix, iy).
56  MFEM_FOREACH_THREAD(iy,y,o)
57  {
58  MFEM_FOREACH_THREAD(ix,x,op1)
59  {
60  for (int c=0; c<2; ++c)
61  {
62  for (int j=0; j<nnz_per_row; ++j)
63  {
64  V(j,ix+iy*op1,c,iel_ho) = 0.0;
65  }
66  }
67  }
68  }
69  MFEM_SYNC_THREAD;
70 
71  // Loop over the sub-elements
72  MFEM_FOREACH_THREAD(ky,y,ORDER)
73  {
74  MFEM_FOREACH_THREAD(kx,x,ORDER)
75  {
76  // Compute geometric factors at quadrature points
77  double Q_[ngeom*nv];
78  double local_mat_[sz_local_mat];
79 
80  DeviceTensor<3> Q(Q_, ngeom, 2, 2);
81  DeviceTensor<2> local_mat(local_mat_, ne, ne);
82 
83  // local_mat is the local (dense) stiffness matrix
84  for (int i=0; i<sz_local_mat; ++i) { local_mat[i] = 0.0; }
85 
86  double vx[4], vy[4];
87  LORVertexCoordinates2D<ORDER>(X, iel_ho, kx, ky, vx, vy);
88 
89  for (int iqx=0; iqx<2; ++iqx)
90  {
91  for (int iqy=0; iqy<2; ++iqy)
92  {
93  const double x = iqx;
94  const double y = iqy;
95  const double w = 1.0/4.0;
96 
97  double J_[2*2];
98  DeviceTensor<2> J(J_, 2, 2);
99 
100  Jacobian2D(x, y, vx, vy, J);
101 
102  const double detJ = Det2D(J);
103  const double w_detJ = w/detJ;
104 
105  Q(0,iqy,iqx) = w_detJ * (J(0,1)*J(0,1) + J(1,1)*J(1,1)); // 1,1
106  Q(1,iqy,iqx) = -w_detJ * (J(0,1)*J(0,0) + J(1,1)*J(1,0)); // 1,2
107  Q(2,iqy,iqx) = w_detJ * (J(0,0)*J(0,0) + J(1,0)*J(1,0)); // 2,2
108  Q(3,iqy,iqx) = w_detJ;
109  }
110  }
111  for (int iqx=0; iqx<2; ++iqx)
112  {
113  for (int iqy=0; iqy<2; ++iqy)
114  {
115  const double mq = const_mq ? MQ(0,0,0) : MQ(kx+iqx, ky+iqy, iel_ho);
116  const double dq = const_dq ? DQ(0,0,0) : DQ(kx+iqx, ky+iqy, iel_ho);
117  // Loop over x,y components. c=0 => x, c=1 => y
118  for (int cj=0; cj<dim; ++cj)
119  {
120  for (int bj=0; bj<2; ++bj)
121  {
122  const double curl_j = ((cj == 0) ? 1 : -1)*((bj == 0) ? 1 : -1);
123  const double bxj = (cj == 0) ? ((bj == iqy) ? 1 : 0) : 0;
124  const double byj = (cj == 1) ? ((bj == iqx) ? 1 : 0) : 0;
125 
126  const double jj_loc = bj + 2*cj;
127 
128  for (int ci=0; ci<dim; ++ci)
129  {
130  for (int bi=0; bi<2; ++bi)
131  {
132  const double curl_i = ((ci == 0) ? 1 : -1)*((bi == 0) ? 1 : -1);
133  const double bxi = (ci == 0) ? ((bi == iqy) ? 1 : 0) : 0;
134  const double byi = (ci == 1) ? ((bi == iqx) ? 1 : 0) : 0;
135 
136  const double ii_loc = bi + 2*ci;
137 
138  // Only store the lower-triangular part of
139  // the matrix (by symmetry).
140  if (jj_loc > ii_loc) { continue; }
141 
142  double val = 0.0;
143  val += bxi*bxj*Q(0,iqy,iqx);
144  val += byi*bxj*Q(1,iqy,iqx);
145  val += bxi*byj*Q(1,iqy,iqx);
146  val += byi*byj*Q(2,iqy,iqx);
147  val *= mq;
148  val += dq*curl_i*curl_j*Q(3,iqy,iqx);
149 
150  local_mat(ii_loc, jj_loc) += val;
151  }
152  }
153  }
154  }
155  }
156  }
157  // Assemble the local matrix into the macro-element sparse matrix
158  // in a format similar to coordinate format. The (I,J) arrays
159  // are implicit (not stored explicitly).
160  for (int ii_loc=0; ii_loc<ne; ++ii_loc)
161  {
162  const int ci = ii_loc/2;
163  const int bi = ii_loc%2;
164  const int ix = (ci == 0) ? 0 : bi;
165  const int iy = (ci == 1) ? 0 : bi;
166 
167  int ii = (ci == 0) ? kx+ix + (ky+iy)*o : kx+ix + (ky+iy)*op1;
168 
169  for (int jj_loc=0; jj_loc<ne; ++jj_loc)
170  {
171  const int cj = jj_loc/2;
172  const int bj = jj_loc%2;
173 
174  const int jj_off = (ci == cj) ? (bj - bi + 1) : (3 + bj + (1-bi)*2);
175 
176  // Symmetry
177  const double val = (jj_loc <= ii_loc)
178  ? local_mat(ii_loc, jj_loc)
179  : local_mat(jj_loc, ii_loc);
180  AtomicAdd(V(jj_off, ii, ci, iel_ho), val);
181  }
182  }
183  }
184  }
185  });
186 
187  sparse_mapping.SetSize(nnz_per_row*ndof_per_el);
188  sparse_mapping = -1;
189  auto map = Reshape(sparse_mapping.HostReadWrite(), nnz_per_row, ndof_per_el);
190  for (int ci=0; ci<2; ++ci)
191  {
192  for (int i1=0; i1<o; ++i1)
193  {
194  for (int i2=0; i2<op1; ++i2)
195  {
196  const int ii_el = (ci == 0) ? i1 + i2*o : i2 + i1*op1 + o*op1;
197  for (int cj=0; cj<2; ++cj)
198  {
199  const int j1_begin = (ci == cj) ? i1 : ((i2 > 0) ? i2-1 : i2);
200  const int j1_end = (ci == cj) ? i1 : ((i2 < o) ? i2 : i2-1);
201  const int j2_begin = (ci == cj) ? ((i2 > 0) ? i2-1 : i2) : i1;
202  const int j2_end = (ci == cj) ? ((i2 < o) ? i2+1 : i2) : i1+1;
203 
204  for (int j1=j1_begin; j1<=j1_end; ++j1)
205  {
206  for (int j2=j2_begin; j2<=j2_end; ++j2)
207  {
208  const int jj_el = (cj == 0) ? j1 + j2*o : j2 + j1*op1 + o*op1;
209  int jj_off = (ci == cj) ? (j2-i2+1) : 3 + (j2-i1) + 2*(j1-i2+1);
210  map(jj_off, ii_el) = jj_el;
211  }
212  }
213  }
214  }
215  }
216  }
217 }
218 
219 template <int ORDER>
221 {
222  const int nel_ho = fes_ho.GetNE();
223 
224  static constexpr int nv = 8; // number of vertices in hexahedron
225  static constexpr int ne = 12; // number of edges in hexahedron
226  static constexpr int dim = 3;
227  static constexpr int ddm2 = (dim*(dim+1))/2;
228  static constexpr int ngeom = 2*ddm2; // number of geometric factors stored
229  static constexpr int o = ORDER;
230  static constexpr int op1 = ORDER + 1;
231  static constexpr int ndof_per_el = dim*o*op1*op1;
232  static constexpr int nnz_per_row = 33;
233  static constexpr int sz_local_mat = ne*ne;
234 
235  const bool const_mq = c1.Size() == 1;
236  const auto MQ = const_mq
237  ? Reshape(c1.Read(), 1, 1, 1, 1)
238  : Reshape(c1.Read(), op1, op1, op1, nel_ho);
239  const bool const_dq = c2.Size() == 1;
240  const auto DQ = const_dq
241  ? Reshape(c2.Read(), 1, 1, 1, 1)
242  : Reshape(c2.Read(), op1, op1, op1, nel_ho);
243 
244  sparse_ij.SetSize(nnz_per_row*ndof_per_el*nel_ho);
245  auto V = Reshape(sparse_ij.Write(), nnz_per_row, o*op1*op1, dim, nel_ho);
246 
247  auto X = X_vert.Read();
248 
249  // Last thread dimension is lowered to avoid "too many resources" error
250  mfem::forall_3D(nel_ho, ORDER, ORDER, (ORDER>6)?4:ORDER,
251  [=] MFEM_HOST_DEVICE (int iel_ho)
252  {
253  MFEM_FOREACH_THREAD(iz,z,o)
254  {
255  MFEM_FOREACH_THREAD(iy,y,op1)
256  {
257  MFEM_FOREACH_THREAD(ix,x,op1)
258  {
259  for (int c=0; c<dim; ++c)
260  {
261  for (int j=0; j<nnz_per_row; ++j)
262  {
263  V(j,ix+iy*op1+iz*op1*op1,c,iel_ho) = 0.0;
264  }
265  }
266  }
267  }
268  }
269  MFEM_SYNC_THREAD;
270 
271  // Loop over the sub-elements
272  MFEM_FOREACH_THREAD(kz,z,ORDER)
273  {
274  MFEM_FOREACH_THREAD(ky,y,ORDER)
275  {
276  MFEM_FOREACH_THREAD(kx,x,ORDER)
277  {
278  // Geometric factors at quadrature points (element vertices)
279  double Q_[ngeom*nv];
280  DeviceTensor<4> Q(Q_, ngeom, 2, 2, 2);
281 
282  double local_mat_[sz_local_mat];
283  DeviceTensor<2> local_mat(local_mat_, ne, ne);
284  for (int i=0; i<sz_local_mat; ++i) { local_mat[i] = 0.0; }
285 
286  double vx[8], vy[8], vz[8];
287  LORVertexCoordinates3D<ORDER>(X, iel_ho, kx, ky, kz, vx, vy, vz);
288 
289  for (int iqz=0; iqz<2; ++iqz)
290  {
291  for (int iqy=0; iqy<2; ++iqy)
292  {
293  for (int iqx=0; iqx<2; ++iqx)
294  {
295  const double x = iqx;
296  const double y = iqy;
297  const double z = iqz;
298  const double w = 1.0/8.0;
299 
300  double J_[3*3];
301  DeviceTensor<2> J(J_, 3, 3);
302 
303  Jacobian3D(x, y, z, vx, vy, vz, J);
304 
305  const double detJ = Det3D(J);
306  const double w_detJ = w/detJ;
307 
308  // adj(J)
309  double A_[3*3];
310  DeviceTensor<2> A(A_, 3, 3);
311  Adjugate3D(J, A);
312 
313  Q(0,iqz,iqy,iqx) = w_detJ*(A(0,0)*A(0,0)+A(0,1)*A(0,1)+A(0,2)*A(0,2)); // 1,1
314  Q(1,iqz,iqy,iqx) = w_detJ*(A(0,0)*A(1,0)+A(0,1)*A(1,1)+A(0,2)*A(1,2)); // 2,1
315  Q(2,iqz,iqy,iqx) = w_detJ*(A(0,0)*A(2,0)+A(0,1)*A(2,1)+A(0,2)*A(2,2)); // 3,1
316  Q(3,iqz,iqy,iqx) = w_detJ*(A(1,0)*A(1,0)+A(1,1)*A(1,1)+A(1,2)*A(1,2)); // 2,2
317  Q(4,iqz,iqy,iqx) = w_detJ*(A(1,0)*A(2,0)+A(1,1)*A(2,1)+A(1,2)*A(2,2)); // 3,2
318  Q(5,iqz,iqy,iqx) = w_detJ*(A(2,0)*A(2,0)+A(2,1)*A(2,1)+A(2,2)*A(2,2)); // 3,3
319 
320  // w J^T J / det(J)
321  Q(6,iqz,iqy,iqx) = w_detJ*(J(0,0)*J(0,0)+J(1,0)*J(1,0)+J(2,0)*J(2,0)); // 1,1
322  Q(7,iqz,iqy,iqx) = w_detJ*(J(0,0)*J(0,1)+J(1,0)*J(1,1)+J(2,0)*J(2,1)); // 2,1
323  Q(8,iqz,iqy,iqx) = w_detJ*(J(0,0)*J(0,2)+J(1,0)*J(1,2)+J(2,0)*J(2,2)); // 3,1
324  Q(9,iqz,iqy,iqx) = w_detJ*(J(0,1)*J(0,1)+J(1,1)*J(1,1)+J(2,1)*J(2,1)); // 2,2
325  Q(10,iqz,iqy,iqx) = w_detJ*(J(0,1)*J(0,2)+J(1,1)*J(1,2)+J(2,1)*J(2,2)); // 3,2
326  Q(11,iqz,iqy,iqx) = w_detJ*(J(0,2)*J(0,2)+J(1,2)*J(1,2)+J(2,2)*J(2,2)); // 3,3
327  }
328  }
329  }
330  for (int iqz=0; iqz<2; ++iqz)
331  {
332  for (int iqy=0; iqy<2; ++iqy)
333  {
334  for (int iqx=0; iqx<2; ++iqx)
335  {
336  const double mq = const_mq ? MQ(0,0,0,0) : MQ(kx+iqx, ky+iqy, kz+iqz, iel_ho);
337  const double dq = const_dq ? DQ(0,0,0,0) : DQ(kx+iqx, ky+iqy, kz+iqz, iel_ho);
338  // Loop over x,y,z components. 0 => x, 1 => y, 2 => z
339  for (int cj=0; cj<dim; ++cj)
340  {
341  const double jq1 = (cj == 0) ? iqy : ((cj == 1) ? iqz : iqx);
342  const double jq2 = (cj == 0) ? iqz : ((cj == 1) ? iqx : iqy);
343 
344  const int jd_0 = cj;
345  const int jd_1 = (cj + 1)%3;
346  const int jd_2 = (cj + 2)%3;
347 
348  for (int bj=0; bj<4; ++bj) // 4 edges in each dim
349  {
350  const int bj1 = bj%2;
351  const int bj2 = bj/2;
352 
353  double curl_j[3];
354  curl_j[jd_0] = 0.0;
355  curl_j[jd_1] = ((bj1 == 0) ? jq1 - 1 : -jq1)*((bj2 == 0) ? 1 : -1);
356  curl_j[jd_2] = ((bj2 == 0) ? 1 - jq2 : jq2)*((bj1 == 0) ? 1 : -1);
357 
358  double basis_j[3];
359  basis_j[jd_0] = ((bj1 == 0) ? 1 - jq1 : jq1)*((bj2 == 0) ? 1 - jq2 : jq2);
360  basis_j[jd_1] = 0.0;
361  basis_j[jd_2] = 0.0;
362 
363  const int jj_loc = bj + 4*cj;
364 
365  for (int ci=0; ci<dim; ++ci)
366  {
367  const double iq1 = (ci == 0) ? iqy : ((ci == 1) ? iqz : iqx);
368  const double iq2 = (ci == 0) ? iqz : ((ci == 1) ? iqx : iqy);
369 
370  const int id_0 = ci;
371  const int id_1 = (ci + 1)%3;
372  const int id_2 = (ci + 2)%3;
373 
374  for (int bi=0; bi<4; ++bi)
375  {
376  const int bi1 = bi%2;
377  const int bi2 = bi/2;
378 
379  double curl_i[3];
380  curl_i[id_0] = 0.0;
381  curl_i[id_1] = ((bi1 == 0) ? iq1 - 1 : -iq1)*((bi2 == 0) ? 1 : -1);
382  curl_i[id_2] = ((bi2 == 0) ? 1 - iq2 : iq2)*((bi1 == 0) ? 1 : -1);
383 
384  double basis_i[3];
385  basis_i[id_0] = ((bi1 == 0) ? 1 - iq1 : iq1)*((bi2 == 0) ? 1 - iq2 : iq2);
386  basis_i[id_1] = 0.0;
387  basis_i[id_2] = 0.0;
388 
389  const int ii_loc = bi + 4*ci;
390 
391  // Only store the lower-triangular part of
392  // the matrix (by symmetry).
393  if (jj_loc > ii_loc) { continue; }
394 
395  double curl_curl = 0.0;
396  curl_curl += Q(6,iqz,iqy,iqx)*curl_i[0]*curl_j[0];
397  curl_curl += Q(7,iqz,iqy,iqx)*(curl_i[0]*curl_j[1] + curl_i[1]*curl_j[0]);
398  curl_curl += Q(8,iqz,iqy,iqx)*(curl_i[0]*curl_j[2] + curl_i[2]*curl_j[0]);
399  curl_curl += Q(9,iqz,iqy,iqx)*curl_i[1]*curl_j[1];
400  curl_curl += Q(10,iqz,iqy,iqx)*(curl_i[1]*curl_j[2] + curl_i[2]*curl_j[1]);
401  curl_curl += Q(11,iqz,iqy,iqx)*curl_i[2]*curl_j[2];
402 
403  double basis_basis = 0.0;
404  basis_basis += Q(0,iqz,iqy,iqx)*basis_i[0]*basis_j[0];
405  basis_basis += Q(1,iqz,iqy,iqx)*(basis_i[0]*basis_j[1] + basis_i[1]*basis_j[0]);
406  basis_basis += Q(2,iqz,iqy,iqx)*(basis_i[0]*basis_j[2] + basis_i[2]*basis_j[0]);
407  basis_basis += Q(3,iqz,iqy,iqx)*basis_i[1]*basis_j[1];
408  basis_basis += Q(4,iqz,iqy,iqx)*(basis_i[1]*basis_j[2] + basis_i[2]*basis_j[1]);
409  basis_basis += Q(5,iqz,iqy,iqx)*basis_i[2]*basis_j[2];
410 
411  const double val = dq*curl_curl + mq*basis_basis;
412 
413  local_mat(ii_loc, jj_loc) += val;
414  }
415  }
416  }
417  }
418  }
419  }
420  }
421  // Assemble the local matrix into the macro-element sparse matrix
422  // The nonzeros of the macro-element sparse matrix are ordered as
423  // follows:
424  //
425  // The axes are ordered relative to the direction of the basis
426  // vector, e.g. for x-vectors, the axes are (x,y,z), for
427  // y-vectors the axes are (y,z,x), and for z-vectors the axes are
428  // (z,x,y).
429  //
430  // The nonzeros are then given in "rotated lexicographic"
431  // ordering, according to these axes.
432  for (int ii_loc=0; ii_loc<ne; ++ii_loc)
433  {
434  const int ci = ii_loc/4;
435  const int bi = ii_loc%4;
436 
437  const int id0 = ci;
438  const int id1 = (ci+1)%3;
439  const int id2 = (ci+2)%3;
440 
441  const int i0 = 0;
442  const int i1 = bi%2;
443  const int i2 = bi/2;
444 
445  int ii_lex[3];
446  ii_lex[id0] = i0;
447  ii_lex[id1] = i1;
448  ii_lex[id2] = i2;
449 
450  const int nx = (ci == 0) ? o : op1;
451  const int ny = (ci == 1) ? o : op1;
452 
453  const int ii = kx+ii_lex[0] + (ky+ii_lex[1])*nx + (kz+ii_lex[2])*nx*ny;
454 
455  for (int jj_loc=0; jj_loc<ne; ++jj_loc)
456  {
457  const int cj = jj_loc/4;
458  // add 3 to take modulus (rather than remainder) when
459  // (cj - ci) is negative
460  const int cj_rel = (3 + cj - ci)%3;
461 
462  const int bj = jj_loc%4;
463 
464  const int jd0 = cj_rel;
465  const int jd1 = (cj_rel+1)%3;
466  const int jd2 = (cj_rel+2)%3;
467 
468  int jj_rel[3];
469  jj_rel[jd0] = 0;
470  jj_rel[jd1] = bj%2;
471  jj_rel[jd2] = bj/2;
472 
473  const int d0 = jj_rel[0] - i0;
474  const int d1 = 1 + jj_rel[1] - i1;
475  const int d2 = 1 + jj_rel[2] - i2;
476  int jj_off;
477  if (cj_rel == 0) { jj_off = d1 + 3*d2; }
478  else if (cj_rel == 1) { jj_off = 9 + d0 + 2*d1 + 4*d2; }
479  else /* if (cj_rel == 2) */ { jj_off = 21 + d0 + 2*d1 + 6*d2; }
480 
481  // Symmetry
482  const double val = (jj_loc <= ii_loc)
483  ? local_mat(ii_loc, jj_loc)
484  : local_mat(jj_loc, ii_loc);
485  AtomicAdd(V(jj_off, ii, ci, iel_ho), val);
486  }
487  }
488  }
489  }
490  }
491  });
492 
493  sparse_mapping.SetSize(nnz_per_row*ndof_per_el);
494  sparse_mapping = -1;
495  auto map = Reshape(sparse_mapping.HostReadWrite(), nnz_per_row, ndof_per_el);
496  for (int ci=0; ci<dim; ++ci)
497  {
498  const int i_off = ci*o*op1*op1;
499  const int id0 = ci;
500  const int id1 = (ci+1)%3;
501  const int id2 = (ci+2)%3;
502 
503  const int nxi = (ci == 0) ? o : op1;
504  const int nyi = (ci == 1) ? o : op1;
505 
506  for (int i0=0; i0<o; ++i0)
507  {
508  for (int i1=0; i1<op1; ++i1)
509  {
510  for (int i2=0; i2<op1; ++i2)
511  {
512  int ii_lex[3];
513  ii_lex[id0] = i0;
514  ii_lex[id1] = i1;
515  ii_lex[id2] = i2;
516  const int ii_el = i_off + ii_lex[0] + ii_lex[1]*nxi + ii_lex[2]*nxi*nyi;
517 
518  for (int cj_rel=0; cj_rel<dim; ++cj_rel)
519  {
520  const int cj = (ci + cj_rel) % 3;
521  const int j_off = cj*o*op1*op1;
522 
523  const int nxj = (cj == 0) ? o : op1;
524  const int nyj = (cj == 1) ? o : op1;
525 
526  const int j0_begin = i0;
527  const int j0_end = (cj_rel == 0) ? i0 : i0 + 1;
528  const int j1_begin = (i1 > 0) ? i1-1 : i1;
529  const int j1_end = (cj_rel == 1)
530  ? ((i1 < o) ? i1 : i1-1)
531  : ((i1 < o) ? i1+1 : i1);
532  const int j2_begin = (i2 > 0) ? i2-1 : i2;
533  const int j2_end = (cj_rel == 2)
534  ? ((i2 < o) ? i2 : i2-1)
535  : ((i2 < o) ? i2+1 : i2);
536 
537  for (int j0=j0_begin; j0<=j0_end; ++j0)
538  {
539  const int d0 = j0 - i0;
540  for (int j1=j1_begin; j1<=j1_end; ++j1)
541  {
542  const int d1 = j1 - i1 + 1;
543  for (int j2=j2_begin; j2<=j2_end; ++j2)
544  {
545  const int d2 = j2 - i2 + 1;
546  int jj_lex[3];
547  jj_lex[id0] = j0;
548  jj_lex[id1] = j1;
549  jj_lex[id2] = j2;
550  const int jj_el = j_off + jj_lex[0] + jj_lex[1]*nxj + jj_lex[2]*nxj*nyj;
551  int jj_off;
552  if (cj_rel == 0) { jj_off = d1 + 3*d2; }
553  else if (cj_rel == 1) { jj_off = 9 + d0 + 2*d1 + 4*d2; }
554  else /* if (cj_rel == 2) */ { jj_off = 21 + d0 + 2*d1 + 6*d2; }
555  map(jj_off, ii_el) = jj_el;
556  }
557  }
558  }
559  }
560  }
561  }
562  }
563  }
564 }
565 
566 // Explicit template instantiations
567 template void BatchedLOR_ND::Assemble2D<1>();
568 template void BatchedLOR_ND::Assemble2D<2>();
569 template void BatchedLOR_ND::Assemble2D<3>();
570 template void BatchedLOR_ND::Assemble2D<4>();
571 template void BatchedLOR_ND::Assemble2D<5>();
572 template void BatchedLOR_ND::Assemble2D<6>();
573 template void BatchedLOR_ND::Assemble2D<7>();
574 template void BatchedLOR_ND::Assemble2D<8>();
575 
576 template void BatchedLOR_ND::Assemble3D<1>();
577 template void BatchedLOR_ND::Assemble3D<2>();
578 template void BatchedLOR_ND::Assemble3D<3>();
579 template void BatchedLOR_ND::Assemble3D<4>();
580 template void BatchedLOR_ND::Assemble3D<5>();
581 template void BatchedLOR_ND::Assemble3D<6>();
582 template void BatchedLOR_ND::Assemble3D<7>();
583 template void BatchedLOR_ND::Assemble3D<8>();
584 
586  FiniteElementSpace &fes_ho_,
587  Vector &X_vert_,
588  Vector &sparse_ij_,
589  Array<int> &sparse_mapping_)
590  : BatchedLORKernel(fes_ho_, X_vert_, sparse_ij_, sparse_mapping_)
591 {
592  ProjectLORCoefficient<VectorFEMassIntegrator>(a, c1);
593  ProjectLORCoefficient<CurlCurlIntegrator>(a, c2);
594 }
595 
596 } // namespace mfem
T * HostReadWrite()
Shortcut for mfem::ReadWrite(a.GetMemory(), a.Size(), false).
Definition: array.hpp:327
void forall_3D(int N, int X, int Y, int Z, lambda &&body)
Definition: forall.hpp:763
Abstract base class for the batched LOR assembly kernels.
BatchedLOR_ND(BilinearForm &a, FiniteElementSpace &fes_ho_, Vector &X_vert_, Vector &sparse_ij_, Array< int > &sparse_mapping_)
Definition: lor_nd.cpp:585
void forall_2D(int N, int X, int Y, lambda &&body)
Definition: forall.hpp:751
Array< int > & sparse_mapping
Local element sparsity pattern.
void SetSize(int s)
Resize the vector to size s.
Definition: vector.hpp:517
MFEM_HOST_DEVICE T AtomicAdd(T &add, const T val)
Definition: backends.hpp:84
int Size() const
Returns the size of the vector.
Definition: vector.hpp:197
virtual const double * Read(bool on_dev=true) const
Shortcut for mfem::Read(vec.GetMemory(), vec.Size(), on_dev).
Definition: vector.hpp:453
CoefficientVector c2
Coefficient of second integrator.
virtual double * Write(bool on_dev=true)
Shortcut for mfem::Write(vec.GetMemory(), vec.Size(), on_dev).
Definition: vector.hpp:461
Vector & sparse_ij
Local element sparsity matrix data.
MFEM_HOST_DEVICE double Det3D(DeviceMatrix &J)
Definition: lor_util.hpp:188
MFEM_HOST_DEVICE double Det2D(DeviceMatrix &J)
Definition: lor_util.hpp:183
MFEM_HOST_DEVICE void Jacobian3D(const double x, const double y, const double z, const double vx[8], const double vy[8], const double vz[8], DeviceMatrix &J)
Definition: lor_util.hpp:126
FiniteElementSpace & fes_ho
The associated high-order space.
A basic generic Tensor class, appropriate for use on the GPU.
Definition: dtensor.hpp:81
int GetNE() const
Returns number of elements in the mesh.
Definition: fespace.hpp:736
MFEM_HOST_DEVICE void Jacobian2D(const double x, const double y, const double vx[4], const double vy[4], DeviceMatrix &J)
Definition: lor_util.hpp:115
Vector & X_vert
Mesh coordinate vector.
Class FiniteElementSpace - responsible for providing FEM view of the mesh, mainly managing the set of...
Definition: fespace.hpp:219
void SetSize(int nsize)
Change the logical size of the array, keep existing entries.
Definition: array.hpp:687
double a
Definition: lissajous.cpp:41
A "square matrix" operator for the associated FE space and BLFIntegrators The sum of all the BLFInteg...
int dim
Definition: ex24.cpp:53
Vector data type.
Definition: vector.hpp:58
MFEM_HOST_DEVICE void Adjugate3D(const DeviceMatrix &J, DeviceMatrix &A)
Definition: lor_util.hpp:170
MFEM_HOST_DEVICE DeviceTensor< sizeof...(Dims), T > Reshape(T *ptr, Dims... dims)
Wrap a pointer as a DeviceTensor with automatically deduced template parameters.
Definition: dtensor.hpp:131
CoefficientVector c1
Coefficient of first integrator.