MFEM  v4.6.0
Finite element discretization library
lor_rt.cpp
Go to the documentation of this file.
1 // Copyright (c) 2010-2023, Lawrence Livermore National Security, LLC. Produced
2 // at the Lawrence Livermore National Laboratory. All Rights reserved. See files
3 // LICENSE and NOTICE for details. LLNL-CODE-806117.
4 //
5 // This file is part of the MFEM library. For more information and source code
6 // availability visit https://mfem.org.
7 //
8 // MFEM is free software; you can redistribute it and/or modify it under the
9 // terms of the BSD-3 license. We welcome feedback and contributions, see file
10 // CONTRIBUTING.md for details.
11 
12 #include "lor_rt.hpp"
13 #include "lor_util.hpp"
14 #include "../../linalg/dtensor.hpp"
15 #include "../../general/forall.hpp"
16 
17 namespace mfem
18 {
19 
20 template <int ORDER>
22 {
23  const int nel_ho = fes_ho.GetNE();
24 
25  static constexpr int nv = 4;
26  static constexpr int ne = 4;
27  static constexpr int dim = 2;
28  static constexpr int ddm2 = (dim*(dim+1))/2;
29  static constexpr int ngeom = ddm2 + 1;
30  static constexpr int o = ORDER;
31  static constexpr int op1 = ORDER + 1;
32  static constexpr int ndof_per_el = dim*o*op1;
33  static constexpr int nnz_per_row = 7;
34  static constexpr int sz_local_mat = ne*ne;
35 
36  const bool const_mq = c1.Size() == 1;
37  const auto MQ = const_mq
38  ? Reshape(c1.Read(), 1, 1, 1)
39  : Reshape(c1.Read(), op1, op1, nel_ho);
40  const bool const_dq = c2.Size() == 1;
41  const auto DQ = const_dq
42  ? Reshape(c2.Read(), 1, 1, 1)
43  : Reshape(c2.Read(), op1, op1, nel_ho);
44 
45  sparse_ij.SetSize(nnz_per_row*ndof_per_el*nel_ho);
46  auto V = Reshape(sparse_ij.Write(), nnz_per_row, o*op1, dim, nel_ho);
47 
48  auto X = X_vert.Read();
49 
50  mfem::forall_2D(nel_ho, ORDER, ORDER, [=] MFEM_HOST_DEVICE (int iel_ho)
51  {
52  MFEM_FOREACH_THREAD(iy,y,o)
53  {
54  MFEM_FOREACH_THREAD(ix,x,op1)
55  {
56  for (int c=0; c<2; ++c)
57  {
58  for (int j=0; j<nnz_per_row; ++j)
59  {
60  V(j,ix+iy*op1,c,iel_ho) = 0.0;
61  }
62  }
63  }
64  }
65  MFEM_SYNC_THREAD;
66 
67  // Loop over the sub-elements
68  MFEM_FOREACH_THREAD(ky,y,ORDER)
69  {
70  MFEM_FOREACH_THREAD(kx,x,ORDER)
71  {
72  // Compute geometric factors at quadrature points
73  double Q_[ngeom*nv];
74  double local_mat_[sz_local_mat];
75 
76  DeviceTensor<3> Q(Q_, ngeom, 2, 2);
77  DeviceTensor<2> local_mat(local_mat_, ne, ne);
78 
79  // local_mat is the local (dense) stiffness matrix
80  for (int i=0; i<sz_local_mat; ++i) { local_mat[i] = 0.0; }
81 
82  double vx[4], vy[4];
83  LORVertexCoordinates2D<ORDER>(X, iel_ho, kx, ky, vx, vy);
84 
85  for (int iqx=0; iqx<2; ++iqx)
86  {
87  for (int iqy=0; iqy<2; ++iqy)
88  {
89  const double x = iqx;
90  const double y = iqy;
91  const double w = 1.0/4.0;
92 
93  double J_[2*2];
94  DeviceTensor<2> J(J_, 2, 2);
95 
96  Jacobian2D(x, y, vx, vy, J);
97 
98  const double detJ = Det2D(J);
99  const double w_detJ = w/detJ;
100 
101  Q(0,iqy,iqx) = w_detJ * (J(0,0)*J(0,0) + J(1,0)*J(1,0)); // 1,1
102  Q(1,iqy,iqx) = w_detJ * (J(0,0)*J(0,1) + J(1,0)*J(1,1)); // 1,2
103  Q(2,iqy,iqx) = w_detJ * (J(0,1)*J(0,1) + J(1,1)*J(1,1)); // 2,2
104  Q(3,iqy,iqx) = w_detJ;
105  }
106  }
107  for (int iqx=0; iqx<2; ++iqx)
108  {
109  for (int iqy=0; iqy<2; ++iqy)
110  {
111  const double mq = const_mq ? MQ(0,0,0) : MQ(kx+iqx, ky+iqy, iel_ho);
112  const double dq = const_dq ? DQ(0,0,0) : DQ(kx+iqx, ky+iqy, iel_ho);
113  // Loop over x,y components. c=0 => x, c=1 => y
114  for (int cj=0; cj<dim; ++cj)
115  {
116  for (int bj=0; bj<2; ++bj)
117  {
118  const double bxj = (cj == 0 && bj == iqx) ? 1 : 0;
119  const double byj = (cj == 1 && bj == iqy) ? 1 : 0;
120  const double div_j = (bj == 0) ? -1 : 1;
121 
122  const double jj_loc = bj + 2*cj;
123 
124  for (int ci=0; ci<dim; ++ci)
125  {
126  for (int bi=0; bi<2; ++bi)
127  {
128  const double bxi = (ci == 0 && bi == iqx) ? 1 : 0;
129  const double byi = (ci == 1 && bi == iqy) ? 1 : 0;
130  const double div_i = (bi == 0) ? -1 : 1;
131 
132  const double ii_loc = bi + 2*ci;
133 
134  // Only store the lower-triangular part of
135  // the matrix (by symmetry).
136  if (jj_loc > ii_loc) { continue; }
137 
138  double val = 0.0;
139  val += bxi*bxj*Q(0,iqy,iqx);
140  val += byi*bxj*Q(1,iqy,iqx);
141  val += bxi*byj*Q(1,iqy,iqx);
142  val += byi*byj*Q(2,iqy,iqx);
143  val *= mq;
144  val += dq*div_j*div_i*Q(3,iqy,iqx);
145 
146  local_mat(ii_loc, jj_loc) += val;
147  }
148  }
149  }
150  }
151  }
152  }
153  // Assemble the local matrix into the macro-element sparse matrix
154  // in a format similar to coordinate format. The (I,J) arrays
155  // are implicit (not stored explicitly).
156  for (int ii_loc=0; ii_loc<ne; ++ii_loc)
157  {
158  const int ci = ii_loc/2;
159  const int bi = ii_loc%2;
160  const int ix = (ci == 0) ? bi : 0;
161  const int iy = (ci == 1) ? bi : 0;
162 
163  int ii = kx+ix + (ky+iy)*((ci == 0) ? op1 : o);
164 
165  for (int jj_loc=0; jj_loc<ne; ++jj_loc)
166  {
167  const int cj = jj_loc/2;
168  const int bj = jj_loc%2;
169 
170  const int jj_off = (ci == cj) ? (bj - bi + 1) : (3 + 1-bi + 2*bj);
171 
172  // Symmetry
173  const double val = (jj_loc <= ii_loc)
174  ? local_mat(ii_loc, jj_loc)
175  : local_mat(jj_loc, ii_loc);
176  AtomicAdd(V(jj_off, ii, ci, iel_ho), val);
177  }
178  }
179  }
180  }
181  });
182 
183  sparse_mapping.SetSize(nnz_per_row*ndof_per_el);
184  sparse_mapping = -1;
185  auto map = Reshape(sparse_mapping.HostReadWrite(), nnz_per_row, ndof_per_el);
186  for (int ci=0; ci<2; ++ci)
187  {
188  const int i_off = (ci == 0) ? 0 : o*op1;
189  const int id0 = ci;
190  const int id1 = (ci+1)%2;
191 
192  const int nxi = (ci == 0) ? op1 : o;
193 
194  for (int i0=0; i0<op1; ++i0)
195  {
196  for (int i1=0; i1<o; ++i1)
197  {
198  int ii_lex[2];
199  ii_lex[id0] = i0;
200  ii_lex[id1] = i1;
201  const int ii_el = i_off + ii_lex[0] + ii_lex[1]*nxi;
202 
203  for (int cj_rel=0; cj_rel<2; ++cj_rel)
204  {
205  const int cj = (ci + cj_rel) % 2;
206  const int j_off = (cj == 0) ? 0 : o*op1;
207  const int nxj = (cj == 0) ? op1 : o;
208 
209  const int j0_begin = (i0 > 0) ? i0-1 : i0;
210  const int j0_end = (cj_rel == 0)
211  ? ((i0 < o) ? i0+1 : i0)
212  : ((i0 < o) ? i0 : i0-1);
213  const int j1_begin = i1;
214  const int j1_end = (cj_rel == 0) ? i1 : i1+1;
215 
216  for (int j0=j0_begin; j0<=j0_end; ++j0)
217  {
218  const int d0 = 1 + j0 - i0;
219  for (int j1=j1_begin; j1<=j1_end; ++j1)
220  {
221  const int d1 = j1 - i1;
222  int jj_lex[2];
223  jj_lex[id0] = j0;
224  jj_lex[id1] = j1;
225  const int jj_el = j_off + jj_lex[0] + jj_lex[1]*nxj;
226  const int jj_off = (cj_rel == 0) ? d0 : 3 + d0 + 2*d1;
227  map(jj_off, ii_el) = jj_el;
228  }
229  }
230  }
231  }
232  }
233  }
234 }
235 
236 template <int ORDER>
238 {
239  const int nel_ho = fes_ho.GetNE();
240 
241  static constexpr int nv = 8; // number of vertices in hexahedron
242  static constexpr int nf = 6; // number of faces in hexahedron
243  static constexpr int dim = 3;
244  static constexpr int ddm2 = (dim*(dim+1))/2;
245  static constexpr int ngeom = ddm2 + 1; // number of geometric factors stored
246  static constexpr int o = ORDER;
247  static constexpr int op1 = ORDER + 1;
248  static constexpr int ndof_per_el = dim*o*o*op1;
249  static constexpr int nnz_per_row = 11;
250  static constexpr int sz_local_mat = nf*nf;
251 
252  const bool const_mq = c1.Size() == 1;
253  const auto MQ = const_mq
254  ? Reshape(c1.Read(), 1, 1, 1, 1)
255  : Reshape(c1.Read(), op1, op1, op1, nel_ho);
256  const bool const_dq = c2.Size() == 1;
257  const auto DQ = const_dq
258  ? Reshape(c2.Read(), 1, 1, 1, 1)
259  : Reshape(c2.Read(), op1, op1, op1, nel_ho);
260 
261  sparse_ij.SetSize(nnz_per_row*ndof_per_el*nel_ho);
262  auto V = Reshape(sparse_ij.Write(), nnz_per_row, o*o*op1, dim, nel_ho);
263 
264  auto X = X_vert.Read();
265 
266  // Last thread dimension is lowered to avoid "too many resources" error
267  mfem::forall_3D(nel_ho, ORDER, ORDER, (ORDER>6)?4:ORDER,
268  [=] MFEM_HOST_DEVICE (int iel_ho)
269  {
270  MFEM_FOREACH_THREAD(iz,z,o)
271  {
272  MFEM_FOREACH_THREAD(iy,y,o)
273  {
274  MFEM_FOREACH_THREAD(ix,x,op1)
275  {
276  for (int c=0; c<dim; ++c)
277  {
278  for (int j=0; j<nnz_per_row; ++j)
279  {
280  V(j,ix+iy*op1+iz*o*op1,c,iel_ho) = 0.0;
281  }
282  }
283  }
284  }
285  }
286  MFEM_SYNC_THREAD;
287 
288  // Loop over the sub-elements
289  MFEM_FOREACH_THREAD(kz,z,ORDER)
290  {
291  MFEM_FOREACH_THREAD(ky,y,ORDER)
292  {
293  MFEM_FOREACH_THREAD(kx,x,ORDER)
294  {
295  // Geometric factors at quadrature points (element vertices)
296  double Q_[ngeom*nv];
297  DeviceTensor<4> Q(Q_, ngeom, 2, 2, 2);
298 
299  double local_mat_[sz_local_mat];
300  DeviceTensor<2> local_mat(local_mat_, nf, nf);
301  for (int i=0; i<sz_local_mat; ++i) { local_mat[i] = 0.0; }
302 
303  double vx[8], vy[8], vz[8];
304  LORVertexCoordinates3D<ORDER>(X, iel_ho, kx, ky, kz, vx, vy, vz);
305 
306  for (int iqz=0; iqz<2; ++iqz)
307  {
308  for (int iqy=0; iqy<2; ++iqy)
309  {
310  for (int iqx=0; iqx<2; ++iqx)
311  {
312  const double x = iqx;
313  const double y = iqy;
314  const double z = iqz;
315  const double w = 1.0/8.0;
316 
317  double J_[3*3];
318  DeviceTensor<2> J(J_, 3, 3);
319 
320  Jacobian3D(x, y, z, vx, vy, vz, J);
321 
322  const double detJ = Det3D(J);
323  const double w_detJ = w/detJ;
324 
325  Q(0,iqz,iqy,iqx) = w_detJ*(J(0,0)*J(0,0)+J(1,0)*J(1,0)+J(2,0)*J(2,0)); // 1,1
326  Q(1,iqz,iqy,iqx) = w_detJ*(J(0,1)*J(0,0)+J(1,1)*J(1,0)+J(2,1)*J(2,0)); // 2,1
327  Q(2,iqz,iqy,iqx) = w_detJ*(J(0,2)*J(0,0)+J(1,2)*J(1,0)+J(2,2)*J(2,0)); // 3,1
328  Q(3,iqz,iqy,iqx) = w_detJ*(J(0,1)*J(0,1)+J(1,1)*J(1,1)+J(2,1)*J(2,1)); // 2,2
329  Q(4,iqz,iqy,iqx) = w_detJ*(J(0,2)*J(0,1)+J(1,2)*J(1,1)+J(2,2)*J(2,1)); // 3,2
330  Q(5,iqz,iqy,iqx) = w_detJ*(J(0,2)*J(0,2)+J(1,2)*J(1,2)+J(2,2)*J(2,2)); // 3,3
331  Q(6,iqz,iqy,iqx) = w_detJ;
332  }
333  }
334  }
335  for (int iqz=0; iqz<2; ++iqz)
336  {
337  for (int iqy=0; iqy<2; ++iqy)
338  {
339  for (int iqx=0; iqx<2; ++iqx)
340  {
341  const double mq = const_mq ? MQ(0,0,0,0) : MQ(kx+iqx, ky+iqy, kz+iqz, iel_ho);
342  const double dq = const_dq ? DQ(0,0,0,0) : DQ(kx+iqx, ky+iqy, kz+iqz, iel_ho);
343  // Loop over x,y,z components. 0 => x, 1 => y, 2 => z
344  for (int cj=0; cj<dim; ++cj)
345  {
346  const double jq0 = (cj == 0) ? iqx : ((cj == 1) ? iqy : iqz);
347 
348  const int jd_0 = cj;
349  const int jd_1 = (cj + 1)%3;
350  const int jd_2 = (cj + 2)%3;
351 
352  for (int bj=0; bj<2; ++bj) // 2 faces in each dim
353  {
354  const double div_j = (bj == 0) ? -1 : 1;
355 
356  double basis_j[3];
357  basis_j[jd_0] = (bj == jq0) ? 1 : 0;
358  basis_j[jd_1] = 0.0;
359  basis_j[jd_2] = 0.0;
360 
361  const int jj_loc = bj + 2*cj;
362 
363  for (int ci=0; ci<dim; ++ci)
364  {
365  const double iq0 = (ci == 0) ? iqx : ((ci == 1) ? iqy : iqz);
366 
367  const int id_0 = ci;
368  const int id_1 = (ci + 1)%3;
369  const int id_2 = (ci + 2)%3;
370 
371  for (int bi=0; bi<2; ++bi)
372  {
373  const double div_i = (bi == 0) ? -1 : 1;
374 
375  double basis_i[3];
376  basis_i[id_0] = (bi == iq0) ? 1 : 0;
377  basis_i[id_1] = 0.0;
378  basis_i[id_2] = 0.0;
379 
380  const int ii_loc = bi + 2*ci;
381 
382  // Only store the lower-triangular part of
383  // the matrix (by symmetry).
384  if (jj_loc > ii_loc) { continue; }
385 
386  const double div_div = Q(6,iqz,iqy,iqx)*div_i*div_j;
387 
388  double basis_basis = 0.0;
389  basis_basis += Q(0,iqz,iqy,iqx)*basis_i[0]*basis_j[0];
390  basis_basis += Q(1,iqz,iqy,iqx)*(basis_i[0]*basis_j[1] + basis_i[1]*basis_j[0]);
391  basis_basis += Q(2,iqz,iqy,iqx)*(basis_i[0]*basis_j[2] + basis_i[2]*basis_j[0]);
392  basis_basis += Q(3,iqz,iqy,iqx)*basis_i[1]*basis_j[1];
393  basis_basis += Q(4,iqz,iqy,iqx)*(basis_i[1]*basis_j[2] + basis_i[2]*basis_j[1]);
394  basis_basis += Q(5,iqz,iqy,iqx)*basis_i[2]*basis_j[2];
395 
396  const double val = dq*div_div + mq*basis_basis;
397  // const double val = 1.0;
398 
399  local_mat(ii_loc, jj_loc) += val;
400  }
401  }
402  }
403  }
404  }
405  }
406  }
407  // Assemble the local matrix into the macro-element sparse matrix
408  // The nonzeros of the macro-element sparse matrix are ordered as
409  // follows:
410  //
411  // The axes are ordered relative to the direction of the basis
412  // vector, e.g. for x-vectors, the axes are (x,y,z), for
413  // y-vectors the axes are (y,z,x), and for z-vectors the axes are
414  // (z,x,y).
415  //
416  // The nonzeros are then given in "rotated lexicographic"
417  // ordering, according to these axes.
418  for (int ii_loc=0; ii_loc<nf; ++ii_loc)
419  {
420  const int ci = ii_loc/2;
421  const int bi = ii_loc%2;
422 
423  const int id0 = ci;
424  const int id1 = (ci+1)%3;
425  const int id2 = (ci+2)%3;
426 
427  const int i0 = bi;
428  const int i1 = 0;
429  const int i2 = 0;
430 
431  int ii_lex[3];
432  ii_lex[id0] = i0;
433  ii_lex[id1] = i1;
434  ii_lex[id2] = i2;
435 
436  const int nx = (ci == 0) ? op1 : o;
437  const int ny = (ci == 1) ? op1 : o;
438 
439  const int ii = kx+ii_lex[0] + (ky+ii_lex[1])*nx + (kz+ii_lex[2])*nx*ny;
440 
441  for (int jj_loc=0; jj_loc<nf; ++jj_loc)
442  {
443  const int cj = jj_loc/2;
444  // add 3 to take modulus (rather than remainder) when
445  // (cj - ci) is negative
446  const int cj_rel = (3 + cj - ci)%3;
447 
448  const int bj = jj_loc%2;
449 
450  const int jd0 = cj_rel;
451  const int jd1 = (cj_rel+1)%3;
452  const int jd2 = (cj_rel+2)%3;
453 
454  int jj_rel[3];
455  jj_rel[jd0] = bj;
456  jj_rel[jd1] = 0;
457  jj_rel[jd2] = 0;
458 
459  const int d0 = jj_rel[0] - i0 + 1;
460  const int d1 = jj_rel[1] - i1;
461  const int d2 = jj_rel[2] - i2;
462  int jj_off;
463  if (cj_rel == 0) { jj_off = d0; }
464  else if (cj_rel == 1) { jj_off = 3 + d0 + 2*d1; }
465  else /* if (cj_rel == 2) */ { jj_off = 7 + d0 + 2*d2; }
466 
467  // Symmetry
468  const double val = (jj_loc <= ii_loc)
469  ? local_mat(ii_loc, jj_loc)
470  : local_mat(jj_loc, ii_loc);
471  AtomicAdd(V(jj_off, ii, ci, iel_ho), val);
472  }
473  }
474  }
475  }
476  }
477  });
478 
479  sparse_mapping.SetSize(nnz_per_row*ndof_per_el);
480  sparse_mapping = -1;
481  auto map = Reshape(sparse_mapping.HostReadWrite(), nnz_per_row, ndof_per_el);
482  for (int ci=0; ci<dim; ++ci)
483  {
484  const int i_off = ci*o*o*op1;
485  const int id0 = ci;
486  const int id1 = (ci+1)%3;
487  const int id2 = (ci+2)%3;
488 
489  const int nxi = (ci == 0) ? op1 : o;
490  const int nyi = (ci == 1) ? op1 : o;
491 
492  for (int i0=0; i0<op1; ++i0)
493  {
494  for (int i1=0; i1<o; ++i1)
495  {
496  for (int i2=0; i2<o; ++i2)
497  {
498  int ii_lex[3];
499  ii_lex[id0] = i0;
500  ii_lex[id1] = i1;
501  ii_lex[id2] = i2;
502  const int ii_el = i_off + ii_lex[0] + ii_lex[1]*nxi + ii_lex[2]*nxi*nyi;
503 
504  for (int cj_rel=0; cj_rel<dim; ++cj_rel)
505  {
506  const int cj = (ci + cj_rel) % 3;
507  const int j_off = cj*o*o*op1;
508 
509  const int nxj = (cj == 0) ? op1 : o;
510  const int nyj = (cj == 1) ? op1 : o;
511 
512  const int j0_begin = (i0 > 0) ? i0-1 : i0;
513  const int j0_end = (cj_rel == 0)
514  ? ((i0 < o) ? i0+1 : i0)
515  : ((i0 < o) ? i0 : i0-1);
516  const int j1_begin = i1;
517  const int j1_end = (cj_rel == 1) ? i1+1 : i1;
518  const int j2_begin = i2;
519  const int j2_end = (cj_rel == 2) ? i2+1 : i2;
520 
521  for (int j0=j0_begin; j0<=j0_end; ++j0)
522  {
523  const int d0 = 1 + j0 - i0;
524  for (int j1=j1_begin; j1<=j1_end; ++j1)
525  {
526  const int d1 = j1 - i1;
527  for (int j2=j2_begin; j2<=j2_end; ++j2)
528  {
529  const int d2 = j2 - i2;
530  int jj_lex[3];
531  jj_lex[id0] = j0;
532  jj_lex[id1] = j1;
533  jj_lex[id2] = j2;
534  const int jj_el = j_off + jj_lex[0] + jj_lex[1]*nxj + jj_lex[2]*nxj*nyj;
535  int jj_off;
536  if (cj_rel == 0) { jj_off = d0; }
537  else if (cj_rel == 1) { jj_off = 3 + d0 + 2*d1; }
538  else /* if (cj_rel == 2) */ { jj_off = 7 + d0 + 2*d2; }
539  map(jj_off, ii_el) = jj_el;
540  }
541  }
542  }
543  }
544  }
545  }
546  }
547  }
548 }
549 
550 // Explicit template instantiations
551 template void BatchedLOR_RT::Assemble2D<1>();
552 template void BatchedLOR_RT::Assemble2D<2>();
553 template void BatchedLOR_RT::Assemble2D<3>();
554 template void BatchedLOR_RT::Assemble2D<4>();
555 template void BatchedLOR_RT::Assemble2D<5>();
556 template void BatchedLOR_RT::Assemble2D<6>();
557 template void BatchedLOR_RT::Assemble2D<7>();
558 template void BatchedLOR_RT::Assemble2D<8>();
559 
560 template void BatchedLOR_RT::Assemble3D<1>();
561 template void BatchedLOR_RT::Assemble3D<2>();
562 template void BatchedLOR_RT::Assemble3D<3>();
563 template void BatchedLOR_RT::Assemble3D<4>();
564 template void BatchedLOR_RT::Assemble3D<5>();
565 template void BatchedLOR_RT::Assemble3D<6>();
566 template void BatchedLOR_RT::Assemble3D<7>();
567 template void BatchedLOR_RT::Assemble3D<8>();
568 
570  FiniteElementSpace &fes_ho_,
571  Vector &X_vert_,
572  Vector &sparse_ij_,
573  Array<int> &sparse_mapping_)
574  : BatchedLORKernel(fes_ho_, X_vert_, sparse_ij_, sparse_mapping_)
575 {
576  ProjectLORCoefficient<VectorFEMassIntegrator>(a, c1);
577  ProjectLORCoefficient<DivDivIntegrator>(a, c2);
578 }
579 
580 } // namespace mfem
T * HostReadWrite()
Shortcut for mfem::ReadWrite(a.GetMemory(), a.Size(), false).
Definition: array.hpp:327
void forall_3D(int N, int X, int Y, int Z, lambda &&body)
Definition: forall.hpp:763
Abstract base class for the batched LOR assembly kernels.
void forall_2D(int N, int X, int Y, lambda &&body)
Definition: forall.hpp:751
Array< int > & sparse_mapping
Local element sparsity pattern.
void SetSize(int s)
Resize the vector to size s.
Definition: vector.hpp:517
MFEM_HOST_DEVICE T AtomicAdd(T &add, const T val)
Definition: backends.hpp:84
int Size() const
Returns the size of the vector.
Definition: vector.hpp:197
virtual const double * Read(bool on_dev=true) const
Shortcut for mfem::Read(vec.GetMemory(), vec.Size(), on_dev).
Definition: vector.hpp:453
CoefficientVector c2
Coefficient of second integrator.
BatchedLOR_RT(BilinearForm &a, FiniteElementSpace &fes_ho_, Vector &X_vert_, Vector &sparse_ij_, Array< int > &sparse_mapping_)
Definition: lor_rt.cpp:569
virtual double * Write(bool on_dev=true)
Shortcut for mfem::Write(vec.GetMemory(), vec.Size(), on_dev).
Definition: vector.hpp:461
Vector & sparse_ij
Local element sparsity matrix data.
MFEM_HOST_DEVICE double Det3D(DeviceMatrix &J)
Definition: lor_util.hpp:188
MFEM_HOST_DEVICE double Det2D(DeviceMatrix &J)
Definition: lor_util.hpp:183
MFEM_HOST_DEVICE void Jacobian3D(const double x, const double y, const double z, const double vx[8], const double vy[8], const double vz[8], DeviceMatrix &J)
Definition: lor_util.hpp:126
FiniteElementSpace & fes_ho
The associated high-order space.
A basic generic Tensor class, appropriate for use on the GPU.
Definition: dtensor.hpp:81
int GetNE() const
Returns number of elements in the mesh.
Definition: fespace.hpp:736
MFEM_HOST_DEVICE void Jacobian2D(const double x, const double y, const double vx[4], const double vy[4], DeviceMatrix &J)
Definition: lor_util.hpp:115
Vector & X_vert
Mesh coordinate vector.
Class FiniteElementSpace - responsible for providing FEM view of the mesh, mainly managing the set of...
Definition: fespace.hpp:219
void SetSize(int nsize)
Change the logical size of the array, keep existing entries.
Definition: array.hpp:687
double a
Definition: lissajous.cpp:41
A "square matrix" operator for the associated FE space and BLFIntegrators The sum of all the BLFInteg...
int dim
Definition: ex24.cpp:53
Vector data type.
Definition: vector.hpp:58
MFEM_HOST_DEVICE DeviceTensor< sizeof...(Dims), T > Reshape(T *ptr, Dims... dims)
Wrap a pointer as a DeviceTensor with automatically deduced template parameters.
Definition: dtensor.hpp:131
CoefficientVector c1
Coefficient of first integrator.