MFEM  v4.6.0
Finite element discretization library
lor_h1.cpp
Go to the documentation of this file.
1 // Copyright (c) 2010-2023, Lawrence Livermore National Security, LLC. Produced
2 // at the Lawrence Livermore National Laboratory. All Rights reserved. See files
3 // LICENSE and NOTICE for details. LLNL-CODE-806117.
4 //
5 // This file is part of the MFEM library. For more information and source code
6 // availability visit https://mfem.org.
7 //
8 // MFEM is free software; you can redistribute it and/or modify it under the
9 // terms of the BSD-3 license. We welcome feedback and contributions, see file
10 // CONTRIBUTING.md for details.
11 
12 #include "lor_h1.hpp"
13 #include "lor_util.hpp"
14 #include "../../linalg/dtensor.hpp"
15 #include "../../general/forall.hpp"
16 
17 namespace mfem
18 {
19 
20 template <int ORDER>
22 {
23  const int nel_ho = fes_ho.GetNE();
24 
25  static constexpr int nv = 4;
26  static constexpr int dim = 2;
27  static constexpr int ddm2 = (dim*(dim+1))/2;
28  static constexpr int nd1d = ORDER + 1;
29  static constexpr int ndof_per_el = nd1d*nd1d;
30  static constexpr int nnz_per_row = 9;
31  static constexpr int sz_local_mat = nv*nv;
32 
33  const bool const_mq = c1.Size() == 1;
34  const auto MQ = const_mq
35  ? Reshape(c1.Read(), 1, 1, 1)
36  : Reshape(c1.Read(), nd1d, nd1d, nel_ho);
37  const bool const_dq = c2.Size() == 1;
38  const auto DQ = const_dq
39  ? Reshape(c2.Read(), 1, 1, 1)
40  : Reshape(c2.Read(), nd1d, nd1d, nel_ho);
41 
42  sparse_ij.SetSize(nnz_per_row*ndof_per_el*nel_ho);
43  auto V = Reshape(sparse_ij.Write(), nnz_per_row, nd1d, nd1d, nel_ho);
44 
45  auto X = X_vert.Read();
46 
47  mfem::forall_2D(nel_ho, ORDER, ORDER, [=] MFEM_HOST_DEVICE (int iel_ho)
48  {
49  // Assemble a sparse matrix over the macro-element by looping over each
50  // subelement.
51  // V(j,ix,iy) stores the jth nonzero in the row of the sparse matrix
52  // corresponding to local DOF (ix, iy).
53  MFEM_FOREACH_THREAD(iy,y,nd1d)
54  {
55  MFEM_FOREACH_THREAD(ix,x,nd1d)
56  {
57  for (int j=0; j<nnz_per_row; ++j)
58  {
59  V(j,ix,iy,iel_ho) = 0.0;
60  }
61  }
62  }
63  MFEM_SYNC_THREAD;
64 
65  // Compute geometric factors at quadrature points
66  MFEM_FOREACH_THREAD(ky,y,ORDER)
67  {
68  MFEM_FOREACH_THREAD(kx,x,ORDER)
69  {
70  double Q_[(ddm2 + 1)*nv];
71  double local_mat_[sz_local_mat];
72  DeviceTensor<3> Q(Q_, ddm2 + 1, 2, 2);
73  DeviceTensor<2> local_mat(local_mat_, nv, nv);
74 
75  for (int i=0; i<sz_local_mat; ++i) { local_mat[i] = 0.0; }
76 
77  double vx[4], vy[4];
78  LORVertexCoordinates2D<ORDER>(X, iel_ho, kx, ky, vx, vy);
79 
80  for (int iqy=0; iqy<2; ++iqy)
81  {
82  for (int iqx=0; iqx<2; ++iqx)
83  {
84  const double x = iqx;
85  const double y = iqy;
86  const double w = 1.0/4.0;
87 
88  double J_[2*2];
89  DeviceTensor<2> J(J_, 2, 2);
90 
91  Jacobian2D(x, y, vx, vy, J);
92 
93  const double detJ = Det2D(J);
94  const double w_detJ = w/detJ;
95 
96  Q(0,iqy,iqx) = w_detJ * (J(0,1)*J(0,1) + J(1,1)*J(1,1)); // 1,1
97  Q(1,iqy,iqx) = -w_detJ * (J(0,1)*J(0,0) + J(1,1)*J(1,0)); // 1,2
98  Q(2,iqy,iqx) = w_detJ * (J(0,0)*J(0,0) + J(1,0)*J(1,0)); // 2,2
99  Q(3,iqy,iqx) = w*detJ;
100  }
101  }
102  for (int iqx=0; iqx<2; ++iqx)
103  {
104  for (int iqy=0; iqy<2; ++iqy)
105  {
106  const double mq = const_mq ? MQ(0,0,0) : MQ(kx+iqx, ky+iqy, iel_ho);
107  const double dq = const_dq ? DQ(0,0,0) : DQ(kx+iqx, ky+iqy, iel_ho);
108  for (int jy=0; jy<2; ++jy)
109  {
110  const double bjy = (jy == iqy) ? 1.0 : 0.0;
111  const double gjy = (jy == 0) ? -1.0 : 1.0;
112  for (int jx=0; jx<2; ++jx)
113  {
114  const double bjx = (jx == iqx) ? 1.0 : 0.0;
115  const double gjx = (jx == 0) ? -1.0 : 1.0;
116 
117  const double djx = gjx*bjy;
118  const double djy = bjx*gjy;
119 
120  int jj_loc = jx + 2*jy;
121 
122  for (int iy=0; iy<2; ++iy)
123  {
124  const double biy = (iy == iqy) ? 1.0 : 0.0;
125  const double giy = (iy == 0) ? -1.0 : 1.0;
126  for (int ix=0; ix<2; ++ix)
127  {
128  const double bix = (ix == iqx) ? 1.0 : 0.0;
129  const double gix = (ix == 0) ? -1.0 : 1.0;
130 
131  const double dix = gix*biy;
132  const double diy = bix*giy;
133 
134  int ii_loc = ix + 2*iy;
135 
136  // Only store the lower-triangular part of
137  // the matrix (by symmetry).
138  if (jj_loc > ii_loc) { continue; }
139 
140  double val = 0.0;
141  val += dix*djx*Q(0,iqy,iqx);
142  val += (dix*djy + diy*djx)*Q(1,iqy,iqx);
143  val += diy*djy*Q(2,iqy,iqx);
144  val *= dq;
145 
146  val += mq*bix*biy*bjx*bjy*Q(3,iqy,iqx);
147 
148  local_mat(ii_loc, jj_loc) += val;
149  }
150  }
151  }
152  }
153  }
154  }
155  // Assemble the local matrix into the macro-element sparse matrix
156  // in a format similar to coordinate format. The (I,J) arrays
157  // are implicit (not stored explicitly).
158  for (int ii_loc=0; ii_loc<nv; ++ii_loc)
159  {
160  const int ix = ii_loc%2;
161  const int iy = ii_loc/2;
162  for (int jj_loc=0; jj_loc<nv; ++jj_loc)
163  {
164  const int jx = jj_loc%2;
165  const int jy = jj_loc/2;
166  const int jj_off = (jx-ix+1) + 3*(jy-iy+1);
167 
168  // Symmetry
169  if (jj_loc <= ii_loc)
170  {
171  AtomicAdd(V(jj_off, ix+kx, iy+ky, iel_ho), local_mat(ii_loc, jj_loc));
172  }
173  else
174  {
175  AtomicAdd(V(jj_off, ix+kx, iy+ky, iel_ho), local_mat(jj_loc, ii_loc));
176  }
177  }
178  }
179  }
180  }
181  });
182 
183  sparse_mapping.SetSize(nnz_per_row*ndof_per_el);
184  sparse_mapping = -1;
185  auto map = Reshape(sparse_mapping.HostReadWrite(), nnz_per_row, ndof_per_el);
186  for (int iy=0; iy<nd1d; ++iy)
187  {
188  const int jy_begin = (iy > 0) ? iy - 1 : 0;
189  const int jy_end = (iy < ORDER) ? iy + 1 : ORDER;
190  for (int ix=0; ix<nd1d; ++ix)
191  {
192  const int jx_begin = (ix > 0) ? ix - 1 : 0;
193  const int jx_end = (ix < ORDER) ? ix + 1 : ORDER;
194  const int ii_el = ix + nd1d*iy;
195  for (int jy=jy_begin; jy<=jy_end; ++jy)
196  {
197  for (int jx=jx_begin; jx<=jx_end; ++jx)
198  {
199  const int jj_off = (jx-ix+1) + 3*(jy-iy+1);
200  const int jj_el = jx + nd1d*jy;
201  map(jj_off, ii_el) = jj_el;
202  }
203  }
204  }
205  }
206 }
207 
208 template <int ORDER>
210 {
211  const int nel_ho = fes_ho.GetNE();
212  static constexpr int nv = 8;
213  static constexpr int dim = 3;
214  static constexpr int ddm2 = (dim*(dim+1))/2;
215  static constexpr int nd1d = ORDER + 1;
216  static constexpr int ndof_per_el = nd1d*nd1d*nd1d;
217  static constexpr int nnz_per_row = 27;
218  static constexpr int sz_grad_A = 3*3*2*2*2*2;
219  static constexpr int sz_grad_B = sz_grad_A*2;
220  static constexpr int sz_mass_A = 2*2*2*2;
221  static constexpr int sz_mass_B = sz_mass_A*2;
222  static constexpr int sz_local_mat = nv*nv;
223 
224  const bool const_mq = c1.Size() == 1;
225  const auto MQ = const_mq
226  ? Reshape(c1.Read(), 1, 1, 1, 1)
227  : Reshape(c1.Read(), nd1d, nd1d, nd1d, nel_ho);
228  const bool const_dq = c2.Size() == 1;
229  const auto DQ = const_dq
230  ? Reshape(c2.Read(), 1, 1, 1, 1)
231  : Reshape(c2.Read(), nd1d, nd1d, nd1d, nel_ho);
232 
233  sparse_ij.SetSize(nel_ho*ndof_per_el*nnz_per_row);
234  auto V = Reshape(sparse_ij.Write(), nnz_per_row, nd1d, nd1d, nd1d, nel_ho);
235 
236  auto X = X_vert.Read();
237 
238  // Last thread dimension is lowered to avoid "too many resources" error
239  mfem::forall_3D(nel_ho, ORDER, ORDER, (ORDER>6)?4:ORDER,
240  [=] MFEM_HOST_DEVICE (int iel_ho)
241  {
242  // Assemble a sparse matrix over the macro-element by looping over each
243  // subelement.
244  // V(j,i) stores the jth nonzero in the ith row of the sparse matrix.
245  MFEM_FOREACH_THREAD(iz,z,nd1d)
246  {
247  MFEM_FOREACH_THREAD(iy,y,nd1d)
248  {
249  MFEM_FOREACH_THREAD(ix,x,nd1d)
250  {
251  MFEM_UNROLL(nnz_per_row)
252  for (int j=0; j<nnz_per_row; ++j)
253  {
254  V(j,ix,iy,iz,iel_ho) = 0.0;
255  }
256  }
257  }
258  }
259  MFEM_SYNC_THREAD;
260 
261  // Compute geometric factors at quadrature points
262  MFEM_FOREACH_THREAD(kz,z,ORDER)
263  {
264  MFEM_FOREACH_THREAD(ky,y,ORDER)
265  {
266  MFEM_FOREACH_THREAD(kx,x,ORDER)
267  {
268  double Q_[(ddm2 + 1)*nv];
269  double grad_A_[sz_grad_A];
270  double grad_B_[sz_grad_B];
271  double mass_A_[sz_mass_A];
272  double mass_B_[sz_mass_B];
273  double local_mat_[sz_local_mat];
274 
275  DeviceTensor<4> Q(Q_, ddm2 + 1, 2, 2, 2);
276  DeviceTensor<2> local_mat(local_mat_, nv, nv);
277  DeviceTensor<6> grad_A(grad_A_, 3, 3, 2, 2, 2, 2);
278  DeviceTensor<7> grad_B(grad_B_, 3, 3, 2, 2, 2, 2, 2);
279  DeviceTensor<4> mass_A(mass_A_, 2, 2, 2, 2);
280  DeviceTensor<5> mass_B(mass_B_, 2, 2, 2, 2, 2);
281 
282  // local_mat is the local (dense) stiffness matrix
283  for (int i=0; i<sz_local_mat; ++i) { local_mat[i] = 0.0; }
284 
285  // Intermediate quantities
286  // (see e.g. Mora and Demkowicz for notation).
287  for (int i=0; i<sz_grad_A; ++i) { grad_A[i] = 0.0; }
288  for (int i=0; i<sz_grad_B; ++i) { grad_B[i] = 0.0; }
289 
290  for (int i=0; i<sz_mass_A; ++i) { mass_A[i] = 0.0; }
291  for (int i=0; i<sz_mass_B; ++i) { mass_B[i] = 0.0; }
292 
293  double vx[8], vy[8], vz[8];
294  LORVertexCoordinates3D<ORDER>(X, iel_ho, kx, ky, kz, vx, vy, vz);
295 
296  //MFEM_UNROLL(2)
297  for (int iqz=0; iqz<2; ++iqz)
298  {
299  //MFEM_UNROLL(2)
300  for (int iqy=0; iqy<2; ++iqy)
301  {
302  //MFEM_UNROLL(2)
303  for (int iqx=0; iqx<2; ++iqx)
304  {
305  const double x = iqx;
306  const double y = iqy;
307  const double z = iqz;
308  const double w = 1.0/8.0;
309 
310  double J_[3*3];
311  DeviceTensor<2> J(J_, 3, 3);
312 
313  Jacobian3D(x, y, z, vx, vy, vz, J);
314 
315  const double detJ = Det3D(J);
316  const double w_detJ = w/detJ;
317 
318  // adj(J)
319  double A_[3*3];
320  DeviceTensor<2> A(A_, 3, 3);
321  Adjugate3D(J, A);
322 
323  Q(0,iqz,iqy,iqx) = w_detJ*(A(0,0)*A(0,0)+A(0,1)*A(0,1)+A(0,2)*A(0,2)); // 1,1
324  Q(1,iqz,iqy,iqx) = w_detJ*(A(0,0)*A(1,0)+A(0,1)*A(1,1)+A(0,2)*A(1,2)); // 2,1
325  Q(2,iqz,iqy,iqx) = w_detJ*(A(0,0)*A(2,0)+A(0,1)*A(2,1)+A(0,2)*A(2,2)); // 3,1
326  Q(3,iqz,iqy,iqx) = w_detJ*(A(1,0)*A(1,0)+A(1,1)*A(1,1)+A(1,2)*A(1,2)); // 2,2
327  Q(4,iqz,iqy,iqx) = w_detJ*(A(1,0)*A(2,0)+A(1,1)*A(2,1)+A(1,2)*A(2,2)); // 3,2
328  Q(5,iqz,iqy,iqx) = w_detJ*(A(2,0)*A(2,0)+A(2,1)*A(2,1)+A(2,2)*A(2,2)); // 3,3
329  Q(6,iqz,iqy,iqx) = w*detJ;
330  }
331  }
332  }
333 
334  //MFEM_UNROLL(2)
335  for (int iqx=0; iqx<2; ++iqx)
336  {
337  //MFEM_UNROLL(2)
338  for (int jz=0; jz<2; ++jz)
339  {
340  // Note loop starts at iz=jz here, taking advantage of
341  // symmetries.
342  //MFEM_UNROLL(2)
343  for (int iz=jz; iz<2; ++iz)
344  {
345  //MFEM_UNROLL(2)
346  for (int iqy=0; iqy<2; ++iqy)
347  {
348  //MFEM_UNROLL(2)
349  for (int iqz=0; iqz<2; ++iqz)
350  {
351  const double mq = const_mq ? MQ(0,0,0,0) : MQ(kx+iqx, ky+iqy, kz+iqz, iel_ho);
352  const double dq = const_dq ? DQ(0,0,0,0) : DQ(kx+iqx, ky+iqy, kz+iqz, iel_ho);
353 
354  const double biz = (iz == iqz) ? 1.0 : 0.0;
355  const double giz = (iz == 0) ? -1.0 : 1.0;
356 
357  const double bjz = (jz == iqz) ? 1.0 : 0.0;
358  const double gjz = (jz == 0) ? -1.0 : 1.0;
359 
360  const double J11 = Q(0,iqz,iqy,iqx);
361  const double J21 = Q(1,iqz,iqy,iqx);
362  const double J31 = Q(2,iqz,iqy,iqx);
363  const double J12 = J21;
364  const double J22 = Q(3,iqz,iqy,iqx);
365  const double J32 = Q(4,iqz,iqy,iqx);
366  const double J13 = J31;
367  const double J23 = J32;
368  const double J33 = Q(5,iqz,iqy,iqx);
369 
370  grad_A(0,0,iqy,iz,jz,iqx) += dq*J11*biz*bjz;
371  grad_A(1,0,iqy,iz,jz,iqx) += dq*J21*biz*bjz;
372  grad_A(2,0,iqy,iz,jz,iqx) += dq*J31*giz*bjz;
373  grad_A(0,1,iqy,iz,jz,iqx) += dq*J12*biz*bjz;
374  grad_A(1,1,iqy,iz,jz,iqx) += dq*J22*biz*bjz;
375  grad_A(2,1,iqy,iz,jz,iqx) += dq*J32*giz*bjz;
376  grad_A(0,2,iqy,iz,jz,iqx) += dq*J13*biz*gjz;
377  grad_A(1,2,iqy,iz,jz,iqx) += dq*J23*biz*gjz;
378  grad_A(2,2,iqy,iz,jz,iqx) += dq*J33*giz*gjz;
379 
380  double wdetJ = Q(6,iqz,iqy,iqx);
381  mass_A(iqy,iz,jz,iqx) += mq*wdetJ*biz*bjz;
382  }
383  //MFEM_UNROLL(2)
384  for (int jy=0; jy<2; ++jy)
385  {
386  //MFEM_UNROLL(2)
387  for (int iy=0; iy<2; ++iy)
388  {
389  const double biy = (iy == iqy) ? 1.0 : 0.0;
390  const double giy = (iy == 0) ? -1.0 : 1.0;
391 
392  const double bjy = (jy == iqy) ? 1.0 : 0.0;
393  const double gjy = (jy == 0) ? -1.0 : 1.0;
394 
395  grad_B(0,0,iy,jy,iz,jz,iqx) += biy*bjy*grad_A(0,0,iqy,iz,jz,iqx);
396  grad_B(1,0,iy,jy,iz,jz,iqx) += giy*bjy*grad_A(1,0,iqy,iz,jz,iqx);
397  grad_B(2,0,iy,jy,iz,jz,iqx) += biy*bjy*grad_A(2,0,iqy,iz,jz,iqx);
398  grad_B(0,1,iy,jy,iz,jz,iqx) += biy*gjy*grad_A(0,1,iqy,iz,jz,iqx);
399  grad_B(1,1,iy,jy,iz,jz,iqx) += giy*gjy*grad_A(1,1,iqy,iz,jz,iqx);
400  grad_B(2,1,iy,jy,iz,jz,iqx) += biy*gjy*grad_A(2,1,iqy,iz,jz,iqx);
401  grad_B(0,2,iy,jy,iz,jz,iqx) += biy*bjy*grad_A(0,2,iqy,iz,jz,iqx);
402  grad_B(1,2,iy,jy,iz,jz,iqx) += giy*bjy*grad_A(1,2,iqy,iz,jz,iqx);
403  grad_B(2,2,iy,jy,iz,jz,iqx) += biy*bjy*grad_A(2,2,iqy,iz,jz,iqx);
404 
405  mass_B(iy,jy,iz,jz,iqx) += biy*bjy*mass_A(iqy,iz,jz,iqx);
406  }
407  }
408  }
409  //MFEM_UNROLL(2)
410  for (int jy=0; jy<2; ++jy)
411  {
412  //MFEM_UNROLL(2)
413  for (int jx=0; jx<2; ++jx)
414  {
415  //MFEM_UNROLL(2)
416  for (int iy=0; iy<2; ++iy)
417  {
418  //MFEM_UNROLL(2)
419  for (int ix=0; ix<2; ++ix)
420  {
421  const double bix = (ix == iqx) ? 1.0 : 0.0;
422  const double gix = (ix == 0) ? -1.0 : 1.0;
423 
424  const double bjx = (jx == iqx) ? 1.0 : 0.0;
425  const double gjx = (jx == 0) ? -1.0 : 1.0;
426 
427  int ii_loc = ix + 2*iy + 4*iz;
428  int jj_loc = jx + 2*jy + 4*jz;
429 
430  // Only store the lower-triangular part of
431  // the matrix (by symmetry).
432  if (jj_loc > ii_loc) { continue; }
433 
434  double val = 0.0;
435  val += gix*gjx*grad_B(0,0,iy,jy,iz,jz,iqx);
436  val += bix*gjx*grad_B(1,0,iy,jy,iz,jz,iqx);
437  val += bix*gjx*grad_B(2,0,iy,jy,iz,jz,iqx);
438  val += gix*bjx*grad_B(0,1,iy,jy,iz,jz,iqx);
439  val += bix*bjx*grad_B(1,1,iy,jy,iz,jz,iqx);
440  val += bix*bjx*grad_B(2,1,iy,jy,iz,jz,iqx);
441  val += gix*bjx*grad_B(0,2,iy,jy,iz,jz,iqx);
442  val += bix*bjx*grad_B(2,2,iy,jy,iz,jz,iqx);
443  val += bix*bjx*grad_B(1,2,iy,jy,iz,jz,iqx);
444 
445  val += bix*bjx*mass_B(iy,jy,iz,jz,iqx);
446 
447  local_mat(ii_loc, jj_loc) += val;
448  }
449  }
450  }
451  }
452  }
453  }
454  }
455  // Assemble the local matrix into the macro-element sparse matrix
456  // in a format similar to coordinate format. The (I,J) arrays
457  // are implicit (not stored explicitly).
458  //MFEM_UNROLL(8)
459  for (int ii_loc=0; ii_loc<nv; ++ii_loc)
460  {
461  const int ix = ii_loc%2;
462  const int iy = (ii_loc/2)%2;
463  const int iz = ii_loc/2/2;
464 
465  for (int jj_loc=0; jj_loc<nv; ++jj_loc)
466  {
467  const int jx = jj_loc%2;
468  const int jy = (jj_loc/2)%2;
469  const int jz = jj_loc/2/2;
470  const int jj_off = (jx-ix+1) + 3*(jy-iy+1) + 9*(jz-iz+1);
471 
472  if (jj_loc <= ii_loc)
473  {
474  AtomicAdd(V(jj_off, ix+kx, iy+ky, iz+kz, iel_ho), local_mat(ii_loc, jj_loc));
475  }
476  else
477  {
478  AtomicAdd(V(jj_off, ix+kx, iy+ky, iz+kz, iel_ho), local_mat(jj_loc, ii_loc));
479  }
480  }
481  }
482  }
483  }
484  }
485  });
486 
487  sparse_mapping.SetSize(nnz_per_row*ndof_per_el);
488  sparse_mapping = -1;
489  auto map = Reshape(sparse_mapping.HostReadWrite(), nnz_per_row, ndof_per_el);
490  for (int iz=0; iz<nd1d; ++iz)
491  {
492  const int jz_begin = (iz > 0) ? iz - 1 : 0;
493  const int jz_end = (iz < ORDER) ? iz + 1 : ORDER;
494  for (int iy=0; iy<nd1d; ++iy)
495  {
496  const int jy_begin = (iy > 0) ? iy - 1 : 0;
497  const int jy_end = (iy < ORDER) ? iy + 1 : ORDER;
498  for (int ix=0; ix<nd1d; ++ix)
499  {
500  const int jx_begin = (ix > 0) ? ix - 1 : 0;
501  const int jx_end = (ix < ORDER) ? ix + 1 : ORDER;
502 
503  const int ii_el = ix + nd1d*(iy + nd1d*iz);
504 
505  for (int jz=jz_begin; jz<=jz_end; ++jz)
506  {
507  for (int jy=jy_begin; jy<=jy_end; ++jy)
508  {
509  for (int jx=jx_begin; jx<=jx_end; ++jx)
510  {
511  const int jj_off = (jx-ix+1) + 3*(jy-iy+1) + 9*(jz-iz+1);
512  const int jj_el = jx + nd1d*(jy + nd1d*jz);
513  map(jj_off, ii_el) = jj_el;
514  }
515  }
516  }
517  }
518  }
519  }
520 }
521 
522 // Explicit template instantiations
523 template void BatchedLOR_H1::Assemble2D<1>();
524 template void BatchedLOR_H1::Assemble2D<2>();
525 template void BatchedLOR_H1::Assemble2D<3>();
526 template void BatchedLOR_H1::Assemble2D<4>();
527 template void BatchedLOR_H1::Assemble2D<5>();
528 template void BatchedLOR_H1::Assemble2D<6>();
529 template void BatchedLOR_H1::Assemble2D<7>();
530 template void BatchedLOR_H1::Assemble2D<8>();
531 
532 template void BatchedLOR_H1::Assemble3D<1>();
533 template void BatchedLOR_H1::Assemble3D<2>();
534 template void BatchedLOR_H1::Assemble3D<3>();
535 template void BatchedLOR_H1::Assemble3D<4>();
536 template void BatchedLOR_H1::Assemble3D<5>();
537 template void BatchedLOR_H1::Assemble3D<6>();
538 template void BatchedLOR_H1::Assemble3D<7>();
539 template void BatchedLOR_H1::Assemble3D<8>();
540 
542  FiniteElementSpace &fes_ho_,
543  Vector &X_vert_,
544  Vector &sparse_ij_,
545  Array<int> &sparse_mapping_)
546  : BatchedLORKernel(fes_ho_, X_vert_, sparse_ij_, sparse_mapping_)
547 {
548  ProjectLORCoefficient<MassIntegrator>(a, c1);
549  ProjectLORCoefficient<DiffusionIntegrator>(a, c2);
550 }
551 
552 } // namespace mfem
T * HostReadWrite()
Shortcut for mfem::ReadWrite(a.GetMemory(), a.Size(), false).
Definition: array.hpp:327
void forall_3D(int N, int X, int Y, int Z, lambda &&body)
Definition: forall.hpp:763
Abstract base class for the batched LOR assembly kernels.
void forall_2D(int N, int X, int Y, lambda &&body)
Definition: forall.hpp:751
Array< int > & sparse_mapping
Local element sparsity pattern.
void SetSize(int s)
Resize the vector to size s.
Definition: vector.hpp:517
MFEM_HOST_DEVICE T AtomicAdd(T &add, const T val)
Definition: backends.hpp:84
int Size() const
Returns the size of the vector.
Definition: vector.hpp:197
virtual const double * Read(bool on_dev=true) const
Shortcut for mfem::Read(vec.GetMemory(), vec.Size(), on_dev).
Definition: vector.hpp:453
CoefficientVector c2
Coefficient of second integrator.
virtual double * Write(bool on_dev=true)
Shortcut for mfem::Write(vec.GetMemory(), vec.Size(), on_dev).
Definition: vector.hpp:461
Vector & sparse_ij
Local element sparsity matrix data.
MFEM_HOST_DEVICE double Det3D(DeviceMatrix &J)
Definition: lor_util.hpp:188
MFEM_HOST_DEVICE double Det2D(DeviceMatrix &J)
Definition: lor_util.hpp:183
MFEM_HOST_DEVICE void Jacobian3D(const double x, const double y, const double z, const double vx[8], const double vy[8], const double vz[8], DeviceMatrix &J)
Definition: lor_util.hpp:126
FiniteElementSpace & fes_ho
The associated high-order space.
A basic generic Tensor class, appropriate for use on the GPU.
Definition: dtensor.hpp:81
int GetNE() const
Returns number of elements in the mesh.
Definition: fespace.hpp:736
MFEM_HOST_DEVICE void Jacobian2D(const double x, const double y, const double vx[4], const double vy[4], DeviceMatrix &J)
Definition: lor_util.hpp:115
Vector & X_vert
Mesh coordinate vector.
Class FiniteElementSpace - responsible for providing FEM view of the mesh, mainly managing the set of...
Definition: fespace.hpp:219
void SetSize(int nsize)
Change the logical size of the array, keep existing entries.
Definition: array.hpp:687
BatchedLOR_H1(BilinearForm &a, FiniteElementSpace &fes_ho_, Vector &X_vert_, Vector &sparse_ij_, Array< int > &sparse_mapping_)
Definition: lor_h1.cpp:541
double a
Definition: lissajous.cpp:41
A "square matrix" operator for the associated FE space and BLFIntegrators The sum of all the BLFInteg...
int dim
Definition: ex24.cpp:53
Vector data type.
Definition: vector.hpp:58
MFEM_HOST_DEVICE void Adjugate3D(const DeviceMatrix &J, DeviceMatrix &A)
Definition: lor_util.hpp:170
MFEM_HOST_DEVICE DeviceTensor< sizeof...(Dims), T > Reshape(T *ptr, Dims... dims)
Wrap a pointer as a DeviceTensor with automatically deduced template parameters.
Definition: dtensor.hpp:131
CoefficientVector c1
Coefficient of first integrator.