MFEM v4.9.0
Finite element discretization library
Loading...
Searching...
No Matches
lor_batched.cpp
Go to the documentation of this file.
1// Copyright (c) 2010-2025, Lawrence Livermore National Security, LLC. Produced
2// at the Lawrence Livermore National Laboratory. All Rights reserved. See files
3// LICENSE and NOTICE for details. LLNL-CODE-806117.
4//
5// This file is part of the MFEM library. For more information and source code
6// availability visit https://mfem.org.
7//
8// MFEM is free software; you can redistribute it and/or modify it under the
9// terms of the BSD-3 license. We welcome feedback and contributions, see file
10// CONTRIBUTING.md for details.
11
12#include "lor_batched.hpp"
15#include <climits>
16#include "../pbilinearform.hpp"
18
19// Specializations
20#include "lor_h1.hpp"
21#include "lor_dg.hpp"
22#include "lor_nd.hpp"
23#include "lor_rt.hpp"
24
25namespace mfem
26{
27
28template <typename T1, typename T2>
30{
31 Array<BilinearFormIntegrator*> *integs = a.GetDBFI();
32 if (integs == NULL) { return false; }
33 if (integs->Size() == 1)
34 {
35 BilinearFormIntegrator *i0 = (*integs)[0];
36 if (dynamic_cast<T1*>(i0) || dynamic_cast<T2*>(i0)) { return true; }
37 }
38 else if (integs->Size() == 2)
39 {
40 BilinearFormIntegrator *i0 = (*integs)[0];
41 BilinearFormIntegrator *i1 = (*integs)[1];
42 if ((dynamic_cast<T1*>(i0) && dynamic_cast<T2*>(i1)) ||
43 (dynamic_cast<T2*>(i0) && dynamic_cast<T1*>(i1)))
44 {
45 return true;
46 }
47 }
48 return false;
49}
50
52{
53 const FiniteElementCollection *fec = a.FESpace()->FEColl();
54 // TODO: check for maximum supported orders
55
56 // Batched LOR requires all tensor elements
57 if (!UsesTensorBasis(*a.FESpace())) { return false; }
58
59 if (dynamic_cast<const H1_FECollection*>(fec) ||
60 dynamic_cast<const DG_FECollection*>(fec))
61 {
63 }
64 else if (dynamic_cast<const ND_FECollection*>(fec))
65 {
67 }
68 else if (dynamic_cast<const RT_FECollection*>(fec))
69 {
71 }
72 return false;
73}
74
76 Vector &X_vert)
77{
78 Mesh &mesh_ho = *fes_ho.GetMesh();
79 mesh_ho.EnsureNodes();
80
81 const bool dg = fes_ho.IsDGSpace();
82
83 // Get nodal points at the LOR vertices
84 const int dim = mesh_ho.Dimension();
85 const int sdim = mesh_ho.SpaceDimension();
86 const int nel_ho = mesh_ho.GetNE();
87 const int order = fes_ho.GetMaxElementOrder();
88 const int nd1d = dg ? order + 2 : order + 1;
89 const int ndof_per_el = static_cast<int>(pow(nd1d, dim));
90
91 const GridFunction *nodal_gf = mesh_ho.GetNodes();
92 const FiniteElementSpace *nodal_fes = nodal_gf->FESpace();
93 const Operator *nodal_restriction =
95
96 // Map from nodal L-vector to E-vector
97 Vector nodal_evec(nodal_restriction->Height());
98 nodal_restriction->Mult(*nodal_gf, nodal_evec);
99
101 mesh_ho.GetTypicalElementGeometry(), nd1d);
102
103 // Map from nodal E-vector to Q-vector at the LOR vertex points
104 X_vert.SetSize(sdim*ndof_per_el*nel_ho);
105 const QuadratureInterpolator *quad_interp =
106 nodal_fes->GetQuadratureInterpolator(ir);
108 quad_interp->Values(nodal_evec, X_vert);
109}
110
111// The following two functions (GetMinElt and GetAndIncrementNnzIndex) are
112// copied from restriction.cpp. Should they be factored out?
113
114// Return the minimal value found in both my_elts and nbr_elts
115static MFEM_HOST_DEVICE int GetMinElt(const int *my_elts, const int n_my_elts,
116 const int *nbr_elts, const int n_nbr_elts)
117{
118 int min_el = INT_MAX;
119 for (int i = 0; i < n_my_elts; i++)
120 {
121 const int e_i = my_elts[i];
122 if (e_i >= min_el) { continue; }
123 for (int j = 0; j < n_nbr_elts; j++)
124 {
125 if (e_i==nbr_elts[j])
126 {
127 min_el = e_i; // we already know e_i < min_el
128 break;
129 }
130 }
131 }
132 return min_el;
133}
134
135// Returns the index where a non-zero entry should be added and increment the
136// number of non-zeros for the row i_L.
137static MFEM_HOST_DEVICE int GetAndIncrementNnzIndex(const int i_L, int* I)
138{
139 int ind = AtomicAdd(I[i_L],1);
140 return ind;
141}
142
144{
145 static constexpr int Max = 16;
146
147 const int nvdof = fes_ho.GetVSize();
148
149 const int ndof_per_el = fes_ho.GetTypicalFE()->GetDof();
150 const int nel_ho = fes_ho.GetNE();
151 const int nnz_per_row = sparse_mapping.Size()/ndof_per_el;
152
154 const Operator *op = fes_ho.GetElementRestriction(ordering);
155 const ElementRestriction *el_restr =
156 dynamic_cast<const ElementRestriction*>(op);
157 MFEM_VERIFY(el_restr != nullptr, "Bad element restriction");
158
159 const Array<int> &el_dof_lex_ = el_restr->GatherMap();
160 const Array<int> &dof_glob2loc_ = el_restr->Indices();
161 const Array<int> &dof_glob2loc_offsets_ = el_restr->Offsets();
162
163 const auto el_dof_lex = Reshape(el_dof_lex_.Read(), ndof_per_el, nel_ho);
164 const auto dof_glob2loc = dof_glob2loc_.Read();
165 const auto K = dof_glob2loc_offsets_.Read();
166 const auto map = Reshape(sparse_mapping.Read(), nnz_per_row, ndof_per_el);
167
168
169 auto I = A.WriteI();
170
171 mfem::forall(nvdof + 1, [=] MFEM_HOST_DEVICE (int ii) { I[ii] = 0; });
172 mfem::forall(ndof_per_el*nel_ho, [=] MFEM_HOST_DEVICE (int i)
173 {
174 const int ii_el = i%ndof_per_el;
175 const int iel_ho = i/ndof_per_el;
176 const int sii = el_dof_lex(ii_el, iel_ho);
177 const int ii = (sii >= 0) ? sii : -1 -sii;
178 // Get number and list of elements containing this DOF
179 int i_elts[Max];
180 const int i_offset = K[ii];
181 const int i_next_offset = K[ii+1];
182 const int i_ne = i_next_offset - i_offset;
183 for (int e_i = 0; e_i < i_ne; ++e_i)
184 {
185 const int si_E = dof_glob2loc[i_offset+e_i]; // signed
186 const int i_E = (si_E >= 0) ? si_E : -1 - si_E;
187 i_elts[e_i] = i_E/ndof_per_el;
188 }
189 for (int j = 0; j < nnz_per_row; ++j)
190 {
191 int jj_el = map(j, ii_el);
192 if (jj_el < 0) { continue; }
193 // LDOF index of column
194 const int sjj = el_dof_lex(jj_el, iel_ho); // signed
195 const int jj = (sjj >= 0) ? sjj : -1 - sjj;
196 const int j_offset = K[jj];
197 const int j_next_offset = K[jj+1];
198 const int j_ne = j_next_offset - j_offset;
199 if (i_ne == 1 || j_ne == 1) // no assembly required
200 {
201 AtomicAdd(I[ii], 1);
202 }
203 else // assembly required
204 {
205 int j_elts[Max];
206 for (int e_j = 0; e_j < j_ne; ++e_j)
207 {
208 const int sj_E = dof_glob2loc[j_offset+e_j]; // signed
209 const int j_E = (sj_E >= 0) ? sj_E : -1 - sj_E;
210 const int elt = j_E/ndof_per_el;
211 j_elts[e_j] = elt;
212 }
213 const int min_e = GetMinElt(i_elts, i_ne, j_elts, j_ne);
214 if (iel_ho == min_e) // add the nnz only once
215 {
216 AtomicAdd(I[ii], 1);
217 }
218 }
219 }
220 });
221 // TODO: on device, this is a scan operation
222 // We need to sum the entries of I, we do it on CPU as it is very sequential.
223 auto h_I = A.HostReadWriteI();
224 int sum = 0;
225 for (int i = 0; i < nvdof; i++)
226 {
227 const int nnz = h_I[i];
228 h_I[i] = sum;
229 sum+=nnz;
230 }
231 h_I[nvdof] = sum;
232
233 // Return the number of nnz
234 return h_I[nvdof];
235}
236
238{
239 const int nvdof = fes_ho.GetVSize();
240 const int ndof_per_el = fes_ho.GetTypicalFE()->GetDof();
241 const int nel_ho = fes_ho.GetNE();
242 const int nnz_per_row = sparse_mapping.Size()/ndof_per_el;
243
245 const Operator *op = fes_ho.GetElementRestriction(ordering);
246 const ElementRestriction *el_restr =
247 dynamic_cast<const ElementRestriction*>(op);
248 MFEM_VERIFY(el_restr != nullptr, "Bad element restriction");
249
250 const Array<int> &el_dof_lex_ = el_restr->GatherMap();
251 const Array<int> &dof_glob2loc_ = el_restr->Indices();
252 const Array<int> &dof_glob2loc_offsets_ = el_restr->Offsets();
253
254 const auto el_dof_lex = Reshape(el_dof_lex_.Read(), ndof_per_el, nel_ho);
255 const auto dof_glob2loc = dof_glob2loc_.Read();
256 const auto K = dof_glob2loc_offsets_.Read();
257
258 const auto V = Reshape(sparse_ij.Read(), nnz_per_row, ndof_per_el, nel_ho);
259 const auto map = Reshape(sparse_mapping.Read(), nnz_per_row, ndof_per_el);
260
261 Array<int> I_(nvdof + 1);
262 const auto I = I_.Write();
263 const auto J = A.WriteJ();
264 auto AV = A.WriteData();
265
266 // Copy A.I into I, use it as a temporary buffer
267 {
268 const auto I2 = A.ReadI();
269 mfem::forall(nvdof + 1, [=] MFEM_HOST_DEVICE (int i) { I[i] = I2[i]; });
270 }
271
272 static constexpr int Max = 16;
273
274 mfem::forall(ndof_per_el*nel_ho, [=] MFEM_HOST_DEVICE (int i)
275 {
276 const int ii_el = i%ndof_per_el;
277 const int iel_ho = i/ndof_per_el;
278 // LDOF index of current row
279 const int sii = el_dof_lex(ii_el, iel_ho); // signed
280 const int ii = (sii >= 0) ? sii : -1 - sii;
281 // Get number and list of elements containing this DOF
282 int i_elts[Max];
283 int i_B[Max];
284 const int i_offset = K[ii];
285 const int i_next_offset = K[ii+1];
286 const int i_ne = i_next_offset - i_offset;
287 for (int e_i = 0; e_i < i_ne; ++e_i)
288 {
289 const int si_E = dof_glob2loc[i_offset+e_i]; // signed
290 const bool plus = si_E >= 0;
291 const int i_E = plus ? si_E : -1 - si_E;
292 i_elts[e_i] = i_E/ndof_per_el;
293 const int i_Bi = i_E % ndof_per_el;
294 i_B[e_i] = plus ? i_Bi : -1 - i_Bi; // encode with sign
295 }
296 for (int j=0; j<nnz_per_row; ++j)
297 {
298 int jj_el = map(j, ii_el);
299 if (jj_el < 0) { continue; }
300 // LDOF index of column
301 const int sjj = el_dof_lex(jj_el, iel_ho); // signed
302 const int jj = (sjj >= 0) ? sjj : -1 - sjj;
303 const int sgn = ((sjj >=0 && sii >= 0) || (sjj < 0 && sii <0)) ? 1 : -1;
304 const int j_offset = K[jj];
305 const int j_next_offset = K[jj+1];
306 const int j_ne = j_next_offset - j_offset;
307 if (i_ne == 1 || j_ne == 1) // no assembly required
308 {
309 const int nnz = GetAndIncrementNnzIndex(ii, I);
310 J[nnz] = jj;
311 AV[nnz] = sgn*V(j, ii_el, iel_ho);
312 }
313 else // assembly required
314 {
315 int j_elts[Max];
316 int j_B[Max];
317 for (int e_j = 0; e_j < j_ne; ++e_j)
318 {
319 const int sj_E = dof_glob2loc[j_offset+e_j]; // signed
320 const bool plus = sj_E >= 0;
321 const int j_E = plus ? sj_E : -1 - sj_E;
322 j_elts[e_j] = j_E/ndof_per_el;
323 const int j_Bj = j_E % ndof_per_el;
324 j_B[e_j] = plus ? j_Bj : -1 - j_Bj; // encode with sign
325 }
326 const int min_e = GetMinElt(i_elts, i_ne, j_elts, j_ne);
327 if (iel_ho == min_e) // add the nnz only once
328 {
329 real_t val = 0.0;
330 for (int k = 0; k < i_ne; k++)
331 {
332 const int iel_ho_2 = i_elts[k];
333 const int sii_el_2 = i_B[k]; // signed
334 const int ii_el_2 = (sii_el_2 >= 0) ? sii_el_2 : -1 -sii_el_2;
335 for (int l = 0; l < j_ne; l++)
336 {
337 const int jel_ho_2 = j_elts[l];
338 if (iel_ho_2 == jel_ho_2)
339 {
340 const int sjj_el_2 = j_B[l]; // signed
341 const int jj_el_2 = (sjj_el_2 >= 0) ? sjj_el_2 : -1 -sjj_el_2;
342 const int sgn_2 = ((sjj_el_2 >=0 && sii_el_2 >= 0)
343 || (sjj_el_2 < 0 && sii_el_2 <0)) ? 1 : -1;
344 int j2 = -1;
345 // find nonzero in matrix of other element
346 for (int m = 0; m < nnz_per_row; ++m)
347 {
348 if (map(m, ii_el_2) == jj_el_2)
349 {
350 j2 = m;
351 break;
352 }
353 }
354 MFEM_ASSERT_KERNEL(j >= 0, "Can't find nonzero");
355 val += sgn_2*V(j2, ii_el_2, iel_ho_2);
356 }
357 }
358 }
359 const int nnz = GetAndIncrementNnzIndex(ii, I);
360 J[nnz] = jj;
361 AV[nnz] = val;
362 }
363 }
364 }
365 });
366}
367
369{
370 const int ndof_per_el = fes_ho.GetFE(0)->GetDof();
371 const int nel_ho = fes_ho.GetNE();
372 const int nnz_per_row = sparse_ij.Size()/ndof_per_el/nel_ho;
373 const int dim = fes_ho.GetMesh()->Dimension();
374 const int nrows = nel_ho*ndof_per_el;
375 const int p = fes_ho.GetMaxElementOrder();
376 const int pp1 = p + 1;
377 const int nnz = nrows*nnz_per_row;
378
379 const int face_nbr_vsize = [&]()
380 {
381#ifdef MFEM_USE_MPI
382 if (auto *par_fes = dynamic_cast<ParFiniteElementSpace*>(&fes_ho))
383 {
384 return par_fes->GetFaceNbrVSize();
385 }
386#endif
387 return 0;
388 }();
389
390 // If A contains an existing SparseMatrix, reuse it (and try to reuse its
391 // I, J, A arrays if they are big enough)
392 SparseMatrix *A_mat = A.Is<SparseMatrix>();
393 if (!A_mat)
394 {
395 A_mat = new SparseMatrix;
396 A.Reset(A_mat);
397 }
398
399 // The second argument (nrows + face_nbr_vsize) accounts for additional
400 // columns contributed by DG face neighbors in parallel finite element
401 // spaces. In serial, face_nbr_vsize is set to 0.
402 A_mat->OverrideSize(nrows, nrows + face_nbr_vsize);
403
404 EnsureCapacity(A_mat->GetMemoryI(), nrows + 1);
405 EnsureCapacity(A_mat->GetMemoryJ(), nnz);
406 EnsureCapacity(A_mat->GetMemoryData(), nnz);
407
408 Array<int> nbr_info(nel_ho*3*2*dim);
409 auto h_nbr_info = Reshape(nbr_info.HostWrite(), nel_ho, 2*dim, 3);
410 const int num_faces = fes_ho.GetMesh()->GetNumFaces();
411 for (int f = 0; f < num_faces; f++)
412 {
414 int e0 = finfo.element[0].index;
415 int f0 = finfo.element[0].local_face_id;
416 if (finfo.IsBoundary())
417 {
418 h_nbr_info(e0,f0,0) = -1;
419 h_nbr_info(e0,f0,1)= -1;
420 h_nbr_info(e0,f0,2)= -1;
421 }
422 else if (finfo.IsShared())
423 {
424 // Face neighbors elements are indexed after the last local element
425 h_nbr_info(e0,f0,0) = nel_ho + finfo.element[1].index;
426 h_nbr_info(e0,f0,1)= finfo.element[1].orientation;
427 h_nbr_info(e0,f0,2)= finfo.element[1].local_face_id;
428 }
429 else if (finfo.IsInterior())
430 {
431 int e1 = finfo.element[1].index;
432 int f1 = finfo.element[1].local_face_id;
433 h_nbr_info(e0,f0,0) = e1;
434 h_nbr_info(e0,f0,1)= finfo.element[1].orientation;
435 h_nbr_info(e0,f0,2)= f1;
436 h_nbr_info(e1,f1,0) = e0;
437 h_nbr_info(e1,f1,1) = finfo.element[1].orientation;
438 h_nbr_info(e1,f1,2) = f0;
439 }
440 };
441
442 auto h_I = A_mat->HostWriteI();
443 h_I[0] = 0;
444 for (int i = 0; i < nrows; ++i)
445 {
446 const int iel_ho = i / ndof_per_el;
447 const int iloc = i % ndof_per_el;
448 static const int lex_map_2[4] = {3, 1, 0, 2};
449 static const int lex_map_3[6] = {4, 2, 1, 3, 0, 5};
450 const int local_i[3] = {iloc % pp1, (iloc/pp1)%pp1, iloc/pp1/pp1};
451 int bdr_count = 0;
452 for (int n_idx = 0; n_idx < dim; ++n_idx)
453 {
454 for (int e_i = 0; e_i < 2; ++e_i)
455 {
456 const int j_lex = e_i + n_idx*2;
457 const int f = (dim == 3) ? lex_map_3[j_lex]:lex_map_2[j_lex];
458 const bool boundary = (local_i[n_idx] == e_i * p);
459 if (boundary)
460 {
461 int neighbor_idx = h_nbr_info(iel_ho, f, 0);
462 if (neighbor_idx == -1)
463 {
464 ++bdr_count;
465 }
466 }
467 }
468 }
469 h_I[i+1] = h_I[i] + (nnz_per_row - bdr_count);
470 }
471
472 const auto V = Reshape(sparse_ij.Read(), nnz_per_row, ndof_per_el, nel_ho);
473 auto J = A_mat->WriteJ();
474 auto AV = A_mat->WriteData();
475 auto I = A_mat->ReadI();
476
477 auto d_nbr_info = Reshape(nbr_info.Read(), nel_ho, 2*dim, 3);
478 mfem::forall(nrows, [=] MFEM_HOST_DEVICE (int i)
479 {
480 const int e = i / ndof_per_el;
481 const int iloc = i % ndof_per_el;
482 const int local_x = iloc % pp1;
483 const int local_y = (iloc/pp1)%pp1;
484 const int local_z = iloc/pp1/pp1;
485 const int local_i[3] = {local_x, local_y, local_z};
486 int offset = I[i];
487 static const int lex_map_2[4] = {3, 1, 0, 2};
488 static const int lex_map_3[6] = {4,2,1,3,0,5};
489 const int *lex_map = (dim == 2) ? lex_map_2 : lex_map_3;
490 AV[offset] = V(0, iloc, e);
491 J[offset] = i;
492 ++offset;
493 for (int n_idx = 0; n_idx < dim; ++n_idx)
494 {
495 // qi is the face lexicographic index, obtained by taking the
496 // lexicographic index of the coordinates ommiting n_idx.
497 int qi = 0;
498 int stride = 1;
499 for (int d = 0; d < dim; ++d)
500 {
501 if (d != n_idx)
502 {
503 qi += local_i[d]*stride;
504 stride *= pp1;
505 }
506 }
507 for (int e_i = 0; e_i < 2; ++e_i)
508 {
509 const int j_lex = e_i + n_idx*2;
510 const int f = lex_map[j_lex];
511 const bool bdr = (local_i[n_idx] == e_i * p);
512 if (bdr)
513 {
514 const int nbr_e = d_nbr_info(e, f, 0);
515 const int nbr_ori = d_nbr_info(e, f, 1);
516 const int nbr_f = d_nbr_info(e, f, 2);
517 if (nbr_e != -1)
518 {
519 const int nbr_loc_idx = internal::FaceIdxToVolIdx(
520 dim, qi, pp1, f, nbr_f, 1, nbr_ori);
521 J[offset] = nbr_e*ndof_per_el + nbr_loc_idx;
522 AV[offset] = V(f+1, iloc, e);
523 ++offset;
524 }
525 }
526 else
527 {
528 int shift = (e_i == 0) ? -1 : 1;
529 for (int n = 0; n < n_idx; ++n) { shift *= pp1; }
530 J[offset] = i + shift;
531 AV[offset] = V(f+1, iloc, e);
532 ++offset;
533 }
534 }
535 }
536 });
537}
538
540{
541 const int nvdof = fes_ho.GetVSize();
542
543 // If A contains an existing SparseMatrix, reuse it (and try to reuse its
544 // I, J, A arrays if they are big enough)
545 SparseMatrix *A_mat = A.Is<SparseMatrix>();
546 if (!A_mat)
547 {
548 A_mat = new SparseMatrix;
549 A.Reset(A_mat);
550 }
551
552 A_mat->OverrideSize(nvdof, nvdof);
553 EnsureCapacity(A_mat->GetMemoryI(), nvdof + 1);
554
555 const int nnz = FillI(*A_mat);
556 EnsureCapacity(A_mat->GetMemoryJ(), nnz);
557 EnsureCapacity(A_mat->GetMemoryData(), nnz);
558 FillJAndData(*A_mat);
559}
560
561template <int ORDER, int SDIM, typename LOR_KERNEL>
562static void Assemble_(LOR_KERNEL &kernel, int dim)
563{
564 if (dim == 2) { kernel.template Assemble2D<ORDER,SDIM>(); }
565 else if (dim == 3) { kernel.template Assemble3D<ORDER>(); }
566 else { MFEM_ABORT("Unsupported dimension"); }
567}
568
569template <int ORDER, typename LOR_KERNEL>
570static void Assemble_(LOR_KERNEL &kernel, int dim, int sdim)
571{
572 if (sdim == 2) { Assemble_<ORDER,2>(kernel, dim); }
573 else if (sdim == 3) { Assemble_<ORDER,3>(kernel, dim); }
574 else { MFEM_ABORT("Unsupported space dimension."); }
575}
576
577template <typename LOR_KERNEL>
578static void Assemble_(LOR_KERNEL &kernel, int dim, int sdim, int order)
579{
580 switch (order)
581 {
582 case 1: Assemble_<1>(kernel, dim, sdim); break;
583 case 2: Assemble_<2>(kernel, dim, sdim); break;
584 case 3: Assemble_<3>(kernel, dim, sdim); break;
585 case 4: Assemble_<4>(kernel, dim, sdim); break;
586 case 5: Assemble_<5>(kernel, dim, sdim); break;
587 case 6: Assemble_<6>(kernel, dim, sdim); break;
588 case 7: Assemble_<7>(kernel, dim, sdim); break;
589 case 8: Assemble_<8>(kernel, dim, sdim); break;
590 default: MFEM_ABORT("No kernel order " << order << "!");
591 }
592}
593
594template <typename LOR_KERNEL>
596{
597 LOR_KERNEL kernel(a, fes_ho, X_vert, sparse_ij, sparse_mapping);
598
599 const int dim = fes_ho.GetMesh()->Dimension();
600 const int sdim = fes_ho.GetMesh()->SpaceDimension();
601 const int order = fes_ho.GetMaxElementOrder();
602
603 Assemble_(kernel, dim, sdim, order);
604}
605
607{
608 // Assemble the matrix, depending on what the form is.
609 // This fills in the arrays sparse_ij and sparse_mapping.
611
612 // Handle DG case separately, because assembly of CSR matrix requires
613 // handling face terms.
614 if (dynamic_cast<const DG_FECollection*>(fec))
615 {
617 {
619 }
621 return;
622 }
623
624 if (dynamic_cast<const H1_FECollection*>(fec))
625 {
627 {
629 }
630 }
631 else if (dynamic_cast<const ND_FECollection*>(fec))
632 {
634 {
636 }
637 }
638 else if (dynamic_cast<const RT_FECollection*>(fec))
639 {
641 {
643 }
644 }
645
646 SparseIJToCSR(A);
647}
648
649#ifdef MFEM_USE_MPI
652{
653 auto &par_fes = static_cast<ParFiniteElementSpace&>(fes_ho);
654
655 // handle the case when 'a' contains off-diagonal
656 const int lvsize = par_fes.GetVSize();
657 const Array<HYPRE_BigInt> &face_nbr_glob_ldof =
658 par_fes.GetFaceNbrGlobalDofMapArray();
659 const HYPRE_BigInt ldof_offset = par_fes.GetMyDofOffset();
660
661 const int nnz_local = A_local.NumNonZeroElems();
662 Array<HYPRE_BigInt> glob_J(nnz_local);
663
664 const HYPRE_BigInt *d_face_nbr_glob_ldof = face_nbr_glob_ldof.Read();
665 const int *d_J = A_local.ReadJ();
666 HYPRE_BigInt *d_glob_J = glob_J.Write();
667
668 mfem::forall(nnz_local, [=] MFEM_HOST_DEVICE (int i)
669 {
670 if (d_J[i] < lvsize)
671 {
672 d_glob_J[i] = d_J[i] + ldof_offset;
673 }
674 else
675 {
676 d_glob_J[i] = d_face_nbr_glob_ldof[d_J[i] - lvsize];
677 }
678 });
679
680 A.Reset(new HypreParMatrix(
681 par_fes.GetComm(), lvsize, par_fes.GlobalVSize(),
682 par_fes.GlobalVSize(), A_local.HostReadWriteI(),
683 glob_J.HostReadWrite(), A_local.HostReadWriteData(),
684 par_fes.GetDofOffsets(), par_fes.GetDofOffsets()));
685}
686
688 BilinearForm &a, const Array<int> &ess_dofs, OperatorHandle &A)
689{
690 // Assemble the system matrix local to this partition
691 OperatorHandle A_local;
692 AssembleWithoutBC(a, A_local);
693
694 if (dynamic_cast<const DG_FECollection*>(fes_ho.FEColl()))
695 {
696 ParAssemble_DG(*A_local.As<SparseMatrix>(), A);
697 }
698 else
699 {
700 ParBilinearForm *pa =
701 dynamic_cast<ParBilinearForm*>(&a);
702 pa->ParallelRAP(*A_local.As<SparseMatrix>(), A, true);
703 A.As<HypreParMatrix>()->EliminateBC(ess_dofs,
705 }
706}
707#endif
708
710 BilinearForm &a, const Array<int> ess_dofs, OperatorHandle &A)
711{
712#ifdef MFEM_USE_MPI
713 if (dynamic_cast<ParFiniteElementSpace*>(&fes_ho))
714 {
715 return ParAssemble(a, ess_dofs, A);
716 }
717#endif
718
720
722 if (P)
723 {
724 std::unique_ptr<SparseMatrix> R(Transpose(*P));
725 std::unique_ptr<SparseMatrix> RA(mfem::Mult(*R, *A.As<SparseMatrix>()));
726 A.Reset(mfem::Mult(*RA, *P));
727 }
728
729 A.As<SparseMatrix>()->EliminateBC(ess_dofs,
731}
732
738
740{
742 return irs.Get(geom, 2*nd1d - 3);
743}
744
750
756
757} // namespace mfem
MFEM_HOST_DEVICE T AtomicAdd(T &add, const T val)
Definition backends.hpp:94
int Size() const
Return the logical size of the array.
Definition array.hpp:166
T * Write(bool on_dev=true)
Shortcut for mfem::Write(a.GetMemory(), a.Size(), on_dev).
Definition array.hpp:389
const T * Read(bool on_dev=true) const
Shortcut for mfem::Read(a.GetMemory(), a.Size(), on_dev).
Definition array.hpp:381
T * HostReadWrite()
Shortcut for mfem::ReadWrite(a.GetMemory(), a.Size(), false).
Definition array.hpp:401
T * HostWrite()
Shortcut for mfem::Write(a.GetMemory(), a.Size(), false).
Definition array.hpp:393
void FillJAndData(SparseMatrix &A) const
Fill the J and data arrays of the sparse matrix A.
void SparseIJToCSR(OperatorHandle &A) const
After assembling the "sparse IJ" format, convert it to CSR.
void AssemblyKernel(BilinearForm &a)
Fill in sparse_ij and sparse_mapping using one of the specialized LOR assembly kernel classes.
Array< int > sparse_mapping
The sparsity pattern of the element matrices.
void AssembleWithoutBC(BilinearForm &a, OperatorHandle &A)
Assemble the system without eliminating essential DOFs.
Vector X_vert
LOR vertex coordinates.
void ParAssemble_DG(SparseMatrix &A_local, OperatorHandle &A)
Assemble the parallel DG matrix (with shared faces).
Vector sparse_ij
The elementwise LOR matrices in a sparse "ij" format.
static void FormLORVertexCoordinates(FiniteElementSpace &fes_ho, Vector &X_vert)
Compute the vertices of the LOR mesh and place the result in X_vert.
void ParAssemble(BilinearForm &a, const Array< int > &ess_dofs, OperatorHandle &A)
Assemble the system in parallel and place the result in A.
static bool FormIsSupported(BilinearForm &a)
Returns true if the form a supports batched assembly, false otherwise.
FiniteElementSpace & fes_ho
The high-order space.
BatchedLORAssembly(FiniteElementSpace &fes_ho_)
Construct the batched assembly object corresponding to fes_ho_.
void Assemble(BilinearForm &a, const Array< int > ess_dofs, OperatorHandle &A)
Assemble the given form as a matrix and place the result in A.
int FillI(SparseMatrix &A) const
Fill the I array of the sparse matrix A.
void SparseIJToCSR_DG(OperatorHandle &A) const
Specialized implementation of SparseIJToCSR for DG spaces.
Abstract base class BilinearFormIntegrator.
A "square matrix" operator for the associated FE space and BLFIntegrators The sum of all the BLFInteg...
Operator that converts FiniteElementSpace L-vectors to E-vectors.
const Array< int > & GatherMap() const
const Array< int > & Offsets() const
const Array< int > & Indices() const
Collection of finite elements from the same family in multiple dimensions. This class is used to matc...
Definition fe_coll.hpp:27
Class FiniteElementSpace - responsible for providing FEM view of the mesh, mainly managing the set of...
Definition fespace.hpp:208
const QuadratureInterpolator * GetQuadratureInterpolator(const IntegrationRule &ir) const
Return a QuadratureInterpolator that interpolates E-vectors to quadrature point values and/or derivat...
Definition fespace.cpp:1552
virtual const FiniteElement * GetFE(int i) const
Returns pointer to the FiniteElement in the FiniteElementCollection associated with i'th element in t...
Definition fespace.cpp:3824
int GetNE() const
Returns number of elements in the mesh.
Definition fespace.hpp:869
const ElementRestrictionOperator * GetElementRestriction(ElementDofOrdering e_ordering) const
Return an Operator that converts L-vectors to E-vectors.
Definition fespace.cpp:1480
const SparseMatrix * GetConformingProlongation() const
Definition fespace.cpp:1426
const FiniteElementCollection * FEColl() const
Definition fespace.hpp:856
Mesh * GetMesh() const
Returns the mesh.
Definition fespace.hpp:644
int GetVSize() const
Return the number of vector dofs, i.e. GetNDofs() x GetVDim().
Definition fespace.hpp:826
bool IsDGSpace() const
Return whether or not the space is discontinuous (L2)
Definition fespace.hpp:1503
const FiniteElement * GetTypicalFE() const
Return GetFE(0) if the local mesh is not empty; otherwise return a typical FE based on the Geometry t...
Definition fespace.cpp:3860
virtual int GetMaxElementOrder() const
Return the maximum polynomial order over all elements.
Definition fespace.hpp:674
int GetDof() const
Returns the number of degrees of freedom in the finite element.
Definition fe_base.hpp:337
Class for grid function - Vector with associated FE space.
Definition gridfunc.hpp:32
FiniteElementSpace * FESpace()
Arbitrary order H1-conforming (continuous) finite elements.
Definition fe_coll.hpp:279
Wrapper for hypre's ParCSR matrix class.
Definition hypre.hpp:419
Class for an integration rule - an Array of IntegrationPoint.
Definition intrules.hpp:100
Container class for integration rules.
Definition intrules.hpp:422
const IntegrationRule & Get(int GeomType, int Order)
Returns an integration rule for given GeomType and Order.
Arbitrary order "L2-conforming" discontinuous finite elements.
Definition fe_coll.hpp:350
Mesh data type.
Definition mesh.hpp:65
int GetNumFaces() const
Return the number of faces (3D), edges (2D) or vertices (1D).
Definition mesh.cpp:6846
void EnsureNodes()
Make sure that the mesh has valid nodes, i.e. its geometry is described by a vector finite element gr...
Definition mesh.cpp:6747
Geometry::Type GetTypicalElementGeometry() const
If the local mesh is not empty, return GetElementGeometry(0); otherwise, return a typical Geometry pr...
Definition mesh.cpp:1628
int GetNE() const
Returns number of elements.
Definition mesh.hpp:1377
int Dimension() const
Dimension of the reference space used within the elements.
Definition mesh.hpp:1306
FaceInformation GetFaceInformation(int f) const
Definition mesh.cpp:1293
int SpaceDimension() const
Dimension of the physical space containing the mesh.
Definition mesh.hpp:1309
void GetNodes(Vector &node_coord) const
Definition mesh.cpp:9627
Geometry::Type GetTypicalFaceGeometry() const
If the local mesh is not empty, return GetFaceGeometry(0); otherwise return a typical face geometry p...
Definition mesh.cpp:1596
Arbitrary order H(curl)-conforming Nedelec finite elements.
Definition fe_coll.hpp:500
Pointer to an Operator of a specified type.
Definition handle.hpp:34
OpType * As() const
Return the Operator pointer statically cast to a specified OpType. Similar to the method Get().
Definition handle.hpp:104
void Reset(OpType *A, bool own_A=true)
Reset the OperatorHandle to the given OpType pointer, A.
Definition handle.hpp:145
OpType * Is() const
Return the Operator pointer dynamically cast to a specified OpType.
Definition handle.hpp:108
Abstract operator.
Definition operator.hpp:25
int Height() const
Get the height (size of output) of the Operator. Synonym with NumRows().
Definition operator.hpp:66
virtual void Mult(const Vector &x, Vector &y) const =0
Operator application: y=A(x).
@ DIAG_ONE
Set the diagonal value to one.
Definition operator.hpp:50
@ DIAG_KEEP
Keep the diagonal value.
Definition operator.hpp:51
Class for parallel bilinear form.
void ParallelRAP(SparseMatrix &loc_A, OperatorHandle &A, bool steal_loc_A=false)
Compute parallel RAP operator and store it in A as a HypreParMatrix.
Abstract parallel finite element space.
Definition pfespace.hpp:31
A class that performs interpolation from an E-vector to quadrature point values and/or derivatives (Q...
void SetOutputLayout(QVectorLayout layout) const
Set the desired output Q-vector layout. The default value is QVectorLayout::byNODES.
void Values(const Vector &e_vec, Vector &q_val) const
Interpolate the values of the E-vector e_vec at quadrature points.
Arbitrary order H(div)-conforming Raviart-Thomas finite elements.
Definition fe_coll.hpp:407
Data type sparse matrix.
Definition sparsemat.hpp:51
int NumNonZeroElems() const override
Returns the number of the nonzero elements in the matrix.
int * WriteJ(bool on_dev=true)
Memory< int > & GetMemoryI()
const int * ReadI(bool on_dev=true) const
int * WriteI(bool on_dev=true)
real_t * HostReadWriteData()
const int * ReadJ(bool on_dev=true) const
void OverrideSize(int height_, int width_)
Sets the height and width of the matrix.
Memory< int > & GetMemoryJ()
Memory< real_t > & GetMemoryData()
real_t * WriteData(bool on_dev=true)
Vector data type.
Definition vector.hpp:82
virtual const real_t * Read(bool on_dev=true) const
Shortcut for mfem::Read(vec.GetMemory(), vec.Size(), on_dev).
Definition vector.hpp:520
int Size() const
Returns the size of the vector.
Definition vector.hpp:234
void SetSize(int s)
Resize the vector to size s.
Definition vector.hpp:584
int dim
Definition ex24.cpp:53
HYPRE_Int HYPRE_BigInt
real_t a
Definition lissajous.cpp:41
void Mult(const Table &A, const Table &B, Table &C)
C = A * B (as boolean matrices)
Definition table.cpp:505
void EnsureCapacity(Memory< T > &mem, int capacity)
Ensure that mem has at least capacity capacity.
void Transpose(const Table &A, Table &At, int ncols_A_)
Transpose a Table.
Definition table.cpp:443
bool HasIntegrators(BilinearForm &a)
MFEM_HOST_DEVICE DeviceTensor< sizeof...(Dims), T > Reshape(T *ptr, Dims... dims)
Wrap a pointer as a DeviceTensor with automatically deduced template parameters.
Definition dtensor.hpp:138
IntegrationRule GetLobattoIntRule(Geometry::Type geom, int nd1d)
Return the Gauss-Lobatto rule for geometry geom with nd1d points per dimension.
bool UsesTensorBasis(const FiniteElementSpace &fes)
Return true if the mesh contains only one topology and the elements are tensor elements.
Definition fespace.hpp:1548
float real_t
Definition config.hpp:46
void EliminateBC(const HypreParMatrix &A, const HypreParMatrix &Ae, const Array< int > &ess_dof_list, const Vector &X, Vector &B)
Eliminate essential BC specified by ess_dof_list from the solution X to the r.h.s....
Definition hypre.cpp:3453
ElementDofOrdering
Constants describing the possible orderings of the DOFs in one element.
Definition fespace.hpp:47
IntegrationRule GetCollocatedIntRule(FiniteElementSpace &fes)
Return the Gauss-Lobatto rule collocated with the element nodes.
std::function< real_t(const Vector &)> f(real_t mass_coeff)
Definition lor_mms.hpp:30
void forall(int N, lambda &&body)
Definition forall.hpp:839
IntegrationRule GetCollocatedFaceIntRule(FiniteElementSpace &fes)
Return the Gauss-Lobatto rule collocated with face nodes.
real_t p(const Vector &x, real_t t)
This structure is used as a human readable output format that deciphers the information contained in ...
Definition mesh.hpp:2081
bool IsBoundary() const
Return true if the face is a boundary face.
Definition mesh.hpp:2122
struct mfem::Mesh::FaceInformation::@15 element[2]
bool IsInterior() const
return true if the face is an interior face to the computation domain, either a local or shared inter...
Definition mesh.hpp:2115
bool IsShared() const
Return true if the face is a shared interior face which is NOT a master nonconforming face.
Definition mesh.hpp:2106