MFEM v4.9.0
Finite element discretization library
Loading...
Searching...
No Matches
batchitrans.cpp
Go to the documentation of this file.
1// Copyright (c) 2010-2025, Lawrence Livermore National Security, LLC. Produced
2// at the Lawrence Livermore National Laboratory. All Rights reserved. See files
3// LICENSE and NOTICE for details. LLNL-CODE-806117.
4//
5// This file is part of the MFEM library. For more information and source code
6// availability visit https://mfem.org.
7//
8// MFEM is free software; you can redistribute it and/or modify it under the
9// terms of the BSD-3 license. We welcome feedback and contributions, see file
10// CONTRIBUTING.md for details.
11
14#include "fem.hpp"
15
16#include "eltrans.hpp"
17
18#include "../general/forall.hpp"
19
20#include <cmath>
21
22namespace mfem
23{
34
38
40
42 MemoryType d_mt)
43{
44 UpdateNodes(*mesh.GetNodes(), d_mt);
45}
46
48 MemoryType d_mt)
49{
50 MemoryType my_d_mt =
52
53 gf_ = &gf;
54 const FiniteElementSpace *fespace = gf.FESpace();
55 const int max_order = fespace->GetMaxElementOrder();
56 const int ndof1d = max_order + 1;
57 int ND = ndof1d;
58 const int dim = fespace->GetMesh()->Dimension();
59 MFEM_VERIFY(fespace->GetMesh()->GetNumGeometries(dim) <= 1,
60 "Mixed meshes are not supported.");
61 for (int d = 1; d < dim; ++d)
62 {
63 ND *= ndof1d;
64 }
65 const int vdim = fespace->GetVDim();
66 const int NE = fespace->GetNE();
67 node_pos.SetSize(vdim * ND * NE, my_d_mt);
68
69 const FiniteElement *fe = fespace->GetTypicalFE();
70 const TensorBasisElement *tfe = dynamic_cast<const TensorBasisElement *>(fe);
71 if (fespace->IsVariableOrder() || tfe == nullptr)
72 {
73 MFEM_VERIFY(fe->GetGeomType() == Geometry::SEGMENT ||
76 "unsupported geometry type");
77 // project onto GLL nodes
78 basis_type = BasisType::GaussLobatto;
79 points1d = poly1d.GetPointsArray(max_order, BasisType::GaussLobatto);
80 points1d->HostRead();
81 // either mixed order, or not a tensor basis
82 node_pos.HostWrite();
83 real_t tmp[3];
84 int sdim = vdim;
85 Vector pos(tmp, sdim);
87 int idcs[3];
88 for (int e = 0; e < NE; ++e)
89 {
90 fe = fespace->GetFE(e);
91 for (int i = 0; i < ND; ++i)
92 {
93 idcs[0] = i % ndof1d;
94 idcs[1] = i / ndof1d;
95 idcs[2] = idcs[1] / ndof1d;
96 idcs[1] = idcs[1] % ndof1d;
97 ip.x = (*points1d)[idcs[0]];
98 ip.y = (*points1d)[idcs[1]];
99 ip.z = (*points1d)[idcs[2]];
100 gf.GetVectorValue(e, ip, pos);
101 for (int d = 0; d < sdim; ++d)
102 {
103 node_pos[i + (d + e * sdim) * ND] = pos[d];
104 }
105 }
106 }
107 }
108 else
109 {
110 const Operator *elem_restr =
112 elem_restr->Mult(gf, node_pos);
113 basis_type = tfe->GetBasisType();
114 points1d = poly1d.GetPointsArray(max_order, basis_type);
115 }
116}
117
119 const Array<int> &elems,
120 Array<int> &types,
121 Vector &refs, bool use_device,
122 Array<int> *iters) const
123{
125 {
126 // no devices available
127 use_device = false;
128 }
129 const FiniteElementSpace *fespace = gf_->FESpace();
130 const FiniteElement *fe = fespace->GetTypicalFE();
131 const int dim = fe->GetDim();
132 const int vdim = fespace->GetVDim();
133 const int NE = fespace->GetNE();
134 const int order = fe->GetOrder();
135 int npts = elems.Size();
136 auto geom = fe->GetGeomType();
137
138 types.SetSize(npts);
139 refs.SetSize(npts * dim);
140
141 auto pptr = pts.Read(use_device);
142 auto eptr = elems.Read(use_device);
143 auto mptr = node_pos.Read(use_device);
144 auto tptr = types.Write(use_device);
145 auto xptr = refs.ReadWrite(use_device);
146 int* iter_ptr = nullptr;
147 if (iters != nullptr)
148 {
149 iters->SetSize(npts);
150 iter_ptr = iters->Write(use_device);
151 }
152 auto nptr = points1d->Read(use_device);
153 int ndof1d = points1d->Size();
154
155 switch (init_guess_type)
156 {
158 {
159 real_t cx, cy, cz;
160 auto ip0 = Geometries.GetCenter(geom);
161 cx = ip0.x;
162 cy = ip0.y;
163 cz = ip0.z;
164 switch (dim)
165 {
166 case 1:
167 forall_switch(use_device, npts,
168 [=] MFEM_HOST_DEVICE(int i) { xptr[i] = cx; });
169 break;
170 case 2:
171 forall_switch(use_device, npts, [=] MFEM_HOST_DEVICE(int i)
172 {
173 xptr[i] = cx;
174 xptr[i + npts] = cy;
175 });
176 break;
177 case 3:
178 forall_switch(use_device, npts, [=] MFEM_HOST_DEVICE(int i)
179 {
180 xptr[i] = cx;
181 xptr[i + npts] = cy;
182 xptr[i + 2 * npts] = cz;
183 });
184 break;
185 }
186 } break;
189 {
190 int nq1d = (qpts_order >= 0 ? qpts_order
191 : std::max(order + rel_qpts_order, 0)) +
192 1;
193 int btype = BasisType::GetNodalBasis(guess_points_type);
194
195 if ((btype == BasisType::Invalid || btype == basis_type) &&
196 nq1d == ndof1d)
197 {
198 // special case: test points are basis nodal points
199 if (init_guess_type ==
201 {
202 FindClosestPhysDof::Run(geom, vdim, use_device, npts, NE, ndof1d,
203 mptr, pptr, eptr, nptr, xptr);
204 }
205 else
206 {
207 FindClosestRefDof::Run(geom, vdim, use_device, npts, NE, ndof1d, mptr,
208 pptr, eptr, nptr, xptr);
209 }
210 }
211 else
212 {
213 BasisType::Check(btype);
214 auto qpoints = poly1d.GetPointsArray(nq1d - 1, btype);
215 auto qptr = qpoints->Read(use_device);
216 if (init_guess_type ==
218 {
219 FindClosestPhysPoint::Run(geom, vdim, use_device, npts, NE, ndof1d,
220 nq1d, mptr, pptr, eptr, nptr, qptr,
221 xptr);
222 }
223 else
224 {
225 FindClosestRefPoint::Run(geom, vdim, use_device, npts, NE, ndof1d,
226 nq1d, mptr, pptr, eptr, nptr, qptr, xptr);
227 }
228 }
229 } break;
231 // nothing to do here
232 break;
234 {
235 int nq1d = (qpts_order >= 0 ? qpts_order
236 : std::max(order + rel_qpts_order, 0)) +
237 1;
238 int btype = BasisType::GetNodalBasis(guess_points_type);
239 if (btype == BasisType::Invalid)
240 {
241 // default to closed uniform points
243 }
244 auto qpoints = poly1d.GetPointsArray(nq1d - 1, btype);
245 auto qptr = qpoints->Read(use_device);
246 NewtonEdgeScan::Run(geom, vdim, solver_type, use_device, ref_tol,
247 phys_rtol, max_iter, npts, NE, ndof1d, mptr, pptr,
248 eptr, nptr, qptr, nq1d, tptr, iter_ptr,
249 xptr);
250 }
251 return;
252 }
253 // general case: for each point, use guess inside refs
254 NewtonSolve::Run(geom, vdim, solver_type, use_device, ref_tol, phys_rtol,
255 max_iter, npts, NE, ndof1d, mptr, pptr, eptr, nptr, tptr,
256 iter_ptr, xptr);
257}
258
259/// \cond DO_NOT_DOCUMENT
260namespace internal
261{
262// data for batch inverse transform newton solvers
263struct InvTNewtonSolverBase
264{
265 real_t ref_tol;
266 real_t phys_rtol;
267 // physical space coordinates of mesh element nodes
268 const real_t *mptr;
269 // physical space point coordinates to find
270 const real_t *pptr;
271 // element indices
272 const int *eptr;
273 // newton solve result code
274 int *tptr;
275 // number of iterations taken
276 int *iter_ptr;
277 // result ref coords
278 real_t *xptr;
279 eltrans::Lagrange basis1d;
280
281 int max_iter;
282 // number of points in pptr
283 int npts;
284};
285
286// helper for computing dx = (pseudo)-inverse jac * [pt - F(x)]
287template <int Dim, int SDim> struct InvTLinSolve;
288
289template <> struct InvTLinSolve<1, 1>
290{
291 static void MFEM_HOST_DEVICE solve(const real_t *jac, const real_t *rhs,
292 real_t *dx)
293 {
294 dx[0] = rhs[0] / jac[0];
295 }
296};
297
298template <> struct InvTLinSolve<1, 2>
299{
300 static void MFEM_HOST_DEVICE solve(const real_t *jac, const real_t *rhs,
301 real_t *dx)
302 {
303 auto a00 = jac[0];
304 auto a10 = jac[1 * MFEM_THREAD_SIZE(x)];
305 real_t den = a00 * a00 +
306 a10 * a10;
307 dx[0] = (a00 * rhs[0] + a10 * rhs[1 * MFEM_THREAD_SIZE(x)]) / den;
308 }
309};
310
311template <> struct InvTLinSolve<1, 3>
312{
313 static void MFEM_HOST_DEVICE solve(const real_t *jac, const real_t *rhs,
314 real_t *dx)
315 {
316 auto a00 = jac[0];
317 auto a10 = jac[1 * MFEM_THREAD_SIZE(x)];
318 auto a20 = jac[2 * MFEM_THREAD_SIZE(x)];
319 real_t den = a00 * a00 + a10 * a10 + a20 * a20;
320 dx[0] = (a00 * rhs[0] + a10 * rhs[1 * MFEM_THREAD_SIZE(x)] +
321 a20 * rhs[2 * MFEM_THREAD_SIZE(x)]) /
322 den;
323 }
324};
325
326template <> struct InvTLinSolve<2, 2>
327{
328 static void MFEM_HOST_DEVICE solve(const real_t *jac, const real_t *rhs,
329 real_t *dx)
330 {
331 auto a00 = jac[(0 + 0 * 2) * MFEM_THREAD_SIZE(x)];
332 auto a10 = jac[(1 + 0 * 2) * MFEM_THREAD_SIZE(x)];
333 auto a01 = jac[(0 + 1 * 2) * MFEM_THREAD_SIZE(x)];
334 auto a11 = jac[(1 + 1 * 2) * MFEM_THREAD_SIZE(x)];
335 real_t den = 1 / (a00 * a11 - a01 * a10);
336 dx[0] = (a11 * rhs[0 * MFEM_THREAD_SIZE(x)] -
337 a01 * rhs[1 * MFEM_THREAD_SIZE(x)]) *
338 den;
339 dx[1] = (a00 * rhs[1 * MFEM_THREAD_SIZE(x)] -
340 a10 * rhs[0 * MFEM_THREAD_SIZE(x)]) *
341 den;
342 }
343};
344
345template <> struct InvTLinSolve<2, 3>
346{
347 static void MFEM_HOST_DEVICE solve(const real_t *jac, const real_t *rhs,
348 real_t *dx)
349 {
350 auto a00 = jac[(0 + 0 * 3) * MFEM_THREAD_SIZE(x)];
351 auto a01 = jac[(0 + 1 * 3) * MFEM_THREAD_SIZE(x)];
352 auto a10 = jac[(1 + 0 * 3) * MFEM_THREAD_SIZE(x)];
353 auto a11 = jac[(1 + 1 * 3) * MFEM_THREAD_SIZE(x)];
354 auto a20 = jac[(2 + 0 * 3) * MFEM_THREAD_SIZE(x)];
355 auto a21 = jac[(2 + 1 * 3) * MFEM_THREAD_SIZE(x)];
356 // a00**2*a11**2 + a00**2*a21**2 - 2*a00*a01*a10*a11 - 2*a00*a01*a20*a21 +
357 // a01**2*a10**2 + a01**2*a20**2 + a10**2*a21**2 - 2*a10*a11*a20*a21 +
358 // a11**2*a20**2
359 real_t den = 1 / (a00 * a00 * a11 * a11 + a00 * a00 * a21 * a21 -
360 2 * a00 * a01 * a10 * a11 - 2 * a00 * a01 * a20 * a21 +
361 a01 * a01 * a10 * a10 + a01 * a01 * a20 * a20 +
362 a10 * a10 * a21 * a21 - 2 * a10 * a11 * a20 * a21 +
363 a11 * a11 * a20 * a20);
364 // x0*(a00*(a01**2 + a11**2 + a21**2) - a01*(a00*a01 + a10*a11 + a20*a21))
365 // + x1*(a10*(a01**2 + a11**2 + a21**2) - a11*(a00*a01 + a10*a11 + a20*a21))
366 // + x2*(a20*(a01**2 + a11**2 + a21**2) - a21*(a00*a01 + a10*a11 + a20*a21))
367 dx[0] = (rhs[0 * MFEM_THREAD_SIZE(x)] *
368 (a00 * (a01 * a01 + a11 * a11 + a21 * a21) -
369 a01 * (a00 * a01 + a10 * a11 + a20 * a21)) +
370 rhs[1 * MFEM_THREAD_SIZE(x)] *
371 (a10 * (a01 * a01 + a11 * a11 + a21 * a21) -
372 a11 * (a00 * a01 + a10 * a11 + a20 * a21)) +
373 rhs[2 * MFEM_THREAD_SIZE(x)] *
374 (a20 * (a01 * a01 + a11 * a11 + a21 * a21) -
375 a21 * (a00 * a01 + a10 * a11 + a20 * a21))) *
376 den;
377 // x0*(a01*(a00**2 + a10**2 + a20**2)-a00*(a00*a01 + a10*a11 + a20*a21))
378 // +x1*(a11*(a00**2 + a10**2 + a20**2)-a10*(a00*a01 + a10*a11 + a20*a21))
379 // +x2*(a21*(a00**2 + a10**2 + a20**2)-a20*(a00*a01 + a10*a11 + a20*a21))
380 dx[1] = (rhs[0 * MFEM_THREAD_SIZE(x)] *
381 (a01 * (a00 * a00 + a10 * a10 + a20 * a20) -
382 a00 * (a00 * a01 + a10 * a11 + a20 * a21)) +
383 rhs[1 * MFEM_THREAD_SIZE(x)] *
384 (a11 * (a00 * a00 + a10 * a10 + a20 * a20) -
385 a10 * (a00 * a01 + a10 * a11 + a20 * a21)) +
386 rhs[2 * MFEM_THREAD_SIZE(x)] *
387 (a21 * (a00 * a00 + a10 * a10 + a20 * a20) -
388 a20 * (a00 * a01 + a10 * a11 + a20 * a21))) *
389 den;
390 }
391};
392
393template <> struct InvTLinSolve<3, 3>
394{
395 static void MFEM_HOST_DEVICE solve(const real_t *jac, const real_t *rhs,
396 real_t *dx)
397 {
398 auto a00 = jac[(0 + 0 * 3) * MFEM_THREAD_SIZE(x)];
399 auto a01 = jac[(0 + 1 * 3) * MFEM_THREAD_SIZE(x)];
400 auto a02 = jac[(0 + 2 * 3) * MFEM_THREAD_SIZE(x)];
401 auto a10 = jac[(1 + 0 * 3) * MFEM_THREAD_SIZE(x)];
402 auto a11 = jac[(1 + 1 * 3) * MFEM_THREAD_SIZE(x)];
403 auto a12 = jac[(1 + 2 * 3) * MFEM_THREAD_SIZE(x)];
404 auto a20 = jac[(2 + 0 * 3) * MFEM_THREAD_SIZE(x)];
405 auto a21 = jac[(2 + 1 * 3) * MFEM_THREAD_SIZE(x)];
406 auto a22 = jac[(2 + 2 * 3) * MFEM_THREAD_SIZE(x)];
407
408 real_t den = 1 / (a00 * a11 * a22 - a00 * a12 * a22 - a01 * a10 * a22 +
409 a01 * a12 * a20 + a02 * a10 * a21 - a02 * a11 * a20);
410 dx[0] = (rhs[0 * MFEM_THREAD_SIZE(x)] * (a11 * a22 - a12 * a21) -
411 rhs[1 * MFEM_THREAD_SIZE(x)] * (a01 * a22 - a02 * a21) +
412 rhs[2 * MFEM_THREAD_SIZE(x)] * (a01 * a12 - a02 * a11)) *
413 den;
414 dx[1] = (rhs[0 * MFEM_THREAD_SIZE(x)] * (a12 * a20 - a10 * a22) +
415 rhs[1 * MFEM_THREAD_SIZE(x)] * (a00 * a22 - a02 * a20) -
416 rhs[2 * MFEM_THREAD_SIZE(x)] * (a00 * a12 - a02 * a10)) *
417 den;
418 dx[2] = (rhs[0 * MFEM_THREAD_SIZE(x)] * (a10 * a21 - a11 * a20) -
419 rhs[1 * MFEM_THREAD_SIZE(x)] * (a00 * a21 - a01 * a20) +
420 rhs[2 * MFEM_THREAD_SIZE(x)] * (a00 * a11 - a01 * a10)) *
421 den;
422 }
423};
424
425template <int Geom, InverseElementTransformation::SolverType SolverType>
426struct ProjectType;
427
428template <>
429struct ProjectType<Geometry::SEGMENT, InverseElementTransformation::Newton>
430{
431 static MFEM_HOST_DEVICE bool project(real_t &x, real_t &dx)
432 {
433 x += dx;
434 return false;
435 }
436};
437
438template <>
439struct ProjectType<Geometry::SQUARE, InverseElementTransformation::Newton>
440{
441 static MFEM_HOST_DEVICE bool project(real_t &x, real_t &y, real_t &dx,
442 real_t &dy)
443 {
444 x += dx;
445 y += dy;
446 return false;
447 }
448};
449
450template <>
451struct ProjectType<Geometry::CUBE, InverseElementTransformation::Newton>
452{
453 static MFEM_HOST_DEVICE bool project(real_t &x, real_t &y, real_t &z,
454 real_t &dx, real_t &dy, real_t &dz)
455 {
456 x += dx;
457 y += dy;
458 z += dz;
459 return false;
460 }
461};
462
463template <int Geom>
464struct ProjectType<Geom, InverseElementTransformation::NewtonElementProject>
465{
466 template <class... Ts> static MFEM_HOST_DEVICE bool project(Ts &&...args)
467 {
468 return eltrans::GeometryUtils<Geom>::project(args...);
469 }
470};
471
472template <int Geom, int SDim,
473 InverseElementTransformation::SolverType SolverType, int max_team_x>
474struct InvTNewtonSolver;
475
476template <int SDim, InverseElementTransformation::SolverType SType,
477 int max_team_x>
478struct InvTNewtonSolver<Geometry::SEGMENT, SDim, SType, max_team_x>
479 : public InvTNewtonSolverBase
480{
481 static int ndofs(int ndof1d) { return ndof1d; }
482
483 // theoretically unbounded
484 static constexpr MFEM_HOST_DEVICE int max_dof1d() { return 0x1000; }
485
486 int MFEM_HOST_DEVICE operator()(int idx) const
487 {
488 // parallelize one team per pt
489 constexpr int Dim = 1;
490 int iter = 0;
491 MFEM_SHARED real_t ref_coord[Dim];
492 // contiguous in team_x, then SDim
493 MFEM_SHARED real_t phys_coord[SDim * max_team_x];
494 // contiguous in team_x, SDim, then Dim
495 MFEM_SHARED real_t jac[SDim * Dim * max_team_x];
496 MFEM_SHARED bool term_flag[1];
497 MFEM_SHARED int res[1];
498 MFEM_SHARED real_t dx[Dim];
499 MFEM_SHARED real_t prev_dx[Dim];
500 MFEM_SHARED bool hit_bdr[1];
501 MFEM_SHARED bool prev_hit_bdr[1];
502 real_t phys_tol = 0;
503 if (MFEM_THREAD_ID(x) == 0)
504 {
505 term_flag[0] = false;
506 res[0] = InverseElementTransformation::Unknown;
507 for (int d = 0; d < Dim; ++d)
508 {
509 ref_coord[d] = xptr[idx + d * npts];
510 dx[d] = 0;
511 prev_dx[d] = 0;
512 }
513 for (int d = 0; d < SDim; ++d)
514 {
515 phys_tol += pptr[idx + d * npts] * pptr[idx + d * npts];
516 }
517 phys_tol = fmax(phys_rtol * phys_rtol, phys_tol * phys_rtol * phys_rtol);
518 hit_bdr[0] = prev_hit_bdr[0] = false;
519 }
520 // for each iteration
521 while (true)
522 {
523 MFEM_SYNC_THREAD;
524 // compute phys_coord and jacobian at the same time
525 for (int d = 0; d < SDim; ++d)
526 {
527 phys_coord[MFEM_THREAD_ID(x) + d * MFEM_THREAD_SIZE(x)] = 0;
528 }
529 for (int i = 0; i < SDim * Dim; ++i)
530 {
531 jac[MFEM_THREAD_ID(x) + i * MFEM_THREAD_SIZE(x)] = 0;
532 }
533 MFEM_SYNC_THREAD;
534 MFEM_FOREACH_THREAD(j0, x, basis1d.pN)
535 {
536 real_t b0, db0;
537 basis1d.eval_d1(b0, db0, ref_coord[0], j0);
538 for (int d = 0; d < SDim; ++d)
539 {
540 phys_coord[MFEM_THREAD_ID(x) + d * MFEM_THREAD_SIZE(x)] +=
541 mptr[j0 + (eptr[idx] * SDim + d) * basis1d.pN] * b0;
542 jac[MFEM_THREAD_ID(x) + (d + 0 * SDim) * MFEM_THREAD_SIZE(x)] +=
543 mptr[j0 + (eptr[idx] * SDim + d) * basis1d.pN] * db0;
544 }
545 }
546
547 for (int i = (MFEM_THREAD_SIZE(x) >> 1); i > 0; i >>= 1)
548 {
549 MFEM_SYNC_THREAD;
550 int a = MFEM_THREAD_ID(x);
551 int b = a + i;
552 if (a < i && b < basis1d.pN)
553 {
554 for (int d = 0; d < SDim; ++d)
555 {
556 phys_coord[a + d * MFEM_THREAD_SIZE(x)] +=
557 phys_coord[b + d * MFEM_THREAD_SIZE(x)];
558 }
559 for (int j = 0; j < SDim * Dim; ++j)
560 {
561 jac[a + j * MFEM_THREAD_SIZE(x)] +=
562 jac[b + j * MFEM_THREAD_SIZE(x)];
563 }
564 }
565 }
566 MFEM_SYNC_THREAD;
567
568 // rest of newton solve logic is serial, have thread 0 solve for it
569 if (MFEM_THREAD_ID(x) == 0)
570 {
571 // compute objective function
572 // f(x) = 1/2 |pt - F(x)|^2
573 real_t dist = 0;
574 for (int d = 0; d < SDim; ++d)
575 {
576 real_t tmp =
577 pptr[idx + d * npts] - phys_coord[d * MFEM_THREAD_SIZE(x)];
578 phys_coord[d * MFEM_THREAD_SIZE(x)] = tmp;
579 dist += tmp * tmp;
580 }
581 // phys_coord now contains pt - F(x)
582 // check for phys_tol convergence
583 if (dist <= phys_tol)
584 {
585 // found solution
586 res[0] = eltrans::GeometryUtils<Geometry::SEGMENT>::inside(
587 ref_coord[0])
588 ? InverseElementTransformation::Inside
589 : InverseElementTransformation::Outside;
590 tptr[idx] = res[0];
591 for (int d = 0; d < Dim; ++d)
592 {
593 xptr[idx + d * npts] = ref_coord[d];
594 }
595 if (iter_ptr)
596 {
597 iter_ptr[idx] = iter + 1;
598 }
599 term_flag[0] = true;
600 }
601 else if (iter >= max_iter)
602 {
603 // terminate on max iterations
604 tptr[idx] = InverseElementTransformation::Unknown;
605 res[0] = InverseElementTransformation::Unknown;
606 // might as well save where we failed at
607 for (int d = 0; d < Dim; ++d)
608 {
609 xptr[idx + d * npts] = ref_coord[d];
610 }
611 if (iter_ptr)
612 {
613 iter_ptr[idx] = iter + 1;
614 }
615 term_flag[0] = true;
616 }
617 else
618 {
619 // compute dx = (pseudo)-inverse jac * [pt - F(x)]
620 InvTLinSolve<Dim, SDim>::solve(jac, phys_coord, dx);
621
622 hit_bdr[0] = ProjectType<Geometry::SEGMENT, SType>::project(
623 ref_coord[0], dx[0]);
624
625 // check for ref coord convergence or stagnation on boundary
626 if (hit_bdr[0])
627 {
628 if (prev_hit_bdr[0])
629 {
630 real_t dx_change = 0;
631 for (int d = 0; d < Dim; ++d)
632 {
633 real_t tmp = dx[d] - prev_dx[d];
634 dx_change += tmp * tmp;
635 }
636 if (dx_change <= ref_tol * ref_tol)
637 {
638 // stuck on the boundary
639 tptr[idx] = InverseElementTransformation::Outside;
640 res[0] = InverseElementTransformation::Outside;
641 for (int d = 0; d < Dim; ++d)
642 {
643 xptr[idx + d * npts] = ref_coord[d];
644 }
645 if (iter_ptr)
646 {
647 iter_ptr[idx] = iter + 1;
648 }
649 term_flag[0] = true;
650 }
651 }
652 }
653
654 prev_hit_bdr[0] = hit_bdr[0];
655 }
656 }
657
658 MFEM_SYNC_THREAD;
659 if (term_flag[0])
660 {
661 return res[0];
662 }
663 MFEM_FOREACH_THREAD(d, x, Dim) { prev_dx[d] = dx[d]; }
664 ++iter;
665 }
666 }
667};
668
669template <int SDim, InverseElementTransformation::SolverType SType,
670 int max_team_x>
671struct InvTNewtonSolver<Geometry::SQUARE, SDim, SType, max_team_x>
672 : public InvTNewtonSolverBase
673{
674 static int ndofs(int ndof1d) { return ndof1d * ndof1d; }
675
676 static constexpr MFEM_HOST_DEVICE int max_dof1d() { return 32; }
677
678 int MFEM_HOST_DEVICE operator()(int idx) const
679 {
680 // parallelize one team per pt
681 constexpr int Dim = 2;
682 int iter = 0;
683 MFEM_SHARED real_t ref_coord[Dim];
684 // contiguous in team_x, then SDim
685 MFEM_SHARED real_t phys_coord[SDim * max_team_x];
686 MFEM_SHARED real_t basis0[max_dof1d()];
687 MFEM_SHARED real_t dbasis0[max_dof1d()];
688 MFEM_SHARED real_t basis1[max_dof1d()];
689 MFEM_SHARED real_t dbasis1[max_dof1d()];
690 // contiguous in team_x, SDim, then Dim
691 MFEM_SHARED real_t jac[SDim * Dim * max_team_x];
692 MFEM_SHARED bool term_flag[1];
693 MFEM_SHARED int res[1];
694 MFEM_SHARED real_t dx[Dim];
695 MFEM_SHARED real_t prev_dx[Dim];
696 MFEM_SHARED bool hit_bdr[1];
697 MFEM_SHARED bool prev_hit_bdr[1];
698 real_t phys_tol = 0;
699 if (MFEM_THREAD_ID(x) == 0)
700 {
701 term_flag[0] = false;
702 res[0] = InverseElementTransformation::Unknown;
703 hit_bdr[0] = false;
704 prev_hit_bdr[0] = false;
705 for (int d = 0; d < Dim; ++d)
706 {
707 ref_coord[d] = xptr[idx + d * npts];
708 dx[d] = 0;
709 prev_dx[d] = 0;
710 }
711 for (int d = 0; d < SDim; ++d)
712 {
713 phys_tol += pptr[idx + d * npts] * pptr[idx + d * npts];
714 }
715 phys_tol = fmax(phys_rtol * phys_rtol, phys_tol * phys_rtol * phys_rtol);
716 }
717 // for each iteration
718 while (true)
719 {
720 MFEM_SYNC_THREAD;
721 // compute phys_coord and jacobian at the same time
722 for (int d = 0; d < SDim; ++d)
723 {
724 phys_coord[MFEM_THREAD_ID(x) + d * MFEM_THREAD_SIZE(x)] = 0;
725 }
726 for (int i = 0; i < SDim * Dim; ++i)
727 {
728 jac[MFEM_THREAD_ID(x) + i * MFEM_THREAD_SIZE(x)] = 0;
729 }
730 MFEM_FOREACH_THREAD(j0, x, basis1d.pN)
731 {
732 basis1d.eval_d1(basis0[j0], dbasis0[j0], ref_coord[0], j0);
733 basis1d.eval_d1(basis1[j0], dbasis1[j0], ref_coord[1], j0);
734 }
735 MFEM_SYNC_THREAD;
736 MFEM_FOREACH_THREAD(jidx, x, basis1d.pN * basis1d.pN)
737 {
738 int idcs[Dim];
739 idcs[0] = jidx % basis1d.pN;
740 idcs[1] = jidx / basis1d.pN;
741 for (int d = 0; d < SDim; ++d)
742 {
743 phys_coord[MFEM_THREAD_ID(x) + d * MFEM_THREAD_SIZE(x)] +=
744 mptr[idcs[0] +
745 (idcs[1] + (eptr[idx] * SDim + d) * basis1d.pN) *
746 basis1d.pN] *
747 basis0[idcs[0]] * basis1[idcs[1]];
748 jac[MFEM_THREAD_ID(x) + (d + 0 * SDim) * MFEM_THREAD_SIZE(x)] +=
749 mptr[idcs[0] +
750 (idcs[1] + (eptr[idx] * SDim + d) * basis1d.pN) *
751 basis1d.pN] *
752 dbasis0[idcs[0]] * basis1[idcs[1]];
753 jac[MFEM_THREAD_ID(x) + (d + 1 * SDim) * MFEM_THREAD_SIZE(x)] +=
754 mptr[idcs[0] +
755 (idcs[1] + (eptr[idx] * SDim + d) * basis1d.pN) *
756 basis1d.pN] *
757 basis0[idcs[0]] * dbasis1[idcs[1]];
758 }
759 }
760
761 for (int i = (MFEM_THREAD_SIZE(x) >> 1); i > 0; i >>= 1)
762 {
763 MFEM_SYNC_THREAD;
764 int a = MFEM_THREAD_ID(x);
765 int b = a + i;
766 if (a < i && b < basis1d.pN * basis1d.pN)
767 {
768 for (int d = 0; d < SDim; ++d)
769 {
770 phys_coord[a + d * MFEM_THREAD_SIZE(x)] +=
771 phys_coord[b + d * MFEM_THREAD_SIZE(x)];
772 }
773 for (int j = 0; j < SDim * Dim; ++j)
774 {
775 jac[a + j * MFEM_THREAD_SIZE(x)] +=
776 jac[b + j * MFEM_THREAD_SIZE(x)];
777 }
778 }
779 }
780 MFEM_SYNC_THREAD;
781
782 // rest of newton solve logic is serial, have thread 0 solve for it
783 if (MFEM_THREAD_ID(x) == 0)
784 {
785 // compute objective function
786 // f(x) = 1/2 |pt - F(x)|^2
787 real_t dist = 0;
788 for (int d = 0; d < SDim; ++d)
789 {
790 real_t tmp =
791 pptr[idx + d * npts] - phys_coord[d * MFEM_THREAD_SIZE(x)];
792 phys_coord[d * MFEM_THREAD_SIZE(x)] = tmp;
793 dist += tmp * tmp;
794 }
795 // phys_coord now contains pt - F(x)
796 // check for phys_tol convergence
797 if (dist <= phys_tol)
798 {
799 // found solution
800 res[0] = eltrans::GeometryUtils<Geometry::SQUARE>::inside(
801 ref_coord[0], ref_coord[1])
802 ? InverseElementTransformation::Inside
803 : InverseElementTransformation::Outside;
804 tptr[idx] = res[0];
805 for (int d = 0; d < Dim; ++d)
806 {
807 xptr[idx + d * npts] = ref_coord[d];
808 }
809 if (iter_ptr)
810 {
811 iter_ptr[idx] = iter + 1;
812 }
813 term_flag[0] = true;
814 }
815 else if (iter >= max_iter)
816 {
817 // terminate on max iterations
818 tptr[idx] = InverseElementTransformation::Unknown;
819 res[0] = InverseElementTransformation::Unknown;
820 // might as well save where we failed at
821 for (int d = 0; d < Dim; ++d)
822 {
823 xptr[idx + d * npts] = ref_coord[d];
824 }
825 if (iter_ptr)
826 {
827 iter_ptr[idx] = iter + 1;
828 }
829 term_flag[0] = true;
830 }
831 else
832 {
833 // compute dx = (pseudo)-inverse jac * [pt - F(x)]
834 InvTLinSolve<Dim, SDim>::solve(jac, phys_coord, dx);
835
836 hit_bdr[0] = ProjectType<Geometry::SQUARE, SType>::project(
837 ref_coord[0], ref_coord[1], dx[0], dx[1]);
838
839 // check for ref coord convergence or stagnation on boundary
840 if (hit_bdr[0])
841 {
842 if (prev_hit_bdr[0])
843 {
844 real_t dx_change = 0;
845 for (int d = 0; d < Dim; ++d)
846 {
847 real_t tmp = dx[d] - prev_dx[d];
848 dx_change += tmp * tmp;
849 }
850 if (dx_change <= ref_tol * ref_tol)
851 {
852 // stuck on the boundary
853 tptr[idx] = InverseElementTransformation::Outside;
854 res[0] = InverseElementTransformation::Outside;
855 for (int d = 0; d < Dim; ++d)
856 {
857 xptr[idx + d * npts] = ref_coord[d];
858 }
859 if (iter_ptr)
860 {
861 iter_ptr[idx] = iter + 1;
862 }
863 term_flag[0] = true;
864 }
865 }
866 }
867
868 prev_hit_bdr[0] = hit_bdr[0];
869 }
870 }
871
872 MFEM_SYNC_THREAD;
873 if (term_flag[0])
874 {
875 return res[0];
876 }
877 MFEM_FOREACH_THREAD(d, x, Dim) { prev_dx[d] = dx[d]; }
878 MFEM_SYNC_THREAD;
879 ++iter;
880 }
881 }
882};
883
884template <int SDim, InverseElementTransformation::SolverType SType,
885 int max_team_x>
886struct InvTNewtonSolver<Geometry::CUBE, SDim, SType, max_team_x>
887 : public InvTNewtonSolverBase
888{
889 static int ndofs(int ndof1d) { return ndof1d * ndof1d * ndof1d; }
890
891 static constexpr MFEM_HOST_DEVICE int max_dof1d() { return 32; }
892
893 int MFEM_HOST_DEVICE operator()(int idx) const
894 {
895 // parallelize one team per pt
896 constexpr int Dim = 3;
897 int iter = 0;
898 MFEM_SHARED real_t ref_coord[Dim];
899 // contiguous in team_x, then SDim
900 MFEM_SHARED real_t phys_coord[SDim * max_team_x];
901 MFEM_SHARED real_t basis0[max_dof1d()];
902 MFEM_SHARED real_t dbasis0[max_dof1d()];
903 MFEM_SHARED real_t basis1[max_dof1d()];
904 MFEM_SHARED real_t dbasis1[max_dof1d()];
905 MFEM_SHARED real_t basis2[max_dof1d()];
906 MFEM_SHARED real_t dbasis2[max_dof1d()];
907 // contiguous in team_x, SDim, then Dim
908 MFEM_SHARED real_t jac[SDim * Dim * max_team_x];
909 MFEM_SHARED bool term_flag[1];
910 MFEM_SHARED int res[1];
911 MFEM_SHARED real_t dx[Dim];
912 MFEM_SHARED real_t prev_dx[Dim];
913 MFEM_SHARED bool hit_bdr[1];
914 MFEM_SHARED bool prev_hit_bdr[1];
915 real_t phys_tol = 0;
916 if (MFEM_THREAD_ID(x) == 0)
917 {
918 term_flag[0] = false;
919 res[0] = InverseElementTransformation::Unknown;
920 hit_bdr[0] = false;
921 prev_hit_bdr[0] = false;
922 for (int d = 0; d < Dim; ++d)
923 {
924 ref_coord[d] = xptr[idx + d * npts];
925 dx[d] = 0;
926 prev_dx[d] = 0;
927 }
928 for (int d = 0; d < SDim; ++d)
929 {
930 phys_tol += pptr[idx + d * npts] * pptr[idx + d * npts];
931 }
932 phys_tol = fmax(phys_rtol * phys_rtol, phys_tol * phys_rtol * phys_rtol);
933 }
934 // for each iteration
935 while (true)
936 {
937 MFEM_SYNC_THREAD;
938 // compute phys_coord and jacobian at the same time
939 for (int d = 0; d < SDim; ++d)
940 {
941 phys_coord[MFEM_THREAD_ID(x) + d * MFEM_THREAD_SIZE(x)] = 0;
942 }
943 for (int i = 0; i < SDim * Dim; ++i)
944 {
945 jac[MFEM_THREAD_ID(x) + i * MFEM_THREAD_SIZE(x)] = 0;
946 }
947 MFEM_FOREACH_THREAD(j0, x, basis1d.pN)
948 {
949 basis1d.eval_d1(basis0[j0], dbasis0[j0], ref_coord[0], j0);
950 basis1d.eval_d1(basis1[j0], dbasis1[j0], ref_coord[1], j0);
951 basis1d.eval_d1(basis2[j0], dbasis2[j0], ref_coord[2], j0);
952 }
953 MFEM_SYNC_THREAD;
954 MFEM_FOREACH_THREAD(jidx, x, basis1d.pN * basis1d.pN * basis1d.pN)
955 {
956 int idcs[Dim];
957 idcs[0] = jidx % basis1d.pN;
958 idcs[1] = jidx / basis1d.pN;
959 idcs[2] = idcs[1] / basis1d.pN;
960 idcs[1] %= basis1d.pN;
961 for (int d = 0; d < SDim; ++d)
962 {
963 phys_coord[MFEM_THREAD_ID(x) + d * MFEM_THREAD_SIZE(x)] +=
964 mptr[idcs[0] + (idcs[1] + (idcs[2] + (eptr[idx] * SDim + d) *
965 basis1d.pN) *
966 basis1d.pN) *
967 basis1d.pN] *
968 basis0[idcs[0]] * basis1[idcs[1]] * basis2[idcs[2]];
969 jac[MFEM_THREAD_ID(x) + (d + 0 * SDim) * MFEM_THREAD_SIZE(x)] +=
970 mptr[idcs[0] + (idcs[1] + (idcs[2] + (eptr[idx] * SDim + d) *
971 basis1d.pN) *
972 basis1d.pN) *
973 basis1d.pN] *
974 dbasis0[idcs[0]] * basis1[idcs[1]] * basis2[idcs[2]];
975 jac[MFEM_THREAD_ID(x) + (d + 1 * SDim) * MFEM_THREAD_SIZE(x)] +=
976 mptr[idcs[0] + (idcs[1] + (idcs[2] + (eptr[idx] * SDim + d) *
977 basis1d.pN) *
978 basis1d.pN) *
979 basis1d.pN] *
980 basis0[idcs[0]] * dbasis1[idcs[1]] * basis2[idcs[2]];
981 jac[MFEM_THREAD_ID(x) + (d + 2 * SDim) * MFEM_THREAD_SIZE(x)] +=
982 mptr[idcs[0] + (idcs[1] + (idcs[2] + (eptr[idx] * SDim + d) *
983 basis1d.pN) *
984 basis1d.pN) *
985 basis1d.pN] *
986 basis0[idcs[0]] * basis1[idcs[1]] * dbasis2[idcs[2]];
987 }
988 }
989
990 for (int i = (MFEM_THREAD_SIZE(x) >> 1); i > 0; i >>= 1)
991 {
992 MFEM_SYNC_THREAD;
993 int a = MFEM_THREAD_ID(x);
994 int b = a + i;
995 if (a < i && b < basis1d.pN * basis1d.pN * basis1d.pN)
996 {
997 for (int d = 0; d < SDim; ++d)
998 {
999 phys_coord[a + d * MFEM_THREAD_SIZE(x)] +=
1000 phys_coord[b + d * MFEM_THREAD_SIZE(x)];
1001 }
1002 for (int j = 0; j < SDim * Dim; ++j)
1003 {
1004 jac[a + j * MFEM_THREAD_SIZE(x)] +=
1005 jac[b + j * MFEM_THREAD_SIZE(x)];
1006 }
1007 }
1008 }
1009 MFEM_SYNC_THREAD;
1010
1011 // rest of newton solve logic is serial, have thread 0 solve for it
1012 if (MFEM_THREAD_ID(x) == 0)
1013 {
1014 // compute objective function
1015 // f(x) = 1/2 |pt - F(x)|^2
1016 real_t dist = 0;
1017 for (int d = 0; d < SDim; ++d)
1018 {
1019 real_t tmp =
1020 pptr[idx + d * npts] - phys_coord[d * MFEM_THREAD_SIZE(x)];
1021 phys_coord[d * MFEM_THREAD_SIZE(x)] = tmp;
1022 dist += tmp * tmp;
1023 }
1024 // phys_coord now contains pt - F(x)
1025 // check for phys_tol convergence
1026 if (dist <= phys_tol)
1027 {
1028 // found solution
1029 res[0] = eltrans::GeometryUtils<Geometry::CUBE>::inside(
1030 ref_coord[0], ref_coord[1], ref_coord[2])
1031 ? InverseElementTransformation::Inside
1032 : InverseElementTransformation::Outside;
1033 tptr[idx] = res[0];
1034 for (int d = 0; d < Dim; ++d)
1035 {
1036 xptr[idx + d * npts] = ref_coord[d];
1037 }
1038 if (iter_ptr)
1039 {
1040 iter_ptr[idx] = iter + 1;
1041 }
1042 term_flag[0] = true;
1043 }
1044 else if (iter >= max_iter)
1045 {
1046 // terminate on max iterations
1047 tptr[idx] = InverseElementTransformation::Unknown;
1048 res[0] = InverseElementTransformation::Unknown;
1049 // might as well save where we failed at
1050 for (int d = 0; d < Dim; ++d)
1051 {
1052 xptr[idx + d * npts] = ref_coord[d];
1053 }
1054 if (iter_ptr)
1055 {
1056 iter_ptr[idx] = iter + 1;
1057 }
1058 term_flag[0] = true;
1059 }
1060 else
1061 {
1062 // compute dx = (pseudo)-inverse jac * [pt - F(x)]
1063 InvTLinSolve<Dim, SDim>::solve(jac, phys_coord, dx);
1064
1065 hit_bdr[0] = ProjectType<Geometry::CUBE, SType>::project(
1066 ref_coord[0], ref_coord[1], ref_coord[2], dx[0], dx[1],
1067 dx[2]);
1068
1069 // check for ref coord convergence or stagnation on boundary
1070 if (hit_bdr[0])
1071 {
1072 if (prev_hit_bdr[0])
1073 {
1074 real_t dx_change = 0;
1075 for (int d = 0; d < Dim; ++d)
1076 {
1077 real_t tmp = dx[d] - prev_dx[d];
1078 dx_change += tmp * tmp;
1079 }
1080 if (dx_change <= ref_tol * ref_tol)
1081 {
1082 // stuck on the boundary
1083 tptr[idx] = InverseElementTransformation::Outside;
1084 res[0] = InverseElementTransformation::Outside;
1085 for (int d = 0; d < Dim; ++d)
1086 {
1087 xptr[idx + d * npts] = ref_coord[d];
1088 }
1089 if (iter_ptr)
1090 {
1091 iter_ptr[idx] = iter + 1;
1092 }
1093 term_flag[0] = true;
1094 }
1095 }
1096 }
1097 }
1098 }
1099
1100 MFEM_SYNC_THREAD;
1101 if (term_flag[0])
1102 {
1103 return res[0];
1104 }
1105 MFEM_FOREACH_THREAD(d, x, Dim) { prev_dx[d] = dx[d]; }
1106 ++iter;
1107 }
1108 }
1109};
1110
1111// data for finding the batch transform initial guess co-located at dofs
1112struct DofFinderBase
1113{
1114 // physical space coordinates of mesh element nodes
1115 const real_t *mptr;
1116 // physical space point coordinates to find
1117 const real_t *pptr;
1118 // element indices
1119 const int *eptr;
1120 // reference space nodes to test
1121 const real_t *qptr;
1122 // initial guess results
1123 real_t *xptr;
1124 eltrans::Lagrange basis1d;
1125
1126 // number of points in pptr
1127 int npts;
1128};
1129
1130template <int Geom, int SDim, int max_team_x>
1131struct PhysDofFinder;
1132
1133template <int SDim, int max_team_x>
1134struct PhysDofFinder<Geometry::SEGMENT, SDim, max_team_x>
1135 : public DofFinderBase
1136{
1137 static int ndofs(int ndofs1d) { return ndofs1d; }
1138
1139 void MFEM_HOST_DEVICE operator()(int idx) const
1140 {
1141 constexpr int Dim = 1;
1142 // L-2 norm squared
1143 MFEM_SHARED real_t dists[max_team_x];
1144 MFEM_SHARED real_t ref_buf[Dim * max_team_x];
1145 MFEM_FOREACH_THREAD(i, x, max_team_x)
1146 {
1147#ifdef MFEM_USE_DOUBLE
1148 dists[i] = HUGE_VAL;
1149#else
1150 dists[i] = HUGE_VALF;
1151#endif
1152 }
1153 MFEM_SYNC_THREAD;
1154 // team serial portion
1155 MFEM_FOREACH_THREAD(i, x, basis1d.pN)
1156 {
1157 real_t phys_coord[SDim];
1158 for (int d = 0; d < SDim; ++d)
1159 {
1160 phys_coord[d] = mptr[i + (d + eptr[idx] * SDim) * basis1d.pN];
1161 }
1162 real_t dist = 0;
1163 // L-2 norm squared
1164 for (int d = 0; d < SDim; ++d)
1165 {
1166 real_t tmp = phys_coord[d] - pptr[idx + d * npts];
1167 dist += tmp * tmp;
1168 }
1169 if (dist < dists[MFEM_THREAD_ID(x)])
1170 {
1171 // closer guess in physical space
1172 dists[MFEM_THREAD_ID(x)] = dist;
1173 ref_buf[MFEM_THREAD_ID(x)] = basis1d.z[i];
1174 }
1175 }
1176 // now do tree reduce
1177 for (int i = (MFEM_THREAD_SIZE(x) >> 1); i > 0; i >>= 1)
1178 {
1179 MFEM_SYNC_THREAD;
1180 if (MFEM_THREAD_ID(x) < i)
1181 {
1182 if (dists[MFEM_THREAD_ID(x) + i] < dists[MFEM_THREAD_ID(x)])
1183 {
1184 dists[MFEM_THREAD_ID(x)] = dists[MFEM_THREAD_ID(x) + i];
1185 ref_buf[MFEM_THREAD_ID(x)] = ref_buf[MFEM_THREAD_ID(x) + i];
1186 }
1187 }
1188 }
1189 // write results out
1190 // not needed in 1D
1191 // MFEM_SYNC_THREAD;
1192 if (MFEM_THREAD_ID(x) == 0)
1193 {
1194 xptr[idx] = ref_buf[0];
1195 }
1196 }
1197};
1198
1199template <int SDim, int max_team_x>
1200struct PhysDofFinder<Geometry::SQUARE, SDim, max_team_x>
1201 : public DofFinderBase
1202{
1203 static int ndofs(int ndofs1d) { return ndofs1d * ndofs1d; }
1204
1205 void MFEM_HOST_DEVICE operator()(int idx) const
1206 {
1207 constexpr int Dim = 2;
1208 int n = basis1d.pN * basis1d.pN;
1209 if (n > max_team_x)
1210 {
1211 n = max_team_x;
1212 }
1213 // L-2 norm squared
1214 MFEM_SHARED real_t dists[max_team_x];
1215 MFEM_SHARED real_t ref_buf[Dim * max_team_x];
1216 MFEM_FOREACH_THREAD(i, x, max_team_x)
1217 {
1218#ifdef MFEM_USE_DOUBLE
1219 dists[i] = HUGE_VAL;
1220#else
1221 dists[i] = HUGE_VALF;
1222#endif
1223 }
1224 // team serial portion
1225 MFEM_FOREACH_THREAD(j, x, basis1d.pN * basis1d.pN)
1226 {
1227 real_t phys_coord[SDim] = {0};
1228 int idcs[Dim];
1229 idcs[0] = j % basis1d.pN;
1230 idcs[1] = j / basis1d.pN;
1231 for (int d = 0; d < SDim; ++d)
1232 {
1233 phys_coord[d] =
1234 mptr[idcs[0] + (idcs[1] + (d + eptr[idx] * SDim) * basis1d.pN) *
1235 basis1d.pN];
1236 }
1237 real_t dist = 0;
1238 // L-2 norm squared
1239 for (int d = 0; d < SDim; ++d)
1240 {
1241 real_t tmp = pptr[idx + d * npts] - phys_coord[d];
1242 dist += tmp * tmp;
1243 }
1244 if (dist < dists[MFEM_THREAD_ID(x)])
1245 {
1246 // closer guess in physical space
1247 dists[MFEM_THREAD_ID(x)] = dist;
1248 for (int d = 0; d < Dim; ++d)
1249 {
1250 ref_buf[MFEM_THREAD_ID(x) + d * n] = basis1d.z[idcs[d]];
1251 }
1252 }
1253 }
1254 // now do tree reduce
1255 for (int i = (MFEM_THREAD_SIZE(x) >> 1); i > 0; i >>= 1)
1256 {
1257 MFEM_SYNC_THREAD;
1258 if (MFEM_THREAD_ID(x) < i)
1259 {
1260 if (dists[MFEM_THREAD_ID(x) + i] < dists[MFEM_THREAD_ID(x)])
1261 {
1262 dists[MFEM_THREAD_ID(x)] = dists[MFEM_THREAD_ID(x) + i];
1263 for (int d = 0; d < Dim; ++d)
1264 {
1265 ref_buf[MFEM_THREAD_ID(x) + d * n] =
1266 ref_buf[MFEM_THREAD_ID(x) + i + d * n];
1267 }
1268 }
1269 }
1270 }
1271 // write results out
1272 MFEM_SYNC_THREAD;
1273 MFEM_FOREACH_THREAD(d, x, Dim) { xptr[idx + d * npts] = ref_buf[d * n]; }
1274 }
1275};
1276
1277template <int SDim, int max_team_x>
1278struct PhysDofFinder<Geometry::CUBE, SDim, max_team_x>
1279 : public DofFinderBase
1280{
1281 static int ndofs(int ndofs1d) { return ndofs1d * ndofs1d * ndofs1d; }
1282
1283 void MFEM_HOST_DEVICE operator()(int idx) const
1284 {
1285 constexpr int Dim = 3;
1286 int n = basis1d.pN * basis1d.pN * basis1d.pN;
1287 if (n > max_team_x)
1288 {
1289 n = max_team_x;
1290 }
1291 // L-2 norm squared
1292 MFEM_SHARED real_t dists[max_team_x];
1293 MFEM_SHARED real_t ref_buf[Dim * max_team_x];
1294 MFEM_FOREACH_THREAD(i, x, max_team_x)
1295 {
1296#ifdef MFEM_USE_DOUBLE
1297 dists[i] = HUGE_VAL;
1298#else
1299 dists[i] = HUGE_VALF;
1300#endif
1301 }
1302 // team serial portion
1303 MFEM_FOREACH_THREAD(j, x, basis1d.pN * basis1d.pN * basis1d.pN)
1304 {
1305 real_t phys_coord[SDim] = {0};
1306 int idcs[Dim];
1307 idcs[0] = j % basis1d.pN;
1308 idcs[1] = j / basis1d.pN;
1309 idcs[2] = idcs[1] / basis1d.pN;
1310 idcs[1] = idcs[1] % basis1d.pN;
1311 for (int d = 0; d < SDim; ++d)
1312 {
1313 phys_coord[d] =
1314 mptr[idcs[0] + (idcs[1] + (idcs[2] + (d + eptr[idx] * SDim) *
1315 basis1d.pN) *
1316 basis1d.pN) *
1317 basis1d.pN];
1318 }
1319 real_t dist = 0;
1320 // L-2 norm squared
1321 for (int d = 0; d < SDim; ++d)
1322 {
1323 real_t tmp = pptr[idx + d * npts] - phys_coord[d];
1324 dist += tmp * tmp;
1325 }
1326 if (dist < dists[MFEM_THREAD_ID(x)])
1327 {
1328 // closer guess in physical space
1329 dists[MFEM_THREAD_ID(x)] = dist;
1330 for (int d = 0; d < Dim; ++d)
1331 {
1332 ref_buf[MFEM_THREAD_ID(x) + d * n] = basis1d.z[idcs[d]];
1333 }
1334 }
1335 }
1336 // now do tree reduce
1337 for (int i = (MFEM_THREAD_SIZE(x) >> 1); i > 0; i >>= 1)
1338 {
1339 MFEM_SYNC_THREAD;
1340 if (MFEM_THREAD_ID(x) < i)
1341 {
1342 if (dists[MFEM_THREAD_ID(x) + i] < dists[MFEM_THREAD_ID(x)])
1343 {
1344 dists[MFEM_THREAD_ID(x)] = dists[MFEM_THREAD_ID(x) + i];
1345 for (int d = 0; d < Dim; ++d)
1346 {
1347 ref_buf[MFEM_THREAD_ID(x) + d * n] =
1348 ref_buf[MFEM_THREAD_ID(x) + i + d * n];
1349 }
1350 }
1351 }
1352 }
1353 // write results out
1354 MFEM_SYNC_THREAD;
1355 MFEM_FOREACH_THREAD(d, x, Dim) { xptr[idx + d * npts] = ref_buf[d * n]; }
1356 }
1357};
1358
1359// data for finding the batch transform initial guess
1360struct NodeFinderBase
1361{
1362 // physical space coordinates of mesh element nodes
1363 const real_t *mptr;
1364 // physical space point coordinates to find
1365 const real_t *pptr;
1366 // element indices
1367 const int *eptr;
1368 // reference space nodes to test
1369 const real_t *qptr;
1370 // initial guess results
1371 real_t *xptr;
1372 eltrans::Lagrange basis1d;
1373
1374 // number of points in pptr
1375 int npts;
1376 // number of points per element along each dimension to test
1377 int nq1d;
1378 // total number of points to test
1379 int nq;
1380};
1381
1382template <int Geom, int SDim, int max_team_x, int max_q1d>
1383struct PhysNodeFinder;
1384
1385template <int SDim, int max_team_x, int max_q1d>
1386struct PhysNodeFinder<Geometry::SEGMENT, SDim, max_team_x, max_q1d>
1387 : public NodeFinderBase
1388{
1389
1390 static int compute_nq(int nq1d) { return nq1d; }
1391
1392 static int ndofs(int ndof1d) { return ndof1d; }
1393
1394 void MFEM_HOST_DEVICE operator()(int idx) const
1395 {
1396 constexpr int Dim = 1;
1397 // L-2 norm squared
1398 MFEM_SHARED real_t dists[max_team_x];
1399 MFEM_SHARED real_t ref_buf[Dim * max_team_x];
1400 MFEM_FOREACH_THREAD(i, x, max_team_x)
1401 {
1402#ifdef MFEM_USE_DOUBLE
1403 dists[i] = HUGE_VAL;
1404#else
1405 dists[i] = HUGE_VALF;
1406#endif
1407 }
1408 MFEM_SYNC_THREAD;
1409 // team serial portion
1410 MFEM_FOREACH_THREAD(i, x, nq)
1411 {
1412 real_t phys_coord[SDim] = {0};
1413 for (int j0 = 0; j0 < basis1d.pN; ++j0)
1414 {
1415 real_t b = basis1d.eval(qptr[i], j0);
1416 for (int d = 0; d < SDim; ++d)
1417 {
1418 phys_coord[d] +=
1419 mptr[j0 + (d + eptr[idx] * SDim) * basis1d.pN] * b;
1420 }
1421 }
1422 real_t dist = 0;
1423 // L-2 norm squared
1424 for (int d = 0; d < SDim; ++d)
1425 {
1426 real_t tmp = phys_coord[d] - pptr[idx + d * npts];
1427 dist += tmp * tmp;
1428 }
1429 if (dist < dists[MFEM_THREAD_ID(x)])
1430 {
1431 // closer guess in physical space
1432 dists[MFEM_THREAD_ID(x)] = dist;
1433 ref_buf[MFEM_THREAD_ID(x)] = qptr[i];
1434 }
1435 }
1436 // now do tree reduce
1437 for (int i = (MFEM_THREAD_SIZE(x) >> 1); i > 0; i >>= 1)
1438 {
1439 MFEM_SYNC_THREAD;
1440 if (MFEM_THREAD_ID(x) < i)
1441 {
1442 if (dists[MFEM_THREAD_ID(x) + i] < dists[MFEM_THREAD_ID(x)])
1443 {
1444 dists[MFEM_THREAD_ID(x)] = dists[MFEM_THREAD_ID(x) + i];
1445 ref_buf[MFEM_THREAD_ID(x)] = ref_buf[MFEM_THREAD_ID(x) + i];
1446 }
1447 }
1448 }
1449 // write results out
1450 // not needed in 1D
1451 // MFEM_SYNC_THREAD;
1452 if (MFEM_THREAD_ID(x) == 0)
1453 {
1454 xptr[idx] = ref_buf[0];
1455 }
1456 }
1457};
1458
1459template <int SDim, int max_team_x, int max_q1d>
1460struct PhysNodeFinder<Geometry::SQUARE, SDim, max_team_x, max_q1d>
1461 : public NodeFinderBase
1462{
1463
1464 static int compute_nq(int nq1d) { return nq1d * nq1d; }
1465
1466 static int ndofs(int ndof1d)
1467 {
1468 return ndof1d * ndof1d;
1469 }
1470
1471 void MFEM_HOST_DEVICE operator()(int idx) const
1472 {
1473 constexpr int Dim = 2;
1474 constexpr int max_dof1d = 32;
1475 int n = (nq < max_team_x) ? nq : max_team_x;
1476 // L-2 norm squared
1477 MFEM_SHARED real_t dists[max_team_x];
1478 MFEM_SHARED real_t ref_buf[Dim * max_team_x];
1479 MFEM_SHARED real_t basis[max_dof1d * max_q1d];
1480 MFEM_FOREACH_THREAD(i, x, max_team_x)
1481 {
1482#ifdef MFEM_USE_DOUBLE
1483 dists[i] = HUGE_VAL;
1484#else
1485 dists[i] = HUGE_VALF;
1486#endif
1487 }
1488 MFEM_FOREACH_THREAD(j0, x, nq1d)
1489 {
1490 for (int i0 = 0; i0 < basis1d.pN; ++i0)
1491 {
1492 basis[j0 + i0 * nq1d] = basis1d.eval(qptr[j0], i0);
1493 }
1494 }
1495 MFEM_SYNC_THREAD;
1496 // team serial portion
1497 MFEM_FOREACH_THREAD(j, x, nq)
1498 {
1499 real_t phys_coord[SDim] = {0};
1500 int idcs[Dim];
1501 idcs[0] = j % nq1d;
1502 idcs[1] = j / nq1d;
1503 for (int i1 = 0; i1 < basis1d.pN; ++i1)
1504 {
1505 for (int i0 = 0; i0 < basis1d.pN; ++i0)
1506 {
1507 real_t b = basis[idcs[0] + i0 * nq1d] * basis[idcs[1] + i1 * nq1d];
1508 for (int d = 0; d < SDim; ++d)
1509 {
1510 phys_coord[d] +=
1511 mptr[i0 + (i1 + (d + eptr[idx] * SDim) * basis1d.pN) *
1512 basis1d.pN] *
1513 b;
1514 }
1515 }
1516 }
1517 real_t dist = 0;
1518 // L-2 norm squared
1519 for (int d = 0; d < SDim; ++d)
1520 {
1521 real_t tmp = pptr[idx + d * npts] - phys_coord[d];
1522 dist += tmp * tmp;
1523 }
1524 if (dist < dists[MFEM_THREAD_ID(x)])
1525 {
1526 // closer guess in physical space
1527 dists[MFEM_THREAD_ID(x)] = dist;
1528 for (int d = 0; d < Dim; ++d)
1529 {
1530 ref_buf[MFEM_THREAD_ID(x) + d * n] = qptr[idcs[d]];
1531 }
1532 }
1533 }
1534 // now do tree reduce
1535 for (int i = (MFEM_THREAD_SIZE(x) >> 1); i > 0; i >>= 1)
1536 {
1537 MFEM_SYNC_THREAD;
1538 if (MFEM_THREAD_ID(x) < i)
1539 {
1540 if (dists[MFEM_THREAD_ID(x) + i] < dists[MFEM_THREAD_ID(x)])
1541 {
1542 dists[MFEM_THREAD_ID(x)] = dists[MFEM_THREAD_ID(x) + i];
1543 for (int d = 0; d < Dim; ++d)
1544 {
1545 ref_buf[MFEM_THREAD_ID(x) + d * n] =
1546 ref_buf[MFEM_THREAD_ID(x) + i + d * n];
1547 }
1548 }
1549 }
1550 }
1551 // write results out
1552 MFEM_SYNC_THREAD;
1553 MFEM_FOREACH_THREAD(d, x, Dim) { xptr[idx + d * npts] = ref_buf[d * n]; }
1554 }
1555};
1556
1557template <int SDim, int max_team_x, int max_q1d>
1558struct PhysNodeFinder<Geometry::CUBE, SDim, max_team_x, max_q1d>
1559 : public NodeFinderBase
1560{
1561
1562 static int compute_nq(int nq1d) { return nq1d * nq1d * nq1d; }
1563
1564 static int ndofs(int ndof1d)
1565 {
1566 return ndof1d * ndof1d * ndof1d;
1567 }
1568
1569 void MFEM_HOST_DEVICE operator()(int idx) const
1570 {
1571 constexpr int Dim = 3;
1572 constexpr int max_dof1d = 32;
1573 int n = (nq < max_team_x) ? nq : max_team_x;
1574 // L-2 norm squared
1575 MFEM_SHARED real_t dists[max_team_x];
1576 MFEM_SHARED real_t ref_buf[Dim * max_team_x];
1577 // contiguous in quad
1578 MFEM_SHARED real_t basis[max_dof1d * max_q1d];
1579 MFEM_FOREACH_THREAD(i, x, max_team_x)
1580 {
1581#ifdef MFEM_USE_DOUBLE
1582 dists[i] = HUGE_VAL;
1583#else
1584 dists[i] = HUGE_VALF;
1585#endif
1586 }
1587 MFEM_FOREACH_THREAD(j0, x, nq1d)
1588 {
1589 for (int i0 = 0; i0 < basis1d.pN; ++i0)
1590 {
1591 basis[j0 + i0 * nq1d] = basis1d.eval(qptr[j0], i0);
1592 }
1593 }
1594 MFEM_SYNC_THREAD;
1595 // team serial portion
1596 MFEM_FOREACH_THREAD(j, x, nq)
1597 {
1598 real_t phys_coord[SDim] = {0};
1599 int idcs[Dim];
1600 idcs[0] = j % nq1d;
1601 idcs[1] = j / nq1d;
1602 idcs[2] = idcs[1] / nq1d;
1603 idcs[1] = idcs[1] % nq1d;
1604 for (int i2 = 0; i2 < basis1d.pN; ++i2)
1605 {
1606 for (int i1 = 0; i1 < basis1d.pN; ++i1)
1607 {
1608 for (int i0 = 0; i0 < basis1d.pN; ++i0)
1609 {
1610 real_t b = basis[idcs[0] + i0 * nq1d] * basis[idcs[1] + i1 * nq1d] *
1611 basis[idcs[2] + i2 * nq1d];
1612 for (int d = 0; d < SDim; ++d)
1613 {
1614 phys_coord[d] +=
1615 mptr[i0 +
1616 (i1 + (i2 + (d + eptr[idx] * SDim) * basis1d.pN) *
1617 basis1d.pN) *
1618 basis1d.pN] *
1619 b;
1620 }
1621 }
1622 }
1623 }
1624 real_t dist = 0;
1625 // L-2 norm squared
1626 for (int d = 0; d < SDim; ++d)
1627 {
1628 real_t tmp = pptr[idx + d * npts] - phys_coord[d];
1629 dist += tmp * tmp;
1630 }
1631 if (dist < dists[MFEM_THREAD_ID(x)])
1632 {
1633 // closer guess in physical space
1634 dists[MFEM_THREAD_ID(x)] = dist;
1635 for (int d = 0; d < Dim; ++d)
1636 {
1637 ref_buf[MFEM_THREAD_ID(x) + d * n] = qptr[idcs[d]];
1638 }
1639 }
1640 }
1641 // now do tree reduce
1642 for (int i = (MFEM_THREAD_SIZE(x) >> 1); i > 0; i >>= 1)
1643 {
1644 MFEM_SYNC_THREAD;
1645 if (MFEM_THREAD_ID(x) < i)
1646 {
1647 if (dists[MFEM_THREAD_ID(x) + i] < dists[MFEM_THREAD_ID(x)])
1648 {
1649 dists[MFEM_THREAD_ID(x)] = dists[MFEM_THREAD_ID(x) + i];
1650 for (int d = 0; d < Dim; ++d)
1651 {
1652 ref_buf[MFEM_THREAD_ID(x) + d * n] =
1653 ref_buf[MFEM_THREAD_ID(x) + i + d * n];
1654 }
1655 }
1656 }
1657 }
1658 // write results out
1659 MFEM_SYNC_THREAD;
1660 MFEM_FOREACH_THREAD(d, x, Dim) { xptr[idx + d * npts] = ref_buf[d * n]; }
1661 }
1662};
1663
1664template <int Geom, int SDim, bool use_device>
1665static void ClosestPhysNodeImpl(int npts, int nelems, int ndof1d, int nq1d,
1666 const real_t *mptr, const real_t *pptr,
1667 const int *eptr, const real_t *nptr,
1668 const real_t *qptr, real_t *xptr)
1669{
1670 constexpr int max_team_x = 64;
1671 constexpr int max_q1d = 128;
1672 PhysNodeFinder<Geom, SDim, max_team_x, max_q1d> func;
1673 MFEM_VERIFY(ndof1d <= 32, "maximum of 32 dofs per dim is allowed");
1674 MFEM_VERIFY(nq1d <= max_q1d, "maximum of 128 test points per dim is allowed");
1675 func.basis1d.z = nptr;
1676 func.basis1d.pN = ndof1d;
1677 func.mptr = mptr;
1678 func.pptr = pptr;
1679 func.eptr = eptr;
1680 func.qptr = qptr;
1681 func.xptr = xptr;
1682 func.npts = npts;
1683 func.nq1d = nq1d;
1684 func.nq = func.compute_nq(nq1d);
1685 if (use_device)
1686 {
1687 // team_x must be a power of 2
1688 int team_x = max_team_x;
1689 while (true)
1690 {
1691 if (team_x <= func.nq)
1692 {
1693 break;
1694 }
1695 team_x >>= 1;
1696 }
1697 team_x = std::min<int>(max_team_x, 2 * team_x);
1698 forall_2D(npts, team_x, 1, func);
1699 }
1700 else
1701 {
1702 forall_switch(false, npts, func);
1703 }
1704}
1705
1706template <int Geom, int SDim, bool use_device>
1707static void ClosestPhysDofImpl(int npts, int nelems, int ndof1d,
1708 const real_t *mptr, const real_t *pptr,
1709 const int *eptr, const real_t *nptr,
1710 real_t *xptr)
1711{
1712 constexpr int max_team_x = 64;
1713 PhysDofFinder<Geom, SDim, max_team_x> func;
1714 func.basis1d.z = nptr;
1715 func.basis1d.pN = ndof1d;
1716 func.mptr = mptr;
1717 func.pptr = pptr;
1718 func.eptr = eptr;
1719 func.xptr = xptr;
1720 func.npts = npts;
1721 if (use_device)
1722 {
1723 int team_x = max_team_x;
1724 int ndof = func.ndofs(ndof1d);
1725 while (true)
1726 {
1727 if (team_x <= ndof)
1728 {
1729 break;
1730 }
1731 team_x >>= 1;
1732 }
1733 team_x = std::min<int>(max_team_x, 2 * team_x);
1734 forall_2D(npts, team_x, 1, func);
1735 }
1736 else
1737 {
1738 forall_switch(false, npts, func);
1739 }
1740}
1741
1742template <int Geom, int SDim, bool use_device>
1743static void ClosestRefDofImpl(int npts, int nelems, int ndof1d,
1744 const real_t *mptr, const real_t *pptr,
1745 const int *eptr, const real_t *nptr,
1746 real_t *xptr)
1747{
1748 // TODO
1749 MFEM_ABORT("ClostestRefDofImpl not implemented yet");
1750}
1751
1752template <int Geom, int SDim, bool use_device>
1753static void ClosestRefNodeImpl(int npts, int nelems, int ndof1d, int nq1d,
1754 const real_t *mptr, const real_t *pptr,
1755 const int *eptr, const real_t *nptr,
1756 const real_t *qptr, real_t *xptr)
1757{
1758 // TODO
1759 MFEM_ABORT("ClostestRefNodeImpl not implemented yet");
1760}
1761
1762template <int Geom, int SDim, InverseElementTransformation::SolverType SType,
1763 bool use_device>
1764static void NewtonSolveImpl(real_t ref_tol, real_t phys_rtol, int max_iter,
1765 int npts, int nelems, int ndof1d,
1766 const real_t *mptr, const real_t *pptr,
1767 const int *eptr, const real_t *nptr, int *tptr,
1768 int *iter_ptr, real_t *xptr)
1769{
1770 constexpr int max_team_x = use_device ? 64 : 1;
1771 InvTNewtonSolver<Geom, SDim, SType, max_team_x> func;
1772 MFEM_VERIFY(ndof1d <= func.max_dof1d(),
1773 "exceeded max_dof1d limit (32 for 2D/3D)");
1774 func.ref_tol = ref_tol;
1775 func.phys_rtol = phys_rtol;
1776 func.max_iter = max_iter;
1777 func.basis1d.z = nptr;
1778 func.basis1d.pN = ndof1d;
1779 func.mptr = mptr;
1780 func.pptr = pptr;
1781 func.eptr = eptr;
1782 func.xptr = xptr;
1783 func.iter_ptr = iter_ptr;
1784 func.tptr = tptr;
1785 func.npts = npts;
1786 if (use_device)
1787 {
1788 int team_x = max_team_x;
1789 int ndof = func.ndofs(ndof1d);
1790 while (true)
1791 {
1792 if (team_x <= ndof)
1793 {
1794 break;
1795 }
1796 team_x >>= 1;
1797 }
1798 team_x = std::min<int>(max_team_x, 2 * team_x);
1799 forall_2D(npts, team_x, 1, func);
1800 }
1801 else
1802 {
1803 forall_switch(false, npts, func);
1804 }
1805}
1806
1807template <int Geom, int SDim,
1808 InverseElementTransformation::SolverType SolverType, int max_team_x>
1809struct InvTNewtonEdgeScanner
1810{
1811 InvTNewtonSolver<Geom, SDim, SolverType, max_team_x> solver;
1812 // 1D ref space initial guesses
1813 const real_t *qptr;
1814 // num 1D points in qptr
1815 int nq1d;
1816
1817 void MFEM_HOST_DEVICE operator()(int idx) const
1818 {
1819 // can only be outside if all test points report outside
1820 int res = InverseElementTransformation::Outside;
1821 for (int i = 0; i < nq1d; ++i)
1822 {
1823 for (int d = 0; d < eltrans::GeometryUtils<Geom>::Dimension();
1824 ++d)
1825 {
1826 if (MFEM_THREAD_ID(x) == 0)
1827 {
1828 for (int d2 = 0;
1829 d2 < eltrans::GeometryUtils<Geom>::Dimension();
1830 ++d2)
1831 {
1832 solver.xptr[idx + d2 * solver.npts] = 0;
1833 }
1834 solver.xptr[idx + d * solver.npts] = qptr[i];
1835 }
1836 MFEM_SYNC_THREAD;
1837 int res_tmp = solver(idx);
1838 switch (res_tmp)
1839 {
1840 case InverseElementTransformation::Inside:
1841 return;
1842 case InverseElementTransformation::Outside:
1843 break;
1844 case InverseElementTransformation::Unknown:
1845 res = InverseElementTransformation::Unknown;
1846 break;
1847 }
1848 if (qptr[i] == 0)
1849 {
1850 // don't repeat test the origin
1851 break;
1852 }
1853 }
1854 }
1855 if (MFEM_THREAD_ID(x) == 0)
1856 {
1857 solver.tptr[idx] = res;
1858 }
1859 }
1860};
1861
1862template <int Geom, int SDim, InverseElementTransformation::SolverType SType,
1863 bool use_device>
1864static void NewtonEdgeScanImpl(real_t ref_tol, real_t phys_rtol, int max_iter,
1865 int npts, int nelems, int ndof1d,
1866 const real_t *mptr, const real_t *pptr,
1867 const int *eptr, const real_t *nptr,
1868 const real_t *qptr, int nq1d, int *tptr,
1869 int *iter_ptr, real_t *xptr)
1870{
1871 constexpr int max_team_x = use_device ? 64 : 1;
1872 InvTNewtonEdgeScanner<Geom, SDim, SType, max_team_x> func;
1873 MFEM_VERIFY(ndof1d <= func.solver.max_dof1d(),
1874 "exceeded max_dof1d limit (32 for 2D/3D)");
1875 func.solver.ref_tol = ref_tol;
1876 func.solver.phys_rtol = phys_rtol;
1877 func.solver.max_iter = max_iter;
1878 func.solver.basis1d.z = nptr;
1879 func.solver.basis1d.pN = ndof1d;
1880 func.solver.mptr = mptr;
1881 func.solver.pptr = pptr;
1882 func.solver.eptr = eptr;
1883 func.solver.xptr = xptr;
1884 func.solver.iter_ptr = iter_ptr;
1885 func.solver.tptr = tptr;
1886 func.solver.npts = npts;
1887 func.nq1d = nq1d;
1888 func.qptr = qptr;
1889 if (use_device)
1890 {
1891 int team_x = max_team_x;
1892 int ndof = func.solver.ndofs(ndof1d);
1893 while (true)
1894 {
1895 if (team_x <= ndof)
1896 {
1897 break;
1898 }
1899 team_x >>= 1;
1900 }
1901 team_x = std::min<int>(max_team_x, 2 * team_x);
1902 forall_2D(npts, team_x, 1, func);
1903 }
1904 else
1905 {
1906 forall_switch(false, npts, func);
1907 }
1908}
1909
1910} // namespace internal
1911
1912template <int Geom, int SDim, bool use_device>
1914BatchInverseElementTransformation::FindClosestPhysPoint::Kernel()
1915{
1916 return internal::ClosestPhysNodeImpl<Geom, SDim, use_device>;
1917}
1918
1919template <int Geom, int SDim, bool use_device>
1921BatchInverseElementTransformation::FindClosestPhysDof::Kernel()
1922{
1923 return internal::ClosestPhysDofImpl<Geom, SDim, use_device>;
1924}
1925
1926template <int Geom, int SDim, bool use_device>
1928BatchInverseElementTransformation::FindClosestRefPoint::Kernel()
1929{
1930 return internal::ClosestRefNodeImpl<Geom, SDim, use_device>;
1931}
1932
1933template <int Geom, int SDim, bool use_device>
1935BatchInverseElementTransformation::FindClosestRefDof::Kernel()
1936{
1937 return internal::ClosestRefDofImpl<Geom, SDim, use_device>;
1938}
1939
1940template <int Geom, int SDim, InverseElementTransformation::SolverType SType,
1941 bool use_device>
1943BatchInverseElementTransformation::NewtonSolve::Kernel()
1944{
1945 return internal::NewtonSolveImpl<Geom, SDim, SType, use_device>;
1946}
1947
1948template <int Geom, int SDim, InverseElementTransformation::SolverType SType,
1949 bool use_device>
1951BatchInverseElementTransformation::NewtonEdgeScan::Kernel()
1952{
1953 return internal::NewtonEdgeScanImpl<Geom, SDim, SType, use_device>;
1954}
1955
1957BatchInverseElementTransformation::FindClosestPhysPoint::Fallback(int, int,
1958 bool)
1959{
1960 MFEM_ABORT("Invalid Geom/SDim combination");
1961}
1962
1964BatchInverseElementTransformation::FindClosestRefPoint::Fallback(int, int,
1965 bool)
1966{
1967 MFEM_ABORT("Invalid Geom/SDim combination");
1968}
1969
1971BatchInverseElementTransformation::NewtonSolve::Fallback(
1973{
1974 MFEM_ABORT("Invalid Geom/SDim/SolverType combination");
1975}
1976
1978BatchInverseElementTransformation::NewtonEdgeScan::Fallback(
1980{
1981 MFEM_ABORT("Invalid Geom/SDim/SolverType combination");
1982}
1983
1985BatchInverseElementTransformation::FindClosestPhysDof::Fallback(int, int, bool)
1986{
1987 MFEM_ABORT("Invalid Geom/SDim combination");
1988}
1989
1991BatchInverseElementTransformation::FindClosestRefDof::Fallback(int, int, bool)
1992{
1993 MFEM_ABORT("Invalid Geom/SDim combination");
1994}
1995
1996BatchInverseElementTransformation::Kernels::Kernels()
1997{
1998 using BatchInvTr = BatchInverseElementTransformation;
1999
2000 constexpr auto SEGMENT = Geometry::SEGMENT;
2001 constexpr auto SQUARE = Geometry::SQUARE;
2002 constexpr auto CUBE = Geometry::CUBE;
2003 constexpr auto Newton = InverseElementTransformation::Newton;
2004 constexpr auto NewtonElementProject =
2006
2007 BatchInvTr::AddFindClosestSpecialization<SEGMENT, 1>();
2008 BatchInvTr::AddFindClosestSpecialization<SEGMENT, 2>();
2009 BatchInvTr::AddFindClosestSpecialization<SEGMENT, 3>();
2010
2011 BatchInvTr::AddFindClosestSpecialization<SQUARE, 2>();
2012 BatchInvTr::AddFindClosestSpecialization<SQUARE, 3>();
2013
2014 BatchInvTr::AddFindClosestSpecialization<CUBE, 3>();
2015
2016 // NewtonSolve
2017 BatchInvTr::AddNewtonSolveSpecialization<SEGMENT, 1, Newton>();
2018 BatchInvTr::AddNewtonSolveSpecialization<SEGMENT, 2, Newton>();
2019 BatchInvTr::AddNewtonSolveSpecialization<SEGMENT, 3, Newton>();
2020
2021 BatchInvTr::AddNewtonSolveSpecialization<SEGMENT, 1, NewtonElementProject>();
2022 BatchInvTr::AddNewtonSolveSpecialization<SEGMENT, 2, NewtonElementProject>();
2023 BatchInvTr::AddNewtonSolveSpecialization<SEGMENT, 3, NewtonElementProject>();
2024
2025 BatchInvTr::AddNewtonSolveSpecialization<SQUARE, 2, Newton>();
2026 BatchInvTr::AddNewtonSolveSpecialization<SQUARE, 3, Newton>();
2027 BatchInvTr::AddNewtonSolveSpecialization<SQUARE, 2, NewtonElementProject>();
2028 BatchInvTr::AddNewtonSolveSpecialization<SQUARE, 3, NewtonElementProject>();
2029
2030 BatchInvTr::AddNewtonSolveSpecialization<CUBE, 3, Newton>();
2031 BatchInvTr::AddNewtonSolveSpecialization<CUBE, 3, NewtonElementProject>();
2032}
2033
2034/// \endcond DO_NOT_DOCUMENT
2035
2036} // namespace mfem
const T * HostRead() const
Shortcut for mfem::Read(a.GetMemory(), a.Size(), false).
Definition array.hpp:385
void SetSize(int nsize)
Change the logical size of the array, keep existing entries.
Definition array.hpp:840
int Size() const
Return the logical size of the array.
Definition array.hpp:166
T * Write(bool on_dev=true)
Shortcut for mfem::Write(a.GetMemory(), a.Size(), on_dev).
Definition array.hpp:389
const T * Read(bool on_dev=true) const
Shortcut for mfem::Read(a.GetMemory(), a.Size(), on_dev).
Definition array.hpp:381
static int Check(int b_type)
If the input does not represents a valid BasisType, abort with an error; otherwise return the input.
Definition fe_base.hpp:49
@ GaussLobatto
Closed type.
Definition fe_base.hpp:36
@ ClosedUniform
Nodes: x_i = i/(n-1), i=0,...,n-1.
Definition fe_base.hpp:39
static int GetNodalBasis(int qpt_type)
Return the nodal BasisType corresponding to the Quadrature1D type.
Definition fe_base.hpp:82
Performs batch inverse element transforms. Currently only supports non-mixed meshes with SEGMENT,...
Definition eltrans.hpp:406
void UpdateNodes(const GridFunction &nodes, MemoryType d_mt=MemoryType::DEFAULT)
Updates internal datastructures if nodes change. Some version of UpdateNodes must be called at least ...
void(*)(real_t, real_t, int, int, int, int, const real_t *, const real_t *, const int *, const real_t *, const real_t *, int, int *, int *, real_t *) NewtonEdgeScanKernelType
Definition eltrans.hpp:591
void(*)(int, int, int, int, const real_t *, const real_t *, const int *, const real_t *, const real_t *, real_t *) ClosestRefPointKernelType
Definition eltrans.hpp:572
void(*)(int, int, int, const real_t *, const real_t *, const int *, const real_t *, real_t *) ClosestRefDofKernelType
Definition eltrans.hpp:564
void Transform(const Vector &pts, const Array< int > &elems, Array< int > &types, Vector &refs, bool use_device=true, Array< int > *iters=nullptr) const
Performs a batch request of a set of points belonging to the given elements. pts list of physical poi...
void(*)(int, int, int, int, const real_t *, const real_t *, const int *, const real_t *, const real_t *, real_t *) ClosestPhysPointKernelType
Definition eltrans.hpp:546
void(*)(int, int, int, const real_t *, const real_t *, const int *, const real_t *, real_t *) ClosestPhysDofKernelType
Definition eltrans.hpp:555
void(*)(real_t, real_t, int, int, int, int, const real_t *, const real_t *, const int *, const real_t *, int *, int *, real_t *) NewtonKernelType
Definition eltrans.hpp:581
static bool Allows(unsigned long b_mask)
Return true if any of the backends in the backend mask, b_mask, are allowed.
Definition device.hpp:262
static MemoryType GetDeviceMemoryType()
Get the current Device MemoryType. This is the MemoryType used by most MFEM classes when allocating m...
Definition device.hpp:277
Class FiniteElementSpace - responsible for providing FEM view of the mesh, mainly managing the set of...
Definition fespace.hpp:208
bool IsVariableOrder() const
Returns true if the space contains elements of varying polynomial orders.
Definition fespace.hpp:678
virtual const FiniteElement * GetFE(int i) const
Returns pointer to the FiniteElement in the FiniteElementCollection associated with i'th element in t...
Definition fespace.cpp:3824
int GetNE() const
Returns number of elements in the mesh.
Definition fespace.hpp:869
const ElementRestrictionOperator * GetElementRestriction(ElementDofOrdering e_ordering) const
Return an Operator that converts L-vectors to E-vectors.
Definition fespace.cpp:1480
Mesh * GetMesh() const
Returns the mesh.
Definition fespace.hpp:644
int GetVDim() const
Returns the vector dimension of the finite element space.
Definition fespace.hpp:819
const FiniteElement * GetTypicalFE() const
Return GetFE(0) if the local mesh is not empty; otherwise return a typical FE based on the Geometry t...
Definition fespace.cpp:3860
virtual int GetMaxElementOrder() const
Return the maximum polynomial order over all elements.
Definition fespace.hpp:674
Abstract class for all finite elements.
Definition fe_base.hpp:247
int GetOrder() const
Returns the order of the finite element. In the case of anisotropic orders, returns the maximum order...
Definition fe_base.hpp:341
int GetDim() const
Returns the reference space dimension for the finite element.
Definition fe_base.hpp:324
Geometry::Type GetGeomType() const
Returns the Geometry::Type of the reference element.
Definition fe_base.hpp:334
const IntegrationPoint & GetCenter(int GeomType) const
Return the center of the given Geometry::Type, GeomType.
Definition geom.hpp:75
Class for grid function - Vector with associated FE space.
Definition gridfunc.hpp:32
FiniteElementSpace * FESpace()
virtual void GetVectorValue(int i, const IntegrationPoint &ip, Vector &val) const
Definition gridfunc.cpp:474
Class for integration point with weight.
Definition intrules.hpp:35
@ GivenPoint
Use a specific point, set with SetInitialGuess().
Definition eltrans.hpp:214
@ Center
Use the center of the reference element.
Definition eltrans.hpp:205
Mesh data type.
Definition mesh.hpp:65
int Dimension() const
Dimension of the reference space used within the elements.
Definition mesh.hpp:1306
void GetNodes(Vector &node_coord) const
Definition mesh.cpp:9627
int GetNumGeometries(int dim) const
Return the number of geometries of the given dimension present in the mesh.
Definition mesh.cpp:7558
Abstract operator.
Definition operator.hpp:25
virtual void Mult(const Vector &x, Vector &y) const =0
Operator application: y=A(x).
const Array< real_t > * GetPointsArray(const int p, const int btype)
Get the coordinates of the points of the given BasisType, btype.
Definition fe_base.cpp:2367
Vector data type.
Definition vector.hpp:82
virtual const real_t * Read(bool on_dev=true) const
Shortcut for mfem::Read(vec.GetMemory(), vec.Size(), on_dev).
Definition vector.hpp:520
virtual real_t * ReadWrite(bool on_dev=true)
Shortcut for mfem::ReadWrite(vec.GetMemory(), vec.Size(), on_dev).
Definition vector.hpp:536
void SetSize(int s)
Resize the vector to size s.
Definition vector.hpp:584
virtual real_t * HostWrite()
Shortcut for mfem::Write(vec.GetMemory(), vec.Size(), false).
Definition vector.hpp:532
1D Lagrange basis from [0, 1]
int dim
Definition ex24.cpp:53
real_t b
Definition lissajous.cpp:42
real_t a
Definition lissajous.cpp:41
mfem::real_t real_t
Geometry Geometries
Definition fe.cpp:49
void forall_2D(int N, int X, int Y, lambda &&body)
Definition forall.hpp:925
float real_t
Definition config.hpp:46
MemoryType
Memory types supported by MFEM.
Poly_1D poly1d
Definition fe.cpp:28
void forall_switch(bool use_dev, int N, lambda &&body)
Definition forall.hpp:919
void solve(Mesh &mesh, GridFunction &color, socketstream &sock)
Definition rubik.cpp:4939
@ DEVICE_MASK
Biwise-OR of all device backends.
Definition device.hpp:99
void pts(int iphi, int t, real_t x[])