MFEM v4.8.0
Finite element discretization library
Loading...
Searching...
No Matches
batchitrans.cpp
Go to the documentation of this file.
1// Copyright (c) 2010-2025, Lawrence Livermore National Security, LLC. Produced
2// at the Lawrence Livermore National Laboratory. All Rights reserved. See files
3// LICENSE and NOTICE for details. LLNL-CODE-806117.
4//
5// This file is part of the MFEM library. For more information and source code
6// availability visit https://mfem.org.
7//
8// MFEM is free software; you can redistribute it and/or modify it under the
9// terms of the BSD-3 license. We welcome feedback and contributions, see file
10// CONTRIBUTING.md for details.
11
14#include "fem.hpp"
15
16#include "eltrans.hpp"
17
18#include "../general/forall.hpp"
19
20#include <cmath>
21
22namespace mfem
23{
34
38
40
42 MemoryType d_mt)
43{
44 UpdateNodes(*mesh.GetNodes(), d_mt);
45}
46
48 MemoryType d_mt)
49{
50 MemoryType my_d_mt =
52
53 gf_ = &gf;
54 const FiniteElementSpace *fespace = gf.FESpace();
55 const int max_order = fespace->GetMaxElementOrder();
56 const int ndof1d = max_order + 1;
57 int ND = ndof1d;
58 const int dim = fespace->GetMesh()->Dimension();
59 MFEM_VERIFY(fespace->GetMesh()->GetNumGeometries(dim) <= 1,
60 "Mixed meshes are not supported.");
61 for (int d = 1; d < dim; ++d)
62 {
63 ND *= ndof1d;
64 }
65 const int vdim = fespace->GetVDim();
66 const int NE = fespace->GetNE();
67 node_pos.SetSize(vdim * ND * NE, my_d_mt);
68
69 const FiniteElement *fe = fespace->GetTypicalFE();
70 const TensorBasisElement *tfe = dynamic_cast<const TensorBasisElement *>(fe);
71 if (fespace->IsVariableOrder() || tfe == nullptr)
72 {
73 MFEM_VERIFY(fe->GetGeomType() == Geometry::SEGMENT ||
76 "unsupported geometry type");
77 // project onto GLL nodes
78 basis_type = BasisType::GaussLobatto;
79 points1d = poly1d.GetPointsArray(max_order, BasisType::GaussLobatto);
80 points1d->HostRead();
81 // either mixed order, or not a tensor basis
82 node_pos.HostWrite();
83 real_t tmp[3];
84 int sdim = vdim;
85 Vector pos(tmp, sdim);
87 int idcs[3];
88 for (int e = 0; e < NE; ++e)
89 {
90 fe = fespace->GetFE(e);
91 for (int i = 0; i < ND; ++i)
92 {
93 idcs[0] = i % ndof1d;
94 idcs[1] = i / ndof1d;
95 idcs[2] = idcs[1] / ndof1d;
96 idcs[1] = idcs[1] % ndof1d;
97 ip.x = (*points1d)[idcs[0]];
98 ip.y = (*points1d)[idcs[1]];
99 ip.z = (*points1d)[idcs[2]];
100 gf.GetVectorValue(e, ip, pos);
101 for (int d = 0; d < sdim; ++d)
102 {
103 node_pos[i + (d + e * sdim) * ND] = pos[d];
104 }
105 }
106 }
107 }
108 else
109 {
110 const Operator *elem_restr =
112 elem_restr->Mult(gf, node_pos);
113 basis_type = tfe->GetBasisType();
114 points1d = poly1d.GetPointsArray(max_order, basis_type);
115 }
116}
117
119 const Array<int> &elems,
120 Array<int> &types,
121 Vector &refs, bool use_device,
122 Array<int> *iters) const
123{
125 {
126 // no devices available
127 use_device = false;
128 }
129 const FiniteElementSpace *fespace = gf_->FESpace();
130 const FiniteElement *fe = fespace->GetTypicalFE();
131 const int dim = fe->GetDim();
132 const int vdim = fespace->GetVDim();
133 const int NE = fespace->GetNE();
134 const int order = fe->GetOrder();
135 int npts = elems.Size();
136 auto geom = fe->GetGeomType();
137
138 types.SetSize(npts);
139 refs.SetSize(npts * dim);
140
141 auto pptr = pts.Read(use_device);
142 auto eptr = elems.Read(use_device);
143 auto mptr = node_pos.Read(use_device);
144 auto tptr = types.Write(use_device);
145 auto xptr = refs.ReadWrite(use_device);
146 int* iter_ptr = nullptr;
147 if (iters != nullptr)
148 {
149 iters->SetSize(npts);
150 iter_ptr = iters->Write(use_device);
151 }
152 auto nptr = points1d->Read(use_device);
153 int ndof1d = points1d->Size();
154
155 switch (init_guess_type)
156 {
158 {
159 real_t cx, cy, cz;
160 auto ip0 = Geometries.GetCenter(geom);
161 cx = ip0.x;
162 cy = ip0.y;
163 cz = ip0.z;
164 switch (dim)
165 {
166 case 1:
167 forall_switch(use_device, npts,
168 [=] MFEM_HOST_DEVICE(int i) { xptr[i] = cx; });
169 break;
170 case 2:
171 forall_switch(use_device, npts, [=] MFEM_HOST_DEVICE(int i)
172 {
173 xptr[i] = cx;
174 xptr[i + npts] = cy;
175 });
176 break;
177 case 3:
178 forall_switch(use_device, npts, [=] MFEM_HOST_DEVICE(int i)
179 {
180 xptr[i] = cx;
181 xptr[i + npts] = cy;
182 xptr[i + 2 * npts] = cz;
183 });
184 break;
185 }
186 } break;
189 {
190 int nq1d = (qpts_order >= 0 ? qpts_order
191 : std::max(order + rel_qpts_order, 0)) +
192 1;
193 int btype = BasisType::GetNodalBasis(guess_points_type);
194
195 if ((btype == BasisType::Invalid || btype == basis_type) &&
196 nq1d == ndof1d)
197 {
198 // special case: test points are basis nodal points
199 if (init_guess_type ==
201 {
202 FindClosestPhysDof::Run(geom, vdim, use_device, npts, NE, ndof1d,
203 mptr, pptr, eptr, nptr, xptr);
204 }
205 else
206 {
207 FindClosestRefDof::Run(geom, vdim, use_device, npts, NE, ndof1d, mptr,
208 pptr, eptr, nptr, xptr);
209 }
210 }
211 else
212 {
213 BasisType::Check(btype);
214 auto qpoints = poly1d.GetPointsArray(nq1d - 1, btype);
215 auto qptr = qpoints->Read(use_device);
216 if (init_guess_type ==
218 {
219 FindClosestPhysPoint::Run(geom, vdim, use_device, npts, NE, ndof1d,
220 nq1d, mptr, pptr, eptr, nptr, qptr,
221 xptr);
222 }
223 else
224 {
225 FindClosestRefPoint::Run(geom, vdim, use_device, npts, NE, ndof1d,
226 nq1d, mptr, pptr, eptr, nptr, qptr, xptr);
227 }
228 }
229 } break;
231 // nothing to do here
232 break;
234 {
235 int nq1d = (qpts_order >= 0 ? qpts_order
236 : std::max(order + rel_qpts_order, 0)) +
237 1;
238 int btype = BasisType::GetNodalBasis(guess_points_type);
239 if (btype == BasisType::Invalid)
240 {
241 // default to closed uniform points
243 }
244 auto qpoints = poly1d.GetPointsArray(nq1d - 1, btype);
245 auto qptr = qpoints->Read(use_device);
246 NewtonEdgeScan::Run(geom, vdim, solver_type, use_device, ref_tol,
247 phys_rtol, max_iter, npts, NE, ndof1d, mptr, pptr,
248 eptr, nptr, qptr, nq1d, tptr, iter_ptr,
249 xptr);
250 }
251 return;
252 }
253 // general case: for each point, use guess inside refs
254 NewtonSolve::Run(geom, vdim, solver_type, use_device, ref_tol, phys_rtol,
255 max_iter, npts, NE, ndof1d, mptr, pptr, eptr, nptr, tptr,
256 iter_ptr, xptr);
257}
258
259/// \cond DO_NOT_DOCUMENT
260namespace internal
261{
262// data for batch inverse transform newton solvers
263struct InvTNewtonSolverBase
264{
265 real_t ref_tol;
266 real_t phys_rtol;
267 // physical space coordinates of mesh element nodes
268 const real_t *mptr;
269 // physical space point coordinates to find
270 const real_t *pptr;
271 // element indices
272 const int *eptr;
273 // newton solve result code
274 int *tptr;
275 // number of iterations taken
276 int *iter_ptr;
277 // result ref coords
278 real_t *xptr;
279 eltrans::Lagrange basis1d;
280
281 int max_iter;
282 // number of points in pptr
283 int npts;
284};
285
286// helper for computing dx = (pseudo)-inverse jac * [pt - F(x)]
287template <int Dim, int SDim> struct InvTLinSolve;
288
289template <> struct InvTLinSolve<1, 1>
290{
291 static void MFEM_HOST_DEVICE solve(const real_t *jac, const real_t *rhs,
292 real_t *dx)
293 {
294 dx[0] = rhs[0] / jac[0];
295 }
296};
297
298template <> struct InvTLinSolve<1, 2>
299{
300 static void MFEM_HOST_DEVICE solve(const real_t *jac, const real_t *rhs,
301 real_t *dx)
302 {
303 auto a00 = jac[0];
304 auto a10 = jac[1 * MFEM_THREAD_SIZE(x)];
305 real_t den = a00 * a00 +
306 a10 * a10;
307 dx[0] = (a00 * rhs[0] + a10 * rhs[1 * MFEM_THREAD_SIZE(x)]) / den;
308 }
309};
310
311template <> struct InvTLinSolve<1, 3>
312{
313 static void MFEM_HOST_DEVICE solve(const real_t *jac, const real_t *rhs,
314 real_t *dx)
315 {
316 auto a00 = jac[0];
317 auto a10 = jac[1 * MFEM_THREAD_SIZE(x)];
318 auto a20 = jac[2 * MFEM_THREAD_SIZE(x)];
319 real_t den = a00 * a00 + a10 * a10 + a20 * a20;
320 dx[0] = (a00 * rhs[0] + a10 * rhs[1 * MFEM_THREAD_SIZE(x)] +
321 a20 * rhs[2 * MFEM_THREAD_SIZE(x)]) /
322 den;
323 }
324};
325
326template <> struct InvTLinSolve<2, 2>
327{
328 static void MFEM_HOST_DEVICE solve(const real_t *jac, const real_t *rhs,
329 real_t *dx)
330 {
331 auto a00 = jac[(0 + 0 * 2) * MFEM_THREAD_SIZE(x)];
332 auto a10 = jac[(1 + 0 * 2) * MFEM_THREAD_SIZE(x)];
333 auto a01 = jac[(0 + 1 * 2) * MFEM_THREAD_SIZE(x)];
334 auto a11 = jac[(1 + 1 * 2) * MFEM_THREAD_SIZE(x)];
335 real_t den = 1 / (a00 * a11 - a01 * a10);
336 dx[0] = (a11 * rhs[0 * MFEM_THREAD_SIZE(x)] -
337 a01 * rhs[1 * MFEM_THREAD_SIZE(x)]) *
338 den;
339 dx[1] = (a00 * rhs[1 * MFEM_THREAD_SIZE(x)] -
340 a10 * rhs[0 * MFEM_THREAD_SIZE(x)]) *
341 den;
342 }
343};
344
345template <> struct InvTLinSolve<2, 3>
346{
347 static void MFEM_HOST_DEVICE solve(const real_t *jac, const real_t *rhs,
348 real_t *dx)
349 {
350 auto a00 = jac[(0 + 0 * 3) * MFEM_THREAD_SIZE(x)];
351 auto a01 = jac[(0 + 1 * 3) * MFEM_THREAD_SIZE(x)];
352 auto a10 = jac[(1 + 0 * 3) * MFEM_THREAD_SIZE(x)];
353 auto a11 = jac[(1 + 1 * 3) * MFEM_THREAD_SIZE(x)];
354 auto a20 = jac[(2 + 0 * 3) * MFEM_THREAD_SIZE(x)];
355 auto a21 = jac[(2 + 1 * 3) * MFEM_THREAD_SIZE(x)];
356 // a00**2*a11**2 + a00**2*a21**2 - 2*a00*a01*a10*a11 - 2*a00*a01*a20*a21 +
357 // a01**2*a10**2 + a01**2*a20**2 + a10**2*a21**2 - 2*a10*a11*a20*a21 +
358 // a11**2*a20**2
359 real_t den = 1 / (a00 * a00 * a11 * a11 + a00 * a00 * a21 * a21 -
360 2 * a00 * a01 * a10 * a11 - 2 * a00 * a01 * a20 * a21 +
361 a01 * a01 * a10 * a10 + a01 * a01 * a20 * a20 +
362 a10 * a10 * a21 * a21 - 2 * a10 * a11 * a20 * a21 +
363 a11 * a11 * a20 * a20);
364 // x0*(a00*(a01**2 + a11**2 + a21**2) - a01*(a00*a01 + a10*a11 + a20*a21))
365 // + x1*(a10*(a01**2 + a11**2 + a21**2) - a11*(a00*a01 + a10*a11 + a20*a21))
366 // + x2*(a20*(a01**2 + a11**2 + a21**2) - a21*(a00*a01 + a10*a11 + a20*a21))
367 dx[0] = (rhs[0 * MFEM_THREAD_SIZE(x)] *
368 (a00 * (a01 * a01 + a11 * a11 + a21 * a21) -
369 a01 * (a00 * a01 + a10 * a11 + a20 * a21)) +
370 rhs[1 * MFEM_THREAD_SIZE(x)] *
371 (a10 * (a01 * a01 + a11 * a11 + a21 * a21) -
372 a11 * (a00 * a01 + a10 * a11 + a20 * a21)) +
373 rhs[2 * MFEM_THREAD_SIZE(x)] *
374 (a20 * (a01 * a01 + a11 * a11 + a21 * a21) -
375 a21 * (a00 * a01 + a10 * a11 + a20 * a21))) *
376 den;
377 // x0*(a01*(a00**2 + a10**2 + a20**2)-a00*(a00*a01 + a10*a11 + a20*a21))
378 // +x1*(a11*(a00**2 + a10**2 + a20**2)-a10*(a00*a01 + a10*a11 + a20*a21))
379 // +x2*(a21*(a00**2 + a10**2 + a20**2)-a20*(a00*a01 + a10*a11 + a20*a21))
380 dx[1] = (rhs[0 * MFEM_THREAD_SIZE(x)] *
381 (a01 * (a00 * a00 + a10 * a10 + a20 * a20) -
382 a00 * (a00 * a01 + a10 * a11 + a20 * a21)) +
383 rhs[1 * MFEM_THREAD_SIZE(x)] *
384 (a11 * (a00 * a00 + a10 * a10 + a20 * a20) -
385 a10 * (a00 * a01 + a10 * a11 + a20 * a21)) +
386 rhs[2 * MFEM_THREAD_SIZE(x)] *
387 (a21 * (a00 * a00 + a10 * a10 + a20 * a20) -
388 a20 * (a00 * a01 + a10 * a11 + a20 * a21))) *
389 den;
390 }
391};
392
393template <> struct InvTLinSolve<3, 3>
394{
395 static void MFEM_HOST_DEVICE solve(const real_t *jac, const real_t *rhs,
396 real_t *dx)
397 {
398 auto a00 = jac[(0 + 0 * 3) * MFEM_THREAD_SIZE(x)];
399 auto a01 = jac[(0 + 1 * 3) * MFEM_THREAD_SIZE(x)];
400 auto a02 = jac[(0 + 2 * 3) * MFEM_THREAD_SIZE(x)];
401 auto a10 = jac[(1 + 0 * 3) * MFEM_THREAD_SIZE(x)];
402 auto a11 = jac[(1 + 1 * 3) * MFEM_THREAD_SIZE(x)];
403 auto a12 = jac[(1 + 2 * 3) * MFEM_THREAD_SIZE(x)];
404 auto a20 = jac[(2 + 0 * 3) * MFEM_THREAD_SIZE(x)];
405 auto a21 = jac[(2 + 1 * 3) * MFEM_THREAD_SIZE(x)];
406 auto a22 = jac[(2 + 2 * 3) * MFEM_THREAD_SIZE(x)];
407
408 real_t den = 1 / (a00 * a11 * a22 - a00 * a12 * a22 - a01 * a10 * a22 +
409 a01 * a12 * a20 + a02 * a10 * a21 - a02 * a11 * a20);
410 dx[0] = (rhs[0 * MFEM_THREAD_SIZE(x)] * (a11 * a22 - a12 * a21) -
411 rhs[1 * MFEM_THREAD_SIZE(x)] * (a01 * a22 - a02 * a21) +
412 rhs[2 * MFEM_THREAD_SIZE(x)] * (a01 * a12 - a02 * a11)) *
413 den;
414 dx[1] = (rhs[0 * MFEM_THREAD_SIZE(x)] * (a12 * a20 - a10 * a22) +
415 rhs[1 * MFEM_THREAD_SIZE(x)] * (a00 * a22 - a02 * a20) -
416 rhs[2 * MFEM_THREAD_SIZE(x)] * (a00 * a12 - a02 * a10)) *
417 den;
418 dx[2] = (rhs[0 * MFEM_THREAD_SIZE(x)] * (a10 * a21 - a11 * a20) -
419 rhs[1 * MFEM_THREAD_SIZE(x)] * (a00 * a21 - a01 * a20) +
420 rhs[2 * MFEM_THREAD_SIZE(x)] * (a00 * a11 - a01 * a10)) *
421 den;
422 }
423};
424
425template <int Geom, InverseElementTransformation::SolverType SolverType>
426struct ProjectType;
427
428template <>
429struct ProjectType<Geometry::SEGMENT, InverseElementTransformation::Newton>
430{
431 static MFEM_HOST_DEVICE bool project(real_t &x, real_t &dx)
432 {
433 x += dx;
434 return false;
435 }
436};
437
438template <>
439struct ProjectType<Geometry::SQUARE, InverseElementTransformation::Newton>
440{
441 static MFEM_HOST_DEVICE bool project(real_t &x, real_t &y, real_t &dx,
442 real_t &dy)
443 {
444 x += dx;
445 y += dy;
446 return false;
447 }
448};
449
450template <>
451struct ProjectType<Geometry::CUBE, InverseElementTransformation::Newton>
452{
453 static MFEM_HOST_DEVICE bool project(real_t &x, real_t &y, real_t &z,
454 real_t &dx, real_t &dy, real_t &dz)
455 {
456 x += dx;
457 y += dy;
458 z += dz;
459 return false;
460 }
461};
462
463template <int Geom>
464struct ProjectType<Geom, InverseElementTransformation::NewtonElementProject>
465{
466 template <class... Ts> static MFEM_HOST_DEVICE bool project(Ts &&...args)
467 {
468 return eltrans::GeometryUtils<Geom>::project(args...);
469 }
470};
471
472template <int Geom, int SDim,
473 InverseElementTransformation::SolverType SolverType, int max_team_x>
474struct InvTNewtonSolver;
475
476template <int SDim, InverseElementTransformation::SolverType SType,
477 int max_team_x>
478struct InvTNewtonSolver<Geometry::SEGMENT, SDim, SType, max_team_x>
479 : public InvTNewtonSolverBase
480{
481 static int ndofs(int ndof1d) { return ndof1d; }
482
483 // theoretically unbounded
484 static constexpr MFEM_HOST_DEVICE int max_dof1d() { return 0x1000; }
485
486 int MFEM_HOST_DEVICE operator()(int idx) const
487 {
488 // parallelize one team per pt
489 constexpr int Dim = 1;
490 int iter = 0;
491 MFEM_SHARED real_t ref_coord[Dim];
492 // contiguous in team_x, then SDim
493 MFEM_SHARED real_t phys_coord[SDim * max_team_x];
494 // contiguous in team_x, SDim, then Dim
495 MFEM_SHARED real_t jac[SDim * Dim * max_team_x];
496 MFEM_SHARED bool term_flag[1];
497 MFEM_SHARED int res[1];
498 MFEM_SHARED real_t dx[Dim];
499 MFEM_SHARED real_t prev_dx[Dim];
500 MFEM_SHARED bool hit_bdr[1];
501 MFEM_SHARED bool prev_hit_bdr[1];
502 real_t phys_tol = 0;
503 if (MFEM_THREAD_ID(x) == 0)
504 {
505 term_flag[0] = false;
506 res[0] = InverseElementTransformation::Unknown;
507 for (int d = 0; d < Dim; ++d)
508 {
509 ref_coord[d] = xptr[idx + d * npts];
510 dx[d] = 0;
511 prev_dx[d] = 0;
512 }
513 for (int d = 0; d < SDim; ++d)
514 {
515 phys_tol += pptr[idx + d * npts] * pptr[idx + d * npts];
516 }
517 phys_tol = fmax(phys_rtol * phys_rtol, phys_tol * phys_rtol * phys_rtol);
518 }
519 // for each iteration
520 while (true)
521 {
522 MFEM_SYNC_THREAD;
523 // compute phys_coord and jacobian at the same time
524 for (int d = 0; d < SDim; ++d)
525 {
526 phys_coord[MFEM_THREAD_ID(x) + d * MFEM_THREAD_SIZE(x)] = 0;
527 }
528 for (int i = 0; i < SDim * Dim; ++i)
529 {
530 jac[MFEM_THREAD_ID(x) + i * MFEM_THREAD_SIZE(x)] = 0;
531 }
532 MFEM_SYNC_THREAD;
533 MFEM_FOREACH_THREAD(j0, x, basis1d.pN)
534 {
535 real_t b0, db0;
536 basis1d.eval_d1(b0, db0, ref_coord[0], j0);
537 for (int d = 0; d < SDim; ++d)
538 {
539 phys_coord[MFEM_THREAD_ID(x) + d * MFEM_THREAD_SIZE(x)] +=
540 mptr[j0 + (eptr[idx] * SDim + d) * basis1d.pN] * b0;
541 jac[MFEM_THREAD_ID(x) + (d + 0 * SDim) * MFEM_THREAD_SIZE(x)] +=
542 mptr[j0 + (eptr[idx] * SDim + d) * basis1d.pN] * db0;
543 }
544 }
545
546 for (int i = (MFEM_THREAD_SIZE(x) >> 1); i > 0; i >>= 1)
547 {
548 MFEM_SYNC_THREAD;
549 int a = MFEM_THREAD_ID(x);
550 int b = a + i;
551 if (a < i && b < basis1d.pN)
552 {
553 for (int d = 0; d < SDim; ++d)
554 {
555 phys_coord[a + d * MFEM_THREAD_SIZE(x)] +=
556 phys_coord[b + d * MFEM_THREAD_SIZE(x)];
557 }
558 for (int j = 0; j < SDim * Dim; ++j)
559 {
560 jac[a + j * MFEM_THREAD_SIZE(x)] +=
561 jac[b + j * MFEM_THREAD_SIZE(x)];
562 }
563 }
564 }
565 MFEM_SYNC_THREAD;
566
567 // rest of newton solve logic is serial, have thread 0 solve for it
568 if (MFEM_THREAD_ID(x) == 0)
569 {
570 // compute objective function
571 // f(x) = 1/2 |pt - F(x)|^2
572 real_t dist = 0;
573 for (int d = 0; d < SDim; ++d)
574 {
575 real_t tmp =
576 pptr[idx + d * npts] - phys_coord[d * MFEM_THREAD_SIZE(x)];
577 phys_coord[d * MFEM_THREAD_SIZE(x)] = tmp;
578 dist += tmp * tmp;
579 }
580 // phys_coord now contains pt - F(x)
581 // check for phys_tol convergence
582 if (dist <= phys_tol)
583 {
584 // found solution
585 res[0] = eltrans::GeometryUtils<Geometry::SEGMENT>::inside(
586 ref_coord[0])
587 ? InverseElementTransformation::Inside
588 : InverseElementTransformation::Outside;
589 tptr[idx] = res[0];
590 for (int d = 0; d < Dim; ++d)
591 {
592 xptr[idx + d * npts] = ref_coord[d];
593 }
594 if (iter_ptr)
595 {
596 iter_ptr[idx] = iter + 1;
597 }
598 term_flag[0] = true;
599 }
600 else if (iter >= max_iter)
601 {
602 // terminate on max iterations
603 tptr[idx] = InverseElementTransformation::Unknown;
604 res[0] = InverseElementTransformation::Unknown;
605 // might as well save where we failed at
606 for (int d = 0; d < Dim; ++d)
607 {
608 xptr[idx + d * npts] = ref_coord[d];
609 }
610 if (iter_ptr)
611 {
612 iter_ptr[idx] = iter + 1;
613 }
614 term_flag[0] = true;
615 }
616 else
617 {
618 // compute dx = (pseudo)-inverse jac * [pt - F(x)]
619 InvTLinSolve<Dim, SDim>::solve(jac, phys_coord, dx);
620
621 hit_bdr[0] = ProjectType<Geometry::SEGMENT, SType>::project(
622 ref_coord[0], dx[0]);
623
624 // check for ref coord convergence or stagnation on boundary
625 if (hit_bdr[0])
626 {
627 if (prev_hit_bdr[0])
628 {
629 real_t dx_change = 0;
630 for (int d = 0; d < Dim; ++d)
631 {
632 real_t tmp = dx[d] - prev_dx[d];
633 dx_change += tmp * tmp;
634 }
635 if (dx_change <= ref_tol * ref_tol)
636 {
637 // stuck on the boundary
638 tptr[idx] = InverseElementTransformation::Outside;
639 res[0] = InverseElementTransformation::Outside;
640 for (int d = 0; d < Dim; ++d)
641 {
642 xptr[idx + d * npts] = ref_coord[d];
643 }
644 if (iter_ptr)
645 {
646 iter_ptr[idx] = iter + 1;
647 }
648 term_flag[0] = true;
649 }
650 }
651 }
652
653 prev_hit_bdr[0] = hit_bdr[0];
654 }
655 }
656
657 MFEM_SYNC_THREAD;
658 if (term_flag[0])
659 {
660 return res[0];
661 }
662 MFEM_FOREACH_THREAD(d, x, Dim) { prev_dx[d] = dx[d]; }
663 ++iter;
664 }
665 }
666};
667
668template <int SDim, InverseElementTransformation::SolverType SType,
669 int max_team_x>
670struct InvTNewtonSolver<Geometry::SQUARE, SDim, SType, max_team_x>
671 : public InvTNewtonSolverBase
672{
673 static int ndofs(int ndof1d) { return ndof1d * ndof1d; }
674
675 static constexpr MFEM_HOST_DEVICE int max_dof1d() { return 32; }
676
677 int MFEM_HOST_DEVICE operator()(int idx) const
678 {
679 // parallelize one team per pt
680 constexpr int Dim = 2;
681 int iter = 0;
682 MFEM_SHARED real_t ref_coord[Dim];
683 // contiguous in team_x, then SDim
684 MFEM_SHARED real_t phys_coord[SDim * max_team_x];
685 MFEM_SHARED real_t basis0[max_dof1d()];
686 MFEM_SHARED real_t dbasis0[max_dof1d()];
687 MFEM_SHARED real_t basis1[max_dof1d()];
688 MFEM_SHARED real_t dbasis1[max_dof1d()];
689 // contiguous in team_x, SDim, then Dim
690 MFEM_SHARED real_t jac[SDim * Dim * max_team_x];
691 MFEM_SHARED bool term_flag[1];
692 MFEM_SHARED int res[1];
693 MFEM_SHARED real_t dx[Dim];
694 MFEM_SHARED real_t prev_dx[Dim];
695 MFEM_SHARED bool hit_bdr[1];
696 MFEM_SHARED bool prev_hit_bdr[1];
697 real_t phys_tol = 0;
698 if (MFEM_THREAD_ID(x) == 0)
699 {
700 term_flag[0] = false;
701 res[0] = InverseElementTransformation::Unknown;
702 hit_bdr[0] = false;
703 prev_hit_bdr[0] = false;
704 for (int d = 0; d < Dim; ++d)
705 {
706 ref_coord[d] = xptr[idx + d * npts];
707 dx[d] = 0;
708 prev_dx[d] = 0;
709 }
710 for (int d = 0; d < SDim; ++d)
711 {
712 phys_tol += pptr[idx + d * npts] * pptr[idx + d * npts];
713 }
714 phys_tol = fmax(phys_rtol * phys_rtol, phys_tol * phys_rtol * phys_rtol);
715 }
716 // for each iteration
717 while (true)
718 {
719 MFEM_SYNC_THREAD;
720 // compute phys_coord and jacobian at the same time
721 for (int d = 0; d < SDim; ++d)
722 {
723 phys_coord[MFEM_THREAD_ID(x) + d * MFEM_THREAD_SIZE(x)] = 0;
724 }
725 for (int i = 0; i < SDim * Dim; ++i)
726 {
727 jac[MFEM_THREAD_ID(x) + i * MFEM_THREAD_SIZE(x)] = 0;
728 }
729 MFEM_FOREACH_THREAD(j0, x, basis1d.pN)
730 {
731 basis1d.eval_d1(basis0[j0], dbasis0[j0], ref_coord[0], j0);
732 basis1d.eval_d1(basis1[j0], dbasis1[j0], ref_coord[1], j0);
733 }
734 MFEM_SYNC_THREAD;
735 MFEM_FOREACH_THREAD(jidx, x, basis1d.pN * basis1d.pN)
736 {
737 int idcs[Dim];
738 idcs[0] = jidx % basis1d.pN;
739 idcs[1] = jidx / basis1d.pN;
740 for (int d = 0; d < SDim; ++d)
741 {
742 phys_coord[MFEM_THREAD_ID(x) + d * MFEM_THREAD_SIZE(x)] +=
743 mptr[idcs[0] +
744 (idcs[1] + (eptr[idx] * SDim + d) * basis1d.pN) *
745 basis1d.pN] *
746 basis0[idcs[0]] * basis1[idcs[1]];
747 jac[MFEM_THREAD_ID(x) + (d + 0 * SDim) * MFEM_THREAD_SIZE(x)] +=
748 mptr[idcs[0] +
749 (idcs[1] + (eptr[idx] * SDim + d) * basis1d.pN) *
750 basis1d.pN] *
751 dbasis0[idcs[0]] * basis1[idcs[1]];
752 jac[MFEM_THREAD_ID(x) + (d + 1 * SDim) * MFEM_THREAD_SIZE(x)] +=
753 mptr[idcs[0] +
754 (idcs[1] + (eptr[idx] * SDim + d) * basis1d.pN) *
755 basis1d.pN] *
756 basis0[idcs[0]] * dbasis1[idcs[1]];
757 }
758 }
759
760 for (int i = (MFEM_THREAD_SIZE(x) >> 1); i > 0; i >>= 1)
761 {
762 MFEM_SYNC_THREAD;
763 int a = MFEM_THREAD_ID(x);
764 int b = a + i;
765 if (a < i && b < basis1d.pN * basis1d.pN)
766 {
767 for (int d = 0; d < SDim; ++d)
768 {
769 phys_coord[a + d * MFEM_THREAD_SIZE(x)] +=
770 phys_coord[b + d * MFEM_THREAD_SIZE(x)];
771 }
772 for (int j = 0; j < SDim * Dim; ++j)
773 {
774 jac[a + j * MFEM_THREAD_SIZE(x)] +=
775 jac[b + j * MFEM_THREAD_SIZE(x)];
776 }
777 }
778 }
779 MFEM_SYNC_THREAD;
780
781 // rest of newton solve logic is serial, have thread 0 solve for it
782 if (MFEM_THREAD_ID(x) == 0)
783 {
784 // compute objective function
785 // f(x) = 1/2 |pt - F(x)|^2
786 real_t dist = 0;
787 for (int d = 0; d < SDim; ++d)
788 {
789 real_t tmp =
790 pptr[idx + d * npts] - phys_coord[d * MFEM_THREAD_SIZE(x)];
791 phys_coord[d * MFEM_THREAD_SIZE(x)] = tmp;
792 dist += tmp * tmp;
793 }
794 // phys_coord now contains pt - F(x)
795 // check for phys_tol convergence
796 if (dist <= phys_tol)
797 {
798 // found solution
799 res[0] = eltrans::GeometryUtils<Geometry::SQUARE>::inside(
800 ref_coord[0], ref_coord[1])
801 ? InverseElementTransformation::Inside
802 : InverseElementTransformation::Outside;
803 tptr[idx] = res[0];
804 for (int d = 0; d < Dim; ++d)
805 {
806 xptr[idx + d * npts] = ref_coord[d];
807 }
808 if (iter_ptr)
809 {
810 iter_ptr[idx] = iter + 1;
811 }
812 term_flag[0] = true;
813 }
814 else if (iter >= max_iter)
815 {
816 // terminate on max iterations
817 tptr[idx] = InverseElementTransformation::Unknown;
818 res[0] = InverseElementTransformation::Unknown;
819 // might as well save where we failed at
820 for (int d = 0; d < Dim; ++d)
821 {
822 xptr[idx + d * npts] = ref_coord[d];
823 }
824 if (iter_ptr)
825 {
826 iter_ptr[idx] = iter + 1;
827 }
828 term_flag[0] = true;
829 }
830 else
831 {
832 // compute dx = (pseudo)-inverse jac * [pt - F(x)]
833 InvTLinSolve<Dim, SDim>::solve(jac, phys_coord, dx);
834
835 hit_bdr[0] = ProjectType<Geometry::SQUARE, SType>::project(
836 ref_coord[0], ref_coord[1], dx[0], dx[1]);
837
838 // check for ref coord convergence or stagnation on boundary
839 if (hit_bdr[0])
840 {
841 if (prev_hit_bdr[0])
842 {
843 real_t dx_change = 0;
844 for (int d = 0; d < Dim; ++d)
845 {
846 real_t tmp = dx[d] - prev_dx[d];
847 dx_change += tmp * tmp;
848 }
849 if (dx_change <= ref_tol * ref_tol)
850 {
851 // stuck on the boundary
852 tptr[idx] = InverseElementTransformation::Outside;
853 res[0] = InverseElementTransformation::Outside;
854 for (int d = 0; d < Dim; ++d)
855 {
856 xptr[idx + d * npts] = ref_coord[d];
857 }
858 if (iter_ptr)
859 {
860 iter_ptr[idx] = iter + 1;
861 }
862 term_flag[0] = true;
863 }
864 }
865 }
866
867 prev_hit_bdr[0] = hit_bdr[0];
868 }
869 }
870
871 MFEM_SYNC_THREAD;
872 if (term_flag[0])
873 {
874 return res[0];
875 }
876 MFEM_FOREACH_THREAD(d, x, Dim) { prev_dx[d] = dx[d]; }
877 MFEM_SYNC_THREAD;
878 ++iter;
879 }
880 }
881};
882
883template <int SDim, InverseElementTransformation::SolverType SType,
884 int max_team_x>
885struct InvTNewtonSolver<Geometry::CUBE, SDim, SType, max_team_x>
886 : public InvTNewtonSolverBase
887{
888 static int ndofs(int ndof1d) { return ndof1d * ndof1d * ndof1d; }
889
890 static constexpr MFEM_HOST_DEVICE int max_dof1d() { return 32; }
891
892 int MFEM_HOST_DEVICE operator()(int idx) const
893 {
894 // parallelize one team per pt
895 constexpr int Dim = 3;
896 int iter = 0;
897 MFEM_SHARED real_t ref_coord[Dim];
898 // contiguous in team_x, then SDim
899 MFEM_SHARED real_t phys_coord[SDim * max_team_x];
900 MFEM_SHARED real_t basis0[max_dof1d()];
901 MFEM_SHARED real_t dbasis0[max_dof1d()];
902 MFEM_SHARED real_t basis1[max_dof1d()];
903 MFEM_SHARED real_t dbasis1[max_dof1d()];
904 MFEM_SHARED real_t basis2[max_dof1d()];
905 MFEM_SHARED real_t dbasis2[max_dof1d()];
906 // contiguous in team_x, SDim, then Dim
907 MFEM_SHARED real_t jac[SDim * Dim * max_team_x];
908 MFEM_SHARED bool term_flag[1];
909 MFEM_SHARED int res[1];
910 MFEM_SHARED real_t dx[Dim];
911 MFEM_SHARED real_t prev_dx[Dim];
912 MFEM_SHARED bool hit_bdr[1];
913 MFEM_SHARED bool prev_hit_bdr[1];
914 real_t phys_tol = 0;
915 if (MFEM_THREAD_ID(x) == 0)
916 {
917 term_flag[0] = false;
918 res[0] = InverseElementTransformation::Unknown;
919 hit_bdr[0] = false;
920 prev_hit_bdr[0] = false;
921 for (int d = 0; d < Dim; ++d)
922 {
923 ref_coord[d] = xptr[idx + d * npts];
924 dx[d] = 0;
925 prev_dx[d] = 0;
926 }
927 for (int d = 0; d < SDim; ++d)
928 {
929 phys_tol += pptr[idx + d * npts] * pptr[idx + d * npts];
930 }
931 phys_tol = fmax(phys_rtol * phys_rtol, phys_tol * phys_rtol * phys_rtol);
932 }
933 // for each iteration
934 while (true)
935 {
936 MFEM_SYNC_THREAD;
937 // compute phys_coord and jacobian at the same time
938 for (int d = 0; d < SDim; ++d)
939 {
940 phys_coord[MFEM_THREAD_ID(x) + d * MFEM_THREAD_SIZE(x)] = 0;
941 }
942 for (int i = 0; i < SDim * Dim; ++i)
943 {
944 jac[MFEM_THREAD_ID(x) + i * MFEM_THREAD_SIZE(x)] = 0;
945 }
946 MFEM_FOREACH_THREAD(j0, x, basis1d.pN)
947 {
948 basis1d.eval_d1(basis0[j0], dbasis0[j0], ref_coord[0], j0);
949 basis1d.eval_d1(basis1[j0], dbasis1[j0], ref_coord[1], j0);
950 basis1d.eval_d1(basis2[j0], dbasis2[j0], ref_coord[2], j0);
951 }
952 MFEM_SYNC_THREAD;
953 MFEM_FOREACH_THREAD(jidx, x, basis1d.pN * basis1d.pN * basis1d.pN)
954 {
955 int idcs[Dim];
956 idcs[0] = jidx % basis1d.pN;
957 idcs[1] = jidx / basis1d.pN;
958 idcs[2] = idcs[1] / basis1d.pN;
959 idcs[1] %= basis1d.pN;
960 for (int d = 0; d < SDim; ++d)
961 {
962 phys_coord[MFEM_THREAD_ID(x) + d * MFEM_THREAD_SIZE(x)] +=
963 mptr[idcs[0] + (idcs[1] + (idcs[2] + (eptr[idx] * SDim + d) *
964 basis1d.pN) *
965 basis1d.pN) *
966 basis1d.pN] *
967 basis0[idcs[0]] * basis1[idcs[1]] * basis2[idcs[2]];
968 jac[MFEM_THREAD_ID(x) + (d + 0 * SDim) * MFEM_THREAD_SIZE(x)] +=
969 mptr[idcs[0] + (idcs[1] + (idcs[2] + (eptr[idx] * SDim + d) *
970 basis1d.pN) *
971 basis1d.pN) *
972 basis1d.pN] *
973 dbasis0[idcs[0]] * basis1[idcs[1]] * basis2[idcs[2]];
974 jac[MFEM_THREAD_ID(x) + (d + 1 * SDim) * MFEM_THREAD_SIZE(x)] +=
975 mptr[idcs[0] + (idcs[1] + (idcs[2] + (eptr[idx] * SDim + d) *
976 basis1d.pN) *
977 basis1d.pN) *
978 basis1d.pN] *
979 basis0[idcs[0]] * dbasis1[idcs[1]] * basis2[idcs[2]];
980 jac[MFEM_THREAD_ID(x) + (d + 2 * SDim) * MFEM_THREAD_SIZE(x)] +=
981 mptr[idcs[0] + (idcs[1] + (idcs[2] + (eptr[idx] * SDim + d) *
982 basis1d.pN) *
983 basis1d.pN) *
984 basis1d.pN] *
985 basis0[idcs[0]] * basis1[idcs[1]] * dbasis2[idcs[2]];
986 }
987 }
988
989 for (int i = (MFEM_THREAD_SIZE(x) >> 1); i > 0; i >>= 1)
990 {
991 MFEM_SYNC_THREAD;
992 int a = MFEM_THREAD_ID(x);
993 int b = a + i;
994 if (a < i && b < basis1d.pN * basis1d.pN * basis1d.pN)
995 {
996 for (int d = 0; d < SDim; ++d)
997 {
998 phys_coord[a + d * MFEM_THREAD_SIZE(x)] +=
999 phys_coord[b + d * MFEM_THREAD_SIZE(x)];
1000 }
1001 for (int j = 0; j < SDim * Dim; ++j)
1002 {
1003 jac[a + j * MFEM_THREAD_SIZE(x)] +=
1004 jac[b + j * MFEM_THREAD_SIZE(x)];
1005 }
1006 }
1007 }
1008 MFEM_SYNC_THREAD;
1009
1010 // rest of newton solve logic is serial, have thread 0 solve for it
1011 if (MFEM_THREAD_ID(x) == 0)
1012 {
1013 // compute objective function
1014 // f(x) = 1/2 |pt - F(x)|^2
1015 real_t dist = 0;
1016 for (int d = 0; d < SDim; ++d)
1017 {
1018 real_t tmp =
1019 pptr[idx + d * npts] - phys_coord[d * MFEM_THREAD_SIZE(x)];
1020 phys_coord[d * MFEM_THREAD_SIZE(x)] = tmp;
1021 dist += tmp * tmp;
1022 }
1023 // phys_coord now contains pt - F(x)
1024 // check for phys_tol convergence
1025 if (dist <= phys_tol)
1026 {
1027 // found solution
1028 res[0] = eltrans::GeometryUtils<Geometry::CUBE>::inside(
1029 ref_coord[0], ref_coord[1], ref_coord[2])
1030 ? InverseElementTransformation::Inside
1031 : InverseElementTransformation::Outside;
1032 tptr[idx] = res[0];
1033 for (int d = 0; d < Dim; ++d)
1034 {
1035 xptr[idx + d * npts] = ref_coord[d];
1036 }
1037 if (iter_ptr)
1038 {
1039 iter_ptr[idx] = iter + 1;
1040 }
1041 term_flag[0] = true;
1042 }
1043 else if (iter >= max_iter)
1044 {
1045 // terminate on max iterations
1046 tptr[idx] = InverseElementTransformation::Unknown;
1047 res[0] = InverseElementTransformation::Unknown;
1048 // might as well save where we failed at
1049 for (int d = 0; d < Dim; ++d)
1050 {
1051 xptr[idx + d * npts] = ref_coord[d];
1052 }
1053 if (iter_ptr)
1054 {
1055 iter_ptr[idx] = iter + 1;
1056 }
1057 term_flag[0] = true;
1058 }
1059 else
1060 {
1061 // compute dx = (pseudo)-inverse jac * [pt - F(x)]
1062 InvTLinSolve<Dim, SDim>::solve(jac, phys_coord, dx);
1063
1064 hit_bdr[0] = ProjectType<Geometry::CUBE, SType>::project(
1065 ref_coord[0], ref_coord[1], ref_coord[2], dx[0], dx[1],
1066 dx[2]);
1067
1068 // check for ref coord convergence or stagnation on boundary
1069 if (hit_bdr[0])
1070 {
1071 if (prev_hit_bdr[0])
1072 {
1073 real_t dx_change = 0;
1074 for (int d = 0; d < Dim; ++d)
1075 {
1076 real_t tmp = dx[d] - prev_dx[d];
1077 dx_change += tmp * tmp;
1078 }
1079 if (dx_change <= ref_tol * ref_tol)
1080 {
1081 // stuck on the boundary
1082 tptr[idx] = InverseElementTransformation::Outside;
1083 res[0] = InverseElementTransformation::Outside;
1084 for (int d = 0; d < Dim; ++d)
1085 {
1086 xptr[idx + d * npts] = ref_coord[d];
1087 }
1088 if (iter_ptr)
1089 {
1090 iter_ptr[idx] = iter + 1;
1091 }
1092 term_flag[0] = true;
1093 }
1094 }
1095 }
1096 }
1097 }
1098
1099 MFEM_SYNC_THREAD;
1100 if (term_flag[0])
1101 {
1102 return res[0];
1103 }
1104 MFEM_FOREACH_THREAD(d, x, Dim) { prev_dx[d] = dx[d]; }
1105 ++iter;
1106 }
1107 }
1108};
1109
1110// data for finding the batch transform initial guess co-located at dofs
1111struct DofFinderBase
1112{
1113 // physical space coordinates of mesh element nodes
1114 const real_t *mptr;
1115 // physical space point coordinates to find
1116 const real_t *pptr;
1117 // element indices
1118 const int *eptr;
1119 // reference space nodes to test
1120 const real_t *qptr;
1121 // initial guess results
1122 real_t *xptr;
1123 eltrans::Lagrange basis1d;
1124
1125 // number of points in pptr
1126 int npts;
1127};
1128
1129template <int Geom, int SDim, int max_team_x>
1130struct PhysDofFinder;
1131
1132template <int SDim, int max_team_x>
1133struct PhysDofFinder<Geometry::SEGMENT, SDim, max_team_x>
1134 : public DofFinderBase
1135{
1136 static int ndofs(int ndofs1d) { return ndofs1d; }
1137
1138 void MFEM_HOST_DEVICE operator()(int idx) const
1139 {
1140 constexpr int Dim = 1;
1141 // L-2 norm squared
1142 MFEM_SHARED real_t dists[max_team_x];
1143 MFEM_SHARED real_t ref_buf[Dim * max_team_x];
1144 MFEM_FOREACH_THREAD(i, x, max_team_x)
1145 {
1146#ifdef MFEM_USE_DOUBLE
1147 dists[i] = HUGE_VAL;
1148#else
1149 dists[i] = HUGE_VALF;
1150#endif
1151 }
1152 MFEM_SYNC_THREAD;
1153 // team serial portion
1154 MFEM_FOREACH_THREAD(i, x, basis1d.pN)
1155 {
1156 real_t phys_coord[SDim];
1157 for (int d = 0; d < SDim; ++d)
1158 {
1159 phys_coord[d] = mptr[i + (d + eptr[idx] * SDim) * basis1d.pN];
1160 }
1161 real_t dist = 0;
1162 // L-2 norm squared
1163 for (int d = 0; d < SDim; ++d)
1164 {
1165 real_t tmp = phys_coord[d] - pptr[idx + d * npts];
1166 dist += tmp * tmp;
1167 }
1168 if (dist < dists[MFEM_THREAD_ID(x)])
1169 {
1170 // closer guess in physical space
1171 dists[MFEM_THREAD_ID(x)] = dist;
1172 ref_buf[MFEM_THREAD_ID(x)] = basis1d.z[i];
1173 }
1174 }
1175 // now do tree reduce
1176 for (int i = (MFEM_THREAD_SIZE(x) >> 1); i > 0; i >>= 1)
1177 {
1178 MFEM_SYNC_THREAD;
1179 if (MFEM_THREAD_ID(x) < i)
1180 {
1181 if (dists[MFEM_THREAD_ID(x) + i] < dists[MFEM_THREAD_ID(x)])
1182 {
1183 dists[MFEM_THREAD_ID(x)] = dists[MFEM_THREAD_ID(x) + i];
1184 ref_buf[MFEM_THREAD_ID(x)] = ref_buf[MFEM_THREAD_ID(x) + i];
1185 }
1186 }
1187 }
1188 // write results out
1189 // not needed in 1D
1190 // MFEM_SYNC_THREAD;
1191 if (MFEM_THREAD_ID(x) == 0)
1192 {
1193 xptr[idx] = ref_buf[0];
1194 }
1195 }
1196};
1197
1198template <int SDim, int max_team_x>
1199struct PhysDofFinder<Geometry::SQUARE, SDim, max_team_x>
1200 : public DofFinderBase
1201{
1202 static int ndofs(int ndofs1d) { return ndofs1d * ndofs1d; }
1203
1204 void MFEM_HOST_DEVICE operator()(int idx) const
1205 {
1206 constexpr int Dim = 2;
1207 int n = basis1d.pN * basis1d.pN;
1208 if (n > max_team_x)
1209 {
1210 n = max_team_x;
1211 }
1212 // L-2 norm squared
1213 MFEM_SHARED real_t dists[max_team_x];
1214 MFEM_SHARED real_t ref_buf[Dim * max_team_x];
1215 MFEM_FOREACH_THREAD(i, x, max_team_x)
1216 {
1217#ifdef MFEM_USE_DOUBLE
1218 dists[i] = HUGE_VAL;
1219#else
1220 dists[i] = HUGE_VALF;
1221#endif
1222 }
1223 // team serial portion
1224 MFEM_FOREACH_THREAD(j, x, basis1d.pN * basis1d.pN)
1225 {
1226 real_t phys_coord[SDim] = {0};
1227 int idcs[Dim];
1228 idcs[0] = j % basis1d.pN;
1229 idcs[1] = j / basis1d.pN;
1230 for (int d = 0; d < SDim; ++d)
1231 {
1232 phys_coord[d] =
1233 mptr[idcs[0] + (idcs[1] + (d + eptr[idx] * SDim) * basis1d.pN) *
1234 basis1d.pN];
1235 }
1236 real_t dist = 0;
1237 // L-2 norm squared
1238 for (int d = 0; d < SDim; ++d)
1239 {
1240 real_t tmp = pptr[idx + d * npts] - phys_coord[d];
1241 dist += tmp * tmp;
1242 }
1243 if (dist < dists[MFEM_THREAD_ID(x)])
1244 {
1245 // closer guess in physical space
1246 dists[MFEM_THREAD_ID(x)] = dist;
1247 for (int d = 0; d < Dim; ++d)
1248 {
1249 ref_buf[MFEM_THREAD_ID(x) + d * n] = basis1d.z[idcs[d]];
1250 }
1251 }
1252 }
1253 // now do tree reduce
1254 for (int i = (MFEM_THREAD_SIZE(x) >> 1); i > 0; i >>= 1)
1255 {
1256 MFEM_SYNC_THREAD;
1257 if (MFEM_THREAD_ID(x) < i)
1258 {
1259 if (dists[MFEM_THREAD_ID(x) + i] < dists[MFEM_THREAD_ID(x)])
1260 {
1261 dists[MFEM_THREAD_ID(x)] = dists[MFEM_THREAD_ID(x) + i];
1262 for (int d = 0; d < Dim; ++d)
1263 {
1264 ref_buf[MFEM_THREAD_ID(x) + d * n] =
1265 ref_buf[MFEM_THREAD_ID(x) + i + d * n];
1266 }
1267 }
1268 }
1269 }
1270 // write results out
1271 MFEM_SYNC_THREAD;
1272 MFEM_FOREACH_THREAD(d, x, Dim) { xptr[idx + d * npts] = ref_buf[d * n]; }
1273 }
1274};
1275
1276template <int SDim, int max_team_x>
1277struct PhysDofFinder<Geometry::CUBE, SDim, max_team_x>
1278 : public DofFinderBase
1279{
1280 static int ndofs(int ndofs1d) { return ndofs1d * ndofs1d * ndofs1d; }
1281
1282 void MFEM_HOST_DEVICE operator()(int idx) const
1283 {
1284 constexpr int Dim = 3;
1285 int n = basis1d.pN * basis1d.pN * basis1d.pN;
1286 if (n > max_team_x)
1287 {
1288 n = max_team_x;
1289 }
1290 // L-2 norm squared
1291 MFEM_SHARED real_t dists[max_team_x];
1292 MFEM_SHARED real_t ref_buf[Dim * max_team_x];
1293 MFEM_FOREACH_THREAD(i, x, max_team_x)
1294 {
1295#ifdef MFEM_USE_DOUBLE
1296 dists[i] = HUGE_VAL;
1297#else
1298 dists[i] = HUGE_VALF;
1299#endif
1300 }
1301 // team serial portion
1302 MFEM_FOREACH_THREAD(j, x, basis1d.pN * basis1d.pN * basis1d.pN)
1303 {
1304 real_t phys_coord[SDim] = {0};
1305 int idcs[Dim];
1306 idcs[0] = j % basis1d.pN;
1307 idcs[1] = j / basis1d.pN;
1308 idcs[2] = idcs[1] / basis1d.pN;
1309 idcs[1] = idcs[1] % basis1d.pN;
1310 for (int d = 0; d < SDim; ++d)
1311 {
1312 phys_coord[d] =
1313 mptr[idcs[0] + (idcs[1] + (idcs[2] + (d + eptr[idx] * SDim) *
1314 basis1d.pN) *
1315 basis1d.pN) *
1316 basis1d.pN];
1317 }
1318 real_t dist = 0;
1319 // L-2 norm squared
1320 for (int d = 0; d < SDim; ++d)
1321 {
1322 real_t tmp = pptr[idx + d * npts] - phys_coord[d];
1323 dist += tmp * tmp;
1324 }
1325 if (dist < dists[MFEM_THREAD_ID(x)])
1326 {
1327 // closer guess in physical space
1328 dists[MFEM_THREAD_ID(x)] = dist;
1329 for (int d = 0; d < Dim; ++d)
1330 {
1331 ref_buf[MFEM_THREAD_ID(x) + d * n] = basis1d.z[idcs[d]];
1332 }
1333 }
1334 }
1335 // now do tree reduce
1336 for (int i = (MFEM_THREAD_SIZE(x) >> 1); i > 0; i >>= 1)
1337 {
1338 MFEM_SYNC_THREAD;
1339 if (MFEM_THREAD_ID(x) < i)
1340 {
1341 if (dists[MFEM_THREAD_ID(x) + i] < dists[MFEM_THREAD_ID(x)])
1342 {
1343 dists[MFEM_THREAD_ID(x)] = dists[MFEM_THREAD_ID(x) + i];
1344 for (int d = 0; d < Dim; ++d)
1345 {
1346 ref_buf[MFEM_THREAD_ID(x) + d * n] =
1347 ref_buf[MFEM_THREAD_ID(x) + i + d * n];
1348 }
1349 }
1350 }
1351 }
1352 // write results out
1353 MFEM_SYNC_THREAD;
1354 MFEM_FOREACH_THREAD(d, x, Dim) { xptr[idx + d * npts] = ref_buf[d * n]; }
1355 }
1356};
1357
1358// data for finding the batch transform initial guess
1359struct NodeFinderBase
1360{
1361 // physical space coordinates of mesh element nodes
1362 const real_t *mptr;
1363 // physical space point coordinates to find
1364 const real_t *pptr;
1365 // element indices
1366 const int *eptr;
1367 // reference space nodes to test
1368 const real_t *qptr;
1369 // initial guess results
1370 real_t *xptr;
1371 eltrans::Lagrange basis1d;
1372
1373 // number of points in pptr
1374 int npts;
1375 // number of points per element along each dimension to test
1376 int nq1d;
1377 // total number of points to test
1378 int nq;
1379};
1380
1381template <int Geom, int SDim, int max_team_x, int max_q1d>
1382struct PhysNodeFinder;
1383
1384template <int SDim, int max_team_x, int max_q1d>
1385struct PhysNodeFinder<Geometry::SEGMENT, SDim, max_team_x, max_q1d>
1386 : public NodeFinderBase
1387{
1388
1389 static int compute_nq(int nq1d) { return nq1d; }
1390
1391 static int ndofs(int ndof1d) { return ndof1d; }
1392
1393 void MFEM_HOST_DEVICE operator()(int idx) const
1394 {
1395 constexpr int Dim = 1;
1396 // L-2 norm squared
1397 MFEM_SHARED real_t dists[max_team_x];
1398 MFEM_SHARED real_t ref_buf[Dim * max_team_x];
1399 MFEM_FOREACH_THREAD(i, x, max_team_x)
1400 {
1401#ifdef MFEM_USE_DOUBLE
1402 dists[i] = HUGE_VAL;
1403#else
1404 dists[i] = HUGE_VALF;
1405#endif
1406 }
1407 MFEM_SYNC_THREAD;
1408 // team serial portion
1409 MFEM_FOREACH_THREAD(i, x, nq)
1410 {
1411 real_t phys_coord[SDim] = {0};
1412 for (int j0 = 0; j0 < basis1d.pN; ++j0)
1413 {
1414 real_t b = basis1d.eval(qptr[i], j0);
1415 for (int d = 0; d < SDim; ++d)
1416 {
1417 phys_coord[d] +=
1418 mptr[j0 + (d + eptr[idx] * SDim) * basis1d.pN] * b;
1419 }
1420 }
1421 real_t dist = 0;
1422 // L-2 norm squared
1423 for (int d = 0; d < SDim; ++d)
1424 {
1425 real_t tmp = phys_coord[d] - pptr[idx + d * npts];
1426 dist += tmp * tmp;
1427 }
1428 if (dist < dists[MFEM_THREAD_ID(x)])
1429 {
1430 // closer guess in physical space
1431 dists[MFEM_THREAD_ID(x)] = dist;
1432 ref_buf[MFEM_THREAD_ID(x)] = qptr[i];
1433 }
1434 }
1435 // now do tree reduce
1436 for (int i = (MFEM_THREAD_SIZE(x) >> 1); i > 0; i >>= 1)
1437 {
1438 MFEM_SYNC_THREAD;
1439 if (MFEM_THREAD_ID(x) < i)
1440 {
1441 if (dists[MFEM_THREAD_ID(x) + i] < dists[MFEM_THREAD_ID(x)])
1442 {
1443 dists[MFEM_THREAD_ID(x)] = dists[MFEM_THREAD_ID(x) + i];
1444 ref_buf[MFEM_THREAD_ID(x)] = ref_buf[MFEM_THREAD_ID(x) + i];
1445 }
1446 }
1447 }
1448 // write results out
1449 // not needed in 1D
1450 // MFEM_SYNC_THREAD;
1451 if (MFEM_THREAD_ID(x) == 0)
1452 {
1453 xptr[idx] = ref_buf[0];
1454 }
1455 }
1456};
1457
1458template <int SDim, int max_team_x, int max_q1d>
1459struct PhysNodeFinder<Geometry::SQUARE, SDim, max_team_x, max_q1d>
1460 : public NodeFinderBase
1461{
1462
1463 static int compute_nq(int nq1d) { return nq1d * nq1d; }
1464
1465 static int ndofs(int ndof1d)
1466 {
1467 return ndof1d * ndof1d;
1468 }
1469
1470 void MFEM_HOST_DEVICE operator()(int idx) const
1471 {
1472 constexpr int Dim = 2;
1473 constexpr int max_dof1d = 32;
1474 int n = (nq < max_team_x) ? nq : max_team_x;
1475 // L-2 norm squared
1476 MFEM_SHARED real_t dists[max_team_x];
1477 MFEM_SHARED real_t ref_buf[Dim * max_team_x];
1478 MFEM_SHARED real_t basis[max_dof1d * max_q1d];
1479 MFEM_FOREACH_THREAD(i, x, max_team_x)
1480 {
1481#ifdef MFEM_USE_DOUBLE
1482 dists[i] = HUGE_VAL;
1483#else
1484 dists[i] = HUGE_VALF;
1485#endif
1486 }
1487 MFEM_FOREACH_THREAD(j0, x, nq1d)
1488 {
1489 for (int i0 = 0; i0 < basis1d.pN; ++i0)
1490 {
1491 basis[j0 + i0 * nq1d] = basis1d.eval(qptr[j0], i0);
1492 }
1493 }
1494 MFEM_SYNC_THREAD;
1495 // team serial portion
1496 MFEM_FOREACH_THREAD(j, x, nq)
1497 {
1498 real_t phys_coord[SDim] = {0};
1499 int idcs[Dim];
1500 idcs[0] = j % nq1d;
1501 idcs[1] = j / nq1d;
1502 for (int i1 = 0; i1 < basis1d.pN; ++i1)
1503 {
1504 for (int i0 = 0; i0 < basis1d.pN; ++i0)
1505 {
1506 real_t b = basis[idcs[0] + i0 * nq1d] * basis[idcs[1] + i1 * nq1d];
1507 for (int d = 0; d < SDim; ++d)
1508 {
1509 phys_coord[d] +=
1510 mptr[i0 + (i1 + (d + eptr[idx] * SDim) * basis1d.pN) *
1511 basis1d.pN] *
1512 b;
1513 }
1514 }
1515 }
1516 real_t dist = 0;
1517 // L-2 norm squared
1518 for (int d = 0; d < SDim; ++d)
1519 {
1520 real_t tmp = pptr[idx + d * npts] - phys_coord[d];
1521 dist += tmp * tmp;
1522 }
1523 if (dist < dists[MFEM_THREAD_ID(x)])
1524 {
1525 // closer guess in physical space
1526 dists[MFEM_THREAD_ID(x)] = dist;
1527 for (int d = 0; d < Dim; ++d)
1528 {
1529 ref_buf[MFEM_THREAD_ID(x) + d * n] = qptr[idcs[d]];
1530 }
1531 }
1532 }
1533 // now do tree reduce
1534 for (int i = (MFEM_THREAD_SIZE(x) >> 1); i > 0; i >>= 1)
1535 {
1536 MFEM_SYNC_THREAD;
1537 if (MFEM_THREAD_ID(x) < i)
1538 {
1539 if (dists[MFEM_THREAD_ID(x) + i] < dists[MFEM_THREAD_ID(x)])
1540 {
1541 dists[MFEM_THREAD_ID(x)] = dists[MFEM_THREAD_ID(x) + i];
1542 for (int d = 0; d < Dim; ++d)
1543 {
1544 ref_buf[MFEM_THREAD_ID(x) + d * n] =
1545 ref_buf[MFEM_THREAD_ID(x) + i + d * n];
1546 }
1547 }
1548 }
1549 }
1550 // write results out
1551 MFEM_SYNC_THREAD;
1552 MFEM_FOREACH_THREAD(d, x, Dim) { xptr[idx + d * npts] = ref_buf[d * n]; }
1553 }
1554};
1555
1556template <int SDim, int max_team_x, int max_q1d>
1557struct PhysNodeFinder<Geometry::CUBE, SDim, max_team_x, max_q1d>
1558 : public NodeFinderBase
1559{
1560
1561 static int compute_nq(int nq1d) { return nq1d * nq1d * nq1d; }
1562
1563 static int ndofs(int ndof1d)
1564 {
1565 return ndof1d * ndof1d * ndof1d;
1566 }
1567
1568 void MFEM_HOST_DEVICE operator()(int idx) const
1569 {
1570 constexpr int Dim = 3;
1571 constexpr int max_dof1d = 32;
1572 int n = (nq < max_team_x) ? nq : max_team_x;
1573 // L-2 norm squared
1574 MFEM_SHARED real_t dists[max_team_x];
1575 MFEM_SHARED real_t ref_buf[Dim * max_team_x];
1576 // contiguous in quad
1577 MFEM_SHARED real_t basis[max_dof1d * max_q1d];
1578 MFEM_FOREACH_THREAD(i, x, max_team_x)
1579 {
1580#ifdef MFEM_USE_DOUBLE
1581 dists[i] = HUGE_VAL;
1582#else
1583 dists[i] = HUGE_VALF;
1584#endif
1585 }
1586 MFEM_FOREACH_THREAD(j0, x, nq1d)
1587 {
1588 for (int i0 = 0; i0 < basis1d.pN; ++i0)
1589 {
1590 basis[j0 + i0 * nq1d] = basis1d.eval(qptr[j0], i0);
1591 }
1592 }
1593 MFEM_SYNC_THREAD;
1594 // team serial portion
1595 MFEM_FOREACH_THREAD(j, x, nq)
1596 {
1597 real_t phys_coord[SDim] = {0};
1598 int idcs[Dim];
1599 idcs[0] = j % nq1d;
1600 idcs[1] = j / nq1d;
1601 idcs[2] = idcs[1] / nq1d;
1602 idcs[1] = idcs[1] % nq1d;
1603 for (int i2 = 0; i2 < basis1d.pN; ++i2)
1604 {
1605 for (int i1 = 0; i1 < basis1d.pN; ++i1)
1606 {
1607 for (int i0 = 0; i0 < basis1d.pN; ++i0)
1608 {
1609 real_t b = basis[idcs[0] + i0 * nq1d] * basis[idcs[1] + i1 * nq1d] *
1610 basis[idcs[2] + i2 * nq1d];
1611 for (int d = 0; d < SDim; ++d)
1612 {
1613 phys_coord[d] +=
1614 mptr[i0 +
1615 (i1 + (i2 + (d + eptr[idx] * SDim) * basis1d.pN) *
1616 basis1d.pN) *
1617 basis1d.pN] *
1618 b;
1619 }
1620 }
1621 }
1622 }
1623 real_t dist = 0;
1624 // L-2 norm squared
1625 for (int d = 0; d < SDim; ++d)
1626 {
1627 real_t tmp = pptr[idx + d * npts] - phys_coord[d];
1628 dist += tmp * tmp;
1629 }
1630 if (dist < dists[MFEM_THREAD_ID(x)])
1631 {
1632 // closer guess in physical space
1633 dists[MFEM_THREAD_ID(x)] = dist;
1634 for (int d = 0; d < Dim; ++d)
1635 {
1636 ref_buf[MFEM_THREAD_ID(x) + d * n] = qptr[idcs[d]];
1637 }
1638 }
1639 }
1640 // now do tree reduce
1641 for (int i = (MFEM_THREAD_SIZE(x) >> 1); i > 0; i >>= 1)
1642 {
1643 MFEM_SYNC_THREAD;
1644 if (MFEM_THREAD_ID(x) < i)
1645 {
1646 if (dists[MFEM_THREAD_ID(x) + i] < dists[MFEM_THREAD_ID(x)])
1647 {
1648 dists[MFEM_THREAD_ID(x)] = dists[MFEM_THREAD_ID(x) + i];
1649 for (int d = 0; d < Dim; ++d)
1650 {
1651 ref_buf[MFEM_THREAD_ID(x) + d * n] =
1652 ref_buf[MFEM_THREAD_ID(x) + i + d * n];
1653 }
1654 }
1655 }
1656 }
1657 // write results out
1658 MFEM_SYNC_THREAD;
1659 MFEM_FOREACH_THREAD(d, x, Dim) { xptr[idx + d * npts] = ref_buf[d * n]; }
1660 }
1661};
1662
1663template <int Geom, int SDim, bool use_device>
1664static void ClosestPhysNodeImpl(int npts, int nelems, int ndof1d, int nq1d,
1665 const real_t *mptr, const real_t *pptr,
1666 const int *eptr, const real_t *nptr,
1667 const real_t *qptr, real_t *xptr)
1668{
1669 constexpr int max_team_x = 64;
1670 constexpr int max_q1d = 128;
1671 PhysNodeFinder<Geom, SDim, max_team_x, max_q1d> func;
1672 MFEM_VERIFY(ndof1d <= 32, "maximum of 32 dofs per dim is allowed");
1673 MFEM_VERIFY(nq1d <= max_q1d, "maximum of 128 test points per dim is allowed");
1674 func.basis1d.z = nptr;
1675 func.basis1d.pN = ndof1d;
1676 func.mptr = mptr;
1677 func.pptr = pptr;
1678 func.eptr = eptr;
1679 func.qptr = qptr;
1680 func.xptr = xptr;
1681 func.npts = npts;
1682 func.nq1d = nq1d;
1683 func.nq = func.compute_nq(nq1d);
1684 if (use_device)
1685 {
1686 // team_x must be a power of 2
1687 int team_x = max_team_x;
1688 while (true)
1689 {
1690 if (team_x <= func.nq)
1691 {
1692 break;
1693 }
1694 team_x >>= 1;
1695 }
1696 team_x = std::min<int>(max_team_x, 2 * team_x);
1697 forall_2D(npts, team_x, 1, func);
1698 }
1699 else
1700 {
1701 forall_switch(false, npts, func);
1702 }
1703}
1704
1705template <int Geom, int SDim, bool use_device>
1706static void ClosestPhysDofImpl(int npts, int nelems, int ndof1d,
1707 const real_t *mptr, const real_t *pptr,
1708 const int *eptr, const real_t *nptr,
1709 real_t *xptr)
1710{
1711 constexpr int max_team_x = 64;
1712 PhysDofFinder<Geom, SDim, max_team_x> func;
1713 func.basis1d.z = nptr;
1714 func.basis1d.pN = ndof1d;
1715 func.mptr = mptr;
1716 func.pptr = pptr;
1717 func.eptr = eptr;
1718 func.xptr = xptr;
1719 func.npts = npts;
1720 if (use_device)
1721 {
1722 int team_x = max_team_x;
1723 int ndof = func.ndofs(ndof1d);
1724 while (true)
1725 {
1726 if (team_x <= ndof)
1727 {
1728 break;
1729 }
1730 team_x >>= 1;
1731 }
1732 team_x = std::min<int>(max_team_x, 2 * team_x);
1733 forall_2D(npts, team_x, 1, func);
1734 }
1735 else
1736 {
1737 forall_switch(false, npts, func);
1738 }
1739}
1740
1741template <int Geom, int SDim, bool use_device>
1742static void ClosestRefDofImpl(int npts, int nelems, int ndof1d,
1743 const real_t *mptr, const real_t *pptr,
1744 const int *eptr, const real_t *nptr,
1745 real_t *xptr)
1746{
1747 // TODO
1748 MFEM_ABORT("ClostestRefDofImpl not implemented yet");
1749}
1750
1751template <int Geom, int SDim, bool use_device>
1752static void ClosestRefNodeImpl(int npts, int nelems, int ndof1d, int nq1d,
1753 const real_t *mptr, const real_t *pptr,
1754 const int *eptr, const real_t *nptr,
1755 const real_t *qptr, real_t *xptr)
1756{
1757 // TODO
1758 MFEM_ABORT("ClostestRefNodeImpl not implemented yet");
1759}
1760
1761template <int Geom, int SDim, InverseElementTransformation::SolverType SType,
1762 bool use_device>
1763static void NewtonSolveImpl(real_t ref_tol, real_t phys_rtol, int max_iter,
1764 int npts, int nelems, int ndof1d,
1765 const real_t *mptr, const real_t *pptr,
1766 const int *eptr, const real_t *nptr, int *tptr,
1767 int *iter_ptr, real_t *xptr)
1768{
1769 constexpr int max_team_x = use_device ? 64 : 1;
1770 InvTNewtonSolver<Geom, SDim, SType, max_team_x> func;
1771 MFEM_VERIFY(ndof1d <= func.max_dof1d(),
1772 "exceeded max_dof1d limit (32 for 2D/3D)");
1773 func.ref_tol = ref_tol;
1774 func.phys_rtol = phys_rtol;
1775 func.max_iter = max_iter;
1776 func.basis1d.z = nptr;
1777 func.basis1d.pN = ndof1d;
1778 func.mptr = mptr;
1779 func.pptr = pptr;
1780 func.eptr = eptr;
1781 func.xptr = xptr;
1782 func.iter_ptr = iter_ptr;
1783 func.tptr = tptr;
1784 func.npts = npts;
1785 if (use_device)
1786 {
1787 int team_x = max_team_x;
1788 int ndof = func.ndofs(ndof1d);
1789 while (true)
1790 {
1791 if (team_x <= ndof)
1792 {
1793 break;
1794 }
1795 team_x >>= 1;
1796 }
1797 team_x = std::min<int>(max_team_x, 2 * team_x);
1798 forall_2D(npts, team_x, 1, func);
1799 }
1800 else
1801 {
1802 forall_switch(false, npts, func);
1803 }
1804}
1805
1806template <int Geom, int SDim,
1807 InverseElementTransformation::SolverType SolverType, int max_team_x>
1808struct InvTNewtonEdgeScanner
1809{
1810 InvTNewtonSolver<Geom, SDim, SolverType, max_team_x> solver;
1811 // 1D ref space initial guesses
1812 const real_t *qptr;
1813 // num 1D points in qptr
1814 int nq1d;
1815
1816 void MFEM_HOST_DEVICE operator()(int idx) const
1817 {
1818 // can only be outside if all test points report outside
1819 int res = InverseElementTransformation::Outside;
1820 for (int i = 0; i < nq1d; ++i)
1821 {
1822 for (int d = 0; d < eltrans::GeometryUtils<Geom>::Dimension();
1823 ++d)
1824 {
1825 if (MFEM_THREAD_ID(x) == 0)
1826 {
1827 for (int d2 = 0;
1828 d2 < eltrans::GeometryUtils<Geom>::Dimension();
1829 ++d2)
1830 {
1831 solver.xptr[idx + d2 * solver.npts] = 0;
1832 }
1833 solver.xptr[idx + d * solver.npts] = qptr[i];
1834 }
1835 MFEM_SYNC_THREAD;
1836 int res_tmp = solver(idx);
1837 switch (res_tmp)
1838 {
1839 case InverseElementTransformation::Inside:
1840 return;
1841 case InverseElementTransformation::Outside:
1842 break;
1843 case InverseElementTransformation::Unknown:
1844 res = InverseElementTransformation::Unknown;
1845 break;
1846 }
1847 if (qptr[i] == 0)
1848 {
1849 // don't repeat test the origin
1850 break;
1851 }
1852 }
1853 }
1854 if (MFEM_THREAD_ID(x) == 0)
1855 {
1856 solver.tptr[idx] = res;
1857 }
1858 }
1859};
1860
1861template <int Geom, int SDim, InverseElementTransformation::SolverType SType,
1862 bool use_device>
1863static void NewtonEdgeScanImpl(real_t ref_tol, real_t phys_rtol, int max_iter,
1864 int npts, int nelems, int ndof1d,
1865 const real_t *mptr, const real_t *pptr,
1866 const int *eptr, const real_t *nptr,
1867 const real_t *qptr, int nq1d, int *tptr,
1868 int *iter_ptr, real_t *xptr)
1869{
1870 constexpr int max_team_x = use_device ? 64 : 1;
1871 InvTNewtonEdgeScanner<Geom, SDim, SType, max_team_x> func;
1872 MFEM_VERIFY(ndof1d <= func.solver.max_dof1d(),
1873 "exceeded max_dof1d limit (32 for 2D/3D)");
1874 func.solver.ref_tol = ref_tol;
1875 func.solver.phys_rtol = phys_rtol;
1876 func.solver.max_iter = max_iter;
1877 func.solver.basis1d.z = nptr;
1878 func.solver.basis1d.pN = ndof1d;
1879 func.solver.mptr = mptr;
1880 func.solver.pptr = pptr;
1881 func.solver.eptr = eptr;
1882 func.solver.xptr = xptr;
1883 func.solver.iter_ptr = iter_ptr;
1884 func.solver.tptr = tptr;
1885 func.solver.npts = npts;
1886 func.nq1d = nq1d;
1887 func.qptr = qptr;
1888 if (use_device)
1889 {
1890 int team_x = max_team_x;
1891 int ndof = func.solver.ndofs(ndof1d);
1892 while (true)
1893 {
1894 if (team_x <= ndof)
1895 {
1896 break;
1897 }
1898 team_x >>= 1;
1899 }
1900 team_x = std::min<int>(max_team_x, 2 * team_x);
1901 forall_2D(npts, team_x, 1, func);
1902 }
1903 else
1904 {
1905 forall_switch(false, npts, func);
1906 }
1907}
1908
1909} // namespace internal
1910
1911template <int Geom, int SDim, bool use_device>
1913BatchInverseElementTransformation::FindClosestPhysPoint::Kernel()
1914{
1915 return internal::ClosestPhysNodeImpl<Geom, SDim, use_device>;
1916}
1917
1918template <int Geom, int SDim, bool use_device>
1920BatchInverseElementTransformation::FindClosestPhysDof::Kernel()
1921{
1922 return internal::ClosestPhysDofImpl<Geom, SDim, use_device>;
1923}
1924
1925template <int Geom, int SDim, bool use_device>
1927BatchInverseElementTransformation::FindClosestRefPoint::Kernel()
1928{
1929 return internal::ClosestRefNodeImpl<Geom, SDim, use_device>;
1930}
1931
1932template <int Geom, int SDim, bool use_device>
1934BatchInverseElementTransformation::FindClosestRefDof::Kernel()
1935{
1936 return internal::ClosestRefDofImpl<Geom, SDim, use_device>;
1937}
1938
1939template <int Geom, int SDim, InverseElementTransformation::SolverType SType,
1940 bool use_device>
1942BatchInverseElementTransformation::NewtonSolve::Kernel()
1943{
1944 return internal::NewtonSolveImpl<Geom, SDim, SType, use_device>;
1945}
1946
1947template <int Geom, int SDim, InverseElementTransformation::SolverType SType,
1948 bool use_device>
1950BatchInverseElementTransformation::NewtonEdgeScan::Kernel()
1951{
1952 return internal::NewtonEdgeScanImpl<Geom, SDim, SType, use_device>;
1953}
1954
1956BatchInverseElementTransformation::FindClosestPhysPoint::Fallback(int, int,
1957 bool)
1958{
1959 MFEM_ABORT("Invalid Geom/SDim combination");
1960}
1961
1963BatchInverseElementTransformation::FindClosestRefPoint::Fallback(int, int,
1964 bool)
1965{
1966 MFEM_ABORT("Invalid Geom/SDim combination");
1967}
1968
1970BatchInverseElementTransformation::NewtonSolve::Fallback(
1972{
1973 MFEM_ABORT("Invalid Geom/SDim/SolverType combination");
1974}
1975
1977BatchInverseElementTransformation::NewtonEdgeScan::Fallback(
1979{
1980 MFEM_ABORT("Invalid Geom/SDim/SolverType combination");
1981}
1982
1984BatchInverseElementTransformation::FindClosestPhysDof::Fallback(int, int, bool)
1985{
1986 MFEM_ABORT("Invalid Geom/SDim combination");
1987}
1988
1990BatchInverseElementTransformation::FindClosestRefDof::Fallback(int, int, bool)
1991{
1992 MFEM_ABORT("Invalid Geom/SDim combination");
1993}
1994
1996{
1997 using BatchInvTr = BatchInverseElementTransformation;
1998
1999 constexpr auto SEGMENT = Geometry::SEGMENT;
2000 constexpr auto SQUARE = Geometry::SQUARE;
2001 constexpr auto CUBE = Geometry::CUBE;
2002 constexpr auto Newton = InverseElementTransformation::Newton;
2003 constexpr auto NewtonElementProject =
2005
2006 BatchInvTr::AddFindClosestSpecialization<SEGMENT, 1>();
2007 BatchInvTr::AddFindClosestSpecialization<SEGMENT, 2>();
2008 BatchInvTr::AddFindClosestSpecialization<SEGMENT, 3>();
2009
2010 BatchInvTr::AddFindClosestSpecialization<SQUARE, 2>();
2011 BatchInvTr::AddFindClosestSpecialization<SQUARE, 3>();
2012
2013 BatchInvTr::AddFindClosestSpecialization<CUBE, 3>();
2014
2015 // NewtonSolve
2016 BatchInvTr::AddNewtonSolveSpecialization<SEGMENT, 1, Newton>();
2017 BatchInvTr::AddNewtonSolveSpecialization<SEGMENT, 2, Newton>();
2018 BatchInvTr::AddNewtonSolveSpecialization<SEGMENT, 3, Newton>();
2019
2020 BatchInvTr::AddNewtonSolveSpecialization<SEGMENT, 1, NewtonElementProject>();
2021 BatchInvTr::AddNewtonSolveSpecialization<SEGMENT, 2, NewtonElementProject>();
2022 BatchInvTr::AddNewtonSolveSpecialization<SEGMENT, 3, NewtonElementProject>();
2023
2024 BatchInvTr::AddNewtonSolveSpecialization<SQUARE, 2, Newton>();
2025 BatchInvTr::AddNewtonSolveSpecialization<SQUARE, 3, Newton>();
2026 BatchInvTr::AddNewtonSolveSpecialization<SQUARE, 2, NewtonElementProject>();
2027 BatchInvTr::AddNewtonSolveSpecialization<SQUARE, 3, NewtonElementProject>();
2028
2029 BatchInvTr::AddNewtonSolveSpecialization<CUBE, 3, Newton>();
2030 BatchInvTr::AddNewtonSolveSpecialization<CUBE, 3, NewtonElementProject>();
2031}
2032
2033/// \endcond DO_NOT_DOCUMENT
2034
2035} // namespace mfem
const T * HostRead() const
Shortcut for mfem::Read(a.GetMemory(), a.Size(), false).
Definition array.hpp:341
void SetSize(int nsize)
Change the logical size of the array, keep existing entries.
Definition array.hpp:758
int Size() const
Return the logical size of the array.
Definition array.hpp:147
T * Write(bool on_dev=true)
Shortcut for mfem::Write(a.GetMemory(), a.Size(), on_dev).
Definition array.hpp:345
const T * Read(bool on_dev=true) const
Shortcut for mfem::Read(a.GetMemory(), a.Size(), on_dev).
Definition array.hpp:337
static int Check(int b_type)
If the input does not represents a valid BasisType, abort with an error; otherwise return the input.
Definition fe_base.hpp:49
@ GaussLobatto
Closed type.
Definition fe_base.hpp:36
@ ClosedUniform
Nodes: x_i = i/(n-1), i=0,...,n-1.
Definition fe_base.hpp:39
static int GetNodalBasis(int qpt_type)
Return the nodal BasisType corresponding to the Quadrature1D type.
Definition fe_base.hpp:82
Performs batch inverse element transforms. Currently only supports non-mixed meshes with SEGMENT,...
Definition eltrans.hpp:406
void UpdateNodes(const GridFunction &nodes, MemoryType d_mt=MemoryType::DEFAULT)
Updates internal datastructures if nodes change. Some version of UpdateNodes must be called at least ...
void(*)(real_t, real_t, int, int, int, int, const real_t *, const real_t *, const int *, const real_t *, const real_t *, int, int *, int *, real_t *) NewtonEdgeScanKernelType
Definition eltrans.hpp:591
void(*)(int, int, int, int, const real_t *, const real_t *, const int *, const real_t *, const real_t *, real_t *) ClosestRefPointKernelType
Definition eltrans.hpp:572
void(*)(int, int, int, const real_t *, const real_t *, const int *, const real_t *, real_t *) ClosestRefDofKernelType
Definition eltrans.hpp:564
void Transform(const Vector &pts, const Array< int > &elems, Array< int > &types, Vector &refs, bool use_device=true, Array< int > *iters=nullptr) const
Performs a batch request of a set of points belonging to the given elements. pts list of physical poi...
void(*)(int, int, int, int, const real_t *, const real_t *, const int *, const real_t *, const real_t *, real_t *) ClosestPhysPointKernelType
Definition eltrans.hpp:546
void(*)(int, int, int, const real_t *, const real_t *, const int *, const real_t *, real_t *) ClosestPhysDofKernelType
Definition eltrans.hpp:555
void(*)(real_t, real_t, int, int, int, int, const real_t *, const real_t *, const int *, const real_t *, int *, int *, real_t *) NewtonKernelType
Definition eltrans.hpp:581
static bool Allows(unsigned long b_mask)
Return true if any of the backends in the backend mask, b_mask, are allowed.
Definition device.hpp:259
static MemoryType GetDeviceMemoryType()
Get the current Device MemoryType. This is the MemoryType used by most MFEM classes when allocating m...
Definition device.hpp:274
Class FiniteElementSpace - responsible for providing FEM view of the mesh, mainly managing the set of...
Definition fespace.hpp:244
bool IsVariableOrder() const
Returns true if the space contains elements of varying polynomial orders.
Definition fespace.hpp:709
virtual const FiniteElement * GetFE(int i) const
Returns pointer to the FiniteElement in the FiniteElementCollection associated with i'th element in t...
Definition fespace.cpp:3835
int GetNE() const
Returns number of elements in the mesh.
Definition fespace.hpp:891
const ElementRestrictionOperator * GetElementRestriction(ElementDofOrdering e_ordering) const
Return an Operator that converts L-vectors to E-vectors.
Definition fespace.cpp:1510
Mesh * GetMesh() const
Returns the mesh.
Definition fespace.hpp:679
int GetVDim() const
Returns the vector dimension of the finite element space.
Definition fespace.hpp:841
const FiniteElement * GetTypicalFE() const
Return GetFE(0) if the local mesh is not empty; otherwise return a typical FE based on the Geometry t...
Definition fespace.cpp:3871
virtual int GetMaxElementOrder() const
Return the maximum polynomial order over all elements.
Definition fespace.hpp:705
Abstract class for all finite elements.
Definition fe_base.hpp:244
int GetOrder() const
Returns the order of the finite element. In the case of anisotropic orders, returns the maximum order...
Definition fe_base.hpp:338
int GetDim() const
Returns the reference space dimension for the finite element.
Definition fe_base.hpp:321
Geometry::Type GetGeomType() const
Returns the Geometry::Type of the reference element.
Definition fe_base.hpp:331
const IntegrationPoint & GetCenter(int GeomType) const
Return the center of the given Geometry::Type, GeomType.
Definition geom.hpp:75
Class for grid function - Vector with associated FE space.
Definition gridfunc.hpp:31
FiniteElementSpace * FESpace()
virtual void GetVectorValue(int i, const IntegrationPoint &ip, Vector &val) const
Definition gridfunc.cpp:473
Class for integration point with weight.
Definition intrules.hpp:35
@ GivenPoint
Use a specific point, set with SetInitialGuess().
Definition eltrans.hpp:214
@ Center
Use the center of the reference element.
Definition eltrans.hpp:205
Mesh data type.
Definition mesh.hpp:64
int Dimension() const
Dimension of the reference space used within the elements.
Definition mesh.hpp:1216
void GetNodes(Vector &node_coord) const
Definition mesh.cpp:9294
int GetNumGeometries(int dim) const
Return the number of geometries of the given dimension present in the mesh.
Definition mesh.cpp:7243
Abstract operator.
Definition operator.hpp:25
virtual void Mult(const Vector &x, Vector &y) const =0
Operator application: y=A(x).
const Array< real_t > * GetPointsArray(const int p, const int btype)
Get the coordinates of the points of the given BasisType, btype.
Definition fe_base.cpp:2344
Vector data type.
Definition vector.hpp:82
virtual const real_t * Read(bool on_dev=true) const
Shortcut for mfem::Read(vec.GetMemory(), vec.Size(), on_dev).
Definition vector.hpp:494
virtual real_t * ReadWrite(bool on_dev=true)
Shortcut for mfem::ReadWrite(vec.GetMemory(), vec.Size(), on_dev).
Definition vector.hpp:510
void SetSize(int s)
Resize the vector to size s.
Definition vector.hpp:558
virtual real_t * HostWrite()
Shortcut for mfem::Write(vec.GetMemory(), vec.Size(), false).
Definition vector.hpp:506
1D Lagrange basis from [0, 1]
int dim
Definition ex24.cpp:53
real_t b
Definition lissajous.cpp:42
real_t a
Definition lissajous.cpp:41
Geometry Geometries
Definition fe.cpp:49
void forall_2D(int N, int X, int Y, lambda &&body)
Definition forall.hpp:762
float real_t
Definition config.hpp:43
MemoryType
Memory types supported by MFEM.
Poly_1D poly1d
Definition fe.cpp:28
void forall_switch(bool use_dev, int N, lambda &&body)
Definition forall.hpp:756
void solve(Mesh &mesh, GridFunction &color, socketstream &sock)
Definition rubik.cpp:4939
@ DEVICE_MASK
Biwise-OR of all device backends.
Definition device.hpp:97
void pts(int iphi, int t, real_t x[])