MFEM v4.8.0
Finite element discretization library
Loading...
Searching...
No Matches
findpts_local_2.cpp
Go to the documentation of this file.
1// Copyright (c) 2010-2025, Lawrence Livermore National Security, LLC. Produced
2// at the Lawrence Livermore National Laboratory. All Rights reserved. See files
3// LICENSE and NOTICE for details. LLNL-CODE-806117.
4//
5// This file is part of the MFEM library. For more information and source code
6// availability visit https://mfem.org.
7//
8// MFEM is free software; you can redistribute it and/or modify it under the
9// terms of the BSD-3 license. We welcome feedback and contributions, see file
10// CONTRIBUTING.md for details.
11
12#include "../gslib.hpp"
15
16#ifdef MFEM_USE_GSLIB
17
18#ifdef MFEM_HAVE_GCC_PRAGMA_DIAGNOSTIC
19#pragma GCC diagnostic push
20#pragma GCC diagnostic ignored "-Wunused-function"
21#endif
22#include "gslib.h"
23#ifndef GSLIB_RELEASE_VERSION //gslib v1.0.7
24#define GSLIB_RELEASE_VERSION 10007
25#endif
26#ifdef MFEM_HAVE_GCC_PRAGMA_DIAGNOSTIC
27#pragma GCC diagnostic pop
28#endif
29
30#include <climits>
31
32namespace mfem
33{
34#if GSLIB_RELEASE_VERSION >= 10009
35#define CODE_INTERNAL 0
36#define CODE_BORDER 1
37#define CODE_NOT_FOUND 2
38#define DIM 2
39#define DIM2 4
40
41struct findptsElementPoint_t
42{
43 double x[DIM], r[DIM], oldr[DIM], dist2, dist2p, tr;
44 int flags;
45};
46
47struct findptsElementGEdge_t
48{
49 double *x[DIM], *dxdn[2];
50};
51
52struct findptsElementGPT_t
53{
54 double x[DIM], jac[DIM * DIM], hes[4];
55};
56
57struct dbl_range_t
58{
59 double min, max;
60};
61struct obbox_t
62{
63 double c0[DIM], A[DIM * DIM];
64 dbl_range_t x[DIM];
65};
66
67struct findptsLocalHashData_t
68{
69 int hash_n;
70 dbl_range_t bnd[DIM];
71 double fac[DIM];
72 unsigned int *offset;
73 int max;
74};
75
76// Eval the ith Lagrange interpolant and its first derivative at x.
77// Note: lCoeff stores pre-computed coefficients for fast evaluation.
78static MFEM_HOST_DEVICE inline void lag_eval_first_der(double *p0, double x,
79 int i, const double *z,
80 const double *lCoeff,
81 int pN)
82{
83 double u0 = 1, u1 = 0;
84 for (int j = 0; j < pN; ++j)
85 {
86 if (i != j)
87 {
88 double d_j = 2 * (x - z[j]);
89 u1 = d_j * u1 + u0;
90 u0 = d_j * u0;
91 }
92 }
93 p0[i] = lCoeff[i] * u0;
94 p0[pN+i] = 2.0 * lCoeff[i] * u1;
95}
96
97// Eval the ith Lagrange interpolant and its first and second derivative at x.
98// Note: lCoeff stores pre-computed coefficients for fast evaluation.
99static MFEM_HOST_DEVICE inline void lag_eval_second_der(double *p0, double x,
100 int i, const double *z,
101 const double *lCoeff,
102 int pN)
103{
104 double u0 = 1, u1 = 0, u2 = 0;
105 for (int j = 0; j < pN; ++j)
106 {
107 if (i != j)
108 {
109 double d_j = 2 * (x - z[j]);
110 u2 = d_j * u2 + u1;
111 u1 = d_j * u1 + u0;
112 u0 = d_j * u0;
113 }
114 }
115 p0[i] = lCoeff[i] * u0;
116 p0[pN+i] = 2.0 * lCoeff[i] * u1;
117 p0[2*pN+i] = 8.0 * lCoeff[i] * u2;
118}
119
120// Axis-aligned bounding box test.
121static MFEM_HOST_DEVICE inline double AABB_test(const obbox_t *const b,
122 const double x[2])
123{
124 double test = 1;
125 for (int d = 0; d < 2; ++d)
126 {
127 double b_d = (x[d] - b->x[d].min) * (b->x[d].max - x[d]);
128 test = test < 0 ? test : b_d;
129 }
130 return test;
131}
132
133// Axis-aligned bounding box test followed by oriented bounding-box test.
134static MFEM_HOST_DEVICE inline double bbox_test(const obbox_t *const b,
135 const double x[2])
136{
137 const double bxyz = AABB_test(b, x);
138 if (bxyz < 0)
139 {
140 return bxyz;
141 }
142 else
143 {
144 double dxyz[2];
145 for (int d = 0; d < 2; ++d)
146 {
147 dxyz[d] = x[d] - b->c0[d];
148 }
149 double test = 1;
150 for (int d = 0; d < 2; ++d)
151 {
152 double rst = 0;
153 for (int e = 0; e < 2; ++e)
154 {
155 rst += b->A[d * 2 + e] * dxyz[e];
156 }
157 double brst = (rst + 1) * (1 - rst);
158 test = test < 0 ? test : brst;
159 }
160 return test;
161 }
162}
163
164// Element index corresponding to hash mesh that the point is located in.
165static MFEM_HOST_DEVICE inline int hash_index(const findptsLocalHashData_t *p,
166 const double x[2])
167{
168 const int n = p->hash_n;
169 int sum = 0;
170 for (int d = 2 - 1; d >= 0; --d)
171 {
172 sum *= n;
173 int i = (int)floor((x[d] - p->bnd[d].min) * p->fac[d]);
174 sum += i < 0 ? 0 : (n - 1 < i ? n - 1 : i);
175 }
176 return sum;
177}
178
179/*Solve Ax=y. A is row-major */
180static MFEM_HOST_DEVICE inline void lin_solve_2(double x[2], const double A[4],
181 const double y[2])
182{
183 const double idet = 1/(A[0]*A[3] - A[1]*A[2]);
184 x[0] = idet*(A[3]*y[0] - A[1]*y[1]);
185 x[1] = idet*(A[0]*y[1] - A[2]*y[0]);
186}
187
188/* L2 norm squared. */
189static MFEM_HOST_DEVICE inline double l2norm2(const double x[2])
190{
191 return x[0] * x[0] + x[1] * x[1];
192}
193
194/* the bit structure of flags is CSSRR
195 the C bit --- 1<<4 --- is set when the point is converged
196 RR is 0 = 00b if r is unconstrained,
197 1 = 01b if r is constrained at -1
198 2 = 10b if r is constrained at +1
199 SS is similarly for s constraints
200 SSRR = smax,smin,rmax,rmin
201*/
202#define CONVERGED_FLAG (1u<<4)
203#define FLAG_MASK 0x1fu // set all 5 bits to 1
204
205/* Determine number of constraints based on the CTTSSRR bits. */
206static MFEM_HOST_DEVICE inline int num_constrained(const int flags)
207{
208 const int y = flags | flags >> 1;
209 return (y&1u) + (y>>2 & 1u);
210}
211
212// Helper functions. Assumes x = 0, 1, or 2.
213static MFEM_HOST_DEVICE inline int plus_1_mod_2(const int x)
214{
215 return x ^ 1u;
216}
217// assumes x = 1 << i, with i < 4, returns i+1.
218static MFEM_HOST_DEVICE inline int which_bit(const int x)
219{
220 const int y = x & 7u;
221 return (y-(y>>2)) | ((x-1)&4u);
222}
223
224// Get edge index based on the SSRR bits.
225static MFEM_HOST_DEVICE inline int edge_index(const int x)
226{
227 return which_bit(x)-1;
228}
229
230// Gets vertex index based SSRR bits.
231static MFEM_HOST_DEVICE inline int point_index(const int x)
232{
233 return ((x>>1)&1u) | ((x>>2)&2u);
234}
235
236// compute (x,y) and (dxdn, dydn) data for all DOFs along the edge based on
237// edge index. ei=0..3 corresponding to rmin, rmax, smin, smax.
238static MFEM_HOST_DEVICE inline findptsElementGEdge_t
239get_edge(const double *elx[2], const double *wtend, int ei,
240 double *workspace,
241 int &side_init, int j, int pN) // Assumes j < pN
242{
243 findptsElementGEdge_t edge;
244 const int jidx = ei >= 2 ? j : ei*(pN-1);
245 const int kidx = ei >= 2 ? (ei-2)*(pN-1) : j;
246
247 // location of derivatives based on whether we want at r/s=-1 or r/s=+1
248 // ei == 0 and 2 are constrained at -1, 1 and 3 are constrained at +1.
249 const double *wt1 = wtend + (ei%2==0 ? 0 : 1)* pN * 3 + pN;
250
251 for (int d = 0; d < 2; ++d)
252 {
253 edge.x[d] = workspace + d * pN; //x & y coordinates of DOFS along edge
254 edge.dxdn[d] = workspace + (2 + d) * pN; //dxdn and dydn at DOFs along edge
255 }
256
257 if (side_init != (1u << ei))
258 {
259#define ELX(d, j, k) elx[d][j + k * pN] // assumes lexicographic ordering
260 for (int d = 0; d < 2; ++d)
261 {
262 // copy nodal coordinates along the constrained edge
263 edge.x[d][j] = ELX(d, jidx, kidx);
264
265 // compute derivative in normal direction.
266 double sums_k = 0.0;
267 for (int k = 0; k < pN; ++k)
268 {
269 if (ei >= 2)
270 {
271 sums_k += wt1[k] * ELX(d, j, k);
272 }
273 else
274 {
275 sums_k += wt1[k] * ELX(d, k, j);
276 }
277 }
278 edge.dxdn[d][j] = sums_k;
279 }
280#undef ELX
281 }
282 return edge;
283}
284
285//pi=0, r=-1,s=-1
286//pi=1, r=+1,s=-1
287//pi=2, r=-1,s=+1
288//pi=3, r=+1,s=+1
289static MFEM_HOST_DEVICE inline findptsElementGPT_t get_pt(const double *elx[2],
290 const double *wtend,
291 int pi, int pN)
292{
293 findptsElementGPT_t pt;
294
295#define ELX(d, j, k) elx[d][j + k * pN]
296
297 int r_g_wt_offset = pi % 2 == 0 ? 0 : 1; //wtend offset for gradient
298 int s_g_wt_offset = pi < 2 ? 0 : 1;
299 int jidx = pi % 2 == 0 ? 0 : pN-1;
300 int kidx = pi < 2 ? 0 : pN-1;
301
302 pt.x[0] = ELX(0, jidx, kidx);
303 pt.x[1] = ELX(1, jidx, kidx);
304
305 pt.jac[0] = 0.0;
306 pt.jac[1] = 0.0;
307 pt.jac[2] = 0.0;
308 pt.jac[3] = 0.0;
309 for (int j = 0; j < pN; ++j)
310 {
311 //dx/dr
312 pt.jac[0] += wtend[3 * r_g_wt_offset * pN + pN + j] * ELX(0, j, kidx);
313
314 // dy/dr
315 pt.jac[2] += wtend[3 * r_g_wt_offset * pN + pN + j] * ELX(1, j, kidx);
316
317 // dx/ds
318 pt.jac[1] += wtend[3 * s_g_wt_offset * pN + pN + j] * ELX(0, kidx, j);
319
320 // dy/ds
321 pt.jac[3] += wtend[3 * s_g_wt_offset * pN + pN + j] * ELX(1, kidx, j);
322 }
323
324 pt.hes[0] = 0.0;
325 pt.hes[1] = 0.0;
326 pt.hes[2] = 0.0;
327 pt.hes[3] = 0.0;
328 for (int j = 0; j < pN; ++j)
329 {
330 //d2x/dr2
331 pt.hes[0] += wtend[3 * r_g_wt_offset * pN + 2*pN + j] * ELX(0, j, kidx);
332
333 // d2y/dr2
334 pt.hes[2] += wtend[3 * r_g_wt_offset * pN + 2*pN + j] * ELX(1, j, kidx);
335
336 // d2x/ds2
337 pt.hes[1] += wtend[3 * s_g_wt_offset * pN + 2*pN + j] * ELX(0, kidx, j);
338
339 // d2y/ds2
340 pt.hes[3] += wtend[3 * s_g_wt_offset * pN + 2*pN + j] * ELX(1, kidx, j);
341 }
342#undef ELX
343 return pt;
344}
345
346/* Check reduction in objective against prediction, and adjust trust region
347 radius (p->tr) accordingly. May reject the prior step, returning 1; otherwise
348 returns 0 sets res->dist2, res->index, res->x, res->oldr in any event,
349 leaving res->r, res->dr, res->flags to be set when returning 0 */
350static MFEM_HOST_DEVICE bool reject_prior_step_q(findptsElementPoint_t *res,
351 const double resid[2],
352 const findptsElementPoint_t *p,
353 const double tol)
354{
355 const double dist2 = l2norm2(resid);
356 const double decr = p->dist2 - dist2;
357 const double pred = p->dist2p;
358 for (int d = 0; d < 2; ++d)
359 {
360 res->x[d] = p->x[d];
361 res->oldr[d] = p->r[d];
362 }
363 res->dist2 = dist2;
364 if (decr >= 0.01 * pred)
365 {
366 if (decr >= 0.9 * pred)
367 {
368 // very good iteration
369 res->tr = p->tr * 2;
370 }
371 else
372 {
373 // good iteration
374 res->tr = p->tr;
375 }
376 return false;
377 }
378 else
379 {
380 /* reject step; note: the point will pass through this routine
381 again, and we set things up here so it gets classed as a
382 "very good iteration" --- this doubles the trust radius,
383 which is why we divide by 4 below */
384 double v0 = fabs(p->r[0] - p->oldr[0]);
385 double v1 = fabs(p->r[1] - p->oldr[1]);
386 res->tr = (v0 > v1 ? v0 : v1)/4;
387 res->dist2 = p->dist2;
388 for (int d = 0; d < 2; ++d)
389 {
390 res->r[d] = p->oldr[d];
391 }
392 res->flags = p->flags >> 5;
393 res->dist2p = -HUGE_VAL;
394 if (pred < dist2 * tol)
395 {
396 res->flags |= CONVERGED_FLAG;
397 }
398 return true;
399 }
400}
401
402/* Minimize 0.5||x* - x(r)||^2_2 using gradient-descent, with
403 |dr| <= tr and |r0+dr|<=1*/
404static MFEM_HOST_DEVICE void newton_area(findptsElementPoint_t *const res,
405 const double jac[4],
406 const double resid[2],
407 const findptsElementPoint_t *const p,
408 const double tol)
409{
410 const double tr = p->tr;
411 double bnd[4] = {-1, 1, -1, 1};
412 double r0[2];
413 double dr[2], fac;
414 int d, mask, flags;
415 r0[0] = p->r[0], r0[1] = p->r[1];
416
417 mask = 0xfu; // 1111 - MSB to LSB - smax,smin,rmax,rmin
418 for (d = 0; d < 2; ++d)
419 {
420 if (r0[d] - tr > -1)
421 {
422 bnd[2 * d] = r0[d] - tr, mask ^= 1u << (2 * d);
423 }
424 if (r0[d] + tr < 1)
425 {
426 bnd[2 * d + 1] = r0[d] + tr, mask ^= 2u << (2 * d);
427 }
428 }
429
430 // dr = Jac^-1*resid where resid = x^* - x(r)
431 lin_solve_2(dr, jac, resid);
432
433 fac = 1, flags = 0;
434 for (d = 0; d < 2; ++d)
435 {
436 double nr = r0[d] + dr[d];
437 if ((nr - bnd[2 * d]) * (bnd[2 * d + 1] - nr) >= 0)
438 {
439 continue;
440 }
441 if (nr < bnd[2 * d])
442 {
443 double f = (bnd[2 * d] - r0[d]) / dr[d];
444 if (f < fac)
445 {
446 fac = f, flags = 1u << (2 * d);
447 }
448 }
449 else
450 {
451 double f = (bnd[2 * d + 1] - r0[d]) / dr[d];
452 if (f < fac)
453 {
454 fac = f, flags = 2u << (2 * d);
455 }
456 }
457 }
458
459 if (flags == 0)
460 {
461 goto newton_area_fin;
462 }
463
464 for (d = 0; d < 2; ++d)
465 {
466 dr[d] *= fac;
467 }
468
469newton_area_edge :
470 {
471 const int ei = edge_index(flags);
472 const int dn = ei>>1, de = plus_1_mod_2(dn);
473 double facc = 1;
474 int new_flags = 0;
475 double ress[2], y, JtJ, drc;
476 ress[0] = resid[0] - (jac[0] * dr[0] + jac[1] * dr[1]);
477 ress[1] = resid[1] - (jac[2] * dr[0] + jac[3] * dr[1]);
478 /* y = J_u^T res */
479 y = jac[de] * ress[0] + jac[2+de] * ress[1];
480 /* JtJ = J_u^T J_u */
481 JtJ = jac[de] * jac[de] + jac[2+de] * jac[2+de];
482 drc = y / JtJ;
483 {
484 const double rz = r0[de] + dr[de], lb = bnd[2*de], ub = bnd[2*de+1];
485 const double nr = r0[de]+(dr[de]+drc);
486 if ((nr-lb) * (ub-nr) < 0)
487 {
488 if (nr < lb)
489 {
490 double f = (lb-rz)/drc;
491 if (f < facc)
492 {
493 facc=f;
494 new_flags = 1u<<(2*de);
495 }
496 }
497 else
498 {
499 double f = (ub-rz)/drc;
500 if (f < facc)
501 {
502 facc=f;
503 new_flags = 2u<<(2*de);
504 }
505 }
506 }
507 }
508
509 dr[de] += facc * drc;
510 flags |= new_flags;
511 goto newton_area_relax;
512 }
513
514 /* check and possibly relax constraints */
515newton_area_relax :
516 {
517 const int old_flags = flags;
518 double ress[2], y[2];
519 /* res := res_0 - J dr */
520 ress[0] = resid[0] - (jac[0] * dr[0] + jac[1] * dr[1]);
521 ress[1] = resid[1] - (jac[2] * dr[0] + jac[3] * dr[1]);
522 /* y := J^T res */
523 y[0] = jac[0] * ress[0] + jac[2] * ress[1];
524 y[1] = jac[1] * ress[0] + jac[3] * ress[1];
525 for (int dd = 0; dd < 2; ++dd)
526 {
527 int f = flags >> (2 * dd) & 3u;
528 if (f)
529 {
530 dr[dd] = bnd[2 * dd + (f - 1)] - r0[dd];
531 if (dr[dd] * y[dd] < 0)
532 {
533 flags &= ~(3u << (2 * dd));
534 }
535 }
536 }
537 if (flags == old_flags)
538 {
539 goto newton_area_fin;
540 }
541 switch (num_constrained(flags))
542 {
543 case 1:
544 goto newton_area_edge;
545 }
546 }
547
548newton_area_fin:
549 flags &= mask;
550 if (fabs(dr[0]) + fabs(dr[1]) < tol)
551 {
552 flags |= CONVERGED_FLAG;
553 }
554 {
555 const double res0 = resid[0] - (jac[0] * dr[0] + jac[1] * dr[1]);
556 const double res1 = resid[1] - (jac[2] * dr[0] + jac[3] * dr[1]);
557 res->dist2p = resid[0] * resid[0] + resid[1] * resid[1] -
558 (res0 * res0 + res1 * res1);
559 }
560 for (int dd = 0; dd < 2; ++dd)
561 {
562 int f = flags >> (2 * dd) & 3u;
563 res->r[dd] = f == 0 ? r0[dd] + dr[dd] : (f == 1 ? -1 : 1);
564 }
565 res->flags = flags | (p->flags << 5);
566}
567
568// Full Newton solve on the face. One of r/s/t is constrained.
569static MFEM_HOST_DEVICE inline void newton_edge(findptsElementPoint_t *const
570 res,
571 const double jac[4],
572 const double rhes,
573 const double resid[2],
574 const int de,
575 const int dn,
576 int flags,
577 const findptsElementPoint_t *const p,
578 const double tol)
579{
580 const double tr = p->tr;
581 /* A = J^T J - resid_d H_d */
582 const double A = jac[de] * jac[de] + jac[2 + de] * jac[2 + de] - rhes;
583 /* y = J^T r */
584 const double y = jac[de] * resid[0] + jac[2 + de] * resid[1];
585
586 const double oldr = p->r[de];
587 double dr, nr, tdr, tnr;
588 double v, tv;
589 int new_flags = 0, tnew_flags = 0;
590
591#define EVAL(dr) (dr * A - 2 * y) * dr
592
593 /* if A is not SPD, quadratic model has no minimum */
594 if (A > 0)
595 {
596 dr = y / A, nr = oldr + dr;
597 if (fabs(dr) < tr && fabs(nr) < 1)
598 {
599 v = EVAL(dr);
600 goto newton_edge_fin;
601 }
602 }
603
604 if ((nr = oldr - tr) > -1)
605 {
606 dr = -tr;
607 }
608 else
609 {
610 nr = -1, dr = -1 - oldr, new_flags = flags | 1u << (2 * de);
611 }
612 v = EVAL(dr);
613
614 if ((tnr = oldr + tr) < 1)
615 {
616 tdr = tr;
617 }
618 else
619 {
620 tnr = 1, tdr = 1 - oldr, tnew_flags = flags | 2u << (2 * de);
621 }
622 tv = EVAL(tdr);
623
624 if (tv < v)
625 {
626 nr = tnr, dr = tdr, v = tv, new_flags = tnew_flags;
627 }
628
629newton_edge_fin:
630 /* check convergence */
631 if (fabs(dr) < tol)
632 {
633 new_flags |= CONVERGED_FLAG;
634 }
635 res->r[de] = nr;
636 res->r[dn]=p->r[dn];
637 res->dist2p = -v;
638 res->flags = flags | new_flags | (p->flags << 5);
639}
640
641// Find closest mesh node to the sought point.
642static MFEM_HOST_DEVICE void seed_j(const double *elx[2],
643 const double x[2],
644 const double *z, //GLL point locations [-1, 1]
645 double *dist2,
646 double *r[2],
647 const int j,
648 const int pN)
649{
650 dist2[j] = HUGE_VAL;
651
652 double zr = z[j];
653 for (int k = 0; k < pN; ++k)
654 {
655 double zs = z[k];
656 const int jk = j + k * pN;
657 double dx[2];
658 for (int d = 0; d < 2; ++d)
659 {
660 dx[d] = x[d] - elx[d][jk];
661 }
662 const double dist2_jkl = l2norm2(dx);
663 if (dist2[j] > dist2_jkl)
664 {
665 dist2[j] = dist2_jkl;
666 r[0][j] = zr;
667 r[1][j] = zs;
668 }
669 }
670}
671
672/* Compute contribution towards function value and its derivatives in each
673 reference direction. */
674static MFEM_HOST_DEVICE double tensor_ig2_j(double *g_partials,
675 const double *Jr,
676 const double *Dr,
677 const double *Js,
678 const double *Ds,
679 const double *u,
680 const int j,
681 const int pN)
682{
683 double uJs = 0.0;
684 double uDs = 0.0;
685 for (int k = 0; k < pN; ++k)
686 {
687 uJs += u[j + k * pN] * Js[k];
688 uDs += u[j + k * pN] * Ds[k];
689 }
690
691 g_partials[0] = uJs * Dr[j];
692 g_partials[1] = uDs * Jr[j];
693 return uJs * Jr[j];
694}
695
696template<int T_D1D = 0>
697static void FindPointsLocal2D_Kernel(const int npt,
698 const double tol,
699 const double *x,
700 const int point_pos_ordering,
701 const double *xElemCoord,
702 const int nel,
703 const double *wtend,
704 const double *boxinfo,
705 const int hash_n,
706 const double *hashMin,
707 const double *hashFac,
708 unsigned int *hashOffset,
709 unsigned int *const code_base,
710 unsigned int *const el_base,
711 double *const r_base,
712 double *const dist2_base,
713 const double *gll1D,
714 const double *lagcoeff,
715 const int pN = 0)
716{
717#define MAX_CONST(a, b) (((a) > (b)) ? (a) : (b))
718 const int MD1 = T_D1D ? T_D1D : DofQuadLimits::MAX_D1D;
719 const int D1D = T_D1D ? T_D1D : pN;
720 const int p_NE = D1D*D1D;
721 const int p_NEL = nel*p_NE;
722 MFEM_VERIFY(MD1 <= DofQuadLimits::MAX_D1D,
723 "Increase Max allowable polynomial order.");
724 MFEM_VERIFY(D1D != 0, "Polynomial order not specified.");
725 const int nThreads = D1D*DIM;
726
727 mfem::forall_2D(npt, nThreads, 1, [=] MFEM_HOST_DEVICE (int i)
728 {
729 // 3D1D for seed, 10D1D+6 for area, 3D1D+9 for edge
730 constexpr int size1 = 10*MD1 + 6;
731 constexpr int size2 = MD1*4; // edge constraints
732 constexpr int size3 = MD1*MD1*MD1*DIM; // local element coordinates
733
734 MFEM_SHARED double r_workspace[size1];
735 MFEM_SHARED findptsElementPoint_t el_pts[2];
736
737 MFEM_SHARED double constraint_workspace[size2];
738 MFEM_SHARED int edge_init;
739
740 MFEM_SHARED double elem_coords[MD1 <= 6 ? size3 : 1];
741
742 double *r_workspace_ptr;
743 findptsElementPoint_t *fpt, *tmp;
744 MFEM_FOREACH_THREAD(j,x,nThreads)
745 {
746 r_workspace_ptr = r_workspace;
747 fpt = el_pts + 0;
748 tmp = el_pts + 1;
749 }
750 MFEM_SYNC_THREAD;
751
752 int id_x = point_pos_ordering == 0 ? i : i*DIM;
753 int id_y = point_pos_ordering == 0 ? i+npt : i*DIM+1;
754 double x_i[2] = {x[id_x], x[id_y]};
755
756 unsigned int *code_i = code_base + i;
757 unsigned int *el_i = el_base + i;
758 double *r_i = r_base + DIM * i;
759 double *dist2_i = dist2_base + i;
760
761 // Initialize the code and dist
762 *code_i = CODE_NOT_FOUND;
763 *dist2_i = HUGE_VAL;
764
765 //// map_points_to_els ////
766 findptsLocalHashData_t hash;
767 for (int d = 0; d < DIM; ++d)
768 {
769 hash.bnd[d].min = hashMin[d];
770 hash.fac[d] = hashFac[d];
771 }
772 hash.hash_n = hash_n;
773 hash.offset = hashOffset;
774 const int hi = hash_index(&hash, x_i);
775 const unsigned int *elp = hash.offset+hash.offset[hi],
776 *const ele = hash.offset+hash.offset[hi+1];
777
778 for (; elp != ele; ++elp)
779 {
780 //elp
781 const unsigned int el = *elp;
782
783 // construct obbox_t on the fly from data
784 obbox_t box;
785 int n_box_ents = 3*DIM + DIM2;
786
787 for (int idx = 0; idx < DIM; ++idx)
788 {
789 box.c0[idx] = boxinfo[n_box_ents*el + idx];
790 box.x[idx].min = boxinfo[n_box_ents*el + DIM + idx];
791 box.x[idx].max = boxinfo[n_box_ents*el + 2*DIM + idx];
792 }
793
794 for (int idx = 0; idx < DIM2; ++idx)
795 {
796 box.A[idx] = boxinfo[n_box_ents*el + 3*DIM + idx];
797 }
798
799 if (bbox_test(&box, x_i) < 0) { continue; }
800
801 //// findpts_local ////
802 {
803 // read element coordinates into shared memory
804 if (MD1 <= 6)
805 {
806 MFEM_FOREACH_THREAD(j,x,nThreads)
807 {
808 const int qp = j % D1D;
809 const int d = j / D1D;
810 for (int k = 0; k < D1D; ++k)
811 {
812 const int jk = qp + k * D1D;
813 elem_coords[jk + d*p_NE] =
814 xElemCoord[jk + el*p_NE + d*p_NEL];
815 }
816 }
817 MFEM_SYNC_THREAD;
818 }
819
820 const double *elx[DIM];
821 for (int d = 0; d < DIM; d++)
822 {
823 elx[d] = MD1<= 6 ? &elem_coords[d*p_NE] :
824 xElemCoord + d*p_NEL + el * p_NE;
825 }
826
827 //// findpts_el ////
828 {
829 MFEM_FOREACH_THREAD(j,x,1)
830 {
831 fpt->dist2 = HUGE_VAL;
832 fpt->dist2p = 0;
833 fpt->tr = 1;
834 edge_init = 0;
835 }
836 MFEM_FOREACH_THREAD(j,x,DIM)
837 {
838 fpt->x[j] = x_i[j];
839 }
840 MFEM_SYNC_THREAD;
841
842 //// seed ////
843 {
844 double *dist2_temp = r_workspace_ptr;
845 double *r_temp[DIM];
846 for (int d = 0; d < DIM; ++d)
847 {
848 r_temp[d] = dist2_temp + (1 + d) * D1D;
849 }
850
851 MFEM_FOREACH_THREAD(j,x,D1D)
852 {
853 seed_j(elx, x_i, gll1D, dist2_temp, r_temp, j, D1D);
854 }
855 MFEM_SYNC_THREAD;
856
857 MFEM_FOREACH_THREAD(j,x,1)
858 {
859 fpt->dist2 = HUGE_VAL;
860 for (int jj = 0; jj < D1D; ++jj)
861 {
862 if (dist2_temp[jj] < fpt->dist2)
863 {
864 fpt->dist2 = dist2_temp[jj];
865 for (int d = 0; d < DIM; ++d)
866 {
867 fpt->r[d] = r_temp[d][jj];
868 }
869 }
870 }
871 }
872 MFEM_SYNC_THREAD;
873 } //seed done
874
875 MFEM_FOREACH_THREAD(j,x,1)
876 {
877 tmp->dist2 = HUGE_VAL;
878 tmp->dist2p = 0;
879 tmp->tr = 1;
880 tmp->flags = 0;
881 }
882 MFEM_FOREACH_THREAD(j,x,DIM)
883 {
884 tmp->x[j] = fpt->x[j];
885 tmp->r[j] = fpt->r[j];
886 }
887 MFEM_SYNC_THREAD;
888
889 for (int step = 0; step < 50; step++)
890 {
891 switch (num_constrained(tmp->flags & FLAG_MASK))
892 {
893 case 0: // findpt_area
894 {
895 double *wtr = r_workspace_ptr;
896 double *resid = wtr + 4 * D1D;
897 double *jac = resid + 2;
898 double *resid_temp = jac + 4;
899 double *jac_temp = resid_temp + 2 * D1D;
900
901 MFEM_FOREACH_THREAD(j,x,nThreads)
902 {
903 const int qp = j % D1D;
904 const int d = j / D1D;
905 lag_eval_first_der(wtr + 2*d*D1D, tmp->r[d], qp,
906 gll1D, lagcoeff, D1D);
907 }
908 MFEM_SYNC_THREAD;
909
910 MFEM_FOREACH_THREAD(j,x,nThreads)
911 {
912 const int qp = j % D1D;
913 const int d = j / D1D;
914 double *idx = jac_temp+2*d+4*qp;
915 resid_temp[d+qp*2] = tensor_ig2_j(idx, wtr,
916 wtr+D1D,
917 wtr+2*D1D,
918 wtr+3*D1D,
919 elx[d], qp,
920 D1D);
921 }
922 MFEM_SYNC_THREAD;
923
924 MFEM_FOREACH_THREAD(l,x,2)
925 {
926 resid[l] = tmp->x[l];
927 for (int j = 0; j < D1D; ++j)
928 {
929 resid[l] -= resid_temp[l + j * 2];
930 }
931 }
932 MFEM_FOREACH_THREAD(l,x,4)
933 {
934 jac[l] = 0;
935 for (int j = 0; j < D1D; ++j)
936 {
937 jac[l] += jac_temp[l + j * 4];
938 }
939 }
940 MFEM_SYNC_THREAD;
941
942 MFEM_FOREACH_THREAD(l,x,1)
943 {
944 if (!reject_prior_step_q(fpt, resid, tmp, tol))
945 {
946 newton_area(fpt, jac, resid, tmp, tol);
947 }
948 }
949 MFEM_SYNC_THREAD;
950 break;
951 } //case 0
952 case 1: // findpt_edge
953 {
954 const int ei = edge_index(tmp->flags & FLAG_MASK);
955 const int dn = ei>>1, de = plus_1_mod_2(dn);
956
957 double *wt = r_workspace_ptr;
958 double *resid = wt + 3 * D1D;
959 double *jac = resid + 2; //jac will be row-major
960 double *hess = jac + 2 * 2;
961 findptsElementGEdge_t edge;
962
963 MFEM_FOREACH_THREAD(j,x,D1D)
964 {
965 edge = get_edge(elx, wtend, ei,
966 constraint_workspace,
967 edge_init, j,
968 D1D);
969 }
970 MFEM_SYNC_THREAD;
971
972 // compute basis function info upto 2nd derivative.
973 MFEM_FOREACH_THREAD(j,x,D1D)
974 {
975 if (j == 0) { edge_init = 1u << ei; }
976 lag_eval_second_der(wt, tmp->r[de], j, gll1D,
977 lagcoeff, D1D);
978 }
979 MFEM_SYNC_THREAD;
980
981 MFEM_FOREACH_THREAD(d,x,DIM)
982 {
983 resid[d] = tmp->x[d];
984 jac[2*d] = 0.0;
985 jac[2*d + 1] = 0.0;
986 hess[d] = 0.0;
987 for (int k = 0; k < D1D; ++k)
988 {
989 resid[d] -= wt[k]*edge.x[d][k];
990 jac[2*d] += wt[k]*edge.dxdn[d][k];
991 jac[2*d+1] += wt[k+D1D]*edge.x[d][k];
992 hess[d] += wt[k+2*D1D]*edge.x[d][k];
993 }
994 }
995 MFEM_SYNC_THREAD;
996
997 // at this point, the Jacobian will be out of
998 // order for edge index 2 and 3 so we need to swap
999 // columns
1000 MFEM_FOREACH_THREAD(j,x,1)
1001 {
1002 if (ei >= 2)
1003 {
1004 double temp1 = jac[1],
1005 temp2 = jac[3];
1006 jac[1] = jac[0];
1007 jac[3] = jac[2];
1008 jac[0] = temp1;
1009 jac[2] = temp2;
1010 }
1011 hess[2] = resid[0]*hess[0] + resid[1]*hess[1];
1012 }
1013 MFEM_SYNC_THREAD;
1014
1015 MFEM_FOREACH_THREAD(l,x,1)
1016 {
1017 // check prior step //
1018 if (!reject_prior_step_q(fpt, resid, tmp, tol))
1019 {
1020 // steep is negative of the gradient of the
1021 // objective, so it tells direction of
1022 // decrease.
1023 double steep = resid[0] * jac[ dn]
1024 + resid[1] * jac[2+dn];
1025
1026 if (steep * tmp->r[dn] < 0)
1027 {
1028 newton_area(fpt, jac, resid, tmp, tol);
1029 }
1030 else
1031 {
1032 newton_edge(fpt, jac, hess[2], resid, de,
1033 dn, tmp->flags & FLAG_MASK,
1034 tmp, tol);
1035 }
1036 }
1037 }
1038 MFEM_SYNC_THREAD;
1039 break;
1040 }
1041 case 2: // findpts_pt
1042 {
1043 MFEM_FOREACH_THREAD(j,x,1)
1044 {
1045 int de = 0;
1046 int dn = 0;
1047 const int pi = point_index(tmp->flags & FLAG_MASK);
1048 const findptsElementGPT_t gpt =
1049 get_pt(elx, wtend, pi, D1D);
1050
1051 const double *const pt_x = gpt.x;
1052 const double *const jac = gpt.jac;
1053 const double *const hes = gpt.hes;
1054
1055 double resid[DIM], steep[DIM], sr[DIM];
1056 for (int d = 0; d < DIM; ++d)
1057 {
1058 resid[d] = fpt->x[d] - pt_x[d];
1059 }
1060 steep[0] = jac[0]*resid[0] + jac[2]*resid[1];
1061 steep[1] = jac[1]*resid[0] + jac[3]*resid[1];
1062
1063 sr[0] = steep[0]*tmp->r[0];
1064 sr[1] = steep[1]*tmp->r[1];
1065
1066 if (!reject_prior_step_q(fpt, resid, tmp, tol))
1067 {
1068 if (sr[0]<0)
1069 {
1070 if (sr[1]<0)
1071 {
1072 newton_area(fpt, jac, resid, tmp, tol);
1073 }
1074 else
1075 {
1076 de=0;
1077 dn=1;
1078 const double rh = resid[0]*hes[de]+
1079 resid[1]*hes[2+de];
1080 newton_edge(fpt, jac, rh, resid, de, dn,
1081 tmp->flags &
1082 FLAG_MASK &
1083 (3u<<(2*dn)),
1084 tmp, tol);
1085 }
1086 }
1087 else if (sr[1]<0)
1088 {
1089 de=1;
1090 dn=0;
1091 const double rh = resid[0]*hes[de]+
1092 resid[1]*hes[2+de];
1093 newton_edge(fpt, jac, rh, resid, de, dn,
1094 tmp->flags &
1095 FLAG_MASK &
1096 (3u<<(2*dn)),
1097 tmp, tol);
1098 }
1099 else
1100 {
1101 fpt->r[0] = tmp->r[0];
1102 fpt->r[1] = tmp->r[1];
1103 fpt->dist2p = 0;
1104 fpt->flags = tmp->flags | CONVERGED_FLAG;
1105 }
1106 }
1107 }
1108 MFEM_SYNC_THREAD;
1109 break;
1110 } //case 3
1111 } //switch
1112 if (fpt->flags & CONVERGED_FLAG)
1113 {
1114 break;
1115 }
1116 MFEM_SYNC_THREAD;
1117 MFEM_FOREACH_THREAD(j,x,1)
1118 {
1119 *tmp = *fpt;
1120 }
1121 MFEM_SYNC_THREAD;
1122 } //for int step < 50
1123 } //findpts_el
1124
1125 bool converged_internal = (fpt->flags&FLAG_MASK)==CONVERGED_FLAG;
1126 if (*code_i == CODE_NOT_FOUND || converged_internal ||
1127 fpt->dist2 < *dist2_i)
1128 {
1129 MFEM_FOREACH_THREAD(j,x,1)
1130 {
1131 *el_i = el;
1132 *code_i = converged_internal ? CODE_INTERNAL :
1133 CODE_BORDER;
1134 *dist2_i = fpt->dist2;
1135 }
1136 MFEM_FOREACH_THREAD(j,x,DIM)
1137 {
1138 r_i[j] = fpt->r[j];
1139 }
1140 MFEM_SYNC_THREAD;
1141 if (converged_internal)
1142 {
1143 break;
1144 }
1145 }
1146 } //findpts_local
1147 } //elp
1148 });
1149}
1150
1152 int point_pos_ordering,
1153 Array<unsigned int> &code,
1154 Array<unsigned int> &elem, Vector &ref,
1155 Vector &dist, int npt)
1156{
1157 if (npt == 0)
1158 {
1159 return;
1160 }
1161 auto pp = point_pos.Read();
1162 auto pgslm = gsl_mesh.Read();
1163 auto pwt = DEV.wtend.Read();
1164 auto pbb = DEV.bb.Read();
1165 auto plhm = DEV.loc_hash_min.Read();
1166 auto plhf = DEV.loc_hash_fac.Read();
1167 auto plho = DEV.loc_hash_offset.ReadWrite();
1168 auto pcode = code.Write();
1169 auto pelem = elem.Write();
1170 auto pref = ref.Write();
1171 auto pdist = dist.Write();
1172 auto pgll1d = DEV.gll1d.ReadWrite();
1173 auto plc = DEV.lagcoeff.Read();
1174
1175 switch (DEV.dof1d)
1176 {
1177 case 2:
1178 return FindPointsLocal2D_Kernel<2>(
1179 npt, DEV.newt_tol, pp, point_pos_ordering, pgslm, NE_split_total, pwt,
1180 pbb, DEV.h_nx, plhm, plhf, plho, pcode, pelem, pref, pdist,
1181 pgll1d, plc);
1182 case 3:
1183 return FindPointsLocal2D_Kernel<3>(
1184 npt, DEV.newt_tol, pp, point_pos_ordering, pgslm, NE_split_total, pwt,
1185 pbb, DEV.h_nx, plhm, plhf, plho, pcode, pelem, pref, pdist,
1186 pgll1d, plc);
1187 case 4:
1188 return FindPointsLocal2D_Kernel<4>(
1189 npt, DEV.newt_tol, pp, point_pos_ordering, pgslm, NE_split_total, pwt,
1190 pbb, DEV.h_nx, plhm, plhf, plho, pcode, pelem, pref, pdist,
1191 pgll1d, plc);
1192 case 5:
1193 return FindPointsLocal2D_Kernel<5>(
1194 npt, DEV.newt_tol, pp, point_pos_ordering, pgslm, NE_split_total, pwt,
1195 pbb, DEV.h_nx, plhm, plhf, plho, pcode, pelem, pref, pdist,
1196 pgll1d, plc);
1197 default:
1198 return FindPointsLocal2D_Kernel(npt, DEV.newt_tol, pp, point_pos_ordering,
1199 pgslm, NE_split_total, pwt, pbb, DEV.h_nx,
1200 plhm, plhf, plho, pcode, pelem,
1201 pref, pdist, pgll1d, plc, DEV.dof1d);
1202 }
1203}
1204#undef CODE_INTERNAL
1205#undef CODE_BORDER
1206#undef CODE_NOT_FOUND
1207#else
1208void FindPointsGSLIB::FindPointsLocal2(const Vector &point_pos,
1209 int point_pos_ordering,
1210 Array<unsigned int> &code,
1211 Array<unsigned int> &elem, Vector &ref,
1212 Vector &dist, int npt) {};
1213#endif
1214} // namespace mfem
1215
1216#endif //ifdef MFEM_USE_GSLIB
T * Write(bool on_dev=true)
Shortcut for mfem::Write(a.GetMemory(), a.Size(), on_dev).
Definition array.hpp:345
struct mfem::FindPointsGSLIB::@24 DEV
void FindPointsLocal2(const Vector &point_pos, int point_pos_ordering, Array< unsigned int > &gsl_code_dev_l, Array< unsigned int > &gsl_elem_dev_l, Vector &gsl_ref_l, Vector &gsl_dist_l, int npt)
Vector data type.
Definition vector.hpp:82
virtual const real_t * Read(bool on_dev=true) const
Shortcut for mfem::Read(vec.GetMemory(), vec.Size(), on_dev).
Definition vector.hpp:494
virtual real_t * Write(bool on_dev=true)
Shortcut for mfem::Write(vec.GetMemory(), vec.Size(), on_dev).
Definition vector.hpp:502
real_t b
Definition lissajous.cpp:42
constexpr int DIM
real_t u(const Vector &xvec)
Definition lor_mms.hpp:22
void forall_2D(int N, int X, int Y, lambda &&body)
Definition forall.hpp:762
std::function< real_t(const Vector &)> f(real_t mass_coeff)
Definition lor_mms.hpp:30
real_t p(const Vector &x, real_t t)