MFEM v4.7.0
Finite element discretization library
Loading...
Searching...
No Matches
tmop_pa_h3s.cpp
Go to the documentation of this file.
1// Copyright (c) 2010-2024, Lawrence Livermore National Security, LLC. Produced
2// at the Lawrence Livermore National Laboratory. All Rights reserved. See files
3// LICENSE and NOTICE for details. LLNL-CODE-806117.
4//
5// This file is part of the MFEM library. For more information and source code
6// availability visit https://mfem.org.
7//
8// MFEM is free software; you can redistribute it and/or modify it under the
9// terms of the BSD-3 license. We welcome feedback and contributions, see file
10// CONTRIBUTING.md for details.
11
12#include "../tmop.hpp"
13#include "tmop_pa.hpp"
17
18namespace mfem
19{
20
21using Args = kernels::InvariantsEvaluator3D::Buffers;
22
23// dP_302 = (dI2b*dI1b + dI1b*dI2b)/9 + (I1b/9)*ddI2b + (I2b/9)*ddI1b
24static MFEM_HOST_DEVICE inline
25void EvalH_302(const int e, const int qx, const int qy, const int qz,
26 const real_t weight, const real_t *J, DeviceTensor<8,real_t> dP,
27 real_t *B, real_t *dI1b, real_t *ddI1b,
28 real_t *dI2, real_t *dI2b, real_t *ddI2, real_t *ddI2b,
29 real_t *dI3b)
30{
31 constexpr int DIM = 3;
32 kernels::InvariantsEvaluator3D ie(Args()
33 .J(J).B(B)
34 .dI1b(dI1b).ddI1b(ddI1b)
35 .dI2(dI2).dI2b(dI2b).ddI2(ddI2).ddI2b(ddI2b)
36 .dI3b(dI3b));
37 const real_t c1 = weight/9.;
38 const real_t I1b = ie.Get_I1b();
39 const real_t I2b = ie.Get_I2b();
40 ConstDeviceMatrix di1b(ie.Get_dI1b(),DIM,DIM);
41 ConstDeviceMatrix di2b(ie.Get_dI2b(),DIM,DIM);
42 for (int i = 0; i < DIM; i++)
43 {
44 for (int j = 0; j < DIM; j++)
45 {
46 ConstDeviceMatrix ddi1b(ie.Get_ddI1b(i,j),DIM,DIM);
47 ConstDeviceMatrix ddi2b(ie.Get_ddI2b(i,j),DIM,DIM);
48 for (int r = 0; r < DIM; r++)
49 {
50 for (int c = 0; c < DIM; c++)
51 {
52 const real_t dp =
53 (di2b(r,c)*di1b(i,j) + di1b(r,c)*di2b(i,j))
54 + ddi2b(r,c)*I1b
55 + ddi1b(r,c)*I2b;
56 dP(r,c,i,j,qx,qy,qz,e) = c1 * dp;
57 }
58 }
59 }
60 }
61}
62
63// dP_303 = ddI1b/3
64static MFEM_HOST_DEVICE inline
65void EvalH_303(const int e, const int qx, const int qy, const int qz,
66 const real_t weight, const real_t *J, DeviceTensor<8,real_t> dP,
67 real_t *B, real_t *dI1b, real_t *ddI1, real_t *ddI1b,
68 real_t *dI2, real_t *dI2b, real_t *ddI2, real_t *ddI2b,
69 real_t *dI3b, real_t *ddI3b)
70{
71 constexpr int DIM = 3;
72 kernels::InvariantsEvaluator3D ie(Args()
73 .J(J).B(B)
74 .dI1b(dI1b).ddI1(ddI1).ddI1b(ddI1b)
75 .dI2(dI2).dI2b(dI2b).ddI2(ddI2).ddI2b(ddI2b)
76 .dI3b(dI3b).ddI3b(ddI3b));
77 const real_t c1 = weight/3.;
78 for (int i = 0; i < DIM; i++)
79 {
80 for (int j = 0; j < DIM; j++)
81 {
82 ConstDeviceMatrix ddi1b(ie.Get_ddI1b(i,j),DIM,DIM);
83 for (int r = 0; r < DIM; r++)
84 {
85 for (int c = 0; c < DIM; c++)
86 {
87 const real_t dp = ddi1b(r,c);
88 dP(r,c,i,j,qx,qy,qz,e) = c1 * dp;
89 }
90 }
91 }
92 }
93}
94
95// dP_315 = 2*(dI3b x dI3b) + 2*(I3b - 1)*ddI3b
96static MFEM_HOST_DEVICE inline
97void EvalH_315(const int e, const int qx, const int qy, const int qz,
98 const real_t weight, const real_t *J, DeviceTensor<8,real_t> dP,
99 real_t *dI3b, real_t *ddI3b)
100{
101 constexpr int DIM = 3;
102 kernels::InvariantsEvaluator3D ie(Args().
103 J(J).
104 dI3b(dI3b).ddI3b(ddI3b));
105
106 real_t sign_detJ;
107 const real_t I3b = ie.Get_I3b(sign_detJ);
108 ConstDeviceMatrix di3b(ie.Get_dI3b(sign_detJ),DIM,DIM);
109
110 for (int i = 0; i < DIM; i++)
111 {
112 for (int j = 0; j < DIM; j++)
113 {
114 ConstDeviceMatrix ddi3b(ie.Get_ddI3b(i,j),DIM,DIM);
115 for (int r = 0; r < DIM; r++)
116 {
117 for (int c = 0; c < DIM; c++)
118 {
119 const real_t dp = 2.0 * weight * (I3b - 1.0) * ddi3b(r,c) +
120 2.0 * weight * di3b(r,c) * di3b(i,j);
121 dP(r,c,i,j,qx,qy,qz,e) = dp;
122 }
123 }
124 }
125 }
126}
127
128// dP_318 = (I3b - 1/I3b^3)*ddI3b + (1 + 3/I3b^4)*(dI3b x dI3b)
129// Uses the I3b form, as dI3 and ddI3 were not implemented at the time.
130static MFEM_HOST_DEVICE inline
131void EvalH_318(const int e, const int qx, const int qy, const int qz,
132 const real_t weight, const real_t *J, DeviceTensor<8,real_t> dP,
133 real_t *dI3b, real_t *ddI3b)
134{
135 constexpr int DIM = 3;
136 kernels::InvariantsEvaluator3D ie(Args().
137 J(J).
138 dI3b(dI3b).ddI3b(ddI3b));
139
140 real_t sign_detJ;
141 const real_t I3b = ie.Get_I3b(sign_detJ);
142 ConstDeviceMatrix di3b(ie.Get_dI3b(sign_detJ),DIM,DIM);
143
144 for (int i = 0; i < DIM; i++)
145 {
146 for (int j = 0; j < DIM; j++)
147 {
148 ConstDeviceMatrix ddi3b(ie.Get_ddI3b(i,j),DIM,DIM);
149 for (int r = 0; r < DIM; r++)
150 {
151 for (int c = 0; c < DIM; c++)
152 {
153 const real_t dp =
154 weight * (I3b - 1.0/(I3b*I3b*I3b)) * ddi3b(r,c) +
155 weight * (1.0 + 3.0/(I3b*I3b*I3b*I3b)) * di3b(r,c)*di3b(i,j);
156 dP(r,c,i,j,qx,qy,qz,e) = dp;
157 }
158 }
159 }
160 }
161}
162
163// dP_321 = ddI1 + (-2/I3b^3)*(dI2 x dI3b + dI3b x dI2)
164// + (1/I3)*ddI2
165// + (6*I2/I3b^4)*(dI3b x dI3b)
166// + (-2*I2/I3b^3)*ddI3b
167static MFEM_HOST_DEVICE inline
168void EvalH_321(const int e, const int qx, const int qy, const int qz,
169 const real_t weight, const real_t *J, DeviceTensor<8,real_t> dP,
170 real_t *B, real_t *dI1b, real_t *ddI1, real_t *ddI1b,
171 real_t *dI2, real_t *dI2b, real_t *ddI2, real_t *ddI2b,
172 real_t *dI3b, real_t *ddI3b)
173{
174 constexpr int DIM = 3;
175 kernels::InvariantsEvaluator3D ie(Args()
176 .J(J).B(B)
177 .dI1b(dI1b).ddI1(ddI1).ddI1b(ddI1b)
178 .dI2(dI2).dI2b(dI2b).ddI2(ddI2).ddI2b(ddI2b)
179 .dI3b(dI3b).ddI3b(ddI3b));
180 real_t sign_detJ;
181 const real_t I2 = ie.Get_I2();
182 const real_t I3b = ie.Get_I3b(sign_detJ);
183
184 ConstDeviceMatrix di2(ie.Get_dI2(),DIM,DIM);
185 ConstDeviceMatrix di3b(ie.Get_dI3b(sign_detJ),DIM,DIM);
186
187 const real_t c0 = 1.0/I3b;
188 const real_t c1 = weight*c0*c0;
189 const real_t c2 = -2*c0*c1;
190 const real_t c3 = c2*I2;
191
192 for (int i = 0; i < DIM; i++)
193 {
194 for (int j = 0; j < DIM; j++)
195 {
196 ConstDeviceMatrix ddi1(ie.Get_ddI1(i,j),DIM,DIM);
197 ConstDeviceMatrix ddi2(ie.Get_ddI2(i,j),DIM,DIM);
198 ConstDeviceMatrix ddi3b(ie.Get_ddI3b(i,j),DIM,DIM);
199 for (int r = 0; r < DIM; r++)
200 {
201 for (int c = 0; c < DIM; c++)
202 {
203 const real_t dp =
204 weight * ddi1(r,c)
205 + c1 * ddi2(r,c)
206 + c3 * ddi3b(r,c)
207 + c2 * ((di2(r,c)*di3b(i,j) + di3b(r,c)*di2(i,j)))
208 -3*c0*c3 * di3b(r,c)*di3b(i,j);
209 dP(r,c,i,j,qx,qy,qz,e) = dp;
210 }
211 }
212 }
213 }
214}
215
216// H_332 = w0 H_302 + w1 H_315
217static MFEM_HOST_DEVICE inline
218void EvalH_332(const int e, const int qx, const int qy, const int qz,
219 const real_t weight, const real_t *w,
220 const real_t *J, DeviceTensor<8,real_t> dP,
221 real_t *B, real_t *dI1b, real_t *ddI1b,
222 real_t *dI2, real_t *dI2b, real_t *ddI2, real_t *ddI2b,
223 real_t *dI3b, real_t *ddI3b)
224{
225 constexpr int DIM = 3;
226 kernels::InvariantsEvaluator3D ie(Args()
227 .J(J).B(B)
228 .dI1b(dI1b).ddI1b(ddI1b)
229 .dI2(dI2).dI2b(dI2b).ddI2(ddI2).ddI2b(ddI2b)
230 .dI3b(dI3b).ddI3b(ddI3b));
231 real_t sign_detJ;
232 const real_t c1 = weight/9.;
233 const real_t I1b = ie.Get_I1b();
234 const real_t I2b = ie.Get_I2b();
235 const real_t I3b = ie.Get_I3b(sign_detJ);
236 ConstDeviceMatrix di1b(ie.Get_dI1b(),DIM,DIM);
237 ConstDeviceMatrix di2b(ie.Get_dI2b(),DIM,DIM);
238 ConstDeviceMatrix di3b(ie.Get_dI3b(sign_detJ),DIM,DIM);
239 for (int i = 0; i < DIM; i++)
240 {
241 for (int j = 0; j < DIM; j++)
242 {
243 ConstDeviceMatrix ddi1b(ie.Get_ddI1b(i,j),DIM,DIM);
244 ConstDeviceMatrix ddi2b(ie.Get_ddI2b(i,j),DIM,DIM);
245 ConstDeviceMatrix ddi3b(ie.Get_ddI3b(i,j),DIM,DIM);
246 for (int r = 0; r < DIM; r++)
247 {
248 for (int c = 0; c < DIM; c++)
249 {
250 const real_t dp_302 =
251 (di2b(r,c)*di1b(i,j) + di1b(r,c)*di2b(i,j))
252 + ddi2b(r,c)*I1b
253 + ddi1b(r,c)*I2b;
254 const real_t dp_315 = 2.0 * weight * (I3b - 1.0) * ddi3b(r,c) +
255 2.0 * weight * di3b(r,c) * di3b(i,j);
256 dP(r,c,i,j,qx,qy,qz,e) = w[0] * c1 * dp_302 +
257 w[1] * dp_315;
258 }
259 }
260 }
261 }
262}
263
264// H_338 = w0 H_302 + w1 H_318
265static MFEM_HOST_DEVICE inline
266void EvalH_338(const int e, const int qx, const int qy, const int qz,
267 const real_t weight, const real_t *w,
268 const real_t *J, DeviceTensor<8,real_t> dP,
269 real_t *B, real_t *dI1b, real_t *ddI1b,
270 real_t *dI2, real_t *dI2b, real_t *ddI2, real_t *ddI2b,
271 real_t *dI3b, real_t *ddI3b)
272{
273 constexpr int DIM = 3;
274 kernels::InvariantsEvaluator3D ie(Args()
275 .J(J).B(B)
276 .dI1b(dI1b).ddI1b(ddI1b)
277 .dI2(dI2).dI2b(dI2b).ddI2(ddI2).ddI2b(ddI2b)
278 .dI3b(dI3b).ddI3b(ddI3b));
279 real_t sign_detJ;
280 const real_t c1 = weight/9.;
281 const real_t I1b = ie.Get_I1b();
282 const real_t I2b = ie.Get_I2b();
283 const real_t I3b = ie.Get_I3b(sign_detJ);
284 ConstDeviceMatrix di1b(ie.Get_dI1b(),DIM,DIM);
285 ConstDeviceMatrix di2b(ie.Get_dI2b(),DIM,DIM);
286 ConstDeviceMatrix di3b(ie.Get_dI3b(sign_detJ),DIM,DIM);
287 for (int i = 0; i < DIM; i++)
288 {
289 for (int j = 0; j < DIM; j++)
290 {
291 ConstDeviceMatrix ddi1b(ie.Get_ddI1b(i,j),DIM,DIM);
292 ConstDeviceMatrix ddi2b(ie.Get_ddI2b(i,j),DIM,DIM);
293 ConstDeviceMatrix ddi3b(ie.Get_ddI3b(i,j),DIM,DIM);
294 for (int r = 0; r < DIM; r++)
295 {
296 for (int c = 0; c < DIM; c++)
297 {
298 const real_t dp_302 =
299 (di2b(r,c)*di1b(i,j) + di1b(r,c)*di2b(i,j))
300 + ddi2b(r,c)*I1b
301 + ddi1b(r,c)*I2b;
302 const real_t dp_318 =
303 weight * (I3b - 1.0/(I3b*I3b*I3b)) * ddi3b(r,c) +
304 weight * (1.0 + 3.0/(I3b*I3b*I3b*I3b)) * di3b(r,c)*di3b(i,j);
305 dP(r,c,i,j,qx,qy,qz,e) = w[0] * c1 * dp_302 +
306 w[1] * dp_318;
307 }
308 }
309 }
310 }
311}
312
313MFEM_REGISTER_TMOP_KERNELS(void, SetupGradPA_3D,
314 const real_t metric_normal,
315 const Vector &mc_,
316 const Array<real_t> &metric_param,
317 const int mid,
318 const Vector &x_,
319 const int NE,
320 const Array<real_t> &w_,
321 const Array<real_t> &b_,
322 const Array<real_t> &g_,
323 const DenseTensor &j_,
324 Vector &h_,
325 const int d1d,
326 const int q1d)
327{
328 MFEM_VERIFY(mid == 302 || mid == 303 || mid == 315 || mid == 318 ||
329 mid == 321 || mid == 332 || mid == 338,
330 "3D metric not yet implemented!");
331
332 const bool const_m0 = mc_.Size() == 1;
333
334 constexpr int DIM = 3;
335 const int D1D = T_D1D ? T_D1D : d1d;
336 const int Q1D = T_Q1D ? T_Q1D : q1d;
337
338 const auto MC = const_m0 ?
339 Reshape(mc_.Read(), 1, 1, 1, 1) :
340 Reshape(mc_.Read(), Q1D, Q1D, Q1D, NE);
341 const auto b = Reshape(b_.Read(), Q1D, D1D);
342 const auto g = Reshape(g_.Read(), Q1D, D1D);
343 const auto W = Reshape(w_.Read(), Q1D, Q1D, Q1D);
344 const auto J = Reshape(j_.Read(), DIM, DIM, Q1D, Q1D, Q1D, NE);
345 const auto X = Reshape(x_.Read(), D1D, D1D, D1D, DIM, NE);
346 auto H = Reshape(h_.Write(), DIM, DIM, DIM, DIM, Q1D, Q1D, Q1D, NE);
347
348 const real_t *metric_data = metric_param.Read();
349
350 mfem::forall_3D(NE, Q1D, Q1D, Q1D, [=] MFEM_HOST_DEVICE (int e)
351 {
352 const int D1D = T_D1D ? T_D1D : d1d;
353 const int Q1D = T_Q1D ? T_Q1D : q1d;
354 constexpr int MQ1 = T_Q1D ? T_Q1D : T_MAX;
355 constexpr int MD1 = T_D1D ? T_D1D : T_MAX;
356
357 MFEM_SHARED real_t s_BG[2][MQ1*MD1];
358 MFEM_SHARED real_t s_DDD[3][MD1*MD1*MD1];
359 MFEM_SHARED real_t s_DDQ[9][MD1*MD1*MQ1];
360 MFEM_SHARED real_t s_DQQ[9][MD1*MQ1*MQ1];
361 MFEM_SHARED real_t s_QQQ[9][MQ1*MQ1*MQ1];
362
363 kernels::internal::LoadX<MD1>(e,D1D,X,s_DDD);
364 kernels::internal::LoadBG<MD1,MQ1>(D1D,Q1D,b,g,s_BG);
365
366 kernels::internal::GradX<MD1,MQ1>(D1D,Q1D,s_BG,s_DDD,s_DDQ);
367 kernels::internal::GradY<MD1,MQ1>(D1D,Q1D,s_BG,s_DDQ,s_DQQ);
368 kernels::internal::GradZ<MD1,MQ1>(D1D,Q1D,s_BG,s_DQQ,s_QQQ);
369
370 MFEM_FOREACH_THREAD(qz,z,Q1D)
371 {
372 MFEM_FOREACH_THREAD(qy,y,Q1D)
373 {
374 MFEM_FOREACH_THREAD(qx,x,Q1D)
375 {
376 const real_t *Jtr = &J(0,0,qx,qy,qz,e);
377 const real_t detJtr = kernels::Det<3>(Jtr);
378 const real_t m_coef = const_m0 ? MC(0,0,0,0) : MC(qx,qy,qz,e);
379 const real_t weight = metric_normal * m_coef *
380 W(qx,qy,qz) * detJtr;
381
382 // Jrt = Jtr^{-1}
383 real_t Jrt[9];
384 kernels::CalcInverse<3>(Jtr, Jrt);
385
386 // Jpr = X^T.DSh
387 real_t Jpr[9];
388 kernels::internal::PullGrad<MQ1>(Q1D,qx,qy,qz, s_QQQ, Jpr);
389
390 // Jpt = X^T . DS = (X^T.DSh) . Jrt = Jpr . Jrt
391 real_t Jpt[9];
392 kernels::Mult(3,3,3, Jpr, Jrt, Jpt);
393
394 // InvariantsEvaluator3D buffers used for the metrics
395 real_t B[9];
396 real_t dI1b[9], ddI1[9], ddI1b[9];
397 real_t dI2[9], dI2b[9], ddI2[9], ddI2b[9];
398 // reuse local arrays, to help register allocation
399 real_t *dI3b=Jrt, *ddI3b=Jpr;
400
401 // metric->AssembleH
402 if (mid == 302)
403 {
404 EvalH_302(e,qx,qy,qz,weight,Jpt,H,
405 B,dI1b,ddI1b,dI2,dI2b,ddI2,ddI2b,dI3b);
406 }
407 if (mid == 303)
408 {
409 EvalH_303(e,qx,qy,qz,weight,Jpt,H,
410 B,dI1b,ddI1,ddI1b,dI2,dI2b,ddI2,ddI2b,dI3b,ddI3b);
411 }
412 if (mid == 315)
413 {
414 EvalH_315(e,qx,qy,qz,weight,Jpt,H, dI3b,ddI3b);
415 }
416 if (mid == 318)
417 {
418 EvalH_318(e,qx,qy,qz,weight,Jpt,H, dI3b,ddI3b);
419 }
420 if (mid == 321)
421 {
422 EvalH_321(e,qx,qy,qz,weight,Jpt,H,
423 B,dI1b,ddI1,ddI1b,dI2,dI2b,ddI2,ddI2b,dI3b,ddI3b);
424 }
425 if (mid == 332)
426 {
427 EvalH_332(e,qx,qy,qz,weight,metric_data,Jpt,H,
428 B,dI1b,ddI1b,dI2,dI2b,ddI2,ddI2b,dI3b,ddI3b);
429 }
430 if (mid == 338)
431 {
432 EvalH_338(e,qx,qy,qz,weight,metric_data,Jpt,H,
433 B,dI1b,ddI1b,dI2,dI2b,ddI2,ddI2b,dI3b,ddI3b);
434 }
435 } // qx
436 } // qy
437 } // qz
438 });
439}
440
442{
443 const int N = PA.ne;
444 const int D1D = PA.maps->ndof;
445 const int Q1D = PA.maps->nqpt;
446 const int M = metric->Id();
447 const int id = (D1D << 4 ) | Q1D;
448 const real_t mn = metric_normal;
449 const Vector &MC = PA.MC;
450 const DenseTensor &J = PA.Jtr;
451 const Array<real_t> &W = PA.ir->GetWeights();
452 const Array<real_t> &B = PA.maps->B;
453 const Array<real_t> &G = PA.maps->G;
454 Vector &H = PA.H;
455
456 Array<real_t> mp;
457 if (auto m = dynamic_cast<TMOP_Combo_QualityMetric *>(metric))
458 {
459 m->GetWeights(mp);
460 }
461
462 MFEM_LAUNCH_TMOP_KERNEL(SetupGradPA_3D,id,mn,MC,mp,M,X,N,W,B,G,J,H);
463}
464
465} // namespace mfem
const T * Read(bool on_dev=true) const
Shortcut for mfem::Read(a.GetMemory(), a.Size(), on_dev).
Definition array.hpp:317
Rank 3 tensor (array of matrices)
const real_t * Read(bool on_dev=true) const
Shortcut for mfem::Read( GetMemory(), TotalSize(), on_dev).
TMOP_QualityMetric * metric
Definition tmop.hpp:1744
struct mfem::TMOP_Integrator::@23 PA
void AssembleGradPA_3D(const Vector &) const
virtual int Id() const
Return the metric ID.
Definition tmop.hpp:78
Vector data type.
Definition vector.hpp:80
virtual const real_t * Read(bool on_dev=true) const
Shortcut for mfem::Read(vec.GetMemory(), vec.Size(), on_dev).
Definition vector.hpp:474
int Size() const
Returns the size of the vector.
Definition vector.hpp:218
virtual real_t * Write(bool on_dev=true)
Shortcut for mfem::Write(vec.GetMemory(), vec.Size(), on_dev).
Definition vector.hpp:482
real_t b
Definition lissajous.cpp:42
constexpr int DIM
MFEM_HOST_DEVICE void CalcInverse(const T *data, T *inv_data)
Return the inverse of a matrix with given size and data into the matrix with data inv_data.
Definition kernels.hpp:246
MFEM_HOST_DEVICE void Mult(const int height, const int width, const TA *data, const TX *x, TY *y)
Matrix vector multiplication: y = A x, where the matrix A is of size height x width with given data,...
Definition kernels.hpp:163
MFEM_HOST_DEVICE T Det(const T *data)
Compute the determinant of a square matrix of size dim with given data.
Definition kernels.hpp:237
MFEM_REGISTER_TMOP_KERNELS(void, DatcSize, const int NE, const int ncomp, const int sizeidx, const real_t input_min_size, const DenseMatrix &w_, const Array< real_t > &b_, const Vector &x_, const Vector &nc_reduce, DenseTensor &j_, const int d1d, const int q1d)
MFEM_HOST_DEVICE DeviceTensor< sizeof...(Dims), T > Reshape(T *ptr, Dims... dims)
Wrap a pointer as a DeviceTensor with automatically deduced template parameters.
Definition dtensor.hpp:131
DeviceTensor< 2, const real_t > ConstDeviceMatrix
Definition dtensor.hpp:144
kernels::InvariantsEvaluator2D::Buffers Args
void forall_3D(int N, int X, int Y, int Z, lambda &&body)
Definition forall.hpp:775
float real_t
Definition config.hpp:43