MFEM v4.9.0
Finite element discretization library
Loading...
Searching...
No Matches
bilininteg_vecdiffusion_pa.hpp
Go to the documentation of this file.
1// Copyright (c) 2010-2025, Lawrence Livermore National Security, LLC. Produced
2// at the Lawrence Livermore National Laboratory. All Rights reserved. See files
3// LICENSE and NOTICE for details. LLNL-CODE-806117.
4//
5// This file is part of the MFEM library. For more information and source code
6// availability visit https://mfem.org.
7//
8// MFEM is free software; you can redistribute it and/or modify it under the
9// terms of the BSD-3 license. We welcome feedback and contributions, see file
10// CONTRIBUTING.md for details.
11#pragma once
12
18#include "../bilininteg.hpp"
19#include "../kernels.hpp"
20
21using mfem::kernels::internal::SetMaxOf;
22
23namespace mfem
24{
25
26/// \cond DO_NOT_DOCUMENT
27
28namespace internal
29{
30
31template<int T_SDIM = 0, int T_D1D = 0, int T_Q1D = 0>
32void SmemPAVectorDiffusionApply2D(const int NE,
33 const int coeff_vdim,
34 const Array<real_t> &b,
35 const Array<real_t> &g,
36 const Vector &d,
37 const Vector &x,
38 Vector &y,
39 const int sdim = 0,
40 const int d1d = 0,
41 const int q1d = 0)
42{
43 static constexpr int DIM = 2;
44 const int SDIM = T_SDIM ? T_SDIM : sdim;
45 const int D1D = T_D1D ? T_D1D : d1d;
46 const int Q1D = T_Q1D ? T_Q1D : q1d;
47
48 const int PA_SIZE = DIM*DIM;
49 const bool matrix_coeff = coeff_vdim == DIM*DIM;
50
51 const auto B = b.Read(), G = g.Read();
52 const auto DE = Reshape(d.Read(), Q1D, Q1D, PA_SIZE,
53 SDIM * (matrix_coeff ? SDIM : 1), NE);
54 const auto XE = Reshape(x.Read(), D1D, D1D, SDIM, NE);
55 auto YE = Reshape(y.ReadWrite(), D1D, D1D, SDIM, NE);
56
57 mfem::forall_2D(NE, Q1D, Q1D, [=] MFEM_HOST_DEVICE(int e)
58 {
59 constexpr int MD1 = T_D1D > 0 ? SetMaxOf(T_D1D) : DofQuadLimits::MAX_T1D;
60 constexpr int MQ1 = T_Q1D > 0 ? SetMaxOf(T_Q1D) : DofQuadLimits::MAX_T1D;
61
62 MFEM_SHARED real_t sB[MD1][MQ1], sG[MD1][MQ1], smem[MQ1][MQ1];
63 kernels::internal::vd_regs2d_t<3, DIM, MQ1> r0, r1;
64 kernels::internal::LoadMatrix(D1D, Q1D, B, sB);
65 kernels::internal::LoadMatrix(D1D, Q1D, G, sG);
66
67 for (int i = 0; i < SDIM; i++)
68 {
69 for (int j = 0; j < (matrix_coeff ? SDIM : 1); j++)
70 {
71 kernels::internal::LoadDofs2d(e, D1D, i, XE, r0);
72 kernels::internal::Grad2d(D1D, Q1D, smem, sB, sG, r0, r1, i);
73 MFEM_FOREACH_THREAD_DIRECT(qy, y, Q1D)
74 {
75 MFEM_FOREACH_THREAD_DIRECT(qx, x, Q1D)
76 {
77 const real_t gradX = r1[i][0][qy][qx];
78 const real_t gradY = r1[i][1][qy][qx];
79 const int k = matrix_coeff ? (j + i * SDIM) : i;
80 const real_t O11 = DE(qx,qy,0,k,e), O12 = DE(qx,qy,1,k,e);
81 const real_t O21 = DE(qx,qy,2,k,e), O22 = DE(qx,qy,3,k,e);
82 r0[i][0][qy][qx] = (O11 * gradX) + (O12 * gradY);
83 r0[i][1][qy][qx] = (O21 * gradX) + (O22 * gradY);
84 } // qx
85 } // qy
86 MFEM_SYNC_THREAD;
87 kernels::internal::GradTranspose2d(D1D, Q1D, smem, sB, sG, r0, r1, i);
88 const int ij = matrix_coeff ? j : i;
89 kernels::internal::WriteDofs2d(e, D1D, i, ij, r1, YE);
90 } // j
91 } // i
92 });
93}
94
95template<int T_SDIM = 0, int T_D1D = 0, int T_Q1D = 0>
96void SmemPAVectorDiffusionApply3D(const int NE,
97 const int coeff_vdim,
98 const Array<real_t> &b,
99 const Array<real_t> &g,
100 const Vector &d,
101 const Vector &x,
102 Vector &y,
103 const int sdim = 0,
104 const int d1d = 0,
105 const int q1d = 0)
106{
107
108 static constexpr int DIM = 3;
109 const int SDIM = T_SDIM ? T_SDIM : sdim;
110 MFEM_VERIFY(SDIM == 3, "SDIM must be 3");
111 const int D1D = T_D1D ? T_D1D : d1d;
112 const int Q1D = T_Q1D ? T_Q1D : q1d;
113
114 const int PA_SIZE = DIM*DIM;
115 const bool matrix_coeff = coeff_vdim == DIM*DIM;
116
117 const auto B = b.Read(), G = g.Read();
118 const auto DE = Reshape(d.Read(), Q1D, Q1D, Q1D, PA_SIZE,
119 SDIM * (matrix_coeff ? SDIM : 1), NE);
120 const auto XE = Reshape(x.Read(), D1D, D1D, D1D, SDIM, NE);
121 auto YE = Reshape(y.ReadWrite(), D1D, D1D, D1D, SDIM, NE);
122
123 mfem::forall_2D(NE, Q1D, Q1D, [=] MFEM_HOST_DEVICE(int e)
124 {
125 constexpr int MD1 = T_D1D > 0 ? SetMaxOf(T_D1D) : DofQuadLimits::MAX_T1D;
126 constexpr int MQ1 = T_Q1D > 0 ? SetMaxOf(T_Q1D) : DofQuadLimits::MAX_T1D;
127
128 MFEM_SHARED real_t sB[MD1][MQ1], sG[MD1][MQ1], smem[MQ1][MQ1];
129 kernels::internal::vd_regs3d_t<3, DIM, MQ1> r0, r1;
130 kernels::internal::LoadMatrix(D1D, Q1D, B, sB);
131 kernels::internal::LoadMatrix(D1D, Q1D, G, sG);
132
133 for (int i = 0; i < SDIM; i++)
134 {
135 for (int j = 0; j < (matrix_coeff ? SDIM : 1); j++)
136 {
137 kernels::internal::LoadDofs3d(e, D1D, i, XE, r0);
138 kernels::internal::Grad3d(D1D, Q1D, smem, sB, sG, r0, r1, i);
139 for (int qz = 0; qz < Q1D; qz++)
140 {
141 MFEM_FOREACH_THREAD_DIRECT(qy, y, Q1D)
142 {
143 MFEM_FOREACH_THREAD_DIRECT(qx, x, Q1D)
144 {
145 const real_t gradX = r1[i][0][qz][qy][qx];
146 const real_t gradY = r1[i][1][qz][qy][qx];
147 const real_t gradZ = r1[i][2][qz][qy][qx];
148 const int k = matrix_coeff ? (j + i * SDIM) : i;
149 const real_t O11 = DE(qx,qy,qz,0,k,e), O12 = DE(qx,qy,qz,1,k,e),
150 O13 = DE(qx,qy,qz,2,k,e);
151 const real_t O22 = DE(qx,qy,qz,3,k,e), O23 = DE(qx,qy,qz,4,k,e);
152 const real_t O33 = DE(qx,qy,qz,5,k,e);
153 r0[i][0][qz][qy][qx] = (O11*gradX)+(O12*gradY)+(O13*gradZ);
154 r0[i][1][qz][qy][qx] = (O12*gradX)+(O22*gradY)+(O23*gradZ);
155 r0[i][2][qz][qy][qx] = (O13*gradX)+(O23*gradY)+(O33*gradZ);
156 } // qx
157 } // qy
158 } // qz
159 MFEM_SYNC_THREAD;
160 kernels::internal::GradTranspose3d(D1D, Q1D, smem, sB, sG, r0, r1, i);
161 const int ij = matrix_coeff ? j : i;
162 kernels::internal::WriteDofs3d(e, D1D, i, ij, r1, YE);
163 } // j
164 } // i
165 });
166}
167
168} // namespace internal
169
170template<int DIM, int T_SDIM, int T_D1D, int T_Q1D>
172VectorDiffusionIntegrator::ApplyPAKernels::Kernel()
173{
174 if (DIM == 2)
175 {
176 return internal::SmemPAVectorDiffusionApply2D<T_SDIM, T_D1D, T_Q1D>;
177 }
178 else if (DIM == 3)
179 {
180 return internal::SmemPAVectorDiffusionApply3D<T_SDIM, T_D1D, T_Q1D>;
181 }
182 else { MFEM_ABORT("Unsupported kernel"); }
183}
184
186VectorDiffusionIntegrator::ApplyPAKernels::Fallback(int dim, int sdim,
187 int d1d, int q1d)
188{
189 if (dim == 2)
190 {
191 return internal::SmemPAVectorDiffusionApply2D;
192 }
193 else if (dim == 3)
194 {
195 return internal::SmemPAVectorDiffusionApply3D;
196 }
197 else { MFEM_ABORT("Unsupported kernel"); }
198}
199
200/// \endcond DO_NOT_DOCUMENT
201
202} // namespace mfem
void(*)(const int, const int, const Array< real_t > &, const Array< real_t > &, const Vector &, const Vector &, Vector &, const int, const int, const int) ApplyKernelType
arguments: ne, coeff_vdim, B, G, pa_data, x, y, d1d, q1d, vdim
int dim
Definition ex24.cpp:53
real_t b
Definition lissajous.cpp:42
constexpr int SDIM
constexpr int DIM
mfem::real_t real_t
MFEM_HOST_DEVICE DeviceTensor< sizeof...(Dims), T > Reshape(T *ptr, Dims... dims)
Wrap a pointer as a DeviceTensor with automatically deduced template parameters.
Definition dtensor.hpp:138
internal::DofQuadLimits_CUDA DofQuadLimits
Maximum number of 1D DOFs or quadrature points for the architecture currently being compiled for (use...
Definition forall.hpp:100
void forall_2D(int N, int X, int Y, lambda &&body)
Definition forall.hpp:925