MFEM v4.9.0
Finite element discretization library
Loading...
Searching...
No Matches
assemble.hpp
Go to the documentation of this file.
1// Copyright (c) 2010-2025, Lawrence Livermore National Security, LLC. Produced
2// at the Lawrence Livermore National Laboratory. All Rights reserved. See files
3// LICENSE and NOTICE for details. LLNL-CODE-806117.
4//
5// This file is part of the MFEM library. For more information and source code
6// availability visit https://mfem.org.
7//
8// MFEM is free software; you can redistribute it and/or modify it under the
9// terms of the BSD-3 license. We welcome feedback and contributions, see file
10// CONTRIBUTING.md for details.
11#pragma once
12
13#include "util.hpp"
14
15namespace mfem::future
16{
17
18/// @brief Assemble element matrix for three dimensional data.
19///
20/// Note: In the below layouts, total_trial_op_dim is > 1 if
21/// there are more than one inputs dependent on the derivative variable.
22///
23/// @param A Memory for one element matrix with layout
24/// [test_ndof, test_vdim, trial_ndof, trial_vdim].
25/// @param fhat Memory to hold the residual computation with layout
26/// [test_vdim, test_op_dim, nqp].
27/// @param qpdc The quadrature point data cache with data layout
28/// [test_vdim, test_op_dim, trial_vdim, total_trial_op_dim, nqp].
29/// @param itod Input Trial Operator Dimension array. If the trial
30/// operator is not dependent, the dimension is 0 to indicate that.
31/// @param inputs The input field operator types.
32/// @param output The output field operator types.
33/// @param input_dtqmaps The input DofToQuad maps.
34/// @param output_dtqmap The output DofToQuad maps.
35/// @param scratch_shmem Scratch shared memory for computations.
36/// @param q1d The number of quadrature points in one dimension.
37/// @param td1d The number of trial dofs in one dimension.
38template <typename input_fop_ts, size_t num_inputs, typename output_fop_t>
39MFEM_HOST_DEVICE void assemble_element_mat_t3d(
41 const DeviceTensor<3, real_t>& fhat,
44 const input_fop_ts& inputs,
45 const output_fop_t& output,
46 const std::array<DofToQuadMap, num_inputs>& input_dtqmaps,
47 const DofToQuadMap& output_dtqmap,
48 std::array<DeviceTensor<1>, 6>& scratch_shmem,
49 const int& q1d,
50 const int& td1d)
51{
52 constexpr int dimension = 3;
53
54 // [test_vdim, test_op_dim, trial_vdim, total_trial_op_dim, num_qp]
55 const int test_vdim = qpdc.GetShape()[0];
56 const int test_op_dim = qpdc.GetShape()[1];
57 const int trial_vdim = qpdc.GetShape()[2];
58
59 // [num_test_dof, ...]
60 const auto num_test_dof = A.GetShape()[0];
61
62 for (int Jx = 0; Jx < td1d; Jx++)
63 {
64 for (int Jy = 0; Jy < td1d; Jy++)
65 {
66 for (int Jz = 0; Jz < td1d; Jz++)
67 {
68 const int J = Jx + td1d * (Jy + td1d * Jz);
69
70 for (int j = 0; j < trial_vdim; j++)
71 {
72 for (int tv = 0; tv < test_vdim; tv++)
73 {
74 for (int tod = 0; tod < test_op_dim; tod++)
75 {
76 MFEM_FOREACH_THREAD(qx, x, q1d)
77 {
78 MFEM_FOREACH_THREAD(qy, y, q1d)
79 {
80 MFEM_FOREACH_THREAD(qz, z, q1d)
81 {
82 const int q = qx + q1d * (qy + q1d * qz);
83 fhat(tv, tod, q) = 0.0;
84 }
85 }
86 }
87 }
88 }
89
90 // MSVC lambda capture workaround
91 [[maybe_unused]] const auto& inputs_ref = inputs;
92
93 int m_offset = 0;
95 {
96 using fop_t = std::decay_t<decltype(get<s>(inputs_ref))>;
97
98 const int trial_op_dim = static_cast<int>(itod(static_cast<int>(s)));
99 if (trial_op_dim == 0)
100 {
101 // This is inside a lambda so we have to return
102 // instead of idiomatic 'continue'.
103 return;
104 }
105
106 auto& B = input_dtqmaps[s].B;
107 auto& G = input_dtqmaps[s].G;
108
109 if constexpr (is_value_fop<fop_t>::value)
110 {
111 MFEM_FOREACH_THREAD(qx, x, q1d)
112 {
113 MFEM_FOREACH_THREAD(qy, y, q1d)
114 {
115 MFEM_FOREACH_THREAD(qz, z, q1d)
116 {
117 const int q = qx + q1d * (qy + q1d * qz);
118
119 for (int m = 0; m < trial_op_dim; m++)
120 {
121 for (int i = 0; i < test_vdim; i++)
122 {
123 for (int k = 0; k < test_op_dim; k++)
124 {
125 const real_t f = qpdc(i, k, j, m + m_offset, q);
126 fhat(i, k, q) += f * B(qx, 0, Jx) * B(qy, 0, Jy) * B(qz, 0, Jz);
127 }
128 }
129 }
130 }
131 }
132 }
133 }
134 else if constexpr (is_gradient_fop<fop_t>::value)
135 {
136 MFEM_FOREACH_THREAD(qx, x, q1d)
137 {
138 MFEM_FOREACH_THREAD(qy, y, q1d)
139 {
140 MFEM_FOREACH_THREAD(qz, z, q1d)
141 {
142 const int q = qx + q1d * (qy + q1d * qz);
143 for (int m = 0; m < trial_op_dim; m++)
144 {
145 for (int i = 0; i < test_vdim; i++)
146 {
147 for (int k = 0; k < test_op_dim; k++)
148 {
149 const real_t f = qpdc(i, k, j, m + m_offset, q);
150 if (m == 0)
151 {
152 fhat(i, k, q) += f * G(qx, 0, Jx) * B(qy, 0, Jy) * B(qz, 0, Jz);
153 }
154 else if (m == 1)
155 {
156 fhat(i, k, q) += f * B(qx, 0, Jx) * G(qy, 0, Jy) * B(qz, 0, Jz);
157 }
158 else if (m == 2)
159 {
160 fhat(i, k, q) += f * B(qx, 0, Jx) * B(qy, 0, Jy) * G(qz, 0, Jz);
161 }
162 }
163 }
164 }
165 }
166 }
167 }
168 }
169 else
170 {
171#if !(defined(MFEM_USE_CUDA) || defined(MFEM_USE_HIP))
172 MFEM_ABORT("sum factorized sparse matrix assemble routine "
173 "not implemented for field operator");
174#endif
175 }
176 MFEM_SYNC_THREAD;
177 m_offset += trial_op_dim;
178 });
179
180 auto bvtfhat = Reshape(&A(0, 0, J, j), num_test_dof, test_vdim);
181 map_quadrature_data_to_fields(bvtfhat, fhat, output, output_dtqmap,
182 scratch_shmem, dimension, true);
183 }
184 }
185 }
186 }
187}
188
189/// @brief Assemble element matrix for two dimensional data.
190///
191/// Note: In the below layouts, total_trial_op_dim is > 1 if
192/// there are more than one inputs dependent on the derivative variable.
193///
194/// @param A Memory for one element matrix with layout
195/// [test_ndof, test_vdim, trial_ndof, trial_vdim].
196/// @param fhat Memory to hold the residual computation with layout
197/// [test_vdim, test_op_dim, nqp].
198/// @param qpdc The quadrature point data cache with data layout
199/// [test_vdim, test_op_dim, trial_vdim, total_trial_op_dim, nqp].
200/// @param itod Input Trial Operator Dimension array. If the trial
201/// operator is not dependent, the dimension is 0 to indicate that.
202/// @param inputs The input field operator types.
203/// @param output The output field operator types.
204/// @param input_dtqmaps The input DofToQuad maps.
205/// @param output_dtqmap The output DofToQuad maps.
206/// @param scratch_shmem Scratch shared memory for computations.
207/// @param q1d The number of quadrature points in one dimension.
208/// @param td1d The number of trial dofs in one dimension.
209template <typename input_fop_ts, size_t num_inputs, typename output_fop_t>
210MFEM_HOST_DEVICE void assemble_element_mat_t2d(
212 const DeviceTensor<3, real_t>& fhat,
215 const input_fop_ts& inputs,
216 const output_fop_t& output,
217 const std::array<DofToQuadMap, num_inputs>& input_dtqmaps,
218 const DofToQuadMap& output_dtqmap,
219 std::array<DeviceTensor<1>, 6>& scratch_shmem,
220 const int& q1d,
221 const int& td1d)
222{
223 constexpr int dimension = 2;
224
225 // [test_vdim, test_op_dim, trial_vdim, total_trial_op_dim, num_qp]
226 const int test_vdim = qpdc.GetShape()[0];
227 const int test_op_dim = qpdc.GetShape()[1];
228 const int trial_vdim = qpdc.GetShape()[2];
229
230 // [num_test_dof, ...]
231 const auto num_test_dof = A.GetShape()[0];
232
233 for (int Jx = 0; Jx < td1d; Jx++)
234 {
235 for (int Jy = 0; Jy < td1d; Jy++)
236 {
237 const int J = Jy + Jx * td1d;
238
239 for (int j = 0; j < trial_vdim; j++)
240 {
241 for (int tv = 0; tv < test_vdim; tv++)
242 {
243 for (int tod = 0; tod < test_op_dim; tod++)
244 {
245 MFEM_FOREACH_THREAD(qx, x, q1d)
246 {
247 MFEM_FOREACH_THREAD(qy, y, q1d)
248 {
249 const int q = qy + qx * q1d;
250 fhat(tv, tod, q) = 0.0;
251 }
252 }
253 }
254 }
255
256 // MSVC lambda capture workaround
257 [[maybe_unused]] const auto& inputs_ref = inputs;
258
259 int m_offset = 0;
260 for_constexpr<num_inputs>([&](auto s)
261 {
262 using fop_t = std::decay_t<decltype(get<s>(inputs_ref))>;
263
264 const int trial_op_dim = static_cast<int>(itod(static_cast<int>(s)));
265 if (trial_op_dim == 0)
266 {
267 // This is inside a lambda so we have to return
268 // instead of idiomatic 'continue'.
269 return;
270 }
271
272 auto& B = input_dtqmaps[s].B;
273 auto& G = input_dtqmaps[s].G;
274
275 if constexpr (is_value_fop<fop_t>::value)
276 {
277 MFEM_FOREACH_THREAD(qx, x, q1d)
278 {
279 MFEM_FOREACH_THREAD(qy, y, q1d)
280 {
281 const int q = qy + qx * q1d;
282 for (int m = 0; m < trial_op_dim; m++)
283 {
284 for (int i = 0; i < test_vdim; i++)
285 {
286 for (int k = 0; k < test_op_dim; k++)
287 {
288 const real_t f = qpdc(i, k, j, m + m_offset, q);
289 fhat(i, k, q) += f * B(qx, 0, Jx) * B(qy, 0, Jy);
290 }
291 }
292 }
293 }
294 }
295 }
296 else if constexpr (is_gradient_fop<fop_t>::value)
297 {
298 MFEM_FOREACH_THREAD(qx, x, q1d)
299 {
300 MFEM_FOREACH_THREAD(qy, y, q1d)
301 {
302 const int q = qy + qx * q1d;
303 for (int m = 0; m < trial_op_dim; m++)
304 {
305 for (int i = 0; i < test_vdim; i++)
306 {
307 for (int k = 0; k < test_op_dim; k++)
308 {
309 const real_t f = qpdc(i, k, j, m + m_offset, q);
310 if (m == 0)
311 {
312 fhat(i, k, q) += f * B(qx, 0, Jx) * G(qy, 0, Jy);
313 }
314 else
315 {
316 fhat(i, k, q) += f * G(qx, 0, Jx) * B(qy, 0, Jy);
317 }
318 }
319 }
320 }
321 }
322 }
323 }
324 else
325 {
326#if !(defined(MFEM_USE_CUDA) || defined(MFEM_USE_HIP))
327 MFEM_ABORT("sum factorized sparse matrix assemble routine "
328 "not implemented for field operator");
329#endif
330 }
331 MFEM_SYNC_THREAD;
332 m_offset += trial_op_dim;
333 });
334
335 auto bvtfhat = Reshape(&A(0, 0, J, j), num_test_dof, test_vdim);
336 map_quadrature_data_to_fields(bvtfhat, fhat, output, output_dtqmap,
337 scratch_shmem, dimension, true);
338 }
339 }
340 }
341}
342
343/// @brief Assemble element matrix for two or three dimensional data.
344///
345/// Note: In the below layouts, total_trial_op_dim is > 1 if
346/// there are more than one inputs dependent on the derivative variable.
347///
348/// @param A Memory for one element matrix with layout
349/// [test_ndof, test_vdim, trial_ndof, trial_vdim].
350/// @param fhat Memory to hold the residual computation with layout
351/// [test_vdim, test_op_dim, nqp].
352/// @param qpdc The quadrature point data cache with data layout
353/// [test_vdim, test_op_dim, trial_vdim, total_trial_op_dim, nqp].
354/// @param itod Input Trial Operator Dimension array. If the trial
355/// operator is not dependent, the dimension is 0 to indicate that.
356/// @param inputs The input field operator types.
357/// @param output The output field operator types.
358/// @param input_dtqmaps The input DofToQuad maps.
359/// @param output_dtqmap The output DofToQuad maps.
360/// @param scratch_shmem Scratch shared memory for computations.
361/// @param dimension The spatial dimension.
362/// @param q1d The number of quadrature points in one dimension.
363/// @param td1d The number of trial dofs in one dimension.
364/// @param use_sum_factorization Indicator if sum factorization is used.
365template <typename input_fop_ts, size_t num_inputs, typename output_fop_t>
366MFEM_HOST_DEVICE void assemble_element_mat_naive(
368 const DeviceTensor<3, real_t>& fhat,
371 const input_fop_ts& inputs,
372 const output_fop_t& output,
373 const std::array<DofToQuadMap, num_inputs>& input_dtqmaps,
374 const DofToQuadMap& output_dtqmap,
375 std::array<DeviceTensor<1>, 6>& scratch_shmem,
376 const int& dimension,
377 const int& q1d,
378 const int& td1d,
379 const bool& use_sum_factorization)
380{
381 if (use_sum_factorization)
382 {
383 if (dimension == 2)
384 {
385 assemble_element_mat_t2d(A, fhat, qpdc, itod, inputs, output,
386 input_dtqmaps, output_dtqmap, scratch_shmem, q1d, td1d);
387 }
388 else if (dimension == 3)
389 {
390 assemble_element_mat_t3d(A, fhat, qpdc, itod, inputs, output,
391 input_dtqmaps, output_dtqmap, scratch_shmem, q1d, td1d);
392 }
393 }
394 else
395 {
396#if !(defined(MFEM_USE_CUDA) || defined(MFEM_USE_HIP))
397 MFEM_ABORT("element matrix assemble not implemented for non tensor "
398 "product basis");
399#endif
400 }
401}
402
403} // namespace mfem::future
A basic generic Tensor class, appropriate for use on the GPU.
Definition dtensor.hpp:84
MFEM_HOST_DEVICE auto & GetShape() const
Returns the shape of the tensor.
Definition dtensor.hpp:131
constexpr int dimension
This example only works in 3D. Kernels for 2D are not implemented.
Definition hooke.cpp:45
MFEM_HOST_DEVICE void assemble_element_mat_t3d(const DeviceTensor< 4, real_t > &A, const DeviceTensor< 3, real_t > &fhat, const DeviceTensor< 5, const real_t > &qpdc, const DeviceTensor< 1, const real_t > &itod, const input_fop_ts &inputs, const output_fop_t &output, const std::array< DofToQuadMap, num_inputs > &input_dtqmaps, const DofToQuadMap &output_dtqmap, std::array< DeviceTensor< 1 >, 6 > &scratch_shmem, const int &q1d, const int &td1d)
Assemble element matrix for three dimensional data.
Definition assemble.hpp:39
MFEM_HOST_DEVICE void assemble_element_mat_t2d(const DeviceTensor< 4, real_t > &A, const DeviceTensor< 3, real_t > &fhat, const DeviceTensor< 5, const real_t > &qpdc, const DeviceTensor< 1, const real_t > &itod, const input_fop_ts &inputs, const output_fop_t &output, const std::array< DofToQuadMap, num_inputs > &input_dtqmaps, const DofToQuadMap &output_dtqmap, std::array< DeviceTensor< 1 >, 6 > &scratch_shmem, const int &q1d, const int &td1d)
Assemble element matrix for two dimensional data.
Definition assemble.hpp:210
MFEM_HOST_DEVICE void assemble_element_mat_naive(const DeviceTensor< 4, real_t > &A, const DeviceTensor< 3, real_t > &fhat, const DeviceTensor< 5, const real_t > &qpdc, const DeviceTensor< 1, const real_t > &itod, const input_fop_ts &inputs, const output_fop_t &output, const std::array< DofToQuadMap, num_inputs > &input_dtqmaps, const DofToQuadMap &output_dtqmap, std::array< DeviceTensor< 1 >, 6 > &scratch_shmem, const int &dimension, const int &q1d, const int &td1d, const bool &use_sum_factorization)
Assemble element matrix for two or three dimensional data.
Definition assemble.hpp:366
constexpr void for_constexpr(lambda &&f, std::integer_sequence< std::size_t, i ... >)
Definition util.hpp:71
MFEM_HOST_DEVICE void map_quadrature_data_to_fields(DeviceTensor< 2, real_t > &y, const DeviceTensor< 3, real_t > &f, const output_t &output, const DofToQuadMap &dtq, std::array< DeviceTensor< 1 >, 6 > &scratch_mem, const int &dimension, const bool &use_sum_factorization)
MFEM_HOST_DEVICE DeviceTensor< sizeof...(Dims), T > Reshape(T *ptr, Dims... dims)
Wrap a pointer as a DeviceTensor with automatically deduced template parameters.
Definition dtensor.hpp:138
float real_t
Definition config.hpp:46
std::function< real_t(const Vector &)> f(real_t mass_coeff)
Definition lor_mms.hpp:30
DofToQuadMap struct.
Definition util.hpp:1518