44 const input_fop_ts& inputs,
45 const output_fop_t& output,
46 const std::array<DofToQuadMap, num_inputs>& input_dtqmaps,
55 const int test_vdim = qpdc.
GetShape()[0];
56 const int test_op_dim = qpdc.
GetShape()[1];
57 const int trial_vdim = qpdc.
GetShape()[2];
60 const auto num_test_dof = A.
GetShape()[0];
62 for (
int Jx = 0; Jx < td1d; Jx++)
64 for (
int Jy = 0; Jy < td1d; Jy++)
66 for (
int Jz = 0; Jz < td1d; Jz++)
68 const int J = Jx + td1d * (Jy + td1d * Jz);
70 for (
int j = 0; j < trial_vdim; j++)
72 for (
int tv = 0; tv < test_vdim; tv++)
74 for (
int tod = 0; tod < test_op_dim; tod++)
76 MFEM_FOREACH_THREAD(qx, x, q1d)
78 MFEM_FOREACH_THREAD(qy, y, q1d)
80 MFEM_FOREACH_THREAD(qz, z, q1d)
82 const int q = qx + q1d * (qy + q1d * qz);
83 fhat(tv, tod, q) = 0.0;
91 [[maybe_unused]]
const auto& inputs_ref = inputs;
96 using fop_t = std::decay_t<decltype(get<s>(inputs_ref))>;
98 const int trial_op_dim =
static_cast<int>(itod(
static_cast<int>(s)));
99 if (trial_op_dim == 0)
106 auto& B = input_dtqmaps[s].B;
107 auto& G = input_dtqmaps[s].G;
111 MFEM_FOREACH_THREAD(qx, x, q1d)
113 MFEM_FOREACH_THREAD(qy, y, q1d)
115 MFEM_FOREACH_THREAD(qz, z, q1d)
117 const int q = qx + q1d * (qy + q1d * qz);
119 for (
int m = 0; m < trial_op_dim; m++)
121 for (
int i = 0; i < test_vdim; i++)
123 for (
int k = 0; k < test_op_dim; k++)
125 const real_t f = qpdc(i, k, j, m + m_offset, q);
126 fhat(i, k, q) +=
f * B(qx, 0, Jx) * B(qy, 0, Jy) * B(qz, 0, Jz);
136 MFEM_FOREACH_THREAD(qx, x, q1d)
138 MFEM_FOREACH_THREAD(qy, y, q1d)
140 MFEM_FOREACH_THREAD(qz, z, q1d)
142 const int q = qx + q1d * (qy + q1d * qz);
143 for (
int m = 0; m < trial_op_dim; m++)
145 for (
int i = 0; i < test_vdim; i++)
147 for (
int k = 0; k < test_op_dim; k++)
149 const real_t f = qpdc(i, k, j, m + m_offset, q);
152 fhat(i, k, q) +=
f * G(qx, 0, Jx) * B(qy, 0, Jy) * B(qz, 0, Jz);
156 fhat(i, k, q) +=
f * B(qx, 0, Jx) * G(qy, 0, Jy) * B(qz, 0, Jz);
160 fhat(i, k, q) +=
f * B(qx, 0, Jx) * B(qy, 0, Jy) * G(qz, 0, Jz);
171#if !(defined(MFEM_USE_CUDA) || defined(MFEM_USE_HIP))
172 MFEM_ABORT(
"sum factorized sparse matrix assemble routine "
173 "not implemented for field operator");
177 m_offset += trial_op_dim;
180 auto bvtfhat =
Reshape(&A(0, 0, J, j), num_test_dof, test_vdim);
215 const input_fop_ts& inputs,
216 const output_fop_t& output,
217 const std::array<DofToQuadMap, num_inputs>& input_dtqmaps,
226 const int test_vdim = qpdc.
GetShape()[0];
227 const int test_op_dim = qpdc.
GetShape()[1];
228 const int trial_vdim = qpdc.
GetShape()[2];
231 const auto num_test_dof = A.
GetShape()[0];
233 for (
int Jx = 0; Jx < td1d; Jx++)
235 for (
int Jy = 0; Jy < td1d; Jy++)
237 const int J = Jy + Jx * td1d;
239 for (
int j = 0; j < trial_vdim; j++)
241 for (
int tv = 0; tv < test_vdim; tv++)
243 for (
int tod = 0; tod < test_op_dim; tod++)
245 MFEM_FOREACH_THREAD(qx, x, q1d)
247 MFEM_FOREACH_THREAD(qy, y, q1d)
249 const int q = qy + qx * q1d;
250 fhat(tv, tod, q) = 0.0;
257 [[maybe_unused]]
const auto& inputs_ref = inputs;
262 using fop_t = std::decay_t<decltype(get<s>(inputs_ref))>;
264 const int trial_op_dim =
static_cast<int>(itod(
static_cast<int>(s)));
265 if (trial_op_dim == 0)
272 auto& B = input_dtqmaps[s].B;
273 auto& G = input_dtqmaps[s].G;
277 MFEM_FOREACH_THREAD(qx, x, q1d)
279 MFEM_FOREACH_THREAD(qy, y, q1d)
281 const int q = qy + qx * q1d;
282 for (
int m = 0; m < trial_op_dim; m++)
284 for (
int i = 0; i < test_vdim; i++)
286 for (
int k = 0; k < test_op_dim; k++)
288 const real_t f = qpdc(i, k, j, m + m_offset, q);
289 fhat(i, k, q) +=
f * B(qx, 0, Jx) * B(qy, 0, Jy);
298 MFEM_FOREACH_THREAD(qx, x, q1d)
300 MFEM_FOREACH_THREAD(qy, y, q1d)
302 const int q = qy + qx * q1d;
303 for (
int m = 0; m < trial_op_dim; m++)
305 for (
int i = 0; i < test_vdim; i++)
307 for (
int k = 0; k < test_op_dim; k++)
309 const real_t f = qpdc(i, k, j, m + m_offset, q);
312 fhat(i, k, q) +=
f * B(qx, 0, Jx) * G(qy, 0, Jy);
316 fhat(i, k, q) +=
f * G(qx, 0, Jx) * B(qy, 0, Jy);
326#if !(defined(MFEM_USE_CUDA) || defined(MFEM_USE_HIP))
327 MFEM_ABORT(
"sum factorized sparse matrix assemble routine "
328 "not implemented for field operator");
332 m_offset += trial_op_dim;
335 auto bvtfhat =
Reshape(&A(0, 0, J, j), num_test_dof, test_vdim);
371 const input_fop_ts& inputs,
372 const output_fop_t& output,
373 const std::array<DofToQuadMap, num_inputs>& input_dtqmaps,
379 const bool& use_sum_factorization)
381 if (use_sum_factorization)
386 input_dtqmaps, output_dtqmap, scratch_shmem, q1d, td1d);
391 input_dtqmaps, output_dtqmap, scratch_shmem, q1d, td1d);
396#if !(defined(MFEM_USE_CUDA) || defined(MFEM_USE_HIP))
397 MFEM_ABORT(
"element matrix assemble not implemented for non tensor "
MFEM_HOST_DEVICE void assemble_element_mat_t3d(const DeviceTensor< 4, real_t > &A, const DeviceTensor< 3, real_t > &fhat, const DeviceTensor< 5, const real_t > &qpdc, const DeviceTensor< 1, const real_t > &itod, const input_fop_ts &inputs, const output_fop_t &output, const std::array< DofToQuadMap, num_inputs > &input_dtqmaps, const DofToQuadMap &output_dtqmap, std::array< DeviceTensor< 1 >, 6 > &scratch_shmem, const int &q1d, const int &td1d)
Assemble element matrix for three dimensional data.
MFEM_HOST_DEVICE void assemble_element_mat_t2d(const DeviceTensor< 4, real_t > &A, const DeviceTensor< 3, real_t > &fhat, const DeviceTensor< 5, const real_t > &qpdc, const DeviceTensor< 1, const real_t > &itod, const input_fop_ts &inputs, const output_fop_t &output, const std::array< DofToQuadMap, num_inputs > &input_dtqmaps, const DofToQuadMap &output_dtqmap, std::array< DeviceTensor< 1 >, 6 > &scratch_shmem, const int &q1d, const int &td1d)
Assemble element matrix for two dimensional data.
MFEM_HOST_DEVICE void assemble_element_mat_naive(const DeviceTensor< 4, real_t > &A, const DeviceTensor< 3, real_t > &fhat, const DeviceTensor< 5, const real_t > &qpdc, const DeviceTensor< 1, const real_t > &itod, const input_fop_ts &inputs, const output_fop_t &output, const std::array< DofToQuadMap, num_inputs > &input_dtqmaps, const DofToQuadMap &output_dtqmap, std::array< DeviceTensor< 1 >, 6 > &scratch_shmem, const int &dimension, const int &q1d, const int &td1d, const bool &use_sum_factorization)
Assemble element matrix for two or three dimensional data.
MFEM_HOST_DEVICE void map_quadrature_data_to_fields(DeviceTensor< 2, real_t > &y, const DeviceTensor< 3, real_t > &f, const output_t &output, const DofToQuadMap &dtq, std::array< DeviceTensor< 1 >, 6 > &scratch_mem, const int &dimension, const bool &use_sum_factorization)