573 derivative_ids_t derivative_ids)
575 if constexpr (!(std::is_same_v<entity_t, Entity::Element> ||
576 std::is_same_v<entity_t, Entity::BoundaryElement>))
579 "entity type not supported in AddIntegrator");
582 static constexpr size_t num_inputs =
585 static constexpr size_t num_outputs =
590 using qf_param_ts =
typename qf_signature::parameter_ts;
591 using qf_output_t =
typename qf_signature::return_t;
594 if constexpr (num_outputs > 1)
597 "more than one output per quadrature functions is not supported right now");
600 if constexpr (std::is_same_v<qf_output_t, void>)
603 "quadrature function has no return value");
607 static_assert(num_qfinputs == num_inputs,
608 "quadrature function inputs and descriptor inputs have to match");
611 static_assert(num_qf_outputs == num_outputs,
612 "quadrature function outputs and descriptor outputs have to match");
614 constexpr auto inout_tuple =
616 constexpr auto filtered_inout_tuple =
filter_fields(inout_tuple);
617 static constexpr size_t num_fields =
620 MFEM_ASSERT(num_fields == solutions.size() + parameters.size(),
621 "Total number of fields doesn't match sum of solutions and parameters."
622 " This indicates that some fields are not used in the integrator,"
623 " which currently is not supported.");
629 auto input_to_field =
631 auto output_to_field =
635 std::vector<int> inputs_vdim(num_inputs);
638 inputs_vdim[i] =
get<i>(inputs).vdim;
642 if constexpr (std::is_same_v<entity_t, Entity::Element>)
646 else if constexpr (std::is_same_v<entity_t, Entity::BoundaryElement>)
651 const auto output_fop =
get<0>(outputs);
652 test_space_field_idx =
FindIdx(output_fop.GetFieldId(), fields);
654 bool use_sum_factorization =
false;
656 if constexpr (std::is_same_v<entity_t, Entity::Element>)
658 entity_element_type =
663 use_tensor_product_structure ==
true)
665 use_sum_factorization =
true;
668 else if constexpr (std::is_same_v<entity_t, Entity::BoundaryElement>)
670 entity_element_type =
675 use_tensor_product_structure ==
true)
677 use_sum_factorization =
true;
683 if (use_sum_factorization)
691 (fields[test_space_field_idx],
692 element_dof_ordering, output_fop);
693 auto &output_e_size = output_e_sz;
695 output_restriction_transpose = output_rt;
696 residual_e.
SetSize(output_e_size);
700 restriction_callback =
701 [=, solutions = this->solutions, parameters = this->parameters]
702 (std::vector<Vector> &sol,
703 const std::vector<Vector> &par,
704 std::vector<Vector> &
f)
707 element_dof_ordering);
709 element_dof_ordering,
714 fields[test_space_field_idx], output_fop, mesh.
GetComm());
717 if constexpr (std::is_same_v<entity_t, Entity::Element>)
721 else if constexpr (std::is_same_v<entity_t, Entity::BoundaryElement>)
728 const int num_qp = integration_rule.
GetNPoints();
730 if constexpr (
is_sum_fop<
decltype(output_fop)>::value)
737 const int residual_lsize =
GetVSize(fields[test_space_field_idx]);
738 residual_l.
SetSize(residual_lsize);
745 std::vector<const DofToQuad*> dtq;
746 for (
const auto &field : fields)
753 const int q1d = (int)floor(std::pow(num_qp, 1.0/
dimension) + 0.5);
755 const int residual_size_on_qp =
757 fields[test_space_field_idx]);
762 const int test_vdim = output_fop.vdim;
763 const int test_op_dim = output_fop.size_on_qp / output_fop.vdim;
764 const int num_test_dof =
765 num_entities ? (output_e_size / output_fop.vdim / num_entities) : 0;
769 auto input_size_on_qp =
772 auto action_shmem_info =
774 (input_dtq_maps, output_dtq_maps, fields, num_entities, inputs, num_qp,
775 input_size_on_qp, residual_size_on_qp, element_dof_ordering);
777 Vector shmem_cache(action_shmem_info.total_size);
784 if (use_sum_factorization)
786 thread_blocks.
x = q1d;
787 thread_blocks.
y = q1d;
788 thread_blocks.
z = q1d;
793 if (use_sum_factorization)
795 thread_blocks.
x = q1d;
796 thread_blocks.
y = q1d;
802 thread_blocks.
x = q1d;
807 action_callbacks.push_back(
823 use_sum_factorization,
837 &restriction_cb = this->restriction_callback,
838 &fields_e = this->fields_e,
839 &residual_e = this->residual_e,
840 &output_restriction_transpose = this->output_restriction_transpose
842 (std::vector<Vector> &sol,
const std::vector<Vector> &par,
Vector &res)
845 restriction_cb(sol, par, fields_e);
848 auto ye =
Reshape(residual_e.
ReadWrite(), test_vdim, num_test_dof, num_entities);
851 action_shmem_info.field_sizes,
854 const bool has_attr = attributes.
Size() > 0;
855 const auto d_attr = attributes.
Read();
856 const auto d_elem_attr = elem_attributes->Read();
858 forall([=] MFEM_HOST_DEVICE (
int e,
void *shmem)
860 if (has_attr && !d_attr[d_elem_attr[e] - 1]) {
return; }
862 auto [input_dtq_shmem, output_dtq_shmem, fields_shmem, input_shmem,
863 residual_shmem, scratch_shmem] =
864 unpack_shmem(shmem, action_shmem_info, input_dtq_maps, output_dtq_maps,
865 wrapped_fields_e, num_qp, e);
868 input_shmem, fields_shmem, input_dtq_shmem, input_to_field, inputs, ir_weights,
869 scratch_shmem,
dimension, use_sum_factorization);
872 qfunc, input_shmem, residual_shmem,
873 residual_size_on_qp, num_qp, q1d,
dimension, use_sum_factorization);
875 auto fhat =
Reshape(&residual_shmem(0, 0), test_vdim, test_op_dim, num_qp);
876 auto y =
Reshape(&ye(0, 0, e), num_test_dof, test_vdim);
878 y, fhat, output_fop, output_dtq_shmem[0],
879 scratch_shmem,
dimension, use_sum_factorization);
880 }, num_entities, thread_blocks, action_shmem_info.total_size, shmem_cache.
ReadWrite());
881 output_restriction_transpose(residual_e, res);
886 if constexpr (derivative_ids_t::size() != 0)
890 this->output_restriction_transpose](
const std::size_t derivative_id)
892 const size_t d_field_idx =
FindIdx(derivative_id, fields);
893 const auto direction = fields[d_field_idx];
894 const int da_size_on_qp =
899 input_dtq_maps, output_dtq_maps, fields, num_entities, inputs,
900 num_qp, input_size_on_qp, residual_size_on_qp,
901 element_dof_ordering, d_field_idx);
903 Vector shmem_cache(shmem_info.total_size);
908 element_dof_ordering)->
Height());
909 Vector derivative_action_e(output_e_size);
910 derivative_action_e = 0.0;
913 auto it = dependency_map.find(derivative_id);
914 if (it == dependency_map.end())
916 MFEM_ABORT(
"Derivative ID not found in dependency map");
918 const auto input_is_dependent = it->second;
923 Vector inputs_trial_op_dim(num_inputs);
924 int total_trial_op_dim = 0;
930 if (!input_is_dependent[s])
937 itod(idx) = input_size_on_qp[s] /
get<s>(inputs).vdim;
939 total_trial_op_dim +=
static_cast<int>(itod(idx));
945 const size_t d_input_idx = [d_field_idx, &input_to_field]
947 for (
size_t i = 0; i < input_to_field.size(); i++)
949 if (input_to_field[i] == d_field_idx)
954 return size_t(SIZE_MAX);
957 const int trial_vdim =
GetVDim(fields[d_field_idx]);
958 const int num_trial_dof =
960 inputs_vdim[d_input_idx] / num_entities;
961 const int num_trial_dof_1d =
964 Vector Ae_mem(num_test_dof * test_vdim * num_trial_dof * trial_vdim *
971 derivative_qp_caches[derivative_id] =
Vector(test_vdim * test_op_dim *
973 total_trial_op_dim * num_qp * num_entities);
975 auto& fields_ref = this->fields;
976 auto& derivative_qp_caches_ref = this->derivative_qp_caches[derivative_id];
982 derivative_setup_callbacks[derivative_id].push_back(
994 use_sum_factorization,
1005 element_dof_ordering,
1013 inputs_trial_op_dim,
1016 &qpdc_mem = derivative_qp_caches_ref
1017 ](std::vector<Vector> &f_e,
const Vector &dir_l)
mutable
1020 element_dof_ordering);
1021 auto wrapped_fields_e =
wrap_fields(f_e, shmem_info.field_sizes,
1024 shmem_info.direction_size,
1027 auto qpdc =
Reshape(qpdc_mem.ReadWrite(), test_vdim, test_op_dim,
1028 trial_vdim, total_trial_op_dim, num_qp, num_entities);
1030 auto itod =
Reshape(inputs_trial_op_dim.
Read(), num_inputs);
1032 const auto d_elem_attr = elem_attributes->Read();
1033 const bool has_attr = attributes.
Size() > 0;
1034 const auto d_domain_attr = attributes.
Read();
1038 if (has_attr && !d_domain_attr[d_elem_attr[e] - 1]) {
return; }
1040 auto [input_dtq_shmem, output_dtq_shmem, fields_shmem,
1041 direction_shmem, input_shmem,
1042 shadow_shmem_, residual_shmem,
1044 unpack_shmem(shmem, shmem_info, input_dtq_maps, output_dtq_maps,
1045 wrapped_fields_e, wrapped_direction_e, num_qp, e);
1046 auto &shadow_shmem = shadow_shmem_;
1049 input_shmem, fields_shmem, input_dtq_shmem, input_to_field,
1050 inputs, ir_weights, scratch_shmem,
dimension,
1051 use_sum_factorization);
1055 auto qpdc_e =
Reshape(&qpdc(0, 0, 0, 0, 0, e), test_vdim, test_op_dim,
1056 trial_vdim, total_trial_op_dim, num_qp);
1058 qfunc, input_shmem, shadow_shmem, residual_shmem, qpdc_e, itod, da_size_on_qp,
1060 }, num_entities, thread_blocks, shmem_info.total_size,
1066 derivative_action_callbacks[derivative_id].push_back(
1079 use_sum_factorization,
1093 derivative_action_e,
1094 element_dof_ordering,
1095 inputs_trial_op_dim,
1099 &qpdc_mem = derivative_qp_caches_ref,
1102 std::vector<Vector> &f_e,
const Vector &dir_l,
1103 Vector &der_action_l)
mutable
1106 element_dof_ordering);
1108 test_vdim, num_entities);
1109 auto wrapped_fields_e =
wrap_fields(f_e, shmem_info.field_sizes,
1112 shmem_info.direction_size,
1115 auto qpdc =
Reshape(qpdc_mem.Read(), test_vdim, test_op_dim,
1116 trial_vdim, total_trial_op_dim, num_qp, num_entities);
1118 auto itod =
Reshape(inputs_trial_op_dim.
Read(), num_inputs);
1120 const bool has_attr = attributes.
Size() > 0;
1121 const auto d_attr = attributes.
Read();
1122 const auto d_elem_attr = elem_attributes->Read();
1124 derivative_action_e = 0.0;
1127 if (has_attr && !d_attr[d_elem_attr[e] - 1]) {
return; }
1129 auto [input_dtq_shmem, output_dtq_shmem, fields_shmem,
1130 direction_shmem, input_shmem,
1131 shadow_shmem_, residual_shmem,
1133 unpack_shmem(shmem, shmem_info, input_dtq_maps, output_dtq_maps,
1134 wrapped_fields_e, wrapped_direction_e, num_qp, e);
1135 auto &shadow_shmem = shadow_shmem_;
1138 shadow_shmem, direction_shmem, input_dtq_shmem, inputs,
1139 ir_weights, scratch_shmem, input_is_dependent,
dimension,
1140 use_sum_factorization);
1142 auto fhat =
Reshape(&residual_shmem(0, 0), test_vdim,
1143 test_op_dim, num_qp);
1145 auto qpdce =
Reshape(&qpdc(0, 0, 0, 0, 0, e), test_vdim, test_op_dim,
1146 trial_vdim, total_trial_op_dim, num_qp);
1149 use_sum_factorization);
1151 auto y =
Reshape(&ye(0, 0, e), num_test_dof, test_vdim);
1153 y, fhat, output_fop, output_dtq_shmem[0],
1154 scratch_shmem,
dimension, use_sum_factorization);
1155 }, num_entities, thread_blocks, shmem_info.total_size,
1157 or_transpose(derivative_action_e, der_action_l);
1160 assemble_derivative_sparsematrix_callbacks[derivative_id].push_back(
1172 use_sum_factorization,
1190 inputs_trial_op_dim,
1195 &qpdc_mem = derivative_qp_caches_ref,
1196 &fields = fields_ref
1199 auto wrapped_fields_e =
wrap_fields(f_e, shmem_info.field_sizes,
1202 shmem_info.direction_size,
1205 auto qpdc =
Reshape(qpdc_mem.Read(), test_vdim, test_op_dim,
1206 trial_vdim, total_trial_op_dim, num_qp, num_entities);
1208 auto itod =
Reshape(inputs_trial_op_dim.
Read(), num_inputs);
1210 auto Ae =
Reshape(Ae_mem.
ReadWrite(), num_test_dof, test_vdim, num_trial_dof,
1211 trial_vdim, num_entities);
1213 const auto d_elem_attr = elem_attributes->Read();
1214 const bool has_attr = attributes.
Size() > 0;
1215 const auto d_domain_attr = attributes.
Read();
1219 if (has_attr && !d_domain_attr[d_elem_attr[e] - 1]) {
return; }
1221 auto [input_dtq_shmem, output_dtq_shmem, fields_shmem,
1222 direction_shmem, input_shmem,
1223 shadow_shmem_, residual_shmem,
1225 unpack_shmem(shmem, shmem_info, input_dtq_maps, output_dtq_maps,
1226 wrapped_fields_e, wrapped_direction_e, num_qp, e);
1228 auto fhat =
Reshape(&residual_shmem(0, 0), test_vdim, test_op_dim, num_qp);
1229 auto Aee =
Reshape(&Ae(0, 0, 0, 0, e), num_test_dof, test_vdim, num_trial_dof,
1231 auto qpdce =
Reshape(&qpdc(0, 0, 0, 0, 0, e), test_vdim, test_op_dim,
1232 trial_vdim, total_trial_op_dim, num_qp);
1234 input_dtq_shmem, output_dtq_shmem[0], scratch_shmem,
dimension, q1d,
1235 num_trial_dof_1d, use_sum_factorization);
1236 }, num_entities, thread_blocks, shmem_info.total_size,
1240 for (
size_t s = 0; s < num_inputs; s++)
1242 if (input_is_dependent[s])
1244 trial_field = &fields[input_to_field[s]];
1248 auto trial_fes = *std::get_if<const ParFiniteElementSpace *>
1249 (&trial_field->data);
1250 auto test_fes = *std::get_if<const ParFiniteElementSpace *>
1251 (&fields[output_to_field[0]].data);
1253 A =
new SparseMatrix(test_fes->GetVSize(), trial_fes->GetVSize());
1256 num_trial_dof * trial_vdim, num_entities);
1257 for (
int e = 0; e < num_entities; e++)
1259 DenseMatrix Aee(&tmp(0, 0, e), num_test_dof * test_vdim,
1260 num_trial_dof * trial_vdim);
1263 test_fes->GetElementVDofs(e, test_vdofs);
1264 trial_fes->GetElementVDofs(e, trial_vdofs);
1266 if (use_sum_factorization)
1273 if (test_dofmap.
Size() == 0)
1275 test_vdofs_mapped = test_vdofs;
1279 MFEM_ASSERT(test_dofmap.
Size() == num_test_dof,
1280 "internal error: dof map of the test space does not "
1281 "match previously determined number of test space dofs");
1283 for (
int vd = 0; vd < test_vdim; vd++)
1285 for (
int i = 0; i < num_test_dof; i++)
1287 test_vdofs_mapped[i + vd * num_test_dof] =
1288 test_vdofs[test_dofmap[i] + vd * num_test_dof];
1297 if (trial_dofmap.
Size() == 0)
1299 trial_vdofs_mapped = trial_vdofs;
1303 MFEM_ASSERT(trial_dofmap.
Size() == num_trial_dof,
1304 "internal error: dof map of the test space does not "
1305 "match previously determined number of test space dofs");
1307 for (
int vd = 0; vd < trial_vdim; vd++)
1309 for (
int i = 0; i < num_trial_dof; i++)
1311 trial_vdofs_mapped[i + vd * num_trial_dof] =
1312 trial_vdofs[trial_dofmap[i] + vd * num_trial_dof];
1317 A->
AddSubMatrix(test_vdofs_mapped, trial_vdofs_mapped, Aee, 1);
1328 auto& assemble_derivative_sparsematrix_callbacks_ref =
1329 this->assemble_derivative_sparsematrix_callbacks[derivative_id];
1331 assemble_derivative_hypreparmatrix_callbacks[derivative_id].push_back(
1336 &spmatcb = assemble_derivative_sparsematrix_callbacks_ref,
1337 &fields = fields_ref
1341 for (
const auto &
f : spmatcb)
1346 if (spmat ==
nullptr)
1348 MFEM_ABORT(
"internal error");
1351 bool same_test_and_trial =
false;
1352 for (
size_t s = 0; s < num_inputs; s++)
1354 if (input_is_dependent[s])
1356 if (output_to_field[0] == input_to_field[s])
1358 same_test_and_trial =
true;
1365 for (
size_t s = 0; s < num_inputs; s++)
1367 if (input_is_dependent[s])
1369 trial_field = &fields[input_to_field[s]];
1373 auto trial_fes = *std::get_if<const ParFiniteElementSpace *>
1374 (&trial_field->
data);
1375 auto test_fes = *std::get_if<const ParFiniteElementSpace *>
1376 (&fields[output_to_field[0]].data);
1378 if (same_test_and_trial)
1381 test_fes->GlobalVSize(),
1382 test_fes->GetDofOffsets(),
1384 A =
RAP(&tmp, test_fes->Dof_TrueDof_Matrix());
1389 test_fes->GlobalVSize(),
1390 trial_fes->GlobalVSize(),
1391 test_fes->GetDofOffsets(),
1392 trial_fes->GetDofOffsets(),
1394 A =
RAP(test_fes->Dof_TrueDof_Matrix(), &tmp,
1395 trial_fes->Dof_TrueDof_Matrix());