#include <unordered_map>
template <typename... Ts>
constexpr auto to_array(const std::tuple<Ts...> &tuple)
{
   constexpr auto get_array = [](const Ts&... x)
   {
      return std::array<typename std::common_type<Ts...>::type, sizeof...(Ts)> { x... };
   };
   return std::apply(get_array, tuple);
}
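// Usage sketch (illustrative only, not part of the original header): to_array
// collapses a homogeneous tuple into a std::array of the common type.
//
//   auto a = to_array(std::make_tuple(1, 2, 3));
//   // a == std::array<int, 3>{1, 2, 3}
//   auto b = to_array(std::make_tuple(1, 2.5));
//   // b == std::array<double, 2>{1.0, 2.5} (common_type of int and double)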
template <typename lambda, std::size_t... i>
constexpr void for_constexpr(lambda &&f,
                             std::integral_constant<std::size_t, i>... Is)
{
   f(Is...);
}
template <std::size_t... n, typename lambda, typename... arg_types>
constexpr void for_constexpr(lambda &&f,
                             std::integer_sequence<std::size_t, n...>,
                             arg_types&&... args)
{
   /* ... */
}
template <typename lambda, std::size_t... i>
constexpr void for_constexpr(lambda &&f, std::integer_sequence<std::size_t, i...>)
{
   (f(std::integral_constant<std::size_t, i> {}), ...);
}
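// Usage sketch (illustrative only): the fold above calls f once per index with
// a std::integral_constant, so the index stays a compile-time constant and can
// be used as a template argument inside the callback.
//
//   for_constexpr([&](auto i) { /* decltype(i)::value is constexpr */ },
//                 std::make_integer_sequence<std::size_t, 3> {});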
template <typename lambda>
constexpr void for_constexpr(lambda &&f, std::integer_sequence<std::size_t>) {}
template <int... n, typename lambda>
constexpr void for_constexpr(lambda &&f)
{
   /* entry point used as for_constexpr<N>(f); dispatches to the
      integer_sequence overloads above */
   /* ... */
}
template <typename lambda, typename arg_t>
constexpr void for_constexpr_with_arg(lambda &&f, arg_t &&arg,
                                      std::integer_sequence<std::size_t>) {}
template <typename lambda, typename arg_t, std::size_t i, std::size_t... Is>
constexpr void for_constexpr_with_arg(lambda &&f, arg_t &&arg,
                                      std::integer_sequence<std::size_t, i, Is...>)
{
   f(std::integral_constant<std::size_t, i> {}, get<i>(arg));
   for_constexpr_with_arg(std::forward<lambda>(f), std::forward<arg_t>(arg),
                          std::integer_sequence<std::size_t, Is...> {});
}
template <typename lambda, typename arg_t>
constexpr void for_constexpr_with_arg(lambda &&f, arg_t &&arg)
{
   using seq =
      std::make_index_sequence<tuple_size<std::remove_reference_t<arg_t>>::value>;
   for_constexpr_with_arg(std::forward<lambda>(f), std::forward<arg_t>(arg),
                          seq {});
}
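// Usage sketch (illustrative only): iterates a tuple-like object, passing the
// compile-time index together with the corresponding element.
//
//   auto t = make_tuple(1, 2.5);
//   for_constexpr_with_arg([](auto i, auto v)
//   {
//      // (i, v) is (0, 1) on the first call and (1, 2.5) on the second.
//   }, t);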
template <std::size_t I, typename Tuple, std::size_t... Is>
std::array<bool, sizeof...(Is)>
make_dependency_array(const Tuple &inputs, std::index_sequence<Is...>)
{
   return { (get<I>(inputs).GetFieldId() == get<Is>(inputs).GetFieldId())... };
}
template <typename... input_ts, std::size_t... Is>
auto make_dependency_map_impl(tuple<input_ts...> inputs,
                              std::index_sequence<Is...>)
{
   constexpr std::size_t N = sizeof...(input_ts);
   if constexpr (N == 0)
   {
      return std::unordered_map<int, std::array<bool, 0>> {};
   }
   else
   {
      std::unordered_map<int, std::array<bool, N>> map;
      (void)std::initializer_list<int>
      {
         (map[get<Is>(inputs).GetFieldId()] =
             make_dependency_array<Is>(inputs, std::make_index_sequence<N> {}),
          0)...
      };
      return map;
   }
}
template <typename... input_ts>
auto make_dependency_map(tuple<input_ts...> inputs)
{
   return make_dependency_map_impl(inputs,
                                   std::make_index_sequence<sizeof...(input_ts)> {});
}
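// Sketch of the resulting structure (hypothetical field ids): for inputs
// (op_a on field 0, op_b on field 0, op_c on field 1), the map associates each
// field id with a boolean row over the inputs that reference it:
//
//   map[0] == {true,  true,  false}
//   map[1] == {false, false, true}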
template <typename T>
constexpr auto get_type_name() -> std::string_view
{
#if defined(__clang__)
   constexpr auto prefix = std::string_view {"[T = "};
   constexpr auto suffix = "]";
   constexpr auto function = std::string_view{__PRETTY_FUNCTION__};
#elif defined(__GNUC__)
   constexpr auto prefix = std::string_view {"with T = "};
   constexpr auto suffix = "; ";
   constexpr auto function = std::string_view{__PRETTY_FUNCTION__};
#elif defined(_MSC_VER)
   constexpr auto prefix = std::string_view {"get_type_name<"};
   constexpr auto suffix = ">(void)";
   constexpr auto function = std::string_view{__FUNCSIG__};
#else
#error Unsupported compiler
#endif
   const auto start = function.find(prefix) + prefix.size();
   const auto end = function.find(suffix);
   const auto size = end - start;
   return function.substr(start, size);
}
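// Example (illustrative only): the prefix/suffix pairs bracket the template
// argument as it appears in each compiler's pretty-printed signature, so
// get_type_name<int>() yields "int". For GCC the raw string looks like
// "constexpr std::string_view get_type_name() [with T = int; ...]".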
template <typename Tuple, std::size_t... Is>
void print_tuple_impl(const Tuple &t, std::index_sequence<Is...>)
{
   ((out << (Is == 0 ? "" : ", ") << std::get<Is>(t)), ...);
}
template <typename... Args>
void print_tuple(const std::tuple<Args...> &t)
{
   print_tuple_impl(t, std::index_sequence_for<Args...> {});
   /* ... */
}
void pretty_print(std::ostream &out, const mfem::DenseMatrix &A)
{
   // First pass: find the widest formatted entry so columns align.
   int max_width = 0;
   for (int i = 0; i < A.NumRows(); ++i)
   {
      for (int j = 0; j < A.NumCols(); ++j)
      {
         std::ostringstream oss;
         oss << std::scientific << std::setprecision(2) << A(i, j);
         max_width = std::max(max_width,
                              static_cast<int>(oss.str().length()));
      }
   }
   // Second pass: print the aligned entries.
   for (int i = 0; i < A.NumRows(); ++i)
   {
      for (int j = 0; j < A.NumCols(); ++j)
      {
         out << std::setw(max_width) << std::scientific << std::setprecision(2)
             << A(i, j);
         if (j < A.NumCols() - 1) { out << ", "; }
      }
      /* ... */
   }
}
void pretty_print(std::ostream &out, const mfem::Vector &v)
{
   /* ... */
   for (int i = 0; i < v.Size(); i++)
   {
      /* ... */
      if (i < v.Size() - 1) { out << ", "; }
   }
   /* ... */
}

void pretty_print(const mfem::Vector &v)
{
   /* ... */
   for (int i = 0; i < v.Size(); i++)
   {
      /* ... */
      if (i < v.Size() - 1) { out << ", "; }
   }
   /* ... */
}
template <typename K, typename T, std::size_t N>
void pretty_print(std::ostream &out,
                  const std::unordered_map<K, std::array<T, N>> &map)
{
   /* ... */
   std::size_t count = 0;
   for (const auto& [key, value] : map)
   {
      /* ... */
      for (std::size_t i = 0; i < N; i++)
      {
         /* ... */
         if (i < N - 1) { out << ", "; }
      }
      /* ... */
      if (count < map.size() - 1) { /* ... */ }
      count++;
   }
   /* ... */
}
void print_mpi_root(const std::string &msg)
{
   if (Mpi::WorldRank() == 0)
   {
      out << msg << std::endl;
   }
}
void print_mpi_sync(const std::string &msg)
{
   const size_t nranks = static_cast<size_t>(Mpi::WorldSize());
   const size_t rank = static_cast<size_t>(Mpi::WorldRank());

   if (nranks == 1)
   {
      out << msg << std::endl;
      return;
   }

   size_t msg_len = msg.length();
   std::vector<size_t> lengths(nranks);
   /* ... gather all message lengths on rank 0 ... */

   if (rank == 0)
   {
      std::vector<std::string> messages(nranks);
      messages[0] = msg;
      // Receive every other rank's message.
      for (size_t r = 1; r < nranks; r++)
      {
         std::vector<char> buffer(lengths[r] + 1);
         MPI_Recv(buffer.data(), static_cast<int>(lengths[r]), MPI_CHAR,
                  static_cast<int>(r), 0, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
         messages[r] = std::string(buffer.data(),
                                   static_cast<size_t>(lengths[r]));
      }
      // Print in rank order.
      for (size_t r = 0; r < nranks; r++)
      {
         out << "[Rank " << r << "] " << messages[r] << std::endl;
      }
   }
   else
   {
      MPI_Send(const_cast<char*>(msg.c_str()), static_cast<int>(msg_len),
               MPI_CHAR, 0, 0, MPI_COMM_WORLD);
   }
   MPI_Barrier(MPI_COMM_WORLD);
}
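// Usage sketch (illustrative only; assumes MPI is initialized):
//
//   print_mpi_sync("residual assembled");
//   // rank 0 prints "[Rank 0] residual assembled", "[Rank 1] ..." in rank
//   // order; all ranks synchronize on the trailing barrier.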
void pretty_print_mpi(const mfem::Vector &v)
{
   std::stringstream ss;
   /* ... */
   for (int i = 0; i < v.Size(); i++)
   {
      ss << v(i);
      if (i < v.Size() - 1) { ss << ", "; }
   }
   print_mpi_sync(ss.str());
}
template <typename... Ts>
constexpr auto decay_types(tuple<Ts...> const &)
-> tuple<std::remove_cv_t<std::remove_reference_t<Ts>>...>;

template <typename T>
using decay_tuple = decltype(decay_types(std::declval<T>()));
template <typename output_t, typename... input_ts>
/* ... */

template <typename output_t, typename T, typename... input_ts>
/* ... */

template <typename output_t, typename... input_ts>
/* ... */
template <typename T>
constexpr int GetFieldId()
{
   return T::GetFieldId();
}
template <typename Tuple, std::size_t... Is>
constexpr auto extract_field_ids_impl(Tuple &&t, std::index_sequence<Is...>)
{
   return std::array<int, sizeof...(Is)>
   {
      std::decay_t<decltype(std::get<Is>(t))> {}.GetFieldId()...
   };
}
template <typename... Ts>
constexpr auto extract_field_ids(const std::tuple<Ts...> &t)
{
   return extract_field_ids_impl(t, std::index_sequence_for<Ts...> {});
}
constexpr bool contains(const int* arr, std::size_t size, int value)
{
   for (std::size_t i = 0; i < size; ++i)
   {
      if (arr[i] == value) { return true; }
   }
   return false;
}
template <typename... Ts>
constexpr std::size_t count_unique_field_ids(const std::tuple<Ts...> &t)
{
   constexpr std::size_t size = sizeof...(Ts);
   const auto ids = extract_field_ids(t);
   std::array<int, size> unique_ids = {};
   std::size_t unique_count = 0;
   for (std::size_t i = 0; i < size; ++i)
   {
      if (!contains(unique_ids.data(), unique_count, ids[i]))
      {
         unique_ids[unique_count] = ids[i];
         unique_count++;
      }
   }
   return unique_count;
}
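// Sketch (hypothetical operator types A, B, C with field ids 0, 0, 1):
//
//   std::tuple<A, B, C> t{};
//   extract_field_ids(t);        // std::array<int, 3>{0, 0, 1}
//   count_unique_field_ids(t);   // 2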
template <typename T, std::size_t N>
auto get_marked_entries(const std::array<T, N> &a,
                        const std::array<bool, N> &marker)
{
   /* ... */
   for (int i = 0; i < N; i++)
   {
      /* keep a[i] when marker[i] is set */
   }
   /* ... */
}
template <typename... Ts>
constexpr auto filter_fields(const std::tuple<Ts...> &t)
{
   return std::tuple_cat(
      std::conditional_t<Ts::GetFieldId() != -1, std::tuple<Ts>, std::tuple<>> {}...);
}
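// Sketch (hypothetical operator types): if A::GetFieldId() returns 0 and
// B::GetFieldId() returns -1, then
//
//   filter_fields(std::tuple<A, B>{});   // yields std::tuple<A>{}
//
// i.e. operators carrying the sentinel id -1 are dropped at compile time.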
struct FieldDescriptor
{
   using data_variant_t =
      std::variant<const FiniteElementSpace *, const ParFiniteElementSpace *,
                   const ParameterSpace *>;

   FieldDescriptor() = default;

   template <typename T>
   FieldDescriptor(std::size_t field_id, const T *v) : id(field_id), data(v) {}

   std::size_t id;
   data_variant_t data;
};

namespace Entity
{
struct Element;
struct BoundaryElement;
}
#if defined(MFEM_USE_CUDA_OR_HIP)
template <typename func_t>
__global__ void forall_kernel_shmem(func_t f, int n)
{
   /* ... */
   extern __shared__ real_t shmem[];
   /* ... */
}
#endif

template <typename func_t>
void forall(func_t f, const int &N, const ThreadBlocks &blocks,
            int num_shmem = 0, real_t *shmem = nullptr)
{
#if defined(MFEM_USE_CUDA_OR_HIP)
   if (Device::Allows(Backend::CUDA_MASK | Backend::HIP_MASK))
   {
      /* ... */
      int num_bytes = num_shmem * sizeof(decltype(shmem));
      dim3 block_size(blocks.x, blocks.y, blocks.z);
      /* ... launch forall_kernel_shmem ... */
#if defined(MFEM_USE_CUDA)
      MFEM_GPU_CHECK(cudaGetLastError());
#elif defined(MFEM_USE_HIP)
      MFEM_GPU_CHECK(hipGetLastError());
#endif
      return;
   }
#endif
   if (Device::Allows(Backend::CPU_MASK))
   {
      MFEM_ASSERT(!((bool)num_shmem != (bool)shmem),
                  "Backend::CPU needs a pre-allocated shared memory block");
      for (int i = 0; i < N; i++)
      {
         /* ... */
      }
      return;
   }
   MFEM_ABORT("no compute backend available");
}
// FDJacobian::Mult: y = J(x) v via a forward finite difference.
void Mult(const Vector &v, Vector &y) const override
{
   /* ... */
   eps = lambda * (lambda + xnorm / vnorm);

   // xpev = x + eps * v
   const auto d_v = v.Read();
   const auto d_x = x.Read();
   auto d_xpev = xpev.Write();
   forall(xpev.Size(), [=] MFEM_HOST_DEVICE (int i)
   {
      d_xpev[i] = d_x[i] + eps * d_v[i];
   });

   // y = F(x + eps * v), then y <- (y - F(x)) / eps, where f caches F(x).
   op.Mult(xpev, y);
   const auto d_f = f.Read();
   auto d_y = y.ReadWrite();
   forall(y.Size(), [=] MFEM_HOST_DEVICE (int i)
   {
      d_y[i] = (d_y[i] - d_f[i]) / eps;
   });
}
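// The scheme above is the standard forward-difference approximation of the
// Jacobian action,
//
//   J(x) v ≈ (F(x + eps v) - F(x)) / eps,
//
// with eps = lambda * (lambda + ||x|| / ||v||) so the perturbation scales with
// the relative magnitudes of x and v.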
std::size_t FindIdx(const std::size_t &id,
                    const std::vector<FieldDescriptor> &fields)
{
   for (std::size_t i = 0; i < fields.size(); i++)
   {
      if (fields[i].id == id)
      {
         return i;
      }
   }
   /* ... */
}
int GetVSize(const FieldDescriptor &f)
{
   return std::visit([](auto arg)
   {
      if (arg == nullptr)
      {
         MFEM_ABORT("FieldDescriptor data is nullptr");
      }
      using T = std::decay_t<decltype(arg)>;
      if constexpr (std::is_same_v<T, const FiniteElementSpace *> ||
                    std::is_same_v<T, const ParFiniteElementSpace *>)
      {
         return arg->GetVSize();
      }
      else if constexpr (std::is_same_v<T, const ParameterSpace *>)
      {
         return arg->GetVSize();
      }
      /* ... */
   }, f.data);
}
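// Usage sketch (illustrative only; `mesh` assumed available): the same
// std::visit dispatch over FieldDescriptor::data is used by all accessors
// below.
//
//   H1_FECollection fec(2, 2);
//   FiniteElementSpace fes(&mesh, &fec);
//   FieldDescriptor f(0, &fes);
//   GetVSize(f);   // == fes.GetVSize()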
void GetElementVDofs(const FieldDescriptor &f, int el, Array<int> &vdofs)
{
   return std::visit([&](auto arg)
   {
      if (arg == nullptr)
      {
         MFEM_ABORT("FieldDescriptor data is nullptr");
      }
      using T = std::decay_t<decltype(arg)>;
      if constexpr (std::is_same_v<T, const FiniteElementSpace *>)
      {
         arg->GetElementVDofs(el, vdofs);
      }
      else if constexpr (std::is_same_v<T, const ParFiniteElementSpace *>)
      {
         arg->GetElementVDofs(el, vdofs);
      }
      else if constexpr (std::is_same_v<T, const ParameterSpace *>)
      {
         MFEM_ABORT("internal error");
      }
      /* ... */
   }, f.data);
}
int GetTrueVSize(const FieldDescriptor &f)
{
   return std::visit([](auto arg)
   {
      if (arg == nullptr)
      {
         MFEM_ABORT("FieldDescriptor data is nullptr");
      }
      using T = std::decay_t<decltype(arg)>;
      if constexpr (std::is_same_v<T, const FiniteElementSpace *>)
      {
         return arg->GetTrueVSize();
      }
      else if constexpr (std::is_same_v<T, const ParFiniteElementSpace *>)
      {
         return arg->GetTrueVSize();
      }
      else if constexpr (std::is_same_v<T, const ParameterSpace *>)
      {
         return arg->GetTrueVSize();
      }
      /* ... */
   }, f.data);
}
int GetVDim(const FieldDescriptor &f)
{
   return std::visit([](auto && arg)
   {
      using T = std::decay_t<decltype(arg)>;
      if constexpr (std::is_same_v<T, const FiniteElementSpace *>)
      {
         return arg->GetVDim();
      }
      else if constexpr (std::is_same_v<T, const ParFiniteElementSpace *>)
      {
         return arg->GetVDim();
      }
      else if constexpr (std::is_same_v<T, const ParameterSpace *>)
      {
         return arg->GetVDim();
      }
      /* ... */
   }, f.data);
}
template <typename entity_t>
int GetDimension(const FieldDescriptor &f)
{
   return std::visit([](auto && arg)
   {
      using T = std::decay_t<decltype(arg)>;
      if constexpr (std::is_same_v<T, const FiniteElementSpace *> ||
                    std::is_same_v<T, const ParFiniteElementSpace *>)
      {
         if constexpr (std::is_same_v<entity_t, Entity::Element>)
         {
            return arg->GetMesh()->Dimension();
         }
         else if constexpr (std::is_same_v<entity_t, Entity::BoundaryElement>)
         {
            return arg->GetMesh()->Dimension() - 1;
         }
      }
      else if constexpr (std::is_same_v<T, const ParameterSpace *>)
      {
         return arg->Dimension();
      }
      /* ... */
   }, f.data);
}
const Operator *get_prolongation(const FieldDescriptor &f)
{
   return std::visit([](auto&& arg) -> const Operator*
   {
      using T = std::decay_t<decltype(arg)>;
      if constexpr (std::is_same_v<T, const FiniteElementSpace *> ||
                    std::is_same_v<T, const ParFiniteElementSpace *>)
      {
         return arg->GetProlongationMatrix();
      }
      else if constexpr (std::is_same_v<T, const ParameterSpace *>)
      {
         return arg->GetProlongationMatrix();
      }
      /* ... */
   }, f.data);
}
const Operator *get_element_restriction(const FieldDescriptor &f,
                                        ElementDofOrdering o)
{
   return std::visit([&o](auto&& arg) -> const Operator*
   {
      using T = std::decay_t<decltype(arg)>;
      if constexpr (std::is_same_v<T, const FiniteElementSpace *>
                    || std::is_same_v<T, const ParFiniteElementSpace *>)
      {
         return arg->GetElementRestriction(o);
      }
      else if constexpr (std::is_same_v<T, const ParameterSpace *>)
      {
         return arg->GetElementRestriction(o);
      }
      else
      {
         static_assert(always_false<T>,
                       "can't use get_element_restriction on type");
      }
   }, f.data);
}
const Operator *get_face_restriction(const FieldDescriptor &f,
                                     ElementDofOrdering o,
                                     FaceType ft,
                                     L2FaceValues m)
{
   return std::visit([&o, &ft, &m](auto&& arg) -> const Operator*
   {
      using T = std::decay_t<decltype(arg)>;
      if constexpr (std::is_same_v<T, const FiniteElementSpace *> ||
                    std::is_same_v<T, const ParFiniteElementSpace *>)
      {
         return arg->GetFaceRestriction(o, ft, m);
      }
      else if constexpr (std::is_same_v<T, const ParameterSpace *>)
      {
         MFEM_ABORT("internal error");
      }
      else
      {
         static_assert(always_false<T>,
                       "can't use get_face_restriction on type");
      }
   }, f.data);
}
template <typename entity_t>
const Operator *get_restriction(const FieldDescriptor &f,
                                const ElementDofOrdering &o)
{
   if constexpr (std::is_same_v<entity_t, Entity::Element>)
   {
      return get_element_restriction(f, o);
   }
   else if constexpr (std::is_same_v<entity_t, Entity::BoundaryElement>)
   {
      // boundary faces, single-valued traces (assumed arguments)
      return get_face_restriction(f, o, FaceType::Boundary,
                                  L2FaceValues::SingleValued);
   }
   MFEM_ABORT("restriction not implemented for Entity");
   return nullptr;
}
template <typename entity_t, typename fop_t>
inline std::tuple<std::function<void(const Vector&, Vector&)>, int>
get_restriction_transpose(const FieldDescriptor &f, const ElementDofOrdering &o,
                          const fop_t &fop)
{
   /* ... one branch returns a callback with unit height: */
   return std::make_tuple(RT, 1);
   /* ... another applies R->MultTranspose with height R->Height(): */
   return std::make_tuple(RT, R->Height());
   /* ... */
   return std::make_tuple(
      /* ... */);
}

void prolongation(const FieldDescriptor field, const Vector &x, Vector &field_l)
{
   const auto P = get_prolongation(field);
   field_l.SetSize(P->Height());
   P->Mult(x, field_l);
}
template <std::size_t N, std::size_t M>
void prolongation(const std::array<FieldDescriptor, N> fields, const Vector &x,
                  std::array<Vector, M> &fields_l)
{
   int data_offset = 0;
   for (int i = 0; i < N; i++)
   {
      const auto P = get_prolongation(fields[i]);
      const int width = P->Width();
      const Vector x_i(const_cast<Vector&>(x), data_offset, width);
      fields_l[i].SetSize(P->Height());
      P->Mult(x_i, fields_l[i]);
      data_offset += width;
   }
}
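// Layout sketch: the incoming T-vector x concatenates the true dofs of all
// fields; each slice of width P_i->Width() is prolongated independently, e.g.
// for two fields with true sizes n0 and n1:
//
//   x = [ x_0 (n0 entries) | x_1 (n1 entries) ]
//   fields_l[0] = P_0 x_0,   fields_l[1] = P_1 x_1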
void prolongation(const std::vector<FieldDescriptor> fields, const Vector &x,
                  std::vector<Vector> &fields_l)
{
   int data_offset = 0;
   for (std::size_t i = 0; i < fields.size(); i++)
   {
      const auto P = get_prolongation(fields[i]);
      const int width = P->Width();
      const Vector x_i(const_cast<Vector&>(x), data_offset, width);
      fields_l[i].SetSize(P->Height());
      P->Mult(x_i, fields_l[i]);
      data_offset += width;
   }
}
void get_lvectors(const std::vector<FieldDescriptor> fields, const Vector &x,
                  std::vector<Vector> &fields_l)
{
   int data_offset = 0;
   for (std::size_t i = 0; i < fields.size(); i++)
   {
      const int sz = GetVSize(fields[i]);
      fields_l[i].SetSize(sz);
      const Vector x_i(const_cast<Vector&>(x), data_offset, sz);
      /* ... */
      data_offset += sz;
   }
}
template <typename fop_t>
std::function<void(const Vector &, Vector &)>
get_prolongation_transpose(const FieldDescriptor &f, const fop_t &fop,
                           MPI_Comm mpi_comm)
{
   /* ... one branch reduces a rank-local scalar across ranks: */
   return [mpi_comm](const Vector &x, Vector &y)
   {
      MFEM_ASSERT(y.Size() == 1,
                  "output size doesn't match kernel description");
      const real_t local_sum = x.Sum();
      MPI_Allreduce(&local_sum, y.GetData(), 1, MPI_DOUBLE, MPI_SUM, mpi_comm);
   };
   /* ... */
}
template <typename entity_t>
void restriction(const FieldDescriptor u, const Vector &u_l, Vector &field_e,
                 ElementDofOrdering ordering)
{
   const auto R = get_restriction<entity_t>(u, ordering);
   MFEM_ASSERT(R->Width() == u_l.Size(),
               "restriction not applicable to given data size");
   const int height = R->Height();
   field_e.SetSize(height);
   R->Mult(u_l, field_e);
}
template <typename entity_t>
void restriction(const std::vector<FieldDescriptor> u,
                 const std::vector<Vector> &u_l,
                 std::vector<Vector> &fields_e,
                 ElementDofOrdering ordering,
                 const int offset = 0)
{
   for (std::size_t i = 0; i < u.size(); i++)
   {
      const auto R = get_restriction<entity_t>(u[i], ordering);
      MFEM_ASSERT(R->Width() == u_l[i].Size(),
                  "restriction not applicable to given data size");
      const int height = R->Height();
      fields_e[i + offset].SetSize(height);
      R->Mult(u_l[i], fields_e[i + offset]);
   }
}
template <std::size_t N, std::size_t M>
void element_restriction(const std::array<FieldDescriptor, N> u,
                         const std::array<Vector, N> &u_l,
                         std::array<Vector, M> &fields_e,
                         ElementDofOrdering ordering,
                         const int offset = 0)
{
   for (int i = 0; i < N; i++)
   {
      const auto R = get_element_restriction(u[i], ordering);
      MFEM_ASSERT(R->Width() == u_l[i].Size(),
                  "element restriction not applicable to given data size");
      const int height = R->Height();
      fields_e[i + offset].SetSize(height);
      R->Mult(u_l[i], fields_e[i + offset]);
   }
}
template <typename entity_t>
int GetNumEntities(const mfem::Mesh &mesh)
{
   if constexpr (std::is_same_v<entity_t, Entity::Element>)
   {
      return mesh.GetNE();
   }
   else if constexpr (std::is_same_v<entity_t, Entity::BoundaryElement>)
   {
      return mesh.GetNBE();
   }
   /* ... */
}
template <typename entity_t>
const DofToQuad *GetDofToQuad(const FieldDescriptor &f,
                              const IntegrationRule &ir, DofToQuad::Mode mode)
{
   return std::visit([&ir, &mode](auto&& arg) -> const DofToQuad*
   {
      using T = std::decay_t<decltype(arg)>;
      if constexpr (std::is_same_v<T, const FiniteElementSpace *>
                    || std::is_same_v<T, const ParFiniteElementSpace *>)
      {
         if constexpr (std::is_same_v<entity_t, Entity::Element>)
         {
            return &arg->GetTypicalFE()->GetDofToQuad(ir, mode);
         }
         else if constexpr (std::is_same_v<entity_t, Entity::BoundaryElement>)
         {
            return &arg->GetTypicalTraceElement()->GetDofToQuad(ir, mode);
         }
      }
      else if constexpr (std::is_same_v<T, const ParameterSpace *>)
      {
         return &arg->GetDofToQuad();
      }
      /* ... */
   }, f.data);
}
template <typename field_operator_t>
void CheckCompatibility(const FieldDescriptor &f)
{
   std::visit([](auto && arg)
   {
      using T = std::decay_t<decltype(arg)>;
      if constexpr (std::is_same_v<T, const FiniteElementSpace *> ||
                    std::is_same_v<T, const ParFiniteElementSpace *>)
      {
         if constexpr (std::is_same_v<field_operator_t, Value<>>)
         {
            /* always compatible */
         }
         else if constexpr (std::is_same_v<field_operator_t, Gradient<>>)
         {
            MFEM_ASSERT(arg->GetTypicalElement()->GetMapType() ==
                        FiniteElement::VALUE,
                        "Gradient not compatible with FE");
         }
         else
         {
            static_assert(always_false<T>,
                          "FieldOperator not compatible with FiniteElementSpace");
         }
      }
      else if constexpr (std::is_same_v<T, const ParameterSpace *>)
      {
         if constexpr (std::is_same_v<field_operator_t, Identity<>>)
         {
            /* compatible */
         }
         else
         {
            static_assert(always_false<T>,
                          "FieldOperator not compatible with ParameterSpace");
         }
      }
      else
      {
         static_assert(always_false<T>, "Operator not compatible with FE");
      }
   }, f.data);
}
template <typename entity_t, typename field_operator_t>
int GetSizeOnQP(const field_operator_t &, const FieldDescriptor &f)
{
   /* ... dispatch on the field operator type (Value, Gradient, ...) ... */
   MFEM_ABORT("can't get size on quadrature point for field descriptor");
}
template <typename entity_t, typename field_operator_ts>
std::array<size_t, tuple_size<field_operator_ts>::value>
create_descriptors_to_fields_map(const std::vector<FieldDescriptor> &fields,
                                 field_operator_ts &fops)
{
   std::array<size_t, tuple_size<field_operator_ts>::value> map;

   auto find_id = [](const std::vector<FieldDescriptor> &fields, std::size_t i)
   {
      auto it = std::find_if(begin(fields), end(fields),
                             [&i](const FieldDescriptor &field)
      {
         return field.id == i;
      });
      if (it == fields.end())
      {
         /* ... */
      }
      return static_cast<size_t>(it - fields.begin());
   };

   auto f = [&](auto &fop, auto &map)
   {
      if constexpr (std::is_same_v<std::decay_t<decltype(fop)>, Weight>)
      {
         /* ... */
      }
      else
      {
         int i = find_id(fields, fop.GetFieldId());
         /* ... on success: */
         fop.vdim = GetVDim(fields[i]);
         /* ... otherwise: */
         MFEM_ABORT("can't find field for id: " << fop.GetFieldId());
      }
   };

   for_constexpr<tuple_size<field_operator_ts>::value>([&](auto idx)
   {
      f(get<idx>(fops), map[idx]);
   });

   return map;
}
template <typename input_t, std::size_t... i>
std::array<DeviceTensor<3>, sizeof...(i)> wrap_input_memory(
   std::array<Vector, sizeof...(i)> &input_qp_mem, int num_qp, int num_entities,
   const input_t &inputs, std::index_sequence<i...>)
{
   return { Reshape(input_qp_mem[i].ReadWrite(),
                    get<i>(inputs).size_on_qp, num_qp, num_entities)... };
}
template <typename input_t, std::size_t... i>
std::array<Vector, sizeof...(i)>
create_input_qp_memory(int num_qp, int num_entities, input_t &inputs,
                       std::index_sequence<i...>)
{
   return { Vector(get<i>(inputs).size_on_qp * num_qp * num_entities)... };
}
template <typename input_t, std::size_t... i>
std::vector<int> get_input_size_on_qp(const input_t &inputs,
                                      std::index_sequence<i...>)
{
   return { get<i>(inputs).size_on_qp... };
}
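// Layout note: each input's quadrature data is stored as one flat Vector of
// size_on_qp * num_qp * num_entities entries; wrap_input_memory views it as a
// rank-3 DeviceTensor indexed (component on qp, qp, entity), which
// get_local_input_qp below slices per entity.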
template <std::size_t num_fields, std::size_t num_inputs, std::size_t num_outputs>
struct SharedMemoryInfo
{
   int total_size;
   std::array<int, 8> offsets;
   std::array<std::array<int, 2>, num_inputs> input_dtq_sizes;
   std::array<std::array<int, 2>, num_outputs> output_dtq_sizes;
   std::array<int, num_fields> field_sizes;
   int direction_size;
   std::array<int, num_inputs> input_sizes;
   std::array<int, num_inputs> shadow_sizes;
   int residual_size;
   std::array<int, 6> temp_sizes;
};
template <typename entity_t, std::size_t num_fields, std::size_t num_inputs,
          std::size_t num_outputs, typename input_t>
SharedMemoryInfo<num_fields, num_inputs, num_outputs>
get_shmem_info(const std::array<DofToQuadMap, num_inputs> &input_dtq_maps,
               const std::array<DofToQuadMap, num_outputs> &output_dtq_maps,
               const std::vector<FieldDescriptor> &fields,
               const int &num_entities,
               const input_t &inputs,
               const int &num_qp,
               const std::vector<int> &input_size_on_qp,
               const int &residual_size_on_qp,
               const ElementDofOrdering &dof_ordering,
               const int &derivative_action_field_idx = -1)
{
   std::array<int, 8> offsets = {0};
   int total_size = 0;

   // B and G sizes for each input map; track the largest 1D extents for the
   // scratch buffers below.
   std::array<std::array<int, 2>, num_inputs> input_dtq_sizes;
   int max_dtq_qps = 0;
   int max_dtq_dofs = 0;
   for (std::size_t i = 0; i < num_inputs; i++)
   {
      auto a = input_dtq_maps[i].B.GetShape();
      input_dtq_sizes[i][0] = a[0] * a[1] * a[2];
      auto b = input_dtq_maps[i].G.GetShape();
      input_dtq_sizes[i][1] = b[0] * b[1] * b[2];
      /* ... update max_dtq_qps and max_dtq_dofs from a and b ... */
      total_size += std::accumulate(std::begin(input_dtq_sizes[i]),
                                    std::end(input_dtq_sizes[i]), 0);
   }

   std::array<std::array<int, 2>, num_outputs> output_dtq_sizes;
   for (std::size_t i = 0; i < num_outputs; i++)
   {
      auto a = output_dtq_maps[i].B.GetShape();
      output_dtq_sizes[i][0] = a[0] * a[1] * a[2];
      auto b = output_dtq_maps[i].G.GetShape();
      output_dtq_sizes[i][1] = b[0] * b[1] * b[2];
      /* ... */
      total_size += std::accumulate(std::begin(output_dtq_sizes[i]),
                                    std::end(output_dtq_sizes[i]), 0);
   }

   std::array<int, num_fields> field_sizes;
   for (std::size_t i = 0; i < num_fields; i++)
   {
      /* ... field_sizes[i] = dofs of field i on a single entity ... */
   }
   total_size += std::accumulate(
                    std::begin(field_sizes), std::end(field_sizes), 0);

   int direction_size = 0;
   if (derivative_action_field_idx != -1)
   {
      /* ... direction_size derived from
         fields[derivative_action_field_idx], dof_ordering ... */
      total_size += direction_size;
   }

   std::array<int, num_inputs> input_sizes;
   for (std::size_t i = 0; i < num_inputs; i++)
   {
      input_sizes[i] = input_size_on_qp[i] * num_qp;
   }
   total_size += std::accumulate(
                    std::begin(input_sizes), std::end(input_sizes), 0);

   std::array<int, num_inputs> shadow_sizes{0};
   if (derivative_action_field_idx != -1)
   {
      for (std::size_t i = 0; i < num_inputs; i++)
      {
         shadow_sizes[i] = input_size_on_qp[i] * num_qp;
      }
      total_size += std::accumulate(
                       std::begin(shadow_sizes), std::end(shadow_sizes), 0);
   }

   const int residual_size = residual_size_on_qp;
   total_size += residual_size * num_qp;

   constexpr int num_temp = 6;
   std::array<int, num_temp> temp_sizes = {0};
   const int q1d = max_dtq_qps;
   [[maybe_unused]] const int d1d = max_dtq_dofs;
   /* ... */
   constexpr int hardcoded_temp_num = 6;
   for (std::size_t i = 0; i < hardcoded_temp_num; i++)
   {
      temp_sizes[i] = q1d * q1d * q1d;
   }
   total_size += std::accumulate(
                    std::begin(temp_sizes), std::end(temp_sizes), 0);

   /* ... fill offsets and assemble the SharedMemoryInfo to return ... */
}
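// Worked sizing example (hypothetical numbers): one input and one output whose
// B and G maps each hold 4 x 1 x 4 = 16 entries, one field with 64 dofs per
// entity, 8 quadrature points with 3 input values and 3 residual values per
// point, no derivative action, and q1d = 4:
//
//   total_size = (16 + 16)        // input B, G
//              + (16 + 16)        // output B, G
//              + 64               // field dofs
//              + 8 * 3            // inputs on quadrature points
//              + 8 * 3            // residual on quadrature points
//              + 6 * 4 * 4 * 4    // six q1d^3 scratch buffers
//              = 560 reals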
template <typename shmem_info_t>
void print_shared_memory_info(shmem_info_t &shmem_info)
{
   out << "Shared Memory Info\n"
       << "total size: " << shmem_info.total_size
       << " " << "(" << shmem_info.total_size * real_t(sizeof(real_t)) / 1024.0
       << "kb)";
   out << "\ninput dtq sizes (B G): ";
   for (auto &i : shmem_info.input_dtq_sizes)
   {
      for (int j = 0; j < 2; j++)
      {
         out << i[j] << " ";
      }
   }
   out << "\noutput dtq sizes (B G): ";
   for (auto &i : shmem_info.output_dtq_sizes)
   {
      for (int j = 0; j < 2; j++)
      {
         out << i[j] << " ";
      }
   }
   out << "\nfield sizes: ";
   for (auto &i : shmem_info.field_sizes)
   {
      out << i << " ";
   }
   out << "\ndirection size: ";
   out << shmem_info.direction_size << " ";
   out << "\ninput sizes: ";
   for (auto &i : shmem_info.input_sizes)
   {
      out << i << " ";
   }
   out << "\nshadow sizes: ";
   for (auto &i : shmem_info.shadow_sizes)
   {
      out << i << " ";
   }
   out << "\ntemp sizes: ";
   for (auto &i : shmem_info.temp_sizes)
   {
      out << i << " ";
   }
   out << "\noffsets: ";
   for (auto &i : shmem_info.offsets)
   {
      out << i << " ";
   }
   out << std::endl;
}
template <std::size_t N>
MFEM_HOST_DEVICE inline
std::array<DofToQuadMap, N> load_dtq_mem(
   void *mem, int offset,
   const std::array<std::array<int, 2>, N> &sizes,
   const std::array<DofToQuadMap, N> &dtq)
{
   std::array<DofToQuadMap, N> f;
   for (std::size_t i = 0; i < N; i++)
   {
      if (dtq[i].which_input != -1)
      {
         // Copy the B (basis values) matrix into shared memory.
         const auto [nqp_b, dim_b, ndof_b] = dtq[i].B.GetShape();
         const auto B = Reshape(&dtq[i].B[0], nqp_b, dim_b, ndof_b);
         auto mem_Bi = Reshape(reinterpret_cast<real_t *>(mem) + offset,
                               nqp_b, dim_b, ndof_b);
         MFEM_FOREACH_THREAD(q, x, nqp_b)
         {
            MFEM_FOREACH_THREAD(d, y, ndof_b)
            {
               for (int b = 0; b < dim_b; b++)
               {
                  auto v = B(q, b, d);
                  mem_Bi(q, b, d) = v;
               }
            }
         }
         offset += sizes[i][0];

         // Copy the G (basis gradients) matrix into shared memory.
         const auto [nqp_g, dim_g, ndof_g] = dtq[i].G.GetShape();
         const auto G = Reshape(&dtq[i].G[0], nqp_g, dim_g, ndof_g);
         auto mem_Gi = Reshape(reinterpret_cast<real_t *>(mem) + offset,
                               nqp_g, dim_g, ndof_g);
         MFEM_FOREACH_THREAD(q, x, nqp_g)
         {
            MFEM_FOREACH_THREAD(d, y, ndof_g)
            {
               for (int b = 0; b < dim_g; b++)
               {
                  mem_Gi(q, b, d) = G(q, b, d);
               }
            }
         }
         offset += sizes[i][1];

         /* ... f[i] wraps the shared-memory copies of B and G together with */
         /* ... */ dtq[i].which_input /* ... */;
      }
      /* ... */
   }
   return f;
}
template <std::size_t num_fields>
MFEM_HOST_DEVICE inline
std::array<DeviceTensor<1>, num_fields>
load_field_mem(void *mem, int offset,
               const std::array<int, num_fields> &sizes,
               const std::array<DeviceTensor<2>, num_fields> &fields_e,
               const int &entity_idx)
{
   std::array<DeviceTensor<1>, num_fields> f;

   for_constexpr<num_fields>([&](auto field_idx)
   {
      // All threads of the block cooperate in copying this field's dofs.
      int block_size = MFEM_THREAD_SIZE(x) *
                       MFEM_THREAD_SIZE(y) *
                       MFEM_THREAD_SIZE(z);
      int tid = MFEM_THREAD_ID(x) +
                MFEM_THREAD_SIZE(x) *
                (MFEM_THREAD_ID(y) + MFEM_THREAD_SIZE(y) * MFEM_THREAD_ID(z));
      for (int k = tid; k < sizes[field_idx]; k += block_size)
      {
         reinterpret_cast<real_t *>(mem)[offset + k] =
            fields_e[field_idx](k, entity_idx);
      }
      /* ... f[field_idx] wraps &mem[offset] with length sizes[field_idx] ... */
      offset += sizes[field_idx];
   });

   return f;
}
MFEM_HOST_DEVICE inline
DeviceTensor<1> load_direction_mem(void *mem, int offset, const int &size,
                                   const DeviceTensor<2> &direction,
                                   const int &entity_idx)
{
   int block_size = MFEM_THREAD_SIZE(x) *
                    MFEM_THREAD_SIZE(y) *
                    MFEM_THREAD_SIZE(z);
   int tid = MFEM_THREAD_ID(x) +
             MFEM_THREAD_SIZE(x) *
             (MFEM_THREAD_ID(y) + MFEM_THREAD_SIZE(y) * MFEM_THREAD_ID(z));
   for (int k = tid; k < size; k += block_size)
   {
      reinterpret_cast<real_t *>(mem)[offset + k] = direction(k, entity_idx);
   }
   /* ... */
   return DeviceTensor<1>(
             &reinterpret_cast<real_t *>(mem)[offset], size);
}
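// Access-pattern note: tid enumerates the threads of the block in x-fastest
// order, and each thread copies entries tid, tid + block_size, ... so the
// block cooperatively streams the slice with a coalesced, strided loop.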
template <std::size_t N>
MFEM_HOST_DEVICE inline
std::array<DeviceTensor<2>, N> load_input_mem(void *mem, int offset,
                                              const std::array<int, N> &sizes,
                                              const int &num_qp)
{
   std::array<DeviceTensor<2>, N> f;
   for (std::size_t i = 0; i < N; i++)
   {
      f[i] = DeviceTensor<2>(&reinterpret_cast<real_t *>(mem)[offset],
                             sizes[i] / num_qp, num_qp);
      offset += sizes[i];
   }
   return f;
}
MFEM_HOST_DEVICE inline
DeviceTensor<2> load_residual_mem(void *mem, int offset,
                                  const int &residual_size, const int &num_qp)
{
   return DeviceTensor<2>(&reinterpret_cast<real_t *>(mem)[offset],
                          residual_size, num_qp);
}
template <std::size_t N>
MFEM_HOST_DEVICE inline
std::array<DeviceTensor<1>, N> load_scratch_mem(void *mem, int offset,
                                                const std::array<int, N> &sizes)
{
   std::array<DeviceTensor<1>, N> f;
   for (std::size_t i = 0; i < N; i++)
   {
      f[i] = DeviceTensor<1>(&reinterpret_cast<real_t *>(mem)[offset], sizes[i]);
      offset += sizes[i];
   }
   return f;
}
template <typename shared_mem_info_t, std::size_t num_inputs,
          std::size_t num_outputs, std::size_t num_fields>
MFEM_HOST_DEVICE inline
auto unpack_shmem(void *shmem,
                  const shared_mem_info_t &shmem_info,
                  const std::array<DofToQuadMap, num_inputs> &input_dtq_maps,
                  const std::array<DofToQuadMap, num_outputs> &output_dtq_maps,
                  const std::array<DeviceTensor<2>, num_fields> &wrapped_fields_e,
                  const int &num_qp, const int &e)
{
   // Offsets index the block layout computed in get_shmem_info (order assumed).
   auto input_dtq_shmem = load_dtq_mem(shmem, shmem_info.offsets[0],
                                       shmem_info.input_dtq_sizes,
                                       input_dtq_maps);
   auto output_dtq_shmem = load_dtq_mem(shmem, shmem_info.offsets[1],
                                        shmem_info.output_dtq_sizes,
                                        output_dtq_maps);
   auto fields_shmem = load_field_mem(shmem, shmem_info.offsets[2],
                                      shmem_info.field_sizes,
                                      wrapped_fields_e, e);
   auto input_shmem = load_input_mem(shmem, shmem_info.offsets[4],
                                     shmem_info.input_sizes, num_qp);
   auto residual_shmem = load_residual_mem(shmem, shmem_info.offsets[6],
                                           shmem_info.residual_size, num_qp);
   auto scratch_mem = load_scratch_mem(shmem, shmem_info.offsets[7],
                                       shmem_info.temp_sizes);
   return make_tuple(
      input_dtq_shmem, output_dtq_shmem, fields_shmem,
      input_shmem, residual_shmem, scratch_mem);
}
template <typename shared_mem_info_t, std::size_t num_inputs,
          std::size_t num_outputs, std::size_t num_fields>
MFEM_HOST_DEVICE inline
auto unpack_shmem(void *shmem,
                  const shared_mem_info_t &shmem_info,
                  const std::array<DofToQuadMap, num_inputs> &input_dtq_maps,
                  const std::array<DofToQuadMap, num_outputs> &output_dtq_maps,
                  const std::array<DeviceTensor<2>, num_fields> &wrapped_fields_e,
                  const DeviceTensor<2> &wrapped_direction_e,
                  const int &num_qp, const int &e)
{
   auto input_dtq_shmem = load_dtq_mem(shmem, shmem_info.offsets[0],
                                       shmem_info.input_dtq_sizes,
                                       input_dtq_maps);
   auto output_dtq_shmem = load_dtq_mem(shmem, shmem_info.offsets[1],
                                        shmem_info.output_dtq_sizes,
                                        output_dtq_maps);
   auto fields_shmem = load_field_mem(shmem, shmem_info.offsets[2],
                                      shmem_info.field_sizes,
                                      wrapped_fields_e, e);
   auto direction_shmem = load_direction_mem(shmem, shmem_info.offsets[3],
                                             shmem_info.direction_size,
                                             wrapped_direction_e, e);
   auto input_shmem = load_input_mem(shmem, shmem_info.offsets[4],
                                     shmem_info.input_sizes, num_qp);
   auto shadow_shmem = load_input_mem(shmem, shmem_info.offsets[5],
                                      shmem_info.input_sizes, num_qp);
   auto residual_shmem = load_residual_mem(shmem, shmem_info.offsets[6],
                                           shmem_info.residual_size, num_qp);
   auto scratch_mem = load_scratch_mem(shmem, shmem_info.offsets[7],
                                       shmem_info.temp_sizes);
   return make_tuple(
      input_dtq_shmem, output_dtq_shmem, fields_shmem,
      direction_shmem, input_shmem, shadow_shmem,
      residual_shmem, scratch_mem);
}
template <std::size_t... i>
MFEM_HOST_DEVICE inline
std::array<DeviceTensor<2>, sizeof...(i)> get_local_input_qp(
   const std::array<DeviceTensor<3>, sizeof...(i)> &input_qp_global, int e,
   std::index_sequence<i...>)
{
   return
   {
      DeviceTensor<2>(
         &input_qp_global[i](0, 0, e),
         input_qp_global[i].GetShape()[0],
         input_qp_global[i].GetShape()[1]) ...
   };
}
template <std::size_t N>
MFEM_HOST_DEVICE inline
void set_zero(std::array<DeviceTensor<2>, N> &v)
{
   for (std::size_t i = 0; i < N; i++)
   {
      int size = v[i].GetShape()[0] * v[i].GetShape()[1];
      auto vi = Reshape(&v[i][0], size);
      for (int j = 0; j < size; j++)
      {
         vi(j) = 0.0;
      }
   }
}
template <std::size_t n>
MFEM_HOST_DEVICE inline
void copy(DeviceTensor<n> &u, DeviceTensor<n> &v)
{
   int s = 1;
   for (int i = 0; i < n; i++)
   {
      s *= u.GetShape()[i];
   }
   auto ui = Reshape(&u[0], s);
   auto vi = Reshape(&v[0], s);
   for (int j = 0; j < s; j++)
   {
      vi(j) = ui(j);
   }
}
MFEM_HOST_DEVICE inline
void copy(/* ... */)
{
   int s = 1;
   for (int i = 0; i < n; i++)
   {
      s *= u.GetShape()[i];
   }
   /* ... */
   for (int j = 0; j < s; j++)
   {
      /* ... */
   }
}
template <int n, std::size_t m>
MFEM_HOST_DEVICE inline
void copy(std::array<DeviceTensor<n>, m> &u, std::array<DeviceTensor<n>, m> &v)
{
   for (int i = 0; i < m; i++)
   {
      copy(u[i], v[i]);
   }
}
template <std::size_t num_fields>
std::array<DeviceTensor<2>, num_fields> wrap_fields(
   std::vector<Vector> &fields,
   std::array<int, num_fields> &field_sizes,
   const int &num_entities)
{
   std::array<DeviceTensor<2>, num_fields> f;
   for (std::size_t i = 0; i < num_fields; i++)
   {
      f[i] = DeviceTensor<2>(fields[i].ReadWrite(), field_sizes[i],
                             num_entities);
   }
   return f;
}
template <typename input_t, std::size_t num_fields, std::size_t... i>
int accumulate_sizes_on_qp(
   const input_t &inputs,
   std::array<bool, sizeof...(i)> &kinput_is_dependent,
   const std::array<int, sizeof...(i)> &input_to_field,
   const std::array<FieldDescriptor, num_fields> &fields,
   std::index_sequence<i...> seq)
{
   MFEM_CONTRACT_VAR(seq);
   return (... + [](auto &input, auto is_dependent, auto field)
   {
      if (!is_dependent) { return 0; }
      return GetSizeOnQP(input, field);
   }
   (get<i>(inputs),
    get<i>(kinput_is_dependent),
    fields[input_to_field[i]]));
}
template <typename entity_t,
          typename field_operator_ts,
          std::size_t N = tuple_size<field_operator_ts>::value,
          std::size_t... Is>
std::array<DofToQuadMap, N> create_dtq_maps_impl(
   field_operator_ts &fops,
   std::vector<const DofToQuad*> &dtqs,
   const std::array<size_t, N> &field_map,
   std::index_sequence<Is...>)
{
   auto f = [&](auto fop, std::size_t idx)
   {
      [[maybe_unused]] auto g = [&](int idx)
      {
         auto dtq = dtqs[field_map[idx]];
         int value_dim = 1;
         int grad_dim = 1;
         /* ... */
         value_dim = dtq->FE->GetRangeDim() ? dtq->FE->GetRangeDim() : 1;
         grad_dim = dtq->FE->GetDim();
         /* ... */
         return std::tuple{dtq, value_dim, grad_dim};
      };

      /* Value-type operators: build a DofToQuadMap from
            auto [dtq, value_dim, grad_dim] = g(idx);
         tagged with which_input = static_cast<int>(idx).
         Weight (else if constexpr (std::is_same_v<decltype(fop), Weight>)):
         no B/G data needed.
         Gradient-type operators: as Value, additionally using dtq->G via
            auto [dtq, value_dim, grad_dim] = g(idx);
         Anything else: "field operator type is not implemented". */
      /* ... */
   };

   return std::array<DofToQuadMap, N>
   {
      /* f(get<Is>(fops), Is)... */
   };
}

template <typename entity_t,
          typename field_operator_ts,
          std::size_t num_fields>
std::array<DofToQuadMap, num_fields> create_dtq_maps(
   field_operator_ts &fops,
   std::vector<const DofToQuad*> &dtqmaps,
   const std::array<size_t, num_fields> &to_field_map)
{
   return create_dtq_maps_impl<entity_t>(
      fops, dtqmaps, to_field_map,
      std::make_index_sequence<num_fields> {});
}