#include "kernels.hpp"
#include "../general/forall.hpp"

#if defined(MFEM_USE_SUNDIALS)
#if defined(MFEM_USE_MPI)
#include <nvector/nvector_parallel.h>
#ifdef MFEM_USE_OPENMP
const int s = v.Size();
MFEM_ASSERT(!v.data.Empty(), "invalid source vector");
for (i = 0; i < np; i++)
for (i = 0; i < np; i++)
for (j = 0; j < dim[i]; j++)
if (!*in[i] && errno == ERANGE)
for (int i = 0; i < size; i++)
if (!in && errno == ERANGE)
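// Note on the checks above: when a formatted read of a double overflows, the
// stream is left in a failed state and errno is set to ERANGE, so the tests
// "!in && errno == ERANGE" detect a floating-point overflow in the input.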
#ifdef MFEM_USE_LEGACY_OPENMP
#pragma omp parallel for reduction(+:dot)
for (int i = 0; i < size; i++)
dot += data[i] * v[i];
const bool use_dev = UseDevice() || vuse;
if (use_dev) { Write(); }
data = std::move(v.data);
auto y = Write(use_dev);
MFEM_FORALL_SWITCH(use_dev, i, N, y[i] = value;);
MFEM_FORALL_SWITCH(use_dev, i, N, y[i] *= c;);
MFEM_ASSERT(size == v.size, "incompatible Vectors!");
auto x = v.Read(use_dev);
MFEM_FORALL_SWITCH(use_dev, i, N, y[i] *= x[i];);

const double m = 1.0/c;
MFEM_FORALL_SWITCH(use_dev, i, N, y[i] *= m;);
MFEM_ASSERT(size == v.size, "incompatible Vectors!");
auto x = v.Read(use_dev);
MFEM_FORALL_SWITCH(use_dev, i, N, y[i] /= x[i];);

MFEM_FORALL_SWITCH(use_dev, i, N, y[i] -= c;);
MFEM_ASSERT(size == v.size, "incompatible Vectors!");
auto x = v.Read(use_dev);
MFEM_FORALL_SWITCH(use_dev, i, N, y[i] -= x[i];);

MFEM_FORALL_SWITCH(use_dev, i, N, y[i] += c;);
MFEM_ASSERT(size == v.size, "incompatible Vectors!");
auto x = v.Read(use_dev);
MFEM_FORALL_SWITCH(use_dev, i, N, y[i] += x[i];);

MFEM_ASSERT(size == Va.size, "incompatible Vectors!");
auto x = Va.Read(use_dev);
MFEM_FORALL_SWITCH(use_dev, i, N, y[i] += a * x[i];);
MFEM_ASSERT(size == Va.size, "incompatible Vectors!");
auto x = Va.Read(use_dev);
auto y = Write(use_dev);
MFEM_FORALL_SWITCH(use_dev, i, N, y[i] = a * x[i];);
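// Illustrative sketch (not part of vector.cpp): how the element-wise
// operators and the Add/Set helpers above are typically exercised. The
// function name and the vectors are hypothetical.
static void example_elementwise_ops()
{
   Vector a(8), b(8);
   a = 1.0;          // operator=(double): fill every entry
   b = 2.0;
   a *= b;           // entry-wise product
   a /= 4.0;         // division by a constant (implemented as *= 1/c)
   a -= b;           // entry-wise difference
   a.Add(0.5, b);    // a += 0.5 * b
   a.Set(2.0, b);    // a = 2.0 * b
}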
MFEM_ASSERT(v.Size() + offset <= size, "invalid sub-vector");
const int vs = v.Size();
const double *vp = v.data;
double *p = data + offset;
for (int i = 0; i < vs; i++)
MFEM_ASSERT(v.Size() + offset <= size, "invalid sub-vector");
const int vs = v.Size();
const double *vp = v.data;
double *p = data + offset;
for (int i = 0; i < vs; i++)
MFEM_FORALL_SWITCH(use_dev, i, N, y[i] = -y[i];);
316 "incompatible Vectors!");
318 #if !defined(MFEM_USE_LEGACY_OPENMP)
320 const int N = v.
size;
322 auto x1 = v1.
Read(use_dev);
323 auto x2 = v2.
Read(use_dev);
324 auto y = v.
Write(use_dev);
325 MFEM_FORALL_SWITCH(use_dev, i, N, y[i] = x1[i] + x2[i];);
327 #pragma omp parallel for
328 for (
int i = 0; i < v.
size; i++)
338 "incompatible Vectors!");
344 else if (alpha == 1.0)
350 #if !defined(MFEM_USE_LEGACY_OPENMP)
352 const int N = v.
size;
354 auto d_x = v1.
Read(use_dev);
355 auto d_y = v2.
Read(use_dev);
356 auto d_z = v.
Write(use_dev);
357 MFEM_FORALL_SWITCH(use_dev, i, N, d_z[i] = d_x[i] + alpha * d_y[i];);
359 const double *v1p = v1.
data, *v2p = v2.
data;
361 const int s = v.
size;
362 #pragma omp parallel for
363 for (
int i = 0; i <
s; i++)
365 vp[i] = v1p[i] + alpha*v2p[i];
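// Illustrative sketch (hypothetical usage, not part of this file): the free
// functions add()/subtract() write their result into a preallocated output
// vector, avoiding temporaries.
static void example_add_usage()
{
   Vector v1(4), v2(4), v(4);
   v1 = 1.0; v2 = 3.0;
   add(v1, v2, v);        // v = v1 + v2
   add(v1, 0.5, v2, v);   // v = v1 + 0.5 * v2
   subtract(v1, v2, v);   // v = v1 - v2
}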
374 "incompatible Vectors!");
386 #if !defined(MFEM_USE_LEGACY_OPENMP)
388 const int N = x.
size;
390 auto xd = x.
Read(use_dev);
391 auto yd = y.
Read(use_dev);
392 auto zd = z.
Write(use_dev);
393 MFEM_FORALL_SWITCH(use_dev, i, N, zd[i] = a * (xd[i] + yd[i]););
395 const double *xp = x.
data;
396 const double *yp = y.
data;
398 const int s = x.
size;
399 #pragma omp parallel for
400 for (
int i = 0; i <
s; i++)
402 zp[i] = a * (xp[i] + yp[i]);
412 "incompatible Vectors!");
438 #if !defined(MFEM_USE_LEGACY_OPENMP)
440 const int N = x.
size;
442 auto xd = x.
Read(use_dev);
443 auto yd = y.
Read(use_dev);
444 auto zd = z.
Write(use_dev);
445 MFEM_FORALL_SWITCH(use_dev, i, N, zd[i] = a * xd[i] + b * yd[i];);
447 const double *xp = x.
data;
448 const double *yp = y.
data;
450 const int s = x.
size;
451 #pragma omp parallel for
452 for (
int i = 0; i <
s; i++)
454 zp[i] = a * xp[i] + b * yp[i];
463 "incompatible Vectors!");
465 #if !defined(MFEM_USE_LEGACY_OPENMP)
467 const int N = x.
size;
469 auto xd = x.
Read(use_dev);
470 auto yd = y.
Read(use_dev);
471 auto zd = z.
Write(use_dev);
472 MFEM_FORALL_SWITCH(use_dev, i, N, zd[i] = xd[i] - yd[i];);
474 const double *xp = x.
data;
475 const double *yp = y.
data;
477 const int s = x.
size;
478 #pragma omp parallel for
479 for (
int i = 0; i <
s; i++)
481 zp[i] = xp[i] - yp[i];
489 "incompatible Vectors!");
501 #if !defined(MFEM_USE_LEGACY_OPENMP)
503 const int N = x.
size;
505 auto xd = x.
Read(use_dev);
506 auto yd = y.
Read(use_dev);
507 auto zd = z.
Write(use_dev);
508 MFEM_FORALL_SWITCH(use_dev, i, N, zd[i] = a * (xd[i] - yd[i]););
510 const double *xp = x.
data;
511 const double *yp = y.
data;
513 const int s = x.
size;
514 #pragma omp parallel for
515 for (
int i = 0; i <
s; i++)
517 zp[i] = a * (xp[i] - yp[i]);
526 "incompatible Vectors!");
531 auto l = lo.
Read(use_dev);
532 auto h = hi.
Read(use_dev);
533 auto m =
Write(use_dev);
534 MFEM_FORALL_SWITCH(use_dev, i, N,
540 else if (m[i] > h[i])
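// The median kernel above clamps each entry into [l[i], h[i]]: values below
// lo are raised to lo, values above hi are lowered to hi, and everything
// else is kept. A minimal host-side sketch of the same per-entry logic
// (names are illustrative):
static double clamp_entry(double m, double l, double h)
{
   return (m < l) ? l : ((m > h) ? h : m);
}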
const int n = dofs.Size();
auto d_y = elemvect.Write(use_dev);
auto d_X = Read(use_dev);
auto d_dofs = dofs.Read(use_dev);
MFEM_FORALL_SWITCH(use_dev, i, n,
const int dof_i = d_dofs[i];
d_y[i] = dof_i >= 0 ? d_X[dof_i] : -d_X[-dof_i-1];
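// Negative indices in a dof list encode a sign flip: a value j >= 0 refers
// to entry j, while a negative value refers to entry -1-j with its sign
// reversed (used for orientation-dependent degrees of freedom). A sketch of
// the decoding used by the kernels above and below:
static double decode_dof(const double *X, int dof)
{
   return (dof >= 0) ? X[dof] : -X[-1-dof];
}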
const int n = dofs.Size();
for (int i = 0; i < n; i++)
const int j = dofs[i];
elem_data[i] = (j >= 0) ? data[j] : -data[-1-j];
const int n = dofs.Size();
auto d_dofs = dofs.Read(use_dev);
MFEM_FORALL_SWITCH(use_dev, i, n,
const int j = d_dofs[i];
MFEM_ASSERT(dofs.Size() == elemvect.Size(),
            "Size mismatch: length of dofs is " << dofs.Size()
            << ", length of elemvect is " << elemvect.Size());
const int n = dofs.Size();
auto d_y = elemvect.Read(use_dev);
auto d_dofs = dofs.Read(use_dev);
MFEM_FORALL_SWITCH(use_dev, i, n,
const int dof_i = d_dofs[i];
d_X[-1-dof_i] = -d_y[i];
const int n = dofs.Size();
for (int i = 0; i < n; i++)
const int j = dofs[i];
MFEM_ASSERT(dofs.Size() == elemvect.Size(), "Size mismatch: "
            "length of dofs is " << dofs.Size() <<
            ", length of elemvect is " << elemvect.Size());
const int n = dofs.Size();
auto d_y = elemvect.Read(use_dev);
auto d_dofs = dofs.Read(use_dev);
MFEM_FORALL_SWITCH(use_dev, i, n,
const int j = d_dofs[i];

const int n = dofs.Size();
for (int i = 0; i < n; i++)
const int j = dofs[i];
MFEM_ASSERT(dofs.Size() == elemvect.Size(), "Size mismatch: "
            "length of dofs is " << dofs.Size() <<
            ", length of elemvect is " << elemvect.Size());
const int n = dofs.Size();
auto d_x = elemvect.Read(use_dev);
auto d_dofs = dofs.Read(use_dev);
MFEM_FORALL_SWITCH(use_dev, i, n,
const int j = d_dofs[i];
d_y[j] += a * d_x[i];
d_y[-1-j] -= a * d_x[i];
const int n = dofs.Size();
Vector dofs_vals(n, use_dev ?
                 Device::GetDeviceMemoryType() : Device::GetHostMemoryType());
auto d_dofs_vals = dofs_vals.Write(use_dev);
auto d_dofs = dofs.Read(use_dev);
MFEM_FORALL_SWITCH(use_dev, i, n, d_dofs_vals[i] = d_data[d_dofs[i]];);
MFEM_FORALL_SWITCH(use_dev, i, N, d_data[i] = val;);
MFEM_FORALL_SWITCH(use_dev, i, n, d_data[d_dofs[i]] = d_dofs_vals[i];);
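// SetSubVectorComplement works in three passes: save the values at the
// listed dofs into a scratch vector, overwrite the whole vector with val,
// then restore the saved entries, so only positions NOT in dofs end up
// changed.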
if (!size) { return; }
if ( i % width == 0 )
#ifdef MFEM_USE_ADIOS2
const std::string& variable_name) const
if (!size) { return; }
os.engine.Put(variable_name, &data[0]);
std::ios::fmtflags old_fmt = os.flags();
os.setf(std::ios::scientific);
std::streamsize old_prec = os.precision(14);
for (i = 0; i < size; i++)
os.precision(old_prec);
os << "size: " << size << '\n';
os << "hash: " << hf.GetHash() << '\n';
const double max = (double)(RAND_MAX) + 1.;
srand((unsigned)seed);
for (int i = 0; i < size; i++)
data[i] = std::abs(rand()/max);

srand((unsigned)gseed);
return std::abs(data[0]);
for (int i = 0; i < size; i++)
max = std::max(std::abs(data[i]), max);

for (int i = 0; i < size; i++)
sum += std::abs(data[i]);
MFEM_ASSERT(p > 0.0, "Vector::Normlp");
return std::abs(data[0]);
for (int i = 0; i < size; i++)
const double absdata = std::abs(data[i]);
if (scale <= absdata)
sum = 1.0 + sum * std::pow(scale / absdata, p);
sum += std::pow(absdata / scale, p);
return scale * std::pow(sum, 1.0/p);
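// The Normlp loop above uses the classic overflow/underflow-safe scaling:
// it maintains sum = sum_i |x_i/scale|^p with scale equal to the largest
// |x_i| seen so far. When a new entry exceeds the current scale, the
// accumulated sum is rescaled via
//    sum_new = 1 + sum_old * (scale_old/|x_i|)^p,
// so no intermediate |x_i|^p is ever formed directly.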
double max = data[0];
for (int i = 1; i < size; i++)

const double *h_data = this->HostRead();
for (int i = 0; i < size; i++)
static __global__ void cuKernelMin(const int N, double *gdsr, const double *x)
__shared__ double s_min[MFEM_CUDA_BLOCKS];
const int n = blockDim.x*blockIdx.x + threadIdx.x;
if (n>=N) { return; }
const int bid = blockIdx.x;
const int tid = threadIdx.x;
const int bbd = bid*blockDim.x;
const int rid = bbd+tid;
for (int workers=blockDim.x>>1; workers>0; workers>>=1)
if (tid >= workers) { continue; }
if (rid >= N) { continue; }
const int dualTid = tid + workers;
if (dualTid >= N) { continue; }
const int rdd = bbd+dualTid;
if (rdd >= N) { continue; }
if (dualTid >= blockDim.x) { continue; }
s_min[tid] = fmin(s_min[tid], s_min[dualTid]);
if (tid==0) { gdsr[bid] = s_min[0]; }
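// The loop above is a shared-memory tree reduction: in each pass, the first
// `workers` threads of the block fold the value held by their "dual" thread
// (tid + workers) into their own slot, halving the number of active threads
// until s_min[0] holds the block-wide minimum, which thread 0 then writes to
// the per-block result array gdsr.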
static Array<double> cuda_reduce_buf;

static double cuVectorMin(const int N, const double *X)
const int tpb = MFEM_CUDA_BLOCKS;
const int blockSize = MFEM_CUDA_BLOCKS;
const int gridSize = (N+blockSize-1)/blockSize;
const int min_sz = (N%tpb)==0? (N/tpb) : (1+N/tpb);
cuda_reduce_buf.SetSize(min_sz);
Memory<double> &buf = cuda_reduce_buf.GetMemory();
cuKernelMin<<<gridSize,blockSize>>>(N, d_min, X);
MFEM_GPU_CHECK(cudaGetLastError());
for (int i = 0; i < min_sz; i++) { min = fmin(min, h_min[i]); }
static __global__ void cuKernelDot(const int N, double *gdsr,
                                   const double *x, const double *y)
__shared__ double s_dot[MFEM_CUDA_BLOCKS];
const int n = blockDim.x*blockIdx.x + threadIdx.x;
if (n>=N) { return; }
const int bid = blockIdx.x;
const int tid = threadIdx.x;
const int bbd = bid*blockDim.x;
const int rid = bbd+tid;
s_dot[tid] = x[n] * y[n];
for (int workers=blockDim.x>>1; workers>0; workers>>=1)
if (tid >= workers) { continue; }
if (rid >= N) { continue; }
const int dualTid = tid + workers;
if (dualTid >= N) { continue; }
const int rdd = bbd+dualTid;
if (rdd >= N) { continue; }
if (dualTid >= blockDim.x) { continue; }
s_dot[tid] += s_dot[dualTid];
if (tid==0) { gdsr[bid] = s_dot[0]; }
static double cuVectorDot(const int N, const double *X, const double *Y)
const int tpb = MFEM_CUDA_BLOCKS;
const int blockSize = MFEM_CUDA_BLOCKS;
const int gridSize = (N+blockSize-1)/blockSize;
const int dot_sz = (N%tpb)==0? (N/tpb) : (1+N/tpb);
Memory<double> &buf = cuda_reduce_buf.GetMemory();
cuKernelDot<<<gridSize,blockSize>>>(N, d_dot, X, Y);
MFEM_GPU_CHECK(cudaGetLastError());
for (int i = 0; i < dot_sz; i++) { dot += h_dot[i]; }
#endif // MFEM_USE_CUDA
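// Both CUDA reductions above are two-stage: the kernel produces one partial
// result per thread block in a persistent buffer (cuda_reduce_buf, resized
// to ceil(N/MFEM_CUDA_BLOCKS) entries), and the host finishes by folding the
// per-block partials in the short loops at the end of cuVectorMin and
// cuVectorDot. The HIP section below mirrors this pattern.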
static __global__ void hipKernelMin(const int N, double *gdsr, const double *x)
__shared__ double s_min[MFEM_HIP_BLOCKS];
const int n = hipBlockDim_x*hipBlockIdx_x + hipThreadIdx_x;
if (n>=N) { return; }
const int bid = hipBlockIdx_x;
const int tid = hipThreadIdx_x;
const int bbd = bid*hipBlockDim_x;
const int rid = bbd+tid;
for (int workers=hipBlockDim_x>>1; workers>0; workers>>=1)
if (tid >= workers) { continue; }
if (rid >= N) { continue; }
const int dualTid = tid + workers;
if (dualTid >= N) { continue; }
const int rdd = bbd+dualTid;
if (rdd >= N) { continue; }
if (dualTid >= hipBlockDim_x) { continue; }
s_min[tid] = fmin(s_min[tid], s_min[dualTid]);
if (tid==0) { gdsr[bid] = s_min[0]; }
static Array<double> cuda_reduce_buf;

static double hipVectorMin(const int N, const double *X)
const int tpb = MFEM_HIP_BLOCKS;
const int blockSize = MFEM_HIP_BLOCKS;
const int gridSize = (N+blockSize-1)/blockSize;
const int min_sz = (N%tpb)==0 ? (N/tpb) : (1+N/tpb);
cuda_reduce_buf.SetSize(min_sz);
Memory<double> &buf = cuda_reduce_buf.GetMemory();
hipLaunchKernelGGL(hipKernelMin,gridSize,blockSize,0,0,N,d_min,X);
MFEM_GPU_CHECK(hipGetLastError());
for (int i = 0; i < min_sz; i++) { min = fmin(min, h_min[i]); }
static __global__ void hipKernelDot(const int N, double *gdsr,
                                    const double *x, const double *y)
__shared__ double s_dot[MFEM_HIP_BLOCKS];
const int n = hipBlockDim_x*hipBlockIdx_x + hipThreadIdx_x;
if (n>=N) { return; }
const int bid = hipBlockIdx_x;
const int tid = hipThreadIdx_x;
const int bbd = bid*hipBlockDim_x;
const int rid = bbd+tid;
s_dot[tid] = x[n] * y[n];
for (int workers=hipBlockDim_x>>1; workers>0; workers>>=1)
if (tid >= workers) { continue; }
if (rid >= N) { continue; }
const int dualTid = tid + workers;
if (dualTid >= N) { continue; }
const int rdd = bbd+dualTid;
if (rdd >= N) { continue; }
if (dualTid >= hipBlockDim_x) { continue; }
s_dot[tid] += s_dot[dualTid];
if (tid==0) { gdsr[bid] = s_dot[0]; }
static double hipVectorDot(const int N, const double *X, const double *Y)
const int tpb = MFEM_HIP_BLOCKS;
const int blockSize = MFEM_HIP_BLOCKS;
const int gridSize = (N+blockSize-1)/blockSize;
const int dot_sz = (N%tpb)==0 ? (N/tpb) : (1+N/tpb);
cuda_reduce_buf.SetSize(dot_sz);
Memory<double> &buf = cuda_reduce_buf.GetMemory();
hipLaunchKernelGGL(hipKernelDot,gridSize,blockSize,0,0,N,d_dot,X,Y);
MFEM_GPU_CHECK(hipGetLastError());
for (int i = 0; i < dot_sz; i++) { dot += h_dot[i]; }
#endif // MFEM_USE_HIP
MFEM_ASSERT(size == v.size, "incompatible Vectors!");
if (size == 0) { return 0.0; }
#if defined(MFEM_USE_CUDA) || defined(MFEM_USE_HIP) || defined(MFEM_USE_OPENMP)
auto m_data = Read(use_dev);
auto v_data = v.Read(use_dev);
if (!use_dev) { goto vector_dot_cpu; }
#ifdef MFEM_USE_OCCA
return occa::linalg::dot<double,double,double>(
#ifdef MFEM_USE_CUDA
return cuVectorDot(size, m_data, v_data);
return hipVectorDot(size, m_data, v_data);
#ifdef MFEM_USE_OPENMP
#define MFEM_USE_OPENMP_DETERMINISTIC_DOT
#ifdef MFEM_USE_OPENMP_DETERMINISTIC_DOT
#pragma omp parallel
const int nt = omp_get_num_threads();
const int tid = omp_get_thread_num();
const int stride = (size + nt - 1)/nt;
const int start = tid*stride;
const int stop = std::min(start + stride, size);
double my_dot = 0.0;
for (int i = start; i < stop; i++)
my_dot += m_data[i] * v_data[i];
th_dot(tid) = my_dot;
return th_dot.Sum();
#pragma omp parallel for reduction(+:prod)
for (int i = 0; i < size; i++)
prod += m_data[i] * v_data[i];
#endif // MFEM_USE_OPENMP_DETERMINISTIC_DOT
#endif // MFEM_USE_OPENMP
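// The MFEM_USE_OPENMP_DETERMINISTIC_DOT variant above is reproducible from
// run to run: each thread reduces a fixed contiguous slice [start, stop)
// that depends only on size and the thread count, and the per-thread
// partials are then summed in a fixed order by th_dot.Sum(). The plain
// "reduction(+:prod)" variant lets the runtime combine partials in an
// unspecified order, so its rounding can differ between runs.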
auto v_data_ = v.Read();
auto m_data_ = Read();
auto d_dot = dot.Write();
MFEM_FORALL(i, N, d_dot[0] += m_data_[i] * v_data_[i];);
auto m_data = Read(use_dev);
if (!use_dev) { goto vector_min_cpu; }
#ifdef MFEM_USE_OCCA
#ifdef MFEM_USE_CUDA
return cuVectorMin(size, m_data);
return hipVectorMin(size, m_data);

#ifdef MFEM_USE_OPENMP
double minimum = m_data[0];
#pragma omp parallel for reduction(min:minimum)
for (int i = 0; i < size; i++)
minimum = std::min(minimum, m_data[i]);

auto m_data_ = Read();
MFEM_FORALL(i, N, d_min[0] = (d_min[0]<m_data_[i])?d_min[0]:m_data_[i];);

double minimum = data[0];
for (int i = 1; i < size; i++)
if (m_data[i] < minimum)
minimum = m_data[i];