14 #include "kernels.hpp" 16 #include "../general/forall.hpp" 18 #if defined(MFEM_USE_SUNDIALS) 20 #if defined(MFEM_USE_MPI) 21 #include <nvector/nvector_parallel.h> 25 #ifdef MFEM_USE_OPENMP 41 const int s = v.
Size();
45 MFEM_ASSERT(!v.
data.
Empty(),
"invalid source vector");
void Vector::Load(std::istream **in, int np, int *dim)
{
   int i, j, s = 0;
   for (i = 0; i < np; i++) { s += dim[i]; }
   SetSize(s);
   HostWrite();

   int p = 0;
   for (i = 0; i < np; i++)
   {
      for (j = 0; j < dim[i]; j++)
      {
         *in[i] >> data[p++];
         // Clang's libc++ sets the failbit when (correctly) parsing
         // subnormals, so reset the failbit here.
         if (!*in[i] && errno == ERANGE) { in[i]->clear(); }
      }
   }
}

void Vector::Load(std::istream &in, int Size)
{
   SetSize(Size);
   HostWrite();
   for (int i = 0; i < size; i++)
   {
      in >> data[i];
      // Clang's libc++ sets the failbit when (correctly) parsing
      // subnormals, so reset the failbit here.
      if (!in && errno == ERANGE) { in.clear(); }
   }
}
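A minimal usage sketch of the stream-based Load() overload above (illustrative, not part of vector.cpp; the sample data and surrounding main() are assumptions):

   #include "mfem.hpp"
   #include <sstream>
   #include <iostream>

   int main()
   {
      // Read a 3-entry vector from a whitespace-separated text stream.
      std::istringstream in("1.0 2.5 -3.0");
      mfem::Vector v;
      v.Load(in, 3);
      v.Print(std::cout); // prints: 1 2.5 -3
      return 0;
   }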
double Vector::operator*(const double *v) const
{
   double dot = 0.0;
#ifdef MFEM_USE_LEGACY_OPENMP
   #pragma omp parallel for reduction(+:dot)
#endif
   for (int i = 0; i < size; i++)
   {
      dot += data[i] * v[i];
   }
   return dot;
}
Vector &Vector::operator=(const Vector &v)
{
   SetSize(v.Size());
   const bool vuse = v.UseDevice();
   const bool use_dev = UseDevice() || vuse;
   v.UseDevice(use_dev);
   // Keep 'data' where it is, unless 'use_dev' is true.
   if (use_dev) { Write(); }
   data.CopyFrom(v.data, v.Size());
   v.UseDevice(vuse); // restore v's original device flag
   return *this;
}

Vector &Vector::operator=(Vector &&v)
{
   data = std::move(v.data);
   // Self-assignment-safe way to move v.size to size:
   const auto size_tmp = v.size;
   v.size = 0;
   size = size_tmp;
   return *this;
}
Vector &Vector::operator=(double value)
{
   const bool use_dev = UseDevice();
   const int N = size;
   auto y = Write(use_dev);
   MFEM_FORALL_SWITCH(use_dev, i, N, y[i] = value;);
   return *this;
}

Vector &Vector::operator*=(double c)
{
   const bool use_dev = UseDevice();
   const int N = size;
   auto y = ReadWrite(use_dev);
   MFEM_FORALL_SWITCH(use_dev, i, N, y[i] *= c;);
   return *this;
}
Vector &Vector::operator*=(const Vector &v)
{
   MFEM_ASSERT(size == v.size, "incompatible Vectors!");

   const bool use_dev = UseDevice() || v.UseDevice();
   const int N = size;
   auto y = ReadWrite(use_dev);
   auto x = v.Read(use_dev);
   MFEM_FORALL_SWITCH(use_dev, i, N, y[i] *= x[i];);
   return *this;
}

Vector &Vector::operator/=(double c)
{
   const bool use_dev = UseDevice();
   const int N = size;
   const double m = 1.0/c;
   auto y = ReadWrite(use_dev);
   // Multiply by the reciprocal instead of dividing entrywise.
   MFEM_FORALL_SWITCH(use_dev, i, N, y[i] *= m;);
   return *this;
}
Vector &Vector::operator/=(const Vector &v)
{
   MFEM_ASSERT(size == v.size, "incompatible Vectors!");

   const bool use_dev = UseDevice() || v.UseDevice();
   const int N = size;
   auto y = ReadWrite(use_dev);
   auto x = v.Read(use_dev);
   MFEM_FORALL_SWITCH(use_dev, i, N, y[i] /= x[i];);
   return *this;
}

Vector &Vector::operator-=(double c)
{
   const bool use_dev = UseDevice();
   const int N = size;
   auto y = ReadWrite(use_dev);
   MFEM_FORALL_SWITCH(use_dev, i, N, y[i] -= c;);
   return *this;
}
Vector &Vector::operator-=(const Vector &v)
{
   MFEM_ASSERT(size == v.size, "incompatible Vectors!");

   const bool use_dev = UseDevice() || v.UseDevice();
   const int N = size;
   auto y = ReadWrite(use_dev);
   auto x = v.Read(use_dev);
   MFEM_FORALL_SWITCH(use_dev, i, N, y[i] -= x[i];);
   return *this;
}

Vector &Vector::operator+=(double c)
{
   const bool use_dev = UseDevice();
   const int N = size;
   auto y = ReadWrite(use_dev);
   MFEM_FORALL_SWITCH(use_dev, i, N, y[i] += c;);
   return *this;
}
Vector &Vector::operator+=(const Vector &v)
{
   MFEM_ASSERT(size == v.size, "incompatible Vectors!");

   const bool use_dev = UseDevice() || v.UseDevice();
   const int N = size;
   auto y = ReadWrite(use_dev);
   auto x = v.Read(use_dev);
   MFEM_FORALL_SWITCH(use_dev, i, N, y[i] += x[i];);
   return *this;
}
Vector &Vector::Add(const double a, const Vector &Va)
{
   MFEM_ASSERT(size == Va.size, "incompatible Vectors!");

   if (a != 0.0)
   {
      const int N = size;
      const bool use_dev = UseDevice() || Va.UseDevice();
      auto y = ReadWrite(use_dev);
      auto x = Va.Read(use_dev);
      MFEM_FORALL_SWITCH(use_dev, i, N, y[i] += a * x[i];);
   }
   return *this;
}
Vector &Vector::Set(const double a, const Vector &Va)
{
   MFEM_ASSERT(size == Va.size, "incompatible Vectors!");

   const bool use_dev = UseDevice() || Va.UseDevice();
   const int N = size;
   auto x = Va.Read(use_dev);
   auto y = Write(use_dev);
   MFEM_FORALL_SWITCH(use_dev, i, N, y[i] = a * x[i];);
   return *this;
}
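A minimal usage sketch of the scalar and entrywise operators above (illustrative; the demo function and sample values are assumptions):

   #include "mfem.hpp"
   using namespace mfem;

   void arithmetic_demo()
   {
      Vector u(3), v(3);
      u = 1.0;          // operator=(double): fill with 1
      v = 2.0;
      u += v;           // entrywise add:  u = {3,3,3}
      u *= 0.5;         // scale:          u = {1.5,1.5,1.5}
      u.Add(2.0, v);    // u += 2*v:       u = {5.5,5.5,5.5}
      u.Set(3.0, v);    // u  = 3*v:       u = {6,6,6}
   }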
void Vector::SetVector(const Vector &v, int offset)
{
   MFEM_ASSERT(v.Size() + offset <= size, "invalid sub-vector");

   const int vs = v.Size();
   const double *vp = v.data;
   double *p = data + offset;
   for (int i = 0; i < vs; i++)
   {
      p[i] = vp[i];
   }
}
void Vector::AddSubVector(const Vector &v, int offset)
{
   MFEM_ASSERT(v.Size() + offset <= size, "invalid sub-vector");

   const int vs = v.Size();
   const double *vp = v.data;
   double *p = data + offset;
   for (int i = 0; i < vs; i++)
   {
      p[i] += vp[i];
   }
}
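A minimal usage sketch (illustrative; values are assumptions): copy a short vector into a longer one at a given offset, then accumulate into the same range.

   #include "mfem.hpp"
   using namespace mfem;

   void subvector_demo()
   {
      Vector big(6), small(2);
      big = 0.0;
      small = 1.0;
      big.SetVector(small, 2);    // big = {0,0,1,1,0,0}
      big.AddSubVector(small, 2); // big = {0,0,2,2,0,0}
   }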
void Vector::Neg()
{
   const bool use_dev = UseDevice();
   const int N = size;
   auto y = ReadWrite(use_dev);
   MFEM_FORALL_SWITCH(use_dev, i, N, y[i] = -y[i];);
}
317 "incompatible Vectors!");
319 #if !defined(MFEM_USE_LEGACY_OPENMP) 321 const int N = v.
size;
323 auto x1 = v1.
Read(use_dev);
324 auto x2 = v2.
Read(use_dev);
325 auto y = v.
Write(use_dev);
326 MFEM_FORALL_SWITCH(use_dev, i, N, y[i] = x1[i] + x2[i];);
328 #pragma omp parallel for 329 for (
int i = 0; i < v.
size; i++)
339 "incompatible Vectors!");
345 else if (
alpha == 1.0)
351 #if !defined(MFEM_USE_LEGACY_OPENMP) 353 const int N = v.
size;
355 auto d_x = v1.
Read(use_dev);
356 auto d_y = v2.
Read(use_dev);
357 auto d_z = v.
Write(use_dev);
358 MFEM_FORALL_SWITCH(use_dev, i, N, d_z[i] = d_x[i] +
alpha * d_y[i];);
360 const double *v1p = v1.
data, *v2p = v2.
data;
362 const int s = v.
size;
363 #pragma omp parallel for 364 for (
int i = 0; i <
s; i++)
366 vp[i] = v1p[i] +
alpha*v2p[i];
375 "incompatible Vectors!");
387 #if !defined(MFEM_USE_LEGACY_OPENMP) 389 const int N = x.
size;
391 auto xd = x.
Read(use_dev);
392 auto yd = y.
Read(use_dev);
393 auto zd = z.
Write(use_dev);
394 MFEM_FORALL_SWITCH(use_dev, i, N, zd[i] =
a * (xd[i] + yd[i]););
396 const double *xp = x.
data;
397 const double *yp = y.
data;
399 const int s = x.
size;
400 #pragma omp parallel for 401 for (
int i = 0; i <
s; i++)
403 zp[i] =
a * (xp[i] + yp[i]);
413 "incompatible Vectors!");
439 #if !defined(MFEM_USE_LEGACY_OPENMP) 441 const int N = x.
size;
443 auto xd = x.
Read(use_dev);
444 auto yd = y.
Read(use_dev);
445 auto zd = z.
Write(use_dev);
446 MFEM_FORALL_SWITCH(use_dev, i, N, zd[i] =
a * xd[i] +
b * yd[i];);
448 const double *xp = x.
data;
449 const double *yp = y.
data;
451 const int s = x.
size;
452 #pragma omp parallel for 453 for (
int i = 0; i <
s; i++)
455 zp[i] =
a * xp[i] +
b * yp[i];
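A minimal usage sketch of the free add() overloads above (illustrative; values are assumptions):

   #include "mfem.hpp"
   using namespace mfem;

   void axpy_demo()
   {
      Vector x(4), y(4), z(4);
      x = 1.0; y = 2.0;
      add(x, y, z);           // z = x + y     -> {3,3,3,3}
      add(x, 0.5, y, z);      // z = x + 0.5*y -> {2,2,2,2}
      add(2.0, x, 3.0, y, z); // z = 2*x + 3*y -> {8,8,8,8}
   }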
464 "incompatible Vectors!");
466 #if !defined(MFEM_USE_LEGACY_OPENMP) 468 const int N = x.
size;
470 auto xd = x.
Read(use_dev);
471 auto yd = y.
Read(use_dev);
472 auto zd = z.
Write(use_dev);
473 MFEM_FORALL_SWITCH(use_dev, i, N, zd[i] = xd[i] - yd[i];);
475 const double *xp = x.
data;
476 const double *yp = y.
data;
478 const int s = x.
size;
479 #pragma omp parallel for 480 for (
int i = 0; i <
s; i++)
482 zp[i] = xp[i] - yp[i];
490 "incompatible Vectors!");
502 #if !defined(MFEM_USE_LEGACY_OPENMP) 504 const int N = x.
size;
506 auto xd = x.
Read(use_dev);
507 auto yd = y.
Read(use_dev);
508 auto zd = z.
Write(use_dev);
509 MFEM_FORALL_SWITCH(use_dev, i, N, zd[i] =
a * (xd[i] - yd[i]););
511 const double *xp = x.
data;
512 const double *yp = y.
data;
514 const int s = x.
size;
515 #pragma omp parallel for 516 for (
int i = 0; i <
s; i++)
518 zp[i] =
a * (xp[i] - yp[i]);
527 "incompatible Vectors!");
532 auto l = lo.
Read(use_dev);
533 auto h = hi.
Read(use_dev);
534 auto m =
Write(use_dev);
535 MFEM_FORALL_SWITCH(use_dev, i, N,
541 else if (m[i] > h[i])
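A minimal usage sketch (illustrative; values are assumptions): median() clamps a vector entrywise to [lo, hi], assuming lo <= hi.

   #include "mfem.hpp"
   using namespace mfem;

   void clamp_demo()
   {
      Vector v(3), lo(3), hi(3);
      v(0) = -1.0; v(1) = 0.5; v(2) = 2.0;
      lo = 0.0; hi = 1.0;
      v.median(lo, hi); // v = {0, 0.5, 1}
   }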
void Vector::GetSubVector(const Array<int> &dofs, Vector &elemvect) const
{
   const int n = dofs.Size();
   elemvect.SetSize(n);
   const bool use_dev = dofs.UseDevice() || elemvect.UseDevice();
   auto d_y = elemvect.Write(use_dev);
   auto d_X = Read(use_dev);
   auto d_dofs = dofs.Read(use_dev);
   MFEM_FORALL_SWITCH(use_dev, i, n,
   {
      const int dof_i = d_dofs[i];
      d_y[i] = dof_i >= 0 ? d_X[dof_i] : -d_X[-dof_i-1];
   });
}
void Vector::GetSubVector(const Array<int> &dofs, double *elem_data) const
{
   HostRead();
   const int n = dofs.Size();
   for (int i = 0; i < n; i++)
   {
      const int j = dofs[i];
      elem_data[i] = (j >= 0) ? data[j] : -data[-1-j];
   }
}
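A minimal usage sketch (illustrative; values are assumptions): a negative index j in dofs encodes position -1-j with a sign flip, as in the two GetSubVector() overloads above.

   #include "mfem.hpp"
   using namespace mfem;

   void gather_demo()
   {
      Vector X(4);
      X(0) = 10.0; X(1) = 20.0; X(2) = 30.0; X(3) = 40.0;
      Array<int> dofs(2);
      dofs[0] = 2;  // plain index: reads X(2)
      dofs[1] = -1; // encoded index: reads -X(-1-(-1)) = -X(0)
      Vector e;
      X.GetSubVector(dofs, e); // e = {30, -10}
   }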
void Vector::SetSubVector(const Array<int> &dofs, const double value)
{
   const bool use_dev = dofs.UseDevice() || UseDevice();
   const int n = dofs.Size();
   // Read-write access: only some entries of *this are overwritten.
   auto d_X = ReadWrite(use_dev);
   auto d_dofs = dofs.Read(use_dev);
   MFEM_FORALL_SWITCH(use_dev, i, n,
   {
      const int j = d_dofs[i];
      if (j >= 0) { d_X[j] = value; }
      else        { d_X[-1-j] = -value; }
   });
}
void Vector::SetSubVector(const Array<int> &dofs, const Vector &elemvect)
{
   MFEM_ASSERT(dofs.Size() <= elemvect.Size(),
               "Size mismatch: length of dofs is " << dofs.Size()
               << ", length of elemvect is " << elemvect.Size());

   const bool use_dev = dofs.UseDevice() || elemvect.UseDevice();
   const int n = dofs.Size();
   auto d_X = ReadWrite(use_dev);
   auto d_y = elemvect.Read(use_dev);
   auto d_dofs = dofs.Read(use_dev);
   MFEM_FORALL_SWITCH(use_dev, i, n,
   {
      const int dof_i = d_dofs[i];
      if (dof_i >= 0)
      {
         d_X[dof_i] = d_y[i];
      }
      else
      {
         d_X[-1-dof_i] = -d_y[i];
      }
   });
}
void Vector::SetSubVector(const Array<int> &dofs, double *elem_data)
{
   HostReadWrite();
   const int n = dofs.Size();
   for (int i = 0; i < n; i++)
   {
      const int j = dofs[i];
      if (j >= 0) { data[j] = elem_data[i]; }
      else        { data[-1-j] = -elem_data[i]; }
   }
}
void Vector::AddElementVector(const Array<int> &dofs, const Vector &elemvect)
{
   MFEM_ASSERT(dofs.Size() <= elemvect.Size(), "Size mismatch: "
               "length of dofs is " << dofs.Size() <<
               ", length of elemvect is " << elemvect.Size());

   const bool use_dev = dofs.UseDevice() || elemvect.UseDevice();
   const int n = dofs.Size();
   auto d_X = ReadWrite(use_dev);
   auto d_y = elemvect.Read(use_dev);
   auto d_dofs = dofs.Read(use_dev);
   MFEM_FORALL_SWITCH(use_dev, i, n,
   {
      const int j = d_dofs[i];
      if (j >= 0) { d_X[j] += d_y[i]; }
      else        { d_X[-1-j] -= d_y[i]; }
   });
}
void Vector::AddElementVector(const Array<int> &dofs, double *elem_data)
{
   HostReadWrite();
   const int n = dofs.Size();
   for (int i = 0; i < n; i++)
   {
      const int j = dofs[i];
      if (j >= 0) { data[j] += elem_data[i]; }
      else        { data[-1-j] -= elem_data[i]; }
   }
}
void Vector::AddElementVector(const Array<int> &dofs, const double a,
                              const Vector &elemvect)
{
   MFEM_ASSERT(dofs.Size() <= elemvect.Size(), "Size mismatch: "
               "length of dofs is " << dofs.Size() <<
               ", length of elemvect is " << elemvect.Size());

   const bool use_dev = dofs.UseDevice() || elemvect.UseDevice();
   const int n = dofs.Size();
   auto d_y = ReadWrite(use_dev);
   auto d_x = elemvect.Read(use_dev);
   auto d_dofs = dofs.Read(use_dev);
   MFEM_FORALL_SWITCH(use_dev, i, n,
   {
      const int j = d_dofs[i];
      if (j >= 0)
      {
         d_y[j] += a * d_x[i];
      }
      else
      {
         d_y[-1-j] -= a * d_x[i];
      }
   });
}
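A minimal usage sketch (illustrative; values are assumptions): scatter scaled element contributions into a global vector, the typical finite-element assembly pattern served by AddElementVector().

   #include "mfem.hpp"
   using namespace mfem;

   void scatter_demo()
   {
      Vector X(5);
      X = 0.0;
      Array<int> dofs(3);
      dofs[0] = 0; dofs[1] = 3; dofs[2] = 4;
      Vector elem(3);
      elem = 1.0;
      X.AddElementVector(dofs, 2.0, elem); // X = {2,0,0,2,2}
   }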
void Vector::SetSubVectorComplement(const Array<int> &dofs, const double val)
{
   const bool use_dev = UseDevice() || dofs.UseDevice();
   const int n = dofs.Size();
   const int N = size;
   Vector dofs_vals(n, use_dev ?
                    Device::GetDeviceMemoryType() :
                    Device::GetHostMemoryType());
   auto d_data = ReadWrite(use_dev);
   auto d_dofs_vals = dofs_vals.Write(use_dev);
   auto d_dofs = dofs.Read(use_dev);
   // Save the entries listed in dofs, overwrite everything, then restore.
   MFEM_FORALL_SWITCH(use_dev, i, n, d_dofs_vals[i] = d_data[d_dofs[i]];);
   MFEM_FORALL_SWITCH(use_dev, i, N, d_data[i] = val;);
   MFEM_FORALL_SWITCH(use_dev, i, n, d_data[d_dofs[i]] = d_dofs_vals[i];);
}
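A minimal usage sketch (illustrative; values are assumptions): overwrite every entry except the listed dofs, e.g. to zero the complement of a set of boundary dofs.

   #include "mfem.hpp"
   using namespace mfem;

   void complement_demo()
   {
      Vector X(4);
      X(0) = 1.0; X(1) = 2.0; X(2) = 3.0; X(3) = 4.0;
      Array<int> keep(2);
      keep[0] = 1; keep[1] = 3;
      X.SetSubVectorComplement(keep, 0.0); // X = {0,2,0,4}
   }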
void Vector::Print(std::ostream &os, int width) const
{
   if (!size) { return; }
   data.Read(MemoryClass::HOST, size);
   for (int i = 0; 1; )
   {
      os << data[i];
      i++;
      if (i == size) { break; }
      // Break the line after every 'width' entries.
      if ( i % width == 0 ) { os << '\n'; }
      else                  { os << ' '; }
   }
   os << '\n';
}
#ifdef MFEM_USE_ADIOS2
void Vector::Print(adios2stream &os, const std::string& variable_name) const
{
   if (!size) { return; }
   data.Read(MemoryClass::HOST, size);
   os.engine.Put(variable_name, &data[0]);
}
#endif
void Vector::Print_HYPRE(std::ostream &os) const
{
   std::ios::fmtflags old_fmt = os.flags();
   os.setf(std::ios::scientific);
   std::streamsize old_prec = os.precision(14);

   os << size << '\n';  // number of rows

   data.Read(MemoryClass::HOST, size);
   for (int i = 0; i < size; i++)
   {
      os << data[i] << '\n';
   }

   os.precision(old_prec);
   os.flags(old_fmt);
}
void Vector::PrintHash(std::ostream &os) const
{
   os << "size: " << size << '\n';
   HashFunction hf;
   hf.AppendDoubles(HostRead(), size);
   os << "hash: " << hf.GetHash() << '\n';
}
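A minimal usage sketch of the three printing styles above (illustrative; the demo function is an assumption):

   #include "mfem.hpp"
   #include <iostream>
   using namespace mfem;

   void print_demo()
   {
      Vector v(4);
      v = 1.5;
      v.Print(std::cout, 2);    // two entries per line
      v.Print_HYPRE(std::cout); // size header, one entry per line
      v.PrintHash(std::cout);   // size plus a hash of the raw data
   }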
void Vector::Randomize(int seed)
{
   const double max = (double)(RAND_MAX) + 1.;

   if (seed == 0) { seed = (int)time(0); }
   srand((unsigned)seed);

   HostWrite();
   for (int i = 0; i < size; i++)
   {
      data[i] = std::abs(rand()/max); // uniform values in [0,1)
   }
}
double Vector::Norml2() const
{
   // Scale entries on the fly, as in std::hypot() and LAPACK's drm2, so
   // that the argument of each std::pow call is <= 1 to avoid overflow.
   if (0 == size) { return 0.0; }
   data.Read(MemoryClass::HOST, size);
   if (1 == size)
   {
      return std::abs(data[0]);
   }
   return kernels::Norml2(size, (const double*) data);
}
double Vector::Normlinf() const
{
   HostRead();
   double max = 0.0;
   for (int i = 0; i < size; i++)
   {
      max = std::max(std::abs(data[i]), max);
   }
   return max;
}
double Vector::Norml1() const
{
   HostRead();
   double sum = 0.0;
   for (int i = 0; i < size; i++)
   {
      sum += std::abs(data[i]);
   }
   return sum;
}
double Vector::Normlp(double p) const
{
   MFEM_ASSERT(p > 0.0, "Vector::Normlp");

   if (p == 1.0) { return Norml1(); }
   if (p == 2.0) { return Norml2(); }
   if (p < infinity())
   {
      // Scale entries on the fly, as in std::hypot() and LAPACK's drm2, so
      // that the argument of each std::pow call is <= 1 to avoid overflow.
      if (0 == size) { return 0.0; }

      HostRead();
      if (1 == size)
      {
         return std::abs(data[0]);
      }

      double scale = 0.0;
      double sum = 0.0;
      for (int i = 0; i < size; i++)
      {
         if (data[i] != 0.0)
         {
            const double absdata = std::abs(data[i]);
            if (scale <= absdata)
            {
               sum = 1.0 + sum * std::pow(scale / absdata, p);
               scale = absdata;
               continue;
            }
            sum += std::pow(absdata / scale, p); // scale > absdata
         }
      }
      return scale * std::pow(sum, 1.0/p);
   }
   return Normlinf(); // p >= infinity()
}
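The scaling keeps every pow() argument at most 1: with s the running maximum of |x_i|, the loop maintains sum = sum_i (|x_i|/s)^p, so the returned value s * sum^(1/p) equals (sum_i |x_i|^p)^(1/p) without ever forming |x_i|^p directly. A minimal numeric check (illustrative; the demo function is an assumption):

   #include "mfem.hpp"
   #include <cassert>
   #include <cmath>
   using namespace mfem;

   void norms_demo()
   {
      Vector v(2);
      v(0) = 3.0; v(1) = -4.0;
      assert(std::abs(v.Norml1() - 7.0) < 1e-14);
      assert(std::abs(v.Norml2() - 5.0) < 1e-14);
      assert(std::abs(v.Normlinf() - 4.0) < 1e-14);
      // l_3 norm of {3,-4} is (27 + 64)^(1/3) = cbrt(91)
      assert(std::abs(v.Normlp(3.0) - std::cbrt(91.0)) < 1e-13);
   }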
double Vector::Max() const
{
   if (size == 0) { return -infinity(); }

   HostRead();
   double max = data[0];

   for (int i = 1; i < size; i++)
   {
      if (data[i] > max)
      {
         max = data[i];
      }
   }
   return max;
}
double Vector::Sum() const
{
   double sum = 0.0;

   const double *h_data = this->HostRead();
   for (int i = 0; i < size; i++)
   {
      sum += h_data[i];
   }
   return sum;
}
#ifdef MFEM_USE_CUDA
static __global__ void cuKernelMin(const int N, double *gdsr, const double *x)
{
   __shared__ double s_min[MFEM_CUDA_BLOCKS];
   const int n = blockDim.x*blockIdx.x + threadIdx.x;
   if (n>=N) { return; }
   const int bid = blockIdx.x;
   const int tid = threadIdx.x;
   const int bbd = bid*blockDim.x;
   const int rid = bbd+tid;
   s_min[tid] = x[n];
   // Tree reduction in shared memory: halve the active workers each pass.
   for (int workers=blockDim.x>>1; workers>0; workers>>=1)
   {
      __syncthreads();
      if (tid >= workers) { continue; }
      if (rid >= N) { continue; }
      const int dualTid = tid + workers;
      if (dualTid >= N) { continue; }
      const int rdd = bbd+dualTid;
      if (rdd >= N) { continue; }
      if (dualTid >= blockDim.x) { continue; }
      s_min[tid] = fmin(s_min[tid], s_min[dualTid]);
   }
   // Thread 0 publishes this block's partial minimum.
   if (tid==0) { gdsr[bid] = s_min[0]; }
}
static Array<double> cuda_reduce_buf;

static double cuVectorMin(const int N, const double *X)
{
   const int tpb = MFEM_CUDA_BLOCKS;
   const int blockSize = MFEM_CUDA_BLOCKS;
   const int gridSize = (N+blockSize-1)/blockSize;
   const int min_sz = (N%tpb)==0 ? (N/tpb) : (1+N/tpb);
   cuda_reduce_buf.SetSize(min_sz);
   Memory<double> &buf = cuda_reduce_buf.GetMemory();
   double *d_min = buf.Write(MemoryClass::DEVICE, min_sz);
   cuKernelMin<<<gridSize,blockSize>>>(N, d_min, X);
   MFEM_GPU_CHECK(cudaGetLastError());
   // Finish the reduction on the host over the per-block results.
   const double *h_min = buf.Read(MemoryClass::HOST, min_sz);
   double min = std::numeric_limits<double>::infinity();
   for (int i = 0; i < min_sz; i++) { min = fmin(min, h_min[i]); }
   return min;
}
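A minimal host-side sketch of the same two-stage strategy (illustrative, plain C++ rather than CUDA): reduce fixed-size blocks independently, then combine the per-block partial results, mirroring what cuKernelMin and cuVectorMin do across thread blocks.

   #include <algorithm>
   #include <limits>
   #include <vector>

   double blocked_min(const double *x, int N, int block = 256)
   {
      const int nblocks = (N + block - 1) / block;
      std::vector<double> partial(nblocks,
                                  std::numeric_limits<double>::infinity());
      for (int b = 0; b < nblocks; b++)        // "thread blocks"
      {
         const int end = std::min(N, (b + 1) * block);
         for (int i = b * block; i < end; i++) // in-block reduction
         {
            partial[b] = std::min(partial[b], x[i]);
         }
      }
      double m = std::numeric_limits<double>::infinity();
      for (double p : partial) { m = std::min(m, p); } // host combine
      return m;
   }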
static __global__ void cuKernelDot(const int N, double *gdsr,
                                   const double *x, const double *y)
{
   __shared__ double s_dot[MFEM_CUDA_BLOCKS];
   const int n = blockDim.x*blockIdx.x + threadIdx.x;
   if (n>=N) { return; }
   const int bid = blockIdx.x;
   const int tid = threadIdx.x;
   const int bbd = bid*blockDim.x;
   const int rid = bbd+tid;
   s_dot[tid] = x[n] * y[n];
   // Same shared-memory tree reduction as in cuKernelMin, summing instead.
   for (int workers=blockDim.x>>1; workers>0; workers>>=1)
   {
      __syncthreads();
      if (tid >= workers) { continue; }
      if (rid >= N) { continue; }
      const int dualTid = tid + workers;
      if (dualTid >= N) { continue; }
      const int rdd = bbd+dualTid;
      if (rdd >= N) { continue; }
      if (dualTid >= blockDim.x) { continue; }
      s_dot[tid] += s_dot[dualTid];
   }
   if (tid==0) { gdsr[bid] = s_dot[0]; }
}
static double cuVectorDot(const int N, const double *X, const double *Y)
{
   const int tpb = MFEM_CUDA_BLOCKS;
   const int blockSize = MFEM_CUDA_BLOCKS;
   const int gridSize = (N+blockSize-1)/blockSize;
   const int dot_sz = (N%tpb)==0 ? (N/tpb) : (1+N/tpb);
   cuda_reduce_buf.SetSize(dot_sz);
   Memory<double> &buf = cuda_reduce_buf.GetMemory();
   double *d_dot = buf.Write(MemoryClass::DEVICE, dot_sz);
   cuKernelDot<<<gridSize,blockSize>>>(N, d_dot, X, Y);
   MFEM_GPU_CHECK(cudaGetLastError());
   const double *h_dot = buf.Read(MemoryClass::HOST, dot_sz);
   double dot = 0.0;
   for (int i = 0; i < dot_sz; i++) { dot += h_dot[i]; }
   return dot;
}
#endif // MFEM_USE_CUDA

#ifdef MFEM_USE_HIP
static __global__ void hipKernelMin(const int N, double *gdsr, const double *x)
{
   __shared__ double s_min[MFEM_HIP_BLOCKS];
   const int n = hipBlockDim_x*hipBlockIdx_x + hipThreadIdx_x;
   if (n>=N) { return; }
   const int bid = hipBlockIdx_x;
   const int tid = hipThreadIdx_x;
   const int bbd = bid*hipBlockDim_x;
   const int rid = bbd+tid;
   s_min[tid] = x[n];
   // Same shared-memory tree reduction as the CUDA variant above.
   for (int workers=hipBlockDim_x>>1; workers>0; workers>>=1)
   {
      __syncthreads();
      if (tid >= workers) { continue; }
      if (rid >= N) { continue; }
      const int dualTid = tid + workers;
      if (dualTid >= N) { continue; }
      const int rdd = bbd+dualTid;
      if (rdd >= N) { continue; }
      if (dualTid >= hipBlockDim_x) { continue; }
      s_min[tid] = fmin(s_min[tid], s_min[dualTid]);
   }
   if (tid==0) { gdsr[bid] = s_min[0]; }
}
static Array<double> cuda_reduce_buf;

static double hipVectorMin(const int N, const double *X)
{
   const int tpb = MFEM_HIP_BLOCKS;
   const int blockSize = MFEM_HIP_BLOCKS;
   const int gridSize = (N+blockSize-1)/blockSize;
   const int min_sz = (N%tpb)==0 ? (N/tpb) : (1+N/tpb);
   cuda_reduce_buf.SetSize(min_sz);
   Memory<double> &buf = cuda_reduce_buf.GetMemory();
   double *d_min = buf.Write(MemoryClass::DEVICE, min_sz);
   hipLaunchKernelGGL(hipKernelMin,gridSize,blockSize,0,0,N,d_min,X);
   MFEM_GPU_CHECK(hipGetLastError());
   const double *h_min = buf.Read(MemoryClass::HOST, min_sz);
   double min = std::numeric_limits<double>::infinity();
   for (int i = 0; i < min_sz; i++) { min = fmin(min, h_min[i]); }
   return min;
}
static __global__ void hipKernelDot(const int N, double *gdsr,
                                    const double *x, const double *y)
{
   __shared__ double s_dot[MFEM_HIP_BLOCKS];
   const int n = hipBlockDim_x*hipBlockIdx_x + hipThreadIdx_x;
   if (n>=N) { return; }
   const int bid = hipBlockIdx_x;
   const int tid = hipThreadIdx_x;
   const int bbd = bid*hipBlockDim_x;
   const int rid = bbd+tid;
   s_dot[tid] = x[n] * y[n];
   for (int workers=hipBlockDim_x>>1; workers>0; workers>>=1)
   {
      __syncthreads();
      if (tid >= workers) { continue; }
      if (rid >= N) { continue; }
      const int dualTid = tid + workers;
      if (dualTid >= N) { continue; }
      const int rdd = bbd+dualTid;
      if (rdd >= N) { continue; }
      if (dualTid >= hipBlockDim_x) { continue; }
      s_dot[tid] += s_dot[dualTid];
   }
   if (tid==0) { gdsr[bid] = s_dot[0]; }
}
static double hipVectorDot(const int N, const double *X, const double *Y)
{
   const int tpb = MFEM_HIP_BLOCKS;
   const int blockSize = MFEM_HIP_BLOCKS;
   const int gridSize = (N+blockSize-1)/blockSize;
   const int dot_sz = (N%tpb)==0 ? (N/tpb) : (1+N/tpb);
   cuda_reduce_buf.SetSize(dot_sz);
   Memory<double> &buf = cuda_reduce_buf.GetMemory();
   double *d_dot = buf.Write(MemoryClass::DEVICE, dot_sz);
   hipLaunchKernelGGL(hipKernelDot,gridSize,blockSize,0,0,N,d_dot,X,Y);
   MFEM_GPU_CHECK(hipGetLastError());
   const double *h_dot = buf.Read(MemoryClass::HOST, dot_sz);
   double dot = 0.0;
   for (int i = 0; i < dot_sz; i++) { dot += h_dot[i]; }
   return dot;
}
#endif // MFEM_USE_HIP

double Vector::operator*(const Vector &v) const
{
   MFEM_ASSERT(size == v.size, "incompatible Vectors!");
   if (size == 0) { return 0.0; }

   const bool use_dev = UseDevice() || v.UseDevice();
#if defined(MFEM_USE_CUDA) || defined(MFEM_USE_HIP) || defined(MFEM_USE_OPENMP)
   auto m_data = Read(use_dev);
#else
   Read(use_dev);
#endif
   auto v_data = v.Read(use_dev);

   if (!use_dev) { goto vector_dot_cpu; }

#ifdef MFEM_USE_OCCA
   if (DeviceCanUseOcca())
   {
      return occa::linalg::dot<double,double,double>(
                OccaMemoryRead(data, size), OccaMemoryRead(v.data, size));
   }
#endif

#ifdef MFEM_USE_CUDA
   if (Device::Allows(Backend::CUDA_MASK))
   {
      return cuVectorDot(size, m_data, v_data);
   }
#endif

#ifdef MFEM_USE_HIP
   if (Device::Allows(Backend::HIP_MASK))
   {
      return hipVectorDot(size, m_data, v_data);
   }
#endif

#ifdef MFEM_USE_OPENMP
   if (Device::Allows(Backend::OMP_MASK))
   {
// By default, use a deterministic way of computing the dot product.
#define MFEM_USE_OPENMP_DETERMINISTIC_DOT
#ifdef MFEM_USE_OPENMP_DETERMINISTIC_DOT
      static Vector th_dot;
      #pragma omp parallel
      {
         const int nt = omp_get_num_threads();
         #pragma omp master
         th_dot.SetSize(nt);
         const int tid = omp_get_thread_num();
         const int stride = (size + nt - 1)/nt;
         const int start = tid*stride;
         const int stop = std::min(start + stride, size);
         double my_dot = 0.0;
         for (int i = start; i < stop; i++)
         {
            my_dot += m_data[i] * v_data[i];
         }
         #pragma omp barrier
         th_dot(tid) = my_dot;
      }
      return th_dot.Sum();
#else
      // Non-deterministic (but potentially faster) OpenMP reduction.
      double prod = 0.0;
      #pragma omp parallel for reduction(+:prod)
      for (int i = 0; i < size; i++)
      {
         prod += m_data[i] * v_data[i];
      }
      return prod;
#endif // MFEM_USE_OPENMP_DETERMINISTIC_DOT
   }
#endif // MFEM_USE_OPENMP

#if defined(MFEM_USE_CUDA) || defined(MFEM_USE_HIP)
   if (use_dev)
   {
      const int N = size;
      auto v_data_ = v.Read();
      auto m_data_ = Read();
      Vector dot(1);
      dot.UseDevice(true);
      auto d_dot = dot.Write();
      dot = 0.0;
      MFEM_FORALL(i, N, d_dot[0] += m_data_[i] * v_data_[i];);
      dot.HostReadWrite();
      return dot(0);
   }
#endif

vector_dot_cpu:
   return operator*(v_data);
}
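A minimal usage sketch (illustrative; the device string and sample values are assumptions): the dot product dispatches to whichever backend the mfem::Device was configured with, and the call site is identical either way.

   #include "mfem.hpp"
   #include <iostream>
   using namespace mfem;

   int main()
   {
      Device device("cpu"); // or "cuda", "hip", "omp" if built with them
      Vector a(1000), b(1000);
      a = 1.0; b = 2.0;
      a.UseDevice(true); b.UseDevice(true);
      std::cout << a * b << '\n'; // 2000
      return 0;
   }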
double Vector::Min() const
{
   if (size == 0) { return infinity(); }

   const bool use_dev = UseDevice();
   auto m_data = Read(use_dev);

   if (!use_dev) { goto vector_min_cpu; }

#ifdef MFEM_USE_OCCA
   if (DeviceCanUseOcca())
   {
      return occa::linalg::min<double,double>(OccaMemoryRead(data, size));
   }
#endif

#ifdef MFEM_USE_CUDA
   if (Device::Allows(Backend::CUDA_MASK))
   {
      return cuVectorMin(size, m_data);
   }
#endif

#ifdef MFEM_USE_HIP
   if (Device::Allows(Backend::HIP_MASK))
   {
      return hipVectorMin(size, m_data);
   }
#endif

#ifdef MFEM_USE_OPENMP
   if (Device::Allows(Backend::OMP_MASK))
   {
      double minimum = m_data[0];
      #pragma omp parallel for reduction(min:minimum)
      for (int i = 0; i < size; i++)
      {
         minimum = std::min(minimum, m_data[i]);
      }
      return minimum;
   }
#endif

#if defined(MFEM_USE_CUDA) || defined(MFEM_USE_HIP)
   if (use_dev)
   {
      const int N = size;
      auto m_data_ = Read();
      Vector min(1);
      min = infinity();
      min.UseDevice(true);
      auto d_min = min.ReadWrite();
      MFEM_FORALL(i, N, d_min[0] = (d_min[0]<m_data_[i])?d_min[0]:m_data_[i];);
      min.HostReadWrite();
      return min(0);
   }
#endif

vector_min_cpu:
   double minimum = data[0];
   for (int i = 1; i < size; i++)
   {
      if (m_data[i] < minimum)
      {
         minimum = m_data[i];
      }
   }
   return minimum;
}
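A minimal usage sketch (illustrative; the demo function and values are assumptions):

   #include "mfem.hpp"
   #include <iostream>
   using namespace mfem;

   void minmax_demo()
   {
      Vector v(3);
      v(0) = 4.0; v(1) = -2.0; v(2) = 7.0;
      std::cout << v.Min() << ' '   // -2
                << v.Max() << ' '   // 7
                << v.Sum() << '\n'; // 9
   }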