14 #include "kernels.hpp" 16 #include "../general/forall.hpp" 18 #ifdef MFEM_USE_OPENMP 34 const int s = v.
Size();
38 MFEM_ASSERT(!v.
data.
Empty(),
"invalid source vector");
55 for (i = 0; i < np; i++)
64 for (i = 0; i < np; i++)
66 for (j = 0; j <
dim[i]; j++)
71 if (!*in[i] && errno == ERANGE)
84 for (
int i = 0; i <
size; i++)
89 if (!in && errno == ERANGE)
109 #ifdef MFEM_USE_LEGACY_OPENMP 110 #pragma omp parallel for reduction(+:dot) 112 for (
int i = 0; i <
size; i++)
114 dot +=
data[i] * v[i];
134 const bool use_dev =
UseDevice() || vuse;
137 if (use_dev) {
Write(); }
146 data = std::move(v.data);
148 const auto size_tmp = v.size;
158 auto y =
Write(use_dev);
174 MFEM_ASSERT(
size == v.
size,
"incompatible Vectors!");
179 auto x = v.
Read(use_dev);
188 const double m = 1.0/c;
196 MFEM_ASSERT(
size == v.
size,
"incompatible Vectors!");
201 auto x = v.
Read(use_dev);
217 MFEM_ASSERT(
size == v.
size,
"incompatible Vectors!");
222 auto x = v.
Read(use_dev);
238 MFEM_ASSERT(
size == v.
size,
"incompatible Vectors!");
243 auto x = v.
Read(use_dev);
250 MFEM_ASSERT(
size == Va.
size,
"incompatible Vectors!");
257 auto x = Va.
Read(use_dev);
265 MFEM_ASSERT(
size == Va.
size,
"incompatible Vectors!");
269 auto x = Va.
Read(use_dev);
270 auto y =
Write(use_dev);
277 MFEM_ASSERT(v.
Size() + offset <=
size,
"invalid sub-vector");
279 const int vs = v.
Size();
280 const double *vp = v.
data;
281 double *
p =
data + offset;
282 for (
int i = 0; i < vs; i++)
290 MFEM_ASSERT(v.
Size() + offset <=
size,
"invalid sub-vector");
292 const int vs = v.
Size();
293 const double *vp = v.
data;
294 double *
p =
data + offset;
295 for (
int i = 0; i < vs; i++)
320 "incompatible Vectors!");
322 #if !defined(MFEM_USE_LEGACY_OPENMP) 324 const int N = v.
size;
326 auto x1 = v1.
Read(use_dev);
327 auto x2 = v2.
Read(use_dev);
328 auto y = v.
Write(use_dev);
331 #pragma omp parallel for 332 for (
int i = 0; i < v.
size; i++)
342 "incompatible Vectors!");
348 else if (
alpha == 1.0)
354 #if !defined(MFEM_USE_LEGACY_OPENMP) 356 const int N = v.
size;
358 auto d_x = v1.
Read(use_dev);
359 auto d_y = v2.
Read(use_dev);
360 auto d_z = v.
Write(use_dev);
363 d_z[i] = d_x[i] +
alpha * d_y[i];
366 const double *v1p = v1.
data, *v2p = v2.
data;
368 const int s = v.
size;
369 #pragma omp parallel for 370 for (
int i = 0; i <
s; i++)
372 vp[i] = v1p[i] +
alpha*v2p[i];
381 "incompatible Vectors!");
393 #if !defined(MFEM_USE_LEGACY_OPENMP) 395 const int N = x.
size;
397 auto xd = x.
Read(use_dev);
398 auto yd = y.
Read(use_dev);
399 auto zd = z.
Write(use_dev);
402 zd[i] =
a * (xd[i] + yd[i]);
405 const double *xp = x.
data;
406 const double *yp = y.
data;
408 const int s = x.
size;
409 #pragma omp parallel for 410 for (
int i = 0; i <
s; i++)
412 zp[i] =
a * (xp[i] + yp[i]);
422 "incompatible Vectors!");
448 #if !defined(MFEM_USE_LEGACY_OPENMP) 450 const int N = x.
size;
452 auto xd = x.
Read(use_dev);
453 auto yd = y.
Read(use_dev);
454 auto zd = z.
Write(use_dev);
457 zd[i] =
a * xd[i] +
b * yd[i];
460 const double *xp = x.
data;
461 const double *yp = y.
data;
463 const int s = x.
size;
464 #pragma omp parallel for 465 for (
int i = 0; i <
s; i++)
467 zp[i] =
a * xp[i] +
b * yp[i];
476 "incompatible Vectors!");
478 #if !defined(MFEM_USE_LEGACY_OPENMP) 480 const int N = x.
size;
482 auto xd = x.
Read(use_dev);
483 auto yd = y.
Read(use_dev);
484 auto zd = z.
Write(use_dev);
487 zd[i] = xd[i] - yd[i];
490 const double *xp = x.
data;
491 const double *yp = y.
data;
493 const int s = x.
size;
494 #pragma omp parallel for 495 for (
int i = 0; i <
s; i++)
497 zp[i] = xp[i] - yp[i];
505 "incompatible Vectors!");
517 #if !defined(MFEM_USE_LEGACY_OPENMP) 519 const int N = x.
size;
521 auto xd = x.
Read(use_dev);
522 auto yd = y.
Read(use_dev);
523 auto zd = z.
Write(use_dev);
526 zd[i] =
a * (xd[i] - yd[i]);
529 const double *xp = x.
data;
530 const double *yp = y.
data;
532 const int s = x.
size;
533 #pragma omp parallel for 534 for (
int i = 0; i <
s; i++)
536 zp[i] =
a * (xp[i] - yp[i]);
547 MFEM_VERIFY(
size == 3,
"Only 3D vectors supported in cross.");
548 MFEM_VERIFY(vin.
Size() == 3,
"Only 3D vectors supported in cross.");
550 vout(0) =
data[1]*vin(2)-
data[2]*vin(1);
551 vout(1) =
data[2]*vin(0)-
data[0]*vin(2);
552 vout(2) =
data[0]*vin(1)-
data[1]*vin(0);
558 "incompatible Vectors!");
563 auto l = lo.
Read(use_dev);
564 auto h = hi.
Read(use_dev);
565 auto m =
Write(use_dev);
572 else if (m[i] > h[i])
581 const int n = dofs.
Size();
584 auto d_y = elemvect.
Write(use_dev);
585 auto d_X =
Read(use_dev);
586 auto d_dofs = dofs.
Read(use_dev);
589 const int dof_i = d_dofs[i];
590 d_y[i] = dof_i >= 0 ? d_X[dof_i] : -d_X[-dof_i-1];
597 const int n = dofs.
Size();
598 for (
int i = 0; i < n; i++)
600 const int j = dofs[i];
601 elem_data[i] = (j >= 0) ?
data[j] : -
data[-1-j];
608 const int n = dofs.
Size();
611 auto d_dofs = dofs.
Read(use_dev);
614 const int j = d_dofs[i];
628 MFEM_ASSERT(dofs.
Size() <= elemvect.
Size(),
629 "Size mismatch: length of dofs is " << dofs.
Size()
630 <<
", length of elemvect is " << elemvect.
Size());
633 const int n = dofs.
Size();
636 auto d_y = elemvect.
Read(use_dev);
637 auto d_dofs = dofs.
Read(use_dev);
640 const int dof_i = d_dofs[i];
647 d_X[-1-dof_i] = -d_y[i];
656 const int n = dofs.
Size();
657 for (
int i = 0; i < n; i++)
659 const int j= dofs[i];
673 MFEM_ASSERT(dofs.
Size() <= elemvect.
Size(),
"Size mismatch: " 674 "length of dofs is " << dofs.
Size() <<
675 ", length of elemvect is " << elemvect.
Size());
678 const int n = dofs.
Size();
679 auto d_y = elemvect.
Read(use_dev);
681 auto d_dofs = dofs.
Read(use_dev);
684 const int j = d_dofs[i];
699 const int n = dofs.
Size();
700 for (
int i = 0; i < n; i++)
702 const int j = dofs[i];
717 MFEM_ASSERT(dofs.
Size() <= elemvect.
Size(),
"Size mismatch: " 718 "length of dofs is " << dofs.
Size() <<
719 ", length of elemvect is " << elemvect.
Size());
722 const int n = dofs.
Size();
724 auto d_x = elemvect.
Read(use_dev);
725 auto d_dofs = dofs.
Read(use_dev);
728 const int j = d_dofs[i];
731 d_y[j] +=
a * d_x[i];
735 d_y[-1-j] -=
a * d_x[i];
743 const int n = dofs.
Size();
745 Vector dofs_vals(n, use_dev ?
749 auto d_dofs_vals = dofs_vals.
Write(use_dev);
750 auto d_dofs = dofs.
Read(use_dev);
751 mfem::forall_switch(use_dev, n, [=] MFEM_HOST_DEVICE (
int i) { d_dofs_vals[i] = d_data[d_dofs[i]]; });
753 mfem::forall_switch(use_dev, n, [=] MFEM_HOST_DEVICE (
int i) { d_data[d_dofs[i]] = d_dofs_vals[i]; });
758 if (!
size) {
return; }
768 if ( i % width == 0 )
780 #ifdef MFEM_USE_ADIOS2 782 const std::string& variable_name)
const 784 if (!
size) {
return; }
786 os.engine.Put(variable_name, &
data[0] );
793 std::ios::fmtflags old_fmt = os.flags();
794 os.setf(std::ios::scientific);
795 std::streamsize old_prec = os.precision(14);
800 for (i = 0; i <
size; i++)
805 os.precision(old_prec);
811 os <<
"size: " <<
size <<
'\n';
814 os <<
"hash: " << hf.
GetHash() <<
'\n';
819 const double max = (double)(RAND_MAX) + 1.;
826 srand((
unsigned)seed);
829 for (
int i = 0; i <
size; i++)
831 data[i] = std::abs(rand()/max);
848 return std::abs(
data[0]);
857 for (
int i = 0; i <
size; i++)
859 max = std::max(std::abs(
data[i]), max);
868 for (
int i = 0; i <
size; i++)
870 sum += std::abs(
data[i]);
877 MFEM_ASSERT(
p > 0.0,
"Vector::Normlp");
899 return std::abs(
data[0]);
905 for (
int i = 0; i <
size; i++)
909 const double absdata = std::abs(
data[i]);
910 if (scale <= absdata)
912 sum = 1.0 + sum * std::pow(scale / absdata,
p);
916 sum += std::pow(absdata / scale,
p);
919 return scale * std::pow(sum, 1.0/
p);
930 double max =
data[0];
932 for (
int i = 1; i <
size; i++)
944 static __global__
void cuKernelMin(
const int N,
double *gdsr,
const double *x)
946 __shared__
double s_min[MFEM_CUDA_BLOCKS];
947 const int n = blockDim.x*blockIdx.x + threadIdx.x;
948 if (n>=N) {
return; }
949 const int bid = blockIdx.x;
950 const int tid = threadIdx.x;
951 const int bbd = bid*blockDim.x;
952 const int rid = bbd+tid;
954 for (
int workers=blockDim.x>>1; workers>0; workers>>=1)
957 if (tid >= workers) {
continue; }
958 if (rid >= N) {
continue; }
959 const int dualTid = tid + workers;
960 if (dualTid >= N) {
continue; }
961 const int rdd = bbd+dualTid;
962 if (rdd >= N) {
continue; }
963 if (dualTid >= blockDim.x) {
continue; }
964 s_min[tid] = fmin(s_min[tid], s_min[dualTid]);
966 if (tid==0) { gdsr[bid] = s_min[0]; }
969 static Array<double> cuda_reduce_buf;
971 static double cuVectorMin(
const int N,
const double *X)
973 const int tpb = MFEM_CUDA_BLOCKS;
974 const int blockSize = MFEM_CUDA_BLOCKS;
975 const int gridSize = (N+blockSize-1)/blockSize;
976 const int min_sz = (N%tpb)==0? (N/tpb) : (1+N/tpb);
977 cuda_reduce_buf.
SetSize(min_sz);
978 Memory<double> &buf = cuda_reduce_buf.
GetMemory();
980 cuKernelMin<<<gridSize,blockSize>>>(N, d_min, X);
981 MFEM_GPU_CHECK(cudaGetLastError());
984 for (
int i = 0; i < min_sz; i++) { min = fmin(min, h_min[i]); }
988 static __global__
void cuKernelDot(
const int N,
double *gdsr,
989 const double *x,
const double *y)
991 __shared__
double s_dot[MFEM_CUDA_BLOCKS];
992 const int n = blockDim.x*blockIdx.x + threadIdx.x;
993 if (n>=N) {
return; }
994 const int bid = blockIdx.x;
995 const int tid = threadIdx.x;
996 const int bbd = bid*blockDim.x;
997 const int rid = bbd+tid;
998 s_dot[tid] = y ? (x[n] * y[n]) : x[n];
999 for (
int workers=blockDim.x>>1; workers>0; workers>>=1)
1002 if (tid >= workers) {
continue; }
1003 if (rid >= N) {
continue; }
1004 const int dualTid = tid + workers;
1005 if (dualTid >= N) {
continue; }
1006 const int rdd = bbd+dualTid;
1007 if (rdd >= N) {
continue; }
1008 if (dualTid >= blockDim.x) {
continue; }
1009 s_dot[tid] += s_dot[dualTid];
1011 if (tid==0) { gdsr[bid] = s_dot[0]; }
1014 static double cuVectorDot(
const int N,
const double *X,
const double *Y)
1016 const int tpb = MFEM_CUDA_BLOCKS;
1017 const int blockSize = MFEM_CUDA_BLOCKS;
1018 const int gridSize = (N+blockSize-1)/blockSize;
1019 const int dot_sz = (N%tpb)==0? (N/tpb) : (1+N/tpb);
1021 Memory<double> &buf = cuda_reduce_buf.
GetMemory();
1023 cuKernelDot<<<gridSize,blockSize>>>(N, d_dot, X, Y);
1024 MFEM_GPU_CHECK(cudaGetLastError());
1027 for (
int i = 0; i < dot_sz; i++) { dot += h_dot[i]; }
1030 #endif // MFEM_USE_CUDA 1033 static __global__
void hipKernelMin(
const int N,
double *gdsr,
const double *x)
1035 __shared__
double s_min[MFEM_HIP_BLOCKS];
1036 const int n = hipBlockDim_x*hipBlockIdx_x + hipThreadIdx_x;
1037 if (n>=N) {
return; }
1038 const int bid = hipBlockIdx_x;
1039 const int tid = hipThreadIdx_x;
1040 const int bbd = bid*hipBlockDim_x;
1041 const int rid = bbd+tid;
1043 for (
int workers=hipBlockDim_x>>1; workers>0; workers>>=1)
1046 if (tid >= workers) {
continue; }
1047 if (rid >= N) {
continue; }
1048 const int dualTid = tid + workers;
1049 if (dualTid >= N) {
continue; }
1050 const int rdd = bbd+dualTid;
1051 if (rdd >= N) {
continue; }
1052 if (dualTid >= hipBlockDim_x) {
continue; }
1053 s_min[tid] = fmin(s_min[tid], s_min[dualTid]);
1055 if (tid==0) { gdsr[bid] = s_min[0]; }
1058 static Array<double> hip_reduce_buf;
1060 static double hipVectorMin(
const int N,
const double *X)
1062 const int tpb = MFEM_HIP_BLOCKS;
1063 const int blockSize = MFEM_HIP_BLOCKS;
1064 const int gridSize = (N+blockSize-1)/blockSize;
1065 const int min_sz = (N%tpb)==0 ? (N/tpb) : (1+N/tpb);
1066 hip_reduce_buf.
SetSize(min_sz);
1067 Memory<double> &buf = hip_reduce_buf.
GetMemory();
1069 hipLaunchKernelGGL(hipKernelMin,gridSize,blockSize,0,0,N,d_min,X);
1070 MFEM_GPU_CHECK(hipGetLastError());
1073 for (
int i = 0; i < min_sz; i++) { min = fmin(min, h_min[i]); }
1077 static __global__
void hipKernelDot(
const int N,
double *gdsr,
1078 const double *x,
const double *y)
1080 __shared__
double s_dot[MFEM_HIP_BLOCKS];
1081 const int n = hipBlockDim_x*hipBlockIdx_x + hipThreadIdx_x;
1082 if (n>=N) {
return; }
1083 const int bid = hipBlockIdx_x;
1084 const int tid = hipThreadIdx_x;
1085 const int bbd = bid*hipBlockDim_x;
1086 const int rid = bbd+tid;
1087 s_dot[tid] = y ? (x[n] * y[n]) : x[n];
1088 for (
int workers=hipBlockDim_x>>1; workers>0; workers>>=1)
1091 if (tid >= workers) {
continue; }
1092 if (rid >= N) {
continue; }
1093 const int dualTid = tid + workers;
1094 if (dualTid >= N) {
continue; }
1095 const int rdd = bbd+dualTid;
1096 if (rdd >= N) {
continue; }
1097 if (dualTid >= hipBlockDim_x) {
continue; }
1098 s_dot[tid] += s_dot[dualTid];
1100 if (tid==0) { gdsr[bid] = s_dot[0]; }
1103 static double hipVectorDot(
const int N,
const double *X,
const double *Y)
1105 const int tpb = MFEM_HIP_BLOCKS;
1106 const int blockSize = MFEM_HIP_BLOCKS;
1107 const int gridSize = (N+blockSize-1)/blockSize;
1108 const int dot_sz = (N%tpb)==0 ? (N/tpb) : (1+N/tpb);
1109 hip_reduce_buf.
SetSize(dot_sz);
1110 Memory<double> &buf = hip_reduce_buf.
GetMemory();
1112 hipLaunchKernelGGL(hipKernelDot,gridSize,blockSize,0,0,N,d_dot,X,Y);
1113 MFEM_GPU_CHECK(hipGetLastError());
1116 for (
int i = 0; i < dot_sz; i++) { dot += h_dot[i]; }
1119 #endif // MFEM_USE_HIP 1123 MFEM_ASSERT(
size == v.
size,
"incompatible Vectors!");
1124 if (
size == 0) {
return 0.0; }
1127 #if defined(MFEM_USE_CUDA) || defined(MFEM_USE_HIP) || defined(MFEM_USE_OPENMP) 1128 auto m_data =
Read(use_dev);
1132 auto v_data = v.
Read(use_dev);
1134 if (!use_dev) {
goto vector_dot_cpu; }
1136 #ifdef MFEM_USE_OCCA 1139 return occa::linalg::dot<double,double,double>(
1144 #ifdef MFEM_USE_CUDA 1147 return cuVectorDot(
size, m_data, v_data);
1154 return hipVectorDot(
size, m_data, v_data);
1158 #ifdef MFEM_USE_OPENMP 1161 #define MFEM_USE_OPENMP_DETERMINISTIC_DOT 1162 #ifdef MFEM_USE_OPENMP_DETERMINISTIC_DOT 1165 #pragma omp parallel 1167 const int nt = omp_get_num_threads();
1170 const int tid = omp_get_thread_num();
1171 const int stride = (
size + nt - 1)/nt;
1172 const int start = tid*stride;
1173 const int stop = std::min(start + stride,
size);
1174 double my_dot = 0.0;
1175 for (
int i = start; i < stop; i++)
1177 my_dot += m_data[i] * v_data[i];
1180 th_dot(tid) = my_dot;
1182 return th_dot.
Sum();
1186 #pragma omp parallel for reduction(+:prod) 1187 for (
int i = 0; i <
size; i++)
1189 prod += m_data[i] * v_data[i];
1192 #endif // MFEM_USE_OPENMP_DETERMINISTIC_DOT 1194 #endif // MFEM_USE_OPENMP 1198 auto v_data_ = v.
Read();
1199 auto m_data_ =
Read();
1201 dot.UseDevice(
true);
1202 auto d_dot = dot.Write();
1206 d_dot[0] += m_data_[i] * v_data_[i];
1208 dot.HostReadWrite();
1220 auto m_data =
Read(use_dev);
1222 if (!use_dev) {
goto vector_min_cpu; }
1224 #ifdef MFEM_USE_OCCA 1231 #ifdef MFEM_USE_CUDA 1234 return cuVectorMin(
size, m_data);
1241 return hipVectorMin(
size, m_data);
1245 #ifdef MFEM_USE_OPENMP 1248 double minimum = m_data[0];
1249 #pragma omp parallel for reduction(min:minimum) 1250 for (
int i = 0; i <
size; i++)
1252 minimum = std::min(minimum, m_data[i]);
1261 auto m_data_ =
Read();
1268 d_min[0] = (d_min[0]<m_data_[i])?d_min[0]:m_data_[i];
1275 double minimum =
data[0];
1276 for (
int i = 1; i <
size; i++)
1278 if (m_data[i] < minimum)
1280 minimum = m_data[i];
1288 if (
size == 0) {
return 0.0; }
1292 #ifdef MFEM_USE_CUDA 1295 return cuVectorDot(
size,
Read(),
nullptr);
1301 return hipVectorDot(
size,
Read(),
nullptr);
1307 auto d_data =
Read();
1310 auto d_sum = sum.
Write();
1314 d_sum[0] += d_data[i];
1324 for (
int i = 0; i <
size; i++)
const T * Read(bool on_dev=true) const
Shortcut for mfem::Read(a.GetMemory(), a.Size(), on_dev).
Hash function for data sequences.
void SetSubVector(const Array< int > &dofs, const double value)
Set the entries listed in dofs to the given value.
void SetVector(const Vector &v, int offset)
const T * Read(MemoryClass mc, int size) const
Get read-only access to the memory with the given MemoryClass.
Memory< T > & GetMemory()
Return a reference to the Memory object used by the Array.
double & Elem(int i)
Access Vector entries. Index i = 0 .. size-1.
Device memory; using CUDA or HIP *Malloc and *Free.
void Print_HYPRE(std::ostream &out) const
Prints vector to stream out in HYPRE_Vector format.
void SetSize(int s)
Resize the vector to size s.
virtual const double * HostRead() const
Shortcut for mfem::Read(vec.GetMemory(), vec.Size(), false).
virtual void UseDevice(bool use_dev) const
Enable execution of Vector operations using the mfem::Device.
void Print(std::ostream &out=mfem::out, int width=8) const
Prints vector to stream out.
double & operator()(int i)
Access Vector entries using () for 0-based indexing.
Bitwise-OR of all HIP backends.
int Size() const
Returns the size of the vector.
std::string GetHash() const
Return the hash string for the current sequence and reset (clear) the sequence.
virtual double * HostWrite()
Shortcut for mfem::Write(vec.GetMemory(), vec.Size(), false).
T * Write(MemoryClass mc, int size)
Get write-only access to the memory with the given MemoryClass.
virtual const double * Read(bool on_dev=true) const
Shortcut for mfem::Read(vec.GetMemory(), vec.Size(), on_dev).
void CopyFrom(const Memory &src, int size)
Copy size entries from src to *this.
void GetSubVector(const Array< int > &dofs, Vector &elemvect) const
Extract entries listed in dofs to the output Vector elemvect.
void Randomize(int seed=0)
Set random values in the vector.
void add(const Vector &v1, const Vector &v2, Vector &v)
HashFunction & AppendDoubles(const double *doubles, size_t num_doubles)
Add a sequence of doubles for hashing, given as a c-array.
void AddSubVector(const Vector &v, int offset)
Vector & operator=(const double *v)
Copy Size() entries from v.
void Load(std::istream **in, int np, int *dim)
Reads a vector from multiple files.
void CopyFromHost(const T *src, int size)
Copy size entries from the host pointer src to *this.
bool Empty() const
Return true if the Memory object is empty, see Reset().
bool DeviceCanUseOcca()
Function that determines if an OCCA kernel should be used, based on the current mfem::Device configuration.
virtual double * Write(bool on_dev=true)
Shortcut for mfem::Write(vec.GetMemory(), vec.Size(), on_dev).
static MemoryType GetDeviceMemoryType()
Get the current Device MemoryType. This is the MemoryType used by most MFEM classes when allocating m...
const occa::memory OccaMemoryRead(const Memory< T > &mem, size_t size)
Wrap a Memory object as occa::memory for read only access with the mfem::Device MemoryClass. The returned occa::memory is associated with the default occa::device used by MFEM.
void Reciprocal()
(*this)(i) = 1.0 / (*this)(i)
double Sum() const
Return the sum of the vector entries.
void median(const Vector &lo, const Vector &hi)
v = median(v,lo,hi) entrywise. Implementation assumes lo <= hi.
Bitwise-OR of all OpenMP backends.
void forall_switch(bool use_dev, int N, lambda &&body)
bool UseDevice() const
Return the device flag of the Memory object used by the Array.
MFEM_HOST_DEVICE double Norml2(const int size, const T *data)
Returns the l2 norm of the Vector with given size and data.
double Normlp(double p) const
Returns the l_p norm of the vector.
void AddElementVector(const Array< int > &dofs, const Vector &elemvect)
Add elements of the elemvect Vector to the entries listed in dofs. Negative dof values cause the -dof...
Vector & operator/=(double c)
Bitwise-OR of all CUDA backends.
void SetSubVectorComplement(const Array< int > &dofs, const double val)
Set all vector entries NOT in the dofs Array to the given val.
Vector & operator*=(double c)
Vector & operator+=(double c)
double p(const Vector &x, double t)
static MemoryType GetHostMemoryType()
Get the current Host MemoryType. This is the MemoryType used by most MFEM classes when allocating mem...
void forall(int N, lambda &&body)
void subtract(const Vector &x, const Vector &y, Vector &z)
double Min() const
Returns the minimal element of the vector.
void SetSize(int nsize)
Change the logical size of the array, keep existing entries.
void PrintHash(std::ostream &out) const
Print the Vector size and hash of its data.
static bool Allows(unsigned long b_mask)
Return true if any of the backends in the backend mask, b_mask, are allowed.
double Max() const
Returns the maximal element of the vector.
double Norml1() const
Returns the l_1 norm of the vector.
Vector & Set(const double a, const Vector &x)
(*this) = a * x
virtual double * ReadWrite(bool on_dev=true)
Shortcut for mfem::ReadWrite(vec.GetMemory(), vec.Size(), on_dev).
Vector & Add(const double a, const Vector &Va)
(*this) += a * Va
Host memory; using new[] and delete[].
void New(int size)
Allocate host memory for size entries with the current host memory type returned by MemoryManager::Ge...
T * ReadWrite(MemoryClass mc, int size)
Get read-write access to the memory with the given MemoryClass.
Vector & operator-=(double c)
double infinity()
Define a shortcut for std::numeric_limits<double>::infinity()
double Norml2() const
Returns the l2 norm of the vector.
int Size() const
Return the logical size of the array.
virtual bool UseDevice() const
Return the device flag of the Memory object used by the Vector.
void cross3D(const Vector &vin, Vector &vout) const
double Normlinf() const
Returns the l_infinity norm of the vector.
virtual double * HostReadWrite()
Shortcut for mfem::ReadWrite(vec.GetMemory(), vec.Size(), false).
MemoryType GetMemoryType() const
Return a MemoryType that is currently valid. If both the host and the device pointers are currently v...
double operator*(const double *) const
Dot product with a double * array.
[device] Debug backend: host memory is READ/WRITE protected while a device is in use. This allows testing the "device" code path (using separate host/device memory pools and host <-> device transfers) without any GPU hardware. Because 'DEBUG' is sometimes used as a macro, _DEVICE has been added to avoid conflicts.
void Neg()
(*this) = -(*this)