#include "../general/forall.hpp"

#if defined(MFEM_USE_SUNDIALS) && defined(MFEM_USE_MPI)
#include <nvector/nvector_parallel.h>
#include <nvector/nvector_parhyp.h>
const int s = v.Size();
MFEM_ASSERT(!v.data.Empty(), "invalid source vector");
for (i = 0; i < np; i++)
for (i = 0; i < np; i++)
for (j = 0; j < dim[i]; j++)
for (int i = 0; i < size; i++)
#ifdef MFEM_USE_LEGACY_OPENMP
   #pragma omp parallel for reduction(+:dot)
#endif
for (int i = 0; i < size; i++)
   dot += data[i] * v[i];
if (use_dev) { Write(); }
auto y = Write(use_dev);
MFEM_FORALL_SWITCH(use_dev, i, N, y[i] = value;);
MFEM_FORALL_SWITCH(use_dev, i, N, y[i] *= c;);
const double m = 1.0/c;
MFEM_FORALL_SWITCH(use_dev, i, N, y[i] *= m;);
MFEM_FORALL_SWITCH(use_dev, i, N, y[i] -= c;);
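// Usage sketch (illustrative, not part of vector.cpp): the scalar assignment and
// compound operators above run through MFEM_FORALL_SWITCH, so they execute on the
// device when device execution is enabled for the Vector and on the host otherwise.
// Assumes #include "mfem.hpp"; the vector 'v' is hypothetical.
void ScalarOpsExample(mfem::Vector &v)
{
   v = 1.0;    // fill every entry with 1
   v *= 3.0;   // multiply each entry by 3
   v /= 2.0;   // divide each entry by 2 (implemented above as multiplication by 1/c)
   v -= 0.5;   // subtract 0.5 from each entry
}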
MFEM_ASSERT(size == v.size, "incompatible Vectors!");
auto x = v.Read(use_dev);
MFEM_FORALL_SWITCH(use_dev, i, N, y[i] -= x[i];);
MFEM_ASSERT(size == v.size, "incompatible Vectors!");
auto x = v.Read(use_dev);
MFEM_FORALL_SWITCH(use_dev, i, N, y[i] += x[i];);
MFEM_ASSERT(size == Va.size, "incompatible Vectors!");
auto x = Va.Read(use_dev);
MFEM_FORALL_SWITCH(use_dev, i, N, y[i] += a * x[i];);
MFEM_ASSERT(size == Va.size, "incompatible Vectors!");
auto x = Va.Read(use_dev);
auto y = Write(use_dev);
MFEM_FORALL_SWITCH(use_dev, i, N, y[i] = a * x[i];);
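// Usage sketch (illustrative, not part of vector.cpp): the axpy-style members
// Set and Add defined above. Assumes x and y have the same size.
void AxpyExample(mfem::Vector &y, const mfem::Vector &x)
{
   y.Set(2.0, x);    // y  = 2 * x
   y.Add(-1.0, x);   // y += -1 * x, so y now equals x
}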
MFEM_ASSERT(v.Size() + offset <= size, "invalid sub-vector");
const int vs = v.Size();
const double *vp = v.data;
double *p = data + offset;
for (int i = 0; i < vs; i++)
MFEM_FORALL_SWITCH(use_dev, i, N, y[i] = -y[i];);
"incompatible Vectors!");
#if !defined(MFEM_USE_LEGACY_OPENMP)
const int N = v.size;
auto x1 = v1.Read(use_dev);
auto x2 = v2.Read(use_dev);
auto y = v.Write(use_dev);
MFEM_FORALL_SWITCH(use_dev, i, N, y[i] = x1[i] + x2[i];);
#else
#pragma omp parallel for
for (int i = 0; i < v.size; i++)
"incompatible Vectors!");
else if (alpha == 1.0)
#if !defined(MFEM_USE_LEGACY_OPENMP)
const int N = v.size;
auto d_x = v1.Read(use_dev);
auto d_y = v2.Read(use_dev);
auto d_z = v.Write(use_dev);
MFEM_FORALL_SWITCH(use_dev, i, N, d_z[i] = d_x[i] + alpha * d_y[i];);
#else
const double *v1p = v1.data, *v2p = v2.data;
const int s = v.size;
#pragma omp parallel for
for (int i = 0; i < s; i++)
   vp[i] = v1p[i] + alpha*v2p[i];
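// Usage sketch (illustrative, not part of vector.cpp): the free functions above
// form entrywise sums; all Vectors involved must have the same size.
void AddExample(const mfem::Vector &a, const mfem::Vector &b, mfem::Vector &c)
{
   add(a, b, c);        // c = a + b
   add(a, 0.5, b, c);   // c = a + 0.5 * b (the alpha overload shown above)
}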
"incompatible Vectors!");
#if !defined(MFEM_USE_LEGACY_OPENMP)
const int N = x.size;
auto xd = x.Read(use_dev);
auto yd = y.Read(use_dev);
auto zd = z.Write(use_dev);
MFEM_FORALL_SWITCH(use_dev, i, N, zd[i] = a * (xd[i] + yd[i]););
#else
const double *xp = x.data;
const double *yp = y.data;
const int s = x.size;
#pragma omp parallel for
for (int i = 0; i < s; i++)
   zp[i] = a * (xp[i] + yp[i]);
"incompatible Vectors!");
#if !defined(MFEM_USE_LEGACY_OPENMP)
const int N = x.size;
auto xd = x.Read(use_dev);
auto yd = y.Read(use_dev);
auto zd = z.Write(use_dev);
MFEM_FORALL_SWITCH(use_dev, i, N, zd[i] = a * xd[i] + b * yd[i];);
#else
const double *xp = x.data;
const double *yp = y.data;
const int s = x.size;
#pragma omp parallel for
for (int i = 0; i < s; i++)
   zp[i] = a * xp[i] + b * yp[i];
"incompatible Vectors!");
#if !defined(MFEM_USE_LEGACY_OPENMP)
const int N = x.size;
auto xd = x.Read(use_dev);
auto yd = y.Read(use_dev);
auto zd = z.Write(use_dev);
MFEM_FORALL_SWITCH(use_dev, i, N, zd[i] = xd[i] - yd[i];);
#else
const double *xp = x.data;
const double *yp = y.data;
const int s = x.size;
#pragma omp parallel for
for (int i = 0; i < s; i++)
   zp[i] = xp[i] - yp[i];
"incompatible Vectors!");
#if !defined(MFEM_USE_LEGACY_OPENMP)
const int N = x.size;
auto xd = x.Read(use_dev);
auto yd = y.Read(use_dev);
auto zd = z.Write(use_dev);
MFEM_FORALL_SWITCH(use_dev, i, N, zd[i] = a * (xd[i] - yd[i]););
#else
const double *xp = x.data;
const double *yp = y.data;
const int s = x.size;
#pragma omp parallel for
for (int i = 0; i < s; i++)
   zp[i] = a * (xp[i] - yp[i]);
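// Usage sketch (illustrative, not part of vector.cpp): entrywise differences via
// the subtract overloads above.
void SubtractExample(const mfem::Vector &x, const mfem::Vector &y, mfem::Vector &z)
{
   subtract(x, y, z);        // z = x - y
   subtract(2.0, x, y, z);   // z = 2 * (x - y) (the scaled overload shown above)
}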
"incompatible Vectors!");
auto l = lo.Read(use_dev);
auto h = hi.Read(use_dev);
auto m = Write(use_dev);
MFEM_FORALL_SWITCH(use_dev, i, N,
   else if (m[i] > h[i])
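// Usage sketch (illustrative, not part of vector.cpp): clamp each entry of v into
// [lo(i), hi(i)]; the implementation above assumes lo <= hi entrywise.
void ClampExample(mfem::Vector &v, const mfem::Vector &lo, const mfem::Vector &hi)
{
   v.median(lo, hi);   // v(i) becomes lo(i) if below, hi(i) if above, else unchanged
}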
const int n = dofs.Size();
auto d_y = elemvect.Write(use_dev);
auto d_X = Read(use_dev);
auto d_dofs = dofs.Read(use_dev);
MFEM_FORALL_SWITCH(use_dev, i, n,
{
   const int dof_i = d_dofs[i];
   d_y[i] = dof_i >= 0 ? d_X[dof_i] : -d_X[-dof_i-1];
});
const int n = dofs.Size();
for (int i = 0; i < n; i++)
{
   const int j = dofs[i];
   elem_data[i] = (j >= 0) ? data[j] : -data[-1-j];
}
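// Illustrative sketch (not part of vector.cpp) of the dof sign convention used by
// GetSubVector above: a negative index j addresses entry -1-j with a flipped sign.
// The Array contents below are hypothetical.
void SignedDofExample(const mfem::Vector &X)
{
   mfem::Array<int> dofs(2);
   dofs[0] = 3;                  // plain dof: reads  X(3)
   dofs[1] = -5;                 // encoded dof: reads -X(4), since -1-(-5) = 4
   mfem::Vector elem;
   X.GetSubVector(dofs, elem);   // elem(0) = X(3), elem(1) = -X(4)
}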
const int n = dofs.Size();
auto d_dofs = dofs.Read(use_dev);
MFEM_FORALL_SWITCH(use_dev, i, n,
   const int j = d_dofs[i];
MFEM_ASSERT(dofs.Size() == elemvect.Size(),
            "Size mismatch: length of dofs is " << dofs.Size()
            << ", length of elemvect is " << elemvect.Size());
const int n = dofs.Size();
auto d_y = elemvect.Read(use_dev);
auto d_dofs = dofs.Read(use_dev);
MFEM_FORALL_SWITCH(use_dev, i, n,
   const int dof_i = d_dofs[i];
   d_X[-1-dof_i] = -d_y[i];
const int n = dofs.Size();
for (int i = 0; i < n; i++)
   const int j = dofs[i];
MFEM_ASSERT(dofs.Size() == elemvect.Size(), "Size mismatch: "
            "length of dofs is " << dofs.Size() <<
            ", length of elemvect is " << elemvect.Size());
const int n = dofs.Size();
auto d_y = elemvect.Read(use_dev);
auto d_dofs = dofs.Read(use_dev);
MFEM_FORALL_SWITCH(use_dev, i, n,
   const int j = d_dofs[i];
const int n = dofs.Size();
for (int i = 0; i < n; i++)
   const int j = dofs[i];
MFEM_ASSERT(dofs.Size() == elemvect.Size(), "Size mismatch: "
            "length of dofs is " << dofs.Size() <<
            ", length of elemvect is " << elemvect.Size());
const int n = dofs.Size();
auto d_x = elemvect.Read(use_dev);
auto d_dofs = dofs.Read(use_dev);
MFEM_FORALL_SWITCH(use_dev, i, n,
   const int j = d_dofs[i];
   d_y[j] += a * d_x[i];
   d_y[-1-j] -= a * d_x[i];
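// Usage sketch (illustrative, not part of vector.cpp): scatter-add an element
// vector into the global vector, as in finite element assembly; negative dofs
// subtract, following the sign convention above.
void ScatterAddExample(mfem::Vector &global, const mfem::Array<int> &dofs,
                       const mfem::Vector &elemvect)
{
   global.AddElementVector(dofs, elemvect);        // global(dofs[i]) += elemvect(i)
   global.AddElementVector(dofs, 0.5, elemvect);   // scaled overload shown above
}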
const int n = dofs.Size();
auto d_dofs_vals = dofs_vals.Write(use_dev);
auto d_dofs = dofs.Read(use_dev);
MFEM_FORALL_SWITCH(use_dev, i, n, d_dofs_vals[i] = d_data[d_dofs[i]];);
MFEM_FORALL_SWITCH(use_dev, i, N, d_data[i] = val;);
MFEM_FORALL_SWITCH(use_dev, i, n, d_data[d_dofs[i]] = d_dofs_vals[i];);
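// Usage sketch (illustrative, not part of vector.cpp): the three kernels above save
// the entries listed in 'dofs', overwrite the whole vector with 'val', then restore
// the saved entries, which is what SetSubVectorComplement exposes:
void ZeroComplementExample(mfem::Vector &v, const mfem::Array<int> &ess_dofs)
{
   v.SetSubVectorComplement(ess_dofs, 0.0);   // zero every entry not listed in ess_dofs
}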
if (!size) { return; }
if ( i % width == 0 )
std::ios::fmtflags old_fmt = out.flags();
out.setf(std::ios::scientific);
std::streamsize old_prec = out.precision(14);
for (i = 0; i < size; i++)
   out << data[i] << '\n';
out.precision(old_prec);
const double max = (double)(RAND_MAX) + 1.;
srand((unsigned)seed);
for (int i = 0; i < size; i++)
   data[i] = std::abs(rand()/max);
return std::abs(data[0]);
for (int i = 0; i < size; i++)
   const double absdata = std::abs(data[i]);
   if (scale <= absdata)
      const double sqr_arg = scale / absdata;
      sum = 1.0 + sum * (sqr_arg * sqr_arg);
   const double sqr_arg = absdata / scale;
   sum += (sqr_arg * sqr_arg);
return scale * std::sqrt(sum);
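// Worked example (illustrative) of the scaling used above: the loop factors the
// largest magnitude seen so far out of the sum of squares, so no entry is squared
// directly and overflow/underflow are avoided. For data = {3, 4} the result is
// 4 * sqrt(1 + (3/4)^2) = 4 * sqrt(25/16) = 5, i.e. the usual sqrt(3^2 + 4^2).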
for (int i = 0; i < size; i++)
   max = std::max(std::abs(data[i]), max);
for (int i = 0; i < size; i++)
   sum += std::abs(data[i]);
MFEM_ASSERT(p > 0.0, "Vector::Normlp");
return std::abs(data[0]);
for (int i = 0; i < size; i++)
   const double absdata = std::abs(data[i]);
   if (scale <= absdata)
      sum = 1.0 + sum * std::pow(scale / absdata, p);
   sum += std::pow(absdata / scale, p);
return scale * std::pow(sum, 1.0/p);
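// Usage sketch (illustrative, not part of vector.cpp): the same scaling generalizes
// to Normlp; for p = 1, p = 2 and p -> infinity the values agree with Norml1,
// Norml2 and Normlinf respectively.
void NormExample(const mfem::Vector &v, double &l1, double &l2, double &lp, double &li)
{
   l1 = v.Norml1();      // sum of absolute values
   l2 = v.Norml2();      // overflow-safe Euclidean norm
   lp = v.Normlp(3.0);   // general p-norm, requires p > 0
   li = v.Normlinf();    // largest absolute value
}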
double max = data[0];
for (int i = 1; i < size; i++)
for (int i = 0; i < size; i++)
static __global__ void cuKernelMin(const int N, double *gdsr, const double *x)
__shared__ double s_min[MFEM_CUDA_BLOCKS];
const int n = blockDim.x*blockIdx.x + threadIdx.x;
if (n>=N) { return; }
const int bid = blockIdx.x;
const int tid = threadIdx.x;
const int bbd = bid*blockDim.x;
const int rid = bbd+tid;
for (int workers=blockDim.x>>1; workers>0; workers>>=1)
   if (tid >= workers) { continue; }
   if (rid >= N) { continue; }
   const int dualTid = tid + workers;
   if (dualTid >= N) { continue; }
   const int rdd = bbd+dualTid;
   if (rdd >= N) { continue; }
   if (dualTid >= blockDim.x) { continue; }
   s_min[tid] = fmin(s_min[tid], s_min[dualTid]);
if (tid==0) { gdsr[bid] = s_min[0]; }
static Array<double> cuda_reduce_buf;

static double cuVectorMin(const int N, const double *X)
const int tpb = MFEM_CUDA_BLOCKS;
const int blockSize = MFEM_CUDA_BLOCKS;
const int gridSize = (N+blockSize-1)/blockSize;
const int min_sz = (N%tpb)==0? (N/tpb) : (1+N/tpb);
cuda_reduce_buf.SetSize(min_sz);
Memory<double> &buf = cuda_reduce_buf.GetMemory();
cuKernelMin<<<gridSize,blockSize>>>(N, d_min, X);
MFEM_CUDA_CHECK(cudaGetLastError());
for (int i = 0; i < min_sz; i++) { min = fmin(min, h_min[i]); }
static __global__ void cuKernelDot(const int N, double *gdsr,
                                   const double *x, const double *y)
__shared__ double s_dot[MFEM_CUDA_BLOCKS];
const int n = blockDim.x*blockIdx.x + threadIdx.x;
if (n>=N) { return; }
const int bid = blockIdx.x;
const int tid = threadIdx.x;
const int bbd = bid*blockDim.x;
const int rid = bbd+tid;
s_dot[tid] = x[n] * y[n];
for (int workers=blockDim.x>>1; workers>0; workers>>=1)
   if (tid >= workers) { continue; }
   if (rid >= N) { continue; }
   const int dualTid = tid + workers;
   if (dualTid >= N) { continue; }
   const int rdd = bbd+dualTid;
   if (rdd >= N) { continue; }
   if (dualTid >= blockDim.x) { continue; }
   s_dot[tid] += s_dot[dualTid];
if (tid==0) { gdsr[bid] = s_dot[0]; }
static double cuVectorDot(const int N, const double *X, const double *Y)
const int tpb = MFEM_CUDA_BLOCKS;
const int blockSize = MFEM_CUDA_BLOCKS;
const int gridSize = (N+blockSize-1)/blockSize;
const int dot_sz = (N%tpb)==0? (N/tpb) : (1+N/tpb);
cuda_reduce_buf.SetSize(dot_sz);
Memory<double> &buf = cuda_reduce_buf.GetMemory();
cuKernelDot<<<gridSize,blockSize>>>(N, d_dot, X, Y);
MFEM_CUDA_CHECK(cudaGetLastError());
for (int i = 0; i < dot_sz; i++) { dot += h_dot[i]; }
#endif // MFEM_USE_CUDA
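// Worked example (illustrative) of the launch arithmetic used by cuVectorMin and
// cuVectorDot above, assuming MFEM_CUDA_BLOCKS = 256 and N = 1000 entries:
//   gridSize = (1000 + 256 - 1) / 256 = 4 blocks,
//   dot_sz   = 1 + 1000 / 256       = 4 partial results,
// so each block reduces its slice into one entry of the buffer and the host loop
// combines the 4 partials into the final value.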
MFEM_ASSERT(size == v.size, "incompatible Vectors!");
#if defined(MFEM_USE_CUDA) || defined(MFEM_USE_OPENMP)
auto m_data = Read(use_dev);
auto v_data = v.Read(use_dev);
if (!use_dev) { goto vector_dot_cpu; }
return occa::linalg::dot<double,double,double>(
return cuVectorDot(size, m_data, v_data);
#ifdef MFEM_USE_OPENMP
#pragma omp parallel for reduction(+:prod)
for (int i = 0; i < size; i++)
   prod += m_data[i] * v_data[i];
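// Usage sketch (illustrative, not part of vector.cpp): the dot product above picks
// an OCCA, CUDA, OpenMP or plain host path depending on the configured backends;
// callers just use operator*.
double DotExample(const mfem::Vector &x, const mfem::Vector &y)
{
   return x * y;   // inner product; x and y must have the same size
}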
auto m_data = Read(use_dev);
if (!use_dev) { goto vector_min_cpu; }
return cuVectorMin(size, m_data);
#ifdef MFEM_USE_OPENMP
double minimum = m_data[0];
#pragma omp parallel for reduction(min:minimum)
for (int i = 0; i < size; i++)
   minimum = std::min(minimum, m_data[i]);
double minimum = data[0];
for (int i = 1; i < size; i++)
   if (m_data[i] < minimum)
      minimum = m_data[i];
#ifdef MFEM_USE_SUNDIALS

#define SUNTRUE TRUE
#define SUNFALSE FALSE

N_Vector_ID nvid = N_VGetVectorID(nv);
case SUNDIALS_NVEC_SERIAL:
case SUNDIALS_NVEC_PARALLEL:
case SUNDIALS_NVEC_PARHYP:
   hypre_Vector *hpv_local = N_VGetVector_ParHyp(nv)->local_vector;
MFEM_ABORT("N_Vector type " << nvid << " is not supported");
MFEM_ASSERT(nv, "N_Vector handle is NULL");
N_Vector_ID nvid = N_VGetVectorID(nv);
case SUNDIALS_NVEC_SERIAL:
   MFEM_ASSERT(NV_OWN_DATA_S(nv) == SUNFALSE, "invalid serial N_Vector");
   NV_DATA_S(nv) = data;
   NV_LENGTH_S(nv) = size;
case SUNDIALS_NVEC_PARALLEL:
   MFEM_ASSERT(NV_OWN_DATA_P(nv) == SUNFALSE, "invalid parallel N_Vector");
   NV_DATA_P(nv) = data;
   NV_LOCLENGTH_P(nv) = size;
case SUNDIALS_NVEC_PARHYP:
   hypre_Vector *hpv_local = N_VGetVector_ParHyp(nv)->local_vector;
   MFEM_ASSERT(hpv_local->owns_data == false, "invalid hypre N_Vector");
   hpv_local->data = data;
   hpv_local->size = size;
MFEM_ABORT("N_Vector type " << nvid << " is not supported");
#endif // MFEM_USE_SUNDIALS
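// Usage sketch (illustrative, assumes MFEM_USE_SUNDIALS is defined): ToNVector,
// documented below as returning a SUNDIALS_NVEC_SERIAL wrapper, exposes the
// Vector's data to SUNDIALS without copying; per the assertions above, the wrapper
// is not expected to own the data, so destroying it leaves the Vector intact.
#ifdef MFEM_USE_SUNDIALS
void NVectorExample(mfem::Vector &v)
{
   N_Vector nv = v.ToNVector();   // serial N_Vector sharing v's data
   // ... pass 'nv' to a SUNDIALS integrator or nonlinear solver ...
   N_VDestroy(nv);                // destroys the wrapper only
}
#endif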
int Size() const
Logical size of the array.
void SetSubVector(const Array< int > &dofs, const double value)
Set the entries listed in dofs to the given value.
void SetVector(const Vector &v, int offset)
Vector()
Default constructor for Vector. Sets size = 0 and data = NULL.
Memory< T > & GetMemory()
Return a reference to the Memory object used by the Array.
double & Elem(int i)
Access Vector entries. Index i = 0 .. size-1.
void SetSize(int s)
Resize the vector to size s.
double Norml2() const
Returns the l2 norm of the vector.
double & operator()(int i)
Access Vector entries using () for 0-based indexing.
void UseDevice(bool use_dev) const
Enable execution of Vector operations using the mfem::Device.
void GetSubVector(const Array< int > &dofs, Vector &elemvect) const
int Size() const
Returns the size of the vector.
Memory types: { CUDA, CUDA_UVM }.
T * Write(MemoryClass mc, int size)
Get write-only access to the memory with the given MemoryClass.
bool UseDevice() const
Return the device flag of the Memory object used by the Array.
double Normlinf() const
Returns the l_infinity norm of the vector.
void CopyFrom(const Memory &src, int size)
Copy size entries from src to *this.
void Randomize(int seed=0)
Set random values in the vector.
double * Write(bool on_dev=true)
Shortcut for mfem::Write(vec.GetMemory(), vec.Size(), on_dev).
void add(const Vector &v1, const Vector &v2, Vector &v)
double operator*(const double *) const
Dot product with a double * array.
MemoryType GetMemoryType() const
Return a MemoryType that is currently valid. If both the host and the device pointers are currently v...
Vector & operator=(const double *v)
Copy Size() entries from v.
void Load(std::istream **in, int np, int *dim)
Reads a vector from multiple files.
double Normlp(double p) const
Returns the l_p norm of the vector.
double * ReadWrite(bool on_dev=true)
Shortcut for mfem::ReadWrite(vec.GetMemory(), vec.Size(), on_dev).
bool UseDevice() const
Return the device flag of the Memory object used by the Vector.
void CopyFromHost(const T *src, int size)
Copy size entries from the host pointer src to *this.
bool DeviceCanUseOcca()
Function that determines if an OCCA kernel should be used, based on the current mfem::Device configur...
static MemoryType GetMemoryType()
Get the current Device MemoryType. This is the MemoryType used by most MFEM classes when allocating m...
const occa::memory OccaMemoryRead(const Memory< T > &mem, size_t size)
Wrap a Memory object as occa::memory for read only access with the mfem::Device MemoryClass. The returned occa::memory is associated with the default occa::device used by MFEM.
void median(const Vector &lo, const Vector &hi)
v = median(v,lo,hi) entrywise. Implementation assumes lo <= hi.
Bitwise-OR of all OpenMP backends.
const T * Read(bool on_dev=true) const
Shortcut for mfem::Read(a.GetMemory(), a.Size(), on_dev).
void Reset()
Reset the memory to be empty, ensuring that Delete() will be a no-op.
void AddElementVector(const Array< int > &dofs, const Vector &elemvect)
Add (element) subvector to the vector.
Vector & operator/=(double c)
Bitwise-OR of all CUDA backends.
void SetSubVectorComplement(const Array< int > &dofs, const double val)
Set all vector entries NOT in the 'dofs' array to the given 'val'.
Vector & operator*=(double c)
double Min() const
Returns the minimal element of the vector.
double Norml1() const
Returns the l_1 norm of the vector.
void Print(std::ostream &out=mfem::out, int width=8) const
Prints vector to stream out.
void subtract(const Vector &x, const Vector &y, Vector &z)
void SetSize(int nsize)
Change logical size of the array, keep existing entries.
void Print_HYPRE(std::ostream &out) const
Prints vector to stream out in HYPRE_Vector format.
static bool Allows(unsigned long b_mask)
Return true if any of the backends in the backend mask, b_mask, are allowed.
void SetDataAndSize(double *d, int s)
Set the Vector data and size.
Vector & Set(const double a, const Vector &x)
(*this) = a * x
Vector & Add(const double a, const Vector &Va)
(*this) += a * Va
Host memory; using new[] and delete[].
const double * Read(bool on_dev=true) const
Shortcut for mfem::Read(vec.GetMemory(), vec.Size(), on_dev).
double Max() const
Returns the maximal element of the vector.
void New(int size)
Allocate host memory for size entries with type MemoryType::HOST.
bool Empty() const
Return true if the Memory object is empty, see Reset().
T * ReadWrite(MemoryClass mc, int size)
Get read-write access to the memory with the given MemoryClass.
Vector & operator-=(double c)
double infinity()
Define a shortcut for std::numeric_limits<double>::infinity()
Vector & operator+=(const Vector &v)
OutStream out(std::cout)
Global stream used by the library for standard output. Initially it uses the same std::streambuf as s...
const T * Read(MemoryClass mc, int size) const
Get read-only access to the memory with the given MemoryClass.
double Sum() const
Return the sum of the vector entries.
virtual N_Vector ToNVector()
Return a new wrapper SUNDIALS N_Vector of type SUNDIALS_NVEC_SERIAL.
void Neg()
(*this) = -(*this)