27#if defined(MFEM_USE_CUDA)
28#define MFEM_cu_or_hip(stub) cu##stub
29#define MFEM_Cu_or_Hip(stub) Cu##stub
30#define MFEM_CU_or_HIP(stub) CU##stub
31#define MFEM_CUDA_or_HIP(stub) CUDA##stub
33#if CUSPARSE_VERSION >= 11400
34#define MFEM_GPUSPARSE_ALG CUSPARSE_SPMV_CSR_ALG1
36#define MFEM_GPUSPARSE_ALG CUSPARSE_CSRMV_ALG1
39#elif defined(MFEM_USE_HIP)
40#define MFEM_cu_or_hip(stub) hip##stub
41#define MFEM_Cu_or_Hip(stub) Hip##stub
42#define MFEM_CU_or_HIP(stub) HIP##stub
43#define MFEM_CUDA_or_HIP(stub) HIP##stub
46#define MFEM_GPUSPARSE_ALG HIPSPARSE_CSRMV_ALG1
54#ifdef MFEM_USE_CUDA_OR_HIP
67#ifdef MFEM_USE_CUDA_OR_HIP
83#ifdef MFEM_USE_CUDA_OR_HIP
86#if CUDA_VERSION >= 10010 || defined(MFEM_USE_HIP)
87 MFEM_cu_or_hip(sparseDestroySpMat)(
matA_descr);
88 MFEM_cu_or_hip(sparseDestroyDnVec)(
vecX_descr);
89 MFEM_cu_or_hip(sparseDestroyDnVec)(
vecY_descr);
112 for (
int i = 0; i < nrows; i++)
117#ifdef MFEM_USE_MEMALLOC
136#ifdef MFEM_USE_MEMALLOC
144 bool ownij,
bool owna,
bool issorted)
155#ifdef MFEM_USE_MEMALLOC
167 for (
int ii=0; ii<nnz; ++ii)
184#ifdef MFEM_USE_MEMALLOC
188 J.
New(nrows * rowsize);
189 A.
New(nrows * rowsize);
191 for (
int i = 0; i <= nrows; i++)
225#ifdef MFEM_USE_MEMALLOC
231#ifdef MFEM_USE_MEMALLOC
235 for (
int i = 0; i <
height; i++)
238 for (
RowNode *node_p = mat.
Rows[i]; node_p; node_p = node_p->Prev)
240#ifdef MFEM_USE_MEMALLOC
245 new_node_p->
Value = node_p->Value;
246 new_node_p->
Column = node_p->Column;
247 *node_pp = new_node_p;
248 node_pp = &new_node_p->
Prev;
276#ifdef MFEM_USE_MEMALLOC
283 for (
int i = 0; i <=
height; i++)
288 for (
int r=0; r<
height; r++)
315 MFEM_ASSERT(master.
Finalized(),
"'master' must be finalized");
336#ifdef MFEM_USE_MEMALLOC
354 return I[gi+1]-
I[gi];
359 for ( ; row != NULL; row = row->
Prev)
360 if (row->
Value != 0.0)
373 for (
int i=0; i <
height; ++i)
375 rowSize =
I[i+1]-
I[i];
376 max_row_size = (max_row_size > rowSize) ? max_row_size : rowSize;
381 for (
int i=0; i <
height; ++i)
384 max_row_size = (max_row_size > rowSize) ? max_row_size : rowSize;
393 MFEM_VERIFY(
Finalized(),
"Matrix must be finalized.");
400 MFEM_VERIFY(
Finalized(),
"Matrix must be finalized.");
407 MFEM_VERIFY(
Finalized(),
"Matrix must be finalized.");
414 MFEM_VERIFY(
Finalized(),
"Matrix must be finalized.");
421 if (newWidth ==
width)
426 else if (newWidth == -1)
432 else if (newWidth >
width)
443 ColPtrJ =
static_cast<int *
>(NULL);
451 "The new width needs to be bigger or equal to the actual width");
459 MFEM_VERIFY(
Finalized(),
"Matrix is not Finalized!");
466#ifdef MFEM_USE_CUDA_OR_HIP
469#if defined(MFEM_USE_CUDA)
470 size_t pBufferSizeInBytes = 0;
471 void *pBuffer = NULL;
474 const int m =
Width();
477 const int * d_ia =
ReadI();
479 csru2csrInfo_t sortInfoA;
483 cusparseSetMatIndexBase(
matA_descr, CUSPARSE_INDEX_BASE_ZERO );
484 cusparseSetMatType(
matA_descr, CUSPARSE_MATRIX_TYPE_GENERAL );
486 cusparseCreateCsru2csrInfo( &sortInfoA );
488#ifdef MFEM_USE_SINGLE
489 cusparseScsru2csr_bufferSizeExt(
handle, n, m, nnzA, d_a_sorted, d_ia,
490 d_ja_sorted, sortInfoA,
491 &pBufferSizeInBytes);
492#elif defined MFEM_USE_DOUBLE
493 cusparseDcsru2csr_bufferSizeExt(
handle, n, m, nnzA, d_a_sorted, d_ia,
494 d_ja_sorted, sortInfoA,
495 &pBufferSizeInBytes);
497 MFEM_ABORT(
"Floating point type undefined");
502#ifdef MFEM_USE_SINGLE
504 d_ja_sorted, sortInfoA, pBuffer);
505#elif defined MFEM_USE_DOUBLE
507 d_ja_sorted, sortInfoA, pBuffer);
509 MFEM_ABORT(
"Floating point type undefined");
516 cusparseDestroyCsru2csrInfo( sortInfoA );
524#if defined(MFEM_USE_HIP)
525 size_t pBufferSizeInBytes = 0;
526 void *pBuffer = NULL;
530 const int m =
Width();
533 const int * d_ia =
ReadI();
536 hipsparseMatDescr_t descrA;
537 hipsparseCreateMatDescr( &descrA );
545 hipsparseXcsrsort_bufferSizeExt(
handle, n, m, nnzA, d_ia, d_ja_sorted,
546 &pBufferSizeInBytes);
551 hipsparseCreateIdentityPermutation(
handle, nnzA, P);
552 hipsparseXcsrsort(
handle, n, m, nnzA, descrA, d_ia, d_ja_sorted, P, pBuffer);
554#if defined(MFEM_USE_SINGLE)
555 hipsparseSgthr(
handle, nnzA, d_a_sorted, d_a_tmp, P,
556 HIPSPARSE_INDEX_BASE_ZERO);
557#elif defined(MFEM_USE_DOUBLE)
558 hipsparseDgthr(
handle, nnzA, d_a_sorted, d_a_tmp, P,
559 HIPSPARSE_INDEX_BASE_ZERO);
561 MFEM_ABORT(
"Unsupported floating point type!");
565 hipsparseDestroyMatDescr( descrA );
579 for (
int j = 0, i = 0; i <
height; i++)
583 for (
int k = 0; k < row.
Size(); k++)
589 for (
int k = 0; k < row.
Size(); k++, j++)
601 MFEM_VERIFY(
Finalized(),
"Matrix is not Finalized!");
603 for (
int row = 0, end = 0; row <
height; row++)
607 for (j = start;
true; j++)
609 MFEM_VERIFY(j < end,
"diagonal entry not found in row = " << row);
610 if (
J[j] == row) {
break; }
613 for ( ; j > start; j--)
635 MFEM_ASSERT(i < height && i >= 0 && j < width && j >= 0,
636 "Trying to access element outside of the matrix. "
637 <<
"height = " <<
height <<
", "
638 <<
"width = " <<
width <<
", "
639 <<
"i = " << i <<
", "
642 MFEM_VERIFY(
Finalized(),
"Matrix must be finalized.");
644 for (
int k =
I[i], end =
I[i+1]; k < end; k++)
652 MFEM_ABORT(
"Did not find i = " << i <<
", j = " << j <<
" in matrix.");
658 static const real_t zero = 0.0;
660 MFEM_ASSERT(i < height && i >= 0 && j < width && j >= 0,
661 "Trying to access element outside of the matrix. "
662 <<
"height = " <<
height <<
", "
663 <<
"width = " <<
width <<
", "
664 <<
"i = " << i <<
", "
672 for (
int k =
I[i], end =
I[i+1]; k < end; k++)
684 if (node_p->Column == j)
686 return node_p->Value;
696 MFEM_VERIFY(
height ==
width,
"Matrix must be square, not height = "
698 MFEM_VERIFY(
Finalized(),
"Matrix must be finalized.");
702 const auto II = this->
ReadI();
703 const auto JJ = this->
ReadJ();
709 const int begin = II[i];
710 const int end = II[i+1];
712 for (j = begin; j < end; j++)
730 int num_rows = this->
Height();
731 int num_cols = this->
Width();
746 for (
int r=0; r<
height; r++)
751 for (
int cj=0; cj<this->
RowSize(r); cj++)
753 B(r, col[cj]) = val[cj];
767 MFEM_ASSERT(
width == x.
Size(),
"Input vector size (" << x.
Size()
768 <<
") must match matrix width (" <<
width <<
")");
769 MFEM_ASSERT(
height == y.
Size(),
"Output vector size (" << y.
Size()
770 <<
") must match matrix height (" <<
height <<
")");
778 for (
int i = 0; i <
height; i++)
782 for ( ; row != NULL; row = row->
Prev)
792#ifndef MFEM_USE_LEGACY_OPENMP
796 auto d_J =
Read(
J, nnz);
797 auto d_A =
Read(
A, nnz);
802 if (nnz == 0) {
return;}
805#ifdef MFEM_USE_CUDA_OR_HIP
812#if CUDA_VERSION >= 10010 || defined(MFEM_USE_HIP)
814 MFEM_cu_or_hip(sparseCreateCsr)(
818 const_cast<int *
>(d_I),
819 const_cast<int *
>(d_J),
820 const_cast<real_t *
>(d_A),
821 MFEM_CU_or_HIP(SPARSE_INDEX_32I),
822 MFEM_CU_or_HIP(SPARSE_INDEX_32I),
823 MFEM_CU_or_HIP(SPARSE_INDEX_BASE_ZERO),
824#ifdef MFEM_USE_SINGLE
825 MFEM_CUDA_or_HIP(_R_32F));
827 MFEM_CUDA_or_HIP(_R_64F));
831 MFEM_cu_or_hip(sparseCreateDnVec)(&
vecX_descr,
833 const_cast<real_t *
>(d_x),
834#ifdef MFEM_USE_SINGLE
835 MFEM_CUDA_or_HIP(_R_32F));
837 MFEM_CUDA_or_HIP(_R_64F));
840#ifdef MFEM_USE_SINGLE
841 MFEM_CUDA_or_HIP(_R_32F));
843 MFEM_CUDA_or_HIP(_R_64F));
847 cusparseSetMatIndexBase(
matA_descr, CUSPARSE_INDEX_BASE_ZERO);
848 cusparseSetMatType(
matA_descr, CUSPARSE_MATRIX_TYPE_GENERAL);
853 size_t newBufferSize = 0;
855 MFEM_cu_or_hip(sparseSpMV_bufferSize)(
857 MFEM_CU_or_HIP(SPARSE_OPERATION_NON_TRANSPOSE),
863#ifdef MFEM_USE_SINGLE
864 MFEM_CUDA_or_HIP(_R_32F),
866 MFEM_CUDA_or_HIP(_R_64F),
879#if CUDA_VERSION >= 10010 || defined(MFEM_USE_HIP)
881 MFEM_cu_or_hip(sparseDnVecSetValues)(
vecX_descr,
882 const_cast<real_t *
>(d_x));
883 MFEM_cu_or_hip(sparseDnVecSetValues)(
vecY_descr, d_y);
886 MFEM_cu_or_hip(sparseSpMV)(
888 MFEM_CU_or_HIP(SPARSE_OPERATION_NON_TRANSPOSE),
894#ifdef MFEM_USE_SINGLE
895 MFEM_CUDA_or_HIP(_R_32F),
897 MFEM_CUDA_or_HIP(_R_64F),
902#ifdef MFEM_USE_SINGLE
907 CUSPARSE_OPERATION_NON_TRANSPOSE,
913 const_cast<real_t *
>(d_A),
914 const_cast<int *
>(d_I),
915 const_cast<int *
>(d_J),
916 const_cast<real_t *
>(d_x),
928 const int end = d_I[i+1];
929 for (
int j = d_I[i]; j < end; j++)
931 d += d_A[j] * d_x[d_J[j]];
941 const int *Jp =
J, *Ip =
I;
943 #pragma omp parallel for
944 for (
int i = 0; i <
height; i++)
947 const int end = Ip[i+1];
948 for (
int j = Ip[i]; j < end; j++)
950 d += Ap[j] * xp[Jp[j]];
968 <<
") must match matrix height (" <<
height <<
")");
969 MFEM_ASSERT(
width == y.
Size(),
"Output vector size (" << y.
Size()
970 <<
") must match matrix width (" <<
width <<
")");
977 for (
int i = 0; i <
height; i++)
981 for ( ; row != NULL; row = row->
Prev)
1000 const int nnz = Ip[
height];
1004 for (
int i = 0; i <
height; i++)
1007 const int end = Ip[i+1];
1008 for (
int j = Ip[i]; j < end; j++)
1010 const int Jj = Jp[j];
1011 yp[Jj] += Ap[j] * xi;
1042 MFEM_VERIFY(
Finalized(),
"Matrix must be finalized.");
1044 const int n = rows.
Size();
1046 auto d_rows = rows.
Read();
1048 auto d_J =
Read(
J, nnz);
1049 auto d_A =
Read(
A, nnz);
1050 auto d_x = x.
Read();
1051 auto d_y = y.
Write();
1054 const int r = d_rows[i];
1055 const int end = d_I[r + 1];
1057 for (
int j = d_I[r]; j < end; j++)
1059 a += d_A[j] * d_x[d_J[j]];
1068 MFEM_VERIFY(
Finalized(),
"Matrix must be finalized.");
1070 for (
int i = 0; i < rows.
Size(); i++)
1075 for (
int j =
I[r]; j < end; j++)
1077 val +=
A[j] * x(
J[j]);
1085 MFEM_ASSERT(
Finalized(),
"Matrix must be finalized.");
1086 MFEM_ASSERT(x.
Size() ==
Width(),
"Input vector size (" << x.
Size()
1087 <<
") must match matrix width (" <<
Width() <<
")");
1094 auto d_J =
Read(
J, nnz);
1100 const int end = d_I[i+1];
1101 for (
int j = d_I[i]; j < end; j++)
1116 MFEM_ASSERT(
Finalized(),
"Matrix must be finalized.");
1117 MFEM_ASSERT(x.
Size() ==
Height(),
"Input vector size (" << x.
Size()
1118 <<
") must match matrix height (" <<
Height() <<
")");
1123 for (
int i = 0; i <
Height(); i++)
1128 for (
int j =
I[i]; j < end; j++)
1138 MFEM_ASSERT(
width == x.
Size(),
"Input vector size (" << x.
Size()
1139 <<
") must match matrix width (" <<
width <<
")");
1140 MFEM_ASSERT(
height == y.
Size(),
"Output vector size (" << y.
Size()
1141 <<
") must match matrix height (" <<
height <<
")");
1152 for (
int i = 0; i <
height; i++)
1156 for ( ; row != NULL; row = row->
Prev)
1169 auto d_J =
Read(
J, nnz);
1170 auto d_A =
Read(
A, nnz);
1171 auto d_x = x.
Read();
1176 const int end = d_I[i+1];
1177 for (
int j = d_I[i]; j < end; j++)
1179 d += std::abs(d_A[j]) * d_x[d_J[j]];
1187 MFEM_ASSERT(
height == x.
Size(),
"Input vector size (" << x.
Size()
1188 <<
") must match matrix height (" <<
height <<
")");
1189 MFEM_ASSERT(
width == y.
Size(),
"Output vector size (" << y.
Size()
1190 <<
") must match matrix width (" <<
width <<
")");
1198 for (
int i = 0; i <
height; i++)
1202 for ( ; row != NULL; row = row->
Prev)
1217 for (
int i = 0; i <
height; i++)
1220 const int end =
I[i+1];
1221 for (
int j =
I[i]; j < end; j++)
1223 const int Jj =
J[j];
1224 y[Jj] += std::abs(
A[j]) * xi;
1233 <<
" must be equal to Width() = " <<
Width());
1235 <<
" must be equal to Height() = " <<
Height());
1248 for (
int i = 0; i <
height; i++)
1253 for (
int j =
I[i], end =
I[i+1]; j < end; j++)
1255 a +=
A[j] * x(
J[j]);
1262 a += np->Value * x(np->Column);
1277 auto d_x = x.
Write();
1281 for (
int j = d_I[i], end = d_I[i+1]; j < end; j++)
1290 for (
int i = 0; i <
height; i++)
1304 MFEM_VERIFY(irow <
height,
1305 "row " << irow <<
" not in matrix with height " <<
height);
1310 for (
int j =
I[irow], end =
I[irow+1]; j < end; j++)
1319 a += fabs(np->Value);
1328 MFEM_ASSERT(
Finalized(),
"Matrix must be finalized.");
1330 atol = std::abs(tol);
1332 fix_empty_rows =
height ==
width ? fix_empty_rows :
false;
1340 for (i = 0, nz = 0; i <
height; i++)
1343 for (j =
I[i]; j <
I[i+1]; j++)
1344 if (std::abs(
A[j]) > atol)
1349 if (fix_empty_rows && !found) { nz++; }
1357 for (i = 0, nz = 0; i <
height; i++)
1361 for (j =
I[i]; j <
I[i+1]; j++)
1362 if (std::abs(
A[j]) > atol)
1367 if ( lastCol > newJ[nz] )
1374 if (fix_empty_rows && !found)
1402 for (i = 1; i <=
height; i++)
1405 for (aux =
Rows[i-1]; aux != NULL; aux = aux->
Prev)
1407 if (skip_zeros && aux->
Value == 0.0)
1409 if (skip_zeros == 2) {
continue; }
1410 if ((i-1) != aux->
Column) {
continue; }
1416 if (other->Column == (i-1))
1419 found_val = other->Value;
1423 if (found && found_val == 0.0) {
continue; }
1428 if (fix_empty_rows && !nr) { nr = 1; }
1437 for (j = i = 0; i <
height; i++)
1441 for (aux =
Rows[i]; aux != NULL; aux = aux->
Prev)
1443 if (skip_zeros && aux->
Value == 0.0)
1445 if (skip_zeros == 2) {
continue; }
1446 if (i != aux->
Column) {
continue; }
1452 if (other->Column == i)
1455 found_val = other->Value;
1459 if (found && found_val == 0.0) {
continue; }
1465 if ( lastCol >
J[j] )
1474 if (fix_empty_rows && !nr)
1482#ifdef MFEM_USE_MEMALLOC
1486 for (i = 0; i <
height; i++)
1489 while (node_p != NULL)
1492 node_p = node_p->
Prev;
1505 int nr = (
height + br - 1)/br, nc = (
width + bc - 1)/bc;
1507 for (
int j = 0; j < bc; j++)
1509 for (
int i = 0; i < br; i++)
1512 for (
int k = 0; k <= nr; k++)
1516 blocks(i,j) =
new SparseMatrix(bI, NULL, NULL, nr, nc);
1520 for (
int gr = 0; gr <
height; gr++)
1522 int bi = gr/nr, i = gr%nr + 1;
1525 for (
int j =
I[gr]; j <
I[gr+1]; j++)
1529 blocks(bi,
J[j]/nc)->I[i]++;
1537 if (n_p->Value != 0.0)
1539 blocks(bi, n_p->Column/nc)->I[i]++;
1545 for (
int j = 0; j < bc; j++)
1547 for (
int i = 0; i < br; i++)
1551 for (
int k = 1; k <= nr; k++)
1553 rs =
b.I[k],
b.I[k] = nnz, nnz += rs;
1560 for (
int gr = 0; gr <
height; gr++)
1562 int bi = gr/nr, i = gr%nr + 1;
1565 for (
int j =
I[gr]; j <
I[gr+1]; j++)
1570 b.J[
b.I[i]] =
J[j] % nc;
1580 if (n_p->Value != 0.0)
1583 b.J[
b.I[i]] = n_p->Column % nc;
1584 b.A[
b.I[i]] = n_p->Value;
1606 for (
int i = 1; i <
height; i++)
1608 for (
int j =
I[i]; j <
I[i+1]; j++)
1612 symm = std::max(symm, std::abs(
A[j]-(*
this)(
J[j],i)));
1619 for (
int i = 0; i <
height; i++)
1621 for (
RowNode *node_p =
Rows[i]; node_p != NULL; node_p = node_p->
Prev)
1623 int col = node_p->Column;
1626 symm = std::max(symm, std::abs(node_p->Value-(*
this)(col,i)));
1636 MFEM_VERIFY(
Finalized(),
"Matrix must be finalized.");
1639 for (i = 1; i <
height; i++)
1641 for (j =
I[i]; j <
I[i+1]; j++)
1645 A[j] += (*this)(
J[j],i);
1647 (*this)(
J[j],i) =
A[j];
1664 for (
int i = 0; i <
height; i++)
1666 for (
RowNode *node_p =
Rows[i]; node_p != NULL; node_p = node_p->
Prev)
1683 for (
int j = 0; j < nnz; j++)
1685 m = std::max(m, std::abs(
A[j]));
1690 for (
int i = 0; i <
height; i++)
1694 m = std::max(m, std::abs(n_p->Value));
1710 for (
int i = 0; i < nz; i++)
1712 counter += (std::abs(Ap[i]) <= tol);
1717 for (
int i = 0; i <
height; i++)
1721 counter += (std::abs(aux->Value) <= tol);
1742 for (
int i = 0; i <
height; i++)
1762 MFEM_ASSERT(row < height && row >= 0,
1763 "Row " << row <<
" not in matrix of height " <<
height);
1765 MFEM_VERIFY(!
Finalized(),
"Matrix must NOT be finalized.");
1767 for (aux =
Rows[row]; aux != NULL; aux = aux->
Prev)
1778 MFEM_ASSERT(row < height && row >= 0,
1779 "Row " << row <<
" not in matrix of height " <<
height);
1780 MFEM_ASSERT(dpolicy !=
DIAG_KEEP,
"Diagonal policy must not be DIAG_KEEP");
1782 "if dpolicy == DIAG_ONE, matrix must be square, not height = "
1787 for (
int i=
I[row]; i <
I[row+1]; ++i)
1794 for (aux =
Rows[row]; aux != NULL; aux = aux->
Prev)
1808 MFEM_ASSERT(col < width && col >= 0,
1809 "Col " << col <<
" not in matrix of width " <<
width);
1810 MFEM_ASSERT(dpolicy !=
DIAG_KEEP,
"Diagonal policy must not be DIAG_KEEP");
1812 "if dpolicy == DIAG_ONE, matrix must be square, not height = "
1818 for (
int jpos = 0; jpos != nnz; ++jpos)
1828 for (
int i = 0; i <
height; i++)
1832 if (aux->Column == col)
1852 for (
int i = 0; i <
height; i++)
1854 for (
int jpos =
I[i]; jpos !=
I[i+1]; ++jpos)
1860 (*b)(i) -=
A[jpos] * (*x)(
J[jpos] );
1869 for (
int i = 0; i <
height; i++)
1873 if (cols[aux -> Column])
1877 (*b)(i) -= aux -> Value * (*x)(aux -> Column);
1891 for (
int row = 0; row <
height; row++)
1893 for (nd =
Rows[row]; nd != NULL; nd = nd->
Prev)
1895 if (col_marker[nd->
Column])
1905 for (
int row = 0; row <
height; row++)
1907 for (
int j =
I[row]; j <
I[row+1]; j++)
1909 if (col_marker[
J[j]])
1911 Ae.
Add(row,
J[j],
A[j]);
1923 MFEM_ASSERT(rc < height && rc >= 0,
1924 "Row " << rc <<
" not in matrix of height " <<
height);
1931 for (
int j =
I[rc]; j <
I[rc+1]; j++)
1933 const int col =
J[j];
1939 rhs(rc) =
A[j] * sol;
1950 mfem_error(
"SparseMatrix::EliminateRowCol () #2");
1957 for (
int k =
I[col]; 1; k++)
1961 mfem_error(
"SparseMatrix::EliminateRowCol () #3");
1963 else if (
J[k] == rc)
1965 rhs(col) -= sol *
A[k];
1977 const int col = aux->Column;
1983 rhs(rc) = aux->Value * sol;
1994 mfem_error(
"SparseMatrix::EliminateRowCol () #4");
2005 mfem_error(
"SparseMatrix::EliminateRowCol () #5");
2007 else if (node->Column == rc)
2009 rhs(col) -= sol * node->Value;
2023 MFEM_ASSERT(rc < height && rc >= 0,
2024 "Row " << rc <<
" not in matrix of height " <<
height);
2025 MFEM_ASSERT(sol.
Size() == rhs.
Width(),
"solution size (" << sol.
Size()
2026 <<
") must match rhs width (" << rhs.
Width() <<
")");
2028 const int num_rhs = rhs.
Width();
2031 for (
int j =
I[rc]; j <
I[rc+1]; j++)
2033 const int col =
J[j];
2039 for (
int r = 0; r < num_rhs; r++)
2041 rhs(rc,r) =
A[j] * sol(r);
2046 for (
int r = 0; r < num_rhs; r++)
2053 for (
int r = 0; r < num_rhs; r++)
2059 mfem_error(
"SparseMatrix::EliminateRowColMultipleRHS() #3");
2066 for (
int k =
I[col]; 1; k++)
2070 mfem_error(
"SparseMatrix::EliminateRowColMultipleRHS() #4");
2072 else if (
J[k] == rc)
2074 for (
int r = 0; r < num_rhs; r++)
2076 rhs(col,r) -= sol(r) *
A[k];
2089 const int col = aux->Column;
2095 for (
int r = 0; r < num_rhs; r++)
2097 rhs(rc,r) = aux->Value * sol(r);
2102 for (
int r = 0; r < num_rhs; r++)
2109 for (
int r = 0; r < num_rhs; r++)
2115 mfem_error(
"SparseMatrix::EliminateRowColMultipleRHS() #5");
2126 mfem_error(
"SparseMatrix::EliminateRowColMultipleRHS() #6");
2128 else if (node->Column == rc)
2130 for (
int r = 0; r < num_rhs; r++)
2132 rhs(col,r) -= sol(r) * node->Value;
2145 MFEM_ASSERT(rc < height && rc >= 0,
2146 "Row " << rc <<
" not in matrix of height " <<
height);
2150 const auto &II = this->
I;
2151 const auto &JJ = this->
J;
2152 for (
int j = II[rc]; j < II[rc+1]; j++)
2154 const int col = JJ[j];
2169 for (
int k = II[col]; 1; k++)
2173 mfem_error(
"SparseMatrix::EliminateRowCol() #2");
2175 else if (JJ[k] == rc)
2188 for (aux =
Rows[rc]; aux != NULL; aux = aux->
Prev)
2190 const int col = aux->
Column;
2205 for (node =
Rows[col]; 1; node = node->
Prev)
2209 mfem_error(
"SparseMatrix::EliminateRowCol() #3");
2211 else if (node->
Column == rc)
2226 MFEM_ASSERT(rc < height && rc >= 0,
2227 "Row " << rc <<
" not in matrix of height " <<
height);
2231 for (
int j =
I[rc]; j <
I[rc+1]; j++)
2233 const int col =
J[j];
2241 for (
int k =
I[col]; 1; k++)
2245 mfem_error(
"SparseMatrix::EliminateRowCol() #2");
2247 else if (
J[k] == rc)
2260 for (aux =
Rows[rc]; aux != NULL; aux = aux->
Prev)
2262 const int col = aux->
Column;
2270 for (node =
Rows[col]; 1; node = node->
Prev)
2274 mfem_error(
"SparseMatrix::EliminateRowCol() #3");
2276 else if (node->
Column == rc)
2293 for (nd =
Rows[rc]; nd != NULL; nd = nd->
Prev)
2295 const int col = nd->
Column;
2311 mfem_error(
"SparseMatrix::EliminateRowCol #1");
2319 for (nd2 =
Rows[col]; 1; nd2 = nd2->
Prev)
2323 mfem_error(
"SparseMatrix::EliminateRowCol #2");
2325 else if (nd2->
Column == rc)
2337 for (
int j =
I[rc]; j <
I[rc+1]; j++)
2339 const int col =
J[j];
2345 Ae.
Add(rc, rc,
A[j] - 1.0);
2349 Ae.
Add(rc, rc,
A[j]);
2355 mfem_error(
"SparseMatrix::EliminateRowCol #3");
2361 Ae.
Add(rc, col,
A[j]);
2363 for (
int k =
I[col];
true; k++)
2367 mfem_error(
"SparseMatrix::EliminateRowCol #4");
2369 else if (
J[k] == rc)
2371 Ae.
Add(col, rc,
A[k]);
2384 const int n_ess_dofs = ess_dofs.
Size();
2385 const auto ess_dofs_d = ess_dofs.
Read();
2386 const auto dI =
ReadI();
2387 const auto dJ =
ReadJ();
2392 const int idof = ess_dofs_d[i];
2393 for (
int j=dI[idof]; j<dI[idof+1]; ++j)
2395 const int jdof = dJ[j];
2399 for (
int k=dI[jdof]; k<dI[jdof+1]; ++k)
2426 for (
int i = 0; i <
height; i++)
2428 if (
I[i+1] ==
I[i]+1 && fabs(
A[
I[i]]) < 1e-16)
2437 for (
int i = 0; i <
height; i++)
2440 for (
int j =
I[i]; j <
I[i+1]; j++)
2444 if (zero <= threshold)
2446 for (
int j =
I[i]; j <
I[i+1]; j++)
2448 A[j] = (
J[j] == i) ? 1.0 : 0.0;
2463 for (
int i = 0; i < s; i++)
2467 for (n_p = R[i]; n_p != NULL; n_p = n_p->
Prev)
2469 const int c = n_p->
Column;
2476 sum += n_p->
Value * yp[c];
2480 if (diag_p != NULL && diag_p->
Value != 0.0)
2482 yp[i] = (xp[i] - sum) / diag_p->
Value;
2484 else if (xp[i] == sum)
2490 mfem_error(
"SparseMatrix::Gauss_Seidel_forw()");
2504 for (
int i = 0, j = Ip[0]; i < s; i++)
2506 const int end = Ip[i+1];
2509 for ( ; j < end; j++)
2511 const int c = Jp[j];
2518 sum += Ap[j] * yp[c];
2522 if (d >= 0 && Ap[d] != 0.0)
2524 yp[i] = (xp[i] - sum) / Ap[d];
2526 else if (xp[i] == sum)
2532 mfem_error(
"SparseMatrix::Gauss_Seidel_forw(...) #2");
2546 for (
int i =
height-1; i >= 0; i--)
2550 for (n_p = R[i]; n_p != NULL; n_p = n_p->
Prev)
2552 const int c = n_p->
Column;
2559 sum += n_p->
Value * yp[c];
2563 if (diag_p != NULL && diag_p->
Value != 0.0)
2565 yp[i] = (xp[i] - sum) / diag_p->
Value;
2567 else if (xp[i] == sum)
2573 mfem_error(
"SparseMatrix::Gauss_Seidel_back()");
2587 for (
int i = s-1, j = Ip[s]-1; i >= 0; i--)
2589 const int beg = Ip[i];
2592 for ( ; j >= beg; j--)
2594 const int c = Jp[j];
2601 sum += Ap[j] * yp[c];
2605 if (d >= 0 && Ap[d] != 0.0)
2607 yp[i] = (xp[i] - sum) / Ap[d];
2609 else if (xp[i] == sum)
2615 mfem_error(
"SparseMatrix::Gauss_Seidel_back(...) #2");
2623 MFEM_VERIFY(
Finalized(),
"Matrix must be finalized.");
2626 for (
int i = 0; i <
height; i++)
2630 for (
int j =
I[i]; j <
I[i+1]; j++)
2638 if (d >= 0 &&
A[d] != 0.0)
2640 real_t a = 1.8 * fabs(
A[d]) / norm;
2648 mfem_error(
"SparseMatrix::GetJacobiScaling() #2");
2655 real_t sc,
bool use_abs_diag)
const
2657 MFEM_VERIFY(
Finalized(),
"Matrix must be finalized.");
2659 for (
int i = 0; i <
height; i++)
2663 for (
int j =
I[i]; j <
I[i+1]; j++)
2671 sum -=
A[j] * x0(
J[j]);
2674 if (d >= 0 &&
A[d] != 0.0)
2676 const real_t diag = (use_abs_diag) ? fabs(
A[d]) :
A[d];
2677 x1(i) = sc * (sum / diag) + (1.0 - sc) * x0(i);
2687 real_t sc,
bool use_abs_diag)
const
2689 MFEM_VERIFY(
Finalized(),
"Matrix must be finalized.");
2693 const bool use_dev =
b.UseDevice() || x.
UseDevice();
2695 const auto Ap =
Read(
A, nnz, use_dev);
2697 const auto Jp =
Read(
J, nnz, use_dev);
2699 const auto bp =
b.Read(use_dev);
2700 auto xp = x.
Write(use_dev);
2704 const int end = Ip[i+1];
2705 for (
int j = Ip[i];
true; j++)
2709 MFEM_ABORT_KERNEL(
"Diagonal not found in SparseMatrix::DiagScale");
2713 const real_t diag = (use_abs_diag) ? fabs(Ap[j]) : Ap[j];
2716 MFEM_ABORT_KERNEL(
"Zero diagonal in SparseMatrix::DiagScale");
2718 xp[i] = sc * bp[i] / diag;
2725template <
bool useFabs>
2733 const auto bp =
b.Read(useDevice);
2734 const auto x0p = x0.
Read(useDevice);
2735 auto x1p = x1.
Write(useDevice);
2737 const auto Ip =
Read(I, height+1, useDevice);
2743 real_t resi = bp[i], norm = 0.0;
2744 for (
int j = Ip[i]; j < Ip[i+1]; j++)
2746 resi -= Ap[j] * x0p[Jp[j]];
2749 norm += fabs(Ap[j]);
2758 x1p[i] = x0p[i] + sc * resi / norm;
2764 MFEM_ABORT_KERNEL(
"L1 norm of row is zero.");
2768 MFEM_ABORT_KERNEL(
"sum of row is zero.");
2777 MFEM_VERIFY(
Finalized(),
"Matrix must be finalized.");
2778 JacobiDispatch<true>(
b,x0,x1,
I,
J,
A,
height,sc);
2784 MFEM_VERIFY(
Finalized(),
"Matrix must be finalized.");
2785 JacobiDispatch<false>(
b,x0,x1,
I,
J,
A,
height,sc);
2791 int i, j, gi, gj, s, t;
2801 for (i = 0; i < rows.
Size(); i++)
2803 if ((gi=rows[i]) < 0) { gi = -1-gi, s = -1; }
2806 "Trying to insert a row " << gi <<
" outside the matrix height "
2809 for (j = 0; j < cols.
Size(); j++)
2811 if ((gj=cols[j]) < 0) { gj = -1-gj, t = -s; }
2813 MFEM_ASSERT(gj <
width,
2814 "Trying to insert a column " << gj <<
" outside the matrix width "
2817 if (skip_zeros &&
a == 0.0)
2822 if (skip_zeros == 2 || &rows != &cols || subm(j, i) == 0.0)
2827 if (t < 0) {
a = -
a; }
2839 if ((gi=i) < 0) { gi = -1-gi, s = -1; }
2842 "Trying to set a row " << gi <<
" outside the matrix height "
2844 if ((gj=j) < 0) { gj = -1-gj, t = -s; }
2846 MFEM_ASSERT(gj <
width,
2847 "Trying to set a column " << gj <<
" outside the matrix width "
2849 if (t < 0) {
a = -
a; }
2858 if ((gi=i) < 0) { gi = -1-gi, s = -1; }
2861 "Trying to insert a row " << gi <<
" outside the matrix height "
2863 if ((gj=j) < 0) { gj = -1-gj, t = -s; }
2865 MFEM_ASSERT(gj <
width,
2866 "Trying to insert a column " << gj <<
" outside the matrix width "
2868 if (t < 0) {
a = -
a; }
2875 int i, j, gi, gj, s, t;
2878 for (i = 0; i < rows.
Size(); i++)
2880 if ((gi=rows[i]) < 0) { gi = -1-gi, s = -1; }
2883 "Trying to set a row " << gi <<
" outside the matrix height "
2886 for (j = 0; j < cols.
Size(); j++)
2889 if (skip_zeros &&
a == 0.0)
2894 if (skip_zeros == 2 || &rows != &cols || subm(j, i) == 0.0)
2899 if ((gj=cols[j]) < 0) { gj = -1-gj, t = -s; }
2901 MFEM_ASSERT(gj <
width,
2902 "Trying to set a column " << gj <<
" outside the matrix width "
2904 if (t < 0) {
a = -
a; }
2916 int i, j, gi, gj, s, t;
2919 for (i = 0; i < rows.
Size(); i++)
2921 if ((gi=rows[i]) < 0) { gi = -1-gi, s = -1; }
2924 "Trying to set a row " << gi <<
" outside the matrix height "
2927 for (j = 0; j < cols.
Size(); j++)
2930 if (skip_zeros &&
a == 0.0)
2935 if (skip_zeros == 2 || &rows != &cols || subm(j, i) == 0.0)
2940 if ((gj=cols[j]) < 0) { gj = -1-gj, t = -s; }
2942 MFEM_ASSERT(gj <
width,
2943 "Trying to set a column " << gj <<
" outside the matrix width "
2945 if (t < 0) {
a = -
a; }
2955 int i, j, gi, gj, s, t;
2958 for (i = 0; i < rows.
Size(); i++)
2960 if ((gi=rows[i]) < 0) { gi = -1-gi, s = -1; }
2963 "Trying to read a row " << gi <<
" outside the matrix height "
2966 for (j = 0; j < cols.
Size(); j++)
2968 if ((gj=cols[j]) < 0) { gj = -1-gj, t = -s; }
2970 MFEM_ASSERT(gj <
width,
2971 "Trying to read a column " << gj <<
" outside the matrix width "
2974 subm(i, j) = (t < 0) ? (-
a) : (
a);
2989 "Trying to query a row " << gi <<
" outside the matrix height "
2993 return (
Rows[gi] == NULL);
2997 return (
I[gi] ==
I[gi+1]);
3006 if ((gi=row) < 0) { gi = -1-gi; }
3008 "Trying to read a row " << gi <<
" outside the matrix height "
3012 for (n =
Rows[gi], j = 0; n; n = n->
Prev)
3018 for (n =
Rows[gi], j = 0; n; n = n->
Prev, j++)
3033 cols.
MakeRef(
const_cast<int*
>((
const int*)
J) + j,
I[gi+1]-j);
3036 MFEM_ASSERT(row >= 0,
"Row not valid: " << row <<
", height: " <<
height);
3047 if ((gi=row) < 0) { gi = -1-gi, s = -1; }
3050 "Trying to set a row " << gi <<
" outside the matrix height "
3056 for (
int j = 0; j < cols.
Size(); j++)
3058 if ((gj=cols[j]) < 0) { gj = -1-gj, t = -s; }
3060 MFEM_ASSERT(gj <
width,
3061 "Trying to set a column " << gj <<
" outside the matrix"
3062 " width " <<
width);
3064 if (t < 0) {
a = -
a; }
3072 MFEM_ASSERT(cols.
Size() == srow.
Size(),
"");
3074 for (
int i =
I[gi], j = 0; j < cols.
Size(); j++, i++)
3076 if ((gj=cols[j]) < 0) { gj = -1-gj, t = -s; }
3078 MFEM_ASSERT(gj <
width,
3079 "Trying to set a column " << gj <<
" outside the matrix"
3080 " width " <<
width);
3091 int j, gi, gj, s, t;
3094 MFEM_VERIFY(!
Finalized(),
"Matrix must NOT be finalized.");
3096 if ((gi=row) < 0) { gi = -1-gi, s = -1; }
3099 "Trying to insert a row " << gi <<
" outside the matrix height "
3102 for (j = 0; j < cols.
Size(); j++)
3104 if ((gj=cols[j]) < 0) { gj = -1-gj, t = -s; }
3106 MFEM_ASSERT(gj <
width,
3107 "Trying to insert a column " << gj <<
" outside the matrix width "
3114 if (t < 0) {
a = -
a; }
3132 for (aux =
Rows[i]; aux != NULL; aux = aux -> Prev)
3134 aux -> Value *= scale;
3139 int j, end =
I[i+1];
3141 for (j =
I[i]; j < end; j++)
3154 for (
int i=0; i <
height; ++i)
3157 for (aux =
Rows[i]; aux != NULL; aux = aux -> Prev)
3159 aux -> Value *= scale;
3167 for (
int i=0; i <
height; ++i)
3171 for (j =
I[i]; j < end; j++)
3184 for (
int i=0; i <
height; ++i)
3186 for (aux =
Rows[i]; aux != NULL; aux = aux -> Prev)
3188 aux -> Value *= sr(aux->
Column);
3196 for (
int i=0; i <
height; ++i)
3199 for (j =
I[i]; j < end; j++)
3210 "Mismatch of this matrix size and rhs. This height = "
3211 <<
height <<
", width = " <<
width <<
", B.height = "
3214 for (
int i = 0; i <
height; i++)
3219 for (
RowNode *aux = B.
Rows[i]; aux != NULL; aux = aux->Prev)
3221 _Add_(aux->Column, aux->Value);
3226 for (
int j = B.
I[i]; j < B.
I[i+1]; j++)
3239 for (
int i = 0; i <
height; i++)
3246 np->Value +=
a * B.
_Get_(np->Column);
3251 for (
int j =
I[i]; j <
I[i+1]; j++)
3266 for (
int i = 0; i < nnz; i++)
3273 for (
int i = 0; i <
height; i++)
3276 node_p = node_p -> Prev)
3278 node_p -> Value =
a;
3290 for (
int i = 0, nnz =
I[
height]; i < nnz; i++)
3297 for (
int i = 0; i <
height; i++)
3300 node_p = node_p -> Prev)
3302 node_p -> Value *=
a;
3317 for (i = 0; i <
height; i++)
3319 os <<
"[row " << i <<
"]\n";
3320 for (nd =
Rows[i], j = 0; nd != NULL; nd = nd->
Prev, j++)
3322 os <<
" (" << nd->
Column <<
"," << nd->
Value <<
")";
3323 if ( !((j+1) % width_) )
3340 for (i = 0; i <
height; i++)
3342 os <<
"[row " << i <<
"]\n";
3343 for (j =
I[i]; j <
I[i+1]; j++)
3345 os <<
" (" <<
J[j] <<
"," <<
A[j] <<
")";
3346 if ( !((j+1-
I[i]) % width_) )
3351 if ((j-
I[i]) % width_)
3360 os <<
"% size " <<
height <<
" " <<
width <<
"\n";
3364 ios::fmtflags old_fmt = os.flags();
3365 os.setf(ios::scientific);
3366 std::streamsize old_prec = os.precision(14);
3371 for (i = 0; i <
height; i++)
3373 for (nd =
Rows[i], j = 0; nd != NULL; nd = nd->
Prev, j++)
3375 os << i+1 <<
" " << nd->
Column+1 <<
" " << nd->
Value <<
'\n';
3385 for (i = 0; i <
height; i++)
3387 for (j =
I[i]; j <
I[i+1]; j++)
3389 os << i+1 <<
" " <<
J[j]+1 <<
" " <<
A[j] <<
'\n';
3395 os.precision(old_prec);
3402 ios::fmtflags old_fmt = os.flags();
3403 os.setf(ios::scientific);
3404 std::streamsize old_prec = os.precision(14);
3406 os <<
"(* Read file into Mathematica using: "
3407 <<
"myMat = Get[\"this_file_name\"] *)\n";
3408 os <<
"SparseArray[";
3415 for (i = 0; i <
height; i++)
3417 for (nd =
Rows[i], j = 0; nd != NULL; nd = nd->
Prev, j++, c++)
3419 os <<
"{"<< i+1 <<
", " << nd->
Column+1
3420 <<
"} -> Internal`StringToMReal[\"" << nd->
Value <<
"\"]";
3435 for (i = 0; i <
height; i++)
3437 for (j =
I[i]; j <
I[i+1]; j++, c++)
3439 os <<
"{" << i+1 <<
", " <<
J[j]+1
3440 <<
"} -> Internal`StringToMReal[\"" <<
A[j] <<
"\"]";
3450 os.precision(old_prec);
3457 ios::fmtflags old_fmt = os.flags();
3458 os.setf(ios::scientific);
3459 std::streamsize old_prec = os.precision(14);
3461 os <<
"%%MatrixMarket matrix coordinate real general" <<
'\n'
3462 <<
"% Generated by MFEM" <<
'\n';
3469 for (i = 0; i <
height; i++)
3471 for (nd =
Rows[i], j = 0; nd != NULL; nd = nd->
Prev, j++)
3473 os << i+1 <<
" " << nd->
Column+1 <<
" " << nd->
Value <<
'\n';
3483 for (i = 0; i <
height; i++)
3485 for (j =
I[i]; j <
I[i+1]; j++)
3487 os << i+1 <<
" " <<
J[j]+1 <<
" " <<
A[j] <<
'\n';
3491 os.precision(old_prec);
3497 MFEM_VERIFY(
Finalized(),
"Matrix must be finalized.");
3507 for (i = 0; i <=
height; i++)
3509 os <<
I[i]+1 <<
'\n';
3512 for (i = 0; i <
I[
height]; i++)
3514 os <<
J[i]+1 <<
'\n';
3517 for (i = 0; i <
I[
height]; i++)
3525 MFEM_VERIFY(
Finalized(),
"Matrix must be finalized.");
3530 os <<
width <<
'\n';
3536 for (i = 0; i <=
height; i++)
3541 for (i = 0; i <
I[
height]; i++)
3546 for (i = 0; i <
I[
height]; i++)
3554 const real_t MiB = 1024.*1024;
3566 "SparseMatrix statistics:\n"
3569 " Dimensions : " <<
height <<
" x " <<
width <<
"\n"
3570 " Number of entries (total) : " << nnz <<
"\n"
3571 " Number of entries (per row) : " << 1.*nnz/
Height() <<
"\n"
3572 " Number of stored zeros : " << nz*pz <<
"% (" << nz <<
")\n"
3573 " Number of Inf/Nan entries : " << nnf*pz <<
"% ("<< nnf <<
")\n"
3574 " Norm, max |a_ij| : " << max_norm <<
"\n"
3575 " Symmetry, max |a_ij-a_ji| : " << symm <<
"\n"
3576 " Number of small entries:\n"
3577 " |a_ij| <= 1e-12*Norm : " << ns12*pz <<
"% (" << ns12 <<
")\n"
3578 " |a_ij| <= 1e-15*Norm : " << ns15*pz <<
"% (" << ns15 <<
")\n"
3579 " |a_ij| <= 1e-18*Norm : " << ns18*pz <<
"% (" << ns18 <<
")\n";
3582 os <<
" Memory used by CSR : " <<
3583 (
sizeof(int)*(
height+1+nnz)+
sizeof(
real_t)*nnz)/MiB <<
" MiB\n";
3588#ifdef MFEM_USE_MEMALLOC
3591 for (
int i = 0; i <
height; i++)
3599 os <<
" Memory used by LIL : " << used_mem/MiB <<
" MiB\n";
3611#if !defined(MFEM_USE_MEMALLOC)
3612 for (
int i = 0; i <
height; i++)
3615 while (node_p != NULL)
3618 node_p = node_p->
Prev;
3628#ifdef MFEM_USE_MEMALLOC
3641 const int *start_j =
J;
3643 for (
const int *jptr = start_j; jptr != end_j; ++jptr)
3645 awidth = std::max(awidth, *jptr + 1);
3651 for (
int i = 0; i <
height; i++)
3653 for (aux =
Rows[i]; aux != NULL; aux = aux->
Prev)
3655 awidth = std::max(awidth, aux->
Column + 1);
3667 for (
int i = 0; i < n; i++)
3677 "Finalize must be called before Transpose. Use TransposeRowMatrix instead");
3680 const int *A_i, *A_j;
3681 int m, n, nnz, *At_i, *At_j;
3696 for (i = 0; i <= n; i++)
3700 for (i = 0; i < nnz; i++)
3704 for (i = 1; i < n; i++)
3706 At_i[i+1] += At_i[i];
3709 for (i = j = 0; i < m; i++)
3712 for ( ; j < end; j++)
3714 At_j[At_i[A_j[j]]] = i;
3715 At_data[At_i[A_j[j]]] = A_data[j];
3720 for (i = n; i > 0; i--)
3722 At_i[i] = At_i[i-1];
3733 int m, n, nnz, *At_i, *At_j;
3743 for (i = 0; i < m; i++)
3745 A.
GetRow(i, Acols, Avals);
3767 for (i = 0; i <= n; i++)
3772 for (i = 0; i < m; i++)
3774 A.
GetRow(i, Acols, Avals);
3775 for (j = 0; j<Acols.
Size(); ++j)
3780 for (i = 1; i < n; i++)
3782 At_i[i+1] += At_i[i];
3785 for (i = 0; i < m; i++)
3787 A.
GetRow(i, Acols, Avals);
3788 for (j = 0; j<Acols.
Size(); ++j)
3790 At_j[At_i[Acols[j]]] = i;
3791 At_data[At_i[Acols[j]]] = Avals[j];
3796 for (i = n; i > 0; i--)
3798 At_i[i] = At_i[i-1];
3809 int nrowsA, ncolsA, nrowsB, ncolsB;
3810 const int *A_i, *A_j, *B_i, *B_j;
3811 int *C_i, *C_j, *B_marker;
3812 const real_t *A_data, *B_data;
3814 int ia, ib, ic, ja, jb, num_nonzeros;
3815 int row_start, counter;
3824 MFEM_VERIFY(ncolsA == nrowsB,
3825 "number of columns of A (" << ncolsA
3826 <<
") must equal number of rows of B (" << nrowsB <<
")");
3835 B_marker =
new int[ncolsB];
3837 for (ib = 0; ib < ncolsB; ib++)
3846 C_i[0] = num_nonzeros = 0;
3847 for (ic = 0; ic < nrowsA; ic++)
3849 for (ia = A_i[ic]; ia < A_i[ic+1]; ia++)
3852 for (ib = B_i[ja]; ib < B_i[ja+1]; ib++)
3855 if (B_marker[jb] != ic)
3862 C_i[ic+1] = num_nonzeros;
3868 C =
new SparseMatrix(C_i, C_j, C_data, nrowsA, ncolsB);
3870 for (ib = 0; ib < ncolsB; ib++)
3879 MFEM_VERIFY(nrowsA == C->
Height() && ncolsB == C->
Width(),
3880 "Input matrix sizes do not match output sizes"
3881 <<
" nrowsA = " << nrowsA
3882 <<
", C->Height() = " << C->
Height()
3883 <<
" ncolsB = " << ncolsB
3884 <<
", C->Width() = " << C->
Width());
3892 for (ic = 0; ic < nrowsA; ic++)
3895 row_start = counter;
3896 for (ia = A_i[ic]; ia < A_i[ic+1]; ia++)
3899 a_entry = A_data[ia];
3900 for (ib = B_i[ja]; ib < B_i[ja+1]; ib++)
3903 b_entry = B_data[ib];
3904 if (B_marker[jb] < row_start)
3906 B_marker[jb] = counter;
3911 C_data[counter] = a_entry*b_entry;
3916 C_data[B_marker[jb]] += a_entry*b_entry;
3924 "With pre-allocated output matrix, number of non-zeros ("
3926 <<
") did not match number of entries changed from matrix-matrix multiply, "
3945 int nrowsA, ncolsA, nrowsB, ncolsB;
3946 int *C_i, *C_j, *B_marker;
3948 int ia, ib, ic, ja, jb, num_nonzeros;
3949 int row_start, counter;
3958 MFEM_VERIFY(ncolsA == nrowsB,
3959 "number of columns of A (" << ncolsA
3960 <<
") must equal number of rows of B (" << nrowsB <<
")");
3962 B_marker =
new int[ncolsB];
3964 for (ib = 0; ib < ncolsB; ib++)
3971 C_i[0] = num_nonzeros = 0;
3975 for (ic = 0; ic < nrowsA; ic++)
3977 A.
GetRow(ic, colsA, dataA);
3978 for (ia = 0; ia < colsA.
Size(); ia++)
3981 B.
GetRow(ja, colsB, dataB);
3982 for (ib = 0; ib < colsB.
Size(); ib++)
3985 if (B_marker[jb] != ic)
3992 C_i[ic+1] = num_nonzeros;
3998 C =
new SparseMatrix(C_i, C_j, C_data, nrowsA, ncolsB);
4000 for (ib = 0; ib < ncolsB; ib++)
4006 for (ic = 0; ic < nrowsA; ic++)
4008 row_start = counter;
4009 A.
GetRow(ic, colsA, dataA);
4010 for (ia = 0; ia < colsA.
Size(); ia++)
4013 a_entry = dataA[ia];
4014 B.
GetRow(ja, colsB, dataB);
4015 for (ib = 0; ib < colsB.
Size(); ib++)
4018 b_entry = dataB[ib];
4019 if (B_marker[jb] < row_start)
4021 B_marker[jb] = counter;
4023 C_data[counter] = a_entry*b_entry;
4028 C_data[B_marker[jb]] += a_entry*b_entry;
4043 for (
int j = 0; j < B.
Width(); ++j)
4047 A.
Mult(columnB, columnC);
4057 Mult (R, *AP, *RAP_);
4100 int i, At_nnz, *At_j;
4104 At_nnz = At -> NumNonZeroElems();
4105 At_j = At -> GetJ();
4106 At_data = At -> GetData();
4107 for (i = 0; i < At_nnz; i++)
4109 At_data[i] *= D(At_j[i]);
4120 int ncols = A.
Width();
4134 int * marker =
new int[ncols];
4135 std::fill(marker, marker+ncols, -1);
4137 int num_nonzeros = 0, jcol;
4139 for (
int ic = 0; ic < nrows; ic++)
4141 for (
int ia = A_i[ic]; ia < A_i[ic+1]; ia++)
4147 for (
int ib = B_i[ic]; ib < B_i[ic+1]; ib++)
4150 if (marker[jcol] != ic)
4156 C_i[ic+1] = num_nonzeros;
4162 for (
int ia = 0; ia < ncols; ia++)
4168 for (
int ic = 0; ic < nrows; ic++)
4170 for (
int ia = A_i[ic]; ia < A_i[ic+1]; ia++)
4174 C_data[pos] =
a*A_data[ia];
4178 for (
int ib = B_i[ic]; ib < B_i[ic+1]; ib++)
4181 if (marker[jcol] < C_i[ic])
4184 C_data[pos] =
b*B_data[ib];
4190 C_data[marker[jcol]] +=
b*B_data[ib];
4196 return new SparseMatrix(C_i, C_j, C_data, nrows, ncols);
4201 return Add(1.,A,1.,B);
4206 MFEM_ASSERT(Ai.
Size() > 0,
"invalid size Ai.Size() = " << Ai.
Size());
4211 for (
int i=1; i < Ai.
Size(); ++i)
4213 result =
Add(*accumulate, *Ai[i]);
4219 accumulate = result;
4229 for (
int r = 0; r < B.
Height(); r++)
4233 for (
int i=0; i<A.
RowSize(r); i++)
4235 B(r, colA[i]) +=
alpha * valA[i];
4248 for (
int i=0; i<mA; i++)
4250 for (
int j=0; j<nA; j++)
4252 C->
AddMatrix(A(i,j), B, i * mB, j * nB);
4266 for (
int i=0; i<mA; i++)
4268 for (
int j=0; j<nA; j++)
4270 for (
int r=0; r<mB; r++)
4275 for (
int cj=0; cj<B.
RowSize(r); cj++)
4277 C->
Set(i * mB + r, j * nB + colB[cj], A(i,j) * valB[cj]);
4295 for (
int r=0; r<mA; r++)
4300 for (
int aj=0; aj<A.
RowSize(r); aj++)
4302 for (
int i=0; i<mB; i++)
4304 for (
int j=0; j<nB; j++)
4306 C->
Set(r * mB + i, colA[aj] * nB + j, valA[aj] * B(i, j));
4324 for (
int ar=0; ar<mA; ar++)
4329 for (
int aj=0; aj<A.
RowSize(ar); aj++)
4331 for (
int br=0; br<mB; br++)
4336 for (
int bj=0; bj<B.
RowSize(br); bj++)
4338 C->
Set(ar * mB + br, colA[aj] * nB + colB[bj],
4339 valA[aj] * valB[bj]);
4362#ifdef MFEM_USE_MEMALLOC
4372#ifdef MFEM_USE_CUDA_OR_HIP
4379 MFEM_cu_or_hip(sparseDestroy)(
handle);
4384 MFEM_Cu_or_Hip(MemFree)(
dBuffer);
Abstract data type for sparse matrices.
virtual int NumNonZeroElems() const =0
Returns the number of non-zeros in a matrix.
virtual int GetRow(const int row, Array< int > &cols, Vector &srow) const =0
Gets the columns indexes and values for row row.
Dynamic 2D array using row-major layout.
Memory< T > & GetMemory()
Return a reference to the Memory object used by the Array.
T Max() const
Find the maximal element in the array, using the comparison operator < for class T.
void Sort()
Sorts the array in ascending order. This requires operator< to be defined for T.
void SetSize(int nsize)
Change the logical size of the array, keep existing entries.
int Size() const
Return the logical size of the array.
void MakeRef(T *data_, int size_, bool own_data=false)
Make this Array a reference to a pointer.
T * Write(bool on_dev=true)
Shortcut for mfem::Write(a.GetMemory(), a.Size(), on_dev).
const T * Read(bool on_dev=true) const
Shortcut for mfem::Read(a.GetMemory(), a.Size(), on_dev).
Data type dense matrix using column-major storage.
void GetColumnReference(int c, Vector &col)
void SetSize(int s)
Change the size of the DenseMatrix to s x s.
void AddMatrix(DenseMatrix &A, int ro, int co)
Perform (ro+i,co+j)+=A(i,j) for 0<=i.
static bool Allows(unsigned long b_mask)
Return true if any of the backends in the backend mask, b_mask, are allowed.
static MemoryType GetDeviceMemoryType()
Get the current Device MemoryType. This is the MemoryType used by most MFEM classes when allocating m...
Abstract data type for matrix inverse.
size_t MemoryUsage() const
Class used by MFEM to store pointers to host and/or device memory.
void SetHostPtrOwner(bool own) const
Set/clear the ownership flag for the host pointer. Ownership indicates whether the pointer will be de...
int Capacity() const
Return the size of the allocated memory.
MemoryType GetMemoryType() const
Return a MemoryType that is currently valid. If both the host and the device pointers are currently v...
bool Empty() const
Return true if the Memory object is empty, see Reset().
void CopyFrom(const Memory &src, int size)
Copy size entries from src to *this.
void Reset()
Reset the memory to be empty, ensuring that Delete() will be a no-op.
void Wrap(T *ptr, int size, bool own)
Wrap an externally allocated host pointer, ptr with the current host memory type returned by MemoryMa...
void Delete()
Delete the owned pointers and reset the Memory object.
void ClearOwnerFlags() const
Clear the ownership flags for the host and device pointers, as well as any internal data allocated by...
void New(int size)
Allocate host memory for size entries with the current host memory type returned by MemoryManager::Ge...
int width
Dimension of the input / number of columns in the matrix.
int Height() const
Get the height (size of output) of the Operator. Synonym with NumRows().
int height
Dimension of the output / number of rows in the matrix.
DiagonalPolicy
Defines operator diagonal policy upon elimination of rows and/or columns.
@ DIAG_ONE
Set the diagonal value to one.
@ DIAG_KEEP
Keep the diagonal value.
@ DIAG_ZERO
Set the diagonal value to zero.
int Width() const
Get the width (size of input) of the Operator. Synonym with NumCols().
int GetRow(const int row, Array< int > &cols, Vector &srow) const override
Extract all column indices and values from a given row.
void PrintCSR2(std::ostream &out) const
Prints a sparse matrix to stream out in CSR format.
const int * HostReadJ() const
void GetDiag(Vector &d) const
Returns the Diagonal of A.
real_t GetRowNorml1(int irow) const
For i = irow compute .
void PartMult(const Array< int > &rows, const Vector &x, Vector &y) const
bool isSorted
Are the columns sorted already.
void Gauss_Seidel_back(const Vector &x, Vector &y) const
bool RowIsEmpty(const int row) const
void ClearColPtr() const
Reset the "current row" set by calling SetColPtr(). This method must be called between any two calls ...
MatrixInverse * Inverse() const override
This virtual method is not supported: it always returns NULL.
void Jacobi3(const Vector &b, const Vector &x0, Vector &x1, real_t sc=1.0) const
real_t * ReadWriteData(bool on_dev=true)
Memory< real_t > A
Array with size I[height], containing the actual entries of the sparse matrix, as indexed by the I ar...
void EliminateRowCol(int rc, const real_t sol, Vector &rhs, DiagonalPolicy dpolicy=DIAG_ONE)
Eliminate row rc and column rc and modify the rhs using sol.
SparseMatrix & operator*=(real_t a)
void MultTranspose(const Vector &x, Vector &y) const override
Multiply a vector with the transposed matrix. y = At * x.
void SetWidth(int width_=-1)
Change the width of a SparseMatrix.
MemAlloc< RowNode, 1024 > RowNodeAlloc
bool Empty() const
Check if the SparseMatrix is empty.
void GetSubMatrix(const Array< int > &rows, const Array< int > &cols, DenseMatrix &subm) const
SparseMatrix * At
Transpose of A. Owned. Used to perform MultTranspose() on devices.
void PartAddMult(const Array< int > &rows, const Vector &x, Vector &y, const real_t a=1.0) const
void GetRowSums(Vector &x) const
For all i compute .
int NumNonZeroElems() const override
Returns the number of the nonzero elements in the matrix.
bool Finalized() const
Returns whether or not CSR format has been finalized.
void EliminateBC(const Array< int > &ess_dofs, DiagonalPolicy diag_policy)
Eliminate essential (Dirichlet) boundary conditions.
void PrintInfo(std::ostream &out) const
Print various sparse matrix statistics.
void MoveDiagonalFirst()
Move the diagonal entry to the first position in each row, preserving the order of the rest of the co...
void Add(const int i, const int j, const real_t val)
void EliminateRowColMultipleRHS(int rc, const Vector &sol, DenseMatrix &rhs, DiagonalPolicy dpolicy=DIAG_ONE)
Similar to EliminateRowCol(int, const double, Vector &, DiagonalPolicy), but multiple values for elim...
void Jacobi(const Vector &b, const Vector &x0, Vector &x1, real_t sc, bool use_abs_diag=false) const
void SetColPtr(const int row) const
Initialize the SparseMatrix for fast access to the entries of the given row which becomes the "curren...
void SetDiagIdentity()
If a row contains only one diag entry of zero, set it to 1.
void AbsMultTranspose(const Vector &x, Vector &y) const
y = |At| * x, using entry-wise absolute values of the transpose of matrix A
void Gauss_Seidel_forw(const Vector &x, Vector &y) const
Gauss-Seidel forward and backward iterations over a vector x.
void MakeRef(const SparseMatrix &master)
Clear the contents of the SparseMatrix and make it a reference to master.
int CheckFinite() const
Count the number of entries that are NOT finite, i.e. Inf or Nan.
Memory< int > J
Array with size I[height], containing the column indices for all matrix entries, as indexed by the I ...
void EliminateCol(int col, DiagonalPolicy dpolicy=DIAG_ZERO)
Eliminates the column col from the matrix.
void EnsureMultTranspose() const
Ensures that the matrix is capable of performing MultTranspose(), AddMultTranspose(),...
void Print(std::ostream &out=mfem::out, int width_=4) const override
Prints matrix to stream out.
void _Add_(const int col, const real_t a)
Add a value to an entry in the "current row". See SetColPtr().
SparseMatrix()
Create an empty SparseMatrix.
cusparseSpMatDescr_t matA_descr
real_t & Elem(int i, int j) override
Returns reference to a_{ij}.
virtual void PrintMathematica(std::ostream &out=mfem::out) const
Prints matrix as a SparseArray for importing into Mathematica.
void AbsMult(const Vector &x, Vector &y) const
y = |A| * x, using entry-wise absolute values of matrix A
void ClearGPUSparse()
Clear the cuSPARSE/hipSPARSE descriptors. This must be called after releasing the device memory of A.
void Threshold(real_t tol, bool fix_empty_rows=false)
Remove entries smaller in absolute value than a given tolerance tol. If fix_empty_rows is true,...
void SetSubMatrix(const Array< int > &rows, const Array< int > &cols, const DenseMatrix &subm, int skip_zeros=1)
void PrintMatlab(std::ostream &out=mfem::out) const override
Prints matrix in matlab format.
const int * ReadI(bool on_dev=true) const
real_t InnerProduct(const Vector &x, const Vector &y) const
Compute y^t A x.
Memory< int > I
Array with size (height+1) containing the row offsets.
real_t & operator()(int i, int j)
Returns reference to A[i][j].
const real_t * HostReadData() const
void PrintMM(std::ostream &out=mfem::out) const
Prints matrix in Matrix Market sparse format.
void ScaleColumns(const Vector &sr)
this = this * diag(sr);
void PrintCSR(std::ostream &out) const
Prints matrix to stream out in hypre_CSRMatrix format.
void Swap(SparseMatrix &other)
int * ReadWriteJ(bool on_dev=true)
cusparseDnVecDescr_t vecY_descr
void BooleanMultTranspose(const Array< int > &x, Array< int > &y) const
y = At * x, treating all entries as booleans (zero=false, nonzero=true).
RowNode ** Rows
Array of linked lists, one for every row. This array represents the linked list (LIL) storage format.
real_t & SearchRow(const int col)
Perform a fast search for an entry in the "current row". See SetColPtr().
static int SparseMatrixCount
void AddSubMatrix(const Array< int > &rows, const Array< int > &cols, const DenseMatrix &subm, int skip_zeros=1)
void Symmetrize()
(*this) = 1/2 ((*this) + (*this)^t)
void BooleanMult(const Array< int > &x, Array< int > &y) const
y = A * x, treating all entries as booleans (zero=false, nonzero=true).
void AddMultTranspose(const Vector &x, Vector &y, const real_t a=1.0) const override
y += At * x (default) or y += a * At * x
int * GetRowColumns(const int row)
Return a pointer to the column indices in a row.
real_t GetJacobiScaling() const
Determine appropriate scaling for Jacobi iteration.
int RowSize(const int i) const
Returns the number of elements in row i.
real_t * HostReadWriteData()
int CountSmallElems(real_t tol) const
Count the number of entries with |a_ij| <= tol.
const int * ReadJ(bool on_dev=true) const
int MaxRowSize() const
Returns the maximum number of elements among all rows.
real_t _Get_(const int col) const
Read the value of an entry in the "current row". See SetColPtr().
void AddMult(const Vector &x, Vector &y, const real_t a=1.0) const override
y += A * x (default) or y += a * A * x
virtual ~SparseMatrix()
Destroys sparse matrix.
SparseMatrix & operator=(const SparseMatrix &rhs)
Assignment operator: deep copy.
void Mult(const Vector &x, Vector &y) const override
Matrix vector multiplication.
void OverrideSize(int height_, int width_)
Sets the height and width of the matrix.
static cusparseHandle_t handle
void EliminateRow(int row, const real_t sol, Vector &rhs)
Eliminates a column from the transpose matrix.
void GetBlocks(Array2D< SparseMatrix * > &blocks) const
SparseMatrix & operator+=(const SparseMatrix &B)
Add the sparse matrix 'B' to '*this'. This operation will cause an error if '*this' is finalized and ...
void Clear()
Clear the contents of the SparseMatrix.
void ResetTranspose() const
void EliminateRowColDiag(int rc, real_t value)
Perform elimination and set the diagonal entry to the given value.
real_t * GetRowEntries(const int row)
Return a pointer to the entries in a row.
void ScaleRows(const Vector &sl)
this = diag(sl) * this;
void SetRow(const int row, const Array< int > &cols, const Vector &srow)
void DiagScale(const Vector &b, Vector &x, real_t sc=1.0, bool use_abs_diag=false) const
x = sc b / A_ii. When use_abs_diag = true, |A_ii| is used.
real_t * GetData()
Return the element data, i.e. the array A.
void SetSubMatrixTranspose(const Array< int > &rows, const Array< int > &cols, const DenseMatrix &subm, int skip_zeros=1)
void BuildTranspose() const
Build and store internally the transpose of this matrix which will be used in the methods AddMultTran...
void SortColumnIndices()
Sort the column indices corresponding to each row.
void Finalize(int skip_zeros=1) override
Finalize the matrix initialization, switching the storage format from LIL to CSR.
real_t IsSymmetric() const
Returns max_{i,j} |(i,j)-(j,i)| for a finalized matrix.
DenseMatrix * ToDenseMatrix() const
Produces a DenseMatrix from a SparseMatrix.
void _Set_(const int col, const real_t a)
Set an entry in the "current row". See SetColPtr().
void EliminateZeroRows(const real_t threshold=1e-12) override
If a row contains only zeros, set its diagonal to 1.
void EliminateCols(const Array< int > &cols, const Vector *x=NULL, Vector *b=NULL)
Eliminate all columns i for which cols[i] != 0.
cusparseDnVecDescr_t vecX_descr
const real_t * ReadData(bool on_dev=true) const
void ScaleRow(const int row, const real_t scale)
void AddRow(const int row, const Array< int > &cols, const Vector &srow)
void Jacobi2(const Vector &b, const Vector &x0, Vector &x1, real_t sc=1.0) const
int ActualWidth() const
Returns the actual Width of the matrix.
const int * HostReadI() const
void Set(const int i, const int j, const real_t val)
virtual const real_t * HostRead() const
Shortcut for mfem::Read(vec.GetMemory(), vec.Size(), false).
virtual const real_t * Read(bool on_dev=true) const
Shortcut for mfem::Read(vec.GetMemory(), vec.Size(), on_dev).
void Neg()
(*this) = -(*this)
virtual real_t * ReadWrite(bool on_dev=true)
Shortcut for mfem::ReadWrite(vec.GetMemory(), vec.Size(), on_dev).
int Size() const
Returns the size of the vector.
virtual void UseDevice(bool use_dev) const
Enable execution of Vector operations using the mfem::Device.
void SetSize(int s)
Resize the vector to size s.
void NewDataAndSize(real_t *d, int s)
Set the Vector data and size, deleting the old data, if owned.
real_t * GetData() const
Return a pointer to the beginning of the Vector data.
virtual real_t * HostReadWrite()
Shortcut for mfem::ReadWrite(vec.GetMemory(), vec.Size(), false).
virtual real_t * Write(bool on_dev=true)
Shortcut for mfem::Write(vec.GetMemory(), vec.Size(), on_dev).
void * CuMemAlloc(void **dptr, size_t bytes)
Allocates device memory and returns destination ptr.
void * CuMemFree(void *dptr)
Frees device memory and returns destination ptr.
DenseMatrix * OuterProduct(const DenseMatrix &A, const DenseMatrix &B)
Produces a block matrix with blocks A_{ij}*B.
const T * Read(const Memory< T > &mem, int size, bool on_dev=true)
Get a pointer for read access to mem with the mfem::Device's DeviceMemoryClass, if on_dev = true,...
void Swap(Array< T > &, Array< T > &)
real_t infinity()
Define a shortcut for std::numeric_limits<double>::infinity()
void mfem_error(const char *msg)
void Mult(const Table &A, const Table &B, Table &C)
C = A * B (as boolean matrices)
const T * HostRead(const Memory< T > &mem, int size)
Shortcut to Read(const Memory<T> &mem, int size, false)
T * Write(Memory< T > &mem, int size, bool on_dev=true)
Get a pointer for write access to mem with the mfem::Device's DeviceMemoryClass, if on_dev = true,...
bool IsFinite(const real_t &val)
void Transpose(const Table &A, Table &At, int ncols_A_)
Transpose a Table.
void RAP(const DenseMatrix &A, const DenseMatrix &P, DenseMatrix &RAP)
int CheckFinite(const real_t *v, const int n)
T * HostWrite(Memory< T > &mem, int size)
Shortcut to Write(const Memory<T> &mem, int size, false)
SparseMatrix * MultAbstractSparseMatrix(const AbstractSparseMatrix &A, const AbstractSparseMatrix &B)
Matrix product of sparse matrices. A and B do not need to be CSR matrices.
void * HipMemFree(void *dptr)
Frees device memory.
SparseMatrix * TransposeAbstractSparseMatrix(const AbstractSparseMatrix &A, int useActualWidth)
Transpose of a sparse matrix. A does not need to be a CSR matrix.
SparseMatrix * Mult_AtDA(const SparseMatrix &A, const Vector &D, SparseMatrix *OAtDA)
Matrix multiplication A^t D A. All matrices must be finalized.
MemoryType
Memory types supported by MFEM.
std::function< real_t(const Vector &)> f(real_t mass_coeff)
SparseMatrix * TransposeMult(const SparseMatrix &A, const SparseMatrix &B)
C = A^T B.
void * HipMemAlloc(void **dptr, size_t bytes)
Allocates device memory.
void forall(int N, lambda &&body)
void forall_switch(bool use_dev, int N, lambda &&body)
void Add(const DenseMatrix &A, const DenseMatrix &B, real_t alpha, DenseMatrix &C)
C = A + alpha*B.
void SparseMatrixFunction(SparseMatrix &S, real_t(*f)(real_t))
Applies f() to each element of the matrix (after it is finalized).
@ HIP_MASK
Biwise-OR of all HIP backends.
@ CPU_MASK
Biwise-OR of all CPU backends.
@ CUDA_MASK
Biwise-OR of all CUDA backends.