27#if defined(MFEM_USE_CUDA)
28#define MFEM_cu_or_hip(stub) cu##stub
29#define MFEM_Cu_or_Hip(stub) Cu##stub
30#define MFEM_CU_or_HIP(stub) CU##stub
31#define MFEM_CUDA_or_HIP(stub) CUDA##stub
33#if CUSPARSE_VERSION >= 11400
34#define MFEM_GPUSPARSE_ALG CUSPARSE_SPMV_CSR_ALG1
36#define MFEM_GPUSPARSE_ALG CUSPARSE_CSRMV_ALG1
39#elif defined(MFEM_USE_HIP)
40#define MFEM_cu_or_hip(stub) hip##stub
41#define MFEM_Cu_or_Hip(stub) Hip##stub
42#define MFEM_CU_or_HIP(stub) HIP##stub
43#define MFEM_CUDA_or_HIP(stub) HIP##stub
46#define MFEM_GPUSPARSE_ALG HIPSPARSE_CSRMV_ALG1
49#if defined(MFEM_USE_SINGLE)
50#define MFEM_CUDA_or_HIP_REAL_T MFEM_CUDA_or_HIP(_R_32F)
51#elif defined(MFEM_USE_DOUBLE)
52#define MFEM_CUDA_or_HIP_REAL_T MFEM_CUDA_or_HIP(_R_64F)
60#ifdef MFEM_USE_CUDA_OR_HIP
66#ifndef MFEM_CUDA_1897_WORKAROUND
75#ifdef MFEM_USE_CUDA_OR_HIP
91#ifdef MFEM_USE_CUDA_OR_HIP
94#if CUDA_VERSION >= 10010 || defined(MFEM_USE_HIP)
95 MFEM_cu_or_hip(sparseDestroySpMat)(
matA_descr);
96 MFEM_cu_or_hip(sparseDestroyDnVec)(
vecX_descr);
97 MFEM_cu_or_hip(sparseDestroyDnVec)(
vecY_descr);
120 for (
int i = 0; i < nrows; i++)
125#ifdef MFEM_USE_MEMALLOC
144#ifdef MFEM_USE_MEMALLOC
152 bool ownij,
bool owna,
bool issorted)
163#ifdef MFEM_USE_MEMALLOC
175 for (
int ii=0; ii<nnz; ++ii)
192#ifdef MFEM_USE_MEMALLOC
196 J.
New(nrows * rowsize);
197 A.
New(nrows * rowsize);
199 for (
int i = 0; i <= nrows; i++)
233#ifdef MFEM_USE_MEMALLOC
239#ifdef MFEM_USE_MEMALLOC
243 for (
int i = 0; i <
height; i++)
246 for (
RowNode *node_p = mat.
Rows[i]; node_p; node_p = node_p->Prev)
248#ifdef MFEM_USE_MEMALLOC
253 new_node_p->
Value = node_p->Value;
254 new_node_p->
Column = node_p->Column;
255 *node_pp = new_node_p;
256 node_pp = &new_node_p->
Prev;
284#ifdef MFEM_USE_MEMALLOC
291 for (
int i = 0; i <=
height; i++)
296 for (
int r=0; r<
height; r++)
323 MFEM_ASSERT(master.
Finalized(),
"'master' must be finalized");
344#ifdef MFEM_USE_MEMALLOC
362 return I[gi+1]-
I[gi];
367 for ( ; row != NULL; row = row->
Prev)
368 if (row->
Value != 0.0)
381 for (
int i=0; i <
height; ++i)
383 rowSize =
I[i+1]-
I[i];
384 max_row_size = (max_row_size > rowSize) ? max_row_size : rowSize;
389 for (
int i=0; i <
height; ++i)
392 max_row_size = (max_row_size > rowSize) ? max_row_size : rowSize;
401 MFEM_VERIFY(
Finalized(),
"Matrix must be finalized.");
408 MFEM_VERIFY(
Finalized(),
"Matrix must be finalized.");
415 MFEM_VERIFY(
Finalized(),
"Matrix must be finalized.");
422 MFEM_VERIFY(
Finalized(),
"Matrix must be finalized.");
429 if (newWidth ==
width)
434 else if (newWidth == -1)
440 else if (newWidth >
width)
451 ColPtrJ =
static_cast<int *
>(NULL);
459 "The new width needs to be bigger or equal to the actual width");
467 MFEM_VERIFY(
Finalized(),
"Matrix is not Finalized!");
474#ifdef MFEM_USE_CUDA_OR_HIP
478 const int n =
Width();
480 const int *d_ia =
ReadI();
485 size_t pBufferSizeInBytes;
486 MFEM_cu_or_hip(sparseXcsrsort_bufferSizeExt)(
handle, m, n, nnzA, d_ia,
487 d_ja, &pBufferSizeInBytes);
488 void *pBuffer = MFEM_Cu_or_Hip(
MemAlloc)(&pBuffer, pBufferSizeInBytes);
493 MFEM_cu_or_hip(sparseCreateMatDescr)(&
matA_descr);
497 int *d_P = P.
Write();
498 mfem::forall(nnzA, [=] MFEM_HOST_DEVICE (
int i) { d_P[i] = i; });
507 void *d_a_unsorted = MFEM_Cu_or_Hip(
MemAlloc)(&d_a_unsorted,
509 MFEM_Cu_or_Hip(MemcpyDtoD)(d_a_unsorted, d_a, nnzA *
sizeof(
real_t));
512 MFEM_cu_or_hip(sparseDnVecDescr_t) d_a_dense;
513 MFEM_cu_or_hip(sparseCreateDnVec)(&d_a_dense, nnzA, d_a_unsorted,
514 MFEM_CUDA_or_HIP_REAL_T);
517 MFEM_cu_or_hip(sparseSpVecDescr_t) d_a_sparse;
518 MFEM_cu_or_hip(sparseCreateSpVec)(&d_a_sparse, nnzA, nnzA, d_P, d_a,
519 MFEM_CU_or_HIP(SPARSE_INDEX_32I),
520 MFEM_CU_or_HIP(SPARSE_INDEX_BASE_ZERO),
521 MFEM_CUDA_or_HIP_REAL_T);
524 MFEM_cu_or_hip(sparseGather)(
handle, d_a_dense, d_a_sparse);
530 MFEM_cu_or_hip(sparseDestroyDnVec)(d_a_dense);
531 MFEM_cu_or_hip(sparseDestroySpVec)(d_a_sparse);
532 MFEM_cu_or_hip(sparseDestroyMatDescr)(
matA_descr);
534 MFEM_Cu_or_Hip(MemFree)(d_a_unsorted);
535 MFEM_Cu_or_Hip(MemFree)(pBuffer);
545 for (
int j = 0, i = 0; i <
height; i++)
549 for (
int k = 0; k < row.
Size(); k++)
555 for (
int k = 0; k < row.
Size(); k++, j++)
567 MFEM_VERIFY(
Finalized(),
"Matrix is not Finalized!");
569 for (
int row = 0, end = 0; row <
height; row++)
573 for (j = start;
true; j++)
575 MFEM_VERIFY(j < end,
"diagonal entry not found in row = " << row);
576 if (
J[j] == row) {
break; }
579 for ( ; j > start; j--)
601 MFEM_ASSERT(i < height && i >= 0 && j < width && j >= 0,
602 "Trying to access element outside of the matrix. "
603 <<
"height = " <<
height <<
", "
604 <<
"width = " <<
width <<
", "
605 <<
"i = " << i <<
", "
608 MFEM_VERIFY(
Finalized(),
"Matrix must be finalized.");
610 for (
int k =
I[i], end =
I[i+1]; k < end; k++)
618 MFEM_ABORT(
"Did not find i = " << i <<
", j = " << j <<
" in matrix.");
624 static const real_t zero = 0.0;
626 MFEM_ASSERT(i < height && i >= 0 && j < width && j >= 0,
627 "Trying to access element outside of the matrix. "
628 <<
"height = " <<
height <<
", "
629 <<
"width = " <<
width <<
", "
630 <<
"i = " << i <<
", "
638 for (
int k =
I[i], end =
I[i+1]; k < end; k++)
650 if (node_p->Column == j)
652 return node_p->Value;
662 MFEM_VERIFY(
height ==
width,
"Matrix must be square, not height = "
664 MFEM_VERIFY(
Finalized(),
"Matrix must be finalized.");
668 const auto II = this->
ReadI();
669 const auto JJ = this->
ReadJ();
675 const int begin = II[i];
676 const int end = II[i+1];
678 for (j = begin; j < end; j++)
696 int num_rows = this->
Height();
697 int num_cols = this->
Width();
712 for (
int r=0; r<
height; r++)
717 for (
int cj=0; cj<this->
RowSize(r); cj++)
719 B(r, col[cj]) = val[cj];
733 MFEM_ASSERT(
width == x.
Size(),
"Input vector size (" << x.
Size()
734 <<
") must match matrix width (" <<
width <<
")");
735 MFEM_ASSERT(
height == y.
Size(),
"Output vector size (" << y.
Size()
736 <<
") must match matrix height (" <<
height <<
")");
744 for (
int i = 0; i <
height; i++)
748 for ( ; row != NULL; row = row->
Prev)
758#ifndef MFEM_USE_LEGACY_OPENMP
762 auto d_J =
Read(
J, nnz);
763 auto d_A =
Read(
A, nnz);
768 if (nnz == 0) {
return;}
771#ifdef MFEM_USE_CUDA_OR_HIP
778#if CUDA_VERSION >= 10010 || defined(MFEM_USE_HIP)
780 MFEM_cu_or_hip(sparseCreateCsr)(
784 const_cast<int *
>(d_I),
785 const_cast<int *
>(d_J),
786 const_cast<real_t *
>(d_A),
787 MFEM_CU_or_HIP(SPARSE_INDEX_32I),
788 MFEM_CU_or_HIP(SPARSE_INDEX_32I),
789 MFEM_CU_or_HIP(SPARSE_INDEX_BASE_ZERO),
790 MFEM_CUDA_or_HIP_REAL_T);
793 MFEM_cu_or_hip(sparseCreateDnVec)(&
vecX_descr,
795 const_cast<real_t *
>(d_x),
796 MFEM_CUDA_or_HIP_REAL_T);
798 MFEM_CUDA_or_HIP_REAL_T);
801 cusparseSetMatIndexBase(
matA_descr, CUSPARSE_INDEX_BASE_ZERO);
802 cusparseSetMatType(
matA_descr, CUSPARSE_MATRIX_TYPE_GENERAL);
807 size_t newBufferSize = 0;
809 MFEM_cu_or_hip(sparseSpMV_bufferSize)(
811 MFEM_CU_or_HIP(SPARSE_OPERATION_NON_TRANSPOSE),
817 MFEM_CUDA_or_HIP_REAL_T,
829#if CUDA_VERSION >= 10010 || defined(MFEM_USE_HIP)
831 MFEM_cu_or_hip(sparseDnVecSetValues)(
vecX_descr,
832 const_cast<real_t *
>(d_x));
833 MFEM_cu_or_hip(sparseDnVecSetValues)(
vecY_descr, d_y);
836 MFEM_cu_or_hip(sparseSpMV)(
838 MFEM_CU_or_HIP(SPARSE_OPERATION_NON_TRANSPOSE),
844 MFEM_CUDA_or_HIP_REAL_T,
848#ifdef MFEM_USE_SINGLE
853 CUSPARSE_OPERATION_NON_TRANSPOSE,
859 const_cast<real_t *
>(d_A),
860 const_cast<int *
>(d_I),
861 const_cast<int *
>(d_J),
862 const_cast<real_t *
>(d_x),
874 const int end = d_I[i+1];
875 for (
int j = d_I[i]; j < end; j++)
877 d += d_A[j] * d_x[d_J[j]];
887 const int *Jp =
J, *Ip =
I;
889 #pragma omp parallel for
890 for (
int i = 0; i <
height; i++)
893 const int end = Ip[i+1];
894 for (
int j = Ip[i]; j < end; j++)
896 d += Ap[j] * xp[Jp[j]];
914 <<
") must match matrix height (" <<
height <<
")");
915 MFEM_ASSERT(
width == y.
Size(),
"Output vector size (" << y.
Size()
916 <<
") must match matrix width (" <<
width <<
")");
923 for (
int i = 0; i <
height; i++)
927 for ( ; row != NULL; row = row->
Prev)
946 const int nnz = Ip[
height];
950 for (
int i = 0; i <
height; i++)
953 const int end = Ip[i+1];
954 for (
int j = Ip[i]; j < end; j++)
956 const int Jj = Jp[j];
957 yp[Jj] += Ap[j] * xi;
988 MFEM_VERIFY(
Finalized(),
"Matrix must be finalized.");
990 const int n = rows.
Size();
992 auto d_rows = rows.
Read();
994 auto d_J =
Read(
J, nnz);
995 auto d_A =
Read(
A, nnz);
997 auto d_y = y.
Write();
1000 const int r = d_rows[i];
1001 const int end = d_I[r + 1];
1003 for (
int j = d_I[r]; j < end; j++)
1005 a += d_A[j] * d_x[d_J[j]];
1014 MFEM_VERIFY(
Finalized(),
"Matrix must be finalized.");
1016 for (
int i = 0; i < rows.
Size(); i++)
1021 for (
int j =
I[r]; j < end; j++)
1023 val +=
A[j] * x(
J[j]);
1031 MFEM_ASSERT(
Finalized(),
"Matrix must be finalized.");
1032 MFEM_ASSERT(x.
Size() ==
Width(),
"Input vector size (" << x.
Size()
1033 <<
") must match matrix width (" <<
Width() <<
")");
1040 auto d_J =
Read(
J, nnz);
1046 const int end = d_I[i+1];
1047 for (
int j = d_I[i]; j < end; j++)
1062 MFEM_ASSERT(
Finalized(),
"Matrix must be finalized.");
1063 MFEM_ASSERT(x.
Size() ==
Height(),
"Input vector size (" << x.
Size()
1064 <<
") must match matrix height (" <<
Height() <<
")");
1069 for (
int i = 0; i <
Height(); i++)
1074 for (
int j =
I[i]; j < end; j++)
1084 MFEM_ASSERT(
width == x.
Size(),
"Input vector size (" << x.
Size()
1085 <<
") must match matrix width (" <<
width <<
")");
1086 MFEM_ASSERT(
height == y.
Size(),
"Output vector size (" << y.
Size()
1087 <<
") must match matrix height (" <<
height <<
")");
1098 for (
int i = 0; i <
height; i++)
1102 for ( ; row != NULL; row = row->
Prev)
1115 auto d_J =
Read(
J, nnz);
1116 auto d_A =
Read(
A, nnz);
1117 auto d_x = x.
Read();
1122 const int end = d_I[i+1];
1123 for (
int j = d_I[i]; j < end; j++)
1125 d += std::abs(d_A[j]) * d_x[d_J[j]];
1133 MFEM_ASSERT(
height == x.
Size(),
"Input vector size (" << x.
Size()
1134 <<
") must match matrix height (" <<
height <<
")");
1135 MFEM_ASSERT(
width == y.
Size(),
"Output vector size (" << y.
Size()
1136 <<
") must match matrix width (" <<
width <<
")");
1144 for (
int i = 0; i <
height; i++)
1148 for ( ; row != NULL; row = row->
Prev)
1163 for (
int i = 0; i <
height; i++)
1166 const int end =
I[i+1];
1167 for (
int j =
I[i]; j < end; j++)
1169 const int Jj =
J[j];
1170 y[Jj] += std::abs(
A[j]) * xi;
1179 <<
" must be equal to Width() = " <<
Width());
1181 <<
" must be equal to Height() = " <<
Height());
1194 for (
int i = 0; i <
height; i++)
1199 for (
int j =
I[i], end =
I[i+1]; j < end; j++)
1201 a +=
A[j] * x(
J[j]);
1208 a += np->Value * x(np->Column);
1223 auto d_x = x.
Write();
1227 for (
int j = d_I[i], end = d_I[i+1]; j < end; j++)
1236 for (
int i = 0; i <
height; i++)
1250 MFEM_VERIFY(irow <
height,
1251 "row " << irow <<
" not in matrix with height " <<
height);
1256 for (
int j =
I[irow], end =
I[irow+1]; j < end; j++)
1265 a += fabs(np->Value);
1274 MFEM_ASSERT(
Finalized(),
"Matrix must be finalized.");
1276 atol = std::abs(tol);
1278 fix_empty_rows =
height ==
width ? fix_empty_rows :
false;
1286 for (i = 0, nz = 0; i <
height; i++)
1289 for (j =
I[i]; j <
I[i+1]; j++)
1290 if (std::abs(
A[j]) > atol)
1295 if (fix_empty_rows && !found) { nz++; }
1303 for (i = 0, nz = 0; i <
height; i++)
1307 for (j =
I[i]; j <
I[i+1]; j++)
1308 if (std::abs(
A[j]) > atol)
1313 if ( lastCol > newJ[nz] )
1320 if (fix_empty_rows && !found)
1348 for (i = 1; i <=
height; i++)
1351 for (aux =
Rows[i-1]; aux != NULL; aux = aux->
Prev)
1353 if (skip_zeros && aux->
Value == 0.0)
1355 if (skip_zeros == 2) {
continue; }
1356 if ((i-1) != aux->
Column) {
continue; }
1362 if (other->Column == (i-1))
1365 found_val = other->Value;
1369 if (found && found_val == 0.0) {
continue; }
1374 if (fix_empty_rows && !nr) { nr = 1; }
1383 for (j = i = 0; i <
height; i++)
1387 for (aux =
Rows[i]; aux != NULL; aux = aux->
Prev)
1389 if (skip_zeros && aux->
Value == 0.0)
1391 if (skip_zeros == 2) {
continue; }
1392 if (i != aux->
Column) {
continue; }
1398 if (other->Column == i)
1401 found_val = other->Value;
1405 if (found && found_val == 0.0) {
continue; }
1411 if ( lastCol >
J[j] )
1420 if (fix_empty_rows && !nr)
1428#ifdef MFEM_USE_MEMALLOC
1432 for (i = 0; i <
height; i++)
1435 while (node_p != NULL)
1438 node_p = node_p->
Prev;
1451 int nr = (
height + br - 1)/br, nc = (
width + bc - 1)/bc;
1453 for (
int j = 0; j < bc; j++)
1455 for (
int i = 0; i < br; i++)
1458 for (
int k = 0; k <= nr; k++)
1462 blocks(i,j) =
new SparseMatrix(bI, NULL, NULL, nr, nc);
1466 for (
int gr = 0; gr <
height; gr++)
1468 int bi = gr/nr, i = gr%nr + 1;
1471 for (
int j =
I[gr]; j <
I[gr+1]; j++)
1475 blocks(bi,
J[j]/nc)->I[i]++;
1483 if (n_p->Value != 0.0)
1485 blocks(bi, n_p->Column/nc)->I[i]++;
1491 for (
int j = 0; j < bc; j++)
1493 for (
int i = 0; i < br; i++)
1497 for (
int k = 1; k <= nr; k++)
1499 rs =
b.I[k],
b.I[k] = nnz, nnz += rs;
1506 for (
int gr = 0; gr <
height; gr++)
1508 int bi = gr/nr, i = gr%nr + 1;
1511 for (
int j =
I[gr]; j <
I[gr+1]; j++)
1516 b.J[
b.I[i]] =
J[j] % nc;
1526 if (n_p->Value != 0.0)
1529 b.J[
b.I[i]] = n_p->Column % nc;
1530 b.A[
b.I[i]] = n_p->Value;
1552 for (
int i = 1; i <
height; i++)
1554 for (
int j =
I[i]; j <
I[i+1]; j++)
1558 symm = std::max(symm, std::abs(
A[j]-(*
this)(
J[j],i)));
1565 for (
int i = 0; i <
height; i++)
1567 for (
RowNode *node_p =
Rows[i]; node_p != NULL; node_p = node_p->
Prev)
1569 int col = node_p->Column;
1572 symm = std::max(symm, std::abs(node_p->Value-(*
this)(col,i)));
1582 MFEM_VERIFY(
Finalized(),
"Matrix must be finalized.");
1585 for (i = 1; i <
height; i++)
1587 for (j =
I[i]; j <
I[i+1]; j++)
1591 A[j] += (*this)(
J[j],i);
1593 (*this)(
J[j],i) =
A[j];
1610 for (
int i = 0; i <
height; i++)
1612 for (
RowNode *node_p =
Rows[i]; node_p != NULL; node_p = node_p->
Prev)
1629 for (
int j = 0; j < nnz; j++)
1631 m = std::max(m, std::abs(
A[j]));
1636 for (
int i = 0; i <
height; i++)
1640 m = std::max(m, std::abs(n_p->Value));
1656 for (
int i = 0; i < nz; i++)
1658 counter += (std::abs(Ap[i]) <= tol);
1663 for (
int i = 0; i <
height; i++)
1667 counter += (std::abs(aux->Value) <= tol);
1688 for (
int i = 0; i <
height; i++)
1708 MFEM_ASSERT(row < height && row >= 0,
1709 "Row " << row <<
" not in matrix of height " <<
height);
1711 MFEM_VERIFY(!
Finalized(),
"Matrix must NOT be finalized.");
1713 for (aux =
Rows[row]; aux != NULL; aux = aux->
Prev)
1724 MFEM_ASSERT(row < height && row >= 0,
1725 "Row " << row <<
" not in matrix of height " <<
height);
1726 MFEM_ASSERT(dpolicy !=
DIAG_KEEP,
"Diagonal policy must not be DIAG_KEEP");
1728 "if dpolicy == DIAG_ONE, matrix must be square, not height = "
1733 for (
int i=
I[row]; i <
I[row+1]; ++i)
1740 for (aux =
Rows[row]; aux != NULL; aux = aux->
Prev)
1754 MFEM_ASSERT(col < width && col >= 0,
1755 "Col " << col <<
" not in matrix of width " <<
width);
1756 MFEM_ASSERT(dpolicy !=
DIAG_KEEP,
"Diagonal policy must not be DIAG_KEEP");
1758 "if dpolicy == DIAG_ONE, matrix must be square, not height = "
1764 for (
int jpos = 0; jpos != nnz; ++jpos)
1774 for (
int i = 0; i <
height; i++)
1778 if (aux->Column == col)
1798 for (
int i = 0; i <
height; i++)
1800 for (
int jpos =
I[i]; jpos !=
I[i+1]; ++jpos)
1806 (*b)(i) -=
A[jpos] * (*x)(
J[jpos] );
1815 for (
int i = 0; i <
height; i++)
1819 if (cols[aux -> Column])
1823 (*b)(i) -= aux -> Value * (*x)(aux -> Column);
1837 for (
int row = 0; row <
height; row++)
1839 for (nd =
Rows[row]; nd != NULL; nd = nd->
Prev)
1841 if (col_marker[nd->
Column])
1851 for (
int row = 0; row <
height; row++)
1853 for (
int j =
I[row]; j <
I[row+1]; j++)
1855 if (col_marker[
J[j]])
1857 Ae.
Add(row,
J[j],
A[j]);
1869 MFEM_ASSERT(rc < height && rc >= 0,
1870 "Row " << rc <<
" not in matrix of height " <<
height);
1877 for (
int j =
I[rc]; j <
I[rc+1]; j++)
1879 const int col =
J[j];
1885 rhs(rc) =
A[j] * sol;
1896 mfem_error(
"SparseMatrix::EliminateRowCol () #2");
1903 for (
int k =
I[col]; 1; k++)
1907 mfem_error(
"SparseMatrix::EliminateRowCol () #3");
1909 else if (
J[k] == rc)
1911 rhs(col) -= sol *
A[k];
1923 const int col = aux->Column;
1929 rhs(rc) = aux->Value * sol;
1940 mfem_error(
"SparseMatrix::EliminateRowCol () #4");
1951 mfem_error(
"SparseMatrix::EliminateRowCol () #5");
1953 else if (node->Column == rc)
1955 rhs(col) -= sol * node->Value;
1969 MFEM_ASSERT(rc < height && rc >= 0,
1970 "Row " << rc <<
" not in matrix of height " <<
height);
1971 MFEM_ASSERT(sol.
Size() == rhs.
Width(),
"solution size (" << sol.
Size()
1972 <<
") must match rhs width (" << rhs.
Width() <<
")");
1974 const int num_rhs = rhs.
Width();
1977 for (
int j =
I[rc]; j <
I[rc+1]; j++)
1979 const int col =
J[j];
1985 for (
int r = 0; r < num_rhs; r++)
1987 rhs(rc,r) =
A[j] * sol(r);
1992 for (
int r = 0; r < num_rhs; r++)
1999 for (
int r = 0; r < num_rhs; r++)
2005 mfem_error(
"SparseMatrix::EliminateRowColMultipleRHS() #3");
2012 for (
int k =
I[col]; 1; k++)
2016 mfem_error(
"SparseMatrix::EliminateRowColMultipleRHS() #4");
2018 else if (
J[k] == rc)
2020 for (
int r = 0; r < num_rhs; r++)
2022 rhs(col,r) -= sol(r) *
A[k];
2035 const int col = aux->Column;
2041 for (
int r = 0; r < num_rhs; r++)
2043 rhs(rc,r) = aux->Value * sol(r);
2048 for (
int r = 0; r < num_rhs; r++)
2055 for (
int r = 0; r < num_rhs; r++)
2061 mfem_error(
"SparseMatrix::EliminateRowColMultipleRHS() #5");
2072 mfem_error(
"SparseMatrix::EliminateRowColMultipleRHS() #6");
2074 else if (node->Column == rc)
2076 for (
int r = 0; r < num_rhs; r++)
2078 rhs(col,r) -= sol(r) * node->Value;
2091 MFEM_ASSERT(rc < height && rc >= 0,
2092 "Row " << rc <<
" not in matrix of height " <<
height);
2096 const auto &II = this->
I;
2097 const auto &JJ = this->
J;
2098 for (
int j = II[rc]; j < II[rc+1]; j++)
2100 const int col = JJ[j];
2115 for (
int k = II[col]; 1; k++)
2119 mfem_error(
"SparseMatrix::EliminateRowCol() #2");
2121 else if (JJ[k] == rc)
2134 for (aux =
Rows[rc]; aux != NULL; aux = aux->
Prev)
2136 const int col = aux->
Column;
2151 for (node =
Rows[col]; 1; node = node->
Prev)
2155 mfem_error(
"SparseMatrix::EliminateRowCol() #3");
2157 else if (node->
Column == rc)
2172 MFEM_ASSERT(rc < height && rc >= 0,
2173 "Row " << rc <<
" not in matrix of height " <<
height);
2177 for (
int j =
I[rc]; j <
I[rc+1]; j++)
2179 const int col =
J[j];
2187 for (
int k =
I[col]; 1; k++)
2191 mfem_error(
"SparseMatrix::EliminateRowCol() #2");
2193 else if (
J[k] == rc)
2206 for (aux =
Rows[rc]; aux != NULL; aux = aux->
Prev)
2208 const int col = aux->
Column;
2216 for (node =
Rows[col]; 1; node = node->
Prev)
2220 mfem_error(
"SparseMatrix::EliminateRowCol() #3");
2222 else if (node->
Column == rc)
2239 for (nd =
Rows[rc]; nd != NULL; nd = nd->
Prev)
2241 const int col = nd->
Column;
2257 mfem_error(
"SparseMatrix::EliminateRowCol #1");
2265 for (nd2 =
Rows[col]; 1; nd2 = nd2->
Prev)
2269 mfem_error(
"SparseMatrix::EliminateRowCol #2");
2271 else if (nd2->
Column == rc)
2283 for (
int j =
I[rc]; j <
I[rc+1]; j++)
2285 const int col =
J[j];
2291 Ae.
Add(rc, rc,
A[j] - 1.0);
2295 Ae.
Add(rc, rc,
A[j]);
2301 mfem_error(
"SparseMatrix::EliminateRowCol #3");
2307 Ae.
Add(rc, col,
A[j]);
2309 for (
int k =
I[col];
true; k++)
2313 mfem_error(
"SparseMatrix::EliminateRowCol #4");
2315 else if (
J[k] == rc)
2317 Ae.
Add(col, rc,
A[k]);
2330 const int n_ess_dofs = ess_dofs.
Size();
2331 const auto ess_dofs_d = ess_dofs.
Read();
2332 const auto dI =
ReadI();
2333 const auto dJ =
ReadJ();
2338 const int idof = ess_dofs_d[i];
2339 for (
int j=dI[idof]; j<dI[idof+1]; ++j)
2341 const int jdof = dJ[j];
2345 for (
int k=dI[jdof]; k<dI[jdof+1]; ++k)
2372 for (
int i = 0; i <
height; i++)
2374 if (
I[i+1] ==
I[i]+1 && fabs(
A[
I[i]]) < 1e-16)
2383 for (
int i = 0; i <
height; i++)
2386 for (
int j =
I[i]; j <
I[i+1]; j++)
2390 if (zero <= threshold)
2392 for (
int j =
I[i]; j <
I[i+1]; j++)
2394 A[j] = (
J[j] == i) ? 1.0 : 0.0;
2409 for (
int i = 0; i < s; i++)
2413 for (n_p = R[i]; n_p != NULL; n_p = n_p->
Prev)
2415 const int c = n_p->
Column;
2422 sum += n_p->
Value * yp[c];
2426 if (diag_p != NULL && diag_p->
Value != 0.0)
2428 yp[i] = (xp[i] - sum) / diag_p->
Value;
2430 else if (xp[i] == sum)
2436 mfem_error(
"SparseMatrix::Gauss_Seidel_forw()");
2450 for (
int i = 0, j = Ip[0]; i < s; i++)
2452 const int end = Ip[i+1];
2455 for ( ; j < end; j++)
2457 const int c = Jp[j];
2464 sum += Ap[j] * yp[c];
2468 if (d >= 0 && Ap[d] != 0.0)
2470 yp[i] = (xp[i] - sum) / Ap[d];
2472 else if (xp[i] == sum)
2478 mfem_error(
"SparseMatrix::Gauss_Seidel_forw(...) #2");
2492 for (
int i =
height-1; i >= 0; i--)
2496 for (n_p = R[i]; n_p != NULL; n_p = n_p->
Prev)
2498 const int c = n_p->
Column;
2505 sum += n_p->
Value * yp[c];
2509 if (diag_p != NULL && diag_p->
Value != 0.0)
2511 yp[i] = (xp[i] - sum) / diag_p->
Value;
2513 else if (xp[i] == sum)
2519 mfem_error(
"SparseMatrix::Gauss_Seidel_back()");
2533 for (
int i = s-1, j = Ip[s]-1; i >= 0; i--)
2535 const int beg = Ip[i];
2538 for ( ; j >= beg; j--)
2540 const int c = Jp[j];
2547 sum += Ap[j] * yp[c];
2551 if (d >= 0 && Ap[d] != 0.0)
2553 yp[i] = (xp[i] - sum) / Ap[d];
2555 else if (xp[i] == sum)
2561 mfem_error(
"SparseMatrix::Gauss_Seidel_back(...) #2");
2569 MFEM_VERIFY(
Finalized(),
"Matrix must be finalized.");
2572 for (
int i = 0; i <
height; i++)
2576 for (
int j =
I[i]; j <
I[i+1]; j++)
2584 if (d >= 0 &&
A[d] != 0.0)
2594 mfem_error(
"SparseMatrix::GetJacobiScaling() #2");
2601 real_t sc,
bool use_abs_diag)
const
2603 MFEM_VERIFY(
Finalized(),
"Matrix must be finalized.");
2605 for (
int i = 0; i <
height; i++)
2609 for (
int j =
I[i]; j <
I[i+1]; j++)
2617 sum -=
A[j] * x0(
J[j]);
2620 if (d >= 0 &&
A[d] != 0.0)
2622 const real_t diag = (use_abs_diag) ? fabs(
A[d]) :
A[d];
2623 x1(i) = sc * (sum / diag) + (1.0 - sc) * x0(i);
2633 real_t sc,
bool use_abs_diag)
const
2635 MFEM_VERIFY(
Finalized(),
"Matrix must be finalized.");
2639 const bool use_dev =
b.UseDevice() || x.
UseDevice();
2641 const auto Ap =
Read(
A, nnz, use_dev);
2643 const auto Jp =
Read(
J, nnz, use_dev);
2645 const auto bp =
b.Read(use_dev);
2646 auto xp = x.
Write(use_dev);
2650 const int end = Ip[i+1];
2651 for (
int j = Ip[i];
true; j++)
2655 MFEM_ABORT_KERNEL(
"Diagonal not found in SparseMatrix::DiagScale");
2659 const real_t diag = (use_abs_diag) ? fabs(Ap[j]) : Ap[j];
2662 MFEM_ABORT_KERNEL(
"Zero diagonal in SparseMatrix::DiagScale");
2664 xp[i] = sc * bp[i] / diag;
2671template <
bool useFabs>
2679 const auto bp =
b.Read(useDevice);
2680 const auto x0p = x0.
Read(useDevice);
2681 auto x1p = x1.
Write(useDevice);
2683 const auto Ip =
Read(I, height+1, useDevice);
2690 for (
int j = Ip[i]; j < Ip[i+1]; j++)
2692 resi -= Ap[j] * x0p[Jp[j]];
2695 norm += fabs(Ap[j]);
2704 x1p[i] = x0p[i] + sc * resi /
norm;
2710 MFEM_ABORT_KERNEL(
"L1 norm of row is zero.");
2714 MFEM_ABORT_KERNEL(
"sum of row is zero.");
2723 MFEM_VERIFY(
Finalized(),
"Matrix must be finalized.");
2724 JacobiDispatch<true>(
b,x0,x1,
I,
J,
A,
height,sc);
2730 MFEM_VERIFY(
Finalized(),
"Matrix must be finalized.");
2731 JacobiDispatch<false>(
b,x0,x1,
I,
J,
A,
height,sc);
2737 int i, j, gi, gj, s, t;
2747 for (i = 0; i < rows.
Size(); i++)
2749 if ((gi=rows[i]) < 0) { gi = -1-gi, s = -1; }
2752 "Trying to insert a row " << gi <<
" outside the matrix height "
2755 for (j = 0; j < cols.
Size(); j++)
2757 if ((gj=cols[j]) < 0) { gj = -1-gj, t = -s; }
2759 MFEM_ASSERT(gj <
width,
2760 "Trying to insert a column " << gj <<
" outside the matrix width "
2763 if (skip_zeros &&
a == 0.0)
2768 if (skip_zeros == 2 || &rows != &cols || subm(j, i) == 0.0)
2773 if (t < 0) {
a = -
a; }
2785 if ((gi=i) < 0) { gi = -1-gi, s = -1; }
2788 "Trying to set a row " << gi <<
" outside the matrix height "
2790 if ((gj=j) < 0) { gj = -1-gj, t = -s; }
2792 MFEM_ASSERT(gj <
width,
2793 "Trying to set a column " << gj <<
" outside the matrix width "
2795 if (t < 0) {
a = -
a; }
2804 if ((gi=i) < 0) { gi = -1-gi, s = -1; }
2807 "Trying to insert a row " << gi <<
" outside the matrix height "
2809 if ((gj=j) < 0) { gj = -1-gj, t = -s; }
2811 MFEM_ASSERT(gj <
width,
2812 "Trying to insert a column " << gj <<
" outside the matrix width "
2814 if (t < 0) {
a = -
a; }
2821 int i, j, gi, gj, s, t;
2824 for (i = 0; i < rows.
Size(); i++)
2826 if ((gi=rows[i]) < 0) { gi = -1-gi, s = -1; }
2829 "Trying to set a row " << gi <<
" outside the matrix height "
2832 for (j = 0; j < cols.
Size(); j++)
2835 if (skip_zeros &&
a == 0.0)
2840 if (skip_zeros == 2 || &rows != &cols || subm(j, i) == 0.0)
2845 if ((gj=cols[j]) < 0) { gj = -1-gj, t = -s; }
2847 MFEM_ASSERT(gj <
width,
2848 "Trying to set a column " << gj <<
" outside the matrix width "
2850 if (t < 0) {
a = -
a; }
2862 int i, j, gi, gj, s, t;
2865 for (i = 0; i < rows.
Size(); i++)
2867 if ((gi=rows[i]) < 0) { gi = -1-gi, s = -1; }
2870 "Trying to set a row " << gi <<
" outside the matrix height "
2873 for (j = 0; j < cols.
Size(); j++)
2876 if (skip_zeros &&
a == 0.0)
2881 if (skip_zeros == 2 || &rows != &cols || subm(j, i) == 0.0)
2886 if ((gj=cols[j]) < 0) { gj = -1-gj, t = -s; }
2888 MFEM_ASSERT(gj <
width,
2889 "Trying to set a column " << gj <<
" outside the matrix width "
2891 if (t < 0) {
a = -
a; }
2901 int i, j, gi, gj, s, t;
2904 for (i = 0; i < rows.
Size(); i++)
2906 if ((gi=rows[i]) < 0) { gi = -1-gi, s = -1; }
2909 "Trying to read a row " << gi <<
" outside the matrix height "
2912 for (j = 0; j < cols.
Size(); j++)
2914 if ((gj=cols[j]) < 0) { gj = -1-gj, t = -s; }
2916 MFEM_ASSERT(gj <
width,
2917 "Trying to read a column " << gj <<
" outside the matrix width "
2920 subm(i, j) = (t < 0) ? (-
a) : (
a);
2935 "Trying to query a row " << gi <<
" outside the matrix height "
2939 return (
Rows[gi] == NULL);
2943 return (
I[gi] ==
I[gi+1]);
2952 if ((gi=row) < 0) { gi = -1-gi; }
2954 "Trying to read a row " << gi <<
" outside the matrix height "
2958 for (n =
Rows[gi], j = 0; n; n = n->
Prev)
2964 for (n =
Rows[gi], j = 0; n; n = n->
Prev, j++)
2979 cols.
MakeRef(
const_cast<int*
>((
const int*)
J) + j,
I[gi+1]-j);
2982 MFEM_ASSERT(row >= 0,
"Row not valid: " << row <<
", height: " <<
height);
2993 if ((gi=row) < 0) { gi = -1-gi, s = -1; }
2996 "Trying to set a row " << gi <<
" outside the matrix height "
3002 for (
int j = 0; j < cols.
Size(); j++)
3004 if ((gj=cols[j]) < 0) { gj = -1-gj, t = -s; }
3006 MFEM_ASSERT(gj <
width,
3007 "Trying to set a column " << gj <<
" outside the matrix"
3008 " width " <<
width);
3010 if (t < 0) {
a = -
a; }
3018 MFEM_ASSERT(cols.
Size() == srow.
Size(),
"");
3020 for (
int i =
I[gi], j = 0; j < cols.
Size(); j++, i++)
3022 if ((gj=cols[j]) < 0) { gj = -1-gj, t = -s; }
3024 MFEM_ASSERT(gj <
width,
3025 "Trying to set a column " << gj <<
" outside the matrix"
3026 " width " <<
width);
3037 int j, gi, gj, s, t;
3040 MFEM_VERIFY(!
Finalized(),
"Matrix must NOT be finalized.");
3042 if ((gi=row) < 0) { gi = -1-gi, s = -1; }
3045 "Trying to insert a row " << gi <<
" outside the matrix height "
3048 for (j = 0; j < cols.
Size(); j++)
3050 if ((gj=cols[j]) < 0) { gj = -1-gj, t = -s; }
3052 MFEM_ASSERT(gj <
width,
3053 "Trying to insert a column " << gj <<
" outside the matrix width "
3060 if (t < 0) {
a = -
a; }
3078 for (aux =
Rows[i]; aux != NULL; aux = aux -> Prev)
3080 aux -> Value *= scale;
3085 int j, end =
I[i+1];
3087 for (j =
I[i]; j < end; j++)
3100 for (
int i=0; i <
height; ++i)
3103 for (aux =
Rows[i]; aux != NULL; aux = aux -> Prev)
3105 aux -> Value *= scale;
3113 for (
int i=0; i <
height; ++i)
3117 for (j =
I[i]; j < end; j++)
3130 for (
int i=0; i <
height; ++i)
3132 for (aux =
Rows[i]; aux != NULL; aux = aux -> Prev)
3134 aux -> Value *= sr(aux->
Column);
3142 for (
int i=0; i <
height; ++i)
3145 for (j =
I[i]; j < end; j++)
3156 "Mismatch of this matrix size and rhs. This height = "
3157 <<
height <<
", width = " <<
width <<
", B.height = "
3160 for (
int i = 0; i <
height; i++)
3165 for (
RowNode *aux = B.
Rows[i]; aux != NULL; aux = aux->Prev)
3167 _Add_(aux->Column, aux->Value);
3172 for (
int j = B.
I[i]; j < B.
I[i+1]; j++)
3185 for (
int i = 0; i <
height; i++)
3192 np->Value +=
a * B.
_Get_(np->Column);
3197 for (
int j =
I[i]; j <
I[i+1]; j++)
3212 for (
int i = 0; i < nnz; i++)
3219 for (
int i = 0; i <
height; i++)
3222 node_p = node_p -> Prev)
3224 node_p -> Value =
a;
3236 for (
int i = 0, nnz =
I[
height]; i < nnz; i++)
3243 for (
int i = 0; i <
height; i++)
3246 node_p = node_p -> Prev)
3248 node_p -> Value *=
a;
3263 for (i = 0; i <
height; i++)
3265 os <<
"[row " << i <<
"]\n";
3266 for (nd =
Rows[i], j = 0; nd != NULL; nd = nd->
Prev, j++)
3268 os <<
" (" << nd->
Column <<
"," << nd->
Value <<
")";
3269 if ( !((j+1) % width_) )
3286 for (i = 0; i <
height; i++)
3288 os <<
"[row " << i <<
"]\n";
3289 for (j =
I[i]; j <
I[i+1]; j++)
3291 os <<
" (" <<
J[j] <<
"," <<
A[j] <<
")";
3292 if ( !((j+1-
I[i]) % width_) )
3297 if ((j-
I[i]) % width_)
3306 os <<
"% size " <<
height <<
" " <<
width <<
"\n";
3310 ios::fmtflags old_fmt = os.flags();
3311 os.setf(ios::scientific);
3312 std::streamsize old_prec = os.precision(14);
3317 for (i = 0; i <
height; i++)
3319 for (nd =
Rows[i], j = 0; nd != NULL; nd = nd->
Prev, j++)
3321 os << i+1 <<
" " << nd->
Column+1 <<
" " << nd->
Value <<
'\n';
3331 for (i = 0; i <
height; i++)
3333 for (j =
I[i]; j <
I[i+1]; j++)
3335 os << i+1 <<
" " <<
J[j]+1 <<
" " <<
A[j] <<
'\n';
3341 os.precision(old_prec);
3348 ios::fmtflags old_fmt = os.flags();
3349 os.setf(ios::scientific);
3350 std::streamsize old_prec = os.precision(14);
3352 os <<
"(* Read file into Mathematica using: "
3353 <<
"myMat = Get[\"this_file_name\"] *)\n";
3354 os <<
"SparseArray[";
3361 for (i = 0; i <
height; i++)
3363 for (nd =
Rows[i], j = 0; nd != NULL; nd = nd->
Prev, j++, c++)
3365 os <<
"{"<< i+1 <<
", " << nd->
Column+1
3366 <<
"} -> Internal`StringToMReal[\"" << nd->
Value <<
"\"]";
3381 for (i = 0; i <
height; i++)
3383 for (j =
I[i]; j <
I[i+1]; j++, c++)
3385 os <<
"{" << i+1 <<
", " <<
J[j]+1
3386 <<
"} -> Internal`StringToMReal[\"" <<
A[j] <<
"\"]";
3396 os.precision(old_prec);
3403 ios::fmtflags old_fmt = os.flags();
3404 os.setf(ios::scientific);
3405 std::streamsize old_prec = os.precision(14);
3407 os <<
"%%MatrixMarket matrix coordinate real general" <<
'\n'
3408 <<
"% Generated by MFEM" <<
'\n';
3415 for (i = 0; i <
height; i++)
3417 for (nd =
Rows[i], j = 0; nd != NULL; nd = nd->
Prev, j++)
3419 os << i+1 <<
" " << nd->
Column+1 <<
" " << nd->
Value <<
'\n';
3429 for (i = 0; i <
height; i++)
3431 for (j =
I[i]; j <
I[i+1]; j++)
3433 os << i+1 <<
" " <<
J[j]+1 <<
" " <<
A[j] <<
'\n';
3437 os.precision(old_prec);
3443 MFEM_VERIFY(
Finalized(),
"Matrix must be finalized.");
3453 for (i = 0; i <=
height; i++)
3455 os <<
I[i]+1 <<
'\n';
3458 for (i = 0; i <
I[
height]; i++)
3460 os <<
J[i]+1 <<
'\n';
3463 for (i = 0; i <
I[
height]; i++)
3471 MFEM_VERIFY(
Finalized(),
"Matrix must be finalized.");
3476 os <<
width <<
'\n';
3482 for (i = 0; i <=
height; i++)
3487 for (i = 0; i <
I[
height]; i++)
3492 for (i = 0; i <
I[
height]; i++)
3500 const real_t MiB = 1024.*1024;
3512 "SparseMatrix statistics:\n"
3515 " Dimensions : " <<
height <<
" x " <<
width <<
"\n"
3516 " Number of entries (total) : " << nnz <<
"\n"
3517 " Number of entries (per row) : " << 1.*nnz/
Height() <<
"\n"
3518 " Number of stored zeros : " << nz*pz <<
"% (" << nz <<
")\n"
3519 " Number of Inf/Nan entries : " << nnf*pz <<
"% ("<< nnf <<
")\n"
3520 " Norm, max |a_ij| : " << max_norm <<
"\n"
3521 " Symmetry, max |a_ij-a_ji| : " << symm <<
"\n"
3522 " Number of small entries:\n"
3523 " |a_ij| <= 1e-12*Norm : " << ns12*pz <<
"% (" << ns12 <<
")\n"
3524 " |a_ij| <= 1e-15*Norm : " << ns15*pz <<
"% (" << ns15 <<
")\n"
3525 " |a_ij| <= 1e-18*Norm : " << ns18*pz <<
"% (" << ns18 <<
")\n";
3528 os <<
" Memory used by CSR : " <<
3529 (
sizeof(int)*(
height+1+nnz)+
sizeof(
real_t)*nnz)/MiB <<
" MiB\n";
3534#ifdef MFEM_USE_MEMALLOC
3537 for (
int i = 0; i <
height; i++)
3545 os <<
" Memory used by LIL : " << used_mem/MiB <<
" MiB\n";
3557#if !defined(MFEM_USE_MEMALLOC)
3558 for (
int i = 0; i <
height; i++)
3561 while (node_p != NULL)
3564 node_p = node_p->
Prev;
3574#ifdef MFEM_USE_MEMALLOC
3587 const int *start_j =
J;
3589 for (
const int *jptr = start_j; jptr != end_j; ++jptr)
3591 awidth = std::max(awidth, *jptr + 1);
3597 for (
int i = 0; i <
height; i++)
3599 for (aux =
Rows[i]; aux != NULL; aux = aux->
Prev)
3601 awidth = std::max(awidth, aux->
Column + 1);
3613 for (
int i = 0; i < n; i++)
3623 "Finalize must be called before Transpose. Use TransposeRowMatrix instead");
3626 const int *A_i, *A_j;
3627 int m, n, nnz, *At_i, *At_j;
3642 for (i = 0; i <= n; i++)
3646 for (i = 0; i < nnz; i++)
3650 for (i = 1; i < n; i++)
3652 At_i[i+1] += At_i[i];
3655 for (i = j = 0; i < m; i++)
3658 for ( ; j < end; j++)
3660 At_j[At_i[A_j[j]]] = i;
3661 At_data[At_i[A_j[j]]] = A_data[j];
3666 for (i = n; i > 0; i--)
3668 At_i[i] = At_i[i-1];
3679 int m, n, nnz, *At_i, *At_j;
3689 for (i = 0; i < m; i++)
3691 A.
GetRow(i, Acols, Avals);
3713 for (i = 0; i <= n; i++)
3718 for (i = 0; i < m; i++)
3720 A.
GetRow(i, Acols, Avals);
3721 for (j = 0; j<Acols.
Size(); ++j)
3726 for (i = 1; i < n; i++)
3728 At_i[i+1] += At_i[i];
3731 for (i = 0; i < m; i++)
3733 A.
GetRow(i, Acols, Avals);
3734 for (j = 0; j<Acols.
Size(); ++j)
3736 At_j[At_i[Acols[j]]] = i;
3737 At_data[At_i[Acols[j]]] = Avals[j];
3742 for (i = n; i > 0; i--)
3744 At_i[i] = At_i[i-1];
3755 int nrowsA, ncolsA, nrowsB, ncolsB;
3756 const int *A_i, *A_j, *B_i, *B_j;
3757 int *C_i, *C_j, *B_marker;
3758 const real_t *A_data, *B_data;
3760 int ia, ib, ic, ja, jb, num_nonzeros;
3761 int row_start, counter;
3770 MFEM_VERIFY(ncolsA == nrowsB,
3771 "number of columns of A (" << ncolsA
3772 <<
") must equal number of rows of B (" << nrowsB <<
")");
3781 B_marker =
new int[ncolsB];
3783 for (ib = 0; ib < ncolsB; ib++)
3792 C_i[0] = num_nonzeros = 0;
3793 for (ic = 0; ic < nrowsA; ic++)
3795 for (ia = A_i[ic]; ia < A_i[ic+1]; ia++)
3798 for (ib = B_i[ja]; ib < B_i[ja+1]; ib++)
3801 if (B_marker[jb] != ic)
3808 C_i[ic+1] = num_nonzeros;
3814 C =
new SparseMatrix(C_i, C_j, C_data, nrowsA, ncolsB);
3816 for (ib = 0; ib < ncolsB; ib++)
3825 MFEM_VERIFY(nrowsA == C->
Height() && ncolsB == C->
Width(),
3826 "Input matrix sizes do not match output sizes"
3827 <<
" nrowsA = " << nrowsA
3828 <<
", C->Height() = " << C->
Height()
3829 <<
" ncolsB = " << ncolsB
3830 <<
", C->Width() = " << C->
Width());
3838 for (ic = 0; ic < nrowsA; ic++)
3841 row_start = counter;
3842 for (ia = A_i[ic]; ia < A_i[ic+1]; ia++)
3845 a_entry = A_data[ia];
3846 for (ib = B_i[ja]; ib < B_i[ja+1]; ib++)
3849 b_entry = B_data[ib];
3850 if (B_marker[jb] < row_start)
3852 B_marker[jb] = counter;
3857 C_data[counter] = a_entry*b_entry;
3862 C_data[B_marker[jb]] += a_entry*b_entry;
3870 "With pre-allocated output matrix, number of non-zeros ("
3872 <<
") did not match number of entries changed from matrix-matrix multiply, "
3891 int nrowsA, ncolsA, nrowsB, ncolsB;
3892 int *C_i, *C_j, *B_marker;
3894 int ia, ib, ic, ja, jb, num_nonzeros;
3895 int row_start, counter;
3904 MFEM_VERIFY(ncolsA == nrowsB,
3905 "number of columns of A (" << ncolsA
3906 <<
") must equal number of rows of B (" << nrowsB <<
")");
3908 B_marker =
new int[ncolsB];
3910 for (ib = 0; ib < ncolsB; ib++)
3917 C_i[0] = num_nonzeros = 0;
3921 for (ic = 0; ic < nrowsA; ic++)
3923 A.
GetRow(ic, colsA, dataA);
3924 for (ia = 0; ia < colsA.
Size(); ia++)
3927 B.
GetRow(ja, colsB, dataB);
3928 for (ib = 0; ib < colsB.
Size(); ib++)
3931 if (B_marker[jb] != ic)
3938 C_i[ic+1] = num_nonzeros;
3944 C =
new SparseMatrix(C_i, C_j, C_data, nrowsA, ncolsB);
3946 for (ib = 0; ib < ncolsB; ib++)
3952 for (ic = 0; ic < nrowsA; ic++)
3954 row_start = counter;
3955 A.
GetRow(ic, colsA, dataA);
3956 for (ia = 0; ia < colsA.
Size(); ia++)
3959 a_entry = dataA[ia];
3960 B.
GetRow(ja, colsB, dataB);
3961 for (ib = 0; ib < colsB.
Size(); ib++)
3964 b_entry = dataB[ib];
3965 if (B_marker[jb] < row_start)
3967 B_marker[jb] = counter;
3969 C_data[counter] = a_entry*b_entry;
3974 C_data[B_marker[jb]] += a_entry*b_entry;
3989 for (
int j = 0; j < B.
Width(); ++j)
3993 A.
Mult(columnB, columnC);
4003 Mult (R, *AP, *RAP_);
4046 int i, At_nnz, *At_j;
4050 At_nnz = At -> NumNonZeroElems();
4051 At_j = At -> GetJ();
4052 At_data = At -> GetData();
4053 for (i = 0; i < At_nnz; i++)
4055 At_data[i] *= D(At_j[i]);
4066 int ncols = A.
Width();
4080 int * marker =
new int[ncols];
4081 std::fill(marker, marker+ncols, -1);
4083 int num_nonzeros = 0, jcol;
4085 for (
int ic = 0; ic < nrows; ic++)
4087 for (
int ia = A_i[ic]; ia < A_i[ic+1]; ia++)
4093 for (
int ib = B_i[ic]; ib < B_i[ic+1]; ib++)
4096 if (marker[jcol] != ic)
4102 C_i[ic+1] = num_nonzeros;
4108 for (
int ia = 0; ia < ncols; ia++)
4114 for (
int ic = 0; ic < nrows; ic++)
4116 for (
int ia = A_i[ic]; ia < A_i[ic+1]; ia++)
4120 C_data[pos] =
a*A_data[ia];
4124 for (
int ib = B_i[ic]; ib < B_i[ic+1]; ib++)
4127 if (marker[jcol] < C_i[ic])
4130 C_data[pos] =
b*B_data[ib];
4136 C_data[marker[jcol]] +=
b*B_data[ib];
4142 return new SparseMatrix(C_i, C_j, C_data, nrows, ncols);
4147 return Add(1.,A,1.,B);
4152 MFEM_ASSERT(Ai.
Size() > 0,
"invalid size Ai.Size() = " << Ai.
Size());
4157 for (
int i=1; i < Ai.
Size(); ++i)
4159 result =
Add(*accumulate, *Ai[i]);
4165 accumulate = result;
4175 for (
int r = 0; r < B.
Height(); r++)
4179 for (
int i=0; i<A.
RowSize(r); i++)
4181 B(r, colA[i]) +=
alpha * valA[i];
4194 for (
int i=0; i<mA; i++)
4196 for (
int j=0; j<nA; j++)
4198 C->
AddMatrix(A(i,j), B, i * mB, j * nB);
4212 for (
int i=0; i<mA; i++)
4214 for (
int j=0; j<nA; j++)
4216 for (
int r=0; r<mB; r++)
4221 for (
int cj=0; cj<B.
RowSize(r); cj++)
4223 C->
Set(i * mB + r, j * nB + colB[cj], A(i,j) * valB[cj]);
4241 for (
int r=0; r<mA; r++)
4246 for (
int aj=0; aj<A.
RowSize(r); aj++)
4248 for (
int i=0; i<mB; i++)
4250 for (
int j=0; j<nB; j++)
4252 C->
Set(r * mB + i, colA[aj] * nB + j, valA[aj] * B(i, j));
4270 for (
int ar=0; ar<mA; ar++)
4275 for (
int aj=0; aj<A.
RowSize(ar); aj++)
4277 for (
int br=0; br<mB; br++)
4282 for (
int bj=0; bj<B.
RowSize(br); bj++)
4284 C->
Set(ar * mB + br, colA[aj] * nB + colB[bj],
4285 valA[aj] * valB[bj]);
4308#ifdef MFEM_USE_MEMALLOC
4318#ifdef MFEM_USE_CUDA_OR_HIP
4321#ifdef MFEM_CUDA_1897_WORKAROUND
4324 MFEM_Cu_or_Hip(MemFree)(
dBuffer);
4333 MFEM_cu_or_hip(sparseDestroy)(
handle);
4336#ifndef MFEM_CUDA_1897_WORKAROUND
4339 MFEM_Cu_or_Hip(MemFree)(
dBuffer);
Abstract data type for sparse matrices.
virtual int NumNonZeroElems() const =0
Returns the number of non-zeros in a matrix.
virtual int GetRow(const int row, Array< int > &cols, Vector &srow) const =0
Gets the columns indexes and values for row row.
Dynamic 2D array using row-major layout.
Memory< T > & GetMemory()
Return a reference to the Memory object used by the Array.
T Max() const
Find the maximal element in the array, using the comparison operator < for class T.
void Sort()
Sorts the array in ascending order. This requires operator< to be defined for T.
void SetSize(int nsize)
Change the logical size of the array, keep existing entries.
int Size() const
Return the logical size of the array.
void MakeRef(T *data_, int size_, bool own_data=false)
Make this Array a reference to a pointer.
T * Write(bool on_dev=true)
Shortcut for mfem::Write(a.GetMemory(), a.Size(), on_dev).
const T * Read(bool on_dev=true) const
Shortcut for mfem::Read(a.GetMemory(), a.Size(), on_dev).
Data type dense matrix using column-major storage.
void GetColumnReference(int c, Vector &col)
void SetSize(int s)
Change the size of the DenseMatrix to s x s.
void AddMatrix(DenseMatrix &A, int ro, int co)
Perform (ro+i,co+j)+=A(i,j) for 0<=i.
static bool Allows(unsigned long b_mask)
Return true if any of the backends in the backend mask, b_mask, are allowed.
static MemoryType GetDeviceMemoryType()
Get the current Device MemoryType. This is the MemoryType used by most MFEM classes when allocating m...
Abstract data type for matrix inverse.
size_t MemoryUsage() const
Class used by MFEM to store pointers to host and/or device memory.
void SetHostPtrOwner(bool own) const
Set/clear the ownership flag for the host pointer. Ownership indicates whether the pointer will be de...
int Capacity() const
Return the size of the allocated memory.
MemoryType GetMemoryType() const
Return a MemoryType that is currently valid. If both the host and the device pointers are currently v...
bool Empty() const
Return true if the Memory object is empty, see Reset().
void CopyFrom(const Memory &src, int size)
Copy size entries from src to *this.
void Reset()
Reset the memory to be empty, ensuring that Delete() will be a no-op.
void Wrap(T *ptr, int size, bool own)
Wrap an externally allocated host pointer, ptr with the current host memory type returned by MemoryMa...
void Delete()
Delete the owned pointers and reset the Memory object.
void ClearOwnerFlags() const
Clear the ownership flags for the host and device pointers, as well as any internal data allocated by...
void New(int size)
Allocate host memory for size entries with the current host memory type returned by MemoryManager::Ge...
int width
Dimension of the input / number of columns in the matrix.
int Height() const
Get the height (size of output) of the Operator. Synonym with NumRows().
int height
Dimension of the output / number of rows in the matrix.
DiagonalPolicy
Defines operator diagonal policy upon elimination of rows and/or columns.
@ DIAG_ONE
Set the diagonal value to one.
@ DIAG_KEEP
Keep the diagonal value.
@ DIAG_ZERO
Set the diagonal value to zero.
int Width() const
Get the width (size of input) of the Operator. Synonym with NumCols().
int GetRow(const int row, Array< int > &cols, Vector &srow) const override
Extract all column indices and values from a given row.
void PrintCSR2(std::ostream &out) const
Prints a sparse matrix to stream out in CSR format.
const int * HostReadJ() const
void GetDiag(Vector &d) const
Returns the Diagonal of A.
real_t GetRowNorml1(int irow) const
For i = irow compute .
void PartMult(const Array< int > &rows, const Vector &x, Vector &y) const
bool isSorted
Are the columns sorted already.
void Gauss_Seidel_back(const Vector &x, Vector &y) const
bool RowIsEmpty(const int row) const
void ClearColPtr() const
Reset the "current row" set by calling SetColPtr(). This method must be called between any two calls ...
MatrixInverse * Inverse() const override
This virtual method is not supported: it always returns NULL.
void Jacobi3(const Vector &b, const Vector &x0, Vector &x1, real_t sc=1.0) const
real_t * ReadWriteData(bool on_dev=true)
Memory< real_t > A
Array with size I[height], containing the actual entries of the sparse matrix, as indexed by the I ar...
void EliminateRowCol(int rc, const real_t sol, Vector &rhs, DiagonalPolicy dpolicy=DIAG_ONE)
Eliminate row rc and column rc and modify the rhs using sol.
SparseMatrix & operator*=(real_t a)
void MultTranspose(const Vector &x, Vector &y) const override
Multiply a vector with the transposed matrix. y = At * x.
void SetWidth(int width_=-1)
Change the width of a SparseMatrix.
MemAlloc< RowNode, 1024 > RowNodeAlloc
bool Empty() const
Check if the SparseMatrix is empty.
void GetSubMatrix(const Array< int > &rows, const Array< int > &cols, DenseMatrix &subm) const
SparseMatrix * At
Transpose of A. Owned. Used to perform MultTranspose() on devices.
void PartAddMult(const Array< int > &rows, const Vector &x, Vector &y, const real_t a=1.0) const
void GetRowSums(Vector &x) const
For all i compute .
void AbsMult(const Vector &x, Vector &y) const override
y = |A| * x, using entry-wise absolute values of matrix A
int NumNonZeroElems() const override
Returns the number of the nonzero elements in the matrix.
bool Finalized() const
Returns whether or not CSR format has been finalized.
void EliminateBC(const Array< int > &ess_dofs, DiagonalPolicy diag_policy)
Eliminate essential (Dirichlet) boundary conditions.
void PrintInfo(std::ostream &out) const
Print various sparse matrix statistics.
void MoveDiagonalFirst()
Move the diagonal entry to the first position in each row, preserving the order of the rest of the co...
void Add(const int i, const int j, const real_t val)
void EliminateRowColMultipleRHS(int rc, const Vector &sol, DenseMatrix &rhs, DiagonalPolicy dpolicy=DIAG_ONE)
Similar to EliminateRowCol(int, const double, Vector &, DiagonalPolicy), but multiple values for elim...
void Jacobi(const Vector &b, const Vector &x0, Vector &x1, real_t sc, bool use_abs_diag=false) const
void SetColPtr(const int row) const
Initialize the SparseMatrix for fast access to the entries of the given row which becomes the "curren...
void SetDiagIdentity()
If a row contains only one diag entry of zero, set it to 1.
void Gauss_Seidel_forw(const Vector &x, Vector &y) const
Gauss-Seidel forward and backward iterations over a vector x.
void MakeRef(const SparseMatrix &master)
Clear the contents of the SparseMatrix and make it a reference to master.
int CheckFinite() const
Count the number of entries that are NOT finite, i.e. Inf or Nan.
Memory< int > J
Array with size I[height], containing the column indices for all matrix entries, as indexed by the I ...
void EliminateCol(int col, DiagonalPolicy dpolicy=DIAG_ZERO)
Eliminates the column col from the matrix.
void EnsureMultTranspose() const
Ensures that the matrix is capable of performing MultTranspose(), AddMultTranspose(),...
void Print(std::ostream &out=mfem::out, int width_=4) const override
Prints matrix to stream out.
void _Add_(const int col, const real_t a)
Add a value to an entry in the "current row". See SetColPtr().
SparseMatrix()
Create an empty SparseMatrix.
cusparseSpMatDescr_t matA_descr
real_t & Elem(int i, int j) override
Returns reference to a_{ij}.
virtual void PrintMathematica(std::ostream &out=mfem::out) const
Prints matrix as a SparseArray for importing into Mathematica.
void ClearGPUSparse()
Clear the cuSPARSE/hipSPARSE descriptors. This must be called after releasing the device memory of A.
void Threshold(real_t tol, bool fix_empty_rows=false)
Remove entries smaller in absolute value than a given tolerance tol. If fix_empty_rows is true,...
void SetSubMatrix(const Array< int > &rows, const Array< int > &cols, const DenseMatrix &subm, int skip_zeros=1)
void PrintMatlab(std::ostream &out=mfem::out) const override
Prints matrix in matlab format.
const int * ReadI(bool on_dev=true) const
real_t InnerProduct(const Vector &x, const Vector &y) const
Compute y^t A x.
Memory< int > I
Array with size (height+1) containing the row offsets.
real_t & operator()(int i, int j)
Returns reference to A[i][j].
const real_t * HostReadData() const
void PrintMM(std::ostream &out=mfem::out) const
Prints matrix in Matrix Market sparse format.
void ScaleColumns(const Vector &sr)
this = this * diag(sr);
void PrintCSR(std::ostream &out) const
Prints matrix to stream out in hypre_CSRMatrix format.
void Swap(SparseMatrix &other)
int * ReadWriteJ(bool on_dev=true)
cusparseDnVecDescr_t vecY_descr
void BooleanMultTranspose(const Array< int > &x, Array< int > &y) const
y = At * x, treating all entries as booleans (zero=false, nonzero=true).
RowNode ** Rows
Array of linked lists, one for every row. This array represents the linked list (LIL) storage format.
real_t & SearchRow(const int col)
Perform a fast search for an entry in the "current row". See SetColPtr().
static int SparseMatrixCount
void AddSubMatrix(const Array< int > &rows, const Array< int > &cols, const DenseMatrix &subm, int skip_zeros=1)
void Symmetrize()
(*this) = 1/2 ((*this) + (*this)^t)
void BooleanMult(const Array< int > &x, Array< int > &y) const
y = A * x, treating all entries as booleans (zero=false, nonzero=true).
void AddMultTranspose(const Vector &x, Vector &y, const real_t a=1.0) const override
y += At * x (default) or y += a * At * x
int * GetRowColumns(const int row)
Return a pointer to the column indices in a row.
real_t GetJacobiScaling() const
Determine appropriate scaling for Jacobi iteration.
int RowSize(const int i) const
Returns the number of elements in row i.
real_t * HostReadWriteData()
int CountSmallElems(real_t tol) const
Count the number of entries with |a_ij| <= tol.
const int * ReadJ(bool on_dev=true) const
int MaxRowSize() const
Returns the maximum number of elements among all rows.
real_t _Get_(const int col) const
Read the value of an entry in the "current row". See SetColPtr().
void AddMult(const Vector &x, Vector &y, const real_t a=1.0) const override
y += A * x (default) or y += a * A * x
virtual ~SparseMatrix()
Destroys sparse matrix.
SparseMatrix & operator=(const SparseMatrix &rhs)
Assignment operator: deep copy.
void Mult(const Vector &x, Vector &y) const override
Matrix vector multiplication.
void OverrideSize(int height_, int width_)
Sets the height and width of the matrix.
static cusparseHandle_t handle
void EliminateRow(int row, const real_t sol, Vector &rhs)
Eliminates a column from the transpose matrix.
void GetBlocks(Array2D< SparseMatrix * > &blocks) const
SparseMatrix & operator+=(const SparseMatrix &B)
Add the sparse matrix 'B' to '*this'. This operation will cause an error if '*this' is finalized and ...
void Clear()
Clear the contents of the SparseMatrix.
void ResetTranspose() const
void EliminateRowColDiag(int rc, real_t value)
Perform elimination and set the diagonal entry to the given value.
real_t * GetRowEntries(const int row)
Return a pointer to the entries in a row.
void ScaleRows(const Vector &sl)
this = diag(sl) * this;
void SetRow(const int row, const Array< int > &cols, const Vector &srow)
void DiagScale(const Vector &b, Vector &x, real_t sc=1.0, bool use_abs_diag=false) const
x = sc b / A_ii. When use_abs_diag = true, |A_ii| is used.
void AbsMultTranspose(const Vector &x, Vector &y) const override
y = |At| * x, using entry-wise absolute values of the transpose of matrix A
real_t * GetData()
Return the element data, i.e. the array A.
void SetSubMatrixTranspose(const Array< int > &rows, const Array< int > &cols, const DenseMatrix &subm, int skip_zeros=1)
void BuildTranspose() const
Build and store internally the transpose of this matrix which will be used in the methods AddMultTran...
void SortColumnIndices()
Sort the column indices corresponding to each row.
void Finalize(int skip_zeros=1) override
Finalize the matrix initialization, switching the storage format from LIL to CSR.
real_t IsSymmetric() const
Returns max_{i,j} |(i,j)-(j,i)| for a finalized matrix.
DenseMatrix * ToDenseMatrix() const
Produces a DenseMatrix from a SparseMatrix.
void _Set_(const int col, const real_t a)
Set an entry in the "current row". See SetColPtr().
void EliminateZeroRows(const real_t threshold=1e-12) override
If a row contains only zeros, set its diagonal to 1.
void EliminateCols(const Array< int > &cols, const Vector *x=NULL, Vector *b=NULL)
Eliminate all columns i for which cols[i] != 0.
cusparseDnVecDescr_t vecX_descr
const real_t * ReadData(bool on_dev=true) const
void ScaleRow(const int row, const real_t scale)
void AddRow(const int row, const Array< int > &cols, const Vector &srow)
void Jacobi2(const Vector &b, const Vector &x0, Vector &x1, real_t sc=1.0) const
int ActualWidth() const
Returns the actual Width of the matrix.
const int * HostReadI() const
void Set(const int i, const int j, const real_t val)
virtual const real_t * HostRead() const
Shortcut for mfem::Read(vec.GetMemory(), vec.Size(), false).
virtual const real_t * Read(bool on_dev=true) const
Shortcut for mfem::Read(vec.GetMemory(), vec.Size(), on_dev).
void Neg()
(*this) = -(*this)
virtual real_t * ReadWrite(bool on_dev=true)
Shortcut for mfem::ReadWrite(vec.GetMemory(), vec.Size(), on_dev).
int Size() const
Returns the size of the vector.
virtual void UseDevice(bool use_dev) const
Enable execution of Vector operations using the mfem::Device.
void SetSize(int s)
Resize the vector to size s.
void NewDataAndSize(real_t *d, int s)
Set the Vector data and size, deleting the old data, if owned.
real_t * GetData() const
Return a pointer to the beginning of the Vector data.
virtual real_t * HostReadWrite()
Shortcut for mfem::ReadWrite(vec.GetMemory(), vec.Size(), false).
virtual real_t * Write(bool on_dev=true)
Shortcut for mfem::Write(vec.GetMemory(), vec.Size(), on_dev).
DenseMatrix * OuterProduct(const DenseMatrix &A, const DenseMatrix &B)
Produces a block matrix with blocks A_{ij}*B.
const T * Read(const Memory< T > &mem, int size, bool on_dev=true)
Get a pointer for read access to mem with the mfem::Device's DeviceMemoryClass, if on_dev = true,...
real_t infinity()
Define a shortcut for std::numeric_limits<double>::infinity()
void mfem_error(const char *msg)
void Mult(const Table &A, const Table &B, Table &C)
C = A * B (as boolean matrices)
const T * HostRead(const Memory< T > &mem, int size)
Shortcut to Read(const Memory<T> &mem, int size, false)
T * Write(Memory< T > &mem, int size, bool on_dev=true)
Get a pointer for write access to mem with the mfem::Device's DeviceMemoryClass, if on_dev = true,...
bool IsFinite(const real_t &val)
void Transpose(const Table &A, Table &At, int ncols_A_)
Transpose a Table.
void RAP(const DenseMatrix &A, const DenseMatrix &P, DenseMatrix &RAP)
void Swap(T &a, T &b)
Swap objects of type T. The operation is performed using the most specialized swap function from the ...
int CheckFinite(const real_t *v, const int n)
T * HostWrite(Memory< T > &mem, int size)
Shortcut to Write(const Memory<T> &mem, int size, false)
SparseMatrix * MultAbstractSparseMatrix(const AbstractSparseMatrix &A, const AbstractSparseMatrix &B)
Matrix product of sparse matrices. A and B do not need to be CSR matrices.
SparseMatrix * TransposeAbstractSparseMatrix(const AbstractSparseMatrix &A, int useActualWidth)
Transpose of a sparse matrix. A does not need to be a CSR matrix.
SparseMatrix * Mult_AtDA(const SparseMatrix &A, const Vector &D, SparseMatrix *OAtDA)
Matrix multiplication A^t D A. All matrices must be finalized.
MemoryType
Memory types supported by MFEM.
std::function< real_t(const Vector &)> f(real_t mass_coeff)
SparseMatrix * TransposeMult(const SparseMatrix &A, const SparseMatrix &B)
C = A^T B.
void forall(int N, lambda &&body)
void forall_switch(bool use_dev, int N, lambda &&body)
void Add(const DenseMatrix &A, const DenseMatrix &B, real_t alpha, DenseMatrix &C)
C = A + alpha*B.
void SparseMatrixFunction(SparseMatrix &S, real_t(*f)(real_t))
Applies f() to each element of the matrix (after it is finalized).
MFEM_HOST_DEVICE real_t norm(const Complex &z)
@ HIP_MASK
Biwise-OR of all HIP backends.
@ CPU_MASK
Biwise-OR of all CPU backends.
@ CUDA_MASK
Biwise-OR of all CUDA backends.