27#if defined(MFEM_USE_CUDA)
28#define MFEM_cu_or_hip(stub) cu##stub
29#define MFEM_Cu_or_Hip(stub) Cu##stub
30#define MFEM_CU_or_HIP(stub) CU##stub
31#define MFEM_CUDA_or_HIP(stub) CUDA##stub
33#if CUSPARSE_VERSION >= 11400
34#define MFEM_GPUSPARSE_ALG CUSPARSE_SPMV_CSR_ALG1
36#define MFEM_GPUSPARSE_ALG CUSPARSE_CSRMV_ALG1
39#elif defined(MFEM_USE_HIP)
40#define MFEM_cu_or_hip(stub) hip##stub
41#define MFEM_Cu_or_Hip(stub) Hip##stub
42#define MFEM_CU_or_HIP(stub) HIP##stub
43#define MFEM_CUDA_or_HIP(stub) HIP##stub
46#define MFEM_GPUSPARSE_ALG HIPSPARSE_CSRMV_ALG1
54#ifdef MFEM_USE_CUDA_OR_HIP
67#ifdef MFEM_USE_CUDA_OR_HIP
83#ifdef MFEM_USE_CUDA_OR_HIP
86#if CUDA_VERSION >= 10010 || defined(MFEM_USE_HIP)
87 MFEM_cu_or_hip(sparseDestroySpMat)(
matA_descr);
88 MFEM_cu_or_hip(sparseDestroyDnVec)(
vecX_descr);
89 MFEM_cu_or_hip(sparseDestroyDnVec)(
vecY_descr);
112 for (
int i = 0; i < nrows; i++)
117#ifdef MFEM_USE_MEMALLOC
136#ifdef MFEM_USE_MEMALLOC
144 bool ownij,
bool owna,
bool issorted)
155#ifdef MFEM_USE_MEMALLOC
167 for (
int ii=0; ii<nnz; ++ii)
184#ifdef MFEM_USE_MEMALLOC
188 J.
New(nrows * rowsize);
189 A.
New(nrows * rowsize);
191 for (
int i = 0; i <= nrows; i++)
225#ifdef MFEM_USE_MEMALLOC
231#ifdef MFEM_USE_MEMALLOC
235 for (
int i = 0; i <
height; i++)
238 for (
RowNode *node_p = mat.
Rows[i]; node_p; node_p = node_p->Prev)
240#ifdef MFEM_USE_MEMALLOC
245 new_node_p->
Value = node_p->Value;
246 new_node_p->
Column = node_p->Column;
247 *node_pp = new_node_p;
248 node_pp = &new_node_p->
Prev;
276#ifdef MFEM_USE_MEMALLOC
283 for (
int i = 0; i <=
height; i++)
288 for (
int r=0; r<
height; r++)
315 MFEM_ASSERT(master.
Finalized(),
"'master' must be finalized");
336#ifdef MFEM_USE_MEMALLOC
354 return I[gi+1]-
I[gi];
359 for ( ; row != NULL; row = row->
Prev)
360 if (row->
Value != 0.0)
373 for (
int i=0; i <
height; ++i)
375 rowSize =
I[i+1]-
I[i];
376 max_row_size = (max_row_size > rowSize) ? max_row_size : rowSize;
381 for (
int i=0; i <
height; ++i)
384 max_row_size = (max_row_size > rowSize) ? max_row_size : rowSize;
393 MFEM_VERIFY(
Finalized(),
"Matrix must be finalized.");
400 MFEM_VERIFY(
Finalized(),
"Matrix must be finalized.");
407 MFEM_VERIFY(
Finalized(),
"Matrix must be finalized.");
414 MFEM_VERIFY(
Finalized(),
"Matrix must be finalized.");
421 if (newWidth ==
width)
426 else if (newWidth == -1)
432 else if (newWidth >
width)
443 ColPtrJ =
static_cast<int *
>(NULL);
451 "The new width needs to be bigger or equal to the actual width");
459 MFEM_VERIFY(
Finalized(),
"Matrix is not Finalized!");
466#ifdef MFEM_USE_CUDA_OR_HIP
469#if defined(MFEM_USE_CUDA)
470 size_t pBufferSizeInBytes = 0;
471 void *pBuffer = NULL;
474 const int m =
Width();
477 const int * d_ia =
ReadI();
479 csru2csrInfo_t sortInfoA;
483 cusparseSetMatIndexBase(
matA_descr, CUSPARSE_INDEX_BASE_ZERO );
484 cusparseSetMatType(
matA_descr, CUSPARSE_MATRIX_TYPE_GENERAL );
486 cusparseCreateCsru2csrInfo( &sortInfoA );
488#ifdef MFEM_USE_SINGLE
489 cusparseScsru2csr_bufferSizeExt(
handle, n, m, nnzA, d_a_sorted, d_ia,
490 d_ja_sorted, sortInfoA,
491 &pBufferSizeInBytes);
492#elif defined MFEM_USE_DOUBLE
493 cusparseDcsru2csr_bufferSizeExt(
handle, n, m, nnzA, d_a_sorted, d_ia,
494 d_ja_sorted, sortInfoA,
495 &pBufferSizeInBytes);
497 MFEM_ABORT(
"Floating point type undefined");
502#ifdef MFEM_USE_SINGLE
504 d_ja_sorted, sortInfoA, pBuffer);
505#elif defined MFEM_USE_DOUBLE
507 d_ja_sorted, sortInfoA, pBuffer);
509 MFEM_ABORT(
"Floating point type undefined");
516 cusparseDestroyCsru2csrInfo( sortInfoA );
524#if defined(MFEM_USE_HIP)
525 size_t pBufferSizeInBytes = 0;
526 void *pBuffer = NULL;
530 const int m =
Width();
533 const int * d_ia =
ReadI();
536 hipsparseMatDescr_t descrA;
537 hipsparseCreateMatDescr( &descrA );
545 hipsparseXcsrsort_bufferSizeExt(
handle, n, m, nnzA, d_ia, d_ja_sorted,
546 &pBufferSizeInBytes);
551 hipsparseCreateIdentityPermutation(
handle, nnzA, P);
552 hipsparseXcsrsort(
handle, n, m, nnzA, descrA, d_ia, d_ja_sorted, P, pBuffer);
554#if defined(MFEM_USE_SINGLE)
555 hipsparseSgthr(
handle, nnzA, d_a_sorted, d_a_tmp, P,
556 HIPSPARSE_INDEX_BASE_ZERO);
557#elif defined(MFEM_USE_DOUBLE)
558 hipsparseDgthr(
handle, nnzA, d_a_sorted, d_a_tmp, P,
559 HIPSPARSE_INDEX_BASE_ZERO);
561 MFEM_ABORT(
"Unsupported floating point type!");
565 hipsparseDestroyMatDescr( descrA );
579 for (
int j = 0, i = 0; i <
height; i++)
583 for (
int k = 0; k < row.
Size(); k++)
589 for (
int k = 0; k < row.
Size(); k++, j++)
601 MFEM_VERIFY(
Finalized(),
"Matrix is not Finalized!");
603 for (
int row = 0, end = 0; row <
height; row++)
607 for (j = start;
true; j++)
609 MFEM_VERIFY(j < end,
"diagonal entry not found in row = " << row);
610 if (
J[j] == row) {
break; }
613 for ( ; j > start; j--)
635 MFEM_ASSERT(i < height && i >= 0 && j < width && j >= 0,
636 "Trying to access element outside of the matrix. "
637 <<
"height = " <<
height <<
", "
638 <<
"width = " <<
width <<
", "
639 <<
"i = " << i <<
", "
642 MFEM_VERIFY(
Finalized(),
"Matrix must be finalized.");
644 for (
int k =
I[i], end =
I[i+1]; k < end; k++)
652 MFEM_ABORT(
"Did not find i = " << i <<
", j = " << j <<
" in matrix.");
658 static const real_t zero = 0.0;
660 MFEM_ASSERT(i < height && i >= 0 && j < width && j >= 0,
661 "Trying to access element outside of the matrix. "
662 <<
"height = " <<
height <<
", "
663 <<
"width = " <<
width <<
", "
664 <<
"i = " << i <<
", "
669 for (
int k =
I[i], end =
I[i+1]; k < end; k++)
681 if (node_p->Column == j)
683 return node_p->Value;
693 MFEM_VERIFY(
height ==
width,
"Matrix must be square, not height = "
695 MFEM_VERIFY(
Finalized(),
"Matrix must be finalized.");
699 const auto II = this->
ReadI();
700 const auto JJ = this->
ReadJ();
706 const int begin = II[i];
707 const int end = II[i+1];
709 for (j = begin; j < end; j++)
727 int num_rows = this->
Height();
728 int num_cols = this->
Width();
743 for (
int r=0; r<
height; r++)
748 for (
int cj=0; cj<this->
RowSize(r); cj++)
750 B(r, col[cj]) = val[cj];
764 MFEM_ASSERT(
width == x.
Size(),
"Input vector size (" << x.
Size()
765 <<
") must match matrix width (" <<
width <<
")");
766 MFEM_ASSERT(
height == y.
Size(),
"Output vector size (" << y.
Size()
767 <<
") must match matrix height (" <<
height <<
")");
775 for (
int i = 0; i <
height; i++)
779 for ( ; row != NULL; row = row->
Prev)
789#ifndef MFEM_USE_LEGACY_OPENMP
793 auto d_J =
Read(
J, nnz);
794 auto d_A =
Read(
A, nnz);
799 if (nnz == 0) {
return;}
802#ifdef MFEM_USE_CUDA_OR_HIP
809#if CUDA_VERSION >= 10010 || defined(MFEM_USE_HIP)
811 MFEM_cu_or_hip(sparseCreateCsr)(
815 const_cast<int *
>(d_I),
816 const_cast<int *
>(d_J),
817 const_cast<real_t *
>(d_A),
818 MFEM_CU_or_HIP(SPARSE_INDEX_32I),
819 MFEM_CU_or_HIP(SPARSE_INDEX_32I),
820 MFEM_CU_or_HIP(SPARSE_INDEX_BASE_ZERO),
821#ifdef MFEM_USE_SINGLE
822 MFEM_CUDA_or_HIP(_R_32F));
824 MFEM_CUDA_or_HIP(_R_64F));
828 MFEM_cu_or_hip(sparseCreateDnVec)(&
vecX_descr,
830 const_cast<real_t *
>(d_x),
831#ifdef MFEM_USE_SINGLE
832 MFEM_CUDA_or_HIP(_R_32F));
834 MFEM_CUDA_or_HIP(_R_64F));
837#ifdef MFEM_USE_SINGLE
838 MFEM_CUDA_or_HIP(_R_32F));
840 MFEM_CUDA_or_HIP(_R_64F));
844 cusparseSetMatIndexBase(
matA_descr, CUSPARSE_INDEX_BASE_ZERO);
845 cusparseSetMatType(
matA_descr, CUSPARSE_MATRIX_TYPE_GENERAL);
850 size_t newBufferSize = 0;
852 MFEM_cu_or_hip(sparseSpMV_bufferSize)(
854 MFEM_CU_or_HIP(SPARSE_OPERATION_NON_TRANSPOSE),
860#ifdef MFEM_USE_SINGLE
861 MFEM_CUDA_or_HIP(_R_32F),
863 MFEM_CUDA_or_HIP(_R_64F),
876#if CUDA_VERSION >= 10010 || defined(MFEM_USE_HIP)
878 MFEM_cu_or_hip(sparseDnVecSetValues)(
vecX_descr,
879 const_cast<real_t *
>(d_x));
880 MFEM_cu_or_hip(sparseDnVecSetValues)(
vecY_descr, d_y);
883 MFEM_cu_or_hip(sparseSpMV)(
885 MFEM_CU_or_HIP(SPARSE_OPERATION_NON_TRANSPOSE),
891#ifdef MFEM_USE_SINGLE
892 MFEM_CUDA_or_HIP(_R_32F),
894 MFEM_CUDA_or_HIP(_R_64F),
899#ifdef MFEM_USE_SINGLE
904 CUSPARSE_OPERATION_NON_TRANSPOSE,
910 const_cast<real_t *
>(d_A),
911 const_cast<int *
>(d_I),
912 const_cast<int *
>(d_J),
913 const_cast<real_t *
>(d_x),
925 const int end = d_I[i+1];
926 for (
int j = d_I[i]; j < end; j++)
928 d += d_A[j] * d_x[d_J[j]];
938 const int *Jp =
J, *Ip =
I;
940 #pragma omp parallel for
941 for (
int i = 0; i <
height; i++)
944 const int end = Ip[i+1];
945 for (
int j = Ip[i]; j < end; j++)
947 d += Ap[j] * xp[Jp[j]];
965 <<
") must match matrix height (" <<
height <<
")");
966 MFEM_ASSERT(
width == y.
Size(),
"Output vector size (" << y.
Size()
967 <<
") must match matrix width (" <<
width <<
")");
974 for (
int i = 0; i <
height; i++)
978 for ( ; row != NULL; row = row->
Prev)
997 const int nnz = Ip[
height];
1001 for (
int i = 0; i <
height; i++)
1004 const int end = Ip[i+1];
1005 for (
int j = Ip[i]; j < end; j++)
1007 const int Jj = Jp[j];
1008 yp[Jj] += Ap[j] * xi;
1039 MFEM_VERIFY(
Finalized(),
"Matrix must be finalized.");
1041 const int n = rows.
Size();
1043 auto d_rows = rows.
Read();
1045 auto d_J =
Read(
J, nnz);
1046 auto d_A =
Read(
A, nnz);
1047 auto d_x = x.
Read();
1048 auto d_y = y.
Write();
1051 const int r = d_rows[i];
1052 const int end = d_I[r + 1];
1054 for (
int j = d_I[r]; j < end; j++)
1056 a += d_A[j] * d_x[d_J[j]];
1065 MFEM_VERIFY(
Finalized(),
"Matrix must be finalized.");
1067 for (
int i = 0; i < rows.
Size(); i++)
1072 for (
int j =
I[r]; j < end; j++)
1074 val +=
A[j] * x(
J[j]);
1082 MFEM_ASSERT(
Finalized(),
"Matrix must be finalized.");
1083 MFEM_ASSERT(x.
Size() ==
Width(),
"Input vector size (" << x.
Size()
1084 <<
") must match matrix width (" <<
Width() <<
")");
1091 auto d_J =
Read(
J, nnz);
1097 const int end = d_I[i+1];
1098 for (
int j = d_I[i]; j < end; j++)
1113 MFEM_ASSERT(
Finalized(),
"Matrix must be finalized.");
1114 MFEM_ASSERT(x.
Size() ==
Height(),
"Input vector size (" << x.
Size()
1115 <<
") must match matrix height (" <<
Height() <<
")");
1120 for (
int i = 0; i <
Height(); i++)
1125 for (
int j =
I[i]; j < end; j++)
1135 MFEM_ASSERT(
width == x.
Size(),
"Input vector size (" << x.
Size()
1136 <<
") must match matrix width (" <<
width <<
")");
1137 MFEM_ASSERT(
height == y.
Size(),
"Output vector size (" << y.
Size()
1138 <<
") must match matrix height (" <<
height <<
")");
1149 for (
int i = 0; i <
height; i++)
1153 for ( ; row != NULL; row = row->
Prev)
1166 auto d_J =
Read(
J, nnz);
1167 auto d_A =
Read(
A, nnz);
1168 auto d_x = x.
Read();
1173 const int end = d_I[i+1];
1174 for (
int j = d_I[i]; j < end; j++)
1176 d += std::abs(d_A[j]) * d_x[d_J[j]];
1184 MFEM_ASSERT(
height == x.
Size(),
"Input vector size (" << x.
Size()
1185 <<
") must match matrix height (" <<
height <<
")");
1186 MFEM_ASSERT(
width == y.
Size(),
"Output vector size (" << y.
Size()
1187 <<
") must match matrix width (" <<
width <<
")");
1195 for (
int i = 0; i <
height; i++)
1199 for ( ; row != NULL; row = row->
Prev)
1214 for (
int i = 0; i <
height; i++)
1217 const int end =
I[i+1];
1218 for (
int j =
I[i]; j < end; j++)
1220 const int Jj =
J[j];
1221 y[Jj] += std::abs(
A[j]) * xi;
1230 <<
" must be equal to Width() = " <<
Width());
1232 <<
" must be equal to Height() = " <<
Height());
1245 for (
int i = 0; i <
height; i++)
1250 for (
int j =
I[i], end =
I[i+1]; j < end; j++)
1252 a +=
A[j] * x(
J[j]);
1259 a += np->Value * x(np->Column);
1270 for (
int i = 0; i <
height; i++)
1275 for (
int j =
I[i], end =
I[i+1]; j < end; j++)
1293 MFEM_VERIFY(irow <
height,
1294 "row " << irow <<
" not in matrix with height " <<
height);
1299 for (
int j =
I[irow], end =
I[irow+1]; j < end; j++)
1308 a += fabs(np->Value);
1317 MFEM_ASSERT(
Finalized(),
"Matrix must be finalized.");
1319 atol = std::abs(tol);
1321 fix_empty_rows =
height ==
width ? fix_empty_rows :
false;
1329 for (i = 0, nz = 0; i <
height; i++)
1332 for (j =
I[i]; j <
I[i+1]; j++)
1333 if (std::abs(
A[j]) > atol)
1338 if (fix_empty_rows && !found) { nz++; }
1346 for (i = 0, nz = 0; i <
height; i++)
1350 for (j =
I[i]; j <
I[i+1]; j++)
1351 if (std::abs(
A[j]) > atol)
1356 if ( lastCol > newJ[nz] )
1363 if (fix_empty_rows && !found)
1391 for (i = 1; i <=
height; i++)
1394 for (aux =
Rows[i-1]; aux != NULL; aux = aux->
Prev)
1396 if (skip_zeros && aux->
Value == 0.0)
1398 if (skip_zeros == 2) {
continue; }
1399 if ((i-1) != aux->
Column) {
continue; }
1405 if (other->Column == (i-1))
1408 found_val = other->Value;
1412 if (found && found_val == 0.0) {
continue; }
1417 if (fix_empty_rows && !nr) { nr = 1; }
1426 for (j = i = 0; i <
height; i++)
1430 for (aux =
Rows[i]; aux != NULL; aux = aux->
Prev)
1432 if (skip_zeros && aux->
Value == 0.0)
1434 if (skip_zeros == 2) {
continue; }
1435 if (i != aux->
Column) {
continue; }
1441 if (other->Column == i)
1444 found_val = other->Value;
1448 if (found && found_val == 0.0) {
continue; }
1454 if ( lastCol >
J[j] )
1463 if (fix_empty_rows && !nr)
1471#ifdef MFEM_USE_MEMALLOC
1475 for (i = 0; i <
height; i++)
1478 while (node_p != NULL)
1481 node_p = node_p->
Prev;
1494 int nr = (
height + br - 1)/br, nc = (
width + bc - 1)/bc;
1496 for (
int j = 0; j < bc; j++)
1498 for (
int i = 0; i < br; i++)
1501 for (
int k = 0; k <= nr; k++)
1505 blocks(i,j) =
new SparseMatrix(bI, NULL, NULL, nr, nc);
1509 for (
int gr = 0; gr <
height; gr++)
1511 int bi = gr/nr, i = gr%nr + 1;
1514 for (
int j =
I[gr]; j <
I[gr+1]; j++)
1518 blocks(bi,
J[j]/nc)->I[i]++;
1526 if (n_p->Value != 0.0)
1528 blocks(bi, n_p->Column/nc)->I[i]++;
1534 for (
int j = 0; j < bc; j++)
1536 for (
int i = 0; i < br; i++)
1540 for (
int k = 1; k <= nr; k++)
1542 rs =
b.I[k],
b.I[k] = nnz, nnz += rs;
1549 for (
int gr = 0; gr <
height; gr++)
1551 int bi = gr/nr, i = gr%nr + 1;
1554 for (
int j =
I[gr]; j <
I[gr+1]; j++)
1559 b.J[
b.I[i]] =
J[j] % nc;
1569 if (n_p->Value != 0.0)
1572 b.J[
b.I[i]] = n_p->Column % nc;
1573 b.A[
b.I[i]] = n_p->Value;
1595 for (
int i = 1; i <
height; i++)
1597 for (
int j =
I[i]; j <
I[i+1]; j++)
1601 symm = std::max(symm, std::abs(
A[j]-(*
this)(
J[j],i)));
1608 for (
int i = 0; i <
height; i++)
1610 for (
RowNode *node_p =
Rows[i]; node_p != NULL; node_p = node_p->
Prev)
1612 int col = node_p->Column;
1615 symm = std::max(symm, std::abs(node_p->Value-(*
this)(col,i)));
1625 MFEM_VERIFY(
Finalized(),
"Matrix must be finalized.");
1628 for (i = 1; i <
height; i++)
1630 for (j =
I[i]; j <
I[i+1]; j++)
1634 A[j] += (*this)(
J[j],i);
1636 (*this)(
J[j],i) =
A[j];
1653 for (
int i = 0; i <
height; i++)
1655 for (
RowNode *node_p =
Rows[i]; node_p != NULL; node_p = node_p->
Prev)
1672 for (
int j = 0; j < nnz; j++)
1674 m = std::max(m, std::abs(
A[j]));
1679 for (
int i = 0; i <
height; i++)
1683 m = std::max(m, std::abs(n_p->Value));
1699 for (
int i = 0; i < nz; i++)
1701 counter += (std::abs(Ap[i]) <= tol);
1706 for (
int i = 0; i <
height; i++)
1710 counter += (std::abs(aux->Value) <= tol);
1731 for (
int i = 0; i <
height; i++)
1751 MFEM_ASSERT(row < height && row >= 0,
1752 "Row " << row <<
" not in matrix of height " <<
height);
1754 MFEM_VERIFY(!
Finalized(),
"Matrix must NOT be finalized.");
1756 for (aux =
Rows[row]; aux != NULL; aux = aux->
Prev)
1767 MFEM_ASSERT(row < height && row >= 0,
1768 "Row " << row <<
" not in matrix of height " <<
height);
1769 MFEM_ASSERT(dpolicy !=
DIAG_KEEP,
"Diagonal policy must not be DIAG_KEEP");
1771 "if dpolicy == DIAG_ONE, matrix must be square, not height = "
1776 for (
int i=
I[row]; i <
I[row+1]; ++i)
1783 for (aux =
Rows[row]; aux != NULL; aux = aux->
Prev)
1797 MFEM_ASSERT(col < width && col >= 0,
1798 "Col " << col <<
" not in matrix of width " <<
width);
1799 MFEM_ASSERT(dpolicy !=
DIAG_KEEP,
"Diagonal policy must not be DIAG_KEEP");
1801 "if dpolicy == DIAG_ONE, matrix must be square, not height = "
1807 for (
int jpos = 0; jpos != nnz; ++jpos)
1817 for (
int i = 0; i <
height; i++)
1821 if (aux->Column == col)
1841 for (
int i = 0; i <
height; i++)
1843 for (
int jpos =
I[i]; jpos !=
I[i+1]; ++jpos)
1849 (*b)(i) -=
A[jpos] * (*x)(
J[jpos] );
1858 for (
int i = 0; i <
height; i++)
1862 if (cols[aux -> Column])
1866 (*b)(i) -= aux -> Value * (*x)(aux -> Column);
1880 for (
int row = 0; row <
height; row++)
1882 for (nd =
Rows[row]; nd != NULL; nd = nd->
Prev)
1884 if (col_marker[nd->
Column])
1894 for (
int row = 0; row <
height; row++)
1896 for (
int j =
I[row]; j <
I[row+1]; j++)
1898 if (col_marker[
J[j]])
1900 Ae.
Add(row,
J[j],
A[j]);
1912 MFEM_ASSERT(rc < height && rc >= 0,
1913 "Row " << rc <<
" not in matrix of height " <<
height);
1920 for (
int j =
I[rc]; j <
I[rc+1]; j++)
1922 const int col =
J[j];
1928 rhs(rc) =
A[j] * sol;
1939 mfem_error(
"SparseMatrix::EliminateRowCol () #2");
1946 for (
int k =
I[col]; 1; k++)
1950 mfem_error(
"SparseMatrix::EliminateRowCol () #3");
1952 else if (
J[k] == rc)
1954 rhs(col) -= sol *
A[k];
1966 const int col = aux->Column;
1972 rhs(rc) = aux->Value * sol;
1983 mfem_error(
"SparseMatrix::EliminateRowCol () #4");
1994 mfem_error(
"SparseMatrix::EliminateRowCol () #5");
1996 else if (node->Column == rc)
1998 rhs(col) -= sol * node->Value;
2012 MFEM_ASSERT(rc < height && rc >= 0,
2013 "Row " << rc <<
" not in matrix of height " <<
height);
2014 MFEM_ASSERT(sol.
Size() == rhs.
Width(),
"solution size (" << sol.
Size()
2015 <<
") must match rhs width (" << rhs.
Width() <<
")");
2017 const int num_rhs = rhs.
Width();
2020 for (
int j =
I[rc]; j <
I[rc+1]; j++)
2022 const int col =
J[j];
2028 for (
int r = 0; r < num_rhs; r++)
2030 rhs(rc,r) =
A[j] * sol(r);
2035 for (
int r = 0; r < num_rhs; r++)
2042 for (
int r = 0; r < num_rhs; r++)
2048 mfem_error(
"SparseMatrix::EliminateRowColMultipleRHS() #3");
2055 for (
int k =
I[col]; 1; k++)
2059 mfem_error(
"SparseMatrix::EliminateRowColMultipleRHS() #4");
2061 else if (
J[k] == rc)
2063 for (
int r = 0; r < num_rhs; r++)
2065 rhs(col,r) -= sol(r) *
A[k];
2078 const int col = aux->Column;
2084 for (
int r = 0; r < num_rhs; r++)
2086 rhs(rc,r) = aux->Value * sol(r);
2091 for (
int r = 0; r < num_rhs; r++)
2098 for (
int r = 0; r < num_rhs; r++)
2104 mfem_error(
"SparseMatrix::EliminateRowColMultipleRHS() #5");
2115 mfem_error(
"SparseMatrix::EliminateRowColMultipleRHS() #6");
2117 else if (node->Column == rc)
2119 for (
int r = 0; r < num_rhs; r++)
2121 rhs(col,r) -= sol(r) * node->Value;
2134 MFEM_ASSERT(rc < height && rc >= 0,
2135 "Row " << rc <<
" not in matrix of height " <<
height);
2139 const auto &II = this->
I;
2140 const auto &JJ = this->
J;
2141 for (
int j = II[rc]; j < II[rc+1]; j++)
2143 const int col = JJ[j];
2158 for (
int k = II[col]; 1; k++)
2162 mfem_error(
"SparseMatrix::EliminateRowCol() #2");
2164 else if (JJ[k] == rc)
2177 for (aux =
Rows[rc]; aux != NULL; aux = aux->
Prev)
2179 const int col = aux->
Column;
2194 for (node =
Rows[col]; 1; node = node->
Prev)
2198 mfem_error(
"SparseMatrix::EliminateRowCol() #3");
2200 else if (node->
Column == rc)
2215 MFEM_ASSERT(rc < height && rc >= 0,
2216 "Row " << rc <<
" not in matrix of height " <<
height);
2220 for (
int j =
I[rc]; j <
I[rc+1]; j++)
2222 const int col =
J[j];
2230 for (
int k =
I[col]; 1; k++)
2234 mfem_error(
"SparseMatrix::EliminateRowCol() #2");
2236 else if (
J[k] == rc)
2249 for (aux =
Rows[rc]; aux != NULL; aux = aux->
Prev)
2251 const int col = aux->
Column;
2259 for (node =
Rows[col]; 1; node = node->
Prev)
2263 mfem_error(
"SparseMatrix::EliminateRowCol() #3");
2265 else if (node->
Column == rc)
2282 for (nd =
Rows[rc]; nd != NULL; nd = nd->
Prev)
2284 const int col = nd->
Column;
2300 mfem_error(
"SparseMatrix::EliminateRowCol #1");
2308 for (nd2 =
Rows[col]; 1; nd2 = nd2->
Prev)
2312 mfem_error(
"SparseMatrix::EliminateRowCol #2");
2314 else if (nd2->
Column == rc)
2326 for (
int j =
I[rc]; j <
I[rc+1]; j++)
2328 const int col =
J[j];
2334 Ae.
Add(rc, rc,
A[j] - 1.0);
2338 Ae.
Add(rc, rc,
A[j]);
2344 mfem_error(
"SparseMatrix::EliminateRowCol #3");
2350 Ae.
Add(rc, col,
A[j]);
2352 for (
int k =
I[col];
true; k++)
2356 mfem_error(
"SparseMatrix::EliminateRowCol #4");
2358 else if (
J[k] == rc)
2360 Ae.
Add(col, rc,
A[k]);
2373 const int n_ess_dofs = ess_dofs.
Size();
2374 const auto ess_dofs_d = ess_dofs.
Read();
2375 const auto dI =
ReadI();
2376 const auto dJ =
ReadJ();
2381 const int idof = ess_dofs_d[i];
2382 for (
int j=dI[idof]; j<dI[idof+1]; ++j)
2384 const int jdof = dJ[j];
2388 for (
int k=dI[jdof]; k<dI[jdof+1]; ++k)
2415 for (
int i = 0; i <
height; i++)
2417 if (
I[i+1] ==
I[i]+1 && fabs(
A[
I[i]]) < 1e-16)
2426 for (
int i = 0; i <
height; i++)
2429 for (
int j =
I[i]; j <
I[i+1]; j++)
2433 if (zero <= threshold)
2435 for (
int j =
I[i]; j <
I[i+1]; j++)
2437 A[j] = (
J[j] == i) ? 1.0 : 0.0;
2452 for (
int i = 0; i <
s; i++)
2456 for (n_p = R[i]; n_p != NULL; n_p = n_p->
Prev)
2458 const int c = n_p->
Column;
2465 sum += n_p->
Value * yp[c];
2469 if (diag_p != NULL && diag_p->
Value != 0.0)
2471 yp[i] = (xp[i] - sum) / diag_p->
Value;
2473 else if (xp[i] == sum)
2479 mfem_error(
"SparseMatrix::Gauss_Seidel_forw()");
2493 for (
int i = 0, j = Ip[0]; i <
s; i++)
2495 const int end = Ip[i+1];
2498 for ( ; j < end; j++)
2500 const int c = Jp[j];
2507 sum += Ap[j] * yp[c];
2511 if (d >= 0 && Ap[d] != 0.0)
2513 yp[i] = (xp[i] - sum) / Ap[d];
2515 else if (xp[i] == sum)
2521 mfem_error(
"SparseMatrix::Gauss_Seidel_forw(...) #2");
2535 for (
int i =
height-1; i >= 0; i--)
2539 for (n_p = R[i]; n_p != NULL; n_p = n_p->
Prev)
2541 const int c = n_p->
Column;
2548 sum += n_p->
Value * yp[c];
2552 if (diag_p != NULL && diag_p->
Value != 0.0)
2554 yp[i] = (xp[i] - sum) / diag_p->
Value;
2556 else if (xp[i] == sum)
2562 mfem_error(
"SparseMatrix::Gauss_Seidel_back()");
2576 for (
int i =
s-1, j = Ip[
s]-1; i >= 0; i--)
2578 const int beg = Ip[i];
2581 for ( ; j >= beg; j--)
2583 const int c = Jp[j];
2590 sum += Ap[j] * yp[c];
2594 if (d >= 0 && Ap[d] != 0.0)
2596 yp[i] = (xp[i] - sum) / Ap[d];
2598 else if (xp[i] == sum)
2604 mfem_error(
"SparseMatrix::Gauss_Seidel_back(...) #2");
2612 MFEM_VERIFY(
Finalized(),
"Matrix must be finalized.");
2615 for (
int i = 0; i <
height; i++)
2619 for (
int j =
I[i]; j <
I[i+1]; j++)
2627 if (d >= 0 &&
A[d] != 0.0)
2629 real_t a = 1.8 * fabs(
A[d]) / norm;
2637 mfem_error(
"SparseMatrix::GetJacobiScaling() #2");
2644 real_t sc,
bool use_abs_diag)
const
2646 MFEM_VERIFY(
Finalized(),
"Matrix must be finalized.");
2648 for (
int i = 0; i <
height; i++)
2652 for (
int j =
I[i]; j <
I[i+1]; j++)
2660 sum -=
A[j] * x0(
J[j]);
2663 if (d >= 0 &&
A[d] != 0.0)
2665 const real_t diag = (use_abs_diag) ? fabs(
A[d]) :
A[d];
2666 x1(i) = sc * (sum / diag) + (1.0 - sc) * x0(i);
2676 real_t sc,
bool use_abs_diag)
const
2678 MFEM_VERIFY(
Finalized(),
"Matrix must be finalized.");
2682 const bool use_dev =
b.UseDevice() || x.
UseDevice();
2684 const auto Ap =
Read(
A, nnz, use_dev);
2686 const auto Jp =
Read(
J, nnz, use_dev);
2688 const auto bp =
b.Read(use_dev);
2689 auto xp = x.
Write(use_dev);
2693 const int end = Ip[i+1];
2694 for (
int j = Ip[i];
true; j++)
2698 MFEM_ABORT_KERNEL(
"Diagonal not found in SparseMatrix::DiagScale");
2702 const real_t diag = (use_abs_diag) ? fabs(Ap[j]) : Ap[j];
2705 MFEM_ABORT_KERNEL(
"Zero diagonal in SparseMatrix::DiagScale");
2707 xp[i] = sc * bp[i] / diag;
2714template <
bool useFabs>
2722 const auto bp =
b.Read(useDevice);
2723 const auto x0p = x0.
Read(useDevice);
2724 auto x1p = x1.
Write(useDevice);
2726 const auto Ip =
Read(I, height+1, useDevice);
2732 real_t resi = bp[i], norm = 0.0;
2733 for (
int j = Ip[i]; j < Ip[i+1]; j++)
2735 resi -= Ap[j] * x0p[Jp[j]];
2738 norm += fabs(Ap[j]);
2747 x1p[i] = x0p[i] + sc * resi / norm;
2753 MFEM_ABORT_KERNEL(
"L1 norm of row is zero.");
2757 MFEM_ABORT_KERNEL(
"sum of row is zero.");
2766 MFEM_VERIFY(
Finalized(),
"Matrix must be finalized.");
2767 JacobiDispatch<true>(
b,x0,x1,
I,
J,
A,
height,sc);
2773 MFEM_VERIFY(
Finalized(),
"Matrix must be finalized.");
2774 JacobiDispatch<false>(
b,x0,x1,
I,
J,
A,
height,sc);
2780 int i, j, gi, gj,
s,
t;
2790 for (i = 0; i < rows.
Size(); i++)
2792 if ((gi=rows[i]) < 0) { gi = -1-gi,
s = -1; }
2795 "Trying to insert a row " << gi <<
" outside the matrix height "
2798 for (j = 0; j < cols.
Size(); j++)
2800 if ((gj=cols[j]) < 0) { gj = -1-gj,
t = -
s; }
2802 MFEM_ASSERT(gj <
width,
2803 "Trying to insert a column " << gj <<
" outside the matrix width "
2806 if (skip_zeros &&
a == 0.0)
2811 if (skip_zeros == 2 || &rows != &cols || subm(j, i) == 0.0)
2816 if (
t < 0) {
a = -
a; }
2828 if ((gi=i) < 0) { gi = -1-gi,
s = -1; }
2831 "Trying to set a row " << gi <<
" outside the matrix height "
2833 if ((gj=j) < 0) { gj = -1-gj,
t = -
s; }
2835 MFEM_ASSERT(gj <
width,
2836 "Trying to set a column " << gj <<
" outside the matrix width "
2838 if (
t < 0) {
a = -
a; }
2847 if ((gi=i) < 0) { gi = -1-gi,
s = -1; }
2850 "Trying to insert a row " << gi <<
" outside the matrix height "
2852 if ((gj=j) < 0) { gj = -1-gj,
t = -
s; }
2854 MFEM_ASSERT(gj <
width,
2855 "Trying to insert a column " << gj <<
" outside the matrix width "
2857 if (
t < 0) {
a = -
a; }
2864 int i, j, gi, gj,
s,
t;
2867 for (i = 0; i < rows.
Size(); i++)
2869 if ((gi=rows[i]) < 0) { gi = -1-gi,
s = -1; }
2872 "Trying to set a row " << gi <<
" outside the matrix height "
2875 for (j = 0; j < cols.
Size(); j++)
2878 if (skip_zeros &&
a == 0.0)
2883 if (skip_zeros == 2 || &rows != &cols || subm(j, i) == 0.0)
2888 if ((gj=cols[j]) < 0) { gj = -1-gj,
t = -
s; }
2890 MFEM_ASSERT(gj <
width,
2891 "Trying to set a column " << gj <<
" outside the matrix width "
2893 if (
t < 0) {
a = -
a; }
2905 int i, j, gi, gj,
s,
t;
2908 for (i = 0; i < rows.
Size(); i++)
2910 if ((gi=rows[i]) < 0) { gi = -1-gi,
s = -1; }
2913 "Trying to set a row " << gi <<
" outside the matrix height "
2916 for (j = 0; j < cols.
Size(); j++)
2919 if (skip_zeros &&
a == 0.0)
2924 if (skip_zeros == 2 || &rows != &cols || subm(j, i) == 0.0)
2929 if ((gj=cols[j]) < 0) { gj = -1-gj,
t = -
s; }
2931 MFEM_ASSERT(gj <
width,
2932 "Trying to set a column " << gj <<
" outside the matrix width "
2934 if (
t < 0) {
a = -
a; }
2944 int i, j, gi, gj,
s,
t;
2947 for (i = 0; i < rows.
Size(); i++)
2949 if ((gi=rows[i]) < 0) { gi = -1-gi,
s = -1; }
2952 "Trying to read a row " << gi <<
" outside the matrix height "
2955 for (j = 0; j < cols.
Size(); j++)
2957 if ((gj=cols[j]) < 0) { gj = -1-gj,
t = -
s; }
2959 MFEM_ASSERT(gj <
width,
2960 "Trying to read a column " << gj <<
" outside the matrix width "
2963 subm(i, j) = (
t < 0) ? (-
a) : (
a);
2978 "Trying to query a row " << gi <<
" outside the matrix height "
2982 return (
Rows[gi] == NULL);
2986 return (
I[gi] ==
I[gi+1]);
2995 if ((gi=row) < 0) { gi = -1-gi; }
2997 "Trying to read a row " << gi <<
" outside the matrix height "
3001 for (n =
Rows[gi], j = 0; n; n = n->
Prev)
3007 for (n =
Rows[gi], j = 0; n; n = n->
Prev, j++)
3022 cols.
MakeRef(
const_cast<int*
>((
const int*)
J) + j,
I[gi+1]-j);
3025 MFEM_ASSERT(row >= 0,
"Row not valid: " << row <<
", height: " <<
height);
3036 if ((gi=row) < 0) { gi = -1-gi,
s = -1; }
3039 "Trying to set a row " << gi <<
" outside the matrix height "
3045 for (
int j = 0; j < cols.
Size(); j++)
3047 if ((gj=cols[j]) < 0) { gj = -1-gj,
t = -
s; }
3049 MFEM_ASSERT(gj <
width,
3050 "Trying to set a column " << gj <<
" outside the matrix"
3051 " width " <<
width);
3053 if (
t < 0) {
a = -
a; }
3061 MFEM_ASSERT(cols.
Size() == srow.
Size(),
"");
3063 for (
int i =
I[gi], j = 0; j < cols.
Size(); j++, i++)
3065 if ((gj=cols[j]) < 0) { gj = -1-gj,
t = -
s; }
3067 MFEM_ASSERT(gj <
width,
3068 "Trying to set a column " << gj <<
" outside the matrix"
3069 " width " <<
width);
3080 int j, gi, gj,
s,
t;
3083 MFEM_VERIFY(!
Finalized(),
"Matrix must NOT be finalized.");
3085 if ((gi=row) < 0) { gi = -1-gi,
s = -1; }
3088 "Trying to insert a row " << gi <<
" outside the matrix height "
3091 for (j = 0; j < cols.
Size(); j++)
3093 if ((gj=cols[j]) < 0) { gj = -1-gj,
t = -
s; }
3095 MFEM_ASSERT(gj <
width,
3096 "Trying to insert a column " << gj <<
" outside the matrix width "
3103 if (
t < 0) {
a = -
a; }
3121 for (aux =
Rows[i]; aux != NULL; aux = aux -> Prev)
3123 aux -> Value *= scale;
3128 int j, end =
I[i+1];
3130 for (j =
I[i]; j < end; j++)
3143 for (
int i=0; i <
height; ++i)
3146 for (aux =
Rows[i]; aux != NULL; aux = aux -> Prev)
3148 aux -> Value *= scale;
3156 for (
int i=0; i <
height; ++i)
3160 for (j =
I[i]; j < end; j++)
3173 for (
int i=0; i <
height; ++i)
3175 for (aux =
Rows[i]; aux != NULL; aux = aux -> Prev)
3177 aux -> Value *= sr(aux->
Column);
3185 for (
int i=0; i <
height; ++i)
3188 for (j =
I[i]; j < end; j++)
3199 "Mismatch of this matrix size and rhs. This height = "
3200 <<
height <<
", width = " <<
width <<
", B.height = "
3203 for (
int i = 0; i <
height; i++)
3208 for (
RowNode *aux = B.
Rows[i]; aux != NULL; aux = aux->Prev)
3210 _Add_(aux->Column, aux->Value);
3215 for (
int j = B.
I[i]; j < B.
I[i+1]; j++)
3228 for (
int i = 0; i <
height; i++)
3235 np->Value +=
a * B.
_Get_(np->Column);
3240 for (
int j =
I[i]; j <
I[i+1]; j++)
3255 for (
int i = 0; i < nnz; i++)
3262 for (
int i = 0; i <
height; i++)
3265 node_p = node_p -> Prev)
3267 node_p -> Value =
a;
3279 for (
int i = 0, nnz =
I[
height]; i < nnz; i++)
3286 for (
int i = 0; i <
height; i++)
3289 node_p = node_p -> Prev)
3291 node_p -> Value *=
a;
3306 for (i = 0; i <
height; i++)
3308 os <<
"[row " << i <<
"]\n";
3309 for (nd =
Rows[i], j = 0; nd != NULL; nd = nd->
Prev, j++)
3311 os <<
" (" << nd->
Column <<
"," << nd->
Value <<
")";
3312 if ( !((j+1) % width_) )
3329 for (i = 0; i <
height; i++)
3331 os <<
"[row " << i <<
"]\n";
3332 for (j =
I[i]; j <
I[i+1]; j++)
3334 os <<
" (" <<
J[j] <<
"," <<
A[j] <<
")";
3335 if ( !((j+1-
I[i]) % width_) )
3340 if ((j-
I[i]) % width_)
3349 os <<
"% size " <<
height <<
" " <<
width <<
"\n";
3353 ios::fmtflags old_fmt = os.flags();
3354 os.setf(ios::scientific);
3355 std::streamsize old_prec = os.precision(14);
3360 for (i = 0; i <
height; i++)
3362 for (nd =
Rows[i], j = 0; nd != NULL; nd = nd->
Prev, j++)
3364 os << i+1 <<
" " << nd->
Column+1 <<
" " << nd->
Value <<
'\n';
3374 for (i = 0; i <
height; i++)
3376 for (j =
I[i]; j <
I[i+1]; j++)
3378 os << i+1 <<
" " <<
J[j]+1 <<
" " <<
A[j] <<
'\n';
3384 os.precision(old_prec);
3391 ios::fmtflags old_fmt = os.flags();
3392 os.setf(ios::scientific);
3393 std::streamsize old_prec = os.precision(14);
3395 os <<
"%%MatrixMarket matrix coordinate real general" <<
'\n'
3396 <<
"% Generated by MFEM" <<
'\n';
3403 for (i = 0; i <
height; i++)
3405 for (nd =
Rows[i], j = 0; nd != NULL; nd = nd->
Prev, j++)
3407 os << i+1 <<
" " << nd->
Column+1 <<
" " << nd->
Value <<
'\n';
3417 for (i = 0; i <
height; i++)
3419 for (j =
I[i]; j <
I[i+1]; j++)
3421 os << i+1 <<
" " <<
J[j]+1 <<
" " <<
A[j] <<
'\n';
3425 os.precision(old_prec);
3431 MFEM_VERIFY(
Finalized(),
"Matrix must be finalized.");
3441 for (i = 0; i <=
height; i++)
3443 os <<
I[i]+1 <<
'\n';
3446 for (i = 0; i <
I[
height]; i++)
3448 os <<
J[i]+1 <<
'\n';
3451 for (i = 0; i <
I[
height]; i++)
3459 MFEM_VERIFY(
Finalized(),
"Matrix must be finalized.");
3464 os <<
width <<
'\n';
3470 for (i = 0; i <=
height; i++)
3475 for (i = 0; i <
I[
height]; i++)
3480 for (i = 0; i <
I[
height]; i++)
3488 const real_t MiB = 1024.*1024;
3500 "SparseMatrix statistics:\n"
3503 " Dimensions : " <<
height <<
" x " <<
width <<
"\n"
3504 " Number of entries (total) : " << nnz <<
"\n"
3505 " Number of entries (per row) : " << 1.*nnz/
Height() <<
"\n"
3506 " Number of stored zeros : " << nz*pz <<
"% (" << nz <<
")\n"
3507 " Number of Inf/Nan entries : " << nnf*pz <<
"% ("<< nnf <<
")\n"
3508 " Norm, max |a_ij| : " << max_norm <<
"\n"
3509 " Symmetry, max |a_ij-a_ji| : " << symm <<
"\n"
3510 " Number of small entries:\n"
3511 " |a_ij| <= 1e-12*Norm : " << ns12*pz <<
"% (" << ns12 <<
")\n"
3512 " |a_ij| <= 1e-15*Norm : " << ns15*pz <<
"% (" << ns15 <<
")\n"
3513 " |a_ij| <= 1e-18*Norm : " << ns18*pz <<
"% (" << ns18 <<
")\n";
3516 os <<
" Memory used by CSR : " <<
3517 (
sizeof(int)*(
height+1+nnz)+
sizeof(
real_t)*nnz)/MiB <<
" MiB\n";
3522#ifdef MFEM_USE_MEMALLOC
3525 for (
int i = 0; i <
height; i++)
3533 os <<
" Memory used by LIL : " << used_mem/MiB <<
" MiB\n";
3545#if !defined(MFEM_USE_MEMALLOC)
3546 for (
int i = 0; i <
height; i++)
3549 while (node_p != NULL)
3552 node_p = node_p->
Prev;
3562#ifdef MFEM_USE_MEMALLOC
3575 const int *start_j =
J;
3577 for (
const int *jptr = start_j; jptr != end_j; ++jptr)
3579 awidth = std::max(awidth, *jptr + 1);
3585 for (
int i = 0; i <
height; i++)
3587 for (aux =
Rows[i]; aux != NULL; aux = aux->
Prev)
3589 awidth = std::max(awidth, aux->
Column + 1);
3601 for (
int i = 0; i < n; i++)
3611 "Finalize must be called before Transpose. Use TransposeRowMatrix instead");
3614 const int *A_i, *A_j;
3615 int m, n, nnz, *At_i, *At_j;
3630 for (i = 0; i <= n; i++)
3634 for (i = 0; i < nnz; i++)
3638 for (i = 1; i < n; i++)
3640 At_i[i+1] += At_i[i];
3643 for (i = j = 0; i < m; i++)
3646 for ( ; j < end; j++)
3648 At_j[At_i[A_j[j]]] = i;
3649 At_data[At_i[A_j[j]]] = A_data[j];
3654 for (i = n; i > 0; i--)
3656 At_i[i] = At_i[i-1];
3667 int m, n, nnz, *At_i, *At_j;
3677 for (i = 0; i < m; i++)
3679 A.
GetRow(i, Acols, Avals);
3701 for (i = 0; i <= n; i++)
3706 for (i = 0; i < m; i++)
3708 A.
GetRow(i, Acols, Avals);
3709 for (j = 0; j<Acols.
Size(); ++j)
3714 for (i = 1; i < n; i++)
3716 At_i[i+1] += At_i[i];
3719 for (i = 0; i < m; i++)
3721 A.
GetRow(i, Acols, Avals);
3722 for (j = 0; j<Acols.
Size(); ++j)
3724 At_j[At_i[Acols[j]]] = i;
3725 At_data[At_i[Acols[j]]] = Avals[j];
3730 for (i = n; i > 0; i--)
3732 At_i[i] = At_i[i-1];
3743 int nrowsA, ncolsA, nrowsB, ncolsB;
3744 const int *A_i, *A_j, *B_i, *B_j;
3745 int *C_i, *C_j, *B_marker;
3746 const real_t *A_data, *B_data;
3748 int ia, ib, ic, ja, jb, num_nonzeros;
3749 int row_start, counter;
3758 MFEM_VERIFY(ncolsA == nrowsB,
3759 "number of columns of A (" << ncolsA
3760 <<
") must equal number of rows of B (" << nrowsB <<
")");
3769 B_marker =
new int[ncolsB];
3771 for (ib = 0; ib < ncolsB; ib++)
3780 C_i[0] = num_nonzeros = 0;
3781 for (ic = 0; ic < nrowsA; ic++)
3783 for (ia = A_i[ic]; ia < A_i[ic+1]; ia++)
3786 for (ib = B_i[ja]; ib < B_i[ja+1]; ib++)
3789 if (B_marker[jb] != ic)
3796 C_i[ic+1] = num_nonzeros;
3802 C =
new SparseMatrix(C_i, C_j, C_data, nrowsA, ncolsB);
3804 for (ib = 0; ib < ncolsB; ib++)
3813 MFEM_VERIFY(nrowsA == C->
Height() && ncolsB == C->
Width(),
3814 "Input matrix sizes do not match output sizes"
3815 <<
" nrowsA = " << nrowsA
3816 <<
", C->Height() = " << C->
Height()
3817 <<
" ncolsB = " << ncolsB
3818 <<
", C->Width() = " << C->
Width());
3826 for (ic = 0; ic < nrowsA; ic++)
3829 row_start = counter;
3830 for (ia = A_i[ic]; ia < A_i[ic+1]; ia++)
3833 a_entry = A_data[ia];
3834 for (ib = B_i[ja]; ib < B_i[ja+1]; ib++)
3837 b_entry = B_data[ib];
3838 if (B_marker[jb] < row_start)
3840 B_marker[jb] = counter;
3845 C_data[counter] = a_entry*b_entry;
3850 C_data[B_marker[jb]] += a_entry*b_entry;
3858 "With pre-allocated output matrix, number of non-zeros ("
3860 <<
") did not match number of entries changed from matrix-matrix multiply, "
3879 int nrowsA, ncolsA, nrowsB, ncolsB;
3880 int *C_i, *C_j, *B_marker;
3882 int ia, ib, ic, ja, jb, num_nonzeros;
3883 int row_start, counter;
3892 MFEM_VERIFY(ncolsA == nrowsB,
3893 "number of columns of A (" << ncolsA
3894 <<
") must equal number of rows of B (" << nrowsB <<
")");
3896 B_marker =
new int[ncolsB];
3898 for (ib = 0; ib < ncolsB; ib++)
3905 C_i[0] = num_nonzeros = 0;
3909 for (ic = 0; ic < nrowsA; ic++)
3911 A.
GetRow(ic, colsA, dataA);
3912 for (ia = 0; ia < colsA.
Size(); ia++)
3915 B.
GetRow(ja, colsB, dataB);
3916 for (ib = 0; ib < colsB.
Size(); ib++)
3919 if (B_marker[jb] != ic)
3926 C_i[ic+1] = num_nonzeros;
3932 C =
new SparseMatrix(C_i, C_j, C_data, nrowsA, ncolsB);
3934 for (ib = 0; ib < ncolsB; ib++)
3940 for (ic = 0; ic < nrowsA; ic++)
3942 row_start = counter;
3943 A.
GetRow(ic, colsA, dataA);
3944 for (ia = 0; ia < colsA.
Size(); ia++)
3947 a_entry = dataA[ia];
3948 B.
GetRow(ja, colsB, dataB);
3949 for (ib = 0; ib < colsB.
Size(); ib++)
3952 b_entry = dataB[ib];
3953 if (B_marker[jb] < row_start)
3955 B_marker[jb] = counter;
3957 C_data[counter] = a_entry*b_entry;
3962 C_data[B_marker[jb]] += a_entry*b_entry;
3977 for (
int j = 0; j < B.
Width(); ++j)
3981 A.
Mult(columnB, columnC);
3991 Mult (R, *AP, *RAP_);
4034 int i, At_nnz, *At_j;
4038 At_nnz = At -> NumNonZeroElems();
4039 At_j = At -> GetJ();
4040 At_data = At -> GetData();
4041 for (i = 0; i < At_nnz; i++)
4043 At_data[i] *= D(At_j[i]);
4054 int ncols = A.
Width();
4060 const int *A_i = A.
GetI();
4061 const int *A_j = A.
GetJ();
4064 const int *B_i = B.
GetI();
4065 const int *B_j = B.
GetJ();
4068 int * marker =
new int[ncols];
4069 std::fill(marker, marker+ncols, -1);
4071 int num_nonzeros = 0, jcol;
4073 for (
int ic = 0; ic < nrows; ic++)
4075 for (
int ia = A_i[ic]; ia < A_i[ic+1]; ia++)
4081 for (
int ib = B_i[ic]; ib < B_i[ic+1]; ib++)
4084 if (marker[jcol] != ic)
4090 C_i[ic+1] = num_nonzeros;
4096 for (
int ia = 0; ia < ncols; ia++)
4102 for (
int ic = 0; ic < nrows; ic++)
4104 for (
int ia = A_i[ic]; ia < A_i[ic+1]; ia++)
4108 C_data[pos] =
a*A_data[ia];
4112 for (
int ib = B_i[ic]; ib < B_i[ic+1]; ib++)
4115 if (marker[jcol] < C_i[ic])
4118 C_data[pos] =
b*B_data[ib];
4124 C_data[marker[jcol]] +=
b*B_data[ib];
4130 return new SparseMatrix(C_i, C_j, C_data, nrows, ncols);
4135 return Add(1.,A,1.,B);
4140 MFEM_ASSERT(Ai.
Size() > 0,
"invalid size Ai.Size() = " << Ai.
Size());
4145 for (
int i=1; i < Ai.
Size(); ++i)
4147 result =
Add(*accumulate, *Ai[i]);
4153 accumulate = result;
4163 for (
int r = 0; r < B.
Height(); r++)
4167 for (
int i=0; i<A.
RowSize(r); i++)
4169 B(r, colA[i]) +=
alpha * valA[i];
4182 for (
int i=0; i<mA; i++)
4184 for (
int j=0; j<nA; j++)
4186 C->
AddMatrix(A(i,j), B, i * mB, j * nB);
4200 for (
int i=0; i<mA; i++)
4202 for (
int j=0; j<nA; j++)
4204 for (
int r=0; r<mB; r++)
4209 for (
int cj=0; cj<B.
RowSize(r); cj++)
4211 C->
Set(i * mB + r, j * nB + colB[cj], A(i,j) * valB[cj]);
4229 for (
int r=0; r<mA; r++)
4234 for (
int aj=0; aj<A.
RowSize(r); aj++)
4236 for (
int i=0; i<mB; i++)
4238 for (
int j=0; j<nB; j++)
4240 C->
Set(r * mB + i, colA[aj] * nB + j, valA[aj] * B(i, j));
4258 for (
int ar=0; ar<mA; ar++)
4263 for (
int aj=0; aj<A.
RowSize(ar); aj++)
4265 for (
int br=0; br<mB; br++)
4270 for (
int bj=0; bj<B.
RowSize(br); bj++)
4272 C->
Set(ar * mB + br, colA[aj] * nB + colB[bj],
4273 valA[aj] * valB[bj]);
4296#ifdef MFEM_USE_MEMALLOC
4306#ifdef MFEM_USE_CUDA_OR_HIP
4313 MFEM_cu_or_hip(sparseDestroy)(
handle);
4318 MFEM_Cu_or_Hip(MemFree)(
dBuffer);
Abstract data type for sparse matrices.
virtual int NumNonZeroElems() const =0
Returns the number of non-zeros in a matrix.
virtual int GetRow(const int row, Array< int > &cols, Vector &srow) const =0
Gets the columns indexes and values for row row.
Dynamic 2D array using row-major layout.
Memory< T > & GetMemory()
Return a reference to the Memory object used by the Array.
T Max() const
Find the maximal element in the array, using the comparison operator < for class T.
void Sort()
Sorts the array in ascending order. This requires operator< to be defined for T.
void SetSize(int nsize)
Change the logical size of the array, keep existing entries.
int Size() const
Return the logical size of the array.
void MakeRef(T *data_, int size_, bool own_data=false)
Make this Array a reference to a pointer.
T * Write(bool on_dev=true)
Shortcut for mfem::Write(a.GetMemory(), a.Size(), on_dev).
const T * Read(bool on_dev=true) const
Shortcut for mfem::Read(a.GetMemory(), a.Size(), on_dev).
Data type dense matrix using column-major storage.
void GetColumnReference(int c, Vector &col)
void SetSize(int s)
Change the size of the DenseMatrix to s x s.
void AddMatrix(DenseMatrix &A, int ro, int co)
Perform (ro+i,co+j)+=A(i,j) for 0<=i.
static bool Allows(unsigned long b_mask)
Return true if any of the backends in the backend mask, b_mask, are allowed.
static MemoryType GetDeviceMemoryType()
Get the current Device MemoryType. This is the MemoryType used by most MFEM classes when allocating m...
Abstract data type for matrix inverse.
size_t MemoryUsage() const
Class used by MFEM to store pointers to host and/or device memory.
void SetHostPtrOwner(bool own) const
Set/clear the ownership flag for the host pointer. Ownership indicates whether the pointer will be de...
int Capacity() const
Return the size of the allocated memory.
MemoryType GetMemoryType() const
Return a MemoryType that is currently valid. If both the host and the device pointers are currently v...
void CopyFrom(const Memory &src, int size)
Copy size entries from src to *this.
void Reset()
Reset the memory to be empty, ensuring that Delete() will be a no-op.
void Wrap(T *ptr, int size, bool own)
Wrap an externally allocated host pointer, ptr with the current host memory type returned by MemoryMa...
void Delete()
Delete the owned pointers and reset the Memory object.
void ClearOwnerFlags() const
Clear the ownership flags for the host and device pointers, as well as any internal data allocated by...
void New(int size)
Allocate host memory for size entries with the current host memory type returned by MemoryManager::Ge...
int width
Dimension of the input / number of columns in the matrix.
int Height() const
Get the height (size of output) of the Operator. Synonym with NumRows().
int height
Dimension of the output / number of rows in the matrix.
DiagonalPolicy
Defines operator diagonal policy upon elimination of rows and/or columns.
@ DIAG_ONE
Set the diagonal value to one.
@ DIAG_KEEP
Keep the diagonal value.
@ DIAG_ZERO
Set the diagonal value to zero.
int Width() const
Get the width (size of input) of the Operator. Synonym with NumCols().
void PrintCSR2(std::ostream &out) const
Prints a sparse matrix to stream out in CSR format.
const int * HostReadJ() const
void GetDiag(Vector &d) const
Returns the Diagonal of A.
real_t GetRowNorml1(int irow) const
For i = irow compute .
virtual void MultTranspose(const Vector &x, Vector &y) const
Multiply a vector with the transposed matrix. y = At * x.
void PartMult(const Array< int > &rows, const Vector &x, Vector &y) const
bool isSorted
Are the columns sorted already.
void Gauss_Seidel_back(const Vector &x, Vector &y) const
bool RowIsEmpty(const int row) const
void ClearColPtr() const
Reset the "current row" set by calling SetColPtr(). This method must be called between any two calls ...
void Jacobi3(const Vector &b, const Vector &x0, Vector &x1, real_t sc=1.0) const
real_t * ReadWriteData(bool on_dev=true)
Memory< real_t > A
Array with size I[height], containing the actual entries of the sparse matrix, as indexed by the I ar...
void EliminateRowCol(int rc, const real_t sol, Vector &rhs, DiagonalPolicy dpolicy=DIAG_ONE)
Eliminate row rc and column rc and modify the rhs using sol.
SparseMatrix & operator*=(real_t a)
virtual int GetRow(const int row, Array< int > &cols, Vector &srow) const
Extract all column indices and values from a given row.
void SetWidth(int width_=-1)
Change the width of a SparseMatrix.
MemAlloc< RowNode, 1024 > RowNodeAlloc
bool Empty() const
Check if the SparseMatrix is empty.
void GetSubMatrix(const Array< int > &rows, const Array< int > &cols, DenseMatrix &subm) const
SparseMatrix * At
Transpose of A. Owned. Used to perform MultTranspose() on devices.
virtual MatrixInverse * Inverse() const
This virtual method is not supported: it always returns NULL.
void PartAddMult(const Array< int > &rows, const Vector &x, Vector &y, const real_t a=1.0) const
void GetRowSums(Vector &x) const
For all i compute .
bool Finalized() const
Returns whether or not CSR format has been finalized.
void EliminateBC(const Array< int > &ess_dofs, DiagonalPolicy diag_policy)
Eliminate essential (Dirichlet) boundary conditions.
void PrintInfo(std::ostream &out) const
Print various sparse matrix statistics.
virtual void AddMultTranspose(const Vector &x, Vector &y, const real_t a=1.0) const
y += At * x (default) or y += a * At * x
void MoveDiagonalFirst()
Move the diagonal entry to the first position in each row, preserving the order of the rest of the co...
void Add(const int i, const int j, const real_t val)
void EliminateRowColMultipleRHS(int rc, const Vector &sol, DenseMatrix &rhs, DiagonalPolicy dpolicy=DIAG_ONE)
Similar to EliminateRowCol(int, const double, Vector &, DiagonalPolicy), but multiple values for elim...
void Jacobi(const Vector &b, const Vector &x0, Vector &x1, real_t sc, bool use_abs_diag=false) const
void SetColPtr(const int row) const
Initialize the SparseMatrix for fast access to the entries of the given row which becomes the "curren...
void SetDiagIdentity()
If a row contains only one diag entry of zero, set it to 1.
void AbsMultTranspose(const Vector &x, Vector &y) const
y = |At| * x, using entry-wise absolute values of the transpose of matrix A
void Gauss_Seidel_forw(const Vector &x, Vector &y) const
Gauss-Seidel forward and backward iterations over a vector x.
void MakeRef(const SparseMatrix &master)
Clear the contents of the SparseMatrix and make it a reference to master.
int CheckFinite() const
Count the number of entries that are NOT finite, i.e. Inf or Nan.
Memory< int > J
Array with size I[height], containing the column indices for all matrix entries, as indexed by the I ...
void EliminateCol(int col, DiagonalPolicy dpolicy=DIAG_ZERO)
Eliminates the column col from the matrix.
void EnsureMultTranspose() const
Ensures that the matrix is capable of performing MultTranspose(), AddMultTranspose(),...
void _Add_(const int col, const real_t a)
Add a value to an entry in the "current row". See SetColPtr().
SparseMatrix()
Create an empty SparseMatrix.
cusparseSpMatDescr_t matA_descr
void AbsMult(const Vector &x, Vector &y) const
y = |A| * x, using entry-wise absolute values of matrix A
void ClearGPUSparse()
Clear the cuSPARSE/hipSPARSE descriptors. This must be called after releasing the device memory of A.
void Threshold(real_t tol, bool fix_empty_rows=false)
Remove entries smaller in absolute value than a given tolerance tol. If fix_empty_rows is true,...
void SetSubMatrix(const Array< int > &rows, const Array< int > &cols, const DenseMatrix &subm, int skip_zeros=1)
const int * ReadI(bool on_dev=true) const
real_t InnerProduct(const Vector &x, const Vector &y) const
Compute y^t A x.
Memory< int > I
Array with size (height+1) containing the row offsets.
real_t & operator()(int i, int j)
Returns reference to A[i][j].
const real_t * HostReadData() const
void PrintMM(std::ostream &out=mfem::out) const
Prints matrix in Matrix Market sparse format.
void ScaleColumns(const Vector &sr)
this = this * diag(sr);
virtual void PrintMatlab(std::ostream &out=mfem::out) const
Prints matrix in matlab format.
void PrintCSR(std::ostream &out) const
Prints matrix to stream out in hypre_CSRMatrix format.
void Swap(SparseMatrix &other)
int * ReadWriteJ(bool on_dev=true)
cusparseDnVecDescr_t vecY_descr
void BooleanMultTranspose(const Array< int > &x, Array< int > &y) const
y = At * x, treating all entries as booleans (zero=false, nonzero=true).
RowNode ** Rows
Array of linked lists, one for every row. This array represents the linked list (LIL) storage format.
real_t & SearchRow(const int col)
Perform a fast search for an entry in the "current row". See SetColPtr().
static int SparseMatrixCount
void AddSubMatrix(const Array< int > &rows, const Array< int > &cols, const DenseMatrix &subm, int skip_zeros=1)
void Symmetrize()
(*this) = 1/2 ((*this) + (*this)^t)
void BooleanMult(const Array< int > &x, Array< int > &y) const
y = A * x, treating all entries as booleans (zero=false, nonzero=true).
int * GetRowColumns(const int row)
Return a pointer to the column indices in a row.
real_t GetJacobiScaling() const
Determine appropriate scaling for Jacobi iteration.
int RowSize(const int i) const
Returns the number of elements in row i.
real_t * HostReadWriteData()
int CountSmallElems(real_t tol) const
Count the number of entries with |a_ij| <= tol.
const int * ReadJ(bool on_dev=true) const
virtual void Mult(const Vector &x, Vector &y) const
Matrix vector multiplication.
virtual real_t & Elem(int i, int j)
Returns reference to a_{ij}.
int MaxRowSize() const
Returns the maximum number of elements among all rows.
real_t _Get_(const int col) const
Read the value of an entry in the "current row". See SetColPtr().
virtual ~SparseMatrix()
Destroys sparse matrix.
SparseMatrix & operator=(const SparseMatrix &rhs)
Assignment operator: deep copy.
virtual void EliminateZeroRows(const real_t threshold=1e-12)
If a row contains only zeros, set its diagonal to 1.
void OverrideSize(int height_, int width_)
Sets the height and width of the matrix.
static cusparseHandle_t handle
void EliminateRow(int row, const real_t sol, Vector &rhs)
Eliminates a column from the transpose matrix.
void GetBlocks(Array2D< SparseMatrix * > &blocks) const
SparseMatrix & operator+=(const SparseMatrix &B)
Add the sparse matrix 'B' to '*this'. This operation will cause an error if '*this' is finalized and ...
void Clear()
Clear the contents of the SparseMatrix.
void ResetTranspose() const
virtual void Finalize(int skip_zeros=1)
Finalize the matrix initialization, switching the storage format from LIL to CSR.
void EliminateRowColDiag(int rc, real_t value)
Perform elimination and set the diagonal entry to the given value.
real_t * GetRowEntries(const int row)
Return a pointer to the entries in a row.
void ScaleRows(const Vector &sl)
this = diag(sl) * this;
void SetRow(const int row, const Array< int > &cols, const Vector &srow)
virtual void AddMult(const Vector &x, Vector &y, const real_t a=1.0) const
y += A * x (default) or y += a * A * x
void DiagScale(const Vector &b, Vector &x, real_t sc=1.0, bool use_abs_diag=false) const
x = sc b / A_ii. When use_abs_diag = true, |A_ii| is used.
real_t * GetData()
Return the element data, i.e. the array A.
void SetSubMatrixTranspose(const Array< int > &rows, const Array< int > &cols, const DenseMatrix &subm, int skip_zeros=1)
void BuildTranspose() const
Build and store internally the transpose of this matrix which will be used in the methods AddMultTran...
void SortColumnIndices()
Sort the column indices corresponding to each row.
real_t IsSymmetric() const
Returns max_{i,j} |(i,j)-(j,i)| for a finalized matrix.
DenseMatrix * ToDenseMatrix() const
Produces a DenseMatrix from a SparseMatrix.
void _Set_(const int col, const real_t a)
Set an entry in the "current row". See SetColPtr().
void EliminateCols(const Array< int > &cols, const Vector *x=NULL, Vector *b=NULL)
Eliminate all columns i for which cols[i] != 0.
int * GetJ()
Return the array J.
cusparseDnVecDescr_t vecX_descr
const real_t * ReadData(bool on_dev=true) const
virtual int NumNonZeroElems() const
Returns the number of the nonzero elements in the matrix.
void ScaleRow(const int row, const real_t scale)
void AddRow(const int row, const Array< int > &cols, const Vector &srow)
void Jacobi2(const Vector &b, const Vector &x0, Vector &x1, real_t sc=1.0) const
int * GetI()
Return the array I.
int ActualWidth() const
Returns the actual Width of the matrix.
const int * HostReadI() const
void Set(const int i, const int j, const real_t val)
void Print(std::ostream &out=mfem::out, int width_=4) const
Prints matrix to stream out.
virtual const real_t * HostRead() const
Shortcut for mfem::Read(vec.GetMemory(), vec.Size(), false).
virtual const real_t * Read(bool on_dev=true) const
Shortcut for mfem::Read(vec.GetMemory(), vec.Size(), on_dev).
void Neg()
(*this) = -(*this)
virtual real_t * ReadWrite(bool on_dev=true)
Shortcut for mfem::ReadWrite(vec.GetMemory(), vec.Size(), on_dev).
int Size() const
Returns the size of the vector.
virtual void UseDevice(bool use_dev) const
Enable execution of Vector operations using the mfem::Device.
void SetSize(int s)
Resize the vector to size s.
void NewDataAndSize(real_t *d, int s)
Set the Vector data and size, deleting the old data, if owned.
real_t * GetData() const
Return a pointer to the beginning of the Vector data.
virtual real_t * HostReadWrite()
Shortcut for mfem::ReadWrite(vec.GetMemory(), vec.Size(), false).
virtual real_t * Write(bool on_dev=true)
Shortcut for mfem::Write(vec.GetMemory(), vec.Size(), on_dev).
void * CuMemAlloc(void **dptr, size_t bytes)
Allocates device memory and returns destination ptr.
void * CuMemFree(void *dptr)
Frees device memory and returns destination ptr.
DenseMatrix * OuterProduct(const DenseMatrix &A, const DenseMatrix &B)
Produces a block matrix with blocks A_{ij}*B.
const T * Read(const Memory< T > &mem, int size, bool on_dev=true)
Get a pointer for read access to mem with the mfem::Device's DeviceMemoryClass, if on_dev = true,...
void Swap(Array< T > &, Array< T > &)
real_t infinity()
Define a shortcut for std::numeric_limits<double>::infinity()
void mfem_error(const char *msg)
void Mult(const Table &A, const Table &B, Table &C)
C = A * B (as boolean matrices)
const T * HostRead(const Memory< T > &mem, int size)
Shortcut to Read(const Memory<T> &mem, int size, false)
T * Write(Memory< T > &mem, int size, bool on_dev=true)
Get a pointer for write access to mem with the mfem::Device's DeviceMemoryClass, if on_dev = true,...
bool IsFinite(const real_t &val)
void Transpose(const Table &A, Table &At, int ncols_A_)
Transpose a Table.
void RAP(const DenseMatrix &A, const DenseMatrix &P, DenseMatrix &RAP)
int CheckFinite(const real_t *v, const int n)
T * HostWrite(Memory< T > &mem, int size)
Shortcut to Write(const Memory<T> &mem, int size, false)
SparseMatrix * MultAbstractSparseMatrix(const AbstractSparseMatrix &A, const AbstractSparseMatrix &B)
Matrix product of sparse matrices. A and B do not need to be CSR matrices.
void * HipMemFree(void *dptr)
Frees device memory.
SparseMatrix * TransposeAbstractSparseMatrix(const AbstractSparseMatrix &A, int useActualWidth)
Transpose of a sparse matrix. A does not need to be a CSR matrix.
SparseMatrix * Mult_AtDA(const SparseMatrix &A, const Vector &D, SparseMatrix *OAtDA)
Matrix multiplication A^t D A. All matrices must be finalized.
MemoryType
Memory types supported by MFEM.
std::function< real_t(const Vector &)> f(real_t mass_coeff)
SparseMatrix * TransposeMult(const SparseMatrix &A, const SparseMatrix &B)
C = A^T B.
void * HipMemAlloc(void **dptr, size_t bytes)
Allocates device memory.
void forall(int N, lambda &&body)
void forall_switch(bool use_dev, int N, lambda &&body)
void Add(const DenseMatrix &A, const DenseMatrix &B, real_t alpha, DenseMatrix &C)
C = A + alpha*B.
void SparseMatrixFunction(SparseMatrix &S, real_t(*f)(real_t))
Applies f() to each element of the matrix (after it is finalized).
@ HIP_MASK
Biwise-OR of all HIP backends.
@ CPU_MASK
Biwise-OR of all CPU backends.
@ CUDA_MASK
Biwise-OR of all CUDA backends.