4.5/sparsemat_8cpp_source.html

 // Copyright (c) 2010-2022, Lawrence Livermore National Security, LLC. Produced

 // at the Lawrence Livermore National Laboratory. All Rights reserved. See files

 // LICENSE and NOTICE for details. LLNL-CODE-806117.

 //

 // This file is part of the MFEM library. For more information and source code

 // availability visit https://mfem.org.

 //

 // MFEM is free software; you can redistribute it and/or modify it under the

 // terms of the BSD-3 license. We welcome feedback and contributions, see file

 // CONTRIBUTING.md for details.


 // Implementation of sparse matrix


 #include "linalg.hpp"

 #include "../general/forall.hpp"

 #include "../general/table.hpp"

 #include "../general/sort_pairs.hpp"

 #include "../general/backends.hpp"


 #include <iostream>

 #include <iomanip>

 #include <cmath>

 #include <algorithm>

 #include <limits>

 #include <cstring>


 // Vendor-neutral spelling helpers: each macro pastes the CUDA or HIP prefix in
 // front of 'stub' so that the GPU sparse code below can be written once.
 // MFEM_GPUSPARSE_ALG names the CSR SpMV algorithm passed to the sparse library.
 #if defined(MFEM_USE_CUDA)

 #define MFEM_cu_or_hip(stub) cu##stub

 #define MFEM_Cu_or_Hip(stub) Cu##stub

 #define MFEM_CU_or_HIP(stub) CU##stub

 #define MFEM_CUDA_or_HIP(stub) CUDA##stub


 // The algorithm enumerator is spelled differently depending on whether
 // CUSPARSE_VERSION is at least 11400.
 #if CUSPARSE_VERSION >=  11400

 #define MFEM_GPUSPARSE_ALG CUSPARSE_SPMV_CSR_ALG1

 #else // CUSPARSE_VERSION >= 11400

 #define MFEM_GPUSPARSE_ALG CUSPARSE_CSRMV_ALG1

 #endif // CUSPARSE_VERSION >= 11400


 #elif defined(MFEM_USE_HIP)

 #define MFEM_cu_or_hip(stub) hip##stub

 #define MFEM_Cu_or_Hip(stub) Hip##stub

 #define MFEM_CU_or_HIP(stub) HIP##stub

 #define MFEM_CUDA_or_HIP(stub) HIP##stub


 // https://hipsparse.readthedocs.io/en/latest/usermanual.html#hipsparsespmvalg-t

 #define MFEM_GPUSPARSE_ALG HIPSPARSE_CSRMV_ALG1

 #endif // defined(MFEM_USE_CUDA)


 namespace mfem

 {


 using namespace std;


 // Definitions of the static GPU-sparse state of SparseMatrix. These exist only
 // in CUDA/HIP builds.
 #ifdef MFEM_USE_CUDA_OR_HIP

 // Incremented by InitGPUSparse() each time a matrix enables the GPU path.
 int SparseMatrix::SparseMatrixCount = 0;

 // doxygen doesn't like the macro-assisted typename so let's skip parsing it:

 // \cond false

 // cuSPARSE/hipSPARSE library handle, created on first use in InitGPUSparse().
 MFEM_cu_or_hip(sparseHandle_t) SparseMatrix::handle = nullptr;

 // \endcond

 // Current size in bytes of the SpMV work buffer 'dBuffer' (see AddMult()).
 size_t SparseMatrix::bufferSize = 0;

 // SpMV work buffer; AddMult() reallocates it when a larger size is required.
 void * SparseMatrix::dBuffer = nullptr;

 #endif // MFEM_USE_CUDA_OR_HIP


 /// Enable or disable the cuSPARSE/hipSPARSE path for this matrix.
 /** In CUDA/HIP builds: if the active Device allows a CUDA or HIP backend, the
     static library handle is created (first call only), the GPU sparse path is
     switched on for this matrix and SparseMatrixCount is incremented;
     otherwise the GPU sparse path is switched off. In other builds this is a
     no-op. */
 void SparseMatrix::InitGPUSparse()
 {
 #ifdef MFEM_USE_CUDA_OR_HIP
    useGPUSparse = Device::Allows(Backend::CUDA_MASK | Backend::HIP_MASK);
    if (!useGPUSparse) { return; }

    // The handle is a static member: create it only if it does not exist yet.
    if (!handle) { MFEM_cu_or_hip(sparseCreate)(&handle); }
    SparseMatrixCount++;
 #endif // MFEM_USE_CUDA_OR_HIP
 }


 /// Destroy the GPU sparse descriptors of this matrix, if they were created.
 /** Does nothing unless 'initBuffers' is set (it is set by AddMult() once the
     descriptors exist). The flag is cleared afterwards so the descriptors are
     destroyed at most once. No-op in builds without CUDA/HIP. */
 void SparseMatrix::ClearGPUSparse()
 {
 #ifdef MFEM_USE_CUDA_OR_HIP
    if (!initBuffers) { return; }

 #if CUDA_VERSION >= 10010 || defined(MFEM_USE_HIP)
    // Generic API: one matrix descriptor plus two dense-vector descriptors.
    MFEM_cu_or_hip(sparseDestroySpMat)(matA_descr);
    MFEM_cu_or_hip(sparseDestroyDnVec)(vecX_descr);
    MFEM_cu_or_hip(sparseDestroyDnVec)(vecY_descr);
 #else
    // Legacy API: only the matrix descriptor exists.
    cusparseDestroyMatDescr(matA_descr);
 #endif // CUDA_VERSION >= 10010 || defined(MFEM_USE_HIP)

    initBuffers = false;
 #endif // MFEM_USE_CUDA_OR_HIP
 }


 /// Create an empty, non-finalized matrix stored as per-row linked lists.
 /** @param nrows  number of rows.
     @param ncols  number of columns; a negative value makes the matrix square
                   (@a nrows columns). */
 SparseMatrix::SparseMatrix(int nrows, int ncols)
    : AbstractSparseMatrix(nrows, (ncols < 0) ? nrows : ncols),
      Rows(new RowNode *[nrows]),
      current_row(-1),
      ColPtrJ(nullptr),
      ColPtrNode(nullptr),
      At(nullptr),
      isSorted(false)
 {
    // Every row starts out as an empty list.
    int r = 0;
    while (r < nrows)
    {
       Rows[r++] = nullptr;
    }

    // We probably do not need to set the ownership flags here.
    I.SetHostPtrOwner(true);
    J.SetHostPtrOwner(true);
    A.SetHostPtrOwner(true);

 #ifdef MFEM_USE_MEMALLOC
    NodesMem = new RowNodeAlloc;
 #endif

    InitGPUSparse();
 }


 /// Wrap existing CSR arrays as a finalized @a m x @a n matrix.
 /** The arrays @a i (length m+1), @a j and @a data (length i[m]) are wrapped
     with the ownership flag set to true. */
 SparseMatrix::SparseMatrix(int *i, int *j, double *data, int m, int n)
    : AbstractSparseMatrix(m, n),
      Rows(nullptr),
      ColPtrJ(nullptr),
      ColPtrNode(nullptr),
      At(nullptr),
      isSorted(false)
 {
    // The row-offset array must be wrapped first: its last entry is the number
    // of stored entries, i.e. the length of the other two arrays.
    I.Wrap(i, height+1, true);
    const int num_entries = I[height];
    J.Wrap(j, num_entries, true);
    A.Wrap(data, num_entries, true);

 #ifdef MFEM_USE_MEMALLOC
    NodesMem = nullptr;
 #endif

    InitGPUSparse();
 }


 /// Wrap existing CSR arrays with explicit ownership and sortedness flags.
 /** @param i, j      CSR row offsets (length m+1) and column indices.
     @param data      values; if NULL, a zero-filled value array of length
                      i[m] is allocated instead.
     @param m, n      matrix height and width.
     @param ownij     ownership flag used when wrapping @a i and @a j.
     @param owna      ownership flag used when wrapping @a data.
     @param issorted  initial value of the "column indices are sorted" flag. */
 SparseMatrix::SparseMatrix(int *i, int *j, double *data, int m, int n,
                            bool ownij, bool owna, bool issorted)
    : AbstractSparseMatrix(m, n),
      Rows(nullptr),
      ColPtrJ(nullptr),
      ColPtrNode(nullptr),
      At(nullptr),
      isSorted(issorted)
 {
 #ifdef MFEM_USE_MEMALLOC
    NodesMem = nullptr;
 #endif

    I.Wrap(i, height+1, ownij);
    const int nnz = I[height];
    J.Wrap(j, nnz, ownij);

    if (!data)
    {
       // No values supplied: allocate our own storage and zero it.
       A.New(nnz);
       int k = 0;
       while (k < nnz)
       {
          A[k++] = 0.0;
       }
    }
    else
    {
       A.Wrap(data, nnz, owna);
    }

    InitGPUSparse();
 }


 /// Create a CSR matrix with exactly @a rowsize slots allocated in every row.
 /** Only the row offsets are initialized (row r starts at r*rowsize); the
     column indices and values are allocated but not set here. */
 SparseMatrix::SparseMatrix(int nrows, int ncols, int rowsize)
    : AbstractSparseMatrix(nrows, ncols),
      Rows(nullptr),
      ColPtrJ(nullptr),
      ColPtrNode(nullptr),
      At(nullptr),
      isSorted(false)
 {
 #ifdef MFEM_USE_MEMALLOC
    NodesMem = nullptr;
 #endif

    const int num_slots = nrows * rowsize;
    I.New(nrows + 1);
    J.New(num_slots);
    A.New(num_slots);

    // Uniform row offsets, including the closing entry I[nrows].
    int row = 0;
    while (row <= nrows)
    {
       I[row] = row * rowsize;
       row++;
    }

    InitGPUSparse();
 }


 /// Construct a copy of @a mat.
 /** If @a mat is finalized, the values are always copied into new storage. The
     graph arrays (I and J) are copied when @a copy_graph is true; otherwise
     they alias those of @a mat with the owner flags cleared. New arrays use
     memory type @a mt, or the type of the corresponding array in @a mat when
     @a mt is MemoryType::PRESERVE.

     If @a mat is not finalized, each row's linked list is duplicated node by
     node, in the same order. */
 SparseMatrix::SparseMatrix(const SparseMatrix &mat, bool copy_graph,
                            MemoryType mt)
    : AbstractSparseMatrix(mat.Height(), mat.Width())
 {
    if (!mat.Finalized())
    {
 #ifdef MFEM_USE_MEMALLOC
       NodesMem = new RowNodeAlloc;
 #endif
       Rows = new RowNode *[height];
       for (int r = 0; r < height; r++)
       {
          // 'link' always points at the pointer that must receive the next
          // copied node: first the row head, then the previous copy's Prev.
          RowNode **link = &Rows[r];
          const RowNode *src = mat.Rows[r];
          while (src != NULL)
          {
 #ifdef MFEM_USE_MEMALLOC
             RowNode *dst = NodesMem->Alloc();
 #else
             RowNode *dst = new RowNode;
 #endif
             dst->Value = src->Value;
             dst->Column = src->Column;
             *link = dst;
             link = &dst->Prev;
             src = src->Prev;
          }
          *link = NULL; // terminate the copied list
       }

       // We probably do not need to set the ownership flags here.
       I.SetHostPtrOwner(true);
       J.SetHostPtrOwner(true);
       A.SetHostPtrOwner(true);
    }
    else
    {
       mat.HostReadI();
       const int nnz = mat.I[height];
       const bool preserve = (mt == MemoryType::PRESERVE);

       if (!copy_graph)
       {
          // Share the graph of 'mat' without taking ownership of it.
          I = mat.I;
          J = mat.J;
          I.ClearOwnerFlags();
          J.ClearOwnerFlags();
       }
       else
       {
          I.New(height+1, preserve ? mat.I.GetMemoryType() : mt);
          J.New(nnz, preserve ? mat.J.GetMemoryType() : mt);
          I.CopyFrom(mat.I, height+1);
          J.CopyFrom(mat.J, nnz);
       }

       A.New(nnz, preserve ? mat.A.GetMemoryType() : mt);
       A.CopyFrom(mat.A, nnz);

       Rows = NULL;
 #ifdef MFEM_USE_MEMALLOC
       NodesMem = NULL;
 #endif
    }

    isSorted = mat.isSorted;
    current_row = -1;
    ColPtrJ = NULL;
    ColPtrNode = NULL;
    At = NULL;

    InitGPUSparse();
 }


 /// Create a finalized square diagonal matrix whose diagonal is @a v.
 /** Row r holds the single entry (r, r) = v[r], so the matrix is marked as
     sorted. */
 SparseMatrix::SparseMatrix(const Vector &v)
    : AbstractSparseMatrix(v.Size(), v.Size()),
      Rows(nullptr),
      ColPtrJ(nullptr),
      ColPtrNode(nullptr),
      At(nullptr),
      isSorted(true)
 {
 #ifdef MFEM_USE_MEMALLOC
    NodesMem = nullptr;
 #endif

    I.New(height + 1);
    J.New(height);
    A.New(height);

    // One entry per row: offsets, column indices and values in a single pass.
    for (int r = 0; r < height; r++)
    {
       I[r] = r;
       J[r] = r;
       A[r] = v[r];
    }
    I[height] = height; // closing offset

    InitGPUSparse();
 }


 /// Overwrite the stored dimensions without touching any matrix data.
 /** No consistency check against the stored entries is performed here. */
 void SparseMatrix::OverrideSize(int height_, int width_)
 {
    width = width_;
    height = height_;
 }


 /// Assignment operator: make this matrix a copy of @a rhs.
 /** The current contents are released with Clear(), then a temporary copy of
     @a rhs is constructed and swapped into this object.

     Self-assignment is a no-op. Without the guard, Clear() would wipe the
     object before it is copied from, leaving it empty.

     @return a reference to this matrix. */
 SparseMatrix& SparseMatrix::operator=(const SparseMatrix &rhs)
 {
    if (this == &rhs) { return *this; }

    Clear();

    SparseMatrix copy(rhs);
    Swap(copy);

    return *this;
 }


 /// Turn this matrix into a non-owning reference to the finalized @a master.
 /** The current contents are cleared; the dimensions, the three CSR arrays and
     the sorted flag are then taken from @a master, with the owner flags of the
     arrays cleared on this side. */
 void SparseMatrix::MakeRef(const SparseMatrix &master)
 {
    MFEM_ASSERT(master.Finalized(), "'master' must be finalized");

    Clear();

    height = master.Height();
    width = master.Width();
    isSorted = master.isSorted;

    // Alias the CSR arrays ...
    I = master.I;
    J = master.J;
    A = master.A;

    // ... and clear the owner flags on the aliases.
    I.ClearOwnerFlags();
    J.ClearOwnerFlags();
    A.ClearOwnerFlags();
 }


 /// Reset all members to the "empty 0 x 0 matrix" state.
 /** Pointers are only set to NULL and the CSR arrays are only Reset() here;
     this function itself deletes nothing. The GPU sparse descriptors are
     released through ClearGPUSparse(). */
 void SparseMatrix::SetEmpty()
 {
    // Dimensions and flags.
    height = 0;
    width = 0;
    isSorted = false;
    current_row = -1;

    // CSR storage.
    I.Reset();
    J.Reset();
    A.Reset();

    // Linked-list storage and helper pointers.
    Rows = NULL;
    ColPtrJ = NULL;
    ColPtrNode = NULL;
    At = NULL;
 #ifdef MFEM_USE_MEMALLOC
    NodesMem = NULL;
 #endif

    ClearGPUSparse();
 }


 int SparseMatrix::RowSize(const int i) const

 {

    int gi = i;

    if (gi < 0)

    {

       gi = -1-gi;

    }


    if (I)

    {

       return I[gi+1]-I[gi];

    }


    int s = 0;

    RowNode *row = Rows[gi];

    for ( ; row != NULL; row = row->Prev)

       if (row->Value != 0.0)

       {

          s++;

       }

    return s;

 }


 int SparseMatrix::MaxRowSize() const

 {

    int max_row_size=0;

    int rowSize=0;

    if (I)

    {

       for (int i=0; i < height; ++i)

       {

          rowSize = I[i+1]-I[i];

          max_row_size = (max_row_size > rowSize) ? max_row_size : rowSize;

       }

    }

    else

    {

       for (int i=0; i < height; ++i)

       {

          rowSize = RowSize(i);

          max_row_size = (max_row_size > rowSize) ? max_row_size : rowSize;

       }

    }


    return max_row_size;

 }


 /// Return a pointer to the first column index of row @a row.
 /** Aborts (MFEM_VERIFY) if the matrix is not finalized. */
 int *SparseMatrix::GetRowColumns(const int row)
 {
    MFEM_VERIFY(Finalized(), "Matrix must be finalized.");

    const int row_begin = I[row];
    return J + row_begin;
 }


 /// Return a read-only pointer to the first column index of row @a row.
 /** Aborts (MFEM_VERIFY) if the matrix is not finalized. */
 const int *SparseMatrix::GetRowColumns(const int row) const
 {
    MFEM_VERIFY(Finalized(), "Matrix must be finalized.");

    const int row_begin = I[row];
    return J + row_begin;
 }


 /// Return a pointer to the first stored value of row @a row.
 /** Aborts (MFEM_VERIFY) if the matrix is not finalized. */
 double *SparseMatrix::GetRowEntries(const int row)
 {
    MFEM_VERIFY(Finalized(), "Matrix must be finalized.");

    const int row_begin = I[row];
    return A + row_begin;
 }


 /// Return a read-only pointer to the first stored value of row @a row.
 /** Aborts (MFEM_VERIFY) if the matrix is not finalized. */
 const double *SparseMatrix::GetRowEntries(const int row) const
 {
    MFEM_VERIFY(Finalized(), "Matrix must be finalized.");

    const int row_begin = I[row];
    return A + row_begin;
 }


 /// Change the width (number of columns) of the matrix.
 /** @param newWidth  the new width; the special value -1 sets the width to
                      ActualWidth().

     When the width grows, the column-pointer helper array matching the
     current storage format is deleted, since additional columns may now exist.
     When it shrinks, the new width is only checked against ActualWidth()
     through MFEM_ASSERT. */
 void SparseMatrix::SetWidth(int newWidth)
 {
    // Nothing to be done here
    if (newWidth == width) { return; }

    if (newWidth == -1)
    {
       // Compute the actual width.
       // No need to reset the ColPtr, since the new ColPtr will be shorter.
       width = ActualWidth();
       return;
    }

    if (newWidth < width)
    {
       // Check that the new width is bigger or equal to the actual width.
       MFEM_ASSERT(newWidth >= ActualWidth(),
                   "The new width needs to be bigger or equal to the actual width");
       width = newWidth;
       return;
    }

    // newWidth > width:
    // We need to reset ColPtr, since now we may have additional columns.
    if (Rows == NULL)
    {
       delete [] ColPtrJ;
       ColPtrJ = NULL;
    }
    else
    {
       delete [] ColPtrNode;
       ColPtrNode = NULL;
    }
    width = newWidth;
 }


 /// Sort the column indices (and the matching values) within each row.
 /** Requires a finalized matrix and returns immediately if the matrix is
     already flagged as sorted. Three implementations are selected at run time:
     a cuSPARSE one when the Device allows CUDA, a hipSPARSE one when it allows
     HIP, and a host one otherwise. On return the sorted flag is set. */
 void SparseMatrix::SortColumnIndices()

 {

    MFEM_VERIFY(Finalized(), "Matrix is not Finalized!");


    if (isSorted)

    {

       return;

    }


 #ifdef MFEM_USE_CUDA_OR_HIP

    if ( Device::Allows( Backend::CUDA_MASK ))

    {

 #if defined(MFEM_USE_CUDA)

       // cuSPARSE path: csru2csr sorts J and A using a temporary work buffer.
       size_t pBufferSizeInBytes = 0;

       void *pBuffer = NULL;


       const int n = Height();

       const int m = Width();

       // The number of stored entries is taken from the capacity of J.
       const int nnzA = J.Capacity();

       double * d_a_sorted = ReadWriteData();

       const int * d_ia = ReadI();

       int * d_ja_sorted = ReadWriteJ();

       csru2csrInfo_t sortInfoA;


       cusparseMatDescr_t matA_descr;

       cusparseCreateMatDescr( &matA_descr );

       cusparseSetMatIndexBase( matA_descr, CUSPARSE_INDEX_BASE_ZERO );

       cusparseSetMatType( matA_descr, CUSPARSE_MATRIX_TYPE_GENERAL );


       cusparseCreateCsru2csrInfo( &sortInfoA );


       // Query the required work-buffer size, then allocate it.
       cusparseDcsru2csr_bufferSizeExt( handle, n, m, nnzA, d_a_sorted, d_ia,

                                        d_ja_sorted, sortInfoA,

                                        &pBufferSizeInBytes);


       CuMemAlloc( &pBuffer, pBufferSizeInBytes );


       cusparseDcsru2csr( handle, n, m, nnzA, matA_descr, d_a_sorted, d_ia,

                          d_ja_sorted, sortInfoA, pBuffer);


       // Release the temporary library objects and the work buffer.
       cusparseDestroyCsru2csrInfo( sortInfoA );

       cusparseDestroyMatDescr( matA_descr );


       CuMemFree( pBuffer );

 #endif

    }

    else if ( Device::Allows( Backend::HIP_MASK ))

    {

 #if defined(MFEM_USE_HIP)

       // hipSPARSE path: sort J while recording the permutation P, then gather
       // the values through P.
       size_t pBufferSizeInBytes = 0;

       void *pBuffer = NULL;

       int *P = NULL;


       const int n = Height();

       const int m = Width();

       // The number of stored entries is taken from the capacity of J.
       const int nnzA = J.Capacity();

       double * d_a_sorted = ReadWriteData();

       const int * d_ia = ReadI();

       int * d_ja_sorted = ReadWriteJ();


       hipsparseMatDescr_t descrA;

       hipsparseCreateMatDescr( &descrA );

       // FIXME: There is no in-place version of csr sort in hipSPARSE currently, so we make

       //        a temporary copy of the data for gthr, sort that, and then copy the sorted values

       //        back to the array being returned. When there is an in-place version available,

       //        we should use it.

       Array< double > a_tmp( nnzA );

       double *d_a_tmp = a_tmp.Write();


       hipsparseXcsrsort_bufferSizeExt(handle, n, m, nnzA, d_ia, d_ja_sorted,

                                       &pBufferSizeInBytes);


       HipMemAlloc( &pBuffer, pBufferSizeInBytes );

       HipMemAlloc( &P, nnzA * sizeof(int) );


       // P starts as the identity and is permuted together with J by the sort.
       hipsparseCreateIdentityPermutation(handle, nnzA, P);

       hipsparseXcsrsort(handle, n, m, nnzA, descrA, d_ia, d_ja_sorted, P, pBuffer);


       // Gather the values through P into the temporary array ...
       hipsparseDgthr(handle, nnzA, d_a_sorted, d_a_tmp, P,

                      HIPSPARSE_INDEX_BASE_ZERO);


       // ... and copy them back into A.
       A.CopyFrom( a_tmp.GetMemory(), nnzA );

       hipsparseDestroyMatDescr( descrA );


       HipMemFree( pBuffer );

       HipMemFree( P );

 #endif

    }

    else

 #endif // MFEM_USE_CUDA_OR_HIP

    {

       // Host path: request host access to all three CSR arrays first.
       const int * Ip=HostReadI();

       HostReadWriteJ();

       HostReadWriteData();


       // 'row' is a scratch array of (column, value) pairs, reused for each
       // row; 'j' is the running offset of the current row's first entry.
       Array<Pair<int,double> > row;

       for (int j = 0, i = 0; i < height; i++)

       {

          int end = Ip[i+1];

          row.SetSize(end - j);

          for (int k = 0; k < row.Size(); k++)

          {

             row[k].one = J[j+k];

             row[k].two = A[j+k];

          }

          row.Sort();

          // Write the sorted pairs back; this also advances 'j' to the start
          // of the next row.
          for (int k = 0; k < row.Size(); k++, j++)

          {

             J[j] = row[k].one;

             A[j] = row[k].two;

          }

       }

    }

    isSorted = true;

 }


 /// Move the diagonal entry of every row to the first position of that row.
 /** Requires a finalized matrix. The entries that preceded the diagonal are
     shifted one slot to the right, so their relative order is kept. Aborts
     (MFEM_VERIFY) if some row has no diagonal entry. */
 void SparseMatrix::MoveDiagonalFirst()
 {
    MFEM_VERIFY(Finalized(), "Matrix is not Finalized!");

    for (int row = 0, end = 0; row < height; row++)
    {
       // The previous row's end is this row's start.
       const int start = end;
       end = I[row+1];

       // Locate the diagonal entry.
       int j = start;
       while (true)
       {
          MFEM_VERIFY(j < end, "diagonal entry not found in row = " << row);
          if (J[j] == row) { break; }
          j++;
       }
       const double diag = A[j];

       // Shift the entries in [start, j) one slot to the right ...
       while (j > start)
       {
          J[j] = J[j-1];
          A[j] = A[j-1];
          j--;
       }

       // ... and store the diagonal in the freed first slot.
       J[start] = row;
       A[start] = diag;
    }
 }


 double &SparseMatrix::Elem(int i, int j)

 {

    return operator()(i,j);

 }


 const double &SparseMatrix::Elem(int i, int j) const

 {

    return operator()(i,j);

 }


 /// Return a writable reference to the stored entry (i,j).
 /** Requires a finalized matrix. The entry must already be part of the
     sparsity pattern: if it is not found in row @a i, MFEM_ABORT is called. */
 double &SparseMatrix::operator()(int i, int j)
 {
    MFEM_ASSERT(i < height && i >= 0 && j < width && j >= 0,
                "Trying to access element outside of the matrix.  "
                << "height = " << height << ", "
                << "width = " << width << ", "
                << "i = " << i << ", "
                << "j = " << j);

    MFEM_VERIFY(Finalized(), "Matrix must be finalized.");

    // Linear search for column j within row i.
    int pos = I[i];
    const int row_end = I[i+1];
    while (pos < row_end && J[pos] != j)
    {
       pos++;
    }
    if (pos < row_end)
    {
       return A[pos];
    }

    MFEM_ABORT("Did not find i = " << i << ", j = " << j << " in matrix.");
    return A[0]; // a return statement is still needed after the abort
 }


 /// Return a const reference to entry (i,j), or to a static zero if the entry
 /// is not stored.
 /** Works for both finalized (CSR) and non-finalized (linked-list) matrices. */
 const double &SparseMatrix::operator()(int i, int j) const
 {
    // Returned by reference for entries outside the sparsity pattern.
    static const double zero = 0.0;

    MFEM_ASSERT(i < height && i >= 0 && j < width && j >= 0,
                "Trying to access element outside of the matrix.  "
                << "height = " << height << ", "
                << "width = " << width << ", "
                << "i = " << i << ", "
                << "j = " << j);

    if (!Finalized())
    {
       // Walk the linked list of row i.
       RowNode *node = Rows[i];
       while (node != NULL)
       {
          if (node->Column == j) { return node->Value; }
          node = node->Prev;
       }
       return zero;
    }

    // Scan the CSR row i.
    const int row_end = I[i+1];
    for (int pos = I[i]; pos < row_end; pos++)
    {
       if (J[pos] == j) { return A[pos]; }
    }
    return zero;
 }


 /// Extract the diagonal of a finalized square matrix into @a d.
 /** @a d is resized to the matrix height. For each row, the first stored entry
     whose column equals the row index is used; rows without a stored diagonal
     entry yield 0. */
 void SparseMatrix::GetDiag(Vector & d) const
 {
    MFEM_VERIFY(height == width, "Matrix must be square, not height = "
                << height << ", width = " << width);
    MFEM_VERIFY(Finalized(), "Matrix must be finalized.");

    d.SetSize(height);

    const auto row_ptr = this->ReadI();
    const auto col_idx = this->ReadJ();
    const auto values = this->ReadData();
    auto diag = d.Write();

    MFEM_FORALL(i, height,
    {
       // Stays zero unless a diagonal entry is found in this row.
       double entry = 0.;
       const int row_end = row_ptr[i+1];
       for (int k = row_ptr[i]; k < row_end; k++)
       {
          if (col_idx[k] == i)
          {
             entry = values[k];
             break;
          }
       }
       diag[i] = entry;
    });
 }


 /// Produces a newly allocated DenseMatrix holding a dense copy of this matrix.
 /** The returned object is created with `new`; the fill-in is delegated to
     ToDenseMatrix(DenseMatrix &). */
 DenseMatrix *SparseMatrix::ToDenseMatrix() const
 {
    DenseMatrix *dense = new DenseMatrix(this->Height(), this->Width());
    this->ToDenseMatrix(*dense);
    return dense;
 }


 /// Fills @a B with a dense copy of this matrix.
 /** @a B is resized to height x width and zeroed, then every stored entry is
     assigned to its (row, column) position. The row accessors used here
     require a finalized matrix. */
 void SparseMatrix::ToDenseMatrix(DenseMatrix & B) const
 {
    B.SetSize(height, width);
    B = 0.0;

    for (int r = 0; r < height; r++)
    {
       const int    *cols = this->GetRowColumns(r);
       const double *vals = this->GetRowEntries(r);
       const int row_len = this->RowSize(r);

       for (int k = 0; k < row_len; k++)
       {
          B(r, cols[k]) = vals[k];
       }
    }
 }


 /// Matrix-vector product: y = A x.
 /** @a y is zeroed and the product is then accumulated with AddMult(). For a
     finalized matrix, @a y is first flagged to use the device. */
 void SparseMatrix::Mult(const Vector &x, Vector &y) const
 {
    if (Finalized())
    {
       y.UseDevice(true);
    }

    y = 0.0;
    AddMult(x, y);
 }


 /// Accumulate a scaled matrix-vector product: y += a * (A x).
 /** A non-finalized matrix is multiplied on the host by walking the per-row
     linked lists. A finalized matrix uses, unless MFEM_USE_LEGACY_OPENMP is
     defined, either cuSPARSE/hipSPARSE (when the Device allows CUDA or HIP and
     the GPU sparse path is enabled for this matrix) or an MFEM_FORALL kernel.
     With MFEM_USE_LEGACY_OPENMP an OpenMP-parallel host loop is used instead. */
 void SparseMatrix::AddMult(const Vector &x, Vector &y, const double a) const

 {

    MFEM_ASSERT(width == x.Size(), "Input vector size (" << x.Size()

                << ") must match matrix width (" << width << ")");

    MFEM_ASSERT(height == y.Size(), "Output vector size (" << y.Size()

                << ") must match matrix height (" << height << ")");


    if (!Finalized())

    {

       const double *xp = x.HostRead();

       double *yp = y.HostReadWrite();


       // The matrix is not finalized, but multiplication is still possible

       for (int i = 0; i < height; i++)

       {

          RowNode *row = Rows[i];

          double b = 0.0;

          for ( ; row != NULL; row = row->Prev)

          {

             b += row->Value * xp[row->Column];

          }

          *yp += a * b;

          yp++;

       }

       return;

    }


 #ifndef MFEM_USE_LEGACY_OPENMP

    // Local copy of the member 'height' (it shadows the member from here on).
    const int height = this->height;

    // The number of stored entries is taken from the capacity of J.
    const int nnz = J.Capacity();

    auto d_I = Read(I, height+1);

    auto d_J = Read(J, nnz);

    auto d_A = Read(A, nnz);

    auto d_x = x.Read();

    auto d_y = y.ReadWrite();


    // Skip if matrix has no non-zeros

    if (nnz == 0) {return;}

    if ((Device::Allows(Backend::CUDA_MASK | Backend::HIP_MASK)) && useGPUSparse)

    {

 #ifdef MFEM_USE_CUDA_OR_HIP

       // Scalars of the library operation Y = alpha A * X + beta * Y; beta = 1
       // makes it accumulate into y.
       const double alpha = a;

       const double beta  = 1.0;


       // Setup descriptors

       // The descriptors are created on the first call only and then reused;
       // ClearGPUSparse() destroys them.
       if (!initBuffers)

       {

 #if CUDA_VERSION >= 10010 || defined(MFEM_USE_HIP)

          // Setup matrix descriptor

          MFEM_cu_or_hip(sparseCreateCsr)(

             &matA_descr,Height(),

             Width(),

             J.Capacity(),

             const_cast<int *>(d_I),

             const_cast<int *>(d_J),

             const_cast<double *>(d_A),

             MFEM_CU_or_HIP(SPARSE_INDEX_32I),

             MFEM_CU_or_HIP(SPARSE_INDEX_32I),

             MFEM_CU_or_HIP(SPARSE_INDEX_BASE_ZERO),

             MFEM_CUDA_or_HIP(_R_64F));


          // Create handles for input/output vectors

          MFEM_cu_or_hip(sparseCreateDnVec)(&vecX_descr,

                                            x.Size(),

                                            const_cast<double *>(d_x),

                                            MFEM_CUDA_or_HIP(_R_64F));

          MFEM_cu_or_hip(sparseCreateDnVec)(&vecY_descr, y.Size(), d_y,

                                            MFEM_CUDA_or_HIP(_R_64F));

 #else

          cusparseCreateMatDescr(&matA_descr);

          cusparseSetMatIndexBase(matA_descr, CUSPARSE_INDEX_BASE_ZERO);

          cusparseSetMatType(matA_descr, CUSPARSE_MATRIX_TYPE_GENERAL);

 #endif // CUDA_VERSION >= 10010 || defined(MFEM_USE_HIP)

          initBuffers = true;

       }

       // Allocate kernel space. Buffer is shared between different sparsemats

       size_t newBufferSize = 0;


       MFEM_cu_or_hip(sparseSpMV_bufferSize)(

          handle,

          MFEM_CU_or_HIP(SPARSE_OPERATION_NON_TRANSPOSE),

          &alpha,

          matA_descr,

          vecX_descr,

          &beta,

          vecY_descr,

          MFEM_CUDA_or_HIP(_R_64F),

          MFEM_GPUSPARSE_ALG,

          &newBufferSize);


       // Check if we need to resize

       // The static buffer only ever grows: it is reallocated when the
       // requested size exceeds the current one.
       if (newBufferSize > bufferSize)

       {

          bufferSize = newBufferSize;

          if (dBuffer != nullptr) { MFEM_Cu_or_Hip(MemFree)(dBuffer); }

          MFEM_Cu_or_Hip(MemAlloc)(&dBuffer, bufferSize);

       }


 #if CUDA_VERSION >= 10010 || defined(MFEM_USE_HIP)

       // Update input/output vectors

       // (the vector descriptors are reused across calls, so they are pointed
       // at the current x and y data on every call)
       MFEM_cu_or_hip(sparseDnVecSetValues)(vecX_descr,

                                            const_cast<double *>(d_x));

       MFEM_cu_or_hip(sparseDnVecSetValues)(vecY_descr, d_y);


       // Y = alpha A * X + beta * Y

       MFEM_cu_or_hip(sparseSpMV)(

          handle,

          MFEM_CU_or_HIP(SPARSE_OPERATION_NON_TRANSPOSE),

          &alpha,

          matA_descr,

          vecX_descr,

          &beta,

          vecY_descr,

          MFEM_CUDA_or_HIP(_R_64F),

          MFEM_GPUSPARSE_ALG,

          dBuffer);

 #else

       cusparseDcsrmv(handle,

                      CUSPARSE_OPERATION_NON_TRANSPOSE,

                      Height(),

                      Width(),

                      J.Capacity(),

                      &alpha,

                      matA_descr,

                      const_cast<double *>(d_A),

                      const_cast<int *>(d_I),

                      const_cast<int *>(d_J),

                      const_cast<double *>(d_x),

                      &beta,

                      d_y);

 #endif // CUDA_VERSION >= 10010 || defined(MFEM_USE_HIP)

 #endif // MFEM_USE_CUDA_OR_HIP

    }

    else

    {

       // Native version

       // One row per index i: accumulate the row's dot product with x, then
       // add it, scaled by a, to y[i].
       MFEM_FORALL(i, height,

       {

          double d = 0.0;

          const int end = d_I[i+1];

          for (int j = d_I[i]; j < end; j++)

          {

             d += d_A[j] * d_x[d_J[j]];

          }

          d_y[i] += a * d;

       });


    }


 #else // MFEM_USE_LEGACY_OPENMP

    // Legacy OpenMP variant: same row-wise accumulation on raw pointers.
    const double *Ap = A, *xp = x.GetData();

    double *yp = y.GetData();

    const int *Jp = J, *Ip = I;


    #pragma omp parallel for

    for (int i = 0; i < height; i++)

    {

       double d = 0.0;

       const int end = Ip[i+1];

       for (int j = Ip[i]; j < end; j++)

       {

          d += Ap[j] * xp[Jp[j]];

       }

       yp[i] += a * d;

    }

 #endif // MFEM_USE_LEGACY_OPENMP

 }


 /// Transposed matrix-vector product: y = A^t x.
 /** @a y is zeroed and the product is then accumulated with
     AddMultTranspose(). For a finalized matrix, @a y is first flagged to use
     the device. */
 void SparseMatrix::MultTranspose(const Vector &x, Vector &y) const
 {
    if (Finalized())
    {
       y.UseDevice(true);
    }

    y = 0.0;
    AddMultTranspose(x, y);
 }


 void SparseMatrix::AddMultTranspose(const Vector &x, Vector &y,

                                     const double a) const

 {

    MFEM_ASSERT(height == x.Size(), "Input vector size (" << x.Size()

                << ") must match matrix height (" << height << ")");

    MFEM_ASSERT(width == y.Size(), "Output vector size (" << y.Size()

                << ") must match matrix width (" << width << ")");


    if (!Finalized())

    {

       double *yp = y.HostReadWrite();

       const double *xp = x.HostRead();

       // The matrix is not finalized, but multiplication is still possible

       for (int i = 0; i < height; i++)

       {

          RowNode *row = Rows[i];

          double b = a * xp[i];

          for ( ; row != NULL; row = row->Prev)

          {

             yp[row->Column] += row->Value * b;

          }

       }

       return;

    }


    EnsureMultTranspose();

    if (At)

    {

       At->AddMult(x, y, a);

    }

    else

    {

       for (int i = 0; i < height; i++)

       {

          const double xi = a * x[i];

          const int end = I[i+1];

          for (int j = I[i]; j < end; j++)

          {

             const int Jj = J[j];

             y[Jj] += A[j] * xi;

          }

       }

    }

 }


 /// Build and cache the explicit transpose in 'At', unless it already exists.
 void SparseMatrix::BuildTranspose() const
 {
    if (At != NULL) { return; }

    At = Transpose(*this);
 }


 /// Delete the cached explicit transpose (if any) and reset the pointer.
 void SparseMatrix::ResetTranspose() const
 {
    delete At; // deleting a null pointer is a no-op
    At = nullptr;
 }


 /// Build the explicit transpose when a non-CPU backend is allowed.
 /** If the Device allows no backend outside Backend::CPU_MASK, nothing is
     built here. */
 void SparseMatrix::EnsureMultTranspose() const
 {
    if (!Device::Allows(~Backend::CPU_MASK)) { return; }

    BuildTranspose();
 }


 /// Partial matrix-vector product: y[r] = (A x)[r] for every r in @a rows.
 /** Requires a finalized matrix. Only the entries of @a y listed in @a rows
     are assigned; all other entries must keep their previous values.

     For that reason @a y is accessed with ReadWrite() rather than Write():
     write-only access does not guarantee that the entries this kernel leaves
     untouched are preserved.

     @param rows  indices of the rows to compute.
     @param x     input vector.
     @param y     output vector, updated only at the indices in @a rows. */
 void SparseMatrix::PartMult(
    const Array<int> &rows, const Vector &x, Vector &y) const
 {
    MFEM_VERIFY(Finalized(), "Matrix must be finalized.");

    const int n = rows.Size();
    const int nnz = J.Capacity();
    auto d_rows = rows.Read();
    auto d_I = Read(I, height+1);
    auto d_J = Read(J, nnz);
    auto d_A = Read(A, nnz);
    auto d_x = x.Read();
    auto d_y = y.ReadWrite();
    MFEM_FORALL(i, n,
    {
       const int r = d_rows[i];
       const int end = d_I[r + 1];
       double a = 0.0;
       for (int j = d_I[r]; j < end; j++)
       {
          a += d_A[j] * d_x[d_J[j]];
       }
       d_y[r] = a;
    });
 }


 void SparseMatrix::PartAddMult(

    const Array<int> &rows, const Vector &x, Vector &y, const double a) const

 {

    MFEM_VERIFY(Finalized(), "Matrix must be finalized.");


    for (int i = 0; i < rows.Size(); i++)

    {

       int r = rows[i];

       int end = I[r + 1];

       double val = 0.0;

       for (int j = I[r]; j < end; j++)

       {

          val += A[j] * x(J[j]);

       }

       y(r) += a * val;

    }

 }


 void SparseMatrix::BooleanMult(const Array<int> &x, Array<int> &y) const

 {

    MFEM_ASSERT(Finalized(), "Matrix must be finalized.");

    MFEM_ASSERT(x.Size() == Width(), "Input vector size (" << x.Size()

                << ") must match matrix width (" << Width() << ")");


    y.SetSize(Height(), Device::GetDeviceMemoryType());


    const int height = Height();

    const int nnz = J.Capacity();

    auto d_I = Read(I, height+1);

    auto d_J = Read(J, nnz);

    auto d_x = Read(x.GetMemory(), x.Size());

    auto d_y = Write(y.GetMemory(), y.Size());

    MFEM_FORALL(i, height,

    {

       bool d_yi = false;

       const int end = d_I[i+1];

       for (int j = d_I[i]; j < end; j++)

       {

          if (d_x[d_J[j]])

          {

             d_yi = true;

             break;

          }

       }

       d_y[i] = d_yi;

    });

 }


 void SparseMatrix::BooleanMultTranspose(const Array<int> &x,

                                         Array<int> &y) const

 {

    MFEM_ASSERT(Finalized(), "Matrix must be finalized.");

    MFEM_ASSERT(x.Size() == Height(), "Input vector size (" << x.Size()

                << ") must match matrix height (" << Height() << ")");


    y.SetSize(Width());

    y = 0;


    for (int i = 0; i < Height(); i++)

    {

       if (x[i])

       {

          int end = I[i+1];

          for (int j = I[i]; j < end; j++)

          {

             y[J[j]] = x[i];

          }

       }

    }

 }


 void SparseMatrix::AbsMult(const Vector &x, Vector &y) const

 {

    MFEM_ASSERT(width == x.Size(), "Input vector size (" << x.Size()

                << ") must match matrix width (" << width << ")");

    MFEM_ASSERT(height == y.Size(), "Output vector size (" << y.Size()

                << ") must match matrix height (" << height << ")");


    if (Finalized()) { y.UseDevice(true); }

    y = 0.0;


    if (!Finalized())

    {

       const double *xp = x.HostRead();

       double *yp = y.HostReadWrite();


       // The matrix is not finalized, but multiplication is still possible

       for (int i = 0; i < height; i++)

       {

          RowNode *row = Rows[i];

          double b = 0.0;

          for ( ; row != NULL; row = row->Prev)

          {

             b += std::abs(row->Value) * xp[row->Column];

          }

          *yp += b;

          yp++;

       }

       return;

    }


    const int height = this->height;

    const int nnz = J.Capacity();

    auto d_I = Read(I, height+1);

    auto d_J = Read(J, nnz);

    auto d_A = Read(A, nnz);

    auto d_x = x.Read();

    auto d_y = y.ReadWrite();

    MFEM_FORALL(i, height,

    {

       double d = 0.0;

       const int end = d_I[i+1];

       for (int j = d_I[i]; j < end; j++)

       {

          d += std::abs(d_A[j]) * d_x[d_J[j]];

       }

       d_y[i] += d;

    });

 }


 void SparseMatrix::AbsMultTranspose(const Vector &x, Vector &y) const

 {

    MFEM_ASSERT(height == x.Size(), "Input vector size (" << x.Size()

                << ") must match matrix height (" << height << ")");

    MFEM_ASSERT(width == y.Size(), "Output vector size (" << y.Size()

                << ") must match matrix width (" << width << ")");


    y = 0.0;


    if (!Finalized())

    {

       double *yp = y.GetData();

       // The matrix is not finalized, but multiplication is still possible

       for (int i = 0; i < height; i++)

       {

          RowNode *row = Rows[i];

          double b = x(i);

          for ( ; row != NULL; row = row->Prev)

          {

             yp[row->Column] += fabs(row->Value) * b;

          }

       }

       return;

    }


    EnsureMultTranspose();

    if (At)

    {

       At->AbsMult(x, y);

    }

    else

    {

       for (int i = 0; i < height; i++)

       {

          const double xi = x[i];

          const int end = I[i+1];

          for (int j = I[i]; j < end; j++)

          {

             const int Jj = J[j];

             y[Jj] += std::abs(A[j]) * xi;

          }

       }

    }

 }


 double SparseMatrix::InnerProduct(const Vector &x, const Vector &y) const

 {

    MFEM_ASSERT(x.Size() == Width(), "x.Size() = " << x.Size()

                << " must be equal to Width() = " << Width());

    MFEM_ASSERT(y.Size() == Height(), "y.Size() = " << y.Size()

                << " must be equal to Height() = " << Height());


    x.HostRead();

    y.HostRead();

    if (Finalized())

    {

       const int nnz = J.Capacity();

       HostRead(I, height+1);

       HostRead(J, nnz);

       HostRead(A, nnz);

    }


    double prod = 0.0;

    for (int i = 0; i < height; i++)

    {

       double a = 0.0;

       if (A)

       {

          for (int j = I[i], end = I[i+1]; j < end; j++)

          {

             a += A[j] * x(J[j]);

          }

       }

       else

       {

          for (RowNode *np = Rows[i]; np != NULL; np = np->Prev)

          {

             a += np->Value * x(np->Column);

          }

       }

       prod += a * y(i);

    }


    return prod;

 }


 /// Store the sum of the entries of each row i in x(i).
 /** Works with both CSR and linked-list storage. @a x is not resized here. */
 void SparseMatrix::GetRowSums(Vector &x) const
 {
    for (int i = 0; i < height; i++)
    {
       double row_sum = 0.0;
       if (!A)
       {
          // Linked-list storage.
          for (RowNode *node = Rows[i]; node != NULL; node = node->Prev)
          {
             row_sum += node->Value;
          }
       }
       else
       {
          // CSR storage.
          const int row_end = I[i+1];
          for (int k = I[i]; k < row_end; k++)
          {
             row_sum += A[k];
          }
       }
       x(i) = row_sum;
    }
 }


 /// Return the l1 norm of row @a irow: the sum of the absolute values of the
 /// entries stored in that row.
 /** Uses the CSR arrays when the value array A is set and the row's linked
     list otherwise. Aborts (via MFEM_VERIFY) if @a irow is not a valid row
     index. */
 double SparseMatrix::GetRowNorml1(int irow) const
 {
    // Negative indices must be rejected as well: they would otherwise be used
    // to index I / Rows out of bounds.
    MFEM_VERIFY(irow >= 0 && irow < height,
                "row " << irow << " not in matrix with height " << height);

    double a = 0.0;
    if (A)
    {
       for (int j = I[irow], end = I[irow+1]; j < end; j++)
       {
          a += fabs(A[j]);
       }
    }
    else
    {
       for (RowNode *np = Rows[irow]; np != NULL; np = np->Prev)
       {
          a += fabs(np->Value);
       }
    }

    return a;
 }


 /// Rebuild the CSR arrays of a finalized matrix keeping only the entries
 /// whose absolute value is strictly greater than |tol|.
 /** If @a fix_empty_rows is true and the matrix is square, a row that ends up
     with no kept entries receives a single diagonal entry with value 0.0.
     The isSorted flag is recomputed from the column order of the kept
     entries. */
 void SparseMatrix::Threshold(double tol, bool fix_empty_rows)
 {
    MFEM_ASSERT(Finalized(), "Matrix must be finalized.");
    const double cutoff = std::abs(tol);

    // Placeholder diagonal entries are only inserted for square matrices.
    if (height != width) { fix_empty_rows = false; }

    // Pass 1: count the kept entries to build the new row offsets.
    int *newI = Memory<int>(height+1);
    newI[0] = 0;
    int kept = 0;
    for (int row = 0; row < height; row++)
    {
       bool any_kept = false;
       for (int k = I[row]; k < I[row+1]; k++)
       {
          if (std::abs(A[k]) > cutoff)
          {
             any_kept = true;
             kept++;
          }
       }
       if (fix_empty_rows && !any_kept) { kept++; }
       newI[row+1] = kept;
    }

    // Pass 2: copy the kept entries into freshly allocated arrays.
    int *newJ = Memory<int>(kept);
    double *newA = Memory<double>(kept);
    // Assume sorted columns until a decreasing pair is seen.
    isSorted = true;
    int pos = 0;
    for (int row = 0; row < height; row++)
    {
       bool any_kept = false;
       int prev_col = -1;
       for (int k = I[row]; k < I[row+1]; k++)
       {
          if (std::abs(A[k]) > cutoff)
          {
             any_kept = true;
             newJ[pos] = J[k];
             newA[pos] = A[k];
             if (prev_col > newJ[pos])
             {
                isSorted = false;
             }
             prev_col = newJ[pos];
             pos++;
          }
       }
       if (fix_empty_rows && !any_kept)
       {
          newJ[pos] = row;
          newA[pos] = 0.0;
          pos++;
       }
    }

    // Release the old storage, then take ownership of the new arrays.
    Destroy();
    I.Wrap(newI, height+1, true);
    J.Wrap(newJ, I[height], true);
    A.Wrap(newA, I[height], true);
 }


 /// Convert the matrix from the per-row linked-list format (Rows) to the CSR
 /// arrays I, J, A, then free the linked-list storage.
 /** Does nothing if the matrix is already finalized.

     @param skip_zeros  If zero, every stored node is copied. If nonzero,
                        nodes whose value is exactly 0.0 are candidates for
                        removal: with the value 2 they are always dropped;
                        otherwise off-diagonal zeros are dropped, and a
                        diagonal zero is dropped when the first node found in
                        its row with the diagonal column holds 0.0.
     @param fix_empty_rows  If true, a row that ends up with no entries gets
                        one entry in column equal to the row index with value
                        1.0.

     Two passes are made over the lists with the same filtering rules: the
     first one counts entries to fill I, the second one fills J and A and
     determines isSorted. */
 // NOTE(review): fix_empty_rows writes column index i without checking that
 // i < width -- presumably callers only use it for square matrices; confirm.
 void SparseMatrix::Finalize(int skip_zeros, bool fix_empty_rows)
 {
    int i, j, nr, nz;
    RowNode *aux;

    if (Finalized())
    {
       return;
    }

    delete [] ColPtrNode;
    ColPtrNode = NULL;

    // Pass 1: count the entries kept in each row and build the row offsets.
    I.New(height+1);
    I[0] = 0;
    for (i = 1; i <= height; i++)
    {
       nr = 0;
       for (aux = Rows[i-1]; aux != NULL; aux = aux->Prev)
       {
          if (skip_zeros && aux->Value == 0.0)
          {
             if (skip_zeros == 2) { continue; }
             if ((i-1) != aux->Column) { continue; }

             // Diagonal zero: look up column (i-1) in row aux->Column, which
             // equals (i-1) at this point; found_val is only read if found.
             bool found = false;
             double found_val;
             for (RowNode *other = Rows[aux->Column]; other != NULL; other = other->Prev)
             {
                if (other->Column == (i-1))
                {
                   found = true;
                   found_val = other->Value;
                   break;
                }
             }
             if (found && found_val == 0.0) { continue; }

          }
          nr++;
       }
       if (fix_empty_rows && !nr) { nr = 1; }
       I[i] = I[i-1] + nr;
    }

    // Pass 2: allocate J and A and copy the kept entries; the filtering must
    // match pass 1 exactly so that the offsets in I stay valid.
    nz = I[height];
    J.New(nz);
    A.New(nz);
    // Assume we're sorted until we find out otherwise
    isSorted = true;
    for (j = i = 0; i < height; i++)
    {
       int lastCol = -1;
       nr = 0;
       for (aux = Rows[i]; aux != NULL; aux = aux->Prev)
       {
          if (skip_zeros && aux->Value == 0.0)
          {
             if (skip_zeros == 2) { continue; }
             if (i != aux->Column) { continue; }

             bool found = false;
             double found_val;
             for (RowNode *other = Rows[aux->Column]; other != NULL; other = other->Prev)
             {
                if (other->Column == i)
                {
                   found = true;
                   found_val = other->Value;
                   break;
                }
             }
             if (found && found_val == 0.0) { continue; }
          }

          J[j] = aux->Column;
          A[j] = aux->Value;

          // A column smaller than its predecessor means the row is unsorted.
          if ( lastCol > J[j] )
          {
             isSorted = false;
          }
          lastCol = J[j];

          j++;
          nr++;
       }
       if (fix_empty_rows && !nr)
       {
          // Empty row: insert a unit entry in column i.
          J[j] = i;
          A[j] = 1.0;
          j++;
       }
    }

    // Release the linked-list nodes: all at once through NodesMem when
    // MFEM_USE_MEMALLOC is defined, otherwise node by node.
 #ifdef MFEM_USE_MEMALLOC
    delete NodesMem;
    NodesMem = NULL;
 #else
    for (i = 0; i < height; i++)
    {
       RowNode *node_p = Rows[i];
       while (node_p != NULL)
       {
          aux = node_p;
          node_p = node_p->Prev;
          delete aux;
       }
    }
 #endif

    delete [] Rows;
    Rows = NULL;
 }


 /// Split the matrix into blocks.NumRows() x blocks.NumCols() sub-matrices
 /// stored in CSR format.
 /** Every block has nr = ceil(height/br) rows and nc = ceil(width/bc)
     columns. Entry (r,c) goes to block (r/nr, c/nc) at local position
     (r%nr, c%nc). Entries equal to 0.0 are not copied. Works for both
     finalized and non-finalized matrices.

     Each entry of @a blocks is overwritten with a SparseMatrix allocated
     with new -- presumably the caller is responsible for deleting them;
     confirm against the callers. */
 void SparseMatrix::GetBlocks(Array2D<SparseMatrix *> &blocks) const
 {
    int br = blocks.NumRows(), bc = blocks.NumCols();
    int nr = (height + br - 1)/br, nc = (width + bc - 1)/bc;

    // Create the blocks with a zero-initialized row-offset array and no
    // column/value arrays yet.
    for (int j = 0; j < bc; j++)
    {
       for (int i = 0; i < br; i++)
       {
          int *bI = Memory<int>(nr + 1);
          for (int k = 0; k <= nr; k++)
          {
             bI[k] = 0;
          }
          blocks(i,j) = new SparseMatrix(bI, NULL, NULL, nr, nc);
       }
    }

    // Counting pass: I[i] of each block, with i = local row + 1, receives the
    // number of nonzero entries of that local row.
    for (int gr = 0; gr < height; gr++)
    {
       int bi = gr/nr, i = gr%nr + 1;
       if (Finalized())
       {
          for (int j = I[gr]; j < I[gr+1]; j++)
          {
             if (A[j] != 0.0)
             {
                blocks(bi, J[j]/nc)->I[i]++;
             }
          }
       }
       else
       {
          for (RowNode *n_p = Rows[gr]; n_p != NULL; n_p = n_p->Prev)
          {
             if (n_p->Value != 0.0)
             {
                blocks(bi, n_p->Column/nc)->I[i]++;
             }
          }
       }
    }

    // Turn the counts into offsets: afterwards I[k] is the start of local row
    // k-1, so it can serve as that row's write cursor in the filling pass.
    for (int j = 0; j < bc; j++)
    {
       for (int i = 0; i < br; i++)
       {
          SparseMatrix &b = *blocks(i,j);
          int nnz = 0, rs;
          for (int k = 1; k <= nr; k++)
          {
             rs = b.I[k], b.I[k] = nnz, nnz += rs;
          }
          b.J.New(nnz);
          b.A.New(nnz);
       }
    }

    // Filling pass: write each nonzero at the row cursor and advance it. When
    // a row is complete, its cursor equals the start of the next row, which
    // is the value a CSR offset array needs.
    for (int gr = 0; gr < height; gr++)
    {
       int bi = gr/nr, i = gr%nr + 1;
       if (Finalized())
       {
          for (int j = I[gr]; j < I[gr+1]; j++)
          {
             if (A[j] != 0.0)
             {
                SparseMatrix &b = *blocks(bi, J[j]/nc);
                b.J[b.I[i]] = J[j] % nc;
                b.A[b.I[i]] = A[j];
                b.I[i]++;
             }
          }
       }
       else
       {
          for (RowNode *n_p = Rows[gr]; n_p != NULL; n_p = n_p->Prev)
          {
             if (n_p->Value != 0.0)
             {
                SparseMatrix &b = *blocks(bi, n_p->Column/nc);
                b.J[b.I[i]] = n_p->Column % nc;
                b.A[b.I[i]] = n_p->Value;
                b.I[i]++;
             }
          }
       }
    }
 }


 double SparseMatrix::IsSymmetric() const

 {

    if (height != width)

    {

       return infinity();

    }


    double symm = 0.0;

    if (Empty())

    {

       // return 0.0;

    }

    else if (Finalized())

    {

       for (int i = 1; i < height; i++)

       {

          for (int j = I[i]; j < I[i+1]; j++)

          {

             if (J[j] < i)

             {

                symm = std::max(symm, std::abs(A[j]-(*this)(J[j],i)));

             }

          }

       }

    }

    else

    {

       for (int i = 0; i < height; i++)

       {

          for (RowNode *node_p = Rows[i]; node_p != NULL; node_p = node_p->Prev)

          {

             int col = node_p->Column;

             if (col < i)

             {

                symm = std::max(symm, std::abs(node_p->Value-(*this)(col,i)));

             }

          }

       }

    }

    return symm;

 }


 /// Replace each pair a_ij, a_ji (j < i) of a finalized matrix by the average
 /// of the two values.
 /** For every stored entry below the diagonal the transposed entry is
     accessed through operator()(int,int). */
 void SparseMatrix::Symmetrize()
 {
    MFEM_VERIFY(Finalized(), "Matrix must be finalized.");

    for (int row = 1; row < height; row++)
    {
       for (int k = I[row]; k < I[row+1]; k++)
       {
          const int col = J[k];
          if (col >= row) { continue; }

          // Entry (col,row) is the mirror image of the entry stored at k.
          double &mirror = (*this)(col,row);
          A[k] += mirror;
          A[k] *= 0.5;
          mirror = A[k];
       }
    }
 }


 int SparseMatrix::NumNonZeroElems() const

 {

    if (Finalized())

    {

       HostReadI();

       return I[height];

    }

    else

    {

       int nnz = 0;


       for (int i = 0; i < height; i++)

       {

          for (RowNode *node_p = Rows[i]; node_p != NULL; node_p = node_p->Prev)

          {

             nnz++;

          }

       }


       return nnz;

    }

 }


 double SparseMatrix::MaxNorm() const

 {

    double m = 0.0;


    if (A)

    {

       int nnz = I[height];

       for (int j = 0; j < nnz; j++)

       {

          m = std::max(m, std::abs(A[j]));

       }

    }

    else

    {

       for (int i = 0; i < height; i++)

       {

          for (RowNode *n_p = Rows[i]; n_p != NULL; n_p = n_p->Prev)

          {

             m = std::max(m, std::abs(n_p->Value));

          }

       }

    }

    return m;

 }


 int SparseMatrix::CountSmallElems(double tol) const

 {

    int counter = 0;


    if (A)

    {

       const int nz = I[height];

       const double *Ap = A;


       for (int i = 0; i < nz; i++)

       {

          counter += (std::abs(Ap[i]) <= tol);

       }

    }

    else

    {

       for (int i = 0; i < height; i++)

       {

          for (RowNode *aux = Rows[i]; aux != NULL; aux = aux->Prev)

          {

             counter += (std::abs(aux->Value) <= tol);

          }

       }

    }


    return counter;

 }


 int SparseMatrix::CheckFinite() const

 {

    if (Empty())

    {

       return 0;

    }

    else if (Finalized())

    {

       return mfem::CheckFinite(A, I[height]);

    }

    else

    {

       int counter = 0;

       for (int i = 0; i < height; i++)

       {

          for (RowNode *aux = Rows[i]; aux != NULL; aux = aux->Prev)

          {

             counter += !IsFinite(aux->Value);

          }

       }

       return counter;

    }

 }


 /// No inverse object is provided for this class: always returns a null
 /// pointer.
 MatrixInverse *SparseMatrix::Inverse() const
 {
    return nullptr;
 }


 void SparseMatrix::EliminateRow(int row, const double sol, Vector &rhs)

 {

    RowNode *aux;


    MFEM_ASSERT(row < height && row >= 0,

                "Row " << row << " not in matrix of height " << height);


    MFEM_VERIFY(!Finalized(), "Matrix must NOT be finalized.");


    for (aux = Rows[row]; aux != NULL; aux = aux->Prev)

    {

       rhs(aux->Column) -= sol * aux->Value;

       aux->Value = 0.0;

    }

 }


 void SparseMatrix::EliminateRow(int row, DiagonalPolicy dpolicy)

 {

    RowNode *aux;


    MFEM_ASSERT(row < height && row >= 0,

                "Row " << row << " not in matrix of height " << height);

    MFEM_ASSERT(dpolicy != DIAG_KEEP, "Diagonal policy must not be DIAG_KEEP");

    MFEM_ASSERT(dpolicy != DIAG_ONE || height == width,

                "if dpolicy == DIAG_ONE, matrix must be square, not height = "

                << height << ",  width = " << width);


    if (Rows == NULL)

    {

       for (int i=I[row]; i < I[row+1]; ++i)

       {

          A[i]=0.0;

       }

    }

    else

    {

       for (aux = Rows[row]; aux != NULL; aux = aux->Prev)

       {

          aux->Value = 0.0;

       }

    }


    if (dpolicy == DIAG_ONE)

    {

       SearchRow(row, row) = 1.;

    }

 }


 void SparseMatrix::EliminateCol(int col, DiagonalPolicy dpolicy)

 {

    MFEM_ASSERT(col < width && col >= 0,

                "Col " << col << " not in matrix of width " << width);

    MFEM_ASSERT(dpolicy != DIAG_KEEP, "Diagonal policy must not be DIAG_KEEP");

    MFEM_ASSERT(dpolicy != DIAG_ONE || height == width,

                "if dpolicy == DIAG_ONE, matrix must be square, not height = "

                << height << ",  width = " << width);


    if (Rows == NULL)

    {

       const int nnz = I[height];

       for (int jpos = 0; jpos != nnz; ++jpos)

       {

          if (J[jpos] == col)

          {

             A[jpos] = 0.0;

          }

       }

    }

    else

    {

       for (int i = 0; i < height; i++)

       {

          for (RowNode *aux = Rows[i]; aux != NULL; aux = aux->Prev)

          {

             if (aux->Column == col)

             {

                aux->Value = 0.0;

             }

          }

       }

    }


    if (dpolicy == DIAG_ONE)

    {

       SearchRow(col, col) = 1.0;

    }

 }


 void SparseMatrix::EliminateCols(const Array<int> &cols, const Vector *x,

                                  Vector *b)

 {

    if (Rows == NULL)

    {

       for (int i = 0; i < height; i++)

       {

          for (int jpos = I[i]; jpos != I[i+1]; ++jpos)

          {

             if (cols[ J[jpos]])

             {

                if (x && b)

                {

                   (*b)(i) -= A[jpos] * (*x)( J[jpos] );

                }

                A[jpos] = 0.0;

             }

          }

       }

    }

    else

    {

       for (int i = 0; i < height; i++)

       {

          for (RowNode *aux = Rows[i]; aux != NULL; aux = aux->Prev)

          {

             if (cols[aux -> Column])

             {

                if (x && b)

                {

                   (*b)(i) -= aux -> Value * (*x)(aux -> Column);

                }

                aux->Value = 0.0;

             }

          }

       }

    }

 }


 void SparseMatrix::EliminateCols(const Array<int> &col_marker, SparseMatrix &Ae)

 {

    if (Rows)

    {

       RowNode *nd;

       for (int row = 0; row < height; row++)

       {

          for (nd = Rows[row]; nd != NULL; nd = nd->Prev)

          {

             if (col_marker[nd->Column])

             {

                Ae.Add(row, nd->Column, nd->Value);

                nd->Value = 0.0;

             }

          }

       }

    }

    else

    {

       for (int row = 0; row < height; row++)

       {

          for (int j = I[row]; j < I[row+1]; j++)

          {

             if (col_marker[J[j]])

             {

                Ae.Add(row, J[j], A[j]);

                A[j] = 0.0;

             }

          }

       }

    }

 }


 void SparseMatrix::EliminateRowCol(int rc, const double sol, Vector &rhs,

                                    DiagonalPolicy dpolicy)

 {

    MFEM_ASSERT(rc < height && rc >= 0,

                "Row " << rc << " not in matrix of height " << height);

    HostReadWriteI();

    HostReadWriteJ();

    HostReadWriteData();


    if (Rows == NULL)

    {

       for (int j = I[rc]; j < I[rc+1]; j++)

       {

          const int col = J[j];

          if (col == rc)

          {

             switch (dpolicy)

             {

                case DIAG_KEEP:

                   rhs(rc) = A[j] * sol;

                   break;

                case DIAG_ONE:

                   A[j] = 1.0;

                   rhs(rc) = sol;

                   break;

                case DIAG_ZERO:

                   A[j] = 0.;

                   rhs(rc) = 0.;

                   break;

                default:

                   mfem_error("SparseMatrix::EliminateRowCol () #2");

                   break;

             }

          }

          else

          {

             A[j] = 0.0;

             for (int k = I[col]; 1; k++)

             {

                if (k == I[col+1])

                {

                   mfem_error("SparseMatrix::EliminateRowCol () #3");

                }

                else if (J[k] == rc)

                {

                   rhs(col) -= sol * A[k];

                   A[k] = 0.0;

                   break;

                }

             }

          }

       }

    }

    else

    {

       for (RowNode *aux = Rows[rc]; aux != NULL; aux = aux->Prev)

       {

          const int col = aux->Column;

          if (col == rc)

          {

             switch (dpolicy)

             {

                case DIAG_KEEP:

                   rhs(rc) = aux->Value * sol;

                   break;

                case DIAG_ONE:

                   aux->Value = 1.0;

                   rhs(rc) = sol;

                   break;

                case DIAG_ZERO:

                   aux->Value = 0.;

                   rhs(rc) = 0.;

                   break;

                default:

                   mfem_error("SparseMatrix::EliminateRowCol () #4");

                   break;

             }

          }

          else

          {

             aux->Value = 0.0;

             for (RowNode *node = Rows[col]; 1; node = node->Prev)

             {

                if (node == NULL)

                {

                   mfem_error("SparseMatrix::EliminateRowCol () #5");

                }

                else if (node->Column == rc)

                {

                   rhs(col) -= sol * node->Value;

                   node->Value = 0.0;

                   break;

                }

             }

          }

       }

    }

 }


 void SparseMatrix::EliminateRowColMultipleRHS(int rc, const Vector &sol,

                                               DenseMatrix &rhs,

                                               DiagonalPolicy dpolicy)

 {

    MFEM_ASSERT(rc < height && rc >= 0,

                "Row " << rc << " not in matrix of height " << height);

    MFEM_ASSERT(sol.Size() == rhs.Width(), "solution size (" << sol.Size()

                << ") must match rhs width (" << rhs.Width() << ")");


    const int num_rhs = rhs.Width();

    if (Rows == NULL)

    {

       for (int j = I[rc]; j < I[rc+1]; j++)

       {

          const int col = J[j];

          if (col == rc)

          {

             switch (dpolicy)

             {

                case DIAG_KEEP:

                   for (int r = 0; r < num_rhs; r++)

                   {

                      rhs(rc,r) = A[j] * sol(r);

                   }

                   break;

                case DIAG_ONE:

                   A[j] = 1.0;

                   for (int r = 0; r < num_rhs; r++)

                   {

                      rhs(rc,r) = sol(r);

                   }

                   break;

                case DIAG_ZERO:

                   A[j] = 0.;

                   for (int r = 0; r < num_rhs; r++)

                   {

                      rhs(rc,r) = 0.;

                   }

                   break;

                default:

                   mfem_error("SparseMatrix::EliminateRowColMultipleRHS() #3");

                   break;

             }

          }

          else

          {

             A[j] = 0.0;

             for (int k = I[col]; 1; k++)

             {

                if (k == I[col+1])

                {

                   mfem_error("SparseMatrix::EliminateRowColMultipleRHS() #4");

                }

                else if (J[k] == rc)

                {

                   for (int r = 0; r < num_rhs; r++)

                   {

                      rhs(col,r) -= sol(r) * A[k];

                   }

                   A[k] = 0.0;

                   break;

                }

             }

          }

       }

    }

    else

    {

       for (RowNode *aux = Rows[rc]; aux != NULL; aux = aux->Prev)

       {

          const int col = aux->Column;

          if (col == rc)

          {

             switch (dpolicy)

             {

                case DIAG_KEEP:

                   for (int r = 0; r < num_rhs; r++)

                   {

                      rhs(rc,r) = aux->Value * sol(r);

                   }

                   break;

                case DIAG_ONE:

                   aux->Value = 1.0;

                   for (int r = 0; r < num_rhs; r++)

                   {

                      rhs(rc,r) = sol(r);

                   }

                   break;

                case DIAG_ZERO:

                   aux->Value = 0.;

                   for (int r = 0; r < num_rhs; r++)

                   {

                      rhs(rc,r) = 0.;

                   }

                   break;

                default:

                   mfem_error("SparseMatrix::EliminateRowColMultipleRHS() #5");

                   break;

             }

          }

          else

          {

             aux->Value = 0.0;

             for (RowNode *node = Rows[col]; 1; node = node->Prev)

             {

                if (node == NULL)

                {

                   mfem_error("SparseMatrix::EliminateRowColMultipleRHS() #6");

                }

                else if (node->Column == rc)

                {

                   for (int r = 0; r < num_rhs; r++)

                   {

                      rhs(col,r) -= sol(r) * node->Value;

                   }

                   node->Value = 0.0;

                   break;

                }

             }

          }

       }

    }

 }


 void SparseMatrix::EliminateRowCol(int rc, DiagonalPolicy dpolicy)

 {

    MFEM_ASSERT(rc < height && rc >= 0,

                "Row " << rc << " not in matrix of height " << height);


    if (Rows == NULL)

    {

       const auto &II = this->I; // only use const access for I

       const auto &JJ = this->J; // only use const access for J

       for (int j = II[rc]; j < II[rc+1]; j++)

       {

          const int col = JJ[j];

          if (col == rc)

          {

             if (dpolicy == DIAG_ONE)

             {

                A[j] = 1.0;

             }

             else if (dpolicy == DIAG_ZERO)

             {

                A[j] = 0.0;

             }

          }

          else

          {

             A[j] = 0.0;

             for (int k = II[col]; 1; k++)

             {

                if (k == II[col+1])

                {

                   mfem_error("SparseMatrix::EliminateRowCol() #2");

                }

                else if (JJ[k] == rc)

                {

                   A[k] = 0.0;

                   break;

                }

             }

          }

       }

    }

    else

    {

       RowNode *aux, *node;


       for (aux = Rows[rc]; aux != NULL; aux = aux->Prev)

       {

          const int col = aux->Column;

          if (col == rc)

          {

             if (dpolicy == DIAG_ONE)

             {

                aux->Value = 1.0;

             }

             else if (dpolicy == DIAG_ZERO)

             {

                aux->Value = 0.;

             }

          }

          else

          {

             aux->Value = 0.0;

             for (node = Rows[col]; 1; node = node->Prev)

             {

                if (node == NULL)

                {

                   mfem_error("SparseMatrix::EliminateRowCol() #3");

                }

                else if (node->Column == rc)

                {

                   node->Value = 0.0;

                   break;

                }

             }

          }

       }

    }

 }


 // This is almost identical to EliminateRowCol(int, DiagonalPolicy), except
 // that the diagonal entry is always overwritten: A[j] = value; and
 // aux->Value = value;
 /// Eliminate row and column @a rc and set the diagonal entry to @a value.
 /** Each off-diagonal entry (rc,col) and its transposed entry (col,rc) are
     set to zero. It is an error if the transposed entry is not stored in
     row col. Works for both the CSR and the linked-list storage. */
 void SparseMatrix::EliminateRowColDiag(int rc, double value)
 {
    MFEM_ASSERT(rc < height && rc >= 0,
                "Row " << rc << " not in matrix of height " << height);

    if (Rows == NULL)
    {
       for (int j = I[rc]; j < I[rc+1]; j++)
       {
          const int col = J[j];
          if (col == rc)
          {
             A[j] = value;
          }
          else
          {
             A[j] = 0.0;
             // Find and zero the transposed entry (col,rc); the loop only
             // ends through the break or the error below.
             for (int k = I[col]; 1; k++)
             {
                if (k == I[col+1])
                {
                   // The message names this function, so a failure here is
                   // not attributed to EliminateRowCol().
                   mfem_error("SparseMatrix::EliminateRowColDiag() #2");
                }
                else if (J[k] == rc)
                {
                   A[k] = 0.0;
                   break;
                }
             }
          }
       }
    }
    else
    {
       RowNode *aux, *node;

       for (aux = Rows[rc]; aux != NULL; aux = aux->Prev)
       {
          const int col = aux->Column;
          if (col == rc)
          {
             aux->Value = value;
          }
          else
          {
             aux->Value = 0.0;
             // Find and zero the transposed entry (col,rc) in row col.
             for (node = Rows[col]; 1; node = node->Prev)
             {
                if (node == NULL)
                {
                   mfem_error("SparseMatrix::EliminateRowColDiag() #3");
                }
                else if (node->Column == rc)
                {
                   node->Value = 0.0;
                   break;
                }
             }
          }
       }
    }
 }


 void SparseMatrix::EliminateRowCol(int rc, SparseMatrix &Ae,

                                    DiagonalPolicy dpolicy)

 {

    if (Rows)

    {

       RowNode *nd, *nd2;

       for (nd = Rows[rc]; nd != NULL; nd = nd->Prev)

       {

          const int col = nd->Column;

          if (col == rc)

          {

             switch (dpolicy)

             {

                case DIAG_ONE:

                   Ae.Add(rc, rc, nd->Value - 1.0);

                   nd->Value = 1.0;

                   break;

                case DIAG_ZERO:

                   Ae.Add(rc, rc, nd->Value);

                   nd->Value = 0.;

                   break;

                case DIAG_KEEP:

                   break;

                default:

                   mfem_error("SparseMatrix::EliminateRowCol #1");

                   break;

             }

          }

          else

          {

             Ae.Add(rc, col, nd->Value);

             nd->Value = 0.0;

             for (nd2 = Rows[col]; 1; nd2 = nd2->Prev)

             {

                if (nd2 == NULL)

                {

                   mfem_error("SparseMatrix::EliminateRowCol #2");

                }

                else if (nd2->Column == rc)

                {

                   Ae.Add(col, rc, nd2->Value);

                   nd2->Value = 0.0;

                   break;

                }

             }

          }

       }

    }

    else

    {

       for (int j = I[rc]; j < I[rc+1]; j++)

       {

          const int col = J[j];

          if (col == rc)

          {

             switch (dpolicy)

             {

                case DIAG_ONE:

                   Ae.Add(rc, rc, A[j] - 1.0);

                   A[j] = 1.0;

                   break;

                case DIAG_ZERO:

                   Ae.Add(rc, rc, A[j]);

                   A[j] = 0.;

                   break;

                case DIAG_KEEP:

                   break;

                default:

                   mfem_error("SparseMatrix::EliminateRowCol #3");

                   break;

             }

          }

          else

          {

             Ae.Add(rc, col, A[j]);

             A[j] = 0.0;

             for (int k = I[col]; true; k++)

             {

                if (k == I[col+1])

                {

                   mfem_error("SparseMatrix::EliminateRowCol #4");

                }

                else if (J[k] == rc)

                {

                   Ae.Add(col, rc, A[k]);

                   A[k] = 0.0;

                   break;

                }

             }

          }

       }

    }

 }


 /// Eliminate the rows and columns listed in @a ess_dofs, working directly on
 /// the CSR arrays.
 /** For every index idof in @a ess_dofs, each off-diagonal entry of row idof
     is set to zero together with its transposed entry (jdof,idof), if that
     entry is found in row jdof. Unlike the EliminateRowCol() methods, a
     missing transposed entry is not reported as an error. The diagonal entry
     is set to 1 for DIAG_ONE, to 0 for DIAG_ZERO and left unchanged
     otherwise.

     The loop over @a ess_dofs is expressed with MFEM_FORALL -- presumably it
     runs on the currently configured device backend; confirm in
     general/forall.hpp. */
 // NOTE(review): only the CSR arrays are accessed, so this looks like it
 // expects a finalized matrix -- confirm against callers.
 void SparseMatrix::EliminateBC(const Array<int> &ess_dofs,
                                DiagonalPolicy diag_policy)
 {
    const int n_ess_dofs = ess_dofs.Size();
    const auto ess_dofs_d = ess_dofs.Read();
    const auto dI = ReadI();
    const auto dJ = ReadJ();
    auto dA = ReadWriteData();

    MFEM_FORALL(i, n_ess_dofs,
    {
       const int idof = ess_dofs_d[i];
       for (int j=dI[idof]; j<dI[idof+1]; ++j)
       {
          const int jdof = dJ[j];
          if (jdof != idof)
          {
             // Off-diagonal entry (idof,jdof): zero it, then search row jdof
             // for the transposed entry and zero that as well.
             dA[j] = 0.0;
             for (int k=dI[jdof]; k<dI[jdof+1]; ++k)
             {
                if (dJ[k] == idof)
                {
                   dA[k] = 0.0;
                   break;
                }
             }
          }
          else
          {
             // Diagonal entry: apply the requested policy.
             if (diag_policy == DiagonalPolicy::DIAG_ONE)
             {
                dA[j] = 1.0;
             }
             else if (diag_policy == DiagonalPolicy::DIAG_ZERO)
             {
                dA[j] = 0.0;
             }
             // else (diag_policy == DiagonalPolicy::DIAG_KEEP)
          }
       }
    });
 }


 /// For every row that stores exactly one entry whose absolute value is
 /// below 1e-16, overwrite that entry with 1.0.
 /** Only the CSR arrays are accessed. */
 void SparseMatrix::SetDiagIdentity()
 {
    for (int row = 0; row < height; row++)
    {
       const int first = I[row];
       // Skip rows that do not hold exactly one entry.
       if (I[row+1] != first + 1) { continue; }

       if (fabs(A[first]) < 1e-16)
       {
          A[first] = 1.0;
       }
    }
 }


 /// Replace every row whose l1 norm is <= threshold by a row of the identity.
 /** In such a row the stored diagonal entry (if any) is set to 1.0 and all
     other stored entries to 0.0. Only the CSR arrays are accessed. */
 void SparseMatrix::EliminateZeroRows(const double threshold)
 {
    for (int row = 0; row < height; row++)
    {
       // Sum of the absolute values of the entries stored in this row.
       double row_l1 = 0.0;
       for (int k = I[row]; k < I[row+1]; k++)
       {
          row_l1 += fabs(A[k]);
       }

       if (row_l1 <= threshold)
       {
          for (int k = I[row]; k < I[row+1]; k++)
          {
             if (J[k] == row)
             {
                A[k] = 1.0;
             }
             else
             {
                A[k] = 0.0;
             }
          }
       }
    }
 }


 void SparseMatrix::Gauss_Seidel_forw(const Vector &x, Vector &y) const

 {

    if (!Finalized())

    {

       double *yp = y.GetData();

       const double *xp = x.GetData();

       RowNode *diag_p, *n_p, **R = Rows;


       const int s = height;

       for (int i = 0; i < s; i++)

       {

          double sum = 0.0;

          diag_p = NULL;

          for (n_p = R[i]; n_p != NULL; n_p = n_p->Prev)

          {

             const int c = n_p->Column;

             if (c == i)

             {

                diag_p = n_p;

             }

             else

             {

                sum += n_p->Value * yp[c];

             }

          }


          if (diag_p != NULL && diag_p->Value != 0.0)

          {

             yp[i] = (xp[i] - sum) / diag_p->Value;

          }

          else if (xp[i] == sum)

          {

             yp[i] = sum;

          }

          else

          {

             mfem_error("SparseMatrix::Gauss_Seidel_forw()");

          }

       }

    }

    else

    {

       const int s = height;

       const int nnz = J.Capacity();

       const int *Ip = HostRead(I, s+1);

       const int *Jp = HostRead(J, nnz);

       const double *Ap = HostRead(A, nnz);

       double *yp = y.HostReadWrite();

       const double *xp = x.HostRead();


       for (int i = 0, j = Ip[0]; i < s; i++)

       {

          const int end = Ip[i+1];

          double sum = 0.0;

          int d = -1;

          for ( ; j < end; j++)

          {

             const int c = Jp[j];

             if (c == i)

             {

                d = j;

             }

             else

             {

                sum += Ap[j] * yp[c];

             }

          }


          if (d >= 0 && Ap[d] != 0.0)

          {

             yp[i] = (xp[i] - sum) / Ap[d];

          }

          else if (xp[i] == sum)

          {

             yp[i] = sum;

          }

          else

          {

             mfem_error("SparseMatrix::Gauss_Seidel_forw(...) #2");

          }

       }

    }

 }


 void SparseMatrix::Gauss_Seidel_back(const Vector &x, Vector &y) const

 {

    if (!Finalized())

    {

       double *yp = y.GetData();

       const double *xp = x.GetData();

       RowNode *diag_p, *n_p, **R = Rows;


       for (int i = height-1; i >= 0; i--)

       {

          double sum = 0.;

          diag_p = NULL;

          for (n_p = R[i]; n_p != NULL; n_p = n_p->Prev)

          {

             const int c = n_p->Column;

             if (c == i)

             {

                diag_p = n_p;

             }

             else

             {

                sum += n_p->Value * yp[c];

             }

          }


          if (diag_p != NULL && diag_p->Value != 0.0)

          {

             yp[i] = (xp[i] - sum) / diag_p->Value;

          }

          else if (xp[i] == sum)

          {

             yp[i] = sum;

          }

          else

          {

             mfem_error("SparseMatrix::Gauss_Seidel_back()");

          }

       }

    }

    else

    {

       const int s = height;

       const int nnz = J.Capacity();

       const int *Ip = HostRead(I, s+1);

       const int *Jp = HostRead(J, nnz);

       const double *Ap = HostRead(A, nnz);

       double *yp = y.HostReadWrite();

       const double *xp = x.HostRead();


       for (int i = s-1, j = Ip[s]-1; i >= 0; i--)

       {

          const int beg = Ip[i];

          double sum = 0.;

          int d = -1;

          for ( ; j >= beg; j--)

          {

             const int c = Jp[j];

             if (c == i)

             {

                d = j;

             }

             else

             {

                sum += Ap[j] * yp[c];

             }

          }


          if (d >= 0 && Ap[d] != 0.0)

          {

             yp[i] = (xp[i] - sum) / Ap[d];

          }

          else if (xp[i] == sum)

          {

             yp[i] = sum;

          }

          else

          {

             mfem_error("SparseMatrix::Gauss_Seidel_back(...) #2");

          }

       }

    }

 }


 double SparseMatrix::GetJacobiScaling() const
 {
    MFEM_VERIFY(Finalized(), "Matrix must be finalized.");

    // Returns min(1, min_i 1.8*|a_ii| / sum_j |a_ij|); aborts through
    // mfem_error() when a row has no (or a zero) diagonal entry.
    double scaling = 1.0;
    for (int row = 0; row < height; row++)
    {
       int diag_pos = -1;
       double row_l1 = 0.0;
       const int row_end = I[row+1];
       for (int k = I[row]; k < row_end; k++)
       {
          if (J[k] == row)
          {
             diag_pos = k;
          }
          row_l1 += fabs(A[k]);
       }

       if (diag_pos < 0 || A[diag_pos] == 0.0)
       {
          mfem_error("SparseMatrix::GetJacobiScaling() #2");
       }
       else
       {
          const double candidate = 1.8 * fabs(A[diag_pos]) / row_l1;
          if (candidate < scaling)
          {
             scaling = candidate;
          }
       }
    }
    return scaling;
 }


 void SparseMatrix::Jacobi(const Vector &b, const Vector &x0, Vector &x1,
                           double sc, bool use_abs_diag) const
 {
    MFEM_VERIFY(Finalized(), "Matrix must be finalized.");

    // One damped Jacobi step, row by row:
    //    x1(i) = sc * (b(i) - sum_{j != i} a_ij x0(j)) / d_i + (1 - sc) * x0(i)
    // where d_i is a_ii, or |a_ii| when use_abs_diag is set.
    for (int row = 0; row < height; row++)
    {
       int diag_pos = -1;
       double rhs = b(row);
       const int row_end = I[row+1];
       for (int k = I[row]; k < row_end; k++)
       {
          if (J[k] != row)
          {
             rhs -= A[k] * x0(J[k]);
          }
          else
          {
             diag_pos = k;
          }
       }

       if (diag_pos < 0 || A[diag_pos] == 0.0)
       {
          // The diagonal entry is missing or is exactly zero.
          mfem_error("SparseMatrix::Jacobi(...) #2");
       }
       else
       {
          const double diag = (use_abs_diag) ? fabs(A[diag_pos]) : A[diag_pos];
          x1(row) = sc * (rhs / diag) + (1.0 - sc) * x0(row);
       }
    }
 }


 void SparseMatrix::DiagScale(const Vector &b, Vector &x,

                              double sc, bool use_abs_diag) const

 {

    MFEM_VERIFY(Finalized(), "Matrix must be finalized.");


    const int H = height;

    const int nnz = J.Capacity();

    const bool use_dev = b.UseDevice() || x.UseDevice();


    const auto Ap = Read(A, nnz, use_dev);

    const auto Ip = Read(I, height+1, use_dev);

    const auto Jp = Read(J, nnz, use_dev);


    const auto bp = b.Read(use_dev);

    auto xp = x.Write(use_dev);


    MFEM_FORALL_SWITCH(use_dev, i, H,

    {

       const int end = Ip[i+1];

       for (int j = Ip[i]; true; j++)

       {

          if (j == end)

          {

             MFEM_ABORT_KERNEL("Diagonal not found in SparseMatrix::DiagScale");

          }

          if (Jp[j] == i)

          {

             const double diag = (use_abs_diag) ? fabs(Ap[j]) : Ap[j];

             if (diag == 0.0)

             {

                MFEM_ABORT_KERNEL("Zero diagonal in SparseMatrix::DiagScale");

             }

             xp[i] = sc * bp[i] / diag;

             break;

          }

       }

    });

 }


 template <bool useFabs>

 static void JacobiDispatch(const Vector &b, const Vector &x0, Vector &x1,

                            const Memory<int> &I, const Memory<int> &J,

                            const Memory<double> &A, const int height,

                            const double sc)

 {

    const bool useDevice = b.UseDevice() || x0.UseDevice() || x1.UseDevice();


    const auto bp  = b.Read(useDevice);

    const auto x0p = x0.Read(useDevice);

    auto       x1p = x1.Write(useDevice);


    const auto Ip = Read(I, height+1, useDevice);

    const auto Jp = Read(J, J.Capacity(), useDevice);

    const auto Ap = Read(A, J.Capacity(), useDevice);


    MFEM_FORALL_SWITCH(useDevice, i, height,

    {

       double resi = bp[i], norm = 0.0;

       for (int j = Ip[i]; j < Ip[i+1]; j++)

       {

          resi -= Ap[j] * x0p[Jp[j]];

          if (useFabs)

          {

             norm += fabs(Ap[j]);

          }

          else

          {

             norm += Ap[j];

          }

       }

       if (norm > 0.0)

       {

          x1p[i] = x0p[i] + sc * resi / norm;

       }

       else

       {

          if (useFabs)

          {

             MFEM_ABORT_KERNEL("L1 norm of row is zero.");

          }

          else

          {

             MFEM_ABORT_KERNEL("sum of row is zero.");

          }

       }

    });

 }


 void SparseMatrix::Jacobi2(const Vector &b, const Vector &x0, Vector &x1,
                            double sc) const
 {
    MFEM_VERIFY(Finalized(), "Matrix must be finalized.");

    // Row weights are the sums of the absolute values of the row entries.
    constexpr bool use_abs_values = true;
    JacobiDispatch<use_abs_values>(b, x0, x1, I, J, A, height, sc);
 }


 void SparseMatrix::Jacobi3(const Vector &b, const Vector &x0, Vector &x1,
                            double sc) const
 {
    MFEM_VERIFY(Finalized(), "Matrix must be finalized.");

    // Row weights are the plain (signed) sums of the row entries.
    constexpr bool use_abs_values = false;
    JacobiDispatch<use_abs_values>(b, x0, x1, I, J, A, height, sc);
 }


 void SparseMatrix::AddSubMatrix(const Array<int> &rows, const Array<int> &cols,
                                 const DenseMatrix &subm, int skip_zeros)
 {
    // Adds subm(i,j) to entry (rows[i], cols[j]). A negative index k stands
    // for index -1-k and flips the sign of the added value.
    if (Finalized())
    {
       HostReadI();
       HostReadJ();
       HostReadWriteData();
    }

    for (int ri = 0; ri < rows.Size(); ri++)
    {
       int grow = rows[ri];
       int row_sign = 1;
       if (grow < 0)
       {
          grow = -1-grow;
          row_sign = -1;
       }
       MFEM_ASSERT(grow < height,
                   "Trying to insert a row " << grow << " outside the matrix height "
                   << height);

       SetColPtr(grow);
       for (int cj = 0; cj < cols.Size(); cj++)
       {
          int gcol = cols[cj];
          int sign = row_sign;
          if (gcol < 0)
          {
             gcol = -1-gcol;
             sign = -row_sign;
          }
          MFEM_ASSERT(gcol < width,
                      "Trying to insert a column " << gcol << " outside the matrix width "
                      << width);

          const double entry = subm(ri, cj);
          if (skip_zeros && entry == 0.0)
          {
             // Skip assembly of zero elements if either:
             // (i) user specified to skip zeros regardless of symmetry, or
             // (ii) symmetry is not broken.
             if (skip_zeros == 2 || &rows != &cols || subm(cj, ri) == 0.0)
             {
                continue;
             }
          }
          _Add_(gcol, (sign < 0) ? -entry : entry);
       }
       ClearColPtr();
    }
 }


 void SparseMatrix::Set(const int i, const int j, const double val)
 {
    // A negative index k stands for index -1-k and flips the sign of 'val'.
    int grow = i;
    int row_sign = 1;
    if (grow < 0)
    {
       grow = -1-grow;
       row_sign = -1;
    }
    MFEM_ASSERT(grow < height,
                "Trying to set a row " << grow << " outside the matrix height "
                << height);

    int gcol = j;
    int sign = row_sign;
    if (gcol < 0)
    {
       gcol = -1-gcol;
       sign = -row_sign;
    }
    MFEM_ASSERT(gcol < width,
                "Trying to set a column " << gcol << " outside the matrix width "
                << width);

    _Set_(grow, gcol, (sign < 0) ? -val : val);
 }


 void SparseMatrix::Add(const int i, const int j, const double val)
 {
    // A negative index k stands for index -1-k and flips the sign of 'val'.
    int grow = i;
    int row_sign = 1;
    if (grow < 0)
    {
       grow = -1-grow;
       row_sign = -1;
    }
    MFEM_ASSERT(grow < height,
                "Trying to insert a row " << grow << " outside the matrix height "
                << height);

    int gcol = j;
    int sign = row_sign;
    if (gcol < 0)
    {
       gcol = -1-gcol;
       sign = -row_sign;
    }
    MFEM_ASSERT(gcol < width,
                "Trying to insert a column " << gcol << " outside the matrix width "
                << width);

    _Add_(grow, gcol, (sign < 0) ? -val : val);
 }


 void SparseMatrix::SetSubMatrix(const Array<int> &rows, const Array<int> &cols,
                                 const DenseMatrix &subm, int skip_zeros)
 {
    // Sets entry (rows[i], cols[j]) to subm(i,j). A negative index k stands
    // for index -1-k and flips the sign of the stored value.
    for (int ri = 0; ri < rows.Size(); ri++)
    {
       int grow = rows[ri];
       int row_sign = 1;
       if (grow < 0)
       {
          grow = -1-grow;
          row_sign = -1;
       }
       MFEM_ASSERT(grow < height,
                   "Trying to set a row " << grow << " outside the matrix height "
                   << height);

       SetColPtr(grow);
       for (int cj = 0; cj < cols.Size(); cj++)
       {
          const double entry = subm(ri, cj);
          if (skip_zeros && entry == 0.0)
          {
             // Skip assembly of zero elements if either:
             // (i) user specified to skip zeros regardless of symmetry, or
             // (ii) symmetry is not broken.
             if (skip_zeros == 2 || &rows != &cols || subm(cj, ri) == 0.0)
             {
                continue;
             }
          }

          int gcol = cols[cj];
          int sign = row_sign;
          if (gcol < 0)
          {
             gcol = -1-gcol;
             sign = -row_sign;
          }
          MFEM_ASSERT(gcol < width,
                      "Trying to set a column " << gcol << " outside the matrix width "
                      << width);

          _Set_(gcol, (sign < 0) ? -entry : entry);
       }
       ClearColPtr();
    }
 }


 /// Set entry (rows[i], cols[j]) of the matrix to subm(j,i), i.e. insert the
 /// transpose of @a subm.
 /** A negative index k in @a rows or @a cols stands for index -1-k and flips
     the sign of the stored value. When @a skip_zeros is non-zero, zero values
     are not inserted, except that with skip_zeros != 2 and rows and cols being
     the same array, a zero is still inserted when its mirrored value is
     non-zero, so that the sparsity pattern stays symmetric. */
 void SparseMatrix::SetSubMatrixTranspose(const Array<int> &rows,
                                          const Array<int> &cols,
                                          const DenseMatrix &subm,
                                          int skip_zeros)
 {
    int i, j, gi, gj, s, t;
    double a;

    for (i = 0; i < rows.Size(); i++)
    {
       if ((gi=rows[i]) < 0) { gi = -1-gi, s = -1; }
       else { s = 1; }
       MFEM_ASSERT(gi < height,
                   "Trying to set a row " << gi << " outside the matrix height "
                   << height);
       SetColPtr(gi);
       for (j = 0; j < cols.Size(); j++)
       {
          a = subm(j, i);
          if (skip_zeros && a == 0.0)
          {
             // Skip assembly of zero elements if either:
             // (i) user specified to skip zeros regardless of symmetry, or
             // (ii) symmetry is not broken.
             // The value stored here is subm(j,i), so the mirrored entry that
             // decides (ii) is subm(i,j). Testing subm(j,i) again would be
             // trivially true and would make the symmetry rule dead code.
             if (skip_zeros == 2 || &rows != &cols || subm(i, j) == 0.0)
             {
                continue;
             }
          }
          if ((gj=cols[j]) < 0) { gj = -1-gj, t = -s; }
          else { t = s; }
          MFEM_ASSERT(gj < width,
                      "Trying to set a column " << gj << " outside the matrix width "
                      << width);
          if (t < 0) { a = -a; }
          _Set_(gj, a);
       }
       ClearColPtr();
    }
 }


 void SparseMatrix::GetSubMatrix(const Array<int> &rows, const Array<int> &cols,
                                 DenseMatrix &subm) const
 {
    // Copies entry (rows[i], cols[j]) into subm(i,j). A negative index k
    // stands for index -1-k and flips the sign of the extracted value.
    for (int ri = 0; ri < rows.Size(); ri++)
    {
       int grow = rows[ri];
       int row_sign = 1;
       if (grow < 0)
       {
          grow = -1-grow;
          row_sign = -1;
       }
       MFEM_ASSERT(grow < height,
                   "Trying to read a row " << grow << " outside the matrix height "
                   << height);

       SetColPtr(grow);
       for (int cj = 0; cj < cols.Size(); cj++)
       {
          int gcol = cols[cj];
          int sign = row_sign;
          if (gcol < 0)
          {
             gcol = -1-gcol;
             sign = -row_sign;
          }
          MFEM_ASSERT(gcol < width,
                      "Trying to read a column " << gcol << " outside the matrix width "
                      << width);

          const double entry = _Get_(gcol);
          subm(ri, cj) = (sign < 0) ? (-entry) : (entry);
       }
       ClearColPtr();
    }
 }


 bool SparseMatrix::RowIsEmpty(const int row) const

 {

    int gi;


    if ((gi=row) < 0)

    {

       gi = -1-gi;

    }

    MFEM_ASSERT(gi < height,

                "Trying to query a row " << gi << " outside the matrix height "

                << height);

    if (Rows)

    {

       return (Rows[gi] == NULL);

    }

    else

    {

       return (I[gi] == I[gi+1]);

    }

 }


 int SparseMatrix::GetRow(const int row, Array<int> &cols, Vector &srow) const

 {

    RowNode *n;

    int j, gi;


    if ((gi=row) < 0) { gi = -1-gi; }

    MFEM_ASSERT(gi < height,

                "Trying to read a row " << gi << " outside the matrix height "

                << height);

    if (Rows)

    {

       for (n = Rows[gi], j = 0; n; n = n->Prev)

       {

          j++;

       }

       cols.SetSize(j);

       srow.SetSize(j);

       for (n = Rows[gi], j = 0; n; n = n->Prev, j++)

       {

          cols[j] = n->Column;

          srow(j) = n->Value;

       }

       if (row < 0)

       {

          srow.Neg();

       }


       return 0;

    }

    else

    {

       j = I[gi];

       cols.MakeRef(const_cast<int*>((const int*)J) + j, I[gi+1]-j);

       srow.NewDataAndSize(

          const_cast<double*>((const double*)A) + j, cols.Size());

       MFEM_ASSERT(row >= 0, "Row not valid: " << row << ", height: " << height);

       return 1;

    }

 }


 void SparseMatrix::SetRow(const int row, const Array<int> &cols,

                           const Vector &srow)

 {

    int gi, gj, s, t;

    double a;


    if ((gi=row) < 0) { gi = -1-gi, s = -1; }

    else { s = 1; }

    MFEM_ASSERT(gi < height,

                "Trying to set a row " << gi << " outside the matrix height "

                << height);


    if (!Finalized())

    {

       SetColPtr(gi);

       for (int j = 0; j < cols.Size(); j++)

       {

          if ((gj=cols[j]) < 0) { gj = -1-gj, t = -s; }

          else { t = s; }

          MFEM_ASSERT(gj < width,

                      "Trying to set a column " << gj << " outside the matrix"

                      " width " << width);

          a = srow(j);

          if (t < 0) { a = -a; }

          _Set_(gj, a);

       }

       ClearColPtr();

    }

    else

    {

       MFEM_ASSERT(cols.Size() == RowSize(gi), "");

       MFEM_ASSERT(cols.Size() == srow.Size(), "");


       for (int i = I[gi], j = 0; j < cols.Size(); j++, i++)

       {

          if ((gj=cols[j]) < 0) { gj = -1-gj, t = -s; }

          else { t = s; }

          MFEM_ASSERT(gj < width,

                      "Trying to set a column " << gj << " outside the matrix"

                      " width " << width);


          J[i] = gj;

          A[i] = srow[j] * t;

       }

    }

 }


 void SparseMatrix::AddRow(const int row, const Array<int> &cols,

                           const Vector &srow)

 {

    int j, gi, gj, s, t;

    double a;


    MFEM_VERIFY(!Finalized(), "Matrix must NOT be finalized.");


    if ((gi=row) < 0) { gi = -1-gi, s = -1; }

    else { s = 1; }

    MFEM_ASSERT(gi < height,

                "Trying to insert a row " << gi << " outside the matrix height "

                << height);

    SetColPtr(gi);

    for (j = 0; j < cols.Size(); j++)

    {

       if ((gj=cols[j]) < 0) { gj = -1-gj, t = -s; }

       else { t = s; }

       MFEM_ASSERT(gj < width,

                   "Trying to insert a column " << gj << " outside the matrix width "

                   << width);

       a = srow(j);

       if (a == 0.0)

       {

          continue;

       }

       if (t < 0) { a = -a; }

       _Add_(gj, a);

    }

    ClearColPtr();

 }


 void SparseMatrix::ScaleRow(const int row, const double scale)

 {

    int i;


    if ((i=row) < 0)

    {

       i = -1-i;

    }

    if (Rows != NULL)

    {

       RowNode *aux;


       for (aux = Rows[i]; aux != NULL; aux = aux -> Prev)

       {

          aux -> Value *= scale;

       }

    }

    else

    {

       int j, end = I[i+1];


       for (j = I[i]; j < end; j++)

       {

          A[j] *= scale;

       }

    }

 }


 void SparseMatrix::ScaleRows(const Vector & sl)

 {

    double scale;

    if (Rows != NULL)

    {

       RowNode *aux;

       for (int i=0; i < height; ++i)

       {

          scale = sl(i);

          for (aux = Rows[i]; aux != NULL; aux = aux -> Prev)

          {

             aux -> Value *= scale;

          }

       }

    }

    else

    {

       int j, end;


       for (int i=0; i < height; ++i)

       {

          end = I[i+1];

          scale = sl(i);

          for (j = I[i]; j < end; j++)

          {

             A[j] *= scale;

          }

       }

    }

 }


 void SparseMatrix::ScaleColumns(const Vector & sr)

 {

    if (Rows != NULL)

    {

       RowNode *aux;

       for (int i=0; i < height; ++i)

       {

          for (aux = Rows[i]; aux != NULL; aux = aux -> Prev)

          {

             aux -> Value *= sr(aux->Column);

          }

       }

    }

    else

    {

       int j, end;


       for (int i=0; i < height; ++i)

       {

          end = I[i+1];

          for (j = I[i]; j < end; j++)

          {

             A[j] *= sr(J[j]);

          }

       }

    }

 }


 SparseMatrix &SparseMatrix::operator+=(const SparseMatrix &B)
 {
    MFEM_ASSERT(height == B.height && width == B.width,
                "Mismatch of this matrix size and rhs.  This height = "
                << height << ", width = " << width << ", B.height = "
                << B.height << ", B.width = " << B.width);

    // Row by row, add every stored entry of B to this matrix. B may be in
    // either storage format.
    for (int row = 0; row < height; row++)
    {
       SetColPtr(row);
       if (!B.Rows)
       {
          for (int k = B.I[row]; k < B.I[row+1]; k++)
          {
             _Add_(B.J[k], B.A[k]);
          }
       }
       else
       {
          for (RowNode *node = B.Rows[row]; node != NULL; node = node->Prev)
          {
             _Add_(node->Column, node->Value);
          }
       }
       ClearColPtr();
    }

    return *this;
 }


 void SparseMatrix::Add(const double a, const SparseMatrix &B)

 {

    for (int i = 0; i < height; i++)

    {

       B.SetColPtr(i);

       if (Rows)

       {

          for (RowNode *np = Rows[i]; np != NULL; np = np->Prev)

          {

             np->Value += a * B._Get_(np->Column);

          }

       }

       else

       {

          for (int j = I[i]; j < I[i+1]; j++)

          {

             A[j] += a * B._Get_(J[j]);

          }

       }

       B.ClearColPtr();

    }

 }


 SparseMatrix &SparseMatrix::operator=(double a)
 {
    // Overwrites every stored entry with 'a'; the sparsity pattern is kept.
    if (Rows != NULL)
    {
       // Linked-list storage.
       for (int row = 0; row < height; row++)
       {
          RowNode *node = Rows[row];
          while (node != NULL)
          {
             node->Value = a;
             node = node->Prev;
          }
       }
    }
    else
    {
       // CSR storage: write through the host pointer of the value array.
       const int nnz = J.Capacity();
       double *values = HostWrite(A, nnz);
       for (int k = 0; k < nnz; k++)
       {
          values[k] = a;
       }
    }

    return *this;
 }


 SparseMatrix &SparseMatrix::operator*=(double a)
 {
    // Multiplies every stored entry by 'a'.
    if (Rows != NULL)
    {
       // Linked-list storage.
       for (int row = 0; row < height; row++)
       {
          RowNode *node = Rows[row];
          while (node != NULL)
          {
             node->Value *= a;
             node = node->Prev;
          }
       }
    }
    else
    {
       // CSR storage: I[height] is the number of stored entries.
       const int nnz = I[height];
       for (int k = 0; k < nnz; k++)
       {
          A[k] *= a;
       }
    }

    return *this;
 }


 void SparseMatrix::Print(std::ostream & os, int width_) const

 {

    int i, j;


    if (A == NULL)

    {

       RowNode *nd;

       for (i = 0; i < height; i++)

       {

          os << "[row " << i << "]\n";

          for (nd = Rows[i], j = 0; nd != NULL; nd = nd->Prev, j++)

          {

             os << " (" << nd->Column << "," << nd->Value << ")";

             if ( !((j+1) % width_) )

             {

                os << '\n';

             }

          }

          if (j % width_)

          {

             os << '\n';

          }

       }

       return;

    }


    // HostRead forces synchronization

    HostReadI();

    HostReadJ();

    HostReadData();

    for (i = 0; i < height; i++)

    {

       os << "[row " << i << "]\n";

       for (j = I[i]; j < I[i+1]; j++)

       {

          os << " (" << J[j] << "," << A[j] << ")";

          if ( !((j+1-I[i]) % width_) )

          {

             os << '\n';

          }

       }

       if ((j-I[i]) % width_)

       {

          os << '\n';

       }

    }

 }


 void SparseMatrix::PrintMatlab(std::ostream & os) const

 {

    os << "% size " << height << " " << width << "\n";

    os << "% Non Zeros " << NumNonZeroElems() << "\n";

    int i, j;

    ios::fmtflags old_fmt = os.flags();

    os.setf(ios::scientific);

    std::streamsize old_prec = os.precision(14);


    for (i = 0; i < height; i++)

    {

       for (j = I[i]; j < I[i+1]; j++)

       {

          os << i+1 << " " << J[j]+1 << " " << A[j] << '\n';

       }

    }

    // Write a zero entry at (m,n) to make sure MATLAB doesn't shrink the matrix

    os << height << " " << width << " 0.0\n";

    os.precision(old_prec);

    os.flags(old_fmt);

 }


 void SparseMatrix::PrintMM(std::ostream & os) const

 {

    int i, j;

    ios::fmtflags old_fmt = os.flags();

    os.setf(ios::scientific);

    std::streamsize old_prec = os.precision(14);


    os << "%%MatrixMarket matrix coordinate real general" << '\n'

       << "% Generated by MFEM" << '\n';


    os << height << " " << width << " " << NumNonZeroElems() << '\n';

    for (i = 0; i < height; i++)

    {

       for (j = I[i]; j < I[i+1]; j++)

       {

          os << i+1 << " " << J[j]+1 << " " << A[j] << '\n';

       }

    }

    os.precision(old_prec);

    os.flags(old_fmt);

 }


 void SparseMatrix::PrintCSR(std::ostream & os) const
 {
    MFEM_VERIFY(Finalized(), "Matrix must be finalized.");

    // Output layout, one number per line: the number of rows, then the row
    // offsets and the column indices (both shifted to 1-based), then the
    // values.
    const int nnz = I[height];

    os << height << '\n';  // number of rows

    for (int row = 0; row <= height; row++)
    {
       os << I[row]+1 << '\n';
    }

    for (int k = 0; k < nnz; k++)
    {
       os << J[k]+1 << '\n';
    }

    for (int k = 0; k < nnz; k++)
    {
       os << A[k] << '\n';
    }
 }


 void SparseMatrix::PrintCSR2(std::ostream & os) const
 {
    MFEM_VERIFY(Finalized(), "Matrix must be finalized.");

    // Output layout, one number per line: the number of rows, the number of
    // columns, then the row offsets and the column indices (both 0-based),
    // then the values.
    const int nnz = I[height];

    os << height << '\n'; // number of rows
    os << width << '\n';  // number of columns

    for (int row = 0; row <= height; row++)
    {
       os << I[row] << '\n';
    }

    for (int k = 0; k < nnz; k++)
    {
       os << J[k] << '\n';
    }

    for (int k = 0; k < nnz; k++)
    {
       os << A[k] << '\n';
    }
 }


 /// Print a summary of the matrix: storage format, dimensions, entry counts,
 /// max norm, symmetry measure, counts of small entries and memory usage.
 /** Percentages are relative to the number of stored entries. For a matrix
     with no stored entries (or no rows) the percentages (or the per-row
     average) are reported as 0 instead of dividing by zero. */
 void SparseMatrix::PrintInfo(std::ostream &os) const
 {
    const double MiB = 1024.*1024;
    int nnz = NumNonZeroElems();
    // Guard the divisions: an empty matrix would otherwise print inf/NaN.
    const double pz = (nnz > 0) ? 100./nnz : 0.0;
    const double per_row = (height > 0) ? 1.*nnz/height : 0.0;
    int nz = CountSmallElems(0.0);
    double max_norm = MaxNorm();
    double symm = IsSymmetric();
    int nnf = CheckFinite();
    int ns12 = CountSmallElems(1e-12*max_norm);
    int ns15 = CountSmallElems(1e-15*max_norm);
    int ns18 = CountSmallElems(1e-18*max_norm);

    os <<
       "SparseMatrix statistics:\n"
       "  Format                      : " <<
       (Empty() ? "(empty)" : (Finalized() ? "CSR" : "LIL")) << "\n"
       "  Dimensions                  : " << height << " x " << width << "\n"
       "  Number of entries (total)   : " << nnz << "\n"
       "  Number of entries (per row) : " << per_row << "\n"
       "  Number of stored zeros      : " << nz*pz << "% (" << nz << ")\n"
       "  Number of Inf/Nan entries   : " << nnf*pz << "% ("<< nnf << ")\n"
       "  Norm, max |a_ij|            : " << max_norm << "\n"
       "  Symmetry, max |a_ij-a_ji|   : " << symm << "\n"
       "  Number of small entries:\n"
       "    |a_ij| <= 1e-12*Norm      : " << ns12*pz << "% (" << ns12 << ")\n"
       "    |a_ij| <= 1e-15*Norm      : " << ns15*pz << "% (" << ns15 << ")\n"
       "    |a_ij| <= 1e-18*Norm      : " << ns18*pz << "% (" << ns18 << ")\n";
    if (Finalized())
    {
       // CSR: (height+1) row offsets, nnz column indices and nnz values.
       os << "  Memory used by CSR          : " <<
          (sizeof(int)*(height+1+nnz)+sizeof(double)*nnz)/MiB << " MiB\n";
    }
    if (Rows != NULL)
    {
       // LIL: the array of row head pointers plus the storage of the nodes.
       size_t used_mem = sizeof(RowNode*)*height;
 #ifdef MFEM_USE_MEMALLOC
       used_mem += NodesMem->MemoryUsage();
 #else
       for (int i = 0; i < height; i++)
       {
          for (RowNode *aux = Rows[i]; aux != NULL; aux = aux->Prev)
          {
             used_mem += sizeof(RowNode);
          }
       }
 #endif
       os << "  Memory used by LIL          : " << used_mem/MiB << " MiB\n";
    }
 }


 void SparseMatrix::Destroy()

 {

    I.Delete();

    J.Delete();

    A.Delete();


    if (Rows != NULL)

    {

 #if !defined(MFEM_USE_MEMALLOC)

       for (int i = 0; i < height; i++)

       {

          RowNode *aux, *node_p = Rows[i];

          while (node_p != NULL)

          {

             aux = node_p;

             node_p = node_p->Prev;

             delete aux;

          }

       }

 #endif

       delete [] Rows;

    }


    delete [] ColPtrJ;

    delete [] ColPtrNode;

 #ifdef MFEM_USE_MEMALLOC

    delete NodesMem;

 #endif

    delete At;


    ClearGPUSparse();

 }


 int SparseMatrix::ActualWidth() const

 {

    int awidth = 0;

    if (A)

    {

       const int *start_j = J;

       const int *end_j = J + I[height];

       for (const int *jptr = start_j; jptr != end_j; ++jptr)

       {

          awidth = std::max(awidth, *jptr + 1);

       }

    }

    else

    {

       RowNode *aux;

       for (int i = 0; i < height; i++)

       {

          for (aux = Rows[i]; aux != NULL; aux = aux->Prev)

          {

             awidth = std::max(awidth, aux->Column + 1);

          }

       }

    }

    return awidth;

 }


 void SparseMatrixFunction (SparseMatrix & S, double (*f)(double))
 {
    // Replaces each of the first NumNonZeroElems() values of the data array
    // of S by f(value), in storage order.
    const int num_entries = S.NumNonZeroElems();
    double *value = S.GetData();

    for (int k = 0; k < num_entries; k++, value++)
    {
       *value = f(*value);
    }
 }


 SparseMatrix *Transpose (const SparseMatrix &A)
 {
    MFEM_VERIFY(
       A.Finalized(),
       "Finalize must be called before Transpose. Use TransposeRowMatrix instead");

    const int m   = A.Height(); // number of rows of A
    const int n   = A.Width();  // number of columns of A
    const int nnz = A.NumNonZeroElems();
    const int *A_i       = A.HostReadI();
    const int *A_j       = A.HostReadJ();
    const double *A_data = A.HostReadData();

    int *At_i       = Memory<int>(n+1);
    int *At_j       = Memory<int>(nnz);
    double *At_data = Memory<double>(nnz);

    // Count the entries of each column of A: At_i[c+1] = #entries in column c.
    for (int c = 0; c <= n; c++)
    {
       At_i[c] = 0;
    }
    for (int k = 0; k < nnz; k++)
    {
       At_i[A_j[k]+1]++;
    }
    // Running sum, so that At_i[c] becomes the insertion position of column c.
    for (int c = 1; c < n; c++)
    {
       At_i[c+1] += At_i[c];
    }

    // Scatter the entries of A; At_i[c] is advanced after each insertion.
    int k = 0;
    for (int row = 0; row < m; row++)
    {
       const int row_end = A_i[row+1];
       while (k < row_end)
       {
          const int col = A_j[k];
          const int pos = At_i[col];
          At_j[pos] = row;
          At_data[pos] = A_data[k];
          At_i[col] = pos + 1;
          k++;
       }
    }

    // Shift the offsets back by one position to restore the row starts.
    for (int c = n; c > 0; c--)
    {
       At_i[c] = At_i[c-1];
    }
    At_i[0] = 0;

    return new SparseMatrix(At_i, At_j, At_data, n, m);
 }


 SparseMatrix *TransposeAbstractSparseMatrix (const AbstractSparseMatrix &A,
                                              int useActualWidth)
 {
    Array<int> Acols;
    Vector Avals;

    const int m = A.Height(); // number of rows of A

    // Number of columns of A: either the declared width, or 1 + the largest
    // column index found in the rows of A.
    int n;
    if (!useActualWidth)
    {
       n = A.Width();
    }
    else
    {
       int max_col = 0;
       for (int row = 0; row < m; row++)
       {
          A.GetRow(row, Acols, Avals);
          if (Acols.Size() != 0)
          {
             const int row_max = Acols.Max();
             if (row_max > max_col)
             {
                max_col = row_max;
             }
          }
       }
       n = max_col + 1;
    }
    const int nnz = A.NumNonZeroElems();

    int *At_i       = Memory<int>(n+1);
    int *At_j       = Memory<int>(nnz);
    double *At_data = Memory<double>(nnz);

    // Count the entries of each column of A: At_i[c+1] = #entries in column c.
    for (int c = 0; c <= n; c++)
    {
       At_i[c] = 0;
    }
    for (int row = 0; row < m; row++)
    {
       A.GetRow(row, Acols, Avals);
       for (int k = 0; k < Acols.Size(); ++k)
       {
          At_i[Acols[k]+1]++;
       }
    }
    // Running sum, so that At_i[c] becomes the insertion position of column c.
    for (int c = 1; c < n; c++)
    {
       At_i[c+1] += At_i[c];
    }

    // Scatter the entries of A; At_i[c] is advanced after each insertion.
    for (int row = 0; row < m; row++)
    {
       A.GetRow(row, Acols, Avals);
       for (int k = 0; k < Acols.Size(); ++k)
       {
          const int col = Acols[k];
          const int pos = At_i[col];
          At_j[pos] = row;
          At_data[pos] = Avals[k];
          At_i[col] = pos + 1;
       }
    }

    // Shift the offsets back by one position to restore the row starts.
    for (int c = n; c > 0; c--)
    {
       At_i[c] = At_i[c-1];
    }
    At_i[0] = 0;

    return new SparseMatrix(At_i, At_j, At_data, n, m);
 }


 SparseMatrix *Mult (const SparseMatrix &A, const SparseMatrix &B,
                     SparseMatrix *OAB)
 {
    // Sparse matrix-matrix product C = A * B in CSR format. When OAB is NULL
    // a new matrix is allocated (its pattern is computed in a first, symbolic
    // pass) and returned; otherwise the values of OAB are overwritten and OAB
    // is returned.
    const int nrowsA = A.Height();
    const int ncolsA = A.Width();
    const int nrowsB = B.Height();
    const int ncolsB = B.Width();

    MFEM_VERIFY(ncolsA == nrowsB,
                "number of columns of A (" << ncolsA
                << ") must equal number of rows of B (" << nrowsB << ")");

    const int *A_i       = A.HostReadI();
    const int *A_j       = A.HostReadJ();
    const double *A_data = A.HostReadData();
    const int *B_i       = B.HostReadI();
    const int *B_j       = B.HostReadJ();
    const double *B_data = B.HostReadData();

    // One marker per column of B (i.e. per column of C).
    int *B_marker = new int[ncolsB];
    for (int col = 0; col < ncolsB; col++)
    {
       B_marker[col] = -1;
    }

    SparseMatrix *C = OAB;
    int *C_j = NULL;
    double *C_data = NULL;

    if (OAB != NULL)
    {
       MFEM_VERIFY(nrowsA == C->Height() && ncolsB == C->Width(),
                   "Input matrix sizes do not match output sizes"
                   << " nrowsA = " << nrowsA
                   << ", C->Height() = " << C->Height()
                   << " ncolsB = " << ncolsB
                   << ", C->Width() = " << C->Width());

       // The row offsets of C are not needed below.
       C_j    = C->HostWriteJ();
       C_data = C->HostWriteData();
    }
    else
    {
       // Symbolic pass: count the distinct columns reached in every row of C.
       // Here the marker of a column holds the last row that reached it.
       int *C_i = Memory<int>(nrowsA+1);
       int num_nonzeros = 0;
       C_i[0] = 0;
       for (int row = 0; row < nrowsA; row++)
       {
          for (int ia = A_i[row]; ia < A_i[row+1]; ia++)
          {
             const int ja = A_j[ia];
             for (int ib = B_i[ja]; ib < B_i[ja+1]; ib++)
             {
                const int jb = B_j[ib];
                if (B_marker[jb] != row)
                {
                   B_marker[jb] = row;
                   num_nonzeros++;
                }
             }
          }
          C_i[row+1] = num_nonzeros;
       }

       C_j    = Memory<int>(num_nonzeros);
       C_data = Memory<double>(num_nonzeros);

       C = new SparseMatrix(C_i, C_j, C_data, nrowsA, ncolsB);

       // Reset the markers for the numeric pass.
       for (int col = 0; col < ncolsB; col++)
       {
          B_marker[col] = -1;
       }
    }

    // Numeric pass. Here the marker of a column holds the position in C of
    // the last entry created for that column; a position before the start of
    // the current row means the column has not been reached in this row yet.
    int counter = 0;
    for (int row = 0; row < nrowsA; row++)
    {
       const int row_start = counter;
       for (int ia = A_i[row]; ia < A_i[row+1]; ia++)
       {
          const int ja = A_j[ia];
          const double a_entry = A_data[ia];
          for (int ib = B_i[ja]; ib < B_i[ja+1]; ib++)
          {
             const int jb = B_j[ib];
             const double b_entry = B_data[ib];
             if (B_marker[jb] >= row_start)
             {
                C_data[B_marker[jb]] += a_entry*b_entry;
             }
             else
             {
                B_marker[jb] = counter;
                if (OAB == NULL)
                {
                   C_j[counter] = jb;
                }
                C_data[counter] = a_entry*b_entry;
                counter++;
             }
          }
       }
    }

    MFEM_VERIFY(
       OAB == NULL || counter == OAB->NumNonZeroElems(),
       "With pre-allocated output matrix, number of non-zeros ("
       << OAB->NumNonZeroElems()
       << ") did not match number of entries changed from matrix-matrix multiply, "
       << counter);

    delete [] B_marker;

    return C;
 }


 /// Computes A^t * B. The transpose of A is formed explicitly with Transpose(),
 /// passed to Mult() together with B, and released before returning. The
 /// returned pointer is whatever Mult() produced.
 SparseMatrix * TransposeMult(const SparseMatrix &A, const SparseMatrix &B)
 {
    SparseMatrix *A_transposed = Transpose(A);
    SparseMatrix *product = Mult(*A_transposed, B);

    // The explicit transpose is only a temporary for the multiplication.
    delete A_transposed;

    return product;
 }


 /// Sparse product C = A * B for matrices that are accessed only through the
 /// AbstractSparseMatrix interface (Height(), Width() and GetRow()).
 /** The rows of A are swept twice. The first sweep counts the distinct column
     indices of every row of the product, which yields the row offsets C_i.
     The second sweep writes the column indices and accumulates the values.
     Aborts through MFEM_VERIFY when A.Width() != B.Height(). Returns a new
     SparseMatrix of size A.Height() x B.Width() built from the arrays
     assembled here. */
 SparseMatrix *MultAbstractSparseMatrix (const AbstractSparseMatrix &A,
                                         const AbstractSparseMatrix &B)
 {
    const int nrowsA = A.Height();
    const int ncolsA = A.Width();
    const int nrowsB = B.Height();
    const int ncolsB = B.Width();

    MFEM_VERIFY(ncolsA == nrowsB,
                "number of columns of A (" << ncolsA
                << ") must equal number of rows of B (" << nrowsB << ")");

    // One slot per column of the product. During the counting sweep a slot
    // holds the last row in which the column was seen; during the fill sweep
    // it holds the position of that column inside C_j / C_data.
    int *B_marker = new int[ncolsB];
    std::fill(B_marker, B_marker + ncolsB, -1);

    int *C_i = Memory<int>(nrowsA+1);
    int num_nonzeros = 0;
    C_i[0] = 0;

    Array<int> colsA, colsB;
    Vector dataA, dataB;

    // Sweep 1: count the entries of each row of C.
    for (int ic = 0; ic < nrowsA; ic++)
    {
       A.GetRow(ic, colsA, dataA);
       for (int ia = 0; ia < colsA.Size(); ia++)
       {
          B.GetRow(colsA[ia], colsB, dataB);
          for (int ib = 0; ib < colsB.Size(); ib++)
          {
             const int jb = colsB[ib];
             if (B_marker[jb] == ic) { continue; } // already counted in row ic
             B_marker[jb] = ic;
             num_nonzeros++;
          }
       }
       C_i[ic+1] = num_nonzeros;
    }

    int    *C_j    = Memory<int>(num_nonzeros);
    double *C_data = Memory<double>(num_nonzeros);

    SparseMatrix *C = new SparseMatrix(C_i, C_j, C_data, nrowsA, ncolsB);

    std::fill(B_marker, B_marker + ncolsB, -1);

    // Sweep 2: fill column indices and values. A marker below row_start
    // belongs to an earlier row, so the column is new for the current row.
    int counter = 0;
    for (int ic = 0; ic < nrowsA; ic++)
    {
       const int row_start = counter;
       A.GetRow(ic, colsA, dataA);
       for (int ia = 0; ia < colsA.Size(); ia++)
       {
          const double a_entry = dataA[ia];
          B.GetRow(colsA[ia], colsB, dataB);
          for (int ib = 0; ib < colsB.Size(); ib++)
          {
             const int jb = colsB[ib];
             const double contribution = a_entry*dataB[ib];
             if (B_marker[jb] >= row_start)
             {
                C_data[B_marker[jb]] += contribution;
             }
             else
             {
                B_marker[jb] = counter;
                C_j[counter] = jb;
                C_data[counter] = contribution;
                counter++;
             }
          }
       }
    }

    delete [] B_marker;

    return C;
 }


 /// Computes the dense product C = A * B one column at a time: column j of B
 /// and column j of C are referenced as Vectors and handed to A.Mult().
 /// Returns a new DenseMatrix of size A.Height() x B.Width().
 DenseMatrix *Mult (const SparseMatrix &A, DenseMatrix &B)
 {
    const int num_cols = B.Width();
    DenseMatrix *product = new DenseMatrix(A.Height(), num_cols);

    Vector b_col, c_col;
    for (int col = 0; col < num_cols; ++col)
    {
       B.GetColumnReference(col, b_col);
       product->GetColumnReference(col, c_col);
       A.Mult(b_col, c_col);
    }

    return product;
 }


 /// Computes P^t A P for a sparse A and a dense P. The intermediate product
 /// A P is released before returning; the result is a new DenseMatrix.
 DenseMatrix *RAP (const SparseMatrix &A, DenseMatrix &P)
 {
    DenseMatrix Pt (P, 't'); // Pt = P^T

    DenseMatrix *A_times_P = Mult (A, P);

    DenseMatrix *triple_product =
       new DenseMatrix(Pt.Height(), A_times_P->Width());
    Mult (Pt, *A_times_P, *triple_product);

    delete A_times_P;

    return triple_product;
 }


 /// Computes P^t A P for a dense A and a sparse P using only sparse-times-dense
 /// products: P^t A is transposed to A^t P, multiplied by P^t again, and the
 /// result is transposed back. All temporaries are released before returning.
 DenseMatrix *RAP(DenseMatrix &A, const SparseMatrix &P)
 {
    SparseMatrix *Pt = Transpose(P);

    // AtP = (P^t A)^t
    DenseMatrix *PtA = Mult(*Pt, A);
    DenseMatrix  AtP(*PtA, 't');
    delete PtA;

    // PtAtP = P^t A^t P; Pt is not needed after this product.
    DenseMatrix *PtAtP = Mult(*Pt, AtP);
    delete Pt;

    // Transposing once more gives P^t A P.
    DenseMatrix *result = new DenseMatrix(*PtAtP, 't');
    delete PtAtP;

    return result;
 }


 /// Computes R A R^t for sparse A and R. ORAP is forwarded unchanged to the
 /// final Mult() call, whose return value is returned. The temporaries R^t and
 /// A R^t are released here.
 SparseMatrix *RAP (const SparseMatrix &A, const SparseMatrix &R,
                    SparseMatrix *ORAP)
 {
    SparseMatrix *Rt = Transpose (R);
    SparseMatrix *A_Rt = Mult (A, *Rt);
    delete Rt;

    SparseMatrix *R_A_Rt = Mult (R, *A_Rt, ORAP);
    delete A_Rt;

    return R_A_Rt;
 }


 /// Computes Rt^t A P for sparse Rt, A and P. The explicit transpose of Rt and
 /// the intermediate product Rt^t A are released before returning.
 SparseMatrix *RAP(const SparseMatrix &Rt, const SparseMatrix &A,
                   const SparseMatrix &P)
 {
    SparseMatrix *restriction = Transpose(Rt);
    SparseMatrix *restricted_A = Mult(*restriction, A);
    delete restriction;

    SparseMatrix *triple_product = Mult(*restricted_A, P);
    delete restricted_A;

    return triple_product;
 }


 /// Computes A^t D A, where the Vector D supplies the diagonal of D. OAtDA is
 /// forwarded unchanged to Mult(), whose return value is returned.
 SparseMatrix *Mult_AtDA (const SparseMatrix &A, const Vector &D,
                          SparseMatrix *OAtDA)
 {
    SparseMatrix *At = Transpose (A);

    // Turn A^t into A^t D in place: every stored entry is multiplied by the
    // entry of D that matches its column index.
    const int nnz = At->NumNonZeroElems();
    const int *col_index = At->GetJ();
    double *values = At->GetData();
    for (int k = 0; k < nnz; k++)
    {
       values[k] *= D(col_index[k]);
    }

    SparseMatrix *AtDA = Mult (*At, A, OAtDA);

    delete At;

    return AtDA;
 }


 /// Computes the sparse sum C = a*A + b*B and returns it as a new SparseMatrix.
 /** A and B are read through their CSR arrays (GetI(), GetJ(), GetData()).
     Both matrices must have the same dimensions; this is enforced with
     MFEM_VERIFY because a mismatch would index the row offsets of B or the
     column marker array out of bounds.

     The rows are swept twice: the first sweep counts the entries of each row
     of C to build the row offsets, the second sweep writes column indices and
     values. Within each row of C the entries of A come first, in their
     original order, followed by the entries of B whose column does not occur
     in that row of A.

     @param a  scalar factor applied to the entries of A
     @param A  first summand
     @param b  scalar factor applied to the entries of B
     @param B  second summand, same size as A
     @return   newly allocated matrix of size A.Height() x A.Width() */
 SparseMatrix * Add(double a, const SparseMatrix & A, double b,
                    const SparseMatrix & B)
 {
    const int nrows = A.Height();
    const int ncols = A.Width();

    MFEM_VERIFY(B.Height() == nrows && B.Width() == ncols,
                "incompatible matrix sizes: A is " << nrows << " x " << ncols
                << ", B is " << B.Height() << " x " << B.Width());

    int * C_i = Memory<int>(nrows+1);
    int * C_j;
    double * C_data;

    const int *A_i = A.GetI();
    const int *A_j = A.GetJ();
    const double *A_data = A.GetData();

    const int *B_i = B.GetI();
    const int *B_j = B.GetJ();
    const double *B_data = B.GetData();

    // One slot per column. In the counting sweep a slot holds the last row in
    // which the column was seen; in the fill sweep it holds the position of
    // the column inside C_j / C_data.
    int * marker = new int[ncols];
    std::fill(marker, marker+ncols, -1);

    // Sweep 1: count the entries of every row of C.
    int num_nonzeros = 0, jcol;
    C_i[0] = 0;
    for (int ic = 0; ic < nrows; ic++)
    {
       for (int ia = A_i[ic]; ia < A_i[ic+1]; ia++)
       {
          jcol = A_j[ia];
          marker[jcol] = ic;
          num_nonzeros++;
       }
       for (int ib = B_i[ic]; ib < B_i[ic+1]; ib++)
       {
          jcol = B_j[ib];
          if (marker[jcol] != ic)
          {
             marker[jcol] = ic;
             num_nonzeros++;
          }
       }
       C_i[ic+1] = num_nonzeros;
    }

    C_j = Memory<int>(num_nonzeros);
    C_data = Memory<double>(num_nonzeros);

    std::fill(marker, marker+ncols, -1);

    // Sweep 2: write the entries. A marker below C_i[ic] refers to an earlier
    // row, so the column has not yet been stored in the current row.
    int pos = 0;
    for (int ic = 0; ic < nrows; ic++)
    {
       for (int ia = A_i[ic]; ia < A_i[ic+1]; ia++)
       {
          jcol = A_j[ia];
          C_j[pos] = jcol;
          C_data[pos] = a*A_data[ia];
          marker[jcol] = pos;
          pos++;
       }
       for (int ib = B_i[ic]; ib < B_i[ic+1]; ib++)
       {
          jcol = B_j[ib];
          if (marker[jcol] < C_i[ic])
          {
             C_j[pos] = jcol;
             C_data[pos] = b*B_data[ib];
             marker[jcol] = pos;
             pos++;
          }
          else
          {
             C_data[marker[jcol]] += b*B_data[ib];
          }
       }
    }

    delete[] marker;
    return new SparseMatrix(C_i, C_j, C_data, nrows, ncols);
 }


 /// Computes A + B by delegating to the scaled overload with both factors
 /// equal to one.
 SparseMatrix * Add(const SparseMatrix & A, const SparseMatrix & B)
 {
    const double unit_factor = 1.;
    return Add(unit_factor, A, unit_factor, B);
 }


 /// Computes the sum of all matrices in Ai by repeated pairwise addition.
 /** Intermediate partial sums are deleted as soon as they have been consumed.
     Note that for an array with a single entry no addition takes place and
     the returned pointer is Ai[0] itself. */
 SparseMatrix * Add(Array<SparseMatrix *> & Ai)
 {
    MFEM_ASSERT(Ai.Size() > 0, "invalid size Ai.Size() = " << Ai.Size());

    const int count = Ai.Size();
    SparseMatrix *sum = Ai[0];

    for (int k = 1; k < count; ++k)
    {
       SparseMatrix *next = Add(*sum, *Ai[k]);

       // On the first pass 'sum' still aliases Ai[0], which is not owned
       // here; on later passes it is a temporary partial sum.
       if (k > 1)
       {
          delete sum;
       }

       sum = next;
    }

    return sum;
 }


 /// B += alpha * A
 /** Walks rows 0 .. B.Height()-1 of the sparse matrix A and adds every stored
     entry, scaled by alpha, to the matching entry of the dense matrix B. */
 void Add(const SparseMatrix &A,
          double alpha, DenseMatrix &B)
 {
    const int num_rows = B.Height();
    for (int row = 0; row < num_rows; row++)
    {
       const int    *cols = A.GetRowColumns(row);
       const double *vals = A.GetRowEntries(row);
       const int row_len = A.RowSize(row);
       for (int k = 0; k < row_len; k++)
       {
          B(row, cols[k]) += alpha * vals[k];
       }
    }
 }


 /// Produces a block matrix with blocks A_{ij}*B
 /** The result is a new dense matrix of size
     (A.Height()*B.Height()) x (A.Width()*B.Width()); block (i,j) starts at row
     i*B.Height() and column j*B.Width(). */
 DenseMatrix *OuterProduct(const DenseMatrix &A, const DenseMatrix &B)
 {
    const int rowsA = A.Height(), colsA = A.Width();
    const int rowsB = B.Height(), colsB = B.Width();

    DenseMatrix *blocks = new DenseMatrix(rowsA * rowsB, colsA * colsB);
    *blocks = 0.0;

    for (int bi = 0; bi < rowsA; bi++)
    {
       const int row_offset = bi * rowsB;
       for (int bj = 0; bj < colsA; bj++)
       {
          blocks->AddMatrix(A(bi,bj), B, row_offset, bj * colsB);
       }
    }

    return blocks;
 }


 /// Produces a block matrix with blocks A_{ij}*B
 /** A is dense and B is sparse. For every entry A(i,j) the stored entries of B
     are scaled and written with Set() into block (i,j) of the result, which is
     finalized before it is returned. */
 SparseMatrix *OuterProduct(const DenseMatrix &A, const SparseMatrix &B)
 {
    const int rowsA = A.Height(), colsA = A.Width();
    const int rowsB = B.Height(), colsB = B.Width();

    SparseMatrix *blocks = new SparseMatrix(rowsA * rowsB, colsA * colsB);

    for (int bi = 0; bi < rowsA; bi++)
    {
       for (int bj = 0; bj < colsA; bj++)
       {
          const double scale = A(bi,bj);
          for (int row = 0; row < rowsB; row++)
          {
             const int    *cols = B.GetRowColumns(row);
             const double *vals = B.GetRowEntries(row);
             const int row_len = B.RowSize(row);

             for (int k = 0; k < row_len; k++)
             {
                blocks->Set(bi * rowsB + row, bj * colsB + cols[k],
                            scale * vals[k]);
             }
          }
       }
    }
    blocks->Finalize();

    return blocks;
 }


 /// Produces a block matrix with blocks A_{ij}*B
 /** A is sparse and B is dense. For every stored entry of A a full copy of B,
     scaled by that entry, is written with Set() into the matching block of the
     result, which is finalized before it is returned. */
 SparseMatrix *OuterProduct(const SparseMatrix &A, const DenseMatrix &B)
 {
    const int rowsA = A.Height(), colsA = A.Width();
    const int rowsB = B.Height(), colsB = B.Width();

    SparseMatrix *blocks = new SparseMatrix(rowsA * rowsB, colsA * colsB);

    for (int row = 0; row < rowsA; row++)
    {
       const int    *cols = A.GetRowColumns(row);
       const double *vals = A.GetRowEntries(row);
       const int row_len = A.RowSize(row);

       for (int k = 0; k < row_len; k++)
       {
          const int col_offset = cols[k] * colsB;
          for (int bi = 0; bi < rowsB; bi++)
          {
             for (int bj = 0; bj < colsB; bj++)
             {
                blocks->Set(row * rowsB + bi, col_offset + bj,
                            vals[k] * B(bi, bj));
             }
          }
       }
    }
    blocks->Finalize();

    return blocks;
 }


 /// Produces a block matrix with blocks A_{ij}*B
 /** Both A and B are sparse. Every stored entry of A is combined with every
     stored entry of B and written with Set() into the matching block of the
     result, which is finalized before it is returned. */
 SparseMatrix *OuterProduct(const SparseMatrix &A, const SparseMatrix &B)
 {
    const int rowsA = A.Height(), colsA = A.Width();
    const int rowsB = B.Height(), colsB = B.Width();

    SparseMatrix *blocks = new SparseMatrix(rowsA * rowsB, colsA * colsB);

    for (int a_row = 0; a_row < rowsA; a_row++)
    {
       const int    *a_cols = A.GetRowColumns(a_row);
       const double *a_vals = A.GetRowEntries(a_row);
       const int a_len = A.RowSize(a_row);

       for (int ka = 0; ka < a_len; ka++)
       {
          const int col_offset = a_cols[ka] * colsB;
          for (int b_row = 0; b_row < rowsB; b_row++)
          {
             const int    *b_cols = B.GetRowColumns(b_row);
             const double *b_vals = B.GetRowEntries(b_row);
             const int b_len = B.RowSize(b_row);

             for (int kb = 0; kb < b_len; kb++)
             {
                blocks->Set(a_row * rowsB + b_row, col_offset + b_cols[kb],
                            a_vals[ka] * b_vals[kb]);
             }
          }
       }
    }
    blocks->Finalize();

    return blocks;
 }


 /// Exchanges the contents of *this and other by swapping the data members
 /// listed below one by one.
 // NOTE(review): only the members named here are exchanged; if the class
 // carries further per-object state (e.g. GPU sparse descriptors), confirm that
 // leaving it untouched is intended.
 void SparseMatrix::Swap(SparseMatrix &other)
 {
    // Dimensions
    mfem::Swap(height, other.height);
    mfem::Swap(width, other.width);

    // CSR arrays and the column-sorted flag
    mfem::Swap(I, other.I);
    mfem::Swap(J, other.J);
    mfem::Swap(A, other.A);
    mfem::Swap(isSorted, other.isSorted);

    // Linked-list rows and their node allocator
    mfem::Swap(Rows, other.Rows);
 #ifdef MFEM_USE_MEMALLOC
    mfem::Swap(NodesMem, other.NodesMem);
 #endif

    // "Current row" bookkeeping
    mfem::Swap(current_row, other.current_row);
    mfem::Swap(ColPtrJ, other.ColPtrJ);
    mfem::Swap(ColPtrNode, other.ColPtrNode);

    // Stored transpose
    mfem::Swap(At, other.At);
 }


 /// Releases the matrix data through Destroy(). In CUDA/HIP builds with a GPU
 /// backend allowed, it also decrements the shared instance counter; when the
 /// counter is at one on entry, the static sparse-library handle and the static
 /// device buffer are released first.
 SparseMatrix::~SparseMatrix()
 {
    Destroy();
 #ifdef MFEM_USE_CUDA_OR_HIP
    if (!Device::Allows(Backend::CUDA_MASK | Backend::HIP_MASK))
    {
       return;
    }

    const bool last_instance = (SparseMatrixCount == 1);
    if (last_instance && handle)
    {
       MFEM_cu_or_hip(sparseDestroy)(handle);
       handle = nullptr;
    }
    if (last_instance && dBuffer)
    {
       MFEM_Cu_or_Hip(MemFree)(dBuffer);
       dBuffer = nullptr;
       bufferSize = 0;
    }
    SparseMatrixCount--;
 #endif // MFEM_USE_CUDA_OR_HIP
 }


 }

mfem::SparseMatrix::ColPtrNode
RowNode ** ColPtrNode
Definition: sparsemat.hpp:77

mfem::SparseMatrix::I
Memory< int > I
Array with size (height+1) containing the row offsets.
Definition: sparsemat.hpp:62

mfem::MemAlloc::Alloc
Elem * Alloc()
Definition: mem_alloc.hpp:166

mfem::SparseMatrix::GetJacobiScaling
double GetJacobiScaling() const
Determine appropriate scaling for Jacobi iteration.
Definition: sparsemat.cpp:2551

mfem::SparseMatrix::CheckFinite
int CheckFinite() const
Count the number of entries that are NOT finite, i.e. Inf or Nan.
Definition: sparsemat.cpp:1660

mfem::Array::Size
int Size() const
Return the logical size of the array.
Definition: array.hpp:138

mfem::SparseMatrix::DiagScale
void DiagScale(const Vector &b, Vector &x, double sc=1.0, bool use_abs_diag=false) const
x = sc b / A_ii. When use_abs_diag = true, |A_ii| is used.
Definition: sparsemat.cpp:2616

mfem::TransposeAbstractSparseMatrix
SparseMatrix * TransposeAbstractSparseMatrix(const AbstractSparseMatrix &A, int useActualWidth)
Transpose of a sparse matrix. A does not need to be a CSR matrix.
Definition: sparsemat.cpp:3558

mfem::SparseMatrix::RowSize
int RowSize(const int i) const
Returns the number of elements in row i.
Definition: sparsemat.cpp:344

mfem::CheckFinite
int CheckFinite(const double *v, const int n)
Definition: vector.hpp:493

mfem::SparseMatrix::NumNonZeroElems
virtual int NumNonZeroElems() const
Returns the number of the nonzero elements in the matrix.
Definition: sparsemat.cpp:1584

mfem::CuMemFree
void * CuMemFree(void *dptr)
Frees device memory and returns destination ptr.
Definition: cuda.cpp:79

mfem::SparseMatrix::_Add_
void _Add_(const int col, const double a)
Add a value to an entry in the "current row". See SetColPtr().
Definition: sparsemat.hpp:584

mfem::SparseMatrix::BuildTranspose
void BuildTranspose() const
Build and store internally the transpose of this matrix which will be used in the methods AddMultTran...
Definition: sparsemat.cpp:956

mfem::Vector::NewDataAndSize
void NewDataAndSize(double *d, int s)
Set the Vector data and size, deleting the old data, if owned.
Definition: vector.hpp:163

mfem::SparseMatrix::Clear
void Clear()
Clear the contents of the SparseMatrix.
Definition: sparsemat.hpp:209

mfem::SparseMatrix::EliminateCol
void EliminateCol(int col, DiagonalPolicy dpolicy=DIAG_ZERO)
Eliminates the column col from the matrix.
Definition: sparsemat.cpp:1737

mfem::Array::GetMemory
Memory< T > & GetMemory()
Return a reference to the Memory object used by the Array.
Definition: array.hpp:117

mfem::SparseMatrix::At
SparseMatrix * At
Transpose of A. Owned. Used to perform MultTranspose() on devices.
Definition: sparsemat.hpp:80

mfem::Array2D::NumCols
int NumCols() const
Definition: array.hpp:373

mfem::SparseMatrix::HostWriteJ
int * HostWriteJ()
Definition: sparsemat.hpp:263

mfem::SparseMatrix::bufferSize
static size_t bufferSize
Definition: sparsemat.hpp:101

mfem::MemAlloc::MemoryUsage
size_t MemoryUsage() const
Definition: mem_alloc.hpp:212

mfem::SparseMatrix::ColPtrJ
int * ColPtrJ
Definition: sparsemat.hpp:76

mfem::SparseMatrix::Finalize
virtual void Finalize(int skip_zeros=1)
Finalize the matrix initialization, switching the storage format from LIL to CSR. ...
Definition: sparsemat.hpp:545

mfem::SparseMatrix::MakeRef
void MakeRef(const SparseMatrix &master)
Clear the contents of the SparseMatrix and make it a reference to master.
Definition: sparsemat.cpp:313

mfem::Vector::SetSize
void SetSize(int s)
Resize the vector to size s.
Definition: vector.hpp:513

mfem::SparseMatrix::SearchRow
double & SearchRow(const int col)
Perform a fast search for an entry in the "current row". See SetColPtr().
Definition: sparsemat.hpp:845

mfem::Memory::Delete
void Delete()
Delete the owned pointers and reset the Memory object.
Definition: mem_manager.hpp:1004

mfem::SparseMatrix::ToDenseMatrix
DenseMatrix * ToDenseMatrix() const
Produces a DenseMatrix from a SparseMatrix.
Definition: sparsemat.cpp:699

mfem::SparseMatrix::Empty
bool Empty() const
Check if the SparseMatrix is empty.
Definition: sparsemat.hpp:219

mfem::SparseMatrix::GetRowColumns
int * GetRowColumns(const int row)
Return a pointer to the column indices in a row.
Definition: sparsemat.cpp:391

mfem::SparseMatrix::BooleanMult
void BooleanMult(const Array< int > &x, Array< int > &y) const
y = A * x, treating all entries as booleans (zero=false, nonzero=true).
Definition: sparsemat.cpp:1022

mfem::Mult
void Mult(const Table &A, const Table &B, Table &C)
C = A * B (as boolean matrices)
Definition: table.cpp:475

mfem::Operator::Width
int Width() const
Get the width (size of input) of the Operator. Synonym with NumCols().
Definition: operator.hpp:73

mfem::SparseMatrix::ReadData
const double * ReadData(bool on_dev=true) const
Definition: sparsemat.hpp:271

mfem::SparseMatrix::GetJ
int * GetJ()
Return the array J.
Definition: sparsemat.hpp:227

mfem::Backend::HIP_MASK
Bitwise-OR of all HIP backends.
Definition: device.hpp:90

mfem::AbstractSparseMatrix
Abstract data type for sparse matrices.
Definition: matrix.hpp:73

mfem::SparseMatrix::operator+=
SparseMatrix & operator+=(const SparseMatrix &B)
Add the sparse matrix 'B' to '*this'. This operation will cause an error if '*this' is finalized and ...
Definition: sparsemat.cpp:3137

mfem::Write
T * Write(Memory< T > &mem, int size, bool on_dev=true)
Get a pointer for write access to mem with the mfem::Device's DeviceMemoryClass, if on_dev = true...
Definition: device.hpp:336

mfem::DenseMatrix
Data type dense matrix using column-major storage.
Definition: densemat.hpp:23

mfem::SparseMatrix::AddMult
void AddMult(const Vector &x, Vector &y, const double a=1.0) const
y += A * x (default) or y += a * A * x
Definition: sparsemat.cpp:736

mfem::Vector::Size
int Size() const
Returns the size of the vector.
Definition: vector.hpp:200

mfem::SparseMatrix::MaxNorm
double MaxNorm() const
Definition: sparsemat.cpp:1607

mfem::SparseMatrix::GetI
int * GetI()
Return the array I.
Definition: sparsemat.hpp:222

mfem::SparseMatrix::AddMultTranspose
void AddMultTranspose(const Vector &x, Vector &y, const double a=1.0) const
y += At * x (default) or y += a * At * x
Definition: sparsemat.cpp:911

mfem::MemoryType::PRESERVE

mfem::MatrixInverse
Abstract data type for matrix inverse.
Definition: matrix.hpp:62

mfem::SparseMatrix::dBuffer
static void * dBuffer
Definition: sparsemat.hpp:102

mfem::SparseMatrix::EliminateRowColMultipleRHS
void EliminateRowColMultipleRHS(int rc, const Vector &sol, DenseMatrix &rhs, DiagonalPolicy dpolicy=DIAG_ONE)
Similar to EliminateRowCol(int, const double, Vector &, DiagonalPolicy), but multiple values for elim...
Definition: sparsemat.cpp:1949

mfem::AbstractSparseMatrix::NumNonZeroElems
virtual int NumNonZeroElems() const =0
Returns the number of non-zeros in a matrix.

mfem::SparseMatrix::ClearGPUSparse
void ClearGPUSparse()
Clear the cuSPARSE/hipSPARSE descriptors. This must be called after releasing the device memory of A...
Definition: sparsemat.cpp:81

mfem::SparseMatrix::PrintInfo
void PrintInfo(std::ostream &out) const
Print various sparse matrix statistics.
Definition: sparsemat.cpp:3381

mfem::Memory::CopyFrom
void CopyFrom(const Memory &src, int size)
Copy size entries from src to *this.
Definition: mem_manager.hpp:1168

mfem::SparseMatrix::Elem
virtual double & Elem(int i, int j)
Returns reference to a_{ij}.
Definition: sparsemat.cpp:597

mfem::SparseMatrix::Jacobi
void Jacobi(const Vector &b, const Vector &x0, Vector &x1, double sc, bool use_abs_diag=false) const
Definition: sparsemat.cpp:2584

mfem::SparseMatrix::AddRow
void AddRow(const int row, const Array< int > &cols, const Vector &srow)
Definition: sparsemat.cpp:3018

mfem::SparseMatrix::GetRowEntries
double * GetRowEntries(const int row)
Return a pointer to the entries in a row.
Definition: sparsemat.cpp:405

mfem::Vector::GetData
double * GetData() const
Return a pointer to the beginning of the Vector data.
Definition: vector.hpp:209

mfem::SparseMatrix::SortColumnIndices
void SortColumnIndices()
Sort the column indices corresponding to each row.
Definition: sparsemat.cpp:457

mfem::Vector::UseDevice
virtual void UseDevice(bool use_dev) const
Enable execution of Vector operations using the mfem::Device.
Definition: vector.hpp:118

mfem::SparseMatrix::PrintMM
void PrintMM(std::ostream &out=mfem::out) const
Prints matrix in Matrix Market sparse format.
Definition: sparsemat.cpp:3310

mfem::Memory::Capacity
int Capacity() const
Return the size of the allocated memory.
Definition: mem_manager.hpp:294

mfem::SparseMatrix::vecY_descr
cusparseDnVecDescr_t vecY_descr
Definition: sparsemat.hpp:113

mfem::SparseMatrix::GetSubMatrix
void GetSubMatrix(const Array< int > &rows, const Array< int > &cols, DenseMatrix &subm) const
Definition: sparsemat.cpp:2882

mfem::SparseMatrix::HostReadWriteData
double * HostReadWriteData()
Definition: sparsemat.hpp:281

mfem::SparseMatrix::_Set_
void _Set_(const int col, const double a)
Set an entry in the "current row". See SetColPtr().
Definition: sparsemat.hpp:587

mfem::SparseMatrix::BooleanMultTranspose
void BooleanMultTranspose(const Array< int > &x, Array< int > &y) const
y = At * x, treating all entries as booleans (zero=false, nonzero=true).
Definition: sparsemat.cpp:1052

mfem::Mult_AtDA
SparseMatrix * Mult_AtDA(const SparseMatrix &A, const Vector &D, SparseMatrix *OAtDA)
Matrix multiplication A^t D A. All matrices must be finalized.
Definition: sparsemat.cpp:3926

mfem::SparseMatrix::PartAddMult
void PartAddMult(const Array< int > &rows, const Vector &x, Vector &y, const double a=1.0) const
Definition: sparsemat.cpp:1004

mfem::SparseMatrix::RowIsEmpty
bool RowIsEmpty(const int row) const
Definition: sparsemat.cpp:2910

mfem::Memory::GetMemoryType
MemoryType GetMemoryType() const
Return a MemoryType that is currently valid. If both the host and the device pointers are currently v...
Definition: mem_manager.hpp:1142

mfem::Memory::flags
unsigned flags
Bit flags defined from the FlagMask enum.
Definition: mem_manager.hpp:187

mfem::SparseMatrix::GetBlocks
void GetBlocks(Array2D< SparseMatrix * > &blocks) const
Definition: sparsemat.cpp:1433

linalg.hpp

mfem::SparseMatrix::ReadJ
const int * ReadJ(bool on_dev=true) const
Definition: sparsemat.hpp:255

mfem::SparseMatrix::ScaleRow
void ScaleRow(const int row, const double scale)
Definition: sparsemat.cpp:3050

mfem::SparseMatrix::SetEmpty
void SetEmpty()
Definition: sparsemat.cpp:325

mfem::SparseMatrix::NodesMem
RowNodeAlloc * NodesMem
Definition: sparsemat.hpp:84

mfem::SparseMatrix::GetData
double * GetData()
Return the element data, i.e. the array A.
Definition: sparsemat.hpp:232

mfem::SparseMatrix::Symmetrize
void Symmetrize()
(*this) = 1/2 ((*this) + (*this)^t)
Definition: sparsemat.cpp:1565

mfem::IsFinite
bool IsFinite(const double &val)
Definition: vector.hpp:482

mfem::SparseMatrix::MoveDiagonalFirst
void MoveDiagonalFirst()
Move the diagonal entry to the first position in each row, preserving the order of the rest of the co...
Definition: sparsemat.cpp:573

mfem::Add
void Add(const DenseMatrix &A, const DenseMatrix &B, double alpha, DenseMatrix &C)
C = A + alpha*B.
Definition: densemat.cpp:2282

mfem::SparseMatrix::Set
void Set(const int i, const int j, const double val)
Definition: sparsemat.cpp:2764

mfem::Memory::Wrap
void Wrap(T *ptr, int size, bool own)
Wrap an externally allocated host pointer, ptr with the current host memory type returned by MemoryMa...
Definition: mem_manager.hpp:888

mfem::SparseMatrix::ClearColPtr
void ClearColPtr() const
Reset the "current row" set by calling SetColPtr(). This method must be called between any two calls ...
Definition: sparsemat.hpp:826

mfem::SparseMatrix::ScaleColumns
void ScaleColumns(const Vector &sr)
this = this * diag(sr);
Definition: sparsemat.cpp:3109

mfem::SparseMatrix::~SparseMatrix
virtual ~SparseMatrix()
Destroys sparse matrix.
Definition: sparsemat.cpp:4198

mfem::SparseMatrix::PrintCSR2
void PrintCSR2(std::ostream &out) const
Prints a sparse matrix to stream out in CSR format.
Definition: sparsemat.cpp:3356

mfem::f
double f(const Vector &xvec)
Definition: lor_mms.hpp:32

mfem::SparseMatrix::GetRow
virtual int GetRow(const int row, Array< int > &cols, Vector &srow) const
Extract all column indices and values from a given row.
Definition: sparsemat.cpp:2931

mfem::SparseMatrix::isSorted
bool isSorted
Are the columns sorted already.
Definition: sparsemat.hpp:88

mfem::SparseMatrix::A
Memory< double > A
Array with size I[height], containing the actual entries of the sparse matrix, as indexed by the I ar...
Definition: sparsemat.hpp:68

mfem::SparseMatrix::SparseMatrix
SparseMatrix()
Create an empty SparseMatrix.
Definition: sparsemat.hpp:131

mfem::SparseMatrix
Data type sparse matrix.
Definition: sparsemat.hpp:50

mfem::Operator::Height
int Height() const
Get the height (size of output) of the Operator. Synonym with NumRows().
Definition: operator.hpp:67

mfem::SparseMatrix::EnsureMultTranspose
void EnsureMultTranspose() const
Ensures that the matrix is capable of performing MultTranspose(), AddMultTranspose(), and AbsMultTranspose().
Definition: sparsemat.cpp:970

mfem::Vector::Write
virtual double * Write(bool on_dev=true)
Shortcut for mfem::Write(vec.GetMemory(), vec.Size(), on_dev).
Definition: vector.hpp:457

mfem::mfem_error
void mfem_error(const char *msg)
Function called when an error is encountered. Used by the macros MFEM_ABORT, MFEM_ASSERT, MFEM_VERIFY.
Definition: error.cpp:154

mfem::SparseMatrix::SetWidth
void SetWidth(int width_=-1)
Change the width of a SparseMatrix.
Definition: sparsemat.cpp:419

b
double b
Definition: lissajous.cpp:42

mfem::Array< double >

mfem::SparseMatrix::HostReadData
const double * HostReadData() const
Definition: sparsemat.hpp:277

mfem::Device::GetDeviceMemoryType
static MemoryType GetDeviceMemoryType()
Get the current Device MemoryType. This is the MemoryType used by most MFEM classes when allocating m...
Definition: device.hpp:273

mfem::SparseMatrix::IsSymmetric
double IsSymmetric() const
Returns max_{i,j} |(i,j)-(j,i)| for a finalized matrix.
Definition: sparsemat.cpp:1523

mfem::HipMemFree
void * HipMemFree(void *dptr)
Frees device memory.
Definition: hip.cpp:79

mfem::SparseMatrix::HostReadJ
const int * HostReadJ() const
Definition: sparsemat.hpp:261

mfem::Array::Max
T Max() const
Find the maximal element in the array, using the comparison operator < for class T.
Definition: array.cpp:68

mfem::SparseMatrix::SparseMatrixCount
static int SparseMatrixCount
Definition: sparsemat.hpp:100

mfem::SparseMatrix::Destroy
void Destroy()
Definition: sparsemat.cpp:3432

mfem::SparseMatrix::SetDiagIdentity
void SetDiagIdentity()
If a row contains only one diag entry of zero, set it to 1.
Definition: sparsemat.cpp:2354

mfem::Array::Read
const T * Read(bool on_dev=true) const
Shortcut for mfem::Read(a.GetMemory(), a.Size(), on_dev).
Definition: array.hpp:304

mfem::SparseMatrix::ScaleRows
void ScaleRows(const Vector &sl)
this = diag(sl) * this;
Definition: sparsemat.cpp:3078

mfem::SparseMatrix::handle
static cusparseHandle_t handle
Definition: sparsemat.hpp:107

mfem::OuterProduct
DenseMatrix * OuterProduct(const DenseMatrix &A, const DenseMatrix &B)
Produces a block matrix with blocks A_{ij}*B.
Definition: sparsemat.cpp:4070

mfem::Array::Sort
void Sort()
Sorts the array in ascending order. This requires operator< to be defined for T.
Definition: array.hpp:248

mfem::AbstractSparseMatrix::GetRow
virtual int GetRow(const int row, Array< int > &cols, Vector &srow) const =0
Gets the columns indexes and values for row row.

mfem::Operator::DIAG_ONE
Set the diagonal value to one.
Definition: operator.hpp:50

mfem::RAP
void RAP(const DenseMatrix &A, const DenseMatrix &P, DenseMatrix &RAP)
Definition: densemat.cpp:3213

mfem::SparseMatrix::PrintMatlab
virtual void PrintMatlab(std::ostream &out=mfem::out) const
Prints matrix in matlab format.
Definition: sparsemat.cpp:3288

mfem::Read
const T * Read(const Memory< T > &mem, int size, bool on_dev=true)
Get a pointer for read access to mem with the mfem::Device's DeviceMemoryClass, if on_dev = true...
Definition: device.hpp:319

mfem::Memory::Reset
void Reset()
Reset the memory to be empty, ensuring that Delete() will be a no-op.
Definition: mem_manager.hpp:837

mfem::Vector::HostRead
virtual const double * HostRead() const
Shortcut for mfem::Read(vec.GetMemory(), vec.Size(), false).
Definition: vector.hpp:453

mfem::Backend::CUDA_MASK
Bitwise-OR of all CUDA backends.
Definition: device.hpp:88

mfem::SparseMatrix::AddSubMatrix
void AddSubMatrix(const Array< int > &rows, const Array< int > &cols, const DenseMatrix &subm, int skip_zeros=1)
Definition: sparsemat.cpp:2718

mfem::SparseMatrix::Add
void Add(const int i, const int j, const double val)
Definition: sparsemat.cpp:2783

mfem::Array2D
Dynamic 2D array using row-major layout.
Definition: array.hpp:351

mfem::SparseMatrix::SetSubMatrix
void SetSubMatrix(const Array< int > &rows, const Array< int > &cols, const DenseMatrix &subm, int skip_zeros=1)
Definition: sparsemat.cpp:2802

mfem::SparseMatrix::Finalized
bool Finalized() const
Returns whether or not CSR format has been finalized.
Definition: sparsemat.hpp:551

mfem::Backend::CPU_MASK
Bitwise-OR of all CPU backends.
Definition: device.hpp:86

mfem::HipMemAlloc
void * HipMemAlloc(void **dptr, size_t bytes)
Allocates device memory.
Definition: hip.cpp:34

mfem::HostWrite
T * HostWrite(Memory< T > &mem, int size)
Shortcut to Write(const Memory<T> &mem, int size, false)
Definition: device.hpp:343

mfem::SparseMatrixFunction
void SparseMatrixFunction(SparseMatrix &S, double(*f)(double))
Applies f() to each element of the matrix (after it is finalized).
Definition: sparsemat.cpp:3491

mfem::SparseMatrix::Inverse
virtual MatrixInverse * Inverse() const
This virtual method is not supported: it always returns NULL.
Definition: sparsemat.cpp:1684

mfem::Swap
void Swap(Array< T > &, Array< T > &)
Definition: array.hpp:630

mfem::Memory::ClearOwnerFlags
void ClearOwnerFlags() const
Clear the ownership flags for the host and device pointers, as well as any internal data allocated by...
Definition: mem_manager.hpp:283

mfem::SparseMatrix::EliminateRowColDiag
void EliminateRowColDiag(int rc, double value)
Perform elimination and set the diagonal entry to the given value.
Definition: sparsemat.cpp:2154

mfem::SparseMatrix::Mult
virtual void Mult(const Vector &x, Vector &y) const
Matrix vector multiplication.
Definition: sparsemat.cpp:729

mfem::SparseMatrix::SetColPtr
void SetColPtr(const int row) const
Initialize the SparseMatrix for fast access to the entries of the given row which becomes the "curren...
Definition: sparsemat.hpp:791

mfem::MemoryType
MemoryType
Memory types supported by MFEM.
Definition: mem_manager.hpp:31

mfem::SparseMatrix::SetSubMatrixTranspose
void SetSubMatrixTranspose(const Array< int > &rows, const Array< int > &cols, const DenseMatrix &subm, int skip_zeros=1)
Definition: sparsemat.cpp:2841

mfem::Memory::SetHostPtrOwner
void SetHostPtrOwner(bool own) const
Set/clear the ownership flag for the host pointer. Ownership indicates whether the pointer will be de...
Definition: mem_manager.hpp:269

mfem::Array::SetSize
void SetSize(int nsize)
Change the logical size of the array, keep existing entries.
Definition: array.hpp:679

mfem::SparseMatrix::InitGPUSparse
void InitGPUSparse()
Definition: sparsemat.cpp:64

mfem::SparseMatrix::EliminateRowCol
void EliminateRowCol(int rc, const double sol, Vector &rhs, DiagonalPolicy dpolicy=DIAG_ONE)
Eliminate row rc and column rc and modify the rhs using sol.
Definition: sparsemat.cpp:1850

mfem::Transpose
void Transpose(const Table &A, Table &At, int ncols_A_)
Transpose a Table.
Definition: table.cpp:413

mfem::DenseMatrix::GetColumnReference
void GetColumnReference(int c, Vector &col)
Definition: densemat.hpp:289

mfem::SparseMatrix::Threshold
void Threshold(double tol, bool fix_empty_rows=false)
Remove entries smaller in absolute value than a given tolerance tol. If fix_empty_rows is true...
Definition: sparsemat.cpp:1257

mfem::SparseMatrix::initBuffers
bool initBuffers
Definition: sparsemat.hpp:103

mfem::SparseMatrix::HostWriteData
double * HostWriteData()
Definition: sparsemat.hpp:279

mfem::DenseMatrix::AddMatrix
void AddMatrix(DenseMatrix &A, int ro, int co)
Perform (ro+i,co+j)+=A(i,j) for 0<=i<A.Height, 0<=j<A.Width.
Definition: densemat.cpp:1684

mfem::SparseMatrix::Gauss_Seidel_back
void Gauss_Seidel_back(const Vector &x, Vector &y) const
Definition: sparsemat.cpp:2468

mfem::SparseMatrix::current_row
int current_row
Definition: sparsemat.hpp:75

mfem::SparseMatrix::ReadI
const int * ReadI(bool on_dev=true) const
Definition: sparsemat.hpp:239

mfem::Device::Allows
static bool Allows(unsigned long b_mask)
Return true if any of the backends in the backend mask, b_mask, are allowed.
Definition: device.hpp:258

mfem::SparseMatrix::Gauss_Seidel_forw
void Gauss_Seidel_forw(const Vector &x, Vector &y) const
Gauss-Seidel forward and backward iterations over a vector x.
Definition: sparsemat.cpp:2384

mfem::SparseMatrix::HostReadWriteJ
int * HostReadWriteJ()
Definition: sparsemat.hpp:265

mfem::SparseMatrix::Rows
RowNode ** Rows
Array of linked lists, one for every row. This array represents the linked list (LIL) storage format...
Definition: sparsemat.hpp:73

mfem::Operator::height
int height
Dimension of the output / number of rows in the matrix.
Definition: operator.hpp:27

mfem::SparseMatrix::EliminateRow
void EliminateRow(int row, const double sol, Vector &rhs)
Eliminates a column from the transpose matrix.
Definition: sparsemat.cpp:1689

a
double a
Definition: lissajous.cpp:41

mfem::HostRead
const T * HostRead(const Memory< T > &mem, int size)
Shortcut to Read(const Memory<T> &mem, int size, false)
Definition: device.hpp:326

mfem::Vector::ReadWrite
virtual double * ReadWrite(bool on_dev=true)
Shortcut for mfem::ReadWrite(vec.GetMemory(), vec.Size(), on_dev).
Definition: vector.hpp:465

mfem::SparseMatrix::ActualWidth
int ActualWidth() const
Returns the actual Width of the matrix.
Definition: sparsemat.cpp:3465

mfem::SparseMatrix::GetDiag
void GetDiag(Vector &d) const
Returns the Diagonal of A.
Definition: sparsemat.cpp:665

mfem::SparseMatrix::RowNodeAlloc
MemAlloc< RowNode, 1024 > RowNodeAlloc
Definition: sparsemat.hpp:83

mfem::SparseMatrix::GetRowNorml1
double GetRowNorml1(int irow) const
For i = irow compute $\sum_j |A_{i,j}|$.
Definition: sparsemat.cpp:1233

mfem::SparseMatrix::ResetTranspose
void ResetTranspose() const
Definition: sparsemat.cpp:964

mfem::TransposeMult
SparseMatrix * TransposeMult(const SparseMatrix &A, const SparseMatrix &B)
C = A^T B.
Definition: sparsemat.cpp:3763

mfem::Memory::New
void New(int size)
Allocate host memory for size entries with the current host memory type returned by MemoryManager::Ge...
Definition: mem_manager.hpp:855

mfem::Operator::DIAG_KEEP
Keep the diagonal value.
Definition: operator.hpp:51

mfem::SparseMatrix::EliminateZeroRows
virtual void EliminateZeroRows(const double threshold=1e-12)
If a row contains only zeros, set its diagonal to 1.
Definition: sparsemat.cpp:2365

mfem::SparseMatrix::AbsMultTranspose
void AbsMultTranspose(const Vector &x, Vector &y) const
y = |At| * x, using entry-wise absolute values of the transpose of matrix A
Definition: sparsemat.cpp:1124

mfem::Operator::DiagonalPolicy
DiagonalPolicy
Defines operator diagonal policy upon elimination of rows and/or columns.
Definition: operator.hpp:47

mfem::SparseMatrix::operator()
double & operator()(int i, int j)
Returns reference to A[i][j].
Definition: sparsemat.cpp:607

mfem::SparseMatrix::MultTranspose
void MultTranspose(const Vector &x, Vector &y) const
Multiply a vector with the transposed matrix. y = At * x.
Definition: sparsemat.cpp:904

mfem::SparseMatrix::EliminateCols
void EliminateCols(const Array< int > &cols, const Vector *x=NULL, Vector *b=NULL)
Eliminate all columns i for which cols[i] != 0.
Definition: sparsemat.cpp:1777

mfem::SparseMatrix::OverrideSize
void OverrideSize(int height_, int width_)
Sets the height and width of the matrix.
Definition: sparsemat.cpp:297

mfem::Memory< int >

mfem::SparseMatrix::operator=
SparseMatrix & operator=(const SparseMatrix &rhs)
Assignment operator: deep copy.
Definition: sparsemat.cpp:303

mfem::infinity
double infinity()
Define a shortcut for std::numeric_limits<double>::infinity()
Definition: vector.hpp:46

mfem::SparseMatrix::operator*=
SparseMatrix & operator*=(double a)
Definition: sparsemat.cpp:3216

t
RefCoord t[3]
Definition: ncmesh_tables.hpp:158

mfem::SparseMatrix::SetRow
void SetRow(const int row, const Array< int > &cols, const Vector &srow)
Definition: sparsemat.cpp:2971

mfem::SparseMatrix::matA_descr
cusparseSpMatDescr_t matA_descr
Definition: sparsemat.hpp:111

mfem::Array::Write
T * Write(bool on_dev=true)
Shortcut for mfem::Write(a.GetMemory(), a.Size(), on_dev).
Definition: array.hpp:312

mfem::Array::MakeRef
void MakeRef(T *, int)
Make this Array a reference to a pointer.
Definition: array.hpp:864

alpha
const double alpha
Definition: ex15.cpp:369

mfem::SparseMatrix::ReadWriteData
double * ReadWriteData(bool on_dev=true)
Definition: sparsemat.hpp:275

mfem::SparseMatrix::ReadWriteJ
int * ReadWriteJ(bool on_dev=true)
Definition: sparsemat.hpp:259

mfem::Vector
Vector data type.
Definition: vector.hpp:60

mfem::SparseMatrix::Print
void Print(std::ostream &out=mfem::out, int width_=4) const
Prints matrix to stream out.
Definition: sparsemat.cpp:3240

mfem::SparseMatrix::HostReadI
const int * HostReadI() const
Definition: sparsemat.hpp:245

mfem::SparseMatrix::Jacobi2
void Jacobi2(const Vector &b, const Vector &x0, Vector &x1, double sc=1.0) const
Definition: sparsemat.cpp:2704

mfem::SparseMatrix::vecX_descr
cusparseDnVecDescr_t vecX_descr
Definition: sparsemat.hpp:112

mfem::SparseMatrix::MaxRowSize
int MaxRowSize() const
Returns the maximum number of elements among all rows.
Definition: sparsemat.cpp:367

mfem::SparseMatrix::HostReadWriteI
int * HostReadWriteI()
Definition: sparsemat.hpp:249

s
RefCoord s[3]
Definition: ncmesh_tables.hpp:158

mfem::SparseMatrix::Jacobi3
void Jacobi3(const Vector &b, const Vector &x0, Vector &x1, double sc=1.0) const
Definition: sparsemat.cpp:2711

mfem::MultAbstractSparseMatrix
SparseMatrix * MultAbstractSparseMatrix(const AbstractSparseMatrix &A, const AbstractSparseMatrix &B)
Matrix product of sparse matrices. A and B do not need to be CSR matrices.
Definition: sparsemat.cpp:3771

mfem::SparseMatrix::AbsMult
void AbsMult(const Vector &x, Vector &y) const
y = |A| * x, using entry-wise absolute values of matrix A
Definition: sparsemat.cpp:1075

mfem::SparseMatrix::_Get_
double _Get_(const int col) const
Read the value of an entry in the "current row". See SetColPtr().
Definition: sparsemat.hpp:872

mfem::DenseMatrix::SetSize
void SetSize(int s)
Change the size of the DenseMatrix to s x s.
Definition: densemat.hpp:105

mfem::SparseMatrix::useGPUSparse
bool useGPUSparse
Definition: sparsemat.hpp:93

mfem::SparseMatrix::J
Memory< int > J
Array with size I[height], containing the column indices for all matrix entries, as indexed by the I ...
Definition: sparsemat.hpp:65

mfem::SparseMatrix::Swap
void Swap(SparseMatrix &other)
Definition: sparsemat.cpp:4178

mfem::SparseMatrix::PrintCSR
void PrintCSR(std::ostream &out) const
Prints matrix to stream out in hypre_CSRMatrix format.
Definition: sparsemat.cpp:3332

mfem::SparseMatrix::InnerProduct
double InnerProduct(const Vector &x, const Vector &y) const
Compute y^t A x.
Definition: sparsemat.cpp:1169

mfem::CuMemAlloc
void * CuMemAlloc(void **dptr, size_t bytes)
Allocates device memory and returns destination ptr.
Definition: cuda.cpp:34

mfem::Vector::Read
virtual const double * Read(bool on_dev=true) const
Shortcut for mfem::Read(vec.GetMemory(), vec.Size(), on_dev).
Definition: vector.hpp:449

mfem::Vector::HostReadWrite
virtual double * HostReadWrite()
Shortcut for mfem::ReadWrite(vec.GetMemory(), vec.Size(), false).
Definition: vector.hpp:469

mfem::Array2D::NumRows
int NumRows() const
Definition: array.hpp:372

mfem::Operator::DIAG_ZERO
Set the diagonal value to zero.
Definition: operator.hpp:49

mfem::Operator::width
int width
Dimension of the input / number of columns in the matrix.
Definition: operator.hpp:28

mfem::SparseMatrix::EliminateBC
void EliminateBC(const Array< int > &ess_dofs, DiagonalPolicy diag_policy)
Eliminate essential (Dirichlet) boundary conditions.
Definition: sparsemat.cpp:2311

mfem::MemAlloc
Definition: mem_alloc.hpp:149

mfem::SparseMatrix::CountSmallElems
int CountSmallElems(double tol) const
Count the number of entries with |a_ij| <= tol.
Definition: sparsemat.cpp:1632

mfem::SparseMatrix::PartMult
void PartMult(const Array< int > &rows, const Vector &x, Vector &y) const
Definition: sparsemat.cpp:978

mfem::SparseMatrix::GetRowSums
void GetRowSums(Vector &x) const
For all i compute $x_i = \sum_j A_{ij}$.
Definition: sparsemat.cpp:1210

mfem::Vector::Neg
void Neg()
(*this) = -(*this)
Definition: vector.cpp:305