MFEM v4.7.0
Finite element discretization library
Loading...
Searching...
No Matches
sparsemat.cpp
Go to the documentation of this file.
1// Copyright (c) 2010-2024, Lawrence Livermore National Security, LLC. Produced
2// at the Lawrence Livermore National Laboratory. All Rights reserved. See files
3// LICENSE and NOTICE for details. LLNL-CODE-806117.
4//
5// This file is part of the MFEM library. For more information and source code
6// availability visit https://mfem.org.
7//
8// MFEM is free software; you can redistribute it and/or modify it under the
9// terms of the BSD-3 license. We welcome feedback and contributions, see file
10// CONTRIBUTING.md for details.
11
12// Implementation of sparse matrix
13
14#include "linalg.hpp"
15#include "../general/forall.hpp"
16#include "../general/table.hpp"
19
20#include <iostream>
21#include <iomanip>
22#include <cmath>
23#include <algorithm>
24#include <limits>
25#include <cstring>
26
27#if defined(MFEM_USE_CUDA)
28#define MFEM_cu_or_hip(stub) cu##stub
29#define MFEM_Cu_or_Hip(stub) Cu##stub
30#define MFEM_CU_or_HIP(stub) CU##stub
31#define MFEM_CUDA_or_HIP(stub) CUDA##stub
32
33#if CUSPARSE_VERSION >= 11400
34#define MFEM_GPUSPARSE_ALG CUSPARSE_SPMV_CSR_ALG1
35#else // CUSPARSE_VERSION >= 11400
36#define MFEM_GPUSPARSE_ALG CUSPARSE_CSRMV_ALG1
37#endif // CUSPARSE_VERSION >= 11400
38
39#elif defined(MFEM_USE_HIP)
40#define MFEM_cu_or_hip(stub) hip##stub
41#define MFEM_Cu_or_Hip(stub) Hip##stub
42#define MFEM_CU_or_HIP(stub) HIP##stub
43#define MFEM_CUDA_or_HIP(stub) HIP##stub
44
45// https://hipsparse.readthedocs.io/en/latest/usermanual.html#hipsparsespmvalg-t
46#define MFEM_GPUSPARSE_ALG HIPSPARSE_CSRMV_ALG1
47#endif // defined(MFEM_USE_CUDA)
48
49namespace mfem
50{
51
52using namespace std;
53
54#ifdef MFEM_USE_CUDA_OR_HIP
56// doxygen doesn't like the macro-assisted typename so let's skip parsing it:
57// \cond false
58MFEM_cu_or_hip(sparseHandle_t) SparseMatrix::handle = nullptr;
59// \endcond
61void * SparseMatrix::dBuffer = nullptr;
62#endif // MFEM_USE_CUDA_OR_HIP
63
65{
66 // Initialize cuSPARSE/hipSPARSE library
67#ifdef MFEM_USE_CUDA_OR_HIP
69 {
70 if (!handle) { MFEM_cu_or_hip(sparseCreate)(&handle); }
71 useGPUSparse=true;
73 }
74 else
75 {
76 useGPUSparse=false;
77 }
78#endif // MFEM_USE_CUDA_OR_HIP
79}
80
82{
83#ifdef MFEM_USE_CUDA_OR_HIP
84 if (initBuffers)
85 {
86#if CUDA_VERSION >= 10010 || defined(MFEM_USE_HIP)
87 MFEM_cu_or_hip(sparseDestroySpMat)(matA_descr);
88 MFEM_cu_or_hip(sparseDestroyDnVec)(vecX_descr);
89 MFEM_cu_or_hip(sparseDestroyDnVec)(vecY_descr);
90#else
91 cusparseDestroyMatDescr(matA_descr);
92#endif // CUDA_VERSION >= 10010 || defined(MFEM_USE_HIP)
93 initBuffers = false;
94 }
95#endif // MFEM_USE_CUDA_OR_HIP
96}
97
98SparseMatrix::SparseMatrix(int nrows, int ncols)
99 : AbstractSparseMatrix(nrows, (ncols >= 0) ? ncols : nrows),
100 Rows(new RowNode *[nrows]),
101 current_row(-1),
102 ColPtrJ(NULL),
103 ColPtrNode(NULL),
104 At(NULL),
105 isSorted(false)
106{
107 // We probably do not need to set the ownership flags here.
108 I.SetHostPtrOwner(true);
109 J.SetHostPtrOwner(true);
110 A.SetHostPtrOwner(true);
111
112 for (int i = 0; i < nrows; i++)
113 {
114 Rows[i] = NULL;
115 }
116
117#ifdef MFEM_USE_MEMALLOC
119#endif
120
122}
123
124SparseMatrix::SparseMatrix(int *i, int *j, real_t *data, int m, int n)
125 : AbstractSparseMatrix(m, n),
126 Rows(NULL),
127 ColPtrJ(NULL),
128 ColPtrNode(NULL),
129 At(NULL),
130 isSorted(false)
131{
132 I.Wrap(i, height+1, true);
133 J.Wrap(j, I[height], true);
134 A.Wrap(data, I[height], true);
135
136#ifdef MFEM_USE_MEMALLOC
137 NodesMem = NULL;
138#endif
139
141}
142
143SparseMatrix::SparseMatrix(int *i, int *j, real_t *data, int m, int n,
144 bool ownij, bool owna, bool issorted)
145 : AbstractSparseMatrix(m, n),
146 Rows(NULL),
147 ColPtrJ(NULL),
148 ColPtrNode(NULL),
149 At(NULL),
150 isSorted(issorted)
151{
152 I.Wrap(i, height+1, ownij);
153 J.Wrap(j, I[height], ownij);
154
155#ifdef MFEM_USE_MEMALLOC
156 NodesMem = NULL;
157#endif
158
159 if (data)
160 {
161 A.Wrap(data, I[height], owna);
162 }
163 else
164 {
165 const int nnz = I[height];
166 A.New(nnz);
167 for (int ii=0; ii<nnz; ++ii)
168 {
169 A[ii] = 0.0;
170 }
171 }
172
174}
175
176SparseMatrix::SparseMatrix(int nrows, int ncols, int rowsize)
177 : AbstractSparseMatrix(nrows, ncols)
178 , Rows(NULL)
179 , ColPtrJ(NULL)
180 , ColPtrNode(NULL)
181 , At(NULL)
182 , isSorted(false)
183{
184#ifdef MFEM_USE_MEMALLOC
185 NodesMem = NULL;
186#endif
187 I.New(nrows + 1);
188 J.New(nrows * rowsize);
189 A.New(nrows * rowsize);
190
191 for (int i = 0; i <= nrows; i++)
192 {
193 I[i] = i * rowsize;
194 }
195
197}
198
199SparseMatrix::SparseMatrix(const SparseMatrix &mat, bool copy_graph,
200 MemoryType mt)
201 : AbstractSparseMatrix(mat.Height(), mat.Width())
202{
203 if (mat.Finalized())
204 {
205 mat.HostReadI();
206 const int nnz = mat.I[height];
207 if (copy_graph)
208 {
209 I.New(height+1, mt == MemoryType::PRESERVE ? mat.I.GetMemoryType() : mt);
210 J.New(nnz, mt == MemoryType::PRESERVE ? mat.J.GetMemoryType() : mt);
211 I.CopyFrom(mat.I, height+1);
212 J.CopyFrom(mat.J, nnz);
213 }
214 else
215 {
216 I = mat.I;
217 J = mat.J;
220 }
221 A.New(nnz, mt == MemoryType::PRESERVE ? mat.A.GetMemoryType() : mt);
222 A.CopyFrom(mat.A, nnz);
223
224 Rows = NULL;
225#ifdef MFEM_USE_MEMALLOC
226 NodesMem = NULL;
227#endif
228 }
229 else
230 {
231#ifdef MFEM_USE_MEMALLOC
233#endif
234 Rows = new RowNode *[height];
235 for (int i = 0; i < height; i++)
236 {
237 RowNode **node_pp = &Rows[i];
238 for (RowNode *node_p = mat.Rows[i]; node_p; node_p = node_p->Prev)
239 {
240#ifdef MFEM_USE_MEMALLOC
241 RowNode *new_node_p = NodesMem->Alloc();
242#else
243 RowNode *new_node_p = new RowNode;
244#endif
245 new_node_p->Value = node_p->Value;
246 new_node_p->Column = node_p->Column;
247 *node_pp = new_node_p;
248 node_pp = &new_node_p->Prev;
249 }
250 *node_pp = NULL;
251 }
252
253 // We probably do not need to set the ownership flags here.
254 I.SetHostPtrOwner(true);
255 J.SetHostPtrOwner(true);
256 A.SetHostPtrOwner(true);
257 }
258
259 current_row = -1;
260 ColPtrJ = NULL;
261 ColPtrNode = NULL;
262 At = NULL;
263 isSorted = mat.isSorted;
264
266}
267
269 : AbstractSparseMatrix(v.Size(), v.Size())
270 , Rows(NULL)
271 , ColPtrJ(NULL)
272 , ColPtrNode(NULL)
273 , At(NULL)
274 , isSorted(true)
275{
276#ifdef MFEM_USE_MEMALLOC
277 NodesMem = NULL;
278#endif
279 I.New(height + 1);
280 J.New(height);
281 A.New(height);
282
283 for (int i = 0; i <= height; i++)
284 {
285 I[i] = i;
286 }
287
288 for (int r=0; r<height; r++)
289 {
290 J[r] = r;
291 A[r] = v[r];
292 }
293
295}
296
297void SparseMatrix::OverrideSize(int height_, int width_)
298{
299 height = height_;
300 width = width_;
301}
302
304{
305 Clear();
306
307 SparseMatrix copy(rhs);
308 Swap(copy);
309
310 return *this;
311}
312
314{
315 MFEM_ASSERT(master.Finalized(), "'master' must be finalized");
316 Clear();
317 height = master.Height();
318 width = master.Width();
319 I = master.I; I.ClearOwnerFlags();
320 J = master.J; J.ClearOwnerFlags();
321 A = master.A; A.ClearOwnerFlags();
322 isSorted = master.isSorted;
323}
324
326{
327 height = width = 0;
328 I.Reset();
329 J.Reset();
330 A.Reset();
331 Rows = NULL;
332 current_row = -1;
333 ColPtrJ = NULL;
334 ColPtrNode = NULL;
335 At = NULL;
336#ifdef MFEM_USE_MEMALLOC
337 NodesMem = NULL;
338#endif
339 isSorted = false;
340
342}
343
344int SparseMatrix::RowSize(const int i) const
345{
346 int gi = i;
347 if (gi < 0)
348 {
349 gi = -1-gi;
350 }
351
352 if (I)
353 {
354 return I[gi+1]-I[gi];
355 }
356
357 int s = 0;
358 RowNode *row = Rows[gi];
359 for ( ; row != NULL; row = row->Prev)
360 if (row->Value != 0.0)
361 {
362 s++;
363 }
364 return s;
365}
366
368{
369 int max_row_size=0;
370 int rowSize=0;
371 if (I)
372 {
373 for (int i=0; i < height; ++i)
374 {
375 rowSize = I[i+1]-I[i];
376 max_row_size = (max_row_size > rowSize) ? max_row_size : rowSize;
377 }
378 }
379 else
380 {
381 for (int i=0; i < height; ++i)
382 {
383 rowSize = RowSize(i);
384 max_row_size = (max_row_size > rowSize) ? max_row_size : rowSize;
385 }
386 }
387
388 return max_row_size;
389}
390
392{
393 MFEM_VERIFY(Finalized(), "Matrix must be finalized.");
394
395 return J + I[row];
396}
397
398const int *SparseMatrix::GetRowColumns(const int row) const
399{
400 MFEM_VERIFY(Finalized(), "Matrix must be finalized.");
401
402 return J + I[row];
403}
404
406{
407 MFEM_VERIFY(Finalized(), "Matrix must be finalized.");
408
409 return A + I[row];
410}
411
412const real_t *SparseMatrix::GetRowEntries(const int row) const
413{
414 MFEM_VERIFY(Finalized(), "Matrix must be finalized.");
415
416 return A + I[row];
417}
418
419void SparseMatrix::SetWidth(int newWidth)
420{
421 if (newWidth == width)
422 {
423 // Nothing to be done here
424 return;
425 }
426 else if (newWidth == -1)
427 {
428 // Compute the actual width
429 width = ActualWidth();
430 // No need to reset the ColPtr, since the new ColPtr will be shorter.
431 }
432 else if (newWidth > width)
433 {
434 // We need to reset ColPtr, since now we may have additional columns.
435 if (Rows != NULL)
436 {
437 delete [] ColPtrNode;
438 ColPtrNode = static_cast<RowNode **>(NULL);
439 }
440 else
441 {
442 delete [] ColPtrJ;
443 ColPtrJ = static_cast<int *>(NULL);
444 }
445 width = newWidth;
446 }
447 else
448 {
449 // Check that the new width is bigger or equal to the actual width.
450 MFEM_ASSERT(newWidth >= ActualWidth(),
451 "The new width needs to be bigger or equal to the actual width");
452 width = newWidth;
453 }
454}
455
456
458{
459 MFEM_VERIFY(Finalized(), "Matrix is not Finalized!");
460
461 if (isSorted)
462 {
463 return;
464 }
465
466#ifdef MFEM_USE_CUDA_OR_HIP
468 {
469#if defined(MFEM_USE_CUDA)
470 size_t pBufferSizeInBytes = 0;
471 void *pBuffer = NULL;
472
473 const int n = Height();
474 const int m = Width();
475 const int nnzA = J.Capacity();
476 real_t * d_a_sorted = ReadWriteData();
477 const int * d_ia = ReadI();
478 int * d_ja_sorted = ReadWriteJ();
479 csru2csrInfo_t sortInfoA;
480
481 cusparseMatDescr_t matA_descr;
482 cusparseCreateMatDescr( &matA_descr );
483 cusparseSetMatIndexBase( matA_descr, CUSPARSE_INDEX_BASE_ZERO );
484 cusparseSetMatType( matA_descr, CUSPARSE_MATRIX_TYPE_GENERAL );
485
486 cusparseCreateCsru2csrInfo( &sortInfoA );
487
488#ifdef MFEM_USE_SINGLE
489 cusparseScsru2csr_bufferSizeExt( handle, n, m, nnzA, d_a_sorted, d_ia,
490 d_ja_sorted, sortInfoA,
491 &pBufferSizeInBytes);
492#elif defined MFEM_USE_DOUBLE
493 cusparseDcsru2csr_bufferSizeExt( handle, n, m, nnzA, d_a_sorted, d_ia,
494 d_ja_sorted, sortInfoA,
495 &pBufferSizeInBytes);
496#else
497 MFEM_ABORT("Floating point type undefined");
498#endif
499
500 CuMemAlloc( &pBuffer, pBufferSizeInBytes );
501
502#ifdef MFEM_USE_SINGLE
503 cusparseScsru2csr( handle, n, m, nnzA, matA_descr, d_a_sorted, d_ia,
504 d_ja_sorted, sortInfoA, pBuffer);
505#elif defined MFEM_USE_DOUBLE
506 cusparseDcsru2csr( handle, n, m, nnzA, matA_descr, d_a_sorted, d_ia,
507 d_ja_sorted, sortInfoA, pBuffer);
508#else
509 MFEM_ABORT("Floating point type undefined");
510#endif
511
512 // The above call is (at least in some cases) asynchronous, so we need to
513 // wait for it to finish before we can free device temporaries.
514 MFEM_STREAM_SYNC;
515
516 cusparseDestroyCsru2csrInfo( sortInfoA );
517 cusparseDestroyMatDescr( matA_descr );
518
519 CuMemFree( pBuffer );
520#endif
521 }
523 {
524#if defined(MFEM_USE_HIP)
525 size_t pBufferSizeInBytes = 0;
526 void *pBuffer = NULL;
527 int *P = NULL;
528
529 const int n = Height();
530 const int m = Width();
531 const int nnzA = J.Capacity();
532 real_t * d_a_sorted = ReadWriteData();
533 const int * d_ia = ReadI();
534 int * d_ja_sorted = ReadWriteJ();
535
536 hipsparseMatDescr_t descrA;
537 hipsparseCreateMatDescr( &descrA );
538 // FIXME: There is not in-place version of csr sort in hipSPARSE currently, so we make
539 // a temporary copy of the data for gthr, sort that, and then copy the sorted values
540 // back to the array being returned. Where there is an in-place version available,
541 // we should use it.
542 Array< real_t > a_tmp( nnzA );
543 real_t *d_a_tmp = a_tmp.Write();
544
545 hipsparseXcsrsort_bufferSizeExt(handle, n, m, nnzA, d_ia, d_ja_sorted,
546 &pBufferSizeInBytes);
547
548 HipMemAlloc( &pBuffer, pBufferSizeInBytes );
549 HipMemAlloc( (void**)&P, nnzA * sizeof(int) );
550
551 hipsparseCreateIdentityPermutation(handle, nnzA, P);
552 hipsparseXcsrsort(handle, n, m, nnzA, descrA, d_ia, d_ja_sorted, P, pBuffer);
553
554#if defined(MFEM_USE_SINGLE)
555 hipsparseSgthr(handle, nnzA, d_a_sorted, d_a_tmp, P,
556 HIPSPARSE_INDEX_BASE_ZERO);
557#elif defined(MFEM_USE_DOUBLE)
558 hipsparseDgthr(handle, nnzA, d_a_sorted, d_a_tmp, P,
559 HIPSPARSE_INDEX_BASE_ZERO);
560#else
561 MFEM_ABORT("Unsupported floating point type!");
562#endif
563
564 A.CopyFrom( a_tmp.GetMemory(), nnzA );
565 hipsparseDestroyMatDescr( descrA );
566
567 HipMemFree( pBuffer );
568 HipMemFree( P );
569#endif
570 }
571 else
572#endif // MFEM_USE_CUDA_OR_HIP
573 {
574 const int * Ip=HostReadI();
577
579 for (int j = 0, i = 0; i < height; i++)
580 {
581 int end = Ip[i+1];
582 row.SetSize(end - j);
583 for (int k = 0; k < row.Size(); k++)
584 {
585 row[k].one = J[j+k];
586 row[k].two = A[j+k];
587 }
588 row.Sort();
589 for (int k = 0; k < row.Size(); k++, j++)
590 {
591 J[j] = row[k].one;
592 A[j] = row[k].two;
593 }
594 }
595 }
596 isSorted = true;
597}
598
600{
601 MFEM_VERIFY(Finalized(), "Matrix is not Finalized!");
602
603 for (int row = 0, end = 0; row < height; row++)
604 {
605 int start = end, j;
606 end = I[row+1];
607 for (j = start; true; j++)
608 {
609 MFEM_VERIFY(j < end, "diagonal entry not found in row = " << row);
610 if (J[j] == row) { break; }
611 }
612 const real_t diag = A[j];
613 for ( ; j > start; j--)
614 {
615 J[j] = J[j-1];
616 A[j] = A[j-1];
617 }
618 J[start] = row;
619 A[start] = diag;
620 }
621}
622
624{
625 return operator()(i,j);
626}
627
628const real_t &SparseMatrix::Elem(int i, int j) const
629{
630 return operator()(i,j);
631}
632
634{
635 MFEM_ASSERT(i < height && i >= 0 && j < width && j >= 0,
636 "Trying to access element outside of the matrix. "
637 << "height = " << height << ", "
638 << "width = " << width << ", "
639 << "i = " << i << ", "
640 << "j = " << j);
641
642 MFEM_VERIFY(Finalized(), "Matrix must be finalized.");
643
644 for (int k = I[i], end = I[i+1]; k < end; k++)
645 {
646 if (J[k] == j)
647 {
648 return A[k];
649 }
650 }
651
652 MFEM_ABORT("Did not find i = " << i << ", j = " << j << " in matrix.");
653 return A[0];
654}
655
656const real_t &SparseMatrix::operator()(int i, int j) const
657{
658 static const real_t zero = 0.0;
659
660 MFEM_ASSERT(i < height && i >= 0 && j < width && j >= 0,
661 "Trying to access element outside of the matrix. "
662 << "height = " << height << ", "
663 << "width = " << width << ", "
664 << "i = " << i << ", "
665 << "j = " << j);
666
667 if (Finalized())
668 {
669 for (int k = I[i], end = I[i+1]; k < end; k++)
670 {
671 if (J[k] == j)
672 {
673 return A[k];
674 }
675 }
676 }
677 else
678 {
679 for (RowNode *node_p = Rows[i]; node_p != NULL; node_p = node_p->Prev)
680 {
681 if (node_p->Column == j)
682 {
683 return node_p->Value;
684 }
685 }
686 }
687
688 return zero;
689}
690
692{
693 MFEM_VERIFY(height == width, "Matrix must be square, not height = "
694 << height << ", width = " << width);
695 MFEM_VERIFY(Finalized(), "Matrix must be finalized.");
696
697 d.SetSize(height);
698
699 const auto II = this->ReadI();
700 const auto JJ = this->ReadJ();
701 const auto AA = this->ReadData();
702 auto dd = d.Write();
703
704 mfem::forall(height, [=] MFEM_HOST_DEVICE (int i)
705 {
706 const int begin = II[i];
707 const int end = II[i+1];
708 int j;
709 for (j = begin; j < end; j++)
710 {
711 if (JJ[j] == i)
712 {
713 dd[i] = AA[j];
714 break;
715 }
716 }
717 if (j == end)
718 {
719 dd[i] = 0.;
720 }
721 });
722}
723
724/// Produces a DenseMatrix from a SparseMatrix
726{
727 int num_rows = this->Height();
728 int num_cols = this->Width();
729
730 DenseMatrix * B = new DenseMatrix(num_rows, num_cols);
731
732 this->ToDenseMatrix(*B);
733
734 return B;
735}
736
737/// Produces a DenseMatrix from a SparseMatrix
739{
740 B.SetSize(height, width);
741 B = 0.0;
742
743 for (int r=0; r<height; r++)
744 {
745 const int * col = this->GetRowColumns(r);
746 const real_t * val = this->GetRowEntries(r);
747
748 for (int cj=0; cj<this->RowSize(r); cj++)
749 {
750 B(r, col[cj]) = val[cj];
751 }
752 }
753}
754
755void SparseMatrix::Mult(const Vector &x, Vector &y) const
756{
757 if (Finalized()) { y.UseDevice(true); }
758 y = 0.0;
759 AddMult(x, y);
760}
761
762void SparseMatrix::AddMult(const Vector &x, Vector &y, const real_t a) const
763{
764 MFEM_ASSERT(width == x.Size(), "Input vector size (" << x.Size()
765 << ") must match matrix width (" << width << ")");
766 MFEM_ASSERT(height == y.Size(), "Output vector size (" << y.Size()
767 << ") must match matrix height (" << height << ")");
768
769 if (!Finalized())
770 {
771 const real_t *xp = x.HostRead();
772 real_t *yp = y.HostReadWrite();
773
774 // The matrix is not finalized, but multiplication is still possible
775 for (int i = 0; i < height; i++)
776 {
777 RowNode *row = Rows[i];
778 real_t b = 0.0;
779 for ( ; row != NULL; row = row->Prev)
780 {
781 b += row->Value * xp[row->Column];
782 }
783 *yp += a * b;
784 yp++;
785 }
786 return;
787 }
788
789#ifndef MFEM_USE_LEGACY_OPENMP
790 const int height = this->height;
791 const int nnz = J.Capacity();
792 auto d_I = Read(I, height+1);
793 auto d_J = Read(J, nnz);
794 auto d_A = Read(A, nnz);
795 auto d_x = x.Read();
796 auto d_y = y.ReadWrite();
797
798 // Skip if matrix has no non-zeros
799 if (nnz == 0) {return;}
801 {
802#ifdef MFEM_USE_CUDA_OR_HIP
803 const real_t alpha = a;
804 const real_t beta = 1.0;
805
806 // Setup descriptors
807 if (!initBuffers)
808 {
809#if CUDA_VERSION >= 10010 || defined(MFEM_USE_HIP)
810 // Setup matrix descriptor
811 MFEM_cu_or_hip(sparseCreateCsr)(
813 Width(),
814 J.Capacity(),
815 const_cast<int *>(d_I),
816 const_cast<int *>(d_J),
817 const_cast<real_t *>(d_A),
818 MFEM_CU_or_HIP(SPARSE_INDEX_32I),
819 MFEM_CU_or_HIP(SPARSE_INDEX_32I),
820 MFEM_CU_or_HIP(SPARSE_INDEX_BASE_ZERO),
821#ifdef MFEM_USE_SINGLE
822 MFEM_CUDA_or_HIP(_R_32F));
823#else
824 MFEM_CUDA_or_HIP(_R_64F));
825#endif
826
827 // Create handles for input/output vectors
828 MFEM_cu_or_hip(sparseCreateDnVec)(&vecX_descr,
829 x.Size(),
830 const_cast<real_t *>(d_x),
831#ifdef MFEM_USE_SINGLE
832 MFEM_CUDA_or_HIP(_R_32F));
833#else
834 MFEM_CUDA_or_HIP(_R_64F));
835#endif
836 MFEM_cu_or_hip(sparseCreateDnVec)(&vecY_descr, y.Size(), d_y,
837#ifdef MFEM_USE_SINGLE
838 MFEM_CUDA_or_HIP(_R_32F));
839#else
840 MFEM_CUDA_or_HIP(_R_64F));
841#endif
842#else
843 cusparseCreateMatDescr(&matA_descr);
844 cusparseSetMatIndexBase(matA_descr, CUSPARSE_INDEX_BASE_ZERO);
845 cusparseSetMatType(matA_descr, CUSPARSE_MATRIX_TYPE_GENERAL);
846#endif // CUDA_VERSION >= 10010 || defined(MFEM_USE_HIP)
847 initBuffers = true;
848 }
849 // Allocate kernel space. Buffer is shared between different sparsemats
850 size_t newBufferSize = 0;
851
852 MFEM_cu_or_hip(sparseSpMV_bufferSize)(
853 handle,
854 MFEM_CU_or_HIP(SPARSE_OPERATION_NON_TRANSPOSE),
855 &alpha,
858 &beta,
860#ifdef MFEM_USE_SINGLE
861 MFEM_CUDA_or_HIP(_R_32F),
862#else
863 MFEM_CUDA_or_HIP(_R_64F),
864#endif
865 MFEM_GPUSPARSE_ALG,
866 &newBufferSize);
867
868 // Check if we need to resize
869 if (newBufferSize > bufferSize)
870 {
871 bufferSize = newBufferSize;
872 if (dBuffer != nullptr) { MFEM_Cu_or_Hip(MemFree)(dBuffer); }
873 MFEM_Cu_or_Hip(MemAlloc)(&dBuffer, bufferSize);
874 }
875
876#if CUDA_VERSION >= 10010 || defined(MFEM_USE_HIP)
877 // Update input/output vectors
878 MFEM_cu_or_hip(sparseDnVecSetValues)(vecX_descr,
879 const_cast<real_t *>(d_x));
880 MFEM_cu_or_hip(sparseDnVecSetValues)(vecY_descr, d_y);
881
882 // Y = alpha A * X + beta * Y
883 MFEM_cu_or_hip(sparseSpMV)(
884 handle,
885 MFEM_CU_or_HIP(SPARSE_OPERATION_NON_TRANSPOSE),
886 &alpha,
889 &beta,
891#ifdef MFEM_USE_SINGLE
892 MFEM_CUDA_or_HIP(_R_32F),
893#else
894 MFEM_CUDA_or_HIP(_R_64F),
895#endif
896 MFEM_GPUSPARSE_ALG,
897 dBuffer);
898#else
899#ifdef MFEM_USE_SINGLE
900 cusparseScsrmv(handle,
901#else
902 cusparseDcsrmv(handle,
903#endif
904 CUSPARSE_OPERATION_NON_TRANSPOSE,
905 Height(),
906 Width(),
907 J.Capacity(),
908 &alpha,
910 const_cast<real_t *>(d_A),
911 const_cast<int *>(d_I),
912 const_cast<int *>(d_J),
913 const_cast<real_t *>(d_x),
914 &beta,
915 d_y);
916#endif // CUDA_VERSION >= 10010 || defined(MFEM_USE_HIP)
917#endif // MFEM_USE_CUDA_OR_HIP
918 }
919 else
920 {
921 // Native version
922 mfem::forall(height, [=] MFEM_HOST_DEVICE (int i)
923 {
924 real_t d = 0.0;
925 const int end = d_I[i+1];
926 for (int j = d_I[i]; j < end; j++)
927 {
928 d += d_A[j] * d_x[d_J[j]];
929 }
930 d_y[i] += a * d;
931 });
932
933 }
934
935#else // MFEM_USE_LEGACY_OPENMP
936 const real_t *Ap = A, *xp = x.GetData();
937 real_t *yp = y.GetData();
938 const int *Jp = J, *Ip = I;
939
940 #pragma omp parallel for
941 for (int i = 0; i < height; i++)
942 {
943 real_t d = 0.0;
944 const int end = Ip[i+1];
945 for (int j = Ip[i]; j < end; j++)
946 {
947 d += Ap[j] * xp[Jp[j]];
948 }
949 yp[i] += a * d;
950 }
951#endif // MFEM_USE_LEGACY_OPENMP
952}
953
955{
956 if (Finalized()) { y.UseDevice(true); }
957 y = 0.0;
958 AddMultTranspose(x, y);
959}
960
962 const real_t a) const
963{
964 MFEM_ASSERT(height == x.Size(), "Input vector size (" << x.Size()
965 << ") must match matrix height (" << height << ")");
966 MFEM_ASSERT(width == y.Size(), "Output vector size (" << y.Size()
967 << ") must match matrix width (" << width << ")");
968
969 if (!Finalized())
970 {
971 real_t *yp = y.HostReadWrite();
972 const real_t *xp = x.HostRead();
973 // The matrix is not finalized, but multiplication is still possible
974 for (int i = 0; i < height; i++)
975 {
976 RowNode *row = Rows[i];
977 real_t b = a * xp[i];
978 for ( ; row != NULL; row = row->Prev)
979 {
980 yp[row->Column] += row->Value * b;
981 }
982 }
983 return;
984 }
985
987 if (At)
988 {
989 At->AddMult(x, y, a);
990 }
991 else
992 {
993 real_t *yp = y.HostReadWrite();
994 const real_t *xp = x.HostRead();
995
996 const int *Ip = HostRead(I, height+1);
997 const int nnz = Ip[height];
998 const int *Jp = HostRead(J, nnz);
999 const real_t *Ap = HostRead(A, nnz);
1000
1001 for (int i = 0; i < height; i++)
1002 {
1003 const real_t xi = a * xp[i];
1004 const int end = Ip[i+1];
1005 for (int j = Ip[i]; j < end; j++)
1006 {
1007 const int Jj = Jp[j];
1008 yp[Jj] += Ap[j] * xi;
1009 }
1010 }
1011 }
1012}
1013
1015{
1016 if (At == NULL)
1017 {
1018 At = Transpose(*this);
1019 }
1020}
1021
1023{
1024 delete At;
1025 At = NULL;
1026}
1027
1029{
1031 {
1033 }
1034}
1035
1037 const Array<int> &rows, const Vector &x, Vector &y) const
1038{
1039 MFEM_VERIFY(Finalized(), "Matrix must be finalized.");
1040
1041 const int n = rows.Size();
1042 const int nnz = J.Capacity();
1043 auto d_rows = rows.Read();
1044 auto d_I = Read(I, height+1);
1045 auto d_J = Read(J, nnz);
1046 auto d_A = Read(A, nnz);
1047 auto d_x = x.Read();
1048 auto d_y = y.Write();
1049 mfem::forall(n, [=] MFEM_HOST_DEVICE (int i)
1050 {
1051 const int r = d_rows[i];
1052 const int end = d_I[r + 1];
1053 real_t a = 0.0;
1054 for (int j = d_I[r]; j < end; j++)
1055 {
1056 a += d_A[j] * d_x[d_J[j]];
1057 }
1058 d_y[r] = a;
1059 });
1060}
1061
1063 const Array<int> &rows, const Vector &x, Vector &y, const real_t a) const
1064{
1065 MFEM_VERIFY(Finalized(), "Matrix must be finalized.");
1066
1067 for (int i = 0; i < rows.Size(); i++)
1068 {
1069 int r = rows[i];
1070 int end = I[r + 1];
1071 real_t val = 0.0;
1072 for (int j = I[r]; j < end; j++)
1073 {
1074 val += A[j] * x(J[j]);
1075 }
1076 y(r) += a * val;
1077 }
1078}
1079
1081{
1082 MFEM_ASSERT(Finalized(), "Matrix must be finalized.");
1083 MFEM_ASSERT(x.Size() == Width(), "Input vector size (" << x.Size()
1084 << ") must match matrix width (" << Width() << ")");
1085
1087
1088 const int height = Height();
1089 const int nnz = J.Capacity();
1090 auto d_I = Read(I, height+1);
1091 auto d_J = Read(J, nnz);
1092 auto d_x = Read(x.GetMemory(), x.Size());
1093 auto d_y = Write(y.GetMemory(), y.Size());
1094 mfem::forall(height, [=] MFEM_HOST_DEVICE (int i)
1095 {
1096 bool d_yi = false;
1097 const int end = d_I[i+1];
1098 for (int j = d_I[i]; j < end; j++)
1099 {
1100 if (d_x[d_J[j]])
1101 {
1102 d_yi = true;
1103 break;
1104 }
1105 }
1106 d_y[i] = d_yi;
1107 });
1108}
1109
1111 Array<int> &y) const
1112{
1113 MFEM_ASSERT(Finalized(), "Matrix must be finalized.");
1114 MFEM_ASSERT(x.Size() == Height(), "Input vector size (" << x.Size()
1115 << ") must match matrix height (" << Height() << ")");
1116
1117 y.SetSize(Width());
1118 y = 0;
1119
1120 for (int i = 0; i < Height(); i++)
1121 {
1122 if (x[i])
1123 {
1124 int end = I[i+1];
1125 for (int j = I[i]; j < end; j++)
1126 {
1127 y[J[j]] = x[i];
1128 }
1129 }
1130 }
1131}
1132
1133void SparseMatrix::AbsMult(const Vector &x, Vector &y) const
1134{
1135 MFEM_ASSERT(width == x.Size(), "Input vector size (" << x.Size()
1136 << ") must match matrix width (" << width << ")");
1137 MFEM_ASSERT(height == y.Size(), "Output vector size (" << y.Size()
1138 << ") must match matrix height (" << height << ")");
1139
1140 if (Finalized()) { y.UseDevice(true); }
1141 y = 0.0;
1142
1143 if (!Finalized())
1144 {
1145 const real_t *xp = x.HostRead();
1146 real_t *yp = y.HostReadWrite();
1147
1148 // The matrix is not finalized, but multiplication is still possible
1149 for (int i = 0; i < height; i++)
1150 {
1151 RowNode *row = Rows[i];
1152 real_t b = 0.0;
1153 for ( ; row != NULL; row = row->Prev)
1154 {
1155 b += std::abs(row->Value) * xp[row->Column];
1156 }
1157 *yp += b;
1158 yp++;
1159 }
1160 return;
1161 }
1162
1163 const int height = this->height;
1164 const int nnz = J.Capacity();
1165 auto d_I = Read(I, height+1);
1166 auto d_J = Read(J, nnz);
1167 auto d_A = Read(A, nnz);
1168 auto d_x = x.Read();
1169 auto d_y = y.ReadWrite();
1170 mfem::forall(height, [=] MFEM_HOST_DEVICE (int i)
1171 {
1172 real_t d = 0.0;
1173 const int end = d_I[i+1];
1174 for (int j = d_I[i]; j < end; j++)
1175 {
1176 d += std::abs(d_A[j]) * d_x[d_J[j]];
1177 }
1178 d_y[i] += d;
1179 });
1180}
1181
1183{
1184 MFEM_ASSERT(height == x.Size(), "Input vector size (" << x.Size()
1185 << ") must match matrix height (" << height << ")");
1186 MFEM_ASSERT(width == y.Size(), "Output vector size (" << y.Size()
1187 << ") must match matrix width (" << width << ")");
1188
1189 y = 0.0;
1190
1191 if (!Finalized())
1192 {
1193 real_t *yp = y.GetData();
1194 // The matrix is not finalized, but multiplication is still possible
1195 for (int i = 0; i < height; i++)
1196 {
1197 RowNode *row = Rows[i];
1198 real_t b = x(i);
1199 for ( ; row != NULL; row = row->Prev)
1200 {
1201 yp[row->Column] += fabs(row->Value) * b;
1202 }
1203 }
1204 return;
1205 }
1206
1208 if (At)
1209 {
1210 At->AbsMult(x, y);
1211 }
1212 else
1213 {
1214 for (int i = 0; i < height; i++)
1215 {
1216 const real_t xi = x[i];
1217 const int end = I[i+1];
1218 for (int j = I[i]; j < end; j++)
1219 {
1220 const int Jj = J[j];
1221 y[Jj] += std::abs(A[j]) * xi;
1222 }
1223 }
1224 }
1225}
1226
1228{
1229 MFEM_ASSERT(x.Size() == Width(), "x.Size() = " << x.Size()
1230 << " must be equal to Width() = " << Width());
1231 MFEM_ASSERT(y.Size() == Height(), "y.Size() = " << y.Size()
1232 << " must be equal to Height() = " << Height());
1233
1234 x.HostRead();
1235 y.HostRead();
1236 if (Finalized())
1237 {
1238 const int nnz = J.Capacity();
1239 HostRead(I, height+1);
1240 HostRead(J, nnz);
1241 HostRead(A, nnz);
1242 }
1243
1244 real_t prod = 0.0;
1245 for (int i = 0; i < height; i++)
1246 {
1247 real_t a = 0.0;
1248 if (A)
1249 {
1250 for (int j = I[i], end = I[i+1]; j < end; j++)
1251 {
1252 a += A[j] * x(J[j]);
1253 }
1254 }
1255 else
1256 {
1257 for (RowNode *np = Rows[i]; np != NULL; np = np->Prev)
1258 {
1259 a += np->Value * x(np->Column);
1260 }
1261 }
1262 prod += a * y(i);
1263 }
1264
1265 return prod;
1266}
1267
1269{
1270 for (int i = 0; i < height; i++)
1271 {
1272 real_t a = 0.0;
1273 if (A)
1274 {
1275 for (int j = I[i], end = I[i+1]; j < end; j++)
1276 {
1277 a += A[j];
1278 }
1279 }
1280 else
1281 {
1282 for (RowNode *np = Rows[i]; np != NULL; np = np->Prev)
1283 {
1284 a += np->Value;
1285 }
1286 }
1287 x(i) = a;
1288 }
1289}
1290
1292{
1293 MFEM_VERIFY(irow < height,
1294 "row " << irow << " not in matrix with height " << height);
1295
1296 real_t a = 0.0;
1297 if (A)
1298 {
1299 for (int j = I[irow], end = I[irow+1]; j < end; j++)
1300 {
1301 a += fabs(A[j]);
1302 }
1303 }
1304 else
1305 {
1306 for (RowNode *np = Rows[irow]; np != NULL; np = np->Prev)
1307 {
1308 a += fabs(np->Value);
1309 }
1310 }
1311
1312 return a;
1313}
1314
1315void SparseMatrix::Threshold(real_t tol, bool fix_empty_rows)
1316{
1317 MFEM_ASSERT(Finalized(), "Matrix must be finalized.");
1318 real_t atol;
1319 atol = std::abs(tol);
1320
1321 fix_empty_rows = height == width ? fix_empty_rows : false;
1322
1323 real_t *newA;
1324 int *newI, *newJ;
1325 int i, j, nz;
1326
1327 newI = Memory<int>(height+1);
1328 newI[0] = 0;
1329 for (i = 0, nz = 0; i < height; i++)
1330 {
1331 bool found = false;
1332 for (j = I[i]; j < I[i+1]; j++)
1333 if (std::abs(A[j]) > atol)
1334 {
1335 found = true;
1336 nz++;
1337 }
1338 if (fix_empty_rows && !found) { nz++; }
1339 newI[i+1] = nz;
1340 }
1341
1342 newJ = Memory<int>(nz);
1343 newA = Memory<real_t>(nz);
1344 // Assume we're sorted until we find out otherwise
1345 isSorted = true;
1346 for (i = 0, nz = 0; i < height; i++)
1347 {
1348 bool found = false;
1349 int lastCol = -1;
1350 for (j = I[i]; j < I[i+1]; j++)
1351 if (std::abs(A[j]) > atol)
1352 {
1353 found = true;
1354 newJ[nz] = J[j];
1355 newA[nz] = A[j];
1356 if ( lastCol > newJ[nz] )
1357 {
1358 isSorted = false;
1359 }
1360 lastCol = newJ[nz];
1361 nz++;
1362 }
1363 if (fix_empty_rows && !found)
1364 {
1365 newJ[nz] = i;
1366 newA[nz] = 0.0;
1367 nz++;
1368 }
1369 }
1370 Destroy();
1371 I.Wrap(newI, height+1, true);
1372 J.Wrap(newJ, I[height], true);
1373 A.Wrap(newA, I[height], true);
1374}
1375
1376void SparseMatrix::Finalize(int skip_zeros, bool fix_empty_rows)
1377{
1378 int i, j, nr, nz;
1379 RowNode *aux;
1380
1381 if (Finalized())
1382 {
1383 return;
1384 }
1385
1386 delete [] ColPtrNode;
1387 ColPtrNode = NULL;
1388
1389 I.New(height+1);
1390 I[0] = 0;
1391 for (i = 1; i <= height; i++)
1392 {
1393 nr = 0;
1394 for (aux = Rows[i-1]; aux != NULL; aux = aux->Prev)
1395 {
1396 if (skip_zeros && aux->Value == 0.0)
1397 {
1398 if (skip_zeros == 2) { continue; }
1399 if ((i-1) != aux->Column) { continue; }
1400
1401 bool found = false;
1402 real_t found_val = 0.0; // init to suppress gcc warning
1403 for (RowNode *other = Rows[aux->Column]; other != NULL; other = other->Prev)
1404 {
1405 if (other->Column == (i-1))
1406 {
1407 found = true;
1408 found_val = other->Value;
1409 break;
1410 }
1411 }
1412 if (found && found_val == 0.0) { continue; }
1413
1414 }
1415 nr++;
1416 }
1417 if (fix_empty_rows && !nr) { nr = 1; }
1418 I[i] = I[i-1] + nr;
1419 }
1420
1421 nz = I[height];
1422 J.New(nz);
1423 A.New(nz);
1424 // Assume we're sorted until we find out otherwise
1425 isSorted = true;
1426 for (j = i = 0; i < height; i++)
1427 {
1428 int lastCol = -1;
1429 nr = 0;
1430 for (aux = Rows[i]; aux != NULL; aux = aux->Prev)
1431 {
1432 if (skip_zeros && aux->Value == 0.0)
1433 {
1434 if (skip_zeros == 2) { continue; }
1435 if (i != aux->Column) { continue; }
1436
1437 bool found = false;
1438 real_t found_val = 0.0; // init to suppress gcc warning
1439 for (RowNode *other = Rows[aux->Column]; other != NULL; other = other->Prev)
1440 {
1441 if (other->Column == i)
1442 {
1443 found = true;
1444 found_val = other->Value;
1445 break;
1446 }
1447 }
1448 if (found && found_val == 0.0) { continue; }
1449 }
1450
1451 J[j] = aux->Column;
1452 A[j] = aux->Value;
1453
1454 if ( lastCol > J[j] )
1455 {
1456 isSorted = false;
1457 }
1458 lastCol = J[j];
1459
1460 j++;
1461 nr++;
1462 }
1463 if (fix_empty_rows && !nr)
1464 {
1465 J[j] = i;
1466 A[j] = 1.0;
1467 j++;
1468 }
1469 }
1470
1471#ifdef MFEM_USE_MEMALLOC
1472 delete NodesMem;
1473 NodesMem = NULL;
1474#else
1475 for (i = 0; i < height; i++)
1476 {
1477 RowNode *node_p = Rows[i];
1478 while (node_p != NULL)
1479 {
1480 aux = node_p;
1481 node_p = node_p->Prev;
1482 delete aux;
1483 }
1484 }
1485#endif
1486
1487 delete [] Rows;
1488 Rows = NULL;
1489}
1490
1492{
1493 int br = blocks.NumRows(), bc = blocks.NumCols();
1494 int nr = (height + br - 1)/br, nc = (width + bc - 1)/bc;
1495
1496 for (int j = 0; j < bc; j++)
1497 {
1498 for (int i = 0; i < br; i++)
1499 {
1500 int *bI = Memory<int>(nr + 1);
1501 for (int k = 0; k <= nr; k++)
1502 {
1503 bI[k] = 0;
1504 }
1505 blocks(i,j) = new SparseMatrix(bI, NULL, NULL, nr, nc);
1506 }
1507 }
1508
1509 for (int gr = 0; gr < height; gr++)
1510 {
1511 int bi = gr/nr, i = gr%nr + 1;
1512 if (Finalized())
1513 {
1514 for (int j = I[gr]; j < I[gr+1]; j++)
1515 {
1516 if (A[j] != 0.0)
1517 {
1518 blocks(bi, J[j]/nc)->I[i]++;
1519 }
1520 }
1521 }
1522 else
1523 {
1524 for (RowNode *n_p = Rows[gr]; n_p != NULL; n_p = n_p->Prev)
1525 {
1526 if (n_p->Value != 0.0)
1527 {
1528 blocks(bi, n_p->Column/nc)->I[i]++;
1529 }
1530 }
1531 }
1532 }
1533
1534 for (int j = 0; j < bc; j++)
1535 {
1536 for (int i = 0; i < br; i++)
1537 {
1538 SparseMatrix &b = *blocks(i,j);
1539 int nnz = 0, rs;
1540 for (int k = 1; k <= nr; k++)
1541 {
1542 rs = b.I[k], b.I[k] = nnz, nnz += rs;
1543 }
1544 b.J.New(nnz);
1545 b.A.New(nnz);
1546 }
1547 }
1548
1549 for (int gr = 0; gr < height; gr++)
1550 {
1551 int bi = gr/nr, i = gr%nr + 1;
1552 if (Finalized())
1553 {
1554 for (int j = I[gr]; j < I[gr+1]; j++)
1555 {
1556 if (A[j] != 0.0)
1557 {
1558 SparseMatrix &b = *blocks(bi, J[j]/nc);
1559 b.J[b.I[i]] = J[j] % nc;
1560 b.A[b.I[i]] = A[j];
1561 b.I[i]++;
1562 }
1563 }
1564 }
1565 else
1566 {
1567 for (RowNode *n_p = Rows[gr]; n_p != NULL; n_p = n_p->Prev)
1568 {
1569 if (n_p->Value != 0.0)
1570 {
1571 SparseMatrix &b = *blocks(bi, n_p->Column/nc);
1572 b.J[b.I[i]] = n_p->Column % nc;
1573 b.A[b.I[i]] = n_p->Value;
1574 b.I[i]++;
1575 }
1576 }
1577 }
1578 }
1579}
1580
1582{
1583 if (height != width)
1584 {
1585 return infinity();
1586 }
1587
1588 real_t symm = 0.0;
1589 if (Empty())
1590 {
1591 // return 0.0;
1592 }
1593 else if (Finalized())
1594 {
1595 for (int i = 1; i < height; i++)
1596 {
1597 for (int j = I[i]; j < I[i+1]; j++)
1598 {
1599 if (J[j] < i)
1600 {
1601 symm = std::max(symm, std::abs(A[j]-(*this)(J[j],i)));
1602 }
1603 }
1604 }
1605 }
1606 else
1607 {
1608 for (int i = 0; i < height; i++)
1609 {
1610 for (RowNode *node_p = Rows[i]; node_p != NULL; node_p = node_p->Prev)
1611 {
1612 int col = node_p->Column;
1613 if (col < i)
1614 {
1615 symm = std::max(symm, std::abs(node_p->Value-(*this)(col,i)));
1616 }
1617 }
1618 }
1619 }
1620 return symm;
1621}
1622
1624{
1625 MFEM_VERIFY(Finalized(), "Matrix must be finalized.");
1626
1627 int i, j;
1628 for (i = 1; i < height; i++)
1629 {
1630 for (j = I[i]; j < I[i+1]; j++)
1631 {
1632 if (J[j] < i)
1633 {
1634 A[j] += (*this)(J[j],i);
1635 A[j] *= 0.5;
1636 (*this)(J[j],i) = A[j];
1637 }
1638 }
1639 }
1640}
1641
1643{
1644 if (Finalized())
1645 {
1646 HostReadI();
1647 return I[height];
1648 }
1649 else
1650 {
1651 int nnz = 0;
1652
1653 for (int i = 0; i < height; i++)
1654 {
1655 for (RowNode *node_p = Rows[i]; node_p != NULL; node_p = node_p->Prev)
1656 {
1657 nnz++;
1658 }
1659 }
1660
1661 return nnz;
1662 }
1663}
1664
1666{
1667 real_t m = 0.0;
1668
1669 if (A)
1670 {
1671 int nnz = I[height];
1672 for (int j = 0; j < nnz; j++)
1673 {
1674 m = std::max(m, std::abs(A[j]));
1675 }
1676 }
1677 else
1678 {
1679 for (int i = 0; i < height; i++)
1680 {
1681 for (RowNode *n_p = Rows[i]; n_p != NULL; n_p = n_p->Prev)
1682 {
1683 m = std::max(m, std::abs(n_p->Value));
1684 }
1685 }
1686 }
1687 return m;
1688}
1689
1691{
1692 int counter = 0;
1693
1694 if (A)
1695 {
1696 const int nz = I[height];
1697 const real_t *Ap = A;
1698
1699 for (int i = 0; i < nz; i++)
1700 {
1701 counter += (std::abs(Ap[i]) <= tol);
1702 }
1703 }
1704 else
1705 {
1706 for (int i = 0; i < height; i++)
1707 {
1708 for (RowNode *aux = Rows[i]; aux != NULL; aux = aux->Prev)
1709 {
1710 counter += (std::abs(aux->Value) <= tol);
1711 }
1712 }
1713 }
1714
1715 return counter;
1716}
1717
1719{
1720 if (Empty())
1721 {
1722 return 0;
1723 }
1724 else if (Finalized())
1725 {
1726 return mfem::CheckFinite(A, I[height]);
1727 }
1728 else
1729 {
1730 int counter = 0;
1731 for (int i = 0; i < height; i++)
1732 {
1733 for (RowNode *aux = Rows[i]; aux != NULL; aux = aux->Prev)
1734 {
1735 counter += !IsFinite(aux->Value);
1736 }
1737 }
1738 return counter;
1739 }
1740}
1741
1743{
1744 return NULL;
1745}
1746
1747void SparseMatrix::EliminateRow(int row, const real_t sol, Vector &rhs)
1748{
1749 RowNode *aux;
1750
1751 MFEM_ASSERT(row < height && row >= 0,
1752 "Row " << row << " not in matrix of height " << height);
1753
1754 MFEM_VERIFY(!Finalized(), "Matrix must NOT be finalized.");
1755
1756 for (aux = Rows[row]; aux != NULL; aux = aux->Prev)
1757 {
1758 rhs(aux->Column) -= sol * aux->Value;
1759 aux->Value = 0.0;
1760 }
1761}
1762
1764{
1765 RowNode *aux;
1766
1767 MFEM_ASSERT(row < height && row >= 0,
1768 "Row " << row << " not in matrix of height " << height);
1769 MFEM_ASSERT(dpolicy != DIAG_KEEP, "Diagonal policy must not be DIAG_KEEP");
1770 MFEM_ASSERT(dpolicy != DIAG_ONE || height == width,
1771 "if dpolicy == DIAG_ONE, matrix must be square, not height = "
1772 << height << ", width = " << width);
1773
1774 if (Rows == NULL)
1775 {
1776 for (int i=I[row]; i < I[row+1]; ++i)
1777 {
1778 A[i]=0.0;
1779 }
1780 }
1781 else
1782 {
1783 for (aux = Rows[row]; aux != NULL; aux = aux->Prev)
1784 {
1785 aux->Value = 0.0;
1786 }
1787 }
1788
1789 if (dpolicy == DIAG_ONE)
1790 {
1791 SearchRow(row, row) = 1.;
1792 }
1793}
1794
1796{
1797 MFEM_ASSERT(col < width && col >= 0,
1798 "Col " << col << " not in matrix of width " << width);
1799 MFEM_ASSERT(dpolicy != DIAG_KEEP, "Diagonal policy must not be DIAG_KEEP");
1800 MFEM_ASSERT(dpolicy != DIAG_ONE || height == width,
1801 "if dpolicy == DIAG_ONE, matrix must be square, not height = "
1802 << height << ", width = " << width);
1803
1804 if (Rows == NULL)
1805 {
1806 const int nnz = I[height];
1807 for (int jpos = 0; jpos != nnz; ++jpos)
1808 {
1809 if (J[jpos] == col)
1810 {
1811 A[jpos] = 0.0;
1812 }
1813 }
1814 }
1815 else
1816 {
1817 for (int i = 0; i < height; i++)
1818 {
1819 for (RowNode *aux = Rows[i]; aux != NULL; aux = aux->Prev)
1820 {
1821 if (aux->Column == col)
1822 {
1823 aux->Value = 0.0;
1824 break;
1825 }
1826 }
1827 }
1828 }
1829
1830 if (dpolicy == DIAG_ONE)
1831 {
1832 SearchRow(col, col) = 1.0;
1833 }
1834}
1835
1837 Vector *b)
1838{
1839 if (Rows == NULL)
1840 {
1841 for (int i = 0; i < height; i++)
1842 {
1843 for (int jpos = I[i]; jpos != I[i+1]; ++jpos)
1844 {
1845 if (cols[ J[jpos]])
1846 {
1847 if (x && b)
1848 {
1849 (*b)(i) -= A[jpos] * (*x)( J[jpos] );
1850 }
1851 A[jpos] = 0.0;
1852 }
1853 }
1854 }
1855 }
1856 else
1857 {
1858 for (int i = 0; i < height; i++)
1859 {
1860 for (RowNode *aux = Rows[i]; aux != NULL; aux = aux->Prev)
1861 {
1862 if (cols[aux -> Column])
1863 {
1864 if (x && b)
1865 {
1866 (*b)(i) -= aux -> Value * (*x)(aux -> Column);
1867 }
1868 aux->Value = 0.0;
1869 }
1870 }
1871 }
1872 }
1873}
1874
1876{
1877 if (Rows)
1878 {
1879 RowNode *nd;
1880 for (int row = 0; row < height; row++)
1881 {
1882 for (nd = Rows[row]; nd != NULL; nd = nd->Prev)
1883 {
1884 if (col_marker[nd->Column])
1885 {
1886 Ae.Add(row, nd->Column, nd->Value);
1887 nd->Value = 0.0;
1888 }
1889 }
1890 }
1891 }
1892 else
1893 {
1894 for (int row = 0; row < height; row++)
1895 {
1896 for (int j = I[row]; j < I[row+1]; j++)
1897 {
1898 if (col_marker[J[j]])
1899 {
1900 Ae.Add(row, J[j], A[j]);
1901 A[j] = 0.0;
1902 }
1903 }
1904 }
1905 }
1906}
1907
1908
1909void SparseMatrix::EliminateRowCol(int rc, const real_t sol, Vector &rhs,
1910 DiagonalPolicy dpolicy)
1911{
1912 MFEM_ASSERT(rc < height && rc >= 0,
1913 "Row " << rc << " not in matrix of height " << height);
1917
1918 if (Rows == NULL)
1919 {
1920 for (int j = I[rc]; j < I[rc+1]; j++)
1921 {
1922 const int col = J[j];
1923 if (col == rc)
1924 {
1925 switch (dpolicy)
1926 {
1927 case DIAG_KEEP:
1928 rhs(rc) = A[j] * sol;
1929 break;
1930 case DIAG_ONE:
1931 A[j] = 1.0;
1932 rhs(rc) = sol;
1933 break;
1934 case DIAG_ZERO:
1935 A[j] = 0.;
1936 rhs(rc) = 0.;
1937 break;
1938 default:
1939 mfem_error("SparseMatrix::EliminateRowCol () #2");
1940 break;
1941 }
1942 }
1943 else
1944 {
1945 A[j] = 0.0;
1946 for (int k = I[col]; 1; k++)
1947 {
1948 if (k == I[col+1])
1949 {
1950 mfem_error("SparseMatrix::EliminateRowCol () #3");
1951 }
1952 else if (J[k] == rc)
1953 {
1954 rhs(col) -= sol * A[k];
1955 A[k] = 0.0;
1956 break;
1957 }
1958 }
1959 }
1960 }
1961 }
1962 else
1963 {
1964 for (RowNode *aux = Rows[rc]; aux != NULL; aux = aux->Prev)
1965 {
1966 const int col = aux->Column;
1967 if (col == rc)
1968 {
1969 switch (dpolicy)
1970 {
1971 case DIAG_KEEP:
1972 rhs(rc) = aux->Value * sol;
1973 break;
1974 case DIAG_ONE:
1975 aux->Value = 1.0;
1976 rhs(rc) = sol;
1977 break;
1978 case DIAG_ZERO:
1979 aux->Value = 0.;
1980 rhs(rc) = 0.;
1981 break;
1982 default:
1983 mfem_error("SparseMatrix::EliminateRowCol () #4");
1984 break;
1985 }
1986 }
1987 else
1988 {
1989 aux->Value = 0.0;
1990 for (RowNode *node = Rows[col]; 1; node = node->Prev)
1991 {
1992 if (node == NULL)
1993 {
1994 mfem_error("SparseMatrix::EliminateRowCol () #5");
1995 }
1996 else if (node->Column == rc)
1997 {
1998 rhs(col) -= sol * node->Value;
1999 node->Value = 0.0;
2000 break;
2001 }
2002 }
2003 }
2004 }
2005 }
2006}
2007
2009 DenseMatrix &rhs,
2010 DiagonalPolicy dpolicy)
2011{
2012 MFEM_ASSERT(rc < height && rc >= 0,
2013 "Row " << rc << " not in matrix of height " << height);
2014 MFEM_ASSERT(sol.Size() == rhs.Width(), "solution size (" << sol.Size()
2015 << ") must match rhs width (" << rhs.Width() << ")");
2016
2017 const int num_rhs = rhs.Width();
2018 if (Rows == NULL)
2019 {
2020 for (int j = I[rc]; j < I[rc+1]; j++)
2021 {
2022 const int col = J[j];
2023 if (col == rc)
2024 {
2025 switch (dpolicy)
2026 {
2027 case DIAG_KEEP:
2028 for (int r = 0; r < num_rhs; r++)
2029 {
2030 rhs(rc,r) = A[j] * sol(r);
2031 }
2032 break;
2033 case DIAG_ONE:
2034 A[j] = 1.0;
2035 for (int r = 0; r < num_rhs; r++)
2036 {
2037 rhs(rc,r) = sol(r);
2038 }
2039 break;
2040 case DIAG_ZERO:
2041 A[j] = 0.;
2042 for (int r = 0; r < num_rhs; r++)
2043 {
2044 rhs(rc,r) = 0.;
2045 }
2046 break;
2047 default:
2048 mfem_error("SparseMatrix::EliminateRowColMultipleRHS() #3");
2049 break;
2050 }
2051 }
2052 else
2053 {
2054 A[j] = 0.0;
2055 for (int k = I[col]; 1; k++)
2056 {
2057 if (k == I[col+1])
2058 {
2059 mfem_error("SparseMatrix::EliminateRowColMultipleRHS() #4");
2060 }
2061 else if (J[k] == rc)
2062 {
2063 for (int r = 0; r < num_rhs; r++)
2064 {
2065 rhs(col,r) -= sol(r) * A[k];
2066 }
2067 A[k] = 0.0;
2068 break;
2069 }
2070 }
2071 }
2072 }
2073 }
2074 else
2075 {
2076 for (RowNode *aux = Rows[rc]; aux != NULL; aux = aux->Prev)
2077 {
2078 const int col = aux->Column;
2079 if (col == rc)
2080 {
2081 switch (dpolicy)
2082 {
2083 case DIAG_KEEP:
2084 for (int r = 0; r < num_rhs; r++)
2085 {
2086 rhs(rc,r) = aux->Value * sol(r);
2087 }
2088 break;
2089 case DIAG_ONE:
2090 aux->Value = 1.0;
2091 for (int r = 0; r < num_rhs; r++)
2092 {
2093 rhs(rc,r) = sol(r);
2094 }
2095 break;
2096 case DIAG_ZERO:
2097 aux->Value = 0.;
2098 for (int r = 0; r < num_rhs; r++)
2099 {
2100 rhs(rc,r) = 0.;
2101 }
2102 break;
2103 default:
2104 mfem_error("SparseMatrix::EliminateRowColMultipleRHS() #5");
2105 break;
2106 }
2107 }
2108 else
2109 {
2110 aux->Value = 0.0;
2111 for (RowNode *node = Rows[col]; 1; node = node->Prev)
2112 {
2113 if (node == NULL)
2114 {
2115 mfem_error("SparseMatrix::EliminateRowColMultipleRHS() #6");
2116 }
2117 else if (node->Column == rc)
2118 {
2119 for (int r = 0; r < num_rhs; r++)
2120 {
2121 rhs(col,r) -= sol(r) * node->Value;
2122 }
2123 node->Value = 0.0;
2124 break;
2125 }
2126 }
2127 }
2128 }
2129 }
2130}
2131
2133{
2134 MFEM_ASSERT(rc < height && rc >= 0,
2135 "Row " << rc << " not in matrix of height " << height);
2136
2137 if (Rows == NULL)
2138 {
2139 const auto &II = this->I; // only use const access for I
2140 const auto &JJ = this->J; // only use const access for J
2141 for (int j = II[rc]; j < II[rc+1]; j++)
2142 {
2143 const int col = JJ[j];
2144 if (col == rc)
2145 {
2146 if (dpolicy == DIAG_ONE)
2147 {
2148 A[j] = 1.0;
2149 }
2150 else if (dpolicy == DIAG_ZERO)
2151 {
2152 A[j] = 0.0;
2153 }
2154 }
2155 else
2156 {
2157 A[j] = 0.0;
2158 for (int k = II[col]; 1; k++)
2159 {
2160 if (k == II[col+1])
2161 {
2162 mfem_error("SparseMatrix::EliminateRowCol() #2");
2163 }
2164 else if (JJ[k] == rc)
2165 {
2166 A[k] = 0.0;
2167 break;
2168 }
2169 }
2170 }
2171 }
2172 }
2173 else
2174 {
2175 RowNode *aux, *node;
2176
2177 for (aux = Rows[rc]; aux != NULL; aux = aux->Prev)
2178 {
2179 const int col = aux->Column;
2180 if (col == rc)
2181 {
2182 if (dpolicy == DIAG_ONE)
2183 {
2184 aux->Value = 1.0;
2185 }
2186 else if (dpolicy == DIAG_ZERO)
2187 {
2188 aux->Value = 0.;
2189 }
2190 }
2191 else
2192 {
2193 aux->Value = 0.0;
2194 for (node = Rows[col]; 1; node = node->Prev)
2195 {
2196 if (node == NULL)
2197 {
2198 mfem_error("SparseMatrix::EliminateRowCol() #3");
2199 }
2200 else if (node->Column == rc)
2201 {
2202 node->Value = 0.0;
2203 break;
2204 }
2205 }
2206 }
2207 }
2208 }
2209}
2210
2211// This is almost identical to EliminateRowCol(int, int), except for
2212// the A[j] = value; and aux->Value = value; lines.
2214{
2215 MFEM_ASSERT(rc < height && rc >= 0,
2216 "Row " << rc << " not in matrix of height " << height);
2217
2218 if (Rows == NULL)
2219 {
2220 for (int j = I[rc]; j < I[rc+1]; j++)
2221 {
2222 const int col = J[j];
2223 if (col == rc)
2224 {
2225 A[j] = value;
2226 }
2227 else
2228 {
2229 A[j] = 0.0;
2230 for (int k = I[col]; 1; k++)
2231 {
2232 if (k == I[col+1])
2233 {
2234 mfem_error("SparseMatrix::EliminateRowCol() #2");
2235 }
2236 else if (J[k] == rc)
2237 {
2238 A[k] = 0.0;
2239 break;
2240 }
2241 }
2242 }
2243 }
2244 }
2245 else
2246 {
2247 RowNode *aux, *node;
2248
2249 for (aux = Rows[rc]; aux != NULL; aux = aux->Prev)
2250 {
2251 const int col = aux->Column;
2252 if (col == rc)
2253 {
2254 aux->Value = value;
2255 }
2256 else
2257 {
2258 aux->Value = 0.0;
2259 for (node = Rows[col]; 1; node = node->Prev)
2260 {
2261 if (node == NULL)
2262 {
2263 mfem_error("SparseMatrix::EliminateRowCol() #3");
2264 }
2265 else if (node->Column == rc)
2266 {
2267 node->Value = 0.0;
2268 break;
2269 }
2270 }
2271 }
2272 }
2273 }
2274}
2275
2277 DiagonalPolicy dpolicy)
2278{
2279 if (Rows)
2280 {
2281 RowNode *nd, *nd2;
2282 for (nd = Rows[rc]; nd != NULL; nd = nd->Prev)
2283 {
2284 const int col = nd->Column;
2285 if (col == rc)
2286 {
2287 switch (dpolicy)
2288 {
2289 case DIAG_ONE:
2290 Ae.Add(rc, rc, nd->Value - 1.0);
2291 nd->Value = 1.0;
2292 break;
2293 case DIAG_ZERO:
2294 Ae.Add(rc, rc, nd->Value);
2295 nd->Value = 0.;
2296 break;
2297 case DIAG_KEEP:
2298 break;
2299 default:
2300 mfem_error("SparseMatrix::EliminateRowCol #1");
2301 break;
2302 }
2303 }
2304 else
2305 {
2306 Ae.Add(rc, col, nd->Value);
2307 nd->Value = 0.0;
2308 for (nd2 = Rows[col]; 1; nd2 = nd2->Prev)
2309 {
2310 if (nd2 == NULL)
2311 {
2312 mfem_error("SparseMatrix::EliminateRowCol #2");
2313 }
2314 else if (nd2->Column == rc)
2315 {
2316 Ae.Add(col, rc, nd2->Value);
2317 nd2->Value = 0.0;
2318 break;
2319 }
2320 }
2321 }
2322 }
2323 }
2324 else
2325 {
2326 for (int j = I[rc]; j < I[rc+1]; j++)
2327 {
2328 const int col = J[j];
2329 if (col == rc)
2330 {
2331 switch (dpolicy)
2332 {
2333 case DIAG_ONE:
2334 Ae.Add(rc, rc, A[j] - 1.0);
2335 A[j] = 1.0;
2336 break;
2337 case DIAG_ZERO:
2338 Ae.Add(rc, rc, A[j]);
2339 A[j] = 0.;
2340 break;
2341 case DIAG_KEEP:
2342 break;
2343 default:
2344 mfem_error("SparseMatrix::EliminateRowCol #3");
2345 break;
2346 }
2347 }
2348 else
2349 {
2350 Ae.Add(rc, col, A[j]);
2351 A[j] = 0.0;
2352 for (int k = I[col]; true; k++)
2353 {
2354 if (k == I[col+1])
2355 {
2356 mfem_error("SparseMatrix::EliminateRowCol #4");
2357 }
2358 else if (J[k] == rc)
2359 {
2360 Ae.Add(col, rc, A[k]);
2361 A[k] = 0.0;
2362 break;
2363 }
2364 }
2365 }
2366 }
2367 }
2368}
2369
2371 DiagonalPolicy diag_policy)
2372{
2373 const int n_ess_dofs = ess_dofs.Size();
2374 const auto ess_dofs_d = ess_dofs.Read();
2375 const auto dI = ReadI();
2376 const auto dJ = ReadJ();
2377 auto dA = ReadWriteData();
2378
2379 mfem::forall(n_ess_dofs, [=] MFEM_HOST_DEVICE (int i)
2380 {
2381 const int idof = ess_dofs_d[i];
2382 for (int j=dI[idof]; j<dI[idof+1]; ++j)
2383 {
2384 const int jdof = dJ[j];
2385 if (jdof != idof)
2386 {
2387 dA[j] = 0.0;
2388 for (int k=dI[jdof]; k<dI[jdof+1]; ++k)
2389 {
2390 if (dJ[k] == idof)
2391 {
2392 dA[k] = 0.0;
2393 break;
2394 }
2395 }
2396 }
2397 else
2398 {
2399 if (diag_policy == DiagonalPolicy::DIAG_ONE)
2400 {
2401 dA[j] = 1.0;
2402 }
2403 else if (diag_policy == DiagonalPolicy::DIAG_ZERO)
2404 {
2405 dA[j] = 0.0;
2406 }
2407 // else (diag_policy == DiagonalPolicy::DIAG_KEEP)
2408 }
2409 }
2410 });
2411}
2412
2414{
2415 for (int i = 0; i < height; i++)
2416 {
2417 if (I[i+1] == I[i]+1 && fabs(A[I[i]]) < 1e-16)
2418 {
2419 A[I[i]] = 1.0;
2420 }
2421 }
2422}
2423
2425{
2426 for (int i = 0; i < height; i++)
2427 {
2428 real_t zero = 0.0;
2429 for (int j = I[i]; j < I[i+1]; j++)
2430 {
2431 zero += fabs(A[j]);
2432 }
2433 if (zero <= threshold)
2434 {
2435 for (int j = I[i]; j < I[i+1]; j++)
2436 {
2437 A[j] = (J[j] == i) ? 1.0 : 0.0;
2438 }
2439 }
2440 }
2441}
2442
2444{
2445 if (!Finalized())
2446 {
2447 real_t *yp = y.GetData();
2448 const real_t *xp = x.GetData();
2449 RowNode *diag_p, *n_p, **R = Rows;
2450
2451 const int s = height;
2452 for (int i = 0; i < s; i++)
2453 {
2454 real_t sum = 0.0;
2455 diag_p = NULL;
2456 for (n_p = R[i]; n_p != NULL; n_p = n_p->Prev)
2457 {
2458 const int c = n_p->Column;
2459 if (c == i)
2460 {
2461 diag_p = n_p;
2462 }
2463 else
2464 {
2465 sum += n_p->Value * yp[c];
2466 }
2467 }
2468
2469 if (diag_p != NULL && diag_p->Value != 0.0)
2470 {
2471 yp[i] = (xp[i] - sum) / diag_p->Value;
2472 }
2473 else if (xp[i] == sum)
2474 {
2475 yp[i] = sum;
2476 }
2477 else
2478 {
2479 mfem_error("SparseMatrix::Gauss_Seidel_forw()");
2480 }
2481 }
2482 }
2483 else
2484 {
2485 const int s = height;
2486 const int nnz = J.Capacity();
2487 const int *Ip = HostRead(I, s+1);
2488 const int *Jp = HostRead(J, nnz);
2489 const real_t *Ap = HostRead(A, nnz);
2490 real_t *yp = y.HostReadWrite();
2491 const real_t *xp = x.HostRead();
2492
2493 for (int i = 0, j = Ip[0]; i < s; i++)
2494 {
2495 const int end = Ip[i+1];
2496 real_t sum = 0.0;
2497 int d = -1;
2498 for ( ; j < end; j++)
2499 {
2500 const int c = Jp[j];
2501 if (c == i)
2502 {
2503 d = j;
2504 }
2505 else
2506 {
2507 sum += Ap[j] * yp[c];
2508 }
2509 }
2510
2511 if (d >= 0 && Ap[d] != 0.0)
2512 {
2513 yp[i] = (xp[i] - sum) / Ap[d];
2514 }
2515 else if (xp[i] == sum)
2516 {
2517 yp[i] = sum;
2518 }
2519 else
2520 {
2521 mfem_error("SparseMatrix::Gauss_Seidel_forw(...) #2");
2522 }
2523 }
2524 }
2525}
2526
2528{
2529 if (!Finalized())
2530 {
2531 real_t *yp = y.GetData();
2532 const real_t *xp = x.GetData();
2533 RowNode *diag_p, *n_p, **R = Rows;
2534
2535 for (int i = height-1; i >= 0; i--)
2536 {
2537 real_t sum = 0.;
2538 diag_p = NULL;
2539 for (n_p = R[i]; n_p != NULL; n_p = n_p->Prev)
2540 {
2541 const int c = n_p->Column;
2542 if (c == i)
2543 {
2544 diag_p = n_p;
2545 }
2546 else
2547 {
2548 sum += n_p->Value * yp[c];
2549 }
2550 }
2551
2552 if (diag_p != NULL && diag_p->Value != 0.0)
2553 {
2554 yp[i] = (xp[i] - sum) / diag_p->Value;
2555 }
2556 else if (xp[i] == sum)
2557 {
2558 yp[i] = sum;
2559 }
2560 else
2561 {
2562 mfem_error("SparseMatrix::Gauss_Seidel_back()");
2563 }
2564 }
2565 }
2566 else
2567 {
2568 const int s = height;
2569 const int nnz = J.Capacity();
2570 const int *Ip = HostRead(I, s+1);
2571 const int *Jp = HostRead(J, nnz);
2572 const real_t *Ap = HostRead(A, nnz);
2573 real_t *yp = y.HostReadWrite();
2574 const real_t *xp = x.HostRead();
2575
2576 for (int i = s-1, j = Ip[s]-1; i >= 0; i--)
2577 {
2578 const int beg = Ip[i];
2579 real_t sum = 0.;
2580 int d = -1;
2581 for ( ; j >= beg; j--)
2582 {
2583 const int c = Jp[j];
2584 if (c == i)
2585 {
2586 d = j;
2587 }
2588 else
2589 {
2590 sum += Ap[j] * yp[c];
2591 }
2592 }
2593
2594 if (d >= 0 && Ap[d] != 0.0)
2595 {
2596 yp[i] = (xp[i] - sum) / Ap[d];
2597 }
2598 else if (xp[i] == sum)
2599 {
2600 yp[i] = sum;
2601 }
2602 else
2603 {
2604 mfem_error("SparseMatrix::Gauss_Seidel_back(...) #2");
2605 }
2606 }
2607 }
2608}
2609
2611{
2612 MFEM_VERIFY(Finalized(), "Matrix must be finalized.");
2613
2614 real_t sc = 1.0;
2615 for (int i = 0; i < height; i++)
2616 {
2617 int d = -1;
2618 real_t norm = 0.0;
2619 for (int j = I[i]; j < I[i+1]; j++)
2620 {
2621 if (J[j] == i)
2622 {
2623 d = j;
2624 }
2625 norm += fabs(A[j]);
2626 }
2627 if (d >= 0 && A[d] != 0.0)
2628 {
2629 real_t a = 1.8 * fabs(A[d]) / norm;
2630 if (a < sc)
2631 {
2632 sc = a;
2633 }
2634 }
2635 else
2636 {
2637 mfem_error("SparseMatrix::GetJacobiScaling() #2");
2638 }
2639 }
2640 return sc;
2641}
2642
2643void SparseMatrix::Jacobi(const Vector &b, const Vector &x0, Vector &x1,
2644 real_t sc, bool use_abs_diag) const
2645{
2646 MFEM_VERIFY(Finalized(), "Matrix must be finalized.");
2647
2648 for (int i = 0; i < height; i++)
2649 {
2650 int d = -1;
2651 real_t sum = b(i);
2652 for (int j = I[i]; j < I[i+1]; j++)
2653 {
2654 if (J[j] == i)
2655 {
2656 d = j;
2657 }
2658 else
2659 {
2660 sum -= A[j] * x0(J[j]);
2661 }
2662 }
2663 if (d >= 0 && A[d] != 0.0)
2664 {
2665 const real_t diag = (use_abs_diag) ? fabs(A[d]) : A[d];
2666 x1(i) = sc * (sum / diag) + (1.0 - sc) * x0(i);
2667 }
2668 else
2669 {
2670 mfem_error("SparseMatrix::Jacobi(...) #2");
2671 }
2672 }
2673}
2674
2676 real_t sc, bool use_abs_diag) const
2677{
2678 MFEM_VERIFY(Finalized(), "Matrix must be finalized.");
2679
2680 const int H = height;
2681 const int nnz = J.Capacity();
2682 const bool use_dev = b.UseDevice() || x.UseDevice();
2683
2684 const auto Ap = Read(A, nnz, use_dev);
2685 const auto Ip = Read(I, height+1, use_dev);
2686 const auto Jp = Read(J, nnz, use_dev);
2687
2688 const auto bp = b.Read(use_dev);
2689 auto xp = x.Write(use_dev);
2690
2691 mfem::forall_switch(use_dev, H, [=] MFEM_HOST_DEVICE (int i)
2692 {
2693 const int end = Ip[i+1];
2694 for (int j = Ip[i]; true; j++)
2695 {
2696 if (j == end)
2697 {
2698 MFEM_ABORT_KERNEL("Diagonal not found in SparseMatrix::DiagScale");
2699 }
2700 if (Jp[j] == i)
2701 {
2702 const real_t diag = (use_abs_diag) ? fabs(Ap[j]) : Ap[j];
2703 if (diag == 0.0)
2704 {
2705 MFEM_ABORT_KERNEL("Zero diagonal in SparseMatrix::DiagScale");
2706 }
2707 xp[i] = sc * bp[i] / diag;
2708 break;
2709 }
2710 }
2711 });
2712}
2713
2714template <bool useFabs>
2715static void JacobiDispatch(const Vector &b, const Vector &x0, Vector &x1,
2716 const Memory<int> &I, const Memory<int> &J,
2717 const Memory<real_t> &A, const int height,
2718 const real_t sc)
2719{
2720 const bool useDevice = b.UseDevice() || x0.UseDevice() || x1.UseDevice();
2721
2722 const auto bp = b.Read(useDevice);
2723 const auto x0p = x0.Read(useDevice);
2724 auto x1p = x1.Write(useDevice);
2725
2726 const auto Ip = Read(I, height+1, useDevice);
2727 const auto Jp = Read(J, J.Capacity(), useDevice);
2728 const auto Ap = Read(A, J.Capacity(), useDevice);
2729
2730 mfem::forall_switch(useDevice, height, [=] MFEM_HOST_DEVICE (int i)
2731 {
2732 real_t resi = bp[i], norm = 0.0;
2733 for (int j = Ip[i]; j < Ip[i+1]; j++)
2734 {
2735 resi -= Ap[j] * x0p[Jp[j]];
2736 if (useFabs)
2737 {
2738 norm += fabs(Ap[j]);
2739 }
2740 else
2741 {
2742 norm += Ap[j];
2743 }
2744 }
2745 if (norm > 0.0)
2746 {
2747 x1p[i] = x0p[i] + sc * resi / norm;
2748 }
2749 else
2750 {
2751 if (useFabs)
2752 {
2753 MFEM_ABORT_KERNEL("L1 norm of row is zero.");
2754 }
2755 else
2756 {
2757 MFEM_ABORT_KERNEL("sum of row is zero.");
2758 }
2759 }
2760 });
2761}
2762
2763void SparseMatrix::Jacobi2(const Vector &b, const Vector &x0, Vector &x1,
2764 real_t sc) const
2765{
2766 MFEM_VERIFY(Finalized(), "Matrix must be finalized.");
2767 JacobiDispatch<true>(b,x0,x1,I,J,A,height,sc);
2768}
2769
2770void SparseMatrix::Jacobi3(const Vector &b, const Vector &x0, Vector &x1,
2771 real_t sc) const
2772{
2773 MFEM_VERIFY(Finalized(), "Matrix must be finalized.");
2774 JacobiDispatch<false>(b,x0,x1,I,J,A,height,sc);
2775}
2776
2778 const DenseMatrix &subm, int skip_zeros)
2779{
2780 int i, j, gi, gj, s, t;
2781 real_t a;
2782
2783 if (Finalized())
2784 {
2785 HostReadI();
2786 HostReadJ();
2788 }
2789
2790 for (i = 0; i < rows.Size(); i++)
2791 {
2792 if ((gi=rows[i]) < 0) { gi = -1-gi, s = -1; }
2793 else { s = 1; }
2794 MFEM_ASSERT(gi < height,
2795 "Trying to insert a row " << gi << " outside the matrix height "
2796 << height);
2797 SetColPtr(gi);
2798 for (j = 0; j < cols.Size(); j++)
2799 {
2800 if ((gj=cols[j]) < 0) { gj = -1-gj, t = -s; }
2801 else { t = s; }
2802 MFEM_ASSERT(gj < width,
2803 "Trying to insert a column " << gj << " outside the matrix width "
2804 << width);
2805 a = subm(i, j);
2806 if (skip_zeros && a == 0.0)
2807 {
2808 // Skip assembly of zero elements if either:
2809 // (i) user specified to skip zeros regardless of symmetry, or
2810 // (ii) symmetry is not broken.
2811 if (skip_zeros == 2 || &rows != &cols || subm(j, i) == 0.0)
2812 {
2813 continue;
2814 }
2815 }
2816 if (t < 0) { a = -a; }
2817 _Add_(gj, a);
2818 }
2819 ClearColPtr();
2820 }
2821}
2822
2823void SparseMatrix::Set(const int i, const int j, const real_t val)
2824{
2825 real_t a = val;
2826 int gi, gj, s, t;
2827
2828 if ((gi=i) < 0) { gi = -1-gi, s = -1; }
2829 else { s = 1; }
2830 MFEM_ASSERT(gi < height,
2831 "Trying to set a row " << gi << " outside the matrix height "
2832 << height);
2833 if ((gj=j) < 0) { gj = -1-gj, t = -s; }
2834 else { t = s; }
2835 MFEM_ASSERT(gj < width,
2836 "Trying to set a column " << gj << " outside the matrix width "
2837 << width);
2838 if (t < 0) { a = -a; }
2839 _Set_(gi, gj, a);
2840}
2841
2842void SparseMatrix::Add(const int i, const int j, const real_t val)
2843{
2844 int gi, gj, s, t;
2845 real_t a = val;
2846
2847 if ((gi=i) < 0) { gi = -1-gi, s = -1; }
2848 else { s = 1; }
2849 MFEM_ASSERT(gi < height,
2850 "Trying to insert a row " << gi << " outside the matrix height "
2851 << height);
2852 if ((gj=j) < 0) { gj = -1-gj, t = -s; }
2853 else { t = s; }
2854 MFEM_ASSERT(gj < width,
2855 "Trying to insert a column " << gj << " outside the matrix width "
2856 << width);
2857 if (t < 0) { a = -a; }
2858 _Add_(gi, gj, a);
2859}
2860
2862 const DenseMatrix &subm, int skip_zeros)
2863{
2864 int i, j, gi, gj, s, t;
2865 real_t a;
2866
2867 for (i = 0; i < rows.Size(); i++)
2868 {
2869 if ((gi=rows[i]) < 0) { gi = -1-gi, s = -1; }
2870 else { s = 1; }
2871 MFEM_ASSERT(gi < height,
2872 "Trying to set a row " << gi << " outside the matrix height "
2873 << height);
2874 SetColPtr(gi);
2875 for (j = 0; j < cols.Size(); j++)
2876 {
2877 a = subm(i, j);
2878 if (skip_zeros && a == 0.0)
2879 {
2880 // Skip assembly of zero elements if either:
2881 // (i) user specified to skip zeros regardless of symmetry, or
2882 // (ii) symmetry is not broken.
2883 if (skip_zeros == 2 || &rows != &cols || subm(j, i) == 0.0)
2884 {
2885 continue;
2886 }
2887 }
2888 if ((gj=cols[j]) < 0) { gj = -1-gj, t = -s; }
2889 else { t = s; }
2890 MFEM_ASSERT(gj < width,
2891 "Trying to set a column " << gj << " outside the matrix width "
2892 << width);
2893 if (t < 0) { a = -a; }
2894 _Set_(gj, a);
2895 }
2896 ClearColPtr();
2897 }
2898}
2899
2901 const Array<int> &cols,
2902 const DenseMatrix &subm,
2903 int skip_zeros)
2904{
2905 int i, j, gi, gj, s, t;
2906 real_t a;
2907
2908 for (i = 0; i < rows.Size(); i++)
2909 {
2910 if ((gi=rows[i]) < 0) { gi = -1-gi, s = -1; }
2911 else { s = 1; }
2912 MFEM_ASSERT(gi < height,
2913 "Trying to set a row " << gi << " outside the matrix height "
2914 << height);
2915 SetColPtr(gi);
2916 for (j = 0; j < cols.Size(); j++)
2917 {
2918 a = subm(j, i);
2919 if (skip_zeros && a == 0.0)
2920 {
2921 // Skip assembly of zero elements if either:
2922 // (i) user specified to skip zeros regardless of symmetry, or
2923 // (ii) symmetry is not broken.
2924 if (skip_zeros == 2 || &rows != &cols || subm(j, i) == 0.0)
2925 {
2926 continue;
2927 }
2928 }
2929 if ((gj=cols[j]) < 0) { gj = -1-gj, t = -s; }
2930 else { t = s; }
2931 MFEM_ASSERT(gj < width,
2932 "Trying to set a column " << gj << " outside the matrix width "
2933 << width);
2934 if (t < 0) { a = -a; }
2935 _Set_(gj, a);
2936 }
2937 ClearColPtr();
2938 }
2939}
2940
2942 DenseMatrix &subm) const
2943{
2944 int i, j, gi, gj, s, t;
2945 real_t a;
2946
2947 for (i = 0; i < rows.Size(); i++)
2948 {
2949 if ((gi=rows[i]) < 0) { gi = -1-gi, s = -1; }
2950 else { s = 1; }
2951 MFEM_ASSERT(gi < height,
2952 "Trying to read a row " << gi << " outside the matrix height "
2953 << height);
2954 SetColPtr(gi);
2955 for (j = 0; j < cols.Size(); j++)
2956 {
2957 if ((gj=cols[j]) < 0) { gj = -1-gj, t = -s; }
2958 else { t = s; }
2959 MFEM_ASSERT(gj < width,
2960 "Trying to read a column " << gj << " outside the matrix width "
2961 << width);
2962 a = _Get_(gj);
2963 subm(i, j) = (t < 0) ? (-a) : (a);
2964 }
2965 ClearColPtr();
2966 }
2967}
2968
2969bool SparseMatrix::RowIsEmpty(const int row) const
2970{
2971 int gi;
2972
2973 if ((gi=row) < 0)
2974 {
2975 gi = -1-gi;
2976 }
2977 MFEM_ASSERT(gi < height,
2978 "Trying to query a row " << gi << " outside the matrix height "
2979 << height);
2980 if (Rows)
2981 {
2982 return (Rows[gi] == NULL);
2983 }
2984 else
2985 {
2986 return (I[gi] == I[gi+1]);
2987 }
2988}
2989
2990int SparseMatrix::GetRow(const int row, Array<int> &cols, Vector &srow) const
2991{
2992 RowNode *n;
2993 int j, gi;
2994
2995 if ((gi=row) < 0) { gi = -1-gi; }
2996 MFEM_ASSERT(gi < height,
2997 "Trying to read a row " << gi << " outside the matrix height "
2998 << height);
2999 if (Rows)
3000 {
3001 for (n = Rows[gi], j = 0; n; n = n->Prev)
3002 {
3003 j++;
3004 }
3005 cols.SetSize(j);
3006 srow.SetSize(j);
3007 for (n = Rows[gi], j = 0; n; n = n->Prev, j++)
3008 {
3009 cols[j] = n->Column;
3010 srow(j) = n->Value;
3011 }
3012 if (row < 0)
3013 {
3014 srow.Neg();
3015 }
3016
3017 return 0;
3018 }
3019 else
3020 {
3021 j = I[gi];
3022 cols.MakeRef(const_cast<int*>((const int*)J) + j, I[gi+1]-j);
3023 srow.NewDataAndSize(
3024 const_cast<real_t*>((const real_t*)A) + j, cols.Size());
3025 MFEM_ASSERT(row >= 0, "Row not valid: " << row << ", height: " << height);
3026 return 1;
3027 }
3028}
3029
3030void SparseMatrix::SetRow(const int row, const Array<int> &cols,
3031 const Vector &srow)
3032{
3033 int gi, gj, s, t;
3034 real_t a;
3035
3036 if ((gi=row) < 0) { gi = -1-gi, s = -1; }
3037 else { s = 1; }
3038 MFEM_ASSERT(gi < height,
3039 "Trying to set a row " << gi << " outside the matrix height "
3040 << height);
3041
3042 if (!Finalized())
3043 {
3044 SetColPtr(gi);
3045 for (int j = 0; j < cols.Size(); j++)
3046 {
3047 if ((gj=cols[j]) < 0) { gj = -1-gj, t = -s; }
3048 else { t = s; }
3049 MFEM_ASSERT(gj < width,
3050 "Trying to set a column " << gj << " outside the matrix"
3051 " width " << width);
3052 a = srow(j);
3053 if (t < 0) { a = -a; }
3054 _Set_(gj, a);
3055 }
3056 ClearColPtr();
3057 }
3058 else
3059 {
3060 MFEM_ASSERT(cols.Size() == RowSize(gi), "");
3061 MFEM_ASSERT(cols.Size() == srow.Size(), "");
3062
3063 for (int i = I[gi], j = 0; j < cols.Size(); j++, i++)
3064 {
3065 if ((gj=cols[j]) < 0) { gj = -1-gj, t = -s; }
3066 else { t = s; }
3067 MFEM_ASSERT(gj < width,
3068 "Trying to set a column " << gj << " outside the matrix"
3069 " width " << width);
3070
3071 J[i] = gj;
3072 A[i] = srow[j] * t;
3073 }
3074 }
3075}
3076
3077void SparseMatrix::AddRow(const int row, const Array<int> &cols,
3078 const Vector &srow)
3079{
3080 int j, gi, gj, s, t;
3081 real_t a;
3082
3083 MFEM_VERIFY(!Finalized(), "Matrix must NOT be finalized.");
3084
3085 if ((gi=row) < 0) { gi = -1-gi, s = -1; }
3086 else { s = 1; }
3087 MFEM_ASSERT(gi < height,
3088 "Trying to insert a row " << gi << " outside the matrix height "
3089 << height);
3090 SetColPtr(gi);
3091 for (j = 0; j < cols.Size(); j++)
3092 {
3093 if ((gj=cols[j]) < 0) { gj = -1-gj, t = -s; }
3094 else { t = s; }
3095 MFEM_ASSERT(gj < width,
3096 "Trying to insert a column " << gj << " outside the matrix width "
3097 << width);
3098 a = srow(j);
3099 if (a == 0.0)
3100 {
3101 continue;
3102 }
3103 if (t < 0) { a = -a; }
3104 _Add_(gj, a);
3105 }
3106 ClearColPtr();
3107}
3108
3109void SparseMatrix::ScaleRow(const int row, const real_t scale)
3110{
3111 int i;
3112
3113 if ((i=row) < 0)
3114 {
3115 i = -1-i;
3116 }
3117 if (Rows != NULL)
3118 {
3119 RowNode *aux;
3120
3121 for (aux = Rows[i]; aux != NULL; aux = aux -> Prev)
3122 {
3123 aux -> Value *= scale;
3124 }
3125 }
3126 else
3127 {
3128 int j, end = I[i+1];
3129
3130 for (j = I[i]; j < end; j++)
3131 {
3132 A[j] *= scale;
3133 }
3134 }
3135}
3136
3138{
3139 real_t scale;
3140 if (Rows != NULL)
3141 {
3142 RowNode *aux;
3143 for (int i=0; i < height; ++i)
3144 {
3145 scale = sl(i);
3146 for (aux = Rows[i]; aux != NULL; aux = aux -> Prev)
3147 {
3148 aux -> Value *= scale;
3149 }
3150 }
3151 }
3152 else
3153 {
3154 int j, end;
3155
3156 for (int i=0; i < height; ++i)
3157 {
3158 end = I[i+1];
3159 scale = sl(i);
3160 for (j = I[i]; j < end; j++)
3161 {
3162 A[j] *= scale;
3163 }
3164 }
3165 }
3166}
3167
3169{
3170 if (Rows != NULL)
3171 {
3172 RowNode *aux;
3173 for (int i=0; i < height; ++i)
3174 {
3175 for (aux = Rows[i]; aux != NULL; aux = aux -> Prev)
3176 {
3177 aux -> Value *= sr(aux->Column);
3178 }
3179 }
3180 }
3181 else
3182 {
3183 int j, end;
3184
3185 for (int i=0; i < height; ++i)
3186 {
3187 end = I[i+1];
3188 for (j = I[i]; j < end; j++)
3189 {
3190 A[j] *= sr(J[j]);
3191 }
3192 }
3193 }
3194}
3195
3197{
3198 MFEM_ASSERT(height == B.height && width == B.width,
3199 "Mismatch of this matrix size and rhs. This height = "
3200 << height << ", width = " << width << ", B.height = "
3201 << B.height << ", B.width = " << B.width);
3202
3203 for (int i = 0; i < height; i++)
3204 {
3205 SetColPtr(i);
3206 if (B.Rows)
3207 {
3208 for (RowNode *aux = B.Rows[i]; aux != NULL; aux = aux->Prev)
3209 {
3210 _Add_(aux->Column, aux->Value);
3211 }
3212 }
3213 else
3214 {
3215 for (int j = B.I[i]; j < B.I[i+1]; j++)
3216 {
3217 _Add_(B.J[j], B.A[j]);
3218 }
3219 }
3220 ClearColPtr();
3221 }
3222
3223 return (*this);
3224}
3225
3227{
3228 for (int i = 0; i < height; i++)
3229 {
3230 B.SetColPtr(i);
3231 if (Rows)
3232 {
3233 for (RowNode *np = Rows[i]; np != NULL; np = np->Prev)
3234 {
3235 np->Value += a * B._Get_(np->Column);
3236 }
3237 }
3238 else
3239 {
3240 for (int j = I[i]; j < I[i+1]; j++)
3241 {
3242 A[j] += a * B._Get_(J[j]);
3243 }
3244 }
3245 B.ClearColPtr();
3246 }
3247}
3248
3250{
3251 if (Rows == NULL)
3252 {
3253 const int nnz = J.Capacity();
3254 real_t *h_A = HostWrite(A, nnz);
3255 for (int i = 0; i < nnz; i++)
3256 {
3257 h_A[i] = a;
3258 }
3259 }
3260 else
3261 {
3262 for (int i = 0; i < height; i++)
3263 {
3264 for (RowNode *node_p = Rows[i]; node_p != NULL;
3265 node_p = node_p -> Prev)
3266 {
3267 node_p -> Value = a;
3268 }
3269 }
3270 }
3271
3272 return (*this);
3273}
3274
3276{
3277 if (Rows == NULL)
3278 {
3279 for (int i = 0, nnz = I[height]; i < nnz; i++)
3280 {
3281 A[i] *= a;
3282 }
3283 }
3284 else
3285 {
3286 for (int i = 0; i < height; i++)
3287 {
3288 for (RowNode *node_p = Rows[i]; node_p != NULL;
3289 node_p = node_p -> Prev)
3290 {
3291 node_p -> Value *= a;
3292 }
3293 }
3294 }
3295
3296 return (*this);
3297}
3298
3299void SparseMatrix::Print(std::ostream & os, int width_) const
3300{
3301 int i, j;
3302
3303 if (A == NULL)
3304 {
3305 RowNode *nd;
3306 for (i = 0; i < height; i++)
3307 {
3308 os << "[row " << i << "]\n";
3309 for (nd = Rows[i], j = 0; nd != NULL; nd = nd->Prev, j++)
3310 {
3311 os << " (" << nd->Column << "," << nd->Value << ")";
3312 if ( !((j+1) % width_) )
3313 {
3314 os << '\n';
3315 }
3316 }
3317 if (j % width_)
3318 {
3319 os << '\n';
3320 }
3321 }
3322 return;
3323 }
3324
3325 // HostRead forces synchronization
3326 HostReadI();
3327 HostReadJ();
3328 HostReadData();
3329 for (i = 0; i < height; i++)
3330 {
3331 os << "[row " << i << "]\n";
3332 for (j = I[i]; j < I[i+1]; j++)
3333 {
3334 os << " (" << J[j] << "," << A[j] << ")";
3335 if ( !((j+1-I[i]) % width_) )
3336 {
3337 os << '\n';
3338 }
3339 }
3340 if ((j-I[i]) % width_)
3341 {
3342 os << '\n';
3343 }
3344 }
3345}
3346
3347void SparseMatrix::PrintMatlab(std::ostream & os) const
3348{
3349 os << "% size " << height << " " << width << "\n";
3350 os << "% Non Zeros " << NumNonZeroElems() << "\n";
3351
3352 int i, j;
3353 ios::fmtflags old_fmt = os.flags();
3354 os.setf(ios::scientific);
3355 std::streamsize old_prec = os.precision(14);
3356
3357 if (A == NULL)
3358 {
3359 RowNode *nd;
3360 for (i = 0; i < height; i++)
3361 {
3362 for (nd = Rows[i], j = 0; nd != NULL; nd = nd->Prev, j++)
3363 {
3364 os << i+1 << " " << nd->Column+1 << " " << nd->Value << '\n';
3365 }
3366 }
3367 }
3368 else
3369 {
3370 // HostRead forces synchronization
3371 HostReadI();
3372 HostReadJ();
3373 HostReadData();
3374 for (i = 0; i < height; i++)
3375 {
3376 for (j = I[i]; j < I[i+1]; j++)
3377 {
3378 os << i+1 << " " << J[j]+1 << " " << A[j] << '\n';
3379 }
3380 }
3381 }
3382 // Write a zero entry at (m,n) to make sure MATLAB doesn't shrink the matrix
3383 os << height << " " << width << " 0.0\n";
3384 os.precision(old_prec);
3385 os.flags(old_fmt);
3386}
3387
3388void SparseMatrix::PrintMM(std::ostream & os) const
3389{
3390 int i, j;
3391 ios::fmtflags old_fmt = os.flags();
3392 os.setf(ios::scientific);
3393 std::streamsize old_prec = os.precision(14);
3394
3395 os << "%%MatrixMarket matrix coordinate real general" << '\n'
3396 << "% Generated by MFEM" << '\n';
3397
3398 os << height << " " << width << " " << NumNonZeroElems() << '\n';
3399
3400 if (A == NULL)
3401 {
3402 RowNode *nd;
3403 for (i = 0; i < height; i++)
3404 {
3405 for (nd = Rows[i], j = 0; nd != NULL; nd = nd->Prev, j++)
3406 {
3407 os << i+1 << " " << nd->Column+1 << " " << nd->Value << '\n';
3408 }
3409 }
3410 }
3411 else
3412 {
3413 // HostRead forces synchronization
3414 HostReadI();
3415 HostReadJ();
3416 HostReadData();
3417 for (i = 0; i < height; i++)
3418 {
3419 for (j = I[i]; j < I[i+1]; j++)
3420 {
3421 os << i+1 << " " << J[j]+1 << " " << A[j] << '\n';
3422 }
3423 }
3424 }
3425 os.precision(old_prec);
3426 os.flags(old_fmt);
3427}
3428
3429void SparseMatrix::PrintCSR(std::ostream & os) const
3430{
3431 MFEM_VERIFY(Finalized(), "Matrix must be finalized.");
3432
3433 int i;
3434
3435 os << height << '\n'; // number of rows
3436
3437 // HostRead forces synchronization
3438 HostReadI();
3439 HostReadJ();
3440 HostReadData();
3441 for (i = 0; i <= height; i++)
3442 {
3443 os << I[i]+1 << '\n';
3444 }
3445
3446 for (i = 0; i < I[height]; i++)
3447 {
3448 os << J[i]+1 << '\n';
3449 }
3450
3451 for (i = 0; i < I[height]; i++)
3452 {
3453 os << A[i] << '\n';
3454 }
3455}
3456
3457void SparseMatrix::PrintCSR2(std::ostream & os) const
3458{
3459 MFEM_VERIFY(Finalized(), "Matrix must be finalized.");
3460
3461 int i;
3462
3463 os << height << '\n'; // number of rows
3464 os << width << '\n'; // number of columns
3465
3466 // HostRead forces synchronization
3467 HostReadI();
3468 HostReadJ();
3469 HostReadData();
3470 for (i = 0; i <= height; i++)
3471 {
3472 os << I[i] << '\n';
3473 }
3474
3475 for (i = 0; i < I[height]; i++)
3476 {
3477 os << J[i] << '\n';
3478 }
3479
3480 for (i = 0; i < I[height]; i++)
3481 {
3482 os << A[i] << '\n';
3483 }
3484}
3485
3486void SparseMatrix::PrintInfo(std::ostream &os) const
3487{
3488 const real_t MiB = 1024.*1024;
3489 int nnz = NumNonZeroElems();
3490 real_t pz = 100./nnz;
3491 int nz = CountSmallElems(0.0);
3492 real_t max_norm = MaxNorm();
3493 real_t symm = IsSymmetric();
3494 int nnf = CheckFinite();
3495 int ns12 = CountSmallElems(1e-12*max_norm);
3496 int ns15 = CountSmallElems(1e-15*max_norm);
3497 int ns18 = CountSmallElems(1e-18*max_norm);
3498
3499 os <<
3500 "SparseMatrix statistics:\n"
3501 " Format : " <<
3502 (Empty() ? "(empty)" : (Finalized() ? "CSR" : "LIL")) << "\n"
3503 " Dimensions : " << height << " x " << width << "\n"
3504 " Number of entries (total) : " << nnz << "\n"
3505 " Number of entries (per row) : " << 1.*nnz/Height() << "\n"
3506 " Number of stored zeros : " << nz*pz << "% (" << nz << ")\n"
3507 " Number of Inf/Nan entries : " << nnf*pz << "% ("<< nnf << ")\n"
3508 " Norm, max |a_ij| : " << max_norm << "\n"
3509 " Symmetry, max |a_ij-a_ji| : " << symm << "\n"
3510 " Number of small entries:\n"
3511 " |a_ij| <= 1e-12*Norm : " << ns12*pz << "% (" << ns12 << ")\n"
3512 " |a_ij| <= 1e-15*Norm : " << ns15*pz << "% (" << ns15 << ")\n"
3513 " |a_ij| <= 1e-18*Norm : " << ns18*pz << "% (" << ns18 << ")\n";
3514 if (Finalized())
3515 {
3516 os << " Memory used by CSR : " <<
3517 (sizeof(int)*(height+1+nnz)+sizeof(real_t)*nnz)/MiB << " MiB\n";
3518 }
3519 if (Rows != NULL)
3520 {
3521 size_t used_mem = sizeof(RowNode*)*height;
3522#ifdef MFEM_USE_MEMALLOC
3523 used_mem += NodesMem->MemoryUsage();
3524#else
3525 for (int i = 0; i < height; i++)
3526 {
3527 for (RowNode *aux = Rows[i]; aux != NULL; aux = aux->Prev)
3528 {
3529 used_mem += sizeof(RowNode);
3530 }
3531 }
3532#endif
3533 os << " Memory used by LIL : " << used_mem/MiB << " MiB\n";
3534 }
3535}
3536
3538{
3539 I.Delete();
3540 J.Delete();
3541 A.Delete();
3542
3543 if (Rows != NULL)
3544 {
3545#if !defined(MFEM_USE_MEMALLOC)
3546 for (int i = 0; i < height; i++)
3547 {
3548 RowNode *aux, *node_p = Rows[i];
3549 while (node_p != NULL)
3550 {
3551 aux = node_p;
3552 node_p = node_p->Prev;
3553 delete aux;
3554 }
3555 }
3556#endif
3557 delete [] Rows;
3558 }
3559
3560 delete [] ColPtrJ;
3561 delete [] ColPtrNode;
3562#ifdef MFEM_USE_MEMALLOC
3563 delete NodesMem;
3564#endif
3565 delete At;
3566
3568}
3569
3571{
3572 int awidth = 0;
3573 if (A)
3574 {
3575 const int *start_j = J;
3576 const int *end_j = J + I[height];
3577 for (const int *jptr = start_j; jptr != end_j; ++jptr)
3578 {
3579 awidth = std::max(awidth, *jptr + 1);
3580 }
3581 }
3582 else
3583 {
3584 RowNode *aux;
3585 for (int i = 0; i < height; i++)
3586 {
3587 for (aux = Rows[i]; aux != NULL; aux = aux->Prev)
3588 {
3589 awidth = std::max(awidth, aux->Column + 1);
3590 }
3591 }
3592 }
3593 return awidth;
3594}
3595
3597{
3598 int n = S.NumNonZeroElems();
3599 real_t * s = S.GetData();
3600
3601 for (int i = 0; i < n; i++)
3602 {
3603 s[i] = f(s[i]);
3604 }
3605}
3606
3608{
3609 MFEM_VERIFY(
3610 A.Finalized(),
3611 "Finalize must be called before Transpose. Use TransposeRowMatrix instead");
3612
3613 int i, j, end;
3614 const int *A_i, *A_j;
3615 int m, n, nnz, *At_i, *At_j;
3616 const real_t *A_data;
3617 real_t *At_data;
3618
3619 m = A.Height(); // number of rows of A
3620 n = A.Width(); // number of columns of A
3621 nnz = A.NumNonZeroElems();
3622 A_i = A.HostReadI();
3623 A_j = A.HostReadJ();
3624 A_data = A.HostReadData();
3625
3626 At_i = Memory<int>(n+1);
3627 At_j = Memory<int>(nnz);
3628 At_data = Memory<real_t>(nnz);
3629
3630 for (i = 0; i <= n; i++)
3631 {
3632 At_i[i] = 0;
3633 }
3634 for (i = 0; i < nnz; i++)
3635 {
3636 At_i[A_j[i]+1]++;
3637 }
3638 for (i = 1; i < n; i++)
3639 {
3640 At_i[i+1] += At_i[i];
3641 }
3642
3643 for (i = j = 0; i < m; i++)
3644 {
3645 end = A_i[i+1];
3646 for ( ; j < end; j++)
3647 {
3648 At_j[At_i[A_j[j]]] = i;
3649 At_data[At_i[A_j[j]]] = A_data[j];
3650 At_i[A_j[j]]++;
3651 }
3652 }
3653
3654 for (i = n; i > 0; i--)
3655 {
3656 At_i[i] = At_i[i-1];
3657 }
3658 At_i[0] = 0;
3659
3660 return new SparseMatrix(At_i, At_j, At_data, n, m);
3661}
3662
3664 int useActualWidth)
3665{
3666 int i, j;
3667 int m, n, nnz, *At_i, *At_j;
3668 real_t *At_data;
3669 Array<int> Acols;
3670 Vector Avals;
3671
3672 m = A.Height(); // number of rows of A
3673 if (useActualWidth)
3674 {
3675 n = 0;
3676 int tmp;
3677 for (i = 0; i < m; i++)
3678 {
3679 A.GetRow(i, Acols, Avals);
3680 if (Acols.Size())
3681 {
3682 tmp = Acols.Max();
3683 if (tmp > n)
3684 {
3685 n = tmp;
3686 }
3687 }
3688 }
3689 ++n;
3690 }
3691 else
3692 {
3693 n = A.Width(); // number of columns of A
3694 }
3695 nnz = A.NumNonZeroElems();
3696
3697 At_i = Memory<int>(n+1);
3698 At_j = Memory<int>(nnz);
3699 At_data = Memory<real_t>(nnz);
3700
3701 for (i = 0; i <= n; i++)
3702 {
3703 At_i[i] = 0;
3704 }
3705
3706 for (i = 0; i < m; i++)
3707 {
3708 A.GetRow(i, Acols, Avals);
3709 for (j = 0; j<Acols.Size(); ++j)
3710 {
3711 At_i[Acols[j]+1]++;
3712 }
3713 }
3714 for (i = 1; i < n; i++)
3715 {
3716 At_i[i+1] += At_i[i];
3717 }
3718
3719 for (i = 0; i < m; i++)
3720 {
3721 A.GetRow(i, Acols, Avals);
3722 for (j = 0; j<Acols.Size(); ++j)
3723 {
3724 At_j[At_i[Acols[j]]] = i;
3725 At_data[At_i[Acols[j]]] = Avals[j];
3726 At_i[Acols[j]]++;
3727 }
3728 }
3729
3730 for (i = n; i > 0; i--)
3731 {
3732 At_i[i] = At_i[i-1];
3733 }
3734 At_i[0] = 0;
3735
3736 return new SparseMatrix(At_i, At_j, At_data, n, m);
3737}
3738
3739
3741 SparseMatrix *OAB)
3742{
3743 int nrowsA, ncolsA, nrowsB, ncolsB;
3744 const int *A_i, *A_j, *B_i, *B_j;
3745 int *C_i, *C_j, *B_marker;
3746 const real_t *A_data, *B_data;
3747 real_t *C_data;
3748 int ia, ib, ic, ja, jb, num_nonzeros;
3749 int row_start, counter;
3750 real_t a_entry, b_entry;
3751 SparseMatrix *C;
3752
3753 nrowsA = A.Height();
3754 ncolsA = A.Width();
3755 nrowsB = B.Height();
3756 ncolsB = B.Width();
3757
3758 MFEM_VERIFY(ncolsA == nrowsB,
3759 "number of columns of A (" << ncolsA
3760 << ") must equal number of rows of B (" << nrowsB << ")");
3761
3762 A_i = A.HostReadI();
3763 A_j = A.HostReadJ();
3764 A_data = A.HostReadData();
3765 B_i = B.HostReadI();
3766 B_j = B.HostReadJ();
3767 B_data = B.HostReadData();
3768
3769 B_marker = new int[ncolsB];
3770
3771 for (ib = 0; ib < ncolsB; ib++)
3772 {
3773 B_marker[ib] = -1;
3774 }
3775
3776 if (OAB == NULL)
3777 {
3778 C_i = Memory<int>(nrowsA+1);
3779
3780 C_i[0] = num_nonzeros = 0;
3781 for (ic = 0; ic < nrowsA; ic++)
3782 {
3783 for (ia = A_i[ic]; ia < A_i[ic+1]; ia++)
3784 {
3785 ja = A_j[ia];
3786 for (ib = B_i[ja]; ib < B_i[ja+1]; ib++)
3787 {
3788 jb = B_j[ib];
3789 if (B_marker[jb] != ic)
3790 {
3791 B_marker[jb] = ic;
3792 num_nonzeros++;
3793 }
3794 }
3795 }
3796 C_i[ic+1] = num_nonzeros;
3797 }
3798
3799 C_j = Memory<int>(num_nonzeros);
3800 C_data = Memory<real_t>(num_nonzeros);
3801
3802 C = new SparseMatrix(C_i, C_j, C_data, nrowsA, ncolsB);
3803
3804 for (ib = 0; ib < ncolsB; ib++)
3805 {
3806 B_marker[ib] = -1;
3807 }
3808 }
3809 else
3810 {
3811 C = OAB;
3812
3813 MFEM_VERIFY(nrowsA == C->Height() && ncolsB == C->Width(),
3814 "Input matrix sizes do not match output sizes"
3815 << " nrowsA = " << nrowsA
3816 << ", C->Height() = " << C->Height()
3817 << " ncolsB = " << ncolsB
3818 << ", C->Width() = " << C->Width());
3819
3820 // C_i = C->HostReadI(); // not used
3821 C_j = C->HostWriteJ();
3822 C_data = C->HostWriteData();
3823 }
3824
3825 counter = 0;
3826 for (ic = 0; ic < nrowsA; ic++)
3827 {
3828 // row_start = C_i[ic];
3829 row_start = counter;
3830 for (ia = A_i[ic]; ia < A_i[ic+1]; ia++)
3831 {
3832 ja = A_j[ia];
3833 a_entry = A_data[ia];
3834 for (ib = B_i[ja]; ib < B_i[ja+1]; ib++)
3835 {
3836 jb = B_j[ib];
3837 b_entry = B_data[ib];
3838 if (B_marker[jb] < row_start)
3839 {
3840 B_marker[jb] = counter;
3841 if (OAB == NULL)
3842 {
3843 C_j[counter] = jb;
3844 }
3845 C_data[counter] = a_entry*b_entry;
3846 counter++;
3847 }
3848 else
3849 {
3850 C_data[B_marker[jb]] += a_entry*b_entry;
3851 }
3852 }
3853 }
3854 }
3855
3856 MFEM_VERIFY(
3857 OAB == NULL || counter == OAB->NumNonZeroElems(),
3858 "With pre-allocated output matrix, number of non-zeros ("
3859 << OAB->NumNonZeroElems()
3860 << ") did not match number of entries changed from matrix-matrix multiply, "
3861 << counter);
3862
3863 delete [] B_marker;
3864
3865 return C;
3866}
3867
3869{
3870 SparseMatrix *At = Transpose(A);
3871 SparseMatrix *AtB = Mult(*At, B);
3872 delete At;
3873 return AtB;
3874}
3875
3877 const AbstractSparseMatrix &B)
3878{
3879 int nrowsA, ncolsA, nrowsB, ncolsB;
3880 int *C_i, *C_j, *B_marker;
3881 real_t *C_data;
3882 int ia, ib, ic, ja, jb, num_nonzeros;
3883 int row_start, counter;
3884 real_t a_entry, b_entry;
3885 SparseMatrix *C;
3886
3887 nrowsA = A.Height();
3888 ncolsA = A.Width();
3889 nrowsB = B.Height();
3890 ncolsB = B.Width();
3891
3892 MFEM_VERIFY(ncolsA == nrowsB,
3893 "number of columns of A (" << ncolsA
3894 << ") must equal number of rows of B (" << nrowsB << ")");
3895
3896 B_marker = new int[ncolsB];
3897
3898 for (ib = 0; ib < ncolsB; ib++)
3899 {
3900 B_marker[ib] = -1;
3901 }
3902
3903 C_i = Memory<int>(nrowsA+1);
3904
3905 C_i[0] = num_nonzeros = 0;
3906
3907 Array<int> colsA, colsB;
3908 Vector dataA, dataB;
3909 for (ic = 0; ic < nrowsA; ic++)
3910 {
3911 A.GetRow(ic, colsA, dataA);
3912 for (ia = 0; ia < colsA.Size(); ia++)
3913 {
3914 ja = colsA[ia];
3915 B.GetRow(ja, colsB, dataB);
3916 for (ib = 0; ib < colsB.Size(); ib++)
3917 {
3918 jb = colsB[ib];
3919 if (B_marker[jb] != ic)
3920 {
3921 B_marker[jb] = ic;
3922 num_nonzeros++;
3923 }
3924 }
3925 }
3926 C_i[ic+1] = num_nonzeros;
3927 }
3928
3929 C_j = Memory<int>(num_nonzeros);
3930 C_data = Memory<real_t>(num_nonzeros);
3931
3932 C = new SparseMatrix(C_i, C_j, C_data, nrowsA, ncolsB);
3933
3934 for (ib = 0; ib < ncolsB; ib++)
3935 {
3936 B_marker[ib] = -1;
3937 }
3938
3939 counter = 0;
3940 for (ic = 0; ic < nrowsA; ic++)
3941 {
3942 row_start = counter;
3943 A.GetRow(ic, colsA, dataA);
3944 for (ia = 0; ia < colsA.Size(); ia++)
3945 {
3946 ja = colsA[ia];
3947 a_entry = dataA[ia];
3948 B.GetRow(ja, colsB, dataB);
3949 for (ib = 0; ib < colsB.Size(); ib++)
3950 {
3951 jb = colsB[ib];
3952 b_entry = dataB[ib];
3953 if (B_marker[jb] < row_start)
3954 {
3955 B_marker[jb] = counter;
3956 C_j[counter] = jb;
3957 C_data[counter] = a_entry*b_entry;
3958 counter++;
3959 }
3960 else
3961 {
3962 C_data[B_marker[jb]] += a_entry*b_entry;
3963 }
3964 }
3965 }
3966 }
3967
3968 delete [] B_marker;
3969
3970 return C;
3971}
3972
3974{
3975 DenseMatrix *C = new DenseMatrix(A.Height(), B.Width());
3976 Vector columnB, columnC;
3977 for (int j = 0; j < B.Width(); ++j)
3978 {
3979 B.GetColumnReference(j, columnB);
3980 C->GetColumnReference(j, columnC);
3981 A.Mult(columnB, columnC);
3982 }
3983 return C;
3984}
3985
3987{
3988 DenseMatrix R (P, 't'); // R = P^T
3989 DenseMatrix *AP = Mult (A, P);
3990 DenseMatrix *RAP_ = new DenseMatrix(R.Height(), AP->Width());
3991 Mult (R, *AP, *RAP_);
3992 delete AP;
3993 return RAP_;
3994}
3995
3997{
3998 SparseMatrix *R = Transpose(P);
3999 DenseMatrix *RA = Mult(*R, A);
4000 DenseMatrix AtP(*RA, 't');
4001 delete RA;
4002 DenseMatrix *RAtP = Mult(*R, AtP);
4003 delete R;
4004 DenseMatrix * RAP_ = new DenseMatrix(*RAtP, 't');
4005 delete RAtP;
4006 return RAP_;
4007}
4008
4010 SparseMatrix *ORAP)
4011{
4012 SparseMatrix *P = Transpose (R);
4013 SparseMatrix *AP = Mult (A, *P);
4014 delete P;
4015 SparseMatrix *RAP_ = Mult (R, *AP, ORAP);
4016 delete AP;
4017 return RAP_;
4018}
4019
4021 const SparseMatrix &P)
4022{
4023 SparseMatrix * R = Transpose(Rt);
4024 SparseMatrix * RA = Mult(*R,A);
4025 delete R;
4026 SparseMatrix * RAP_ = Mult(*RA, P);
4027 delete RA;
4028 return RAP_;
4029}
4030
4032 SparseMatrix *OAtDA)
4033{
4034 int i, At_nnz, *At_j;
4035 real_t *At_data;
4036
4037 SparseMatrix *At = Transpose (A);
4038 At_nnz = At -> NumNonZeroElems();
4039 At_j = At -> GetJ();
4040 At_data = At -> GetData();
4041 for (i = 0; i < At_nnz; i++)
4042 {
4043 At_data[i] *= D(At_j[i]);
4044 }
4045 SparseMatrix *AtDA = Mult (*At, A, OAtDA);
4046 delete At;
4047 return AtDA;
4048}
4049
4051 const SparseMatrix & B)
4052{
4053 int nrows = A.Height();
4054 int ncols = A.Width();
4055
4056 int * C_i = Memory<int>(nrows+1);
4057 int * C_j;
4058 real_t * C_data;
4059
4060 const int *A_i = A.GetI();
4061 const int *A_j = A.GetJ();
4062 const real_t *A_data = A.GetData();
4063
4064 const int *B_i = B.GetI();
4065 const int *B_j = B.GetJ();
4066 const real_t *B_data = B.GetData();
4067
4068 int * marker = new int[ncols];
4069 std::fill(marker, marker+ncols, -1);
4070
4071 int num_nonzeros = 0, jcol;
4072 C_i[0] = 0;
4073 for (int ic = 0; ic < nrows; ic++)
4074 {
4075 for (int ia = A_i[ic]; ia < A_i[ic+1]; ia++)
4076 {
4077 jcol = A_j[ia];
4078 marker[jcol] = ic;
4079 num_nonzeros++;
4080 }
4081 for (int ib = B_i[ic]; ib < B_i[ic+1]; ib++)
4082 {
4083 jcol = B_j[ib];
4084 if (marker[jcol] != ic)
4085 {
4086 marker[jcol] = ic;
4087 num_nonzeros++;
4088 }
4089 }
4090 C_i[ic+1] = num_nonzeros;
4091 }
4092
4093 C_j = Memory<int>(num_nonzeros);
4094 C_data = Memory<real_t>(num_nonzeros);
4095
4096 for (int ia = 0; ia < ncols; ia++)
4097 {
4098 marker[ia] = -1;
4099 }
4100
4101 int pos = 0;
4102 for (int ic = 0; ic < nrows; ic++)
4103 {
4104 for (int ia = A_i[ic]; ia < A_i[ic+1]; ia++)
4105 {
4106 jcol = A_j[ia];
4107 C_j[pos] = jcol;
4108 C_data[pos] = a*A_data[ia];
4109 marker[jcol] = pos;
4110 pos++;
4111 }
4112 for (int ib = B_i[ic]; ib < B_i[ic+1]; ib++)
4113 {
4114 jcol = B_j[ib];
4115 if (marker[jcol] < C_i[ic])
4116 {
4117 C_j[pos] = jcol;
4118 C_data[pos] = b*B_data[ib];
4119 marker[jcol] = pos;
4120 pos++;
4121 }
4122 else
4123 {
4124 C_data[marker[jcol]] += b*B_data[ib];
4125 }
4126 }
4127 }
4128
4129 delete[] marker;
4130 return new SparseMatrix(C_i, C_j, C_data, nrows, ncols);
4131}
4132
4134{
4135 return Add(1.,A,1.,B);
4136}
4137
4139{
4140 MFEM_ASSERT(Ai.Size() > 0, "invalid size Ai.Size() = " << Ai.Size());
4141
4142 SparseMatrix * accumulate = Ai[0];
4143 SparseMatrix * result = accumulate;
4144
4145 for (int i=1; i < Ai.Size(); ++i)
4146 {
4147 result = Add(*accumulate, *Ai[i]);
4148 if (i != 1)
4149 {
4150 delete accumulate;
4151 }
4152
4153 accumulate = result;
4154 }
4155
4156 return result;
4157}
4158
4159/// B += alpha * A
4160void Add(const SparseMatrix &A,
4162{
4163 for (int r = 0; r < B.Height(); r++)
4164 {
4165 const int * colA = A.GetRowColumns(r);
4166 const real_t * valA = A.GetRowEntries(r);
4167 for (int i=0; i<A.RowSize(r); i++)
4168 {
4169 B(r, colA[i]) += alpha * valA[i];
4170 }
4171 }
4172}
4173
4174/// Produces a block matrix with blocks A_{ij}*B
4176{
4177 int mA = A.Height(), nA = A.Width();
4178 int mB = B.Height(), nB = B.Width();
4179
4180 DenseMatrix *C = new DenseMatrix(mA * mB, nA * nB);
4181 *C = 0.0;
4182 for (int i=0; i<mA; i++)
4183 {
4184 for (int j=0; j<nA; j++)
4185 {
4186 C->AddMatrix(A(i,j), B, i * mB, j * nB);
4187 }
4188 }
4189 return C;
4190}
4191
4192/// Produces a block matrix with blocks A_{ij}*B
4194{
4195 int mA = A.Height(), nA = A.Width();
4196 int mB = B.Height(), nB = B.Width();
4197
4198 SparseMatrix *C = new SparseMatrix(mA * mB, nA * nB);
4199
4200 for (int i=0; i<mA; i++)
4201 {
4202 for (int j=0; j<nA; j++)
4203 {
4204 for (int r=0; r<mB; r++)
4205 {
4206 const int * colB = B.GetRowColumns(r);
4207 const real_t * valB = B.GetRowEntries(r);
4208
4209 for (int cj=0; cj<B.RowSize(r); cj++)
4210 {
4211 C->Set(i * mB + r, j * nB + colB[cj], A(i,j) * valB[cj]);
4212 }
4213 }
4214 }
4215 }
4216 C->Finalize();
4217
4218 return C;
4219}
4220
4221/// Produces a block matrix with blocks A_{ij}*B
4223{
4224 int mA = A.Height(), nA = A.Width();
4225 int mB = B.Height(), nB = B.Width();
4226
4227 SparseMatrix *C = new SparseMatrix(mA * mB, nA * nB);
4228
4229 for (int r=0; r<mA; r++)
4230 {
4231 const int * colA = A.GetRowColumns(r);
4232 const real_t * valA = A.GetRowEntries(r);
4233
4234 for (int aj=0; aj<A.RowSize(r); aj++)
4235 {
4236 for (int i=0; i<mB; i++)
4237 {
4238 for (int j=0; j<nB; j++)
4239 {
4240 C->Set(r * mB + i, colA[aj] * nB + j, valA[aj] * B(i, j));
4241 }
4242 }
4243 }
4244 }
4245 C->Finalize();
4246
4247 return C;
4248}
4249
4250/// Produces a block matrix with blocks A_{ij}*B
4252{
4253 int mA = A.Height(), nA = A.Width();
4254 int mB = B.Height(), nB = B.Width();
4255
4256 SparseMatrix *C = new SparseMatrix(mA * mB, nA * nB);
4257
4258 for (int ar=0; ar<mA; ar++)
4259 {
4260 const int * colA = A.GetRowColumns(ar);
4261 const real_t * valA = A.GetRowEntries(ar);
4262
4263 for (int aj=0; aj<A.RowSize(ar); aj++)
4264 {
4265 for (int br=0; br<mB; br++)
4266 {
4267 const int * colB = B.GetRowColumns(br);
4268 const real_t * valB = B.GetRowEntries(br);
4269
4270 for (int bj=0; bj<B.RowSize(br); bj++)
4271 {
4272 C->Set(ar * mB + br, colA[aj] * nB + colB[bj],
4273 valA[aj] * valB[bj]);
4274 }
4275 }
4276 }
4277 }
4278 C->Finalize();
4279
4280 return C;
4281}
4282
4284{
4285 mfem::Swap(width, other.width);
4286 mfem::Swap(height, other.height);
4287 mfem::Swap(I, other.I);
4288 mfem::Swap(J, other.J);
4289 mfem::Swap(A, other.A);
4290 mfem::Swap(Rows, other.Rows);
4292 mfem::Swap(ColPtrJ, other.ColPtrJ);
4294 mfem::Swap(At, other.At);
4295
4296#ifdef MFEM_USE_MEMALLOC
4298#endif
4299
4301}
4302
4304{
4305 Destroy();
4306#ifdef MFEM_USE_CUDA_OR_HIP
4308 {
4309 if (SparseMatrixCount==1)
4310 {
4311 if (handle)
4312 {
4313 MFEM_cu_or_hip(sparseDestroy)(handle);
4314 handle = nullptr;
4315 }
4316 if (dBuffer)
4317 {
4318 MFEM_Cu_or_Hip(MemFree)(dBuffer);
4319 dBuffer = nullptr;
4320 bufferSize = 0;
4321 }
4322 }
4324 }
4325#endif // MFEM_USE_CUDA_OR_HIP
4326}
4327
4328}
Abstract data type for sparse matrices.
Definition matrix.hpp:74
virtual int NumNonZeroElems() const =0
Returns the number of non-zeros in a matrix.
virtual int GetRow(const int row, Array< int > &cols, Vector &srow) const =0
Gets the columns indexes and values for row row.
Dynamic 2D array using row-major layout.
Definition array.hpp:372
int NumCols() const
Definition array.hpp:388
int NumRows() const
Definition array.hpp:387
Memory< T > & GetMemory()
Return a reference to the Memory object used by the Array.
Definition array.hpp:123
T Max() const
Find the maximal element in the array, using the comparison operator < for class T.
Definition array.cpp:68
void Sort()
Sorts the array in ascending order. This requires operator< to be defined for T.
Definition array.hpp:261
void SetSize(int nsize)
Change the logical size of the array, keep existing entries.
Definition array.hpp:697
int Size() const
Return the logical size of the array.
Definition array.hpp:144
void MakeRef(T *data_, int size_, bool own_data=false)
Make this Array a reference to a pointer.
Definition array.hpp:882
T * Write(bool on_dev=true)
Shortcut for mfem::Write(a.GetMemory(), a.Size(), on_dev).
Definition array.hpp:325
const T * Read(bool on_dev=true) const
Shortcut for mfem::Read(a.GetMemory(), a.Size(), on_dev).
Definition array.hpp:317
Data type dense matrix using column-major storage.
Definition densemat.hpp:24
void GetColumnReference(int c, Vector &col)
Definition densemat.hpp:312
void SetSize(int s)
Change the size of the DenseMatrix to s x s.
Definition densemat.hpp:105
void AddMatrix(DenseMatrix &A, int ro, int co)
Perform (ro+i,co+j)+=A(i,j) for 0<=i.
static bool Allows(unsigned long b_mask)
Return true if any of the backends in the backend mask, b_mask, are allowed.
Definition device.hpp:259
static MemoryType GetDeviceMemoryType()
Get the current Device MemoryType. This is the MemoryType used by most MFEM classes when allocating m...
Definition device.hpp:274
Abstract data type for matrix inverse.
Definition matrix.hpp:63
size_t MemoryUsage() const
Class used by MFEM to store pointers to host and/or device memory.
void SetHostPtrOwner(bool own) const
Set/clear the ownership flag for the host pointer. Ownership indicates whether the pointer will be de...
int Capacity() const
Return the size of the allocated memory.
MemoryType GetMemoryType() const
Return a MemoryType that is currently valid. If both the host and the device pointers are currently v...
void CopyFrom(const Memory &src, int size)
Copy size entries from src to *this.
void Reset()
Reset the memory to be empty, ensuring that Delete() will be a no-op.
void Wrap(T *ptr, int size, bool own)
Wrap an externally allocated host pointer, ptr with the current host memory type returned by MemoryMa...
void Delete()
Delete the owned pointers and reset the Memory object.
void ClearOwnerFlags() const
Clear the ownership flags for the host and device pointers, as well as any internal data allocated by...
void New(int size)
Allocate host memory for size entries with the current host memory type returned by MemoryManager::Ge...
int width
Dimension of the input / number of columns in the matrix.
Definition operator.hpp:28
int Height() const
Get the height (size of output) of the Operator. Synonym with NumRows().
Definition operator.hpp:66
int height
Dimension of the output / number of rows in the matrix.
Definition operator.hpp:27
DiagonalPolicy
Defines operator diagonal policy upon elimination of rows and/or columns.
Definition operator.hpp:48
@ DIAG_ONE
Set the diagonal value to one.
Definition operator.hpp:50
@ DIAG_KEEP
Keep the diagonal value.
Definition operator.hpp:51
@ DIAG_ZERO
Set the diagonal value to zero.
Definition operator.hpp:49
int Width() const
Get the width (size of input) of the Operator. Synonym with NumCols().
Definition operator.hpp:72
RowNode * Prev
Definition sparsemat.hpp:45
Data type sparse matrix.
Definition sparsemat.hpp:51
void PrintCSR2(std::ostream &out) const
Prints a sparse matrix to stream out in CSR format.
real_t MaxNorm() const
const int * HostReadJ() const
void GetDiag(Vector &d) const
Returns the Diagonal of A.
real_t GetRowNorml1(int irow) const
For i = irow compute .
virtual void MultTranspose(const Vector &x, Vector &y) const
Multiply a vector with the transposed matrix. y = At * x.
void PartMult(const Array< int > &rows, const Vector &x, Vector &y) const
bool isSorted
Are the columns sorted already.
Definition sparsemat.hpp:88
RowNodeAlloc * NodesMem
Definition sparsemat.hpp:84
void Gauss_Seidel_back(const Vector &x, Vector &y) const
bool RowIsEmpty(const int row) const
void ClearColPtr() const
Reset the "current row" set by calling SetColPtr(). This method must be called between any two calls ...
void Jacobi3(const Vector &b, const Vector &x0, Vector &x1, real_t sc=1.0) const
real_t * ReadWriteData(bool on_dev=true)
Memory< real_t > A
Array with size I[height], containing the actual entries of the sparse matrix, as indexed by the I ar...
Definition sparsemat.hpp:68
void EliminateRowCol(int rc, const real_t sol, Vector &rhs, DiagonalPolicy dpolicy=DIAG_ONE)
Eliminate row rc and column rc and modify the rhs using sol.
SparseMatrix & operator*=(real_t a)
virtual int GetRow(const int row, Array< int > &cols, Vector &srow) const
Extract all column indices and values from a given row.
void SetWidth(int width_=-1)
Change the width of a SparseMatrix.
MemAlloc< RowNode, 1024 > RowNodeAlloc
Definition sparsemat.hpp:83
bool Empty() const
Check if the SparseMatrix is empty.
void GetSubMatrix(const Array< int > &rows, const Array< int > &cols, DenseMatrix &subm) const
SparseMatrix * At
Transpose of A. Owned. Used to perform MultTranspose() on devices.
Definition sparsemat.hpp:80
virtual MatrixInverse * Inverse() const
This virtual method is not supported: it always returns NULL.
void PartAddMult(const Array< int > &rows, const Vector &x, Vector &y, const real_t a=1.0) const
void GetRowSums(Vector &x) const
For all i compute .
bool Finalized() const
Returns whether or not CSR format has been finalized.
void EliminateBC(const Array< int > &ess_dofs, DiagonalPolicy diag_policy)
Eliminate essential (Dirichlet) boundary conditions.
void PrintInfo(std::ostream &out) const
Print various sparse matrix statistics.
virtual void AddMultTranspose(const Vector &x, Vector &y, const real_t a=1.0) const
y += At * x (default) or y += a * At * x
void MoveDiagonalFirst()
Move the diagonal entry to the first position in each row, preserving the order of the rest of the co...
void Add(const int i, const int j, const real_t val)
void EliminateRowColMultipleRHS(int rc, const Vector &sol, DenseMatrix &rhs, DiagonalPolicy dpolicy=DIAG_ONE)
Similar to EliminateRowCol(int, const double, Vector &, DiagonalPolicy), but multiple values for elim...
void Jacobi(const Vector &b, const Vector &x0, Vector &x1, real_t sc, bool use_abs_diag=false) const
void SetColPtr(const int row) const
Initialize the SparseMatrix for fast access to the entries of the given row which becomes the "curren...
void SetDiagIdentity()
If a row contains only one diag entry of zero, set it to 1.
void AbsMultTranspose(const Vector &x, Vector &y) const
y = |At| * x, using entry-wise absolute values of the transpose of matrix A
void Gauss_Seidel_forw(const Vector &x, Vector &y) const
Gauss-Seidel forward and backward iterations over a vector x.
void MakeRef(const SparseMatrix &master)
Clear the contents of the SparseMatrix and make it a reference to master.
int CheckFinite() const
Count the number of entries that are NOT finite, i.e. Inf or Nan.
Memory< int > J
Array with size I[height], containing the column indices for all matrix entries, as indexed by the I ...
Definition sparsemat.hpp:65
void EliminateCol(int col, DiagonalPolicy dpolicy=DIAG_ZERO)
Eliminates the column col from the matrix.
void EnsureMultTranspose() const
Ensures that the matrix is capable of performing MultTranspose(), AddMultTranspose(),...
void _Add_(const int col, const real_t a)
Add a value to an entry in the "current row". See SetColPtr().
SparseMatrix()
Create an empty SparseMatrix.
cusparseSpMatDescr_t matA_descr
void AbsMult(const Vector &x, Vector &y) const
y = |A| * x, using entry-wise absolute values of matrix A
void ClearGPUSparse()
Clear the cuSPARSE/hipSPARSE descriptors. This must be called after releasing the device memory of A.
Definition sparsemat.cpp:81
void Threshold(real_t tol, bool fix_empty_rows=false)
Remove entries smaller in absolute value than a given tolerance tol. If fix_empty_rows is true,...
void SetSubMatrix(const Array< int > &rows, const Array< int > &cols, const DenseMatrix &subm, int skip_zeros=1)
const int * ReadI(bool on_dev=true) const
real_t InnerProduct(const Vector &x, const Vector &y) const
Compute y^t A x.
Memory< int > I
Array with size (height+1) containing the row offsets.
Definition sparsemat.hpp:62
real_t & operator()(int i, int j)
Returns reference to A[i][j].
const real_t * HostReadData() const
void PrintMM(std::ostream &out=mfem::out) const
Prints matrix in Matrix Market sparse format.
void ScaleColumns(const Vector &sr)
this = this * diag(sr);
virtual void PrintMatlab(std::ostream &out=mfem::out) const
Prints matrix in matlab format.
void PrintCSR(std::ostream &out) const
Prints matrix to stream out in hypre_CSRMatrix format.
void Swap(SparseMatrix &other)
int * ReadWriteJ(bool on_dev=true)
cusparseDnVecDescr_t vecY_descr
void BooleanMultTranspose(const Array< int > &x, Array< int > &y) const
y = At * x, treating all entries as booleans (zero=false, nonzero=true).
static size_t bufferSize
RowNode ** Rows
Array of linked lists, one for every row. This array represents the linked list (LIL) storage format.
Definition sparsemat.hpp:73
real_t & SearchRow(const int col)
Perform a fast search for an entry in the "current row". See SetColPtr().
static int SparseMatrixCount
void AddSubMatrix(const Array< int > &rows, const Array< int > &cols, const DenseMatrix &subm, int skip_zeros=1)
void Symmetrize()
(*this) = 1/2 ((*this) + (*this)^t)
void BooleanMult(const Array< int > &x, Array< int > &y) const
y = A * x, treating all entries as booleans (zero=false, nonzero=true).
int * GetRowColumns(const int row)
Return a pointer to the column indices in a row.
real_t GetJacobiScaling() const
Determine appropriate scaling for Jacobi iteration.
RowNode ** ColPtrNode
Definition sparsemat.hpp:77
int RowSize(const int i) const
Returns the number of elements in row i.
real_t * HostReadWriteData()
int CountSmallElems(real_t tol) const
Count the number of entries with |a_ij| <= tol.
const int * ReadJ(bool on_dev=true) const
virtual void Mult(const Vector &x, Vector &y) const
Matrix vector multiplication.
virtual real_t & Elem(int i, int j)
Returns reference to a_{ij}.
int MaxRowSize() const
Returns the maximum number of elements among all rows.
real_t _Get_(const int col) const
Read the value of an entry in the "current row". See SetColPtr().
virtual ~SparseMatrix()
Destroys sparse matrix.
SparseMatrix & operator=(const SparseMatrix &rhs)
Assignment operator: deep copy.
virtual void EliminateZeroRows(const real_t threshold=1e-12)
If a row contains only zeros, set its diagonal to 1.
void OverrideSize(int height_, int width_)
Sets the height and width of the matrix.
real_t * HostWriteData()
static cusparseHandle_t handle
void EliminateRow(int row, const real_t sol, Vector &rhs)
Eliminates a column from the transpose matrix.
void GetBlocks(Array2D< SparseMatrix * > &blocks) const
SparseMatrix & operator+=(const SparseMatrix &B)
Add the sparse matrix 'B' to '*this'. This operation will cause an error if '*this' is finalized and ...
void Clear()
Clear the contents of the SparseMatrix.
void ResetTranspose() const
virtual void Finalize(int skip_zeros=1)
Finalize the matrix initialization, switching the storage format from LIL to CSR.
void EliminateRowColDiag(int rc, real_t value)
Perform elimination and set the diagonal entry to the given value.
real_t * GetRowEntries(const int row)
Return a pointer to the entries in a row.
void ScaleRows(const Vector &sl)
this = diag(sl) * this;
void SetRow(const int row, const Array< int > &cols, const Vector &srow)
virtual void AddMult(const Vector &x, Vector &y, const real_t a=1.0) const
y += A * x (default) or y += a * A * x
void DiagScale(const Vector &b, Vector &x, real_t sc=1.0, bool use_abs_diag=false) const
x = sc b / A_ii. When use_abs_diag = true, |A_ii| is used.
real_t * GetData()
Return the element data, i.e. the array A.
void SetSubMatrixTranspose(const Array< int > &rows, const Array< int > &cols, const DenseMatrix &subm, int skip_zeros=1)
void BuildTranspose() const
Build and store internally the transpose of this matrix which will be used in the methods AddMultTran...
void SortColumnIndices()
Sort the column indices corresponding to each row.
real_t IsSymmetric() const
Returns max_{i,j} |(i,j)-(j,i)| for a finalized matrix.
DenseMatrix * ToDenseMatrix() const
Produces a DenseMatrix from a SparseMatrix.
void _Set_(const int col, const real_t a)
Set an entry in the "current row". See SetColPtr().
static void * dBuffer
void EliminateCols(const Array< int > &cols, const Vector *x=NULL, Vector *b=NULL)
Eliminate all columns i for which cols[i] != 0.
int * GetJ()
Return the array J.
cusparseDnVecDescr_t vecX_descr
const real_t * ReadData(bool on_dev=true) const
virtual int NumNonZeroElems() const
Returns the number of the nonzero elements in the matrix.
void ScaleRow(const int row, const real_t scale)
void AddRow(const int row, const Array< int > &cols, const Vector &srow)
void Jacobi2(const Vector &b, const Vector &x0, Vector &x1, real_t sc=1.0) const
int * GetI()
Return the array I.
int ActualWidth() const
Returns the actual Width of the matrix.
const int * HostReadI() const
void Set(const int i, const int j, const real_t val)
void Print(std::ostream &out=mfem::out, int width_=4) const
Prints matrix to stream out.
Vector data type.
Definition vector.hpp:80
virtual const real_t * HostRead() const
Shortcut for mfem::Read(vec.GetMemory(), vec.Size(), false).
Definition vector.hpp:478
virtual const real_t * Read(bool on_dev=true) const
Shortcut for mfem::Read(vec.GetMemory(), vec.Size(), on_dev).
Definition vector.hpp:474
void Neg()
(*this) = -(*this)
Definition vector.cpp:300
virtual real_t * ReadWrite(bool on_dev=true)
Shortcut for mfem::ReadWrite(vec.GetMemory(), vec.Size(), on_dev).
Definition vector.hpp:490
int Size() const
Returns the size of the vector.
Definition vector.hpp:218
virtual void UseDevice(bool use_dev) const
Enable execution of Vector operations using the mfem::Device.
Definition vector.hpp:136
void SetSize(int s)
Resize the vector to size s.
Definition vector.hpp:538
void NewDataAndSize(real_t *d, int s)
Set the Vector data and size, deleting the old data, if owned.
Definition vector.hpp:181
real_t * GetData() const
Return a pointer to the beginning of the Vector data.
Definition vector.hpp:227
virtual real_t * HostReadWrite()
Shortcut for mfem::ReadWrite(vec.GetMemory(), vec.Size(), false).
Definition vector.hpp:494
virtual real_t * Write(bool on_dev=true)
Shortcut for mfem::Write(vec.GetMemory(), vec.Size(), on_dev).
Definition vector.hpp:482
Vector beta
const real_t alpha
Definition ex15.cpp:369
real_t b
Definition lissajous.cpp:42
real_t a
Definition lissajous.cpp:41
void * CuMemAlloc(void **dptr, size_t bytes)
Allocates device memory and returns destination ptr.
Definition cuda.cpp:34
void * CuMemFree(void *dptr)
Frees device memory and returns destination ptr.
Definition cuda.cpp:79
DenseMatrix * OuterProduct(const DenseMatrix &A, const DenseMatrix &B)
Produces a block matrix with blocks A_{ij}*B.
const T * Read(const Memory< T > &mem, int size, bool on_dev=true)
Get a pointer for read access to mem with the mfem::Device's DeviceMemoryClass, if on_dev = true,...
Definition device.hpp:320
void Swap(Array< T > &, Array< T > &)
Definition array.hpp:648
real_t infinity()
Define a shortcut for std::numeric_limits<double>::infinity()
Definition vector.hpp:45
void mfem_error(const char *msg)
Definition error.cpp:154
void Mult(const Table &A, const Table &B, Table &C)
C = A * B (as boolean matrices)
Definition table.cpp:476
const T * HostRead(const Memory< T > &mem, int size)
Shortcut to Read(const Memory<T> &mem, int size, false)
Definition device.hpp:327
T * Write(Memory< T > &mem, int size, bool on_dev=true)
Get a pointer for write access to mem with the mfem::Device's DeviceMemoryClass, if on_dev = true,...
Definition device.hpp:337
bool IsFinite(const real_t &val)
Definition vector.hpp:507
void Transpose(const Table &A, Table &At, int ncols_A_)
Transpose a Table.
Definition table.cpp:414
void RAP(const DenseMatrix &A, const DenseMatrix &P, DenseMatrix &RAP)
int CheckFinite(const real_t *v, const int n)
Definition vector.hpp:518
T * HostWrite(Memory< T > &mem, int size)
Shortcut to Write(const Memory<T> &mem, int size, false)
Definition device.hpp:344
SparseMatrix * MultAbstractSparseMatrix(const AbstractSparseMatrix &A, const AbstractSparseMatrix &B)
Matrix product of sparse matrices. A and B do not need to be CSR matrices.
void * HipMemFree(void *dptr)
Frees device memory.
Definition hip.cpp:79
SparseMatrix * TransposeAbstractSparseMatrix(const AbstractSparseMatrix &A, int useActualWidth)
Transpose of a sparse matrix. A does not need to be a CSR matrix.
SparseMatrix * Mult_AtDA(const SparseMatrix &A, const Vector &D, SparseMatrix *OAtDA)
Matrix multiplication A^t D A. All matrices must be finalized.
float real_t
Definition config.hpp:43
MemoryType
Memory types supported by MFEM.
std::function< real_t(const Vector &)> f(real_t mass_coeff)
Definition lor_mms.hpp:30
SparseMatrix * TransposeMult(const SparseMatrix &A, const SparseMatrix &B)
C = A^T B.
void * HipMemAlloc(void **dptr, size_t bytes)
Allocates device memory.
Definition hip.cpp:34
void forall(int N, lambda &&body)
Definition forall.hpp:754
void forall_switch(bool use_dev, int N, lambda &&body)
Definition forall.hpp:757
void Add(const DenseMatrix &A, const DenseMatrix &B, real_t alpha, DenseMatrix &C)
C = A + alpha*B.
void SparseMatrixFunction(SparseMatrix &S, real_t(*f)(real_t))
Applies f() to each element of the matrix (after it is finalized).
RefCoord t[3]
RefCoord s[3]
@ HIP_MASK
Biwise-OR of all HIP backends.
Definition device.hpp:91
@ CPU_MASK
Biwise-OR of all CPU backends.
Definition device.hpp:87
@ CUDA_MASK
Biwise-OR of all CUDA backends.
Definition device.hpp:89