MFEM v4.8.0
Finite element discretization library
Loading...
Searching...
No Matches
sparsemat.cpp
Go to the documentation of this file.
1// Copyright (c) 2010-2025, Lawrence Livermore National Security, LLC. Produced
2// at the Lawrence Livermore National Laboratory. All Rights reserved. See files
3// LICENSE and NOTICE for details. LLNL-CODE-806117.
4//
5// This file is part of the MFEM library. For more information and source code
6// availability visit https://mfem.org.
7//
8// MFEM is free software; you can redistribute it and/or modify it under the
9// terms of the BSD-3 license. We welcome feedback and contributions, see file
10// CONTRIBUTING.md for details.
11
12// Implementation of sparse matrix
13
14#include "linalg.hpp"
15#include "../general/forall.hpp"
16#include "../general/table.hpp"
19
20#include <iostream>
21#include <iomanip>
22#include <cmath>
23#include <algorithm>
24#include <limits>
25#include <cstring>
26
27#if defined(MFEM_USE_CUDA)
28#define MFEM_cu_or_hip(stub) cu##stub
29#define MFEM_Cu_or_Hip(stub) Cu##stub
30#define MFEM_CU_or_HIP(stub) CU##stub
31#define MFEM_CUDA_or_HIP(stub) CUDA##stub
32
33#if CUSPARSE_VERSION >= 11400
34#define MFEM_GPUSPARSE_ALG CUSPARSE_SPMV_CSR_ALG1
35#else // CUSPARSE_VERSION >= 11400
36#define MFEM_GPUSPARSE_ALG CUSPARSE_CSRMV_ALG1
37#endif // CUSPARSE_VERSION >= 11400
38
39#elif defined(MFEM_USE_HIP)
40#define MFEM_cu_or_hip(stub) hip##stub
41#define MFEM_Cu_or_Hip(stub) Hip##stub
42#define MFEM_CU_or_HIP(stub) HIP##stub
43#define MFEM_CUDA_or_HIP(stub) HIP##stub
44
45// https://hipsparse.readthedocs.io/en/latest/usermanual.html#hipsparsespmvalg-t
46#define MFEM_GPUSPARSE_ALG HIPSPARSE_CSRMV_ALG1
47#endif // defined(MFEM_USE_CUDA)
48
49namespace mfem
50{
51
52using namespace std;
53
54#ifdef MFEM_USE_CUDA_OR_HIP
56// doxygen doesn't like the macro-assisted typename so let's skip parsing it:
57/// @cond Suppress_Doxygen_warnings
58MFEM_cu_or_hip(sparseHandle_t) SparseMatrix::handle = nullptr;
59/// @endcond
61void * SparseMatrix::dBuffer = nullptr;
62#endif // MFEM_USE_CUDA_OR_HIP
63
65{
66 // Initialize cuSPARSE/hipSPARSE library
67#ifdef MFEM_USE_CUDA_OR_HIP
69 {
70 if (!handle) { MFEM_cu_or_hip(sparseCreate)(&handle); }
71 useGPUSparse=true;
73 }
74 else
75 {
76 useGPUSparse=false;
77 }
78#endif // MFEM_USE_CUDA_OR_HIP
79}
80
82{
83#ifdef MFEM_USE_CUDA_OR_HIP
84 if (initBuffers)
85 {
86#if CUDA_VERSION >= 10010 || defined(MFEM_USE_HIP)
87 MFEM_cu_or_hip(sparseDestroySpMat)(matA_descr);
88 MFEM_cu_or_hip(sparseDestroyDnVec)(vecX_descr);
89 MFEM_cu_or_hip(sparseDestroyDnVec)(vecY_descr);
90#else
91 cusparseDestroyMatDescr(matA_descr);
92#endif // CUDA_VERSION >= 10010 || defined(MFEM_USE_HIP)
93 initBuffers = false;
94 }
95#endif // MFEM_USE_CUDA_OR_HIP
96}
97
98SparseMatrix::SparseMatrix(int nrows, int ncols)
99 : AbstractSparseMatrix(nrows, (ncols >= 0) ? ncols : nrows),
100 Rows(new RowNode *[nrows]),
101 current_row(-1),
102 ColPtrJ(NULL),
103 ColPtrNode(NULL),
104 At(NULL),
105 isSorted(false)
106{
107 // We probably do not need to set the ownership flags here.
108 I.SetHostPtrOwner(true);
109 J.SetHostPtrOwner(true);
110 A.SetHostPtrOwner(true);
111
112 for (int i = 0; i < nrows; i++)
113 {
114 Rows[i] = NULL;
115 }
116
117#ifdef MFEM_USE_MEMALLOC
119#endif
120
122}
123
124SparseMatrix::SparseMatrix(int *i, int *j, real_t *data, int m, int n)
125 : AbstractSparseMatrix(m, n),
126 Rows(NULL),
127 ColPtrJ(NULL),
128 ColPtrNode(NULL),
129 At(NULL),
130 isSorted(false)
131{
132 I.Wrap(i, height+1, true);
133 J.Wrap(j, I[height], true);
134 A.Wrap(data, I[height], true);
135
136#ifdef MFEM_USE_MEMALLOC
137 NodesMem = NULL;
138#endif
139
141}
142
143SparseMatrix::SparseMatrix(int *i, int *j, real_t *data, int m, int n,
144 bool ownij, bool owna, bool issorted)
145 : AbstractSparseMatrix(m, n),
146 Rows(NULL),
147 ColPtrJ(NULL),
148 ColPtrNode(NULL),
149 At(NULL),
150 isSorted(issorted)
151{
152 I.Wrap(i, height+1, ownij);
153 J.Wrap(j, I[height], ownij);
154
155#ifdef MFEM_USE_MEMALLOC
156 NodesMem = NULL;
157#endif
158
159 if (data)
160 {
161 A.Wrap(data, I[height], owna);
162 }
163 else
164 {
165 const int nnz = I[height];
166 A.New(nnz);
167 for (int ii=0; ii<nnz; ++ii)
168 {
169 A[ii] = 0.0;
170 }
171 }
172
174}
175
176SparseMatrix::SparseMatrix(int nrows, int ncols, int rowsize)
177 : AbstractSparseMatrix(nrows, ncols)
178 , Rows(NULL)
179 , ColPtrJ(NULL)
180 , ColPtrNode(NULL)
181 , At(NULL)
182 , isSorted(false)
183{
184#ifdef MFEM_USE_MEMALLOC
185 NodesMem = NULL;
186#endif
187 I.New(nrows + 1);
188 J.New(nrows * rowsize);
189 A.New(nrows * rowsize);
190
191 for (int i = 0; i <= nrows; i++)
192 {
193 I[i] = i * rowsize;
194 }
195
197}
198
199SparseMatrix::SparseMatrix(const SparseMatrix &mat, bool copy_graph,
200 MemoryType mt)
201 : AbstractSparseMatrix(mat.Height(), mat.Width())
202{
203 if (mat.Finalized())
204 {
205 mat.HostReadI();
206 const int nnz = mat.I[height];
207 if (copy_graph)
208 {
209 I.New(height+1, mt == MemoryType::PRESERVE ? mat.I.GetMemoryType() : mt);
210 J.New(nnz, mt == MemoryType::PRESERVE ? mat.J.GetMemoryType() : mt);
211 I.CopyFrom(mat.I, height+1);
212 J.CopyFrom(mat.J, nnz);
213 }
214 else
215 {
216 I = mat.I;
217 J = mat.J;
220 }
221 A.New(nnz, mt == MemoryType::PRESERVE ? mat.A.GetMemoryType() : mt);
222 A.CopyFrom(mat.A, nnz);
223
224 Rows = NULL;
225#ifdef MFEM_USE_MEMALLOC
226 NodesMem = NULL;
227#endif
228 }
229 else
230 {
231#ifdef MFEM_USE_MEMALLOC
233#endif
234 Rows = new RowNode *[height];
235 for (int i = 0; i < height; i++)
236 {
237 RowNode **node_pp = &Rows[i];
238 for (RowNode *node_p = mat.Rows[i]; node_p; node_p = node_p->Prev)
239 {
240#ifdef MFEM_USE_MEMALLOC
241 RowNode *new_node_p = NodesMem->Alloc();
242#else
243 RowNode *new_node_p = new RowNode;
244#endif
245 new_node_p->Value = node_p->Value;
246 new_node_p->Column = node_p->Column;
247 *node_pp = new_node_p;
248 node_pp = &new_node_p->Prev;
249 }
250 *node_pp = NULL;
251 }
252
253 // We probably do not need to set the ownership flags here.
254 I.SetHostPtrOwner(true);
255 J.SetHostPtrOwner(true);
256 A.SetHostPtrOwner(true);
257 }
258
259 current_row = -1;
260 ColPtrJ = NULL;
261 ColPtrNode = NULL;
262 At = NULL;
263 isSorted = mat.isSorted;
264
266}
267
269 : AbstractSparseMatrix(v.Size(), v.Size())
270 , Rows(NULL)
271 , ColPtrJ(NULL)
272 , ColPtrNode(NULL)
273 , At(NULL)
274 , isSorted(true)
275{
276#ifdef MFEM_USE_MEMALLOC
277 NodesMem = NULL;
278#endif
279 I.New(height + 1);
280 J.New(height);
281 A.New(height);
282
283 for (int i = 0; i <= height; i++)
284 {
285 I[i] = i;
286 }
287
288 for (int r=0; r<height; r++)
289 {
290 J[r] = r;
291 A[r] = v[r];
292 }
293
295}
296
297void SparseMatrix::OverrideSize(int height_, int width_)
298{
299 height = height_;
300 width = width_;
301}
302
304{
305 Clear();
306
307 SparseMatrix copy(rhs);
308 Swap(copy);
309
310 return *this;
311}
312
314{
315 MFEM_ASSERT(master.Finalized(), "'master' must be finalized");
316 Clear();
317 height = master.Height();
318 width = master.Width();
319 I = master.I; I.ClearOwnerFlags();
320 J = master.J; J.ClearOwnerFlags();
321 A = master.A; A.ClearOwnerFlags();
322 isSorted = master.isSorted;
323}
324
326{
327 height = width = 0;
328 I.Reset();
329 J.Reset();
330 A.Reset();
331 Rows = NULL;
332 current_row = -1;
333 ColPtrJ = NULL;
334 ColPtrNode = NULL;
335 At = NULL;
336#ifdef MFEM_USE_MEMALLOC
337 NodesMem = NULL;
338#endif
339 isSorted = false;
340
342}
343
344int SparseMatrix::RowSize(const int i) const
345{
346 int gi = i;
347 if (gi < 0)
348 {
349 gi = -1-gi;
350 }
351
352 if (I)
353 {
354 return I[gi+1]-I[gi];
355 }
356
357 int s = 0;
358 RowNode *row = Rows[gi];
359 for ( ; row != NULL; row = row->Prev)
360 if (row->Value != 0.0)
361 {
362 s++;
363 }
364 return s;
365}
366
368{
369 int max_row_size=0;
370 int rowSize=0;
371 if (I)
372 {
373 for (int i=0; i < height; ++i)
374 {
375 rowSize = I[i+1]-I[i];
376 max_row_size = (max_row_size > rowSize) ? max_row_size : rowSize;
377 }
378 }
379 else
380 {
381 for (int i=0; i < height; ++i)
382 {
383 rowSize = RowSize(i);
384 max_row_size = (max_row_size > rowSize) ? max_row_size : rowSize;
385 }
386 }
387
388 return max_row_size;
389}
390
392{
393 MFEM_VERIFY(Finalized(), "Matrix must be finalized.");
394
395 return J + I[row];
396}
397
398const int *SparseMatrix::GetRowColumns(const int row) const
399{
400 MFEM_VERIFY(Finalized(), "Matrix must be finalized.");
401
402 return J + I[row];
403}
404
406{
407 MFEM_VERIFY(Finalized(), "Matrix must be finalized.");
408
409 return A + I[row];
410}
411
412const real_t *SparseMatrix::GetRowEntries(const int row) const
413{
414 MFEM_VERIFY(Finalized(), "Matrix must be finalized.");
415
416 return A + I[row];
417}
418
419void SparseMatrix::SetWidth(int newWidth)
420{
421 if (newWidth == width)
422 {
423 // Nothing to be done here
424 return;
425 }
426 else if (newWidth == -1)
427 {
428 // Compute the actual width
429 width = ActualWidth();
430 // No need to reset the ColPtr, since the new ColPtr will be shorter.
431 }
432 else if (newWidth > width)
433 {
434 // We need to reset ColPtr, since now we may have additional columns.
435 if (Rows != NULL)
436 {
437 delete [] ColPtrNode;
438 ColPtrNode = static_cast<RowNode **>(NULL);
439 }
440 else
441 {
442 delete [] ColPtrJ;
443 ColPtrJ = static_cast<int *>(NULL);
444 }
445 width = newWidth;
446 }
447 else
448 {
449 // Check that the new width is bigger or equal to the actual width.
450 MFEM_ASSERT(newWidth >= ActualWidth(),
451 "The new width needs to be bigger or equal to the actual width");
452 width = newWidth;
453 }
454}
455
456
458{
459 MFEM_VERIFY(Finalized(), "Matrix is not Finalized!");
460
461 if (isSorted)
462 {
463 return;
464 }
465
466#ifdef MFEM_USE_CUDA_OR_HIP
468 {
469#if defined(MFEM_USE_CUDA)
470 size_t pBufferSizeInBytes = 0;
471 void *pBuffer = NULL;
472
473 const int n = Height();
474 const int m = Width();
475 const int nnzA = J.Capacity();
476 real_t * d_a_sorted = ReadWriteData();
477 const int * d_ia = ReadI();
478 int * d_ja_sorted = ReadWriteJ();
479 csru2csrInfo_t sortInfoA;
480
481 cusparseMatDescr_t matA_descr;
482 cusparseCreateMatDescr( &matA_descr );
483 cusparseSetMatIndexBase( matA_descr, CUSPARSE_INDEX_BASE_ZERO );
484 cusparseSetMatType( matA_descr, CUSPARSE_MATRIX_TYPE_GENERAL );
485
486 cusparseCreateCsru2csrInfo( &sortInfoA );
487
488#ifdef MFEM_USE_SINGLE
489 cusparseScsru2csr_bufferSizeExt( handle, n, m, nnzA, d_a_sorted, d_ia,
490 d_ja_sorted, sortInfoA,
491 &pBufferSizeInBytes);
492#elif defined MFEM_USE_DOUBLE
493 cusparseDcsru2csr_bufferSizeExt( handle, n, m, nnzA, d_a_sorted, d_ia,
494 d_ja_sorted, sortInfoA,
495 &pBufferSizeInBytes);
496#else
497 MFEM_ABORT("Floating point type undefined");
498#endif
499
500 CuMemAlloc( &pBuffer, pBufferSizeInBytes );
501
502#ifdef MFEM_USE_SINGLE
503 cusparseScsru2csr( handle, n, m, nnzA, matA_descr, d_a_sorted, d_ia,
504 d_ja_sorted, sortInfoA, pBuffer);
505#elif defined MFEM_USE_DOUBLE
506 cusparseDcsru2csr( handle, n, m, nnzA, matA_descr, d_a_sorted, d_ia,
507 d_ja_sorted, sortInfoA, pBuffer);
508#else
509 MFEM_ABORT("Floating point type undefined");
510#endif
511
512 // The above call is (at least in some cases) asynchronous, so we need to
513 // wait for it to finish before we can free device temporaries.
514 MFEM_STREAM_SYNC;
515
516 cusparseDestroyCsru2csrInfo( sortInfoA );
517 cusparseDestroyMatDescr( matA_descr );
518
519 CuMemFree( pBuffer );
520#endif
521 }
523 {
524#if defined(MFEM_USE_HIP)
525 size_t pBufferSizeInBytes = 0;
526 void *pBuffer = NULL;
527 int *P = NULL;
528
529 const int n = Height();
530 const int m = Width();
531 const int nnzA = J.Capacity();
532 real_t * d_a_sorted = ReadWriteData();
533 const int * d_ia = ReadI();
534 int * d_ja_sorted = ReadWriteJ();
535
536 hipsparseMatDescr_t descrA;
537 hipsparseCreateMatDescr( &descrA );
538 // FIXME: There is not in-place version of csr sort in hipSPARSE currently, so we make
539 // a temporary copy of the data for gthr, sort that, and then copy the sorted values
540 // back to the array being returned. Where there is an in-place version available,
541 // we should use it.
542 Array< real_t > a_tmp( nnzA );
543 real_t *d_a_tmp = a_tmp.Write();
544
545 hipsparseXcsrsort_bufferSizeExt(handle, n, m, nnzA, d_ia, d_ja_sorted,
546 &pBufferSizeInBytes);
547
548 HipMemAlloc( &pBuffer, pBufferSizeInBytes );
549 HipMemAlloc( (void**)&P, nnzA * sizeof(int) );
550
551 hipsparseCreateIdentityPermutation(handle, nnzA, P);
552 hipsparseXcsrsort(handle, n, m, nnzA, descrA, d_ia, d_ja_sorted, P, pBuffer);
553
554#if defined(MFEM_USE_SINGLE)
555 hipsparseSgthr(handle, nnzA, d_a_sorted, d_a_tmp, P,
556 HIPSPARSE_INDEX_BASE_ZERO);
557#elif defined(MFEM_USE_DOUBLE)
558 hipsparseDgthr(handle, nnzA, d_a_sorted, d_a_tmp, P,
559 HIPSPARSE_INDEX_BASE_ZERO);
560#else
561 MFEM_ABORT("Unsupported floating point type!");
562#endif
563
564 A.CopyFrom( a_tmp.GetMemory(), nnzA );
565 hipsparseDestroyMatDescr( descrA );
566
567 HipMemFree( pBuffer );
568 HipMemFree( P );
569#endif
570 }
571 else
572#endif // MFEM_USE_CUDA_OR_HIP
573 {
574 const int * Ip=HostReadI();
577
579 for (int j = 0, i = 0; i < height; i++)
580 {
581 int end = Ip[i+1];
582 row.SetSize(end - j);
583 for (int k = 0; k < row.Size(); k++)
584 {
585 row[k].one = J[j+k];
586 row[k].two = A[j+k];
587 }
588 row.Sort();
589 for (int k = 0; k < row.Size(); k++, j++)
590 {
591 J[j] = row[k].one;
592 A[j] = row[k].two;
593 }
594 }
595 }
596 isSorted = true;
597}
598
600{
601 MFEM_VERIFY(Finalized(), "Matrix is not Finalized!");
602
603 for (int row = 0, end = 0; row < height; row++)
604 {
605 int start = end, j;
606 end = I[row+1];
607 for (j = start; true; j++)
608 {
609 MFEM_VERIFY(j < end, "diagonal entry not found in row = " << row);
610 if (J[j] == row) { break; }
611 }
612 const real_t diag = A[j];
613 for ( ; j > start; j--)
614 {
615 J[j] = J[j-1];
616 A[j] = A[j-1];
617 }
618 J[start] = row;
619 A[start] = diag;
620 }
621}
622
624{
625 return operator()(i,j);
626}
627
628const real_t &SparseMatrix::Elem(int i, int j) const
629{
630 return operator()(i,j);
631}
632
634{
635 MFEM_ASSERT(i < height && i >= 0 && j < width && j >= 0,
636 "Trying to access element outside of the matrix. "
637 << "height = " << height << ", "
638 << "width = " << width << ", "
639 << "i = " << i << ", "
640 << "j = " << j);
641
642 MFEM_VERIFY(Finalized(), "Matrix must be finalized.");
643
644 for (int k = I[i], end = I[i+1]; k < end; k++)
645 {
646 if (J[k] == j)
647 {
648 return A[k];
649 }
650 }
651
652 MFEM_ABORT("Did not find i = " << i << ", j = " << j << " in matrix.");
653 return A[0];
654}
655
656const real_t &SparseMatrix::operator()(int i, int j) const
657{
658 static const real_t zero = 0.0;
659
660 MFEM_ASSERT(i < height && i >= 0 && j < width && j >= 0,
661 "Trying to access element outside of the matrix. "
662 << "height = " << height << ", "
663 << "width = " << width << ", "
664 << "i = " << i << ", "
665 << "j = " << j);
666
667 if (Finalized())
668 {
669 HostReadI();
670 HostReadJ();
671 HostReadData();
672 for (int k = I[i], end = I[i+1]; k < end; k++)
673 {
674 if (J[k] == j)
675 {
676 return A[k];
677 }
678 }
679 }
680 else
681 {
682 for (RowNode *node_p = Rows[i]; node_p != NULL; node_p = node_p->Prev)
683 {
684 if (node_p->Column == j)
685 {
686 return node_p->Value;
687 }
688 }
689 }
690
691 return zero;
692}
693
695{
696 MFEM_VERIFY(height == width, "Matrix must be square, not height = "
697 << height << ", width = " << width);
698 MFEM_VERIFY(Finalized(), "Matrix must be finalized.");
699
700 d.SetSize(height);
701
702 const auto II = this->ReadI();
703 const auto JJ = this->ReadJ();
704 const auto AA = this->ReadData();
705 auto dd = d.Write();
706
707 mfem::forall(height, [=] MFEM_HOST_DEVICE (int i)
708 {
709 const int begin = II[i];
710 const int end = II[i+1];
711 int j;
712 for (j = begin; j < end; j++)
713 {
714 if (JJ[j] == i)
715 {
716 dd[i] = AA[j];
717 break;
718 }
719 }
720 if (j == end)
721 {
722 dd[i] = 0.;
723 }
724 });
725}
726
727/// Produces a DenseMatrix from a SparseMatrix
729{
730 int num_rows = this->Height();
731 int num_cols = this->Width();
732
733 DenseMatrix * B = new DenseMatrix(num_rows, num_cols);
734
735 this->ToDenseMatrix(*B);
736
737 return B;
738}
739
740/// Produces a DenseMatrix from a SparseMatrix
742{
743 B.SetSize(height, width);
744 B = 0.0;
745
746 for (int r=0; r<height; r++)
747 {
748 const int * col = this->GetRowColumns(r);
749 const real_t * val = this->GetRowEntries(r);
750
751 for (int cj=0; cj<this->RowSize(r); cj++)
752 {
753 B(r, col[cj]) = val[cj];
754 }
755 }
756}
757
758void SparseMatrix::Mult(const Vector &x, Vector &y) const
759{
760 if (Finalized()) { y.UseDevice(true); }
761 y = 0.0;
762 AddMult(x, y);
763}
764
765void SparseMatrix::AddMult(const Vector &x, Vector &y, const real_t a) const
766{
767 MFEM_ASSERT(width == x.Size(), "Input vector size (" << x.Size()
768 << ") must match matrix width (" << width << ")");
769 MFEM_ASSERT(height == y.Size(), "Output vector size (" << y.Size()
770 << ") must match matrix height (" << height << ")");
771
772 if (!Finalized())
773 {
774 const real_t *xp = x.HostRead();
775 real_t *yp = y.HostReadWrite();
776
777 // The matrix is not finalized, but multiplication is still possible
778 for (int i = 0; i < height; i++)
779 {
780 RowNode *row = Rows[i];
781 real_t b = 0.0;
782 for ( ; row != NULL; row = row->Prev)
783 {
784 b += row->Value * xp[row->Column];
785 }
786 *yp += a * b;
787 yp++;
788 }
789 return;
790 }
791
792#ifndef MFEM_USE_LEGACY_OPENMP
793 const int height = this->height;
794 const int nnz = J.Capacity();
795 auto d_I = Read(I, height+1);
796 auto d_J = Read(J, nnz);
797 auto d_A = Read(A, nnz);
798 auto d_x = x.Read();
799 auto d_y = y.ReadWrite();
800
801 // Skip if matrix has no non-zeros
802 if (nnz == 0) {return;}
804 {
805#ifdef MFEM_USE_CUDA_OR_HIP
806 const real_t alpha = a;
807 const real_t beta = 1.0;
808
809 // Setup descriptors
810 if (!initBuffers)
811 {
812#if CUDA_VERSION >= 10010 || defined(MFEM_USE_HIP)
813 // Setup matrix descriptor
814 MFEM_cu_or_hip(sparseCreateCsr)(
816 Width(),
817 J.Capacity(),
818 const_cast<int *>(d_I),
819 const_cast<int *>(d_J),
820 const_cast<real_t *>(d_A),
821 MFEM_CU_or_HIP(SPARSE_INDEX_32I),
822 MFEM_CU_or_HIP(SPARSE_INDEX_32I),
823 MFEM_CU_or_HIP(SPARSE_INDEX_BASE_ZERO),
824#ifdef MFEM_USE_SINGLE
825 MFEM_CUDA_or_HIP(_R_32F));
826#else
827 MFEM_CUDA_or_HIP(_R_64F));
828#endif
829
830 // Create handles for input/output vectors
831 MFEM_cu_or_hip(sparseCreateDnVec)(&vecX_descr,
832 x.Size(),
833 const_cast<real_t *>(d_x),
834#ifdef MFEM_USE_SINGLE
835 MFEM_CUDA_or_HIP(_R_32F));
836#else
837 MFEM_CUDA_or_HIP(_R_64F));
838#endif
839 MFEM_cu_or_hip(sparseCreateDnVec)(&vecY_descr, y.Size(), d_y,
840#ifdef MFEM_USE_SINGLE
841 MFEM_CUDA_or_HIP(_R_32F));
842#else
843 MFEM_CUDA_or_HIP(_R_64F));
844#endif
845#else
846 cusparseCreateMatDescr(&matA_descr);
847 cusparseSetMatIndexBase(matA_descr, CUSPARSE_INDEX_BASE_ZERO);
848 cusparseSetMatType(matA_descr, CUSPARSE_MATRIX_TYPE_GENERAL);
849#endif // CUDA_VERSION >= 10010 || defined(MFEM_USE_HIP)
850 initBuffers = true;
851 }
852 // Allocate kernel space. Buffer is shared between different sparsemats
853 size_t newBufferSize = 0;
854
855 MFEM_cu_or_hip(sparseSpMV_bufferSize)(
856 handle,
857 MFEM_CU_or_HIP(SPARSE_OPERATION_NON_TRANSPOSE),
858 &alpha,
861 &beta,
863#ifdef MFEM_USE_SINGLE
864 MFEM_CUDA_or_HIP(_R_32F),
865#else
866 MFEM_CUDA_or_HIP(_R_64F),
867#endif
868 MFEM_GPUSPARSE_ALG,
869 &newBufferSize);
870
871 // Check if we need to resize
872 if (newBufferSize > bufferSize)
873 {
874 bufferSize = newBufferSize;
875 if (dBuffer != nullptr) { MFEM_Cu_or_Hip(MemFree)(dBuffer); }
876 MFEM_Cu_or_Hip(MemAlloc)(&dBuffer, bufferSize);
877 }
878
879#if CUDA_VERSION >= 10010 || defined(MFEM_USE_HIP)
880 // Update input/output vectors
881 MFEM_cu_or_hip(sparseDnVecSetValues)(vecX_descr,
882 const_cast<real_t *>(d_x));
883 MFEM_cu_or_hip(sparseDnVecSetValues)(vecY_descr, d_y);
884
885 // Y = alpha A * X + beta * Y
886 MFEM_cu_or_hip(sparseSpMV)(
887 handle,
888 MFEM_CU_or_HIP(SPARSE_OPERATION_NON_TRANSPOSE),
889 &alpha,
892 &beta,
894#ifdef MFEM_USE_SINGLE
895 MFEM_CUDA_or_HIP(_R_32F),
896#else
897 MFEM_CUDA_or_HIP(_R_64F),
898#endif
899 MFEM_GPUSPARSE_ALG,
900 dBuffer);
901#else
902#ifdef MFEM_USE_SINGLE
903 cusparseScsrmv(handle,
904#else
905 cusparseDcsrmv(handle,
906#endif
907 CUSPARSE_OPERATION_NON_TRANSPOSE,
908 Height(),
909 Width(),
910 J.Capacity(),
911 &alpha,
913 const_cast<real_t *>(d_A),
914 const_cast<int *>(d_I),
915 const_cast<int *>(d_J),
916 const_cast<real_t *>(d_x),
917 &beta,
918 d_y);
919#endif // CUDA_VERSION >= 10010 || defined(MFEM_USE_HIP)
920#endif // MFEM_USE_CUDA_OR_HIP
921 }
922 else
923 {
924 // Native version
925 mfem::forall(height, [=] MFEM_HOST_DEVICE (int i)
926 {
927 real_t d = 0.0;
928 const int end = d_I[i+1];
929 for (int j = d_I[i]; j < end; j++)
930 {
931 d += d_A[j] * d_x[d_J[j]];
932 }
933 d_y[i] += a * d;
934 });
935
936 }
937
938#else // MFEM_USE_LEGACY_OPENMP
939 const real_t *Ap = A, *xp = x.GetData();
940 real_t *yp = y.GetData();
941 const int *Jp = J, *Ip = I;
942
943 #pragma omp parallel for
944 for (int i = 0; i < height; i++)
945 {
946 real_t d = 0.0;
947 const int end = Ip[i+1];
948 for (int j = Ip[i]; j < end; j++)
949 {
950 d += Ap[j] * xp[Jp[j]];
951 }
952 yp[i] += a * d;
953 }
954#endif // MFEM_USE_LEGACY_OPENMP
955}
956
958{
959 if (Finalized()) { y.UseDevice(true); }
960 y = 0.0;
961 AddMultTranspose(x, y);
962}
963
965 const real_t a) const
966{
967 MFEM_ASSERT(height == x.Size(), "Input vector size (" << x.Size()
968 << ") must match matrix height (" << height << ")");
969 MFEM_ASSERT(width == y.Size(), "Output vector size (" << y.Size()
970 << ") must match matrix width (" << width << ")");
971
972 if (!Finalized())
973 {
974 real_t *yp = y.HostReadWrite();
975 const real_t *xp = x.HostRead();
976 // The matrix is not finalized, but multiplication is still possible
977 for (int i = 0; i < height; i++)
978 {
979 RowNode *row = Rows[i];
980 real_t b = a * xp[i];
981 for ( ; row != NULL; row = row->Prev)
982 {
983 yp[row->Column] += row->Value * b;
984 }
985 }
986 return;
987 }
988
990 if (At)
991 {
992 At->AddMult(x, y, a);
993 }
994 else
995 {
996 real_t *yp = y.HostReadWrite();
997 const real_t *xp = x.HostRead();
998
999 const int *Ip = HostRead(I, height+1);
1000 const int nnz = Ip[height];
1001 const int *Jp = HostRead(J, nnz);
1002 const real_t *Ap = HostRead(A, nnz);
1003
1004 for (int i = 0; i < height; i++)
1005 {
1006 const real_t xi = a * xp[i];
1007 const int end = Ip[i+1];
1008 for (int j = Ip[i]; j < end; j++)
1009 {
1010 const int Jj = Jp[j];
1011 yp[Jj] += Ap[j] * xi;
1012 }
1013 }
1014 }
1015}
1016
1018{
1019 if (At == NULL)
1020 {
1021 At = Transpose(*this);
1022 }
1023}
1024
1026{
1027 delete At;
1028 At = NULL;
1029}
1030
1032{
1034 {
1036 }
1037}
1038
1040 const Array<int> &rows, const Vector &x, Vector &y) const
1041{
1042 MFEM_VERIFY(Finalized(), "Matrix must be finalized.");
1043
1044 const int n = rows.Size();
1045 const int nnz = J.Capacity();
1046 auto d_rows = rows.Read();
1047 auto d_I = Read(I, height+1);
1048 auto d_J = Read(J, nnz);
1049 auto d_A = Read(A, nnz);
1050 auto d_x = x.Read();
1051 auto d_y = y.Write();
1052 mfem::forall(n, [=] MFEM_HOST_DEVICE (int i)
1053 {
1054 const int r = d_rows[i];
1055 const int end = d_I[r + 1];
1056 real_t a = 0.0;
1057 for (int j = d_I[r]; j < end; j++)
1058 {
1059 a += d_A[j] * d_x[d_J[j]];
1060 }
1061 d_y[r] = a;
1062 });
1063}
1064
1066 const Array<int> &rows, const Vector &x, Vector &y, const real_t a) const
1067{
1068 MFEM_VERIFY(Finalized(), "Matrix must be finalized.");
1069
1070 for (int i = 0; i < rows.Size(); i++)
1071 {
1072 int r = rows[i];
1073 int end = I[r + 1];
1074 real_t val = 0.0;
1075 for (int j = I[r]; j < end; j++)
1076 {
1077 val += A[j] * x(J[j]);
1078 }
1079 y(r) += a * val;
1080 }
1081}
1082
1084{
1085 MFEM_ASSERT(Finalized(), "Matrix must be finalized.");
1086 MFEM_ASSERT(x.Size() == Width(), "Input vector size (" << x.Size()
1087 << ") must match matrix width (" << Width() << ")");
1088
1090
1091 const int height = Height();
1092 const int nnz = J.Capacity();
1093 auto d_I = Read(I, height+1);
1094 auto d_J = Read(J, nnz);
1095 auto d_x = Read(x.GetMemory(), x.Size());
1096 auto d_y = Write(y.GetMemory(), y.Size());
1097 mfem::forall(height, [=] MFEM_HOST_DEVICE (int i)
1098 {
1099 bool d_yi = false;
1100 const int end = d_I[i+1];
1101 for (int j = d_I[i]; j < end; j++)
1102 {
1103 if (d_x[d_J[j]])
1104 {
1105 d_yi = true;
1106 break;
1107 }
1108 }
1109 d_y[i] = d_yi;
1110 });
1111}
1112
1114 Array<int> &y) const
1115{
1116 MFEM_ASSERT(Finalized(), "Matrix must be finalized.");
1117 MFEM_ASSERT(x.Size() == Height(), "Input vector size (" << x.Size()
1118 << ") must match matrix height (" << Height() << ")");
1119
1120 y.SetSize(Width());
1121 y = 0;
1122
1123 for (int i = 0; i < Height(); i++)
1124 {
1125 if (x[i])
1126 {
1127 int end = I[i+1];
1128 for (int j = I[i]; j < end; j++)
1129 {
1130 y[J[j]] = x[i];
1131 }
1132 }
1133 }
1134}
1135
1136void SparseMatrix::AbsMult(const Vector &x, Vector &y) const
1137{
1138 MFEM_ASSERT(width == x.Size(), "Input vector size (" << x.Size()
1139 << ") must match matrix width (" << width << ")");
1140 MFEM_ASSERT(height == y.Size(), "Output vector size (" << y.Size()
1141 << ") must match matrix height (" << height << ")");
1142
1143 if (Finalized()) { y.UseDevice(true); }
1144 y = 0.0;
1145
1146 if (!Finalized())
1147 {
1148 const real_t *xp = x.HostRead();
1149 real_t *yp = y.HostReadWrite();
1150
1151 // The matrix is not finalized, but multiplication is still possible
1152 for (int i = 0; i < height; i++)
1153 {
1154 RowNode *row = Rows[i];
1155 real_t b = 0.0;
1156 for ( ; row != NULL; row = row->Prev)
1157 {
1158 b += std::abs(row->Value) * xp[row->Column];
1159 }
1160 *yp += b;
1161 yp++;
1162 }
1163 return;
1164 }
1165
1166 const int height = this->height;
1167 const int nnz = J.Capacity();
1168 auto d_I = Read(I, height+1);
1169 auto d_J = Read(J, nnz);
1170 auto d_A = Read(A, nnz);
1171 auto d_x = x.Read();
1172 auto d_y = y.ReadWrite();
1173 mfem::forall(height, [=] MFEM_HOST_DEVICE (int i)
1174 {
1175 real_t d = 0.0;
1176 const int end = d_I[i+1];
1177 for (int j = d_I[i]; j < end; j++)
1178 {
1179 d += std::abs(d_A[j]) * d_x[d_J[j]];
1180 }
1181 d_y[i] += d;
1182 });
1183}
1184
1186{
1187 MFEM_ASSERT(height == x.Size(), "Input vector size (" << x.Size()
1188 << ") must match matrix height (" << height << ")");
1189 MFEM_ASSERT(width == y.Size(), "Output vector size (" << y.Size()
1190 << ") must match matrix width (" << width << ")");
1191
1192 y = 0.0;
1193
1194 if (!Finalized())
1195 {
1196 real_t *yp = y.GetData();
1197 // The matrix is not finalized, but multiplication is still possible
1198 for (int i = 0; i < height; i++)
1199 {
1200 RowNode *row = Rows[i];
1201 real_t b = x(i);
1202 for ( ; row != NULL; row = row->Prev)
1203 {
1204 yp[row->Column] += fabs(row->Value) * b;
1205 }
1206 }
1207 return;
1208 }
1209
1211 if (At)
1212 {
1213 At->AbsMult(x, y);
1214 }
1215 else
1216 {
1217 for (int i = 0; i < height; i++)
1218 {
1219 const real_t xi = x[i];
1220 const int end = I[i+1];
1221 for (int j = I[i]; j < end; j++)
1222 {
1223 const int Jj = J[j];
1224 y[Jj] += std::abs(A[j]) * xi;
1225 }
1226 }
1227 }
1228}
1229
1231{
1232 MFEM_ASSERT(x.Size() == Width(), "x.Size() = " << x.Size()
1233 << " must be equal to Width() = " << Width());
1234 MFEM_ASSERT(y.Size() == Height(), "y.Size() = " << y.Size()
1235 << " must be equal to Height() = " << Height());
1236
1237 x.HostRead();
1238 y.HostRead();
1239 if (Finalized())
1240 {
1241 const int nnz = J.Capacity();
1242 HostRead(I, height+1);
1243 HostRead(J, nnz);
1244 HostRead(A, nnz);
1245 }
1246
1247 real_t prod = 0.0;
1248 for (int i = 0; i < height; i++)
1249 {
1250 real_t a = 0.0;
1251 if (A)
1252 {
1253 for (int j = I[i], end = I[i+1]; j < end; j++)
1254 {
1255 a += A[j] * x(J[j]);
1256 }
1257 }
1258 else
1259 {
1260 for (RowNode *np = Rows[i]; np != NULL; np = np->Prev)
1261 {
1262 a += np->Value * x(np->Column);
1263 }
1264 }
1265 prod += a * y(i);
1266 }
1267
1268 return prod;
1269}
1270
1272{
1273 if (Finalized())
1274 {
1275 auto d_I = ReadI();
1276 auto d_A = ReadData();
1277 auto d_x = x.Write();
1278 mfem::forall(height, [=] MFEM_HOST_DEVICE (int i)
1279 {
1280 real_t sum = 0.0;
1281 for (int j = d_I[i], end = d_I[i+1]; j < end; j++)
1282 {
1283 sum += d_A[j];
1284 }
1285 d_x[i] = sum;
1286 });
1287 }
1288 else
1289 {
1290 for (int i = 0; i < height; i++)
1291 {
1292 real_t a = 0.0;
1293 for (RowNode *np = Rows[i]; np != NULL; np = np->Prev)
1294 {
1295 a += np->Value;
1296 }
1297 x(i) = a;
1298 }
1299 }
1300}
1301
1303{
1304 MFEM_VERIFY(irow < height,
1305 "row " << irow << " not in matrix with height " << height);
1306
1307 real_t a = 0.0;
1308 if (A)
1309 {
1310 for (int j = I[irow], end = I[irow+1]; j < end; j++)
1311 {
1312 a += fabs(A[j]);
1313 }
1314 }
1315 else
1316 {
1317 for (RowNode *np = Rows[irow]; np != NULL; np = np->Prev)
1318 {
1319 a += fabs(np->Value);
1320 }
1321 }
1322
1323 return a;
1324}
1325
1326void SparseMatrix::Threshold(real_t tol, bool fix_empty_rows)
1327{
1328 MFEM_ASSERT(Finalized(), "Matrix must be finalized.");
1329 real_t atol;
1330 atol = std::abs(tol);
1331
1332 fix_empty_rows = height == width ? fix_empty_rows : false;
1333
1334 real_t *newA;
1335 int *newI, *newJ;
1336 int i, j, nz;
1337
1338 newI = Memory<int>(height+1);
1339 newI[0] = 0;
1340 for (i = 0, nz = 0; i < height; i++)
1341 {
1342 bool found = false;
1343 for (j = I[i]; j < I[i+1]; j++)
1344 if (std::abs(A[j]) > atol)
1345 {
1346 found = true;
1347 nz++;
1348 }
1349 if (fix_empty_rows && !found) { nz++; }
1350 newI[i+1] = nz;
1351 }
1352
1353 newJ = Memory<int>(nz);
1354 newA = Memory<real_t>(nz);
1355 // Assume we're sorted until we find out otherwise
1356 isSorted = true;
1357 for (i = 0, nz = 0; i < height; i++)
1358 {
1359 bool found = false;
1360 int lastCol = -1;
1361 for (j = I[i]; j < I[i+1]; j++)
1362 if (std::abs(A[j]) > atol)
1363 {
1364 found = true;
1365 newJ[nz] = J[j];
1366 newA[nz] = A[j];
1367 if ( lastCol > newJ[nz] )
1368 {
1369 isSorted = false;
1370 }
1371 lastCol = newJ[nz];
1372 nz++;
1373 }
1374 if (fix_empty_rows && !found)
1375 {
1376 newJ[nz] = i;
1377 newA[nz] = 0.0;
1378 nz++;
1379 }
1380 }
1381 Destroy();
1382 I.Wrap(newI, height+1, true);
1383 J.Wrap(newJ, I[height], true);
1384 A.Wrap(newA, I[height], true);
1385}
1386
1387void SparseMatrix::Finalize(int skip_zeros, bool fix_empty_rows)
1388{
1389 int i, j, nr, nz;
1390 RowNode *aux;
1391
1392 if (Finalized())
1393 {
1394 return;
1395 }
1396
1397 delete [] ColPtrNode;
1398 ColPtrNode = NULL;
1399
1400 I.New(height+1);
1401 I[0] = 0;
1402 for (i = 1; i <= height; i++)
1403 {
1404 nr = 0;
1405 for (aux = Rows[i-1]; aux != NULL; aux = aux->Prev)
1406 {
1407 if (skip_zeros && aux->Value == 0.0)
1408 {
1409 if (skip_zeros == 2) { continue; }
1410 if ((i-1) != aux->Column) { continue; }
1411
1412 bool found = false;
1413 real_t found_val = 0.0; // init to suppress gcc warning
1414 for (RowNode *other = Rows[aux->Column]; other != NULL; other = other->Prev)
1415 {
1416 if (other->Column == (i-1))
1417 {
1418 found = true;
1419 found_val = other->Value;
1420 break;
1421 }
1422 }
1423 if (found && found_val == 0.0) { continue; }
1424
1425 }
1426 nr++;
1427 }
1428 if (fix_empty_rows && !nr) { nr = 1; }
1429 I[i] = I[i-1] + nr;
1430 }
1431
1432 nz = I[height];
1433 J.New(nz);
1434 A.New(nz);
1435 // Assume we're sorted until we find out otherwise
1436 isSorted = true;
1437 for (j = i = 0; i < height; i++)
1438 {
1439 int lastCol = -1;
1440 nr = 0;
1441 for (aux = Rows[i]; aux != NULL; aux = aux->Prev)
1442 {
1443 if (skip_zeros && aux->Value == 0.0)
1444 {
1445 if (skip_zeros == 2) { continue; }
1446 if (i != aux->Column) { continue; }
1447
1448 bool found = false;
1449 real_t found_val = 0.0; // init to suppress gcc warning
1450 for (RowNode *other = Rows[aux->Column]; other != NULL; other = other->Prev)
1451 {
1452 if (other->Column == i)
1453 {
1454 found = true;
1455 found_val = other->Value;
1456 break;
1457 }
1458 }
1459 if (found && found_val == 0.0) { continue; }
1460 }
1461
1462 J[j] = aux->Column;
1463 A[j] = aux->Value;
1464
1465 if ( lastCol > J[j] )
1466 {
1467 isSorted = false;
1468 }
1469 lastCol = J[j];
1470
1471 j++;
1472 nr++;
1473 }
1474 if (fix_empty_rows && !nr)
1475 {
1476 J[j] = i;
1477 A[j] = 1.0;
1478 j++;
1479 }
1480 }
1481
1482#ifdef MFEM_USE_MEMALLOC
1483 delete NodesMem;
1484 NodesMem = NULL;
1485#else
1486 for (i = 0; i < height; i++)
1487 {
1488 RowNode *node_p = Rows[i];
1489 while (node_p != NULL)
1490 {
1491 aux = node_p;
1492 node_p = node_p->Prev;
1493 delete aux;
1494 }
1495 }
1496#endif
1497
1498 delete [] Rows;
1499 Rows = NULL;
1500}
1501
1503{
1504 int br = blocks.NumRows(), bc = blocks.NumCols();
1505 int nr = (height + br - 1)/br, nc = (width + bc - 1)/bc;
1506
1507 for (int j = 0; j < bc; j++)
1508 {
1509 for (int i = 0; i < br; i++)
1510 {
1511 int *bI = Memory<int>(nr + 1);
1512 for (int k = 0; k <= nr; k++)
1513 {
1514 bI[k] = 0;
1515 }
1516 blocks(i,j) = new SparseMatrix(bI, NULL, NULL, nr, nc);
1517 }
1518 }
1519
1520 for (int gr = 0; gr < height; gr++)
1521 {
1522 int bi = gr/nr, i = gr%nr + 1;
1523 if (Finalized())
1524 {
1525 for (int j = I[gr]; j < I[gr+1]; j++)
1526 {
1527 if (A[j] != 0.0)
1528 {
1529 blocks(bi, J[j]/nc)->I[i]++;
1530 }
1531 }
1532 }
1533 else
1534 {
1535 for (RowNode *n_p = Rows[gr]; n_p != NULL; n_p = n_p->Prev)
1536 {
1537 if (n_p->Value != 0.0)
1538 {
1539 blocks(bi, n_p->Column/nc)->I[i]++;
1540 }
1541 }
1542 }
1543 }
1544
1545 for (int j = 0; j < bc; j++)
1546 {
1547 for (int i = 0; i < br; i++)
1548 {
1549 SparseMatrix &b = *blocks(i,j);
1550 int nnz = 0, rs;
1551 for (int k = 1; k <= nr; k++)
1552 {
1553 rs = b.I[k], b.I[k] = nnz, nnz += rs;
1554 }
1555 b.J.New(nnz);
1556 b.A.New(nnz);
1557 }
1558 }
1559
1560 for (int gr = 0; gr < height; gr++)
1561 {
1562 int bi = gr/nr, i = gr%nr + 1;
1563 if (Finalized())
1564 {
1565 for (int j = I[gr]; j < I[gr+1]; j++)
1566 {
1567 if (A[j] != 0.0)
1568 {
1569 SparseMatrix &b = *blocks(bi, J[j]/nc);
1570 b.J[b.I[i]] = J[j] % nc;
1571 b.A[b.I[i]] = A[j];
1572 b.I[i]++;
1573 }
1574 }
1575 }
1576 else
1577 {
1578 for (RowNode *n_p = Rows[gr]; n_p != NULL; n_p = n_p->Prev)
1579 {
1580 if (n_p->Value != 0.0)
1581 {
1582 SparseMatrix &b = *blocks(bi, n_p->Column/nc);
1583 b.J[b.I[i]] = n_p->Column % nc;
1584 b.A[b.I[i]] = n_p->Value;
1585 b.I[i]++;
1586 }
1587 }
1588 }
1589 }
1590}
1591
1593{
1594 if (height != width)
1595 {
1596 return infinity();
1597 }
1598
1599 real_t symm = 0.0;
1600 if (Empty())
1601 {
1602 // return 0.0;
1603 }
1604 else if (Finalized())
1605 {
1606 for (int i = 1; i < height; i++)
1607 {
1608 for (int j = I[i]; j < I[i+1]; j++)
1609 {
1610 if (J[j] < i)
1611 {
1612 symm = std::max(symm, std::abs(A[j]-(*this)(J[j],i)));
1613 }
1614 }
1615 }
1616 }
1617 else
1618 {
1619 for (int i = 0; i < height; i++)
1620 {
1621 for (RowNode *node_p = Rows[i]; node_p != NULL; node_p = node_p->Prev)
1622 {
1623 int col = node_p->Column;
1624 if (col < i)
1625 {
1626 symm = std::max(symm, std::abs(node_p->Value-(*this)(col,i)));
1627 }
1628 }
1629 }
1630 }
1631 return symm;
1632}
1633
1635{
1636 MFEM_VERIFY(Finalized(), "Matrix must be finalized.");
1637
1638 int i, j;
1639 for (i = 1; i < height; i++)
1640 {
1641 for (j = I[i]; j < I[i+1]; j++)
1642 {
1643 if (J[j] < i)
1644 {
1645 A[j] += (*this)(J[j],i);
1646 A[j] *= 0.5;
1647 (*this)(J[j],i) = A[j];
1648 }
1649 }
1650 }
1651}
1652
1654{
1655 if (Finalized())
1656 {
1657 HostReadI();
1658 return I[height];
1659 }
1660 else
1661 {
1662 int nnz = 0;
1663
1664 for (int i = 0; i < height; i++)
1665 {
1666 for (RowNode *node_p = Rows[i]; node_p != NULL; node_p = node_p->Prev)
1667 {
1668 nnz++;
1669 }
1670 }
1671
1672 return nnz;
1673 }
1674}
1675
1677{
1678 real_t m = 0.0;
1679
1680 if (A)
1681 {
1682 int nnz = I[height];
1683 for (int j = 0; j < nnz; j++)
1684 {
1685 m = std::max(m, std::abs(A[j]));
1686 }
1687 }
1688 else
1689 {
1690 for (int i = 0; i < height; i++)
1691 {
1692 for (RowNode *n_p = Rows[i]; n_p != NULL; n_p = n_p->Prev)
1693 {
1694 m = std::max(m, std::abs(n_p->Value));
1695 }
1696 }
1697 }
1698 return m;
1699}
1700
1702{
1703 int counter = 0;
1704
1705 if (A)
1706 {
1707 const int nz = I[height];
1708 const real_t *Ap = A;
1709
1710 for (int i = 0; i < nz; i++)
1711 {
1712 counter += (std::abs(Ap[i]) <= tol);
1713 }
1714 }
1715 else
1716 {
1717 for (int i = 0; i < height; i++)
1718 {
1719 for (RowNode *aux = Rows[i]; aux != NULL; aux = aux->Prev)
1720 {
1721 counter += (std::abs(aux->Value) <= tol);
1722 }
1723 }
1724 }
1725
1726 return counter;
1727}
1728
1730{
1731 if (Empty())
1732 {
1733 return 0;
1734 }
1735 else if (Finalized())
1736 {
1737 return mfem::CheckFinite(A, I[height]);
1738 }
1739 else
1740 {
1741 int counter = 0;
1742 for (int i = 0; i < height; i++)
1743 {
1744 for (RowNode *aux = Rows[i]; aux != NULL; aux = aux->Prev)
1745 {
1746 counter += !IsFinite(aux->Value);
1747 }
1748 }
1749 return counter;
1750 }
1751}
1752
1754{
1755 return NULL;
1756}
1757
1758void SparseMatrix::EliminateRow(int row, const real_t sol, Vector &rhs)
1759{
1760 RowNode *aux;
1761
1762 MFEM_ASSERT(row < height && row >= 0,
1763 "Row " << row << " not in matrix of height " << height);
1764
1765 MFEM_VERIFY(!Finalized(), "Matrix must NOT be finalized.");
1766
1767 for (aux = Rows[row]; aux != NULL; aux = aux->Prev)
1768 {
1769 rhs(aux->Column) -= sol * aux->Value;
1770 aux->Value = 0.0;
1771 }
1772}
1773
1775{
1776 RowNode *aux;
1777
1778 MFEM_ASSERT(row < height && row >= 0,
1779 "Row " << row << " not in matrix of height " << height);
1780 MFEM_ASSERT(dpolicy != DIAG_KEEP, "Diagonal policy must not be DIAG_KEEP");
1781 MFEM_ASSERT(dpolicy != DIAG_ONE || height == width,
1782 "if dpolicy == DIAG_ONE, matrix must be square, not height = "
1783 << height << ", width = " << width);
1784
1785 if (Rows == NULL)
1786 {
1787 for (int i=I[row]; i < I[row+1]; ++i)
1788 {
1789 A[i]=0.0;
1790 }
1791 }
1792 else
1793 {
1794 for (aux = Rows[row]; aux != NULL; aux = aux->Prev)
1795 {
1796 aux->Value = 0.0;
1797 }
1798 }
1799
1800 if (dpolicy == DIAG_ONE)
1801 {
1802 SearchRow(row, row) = 1.;
1803 }
1804}
1805
1807{
1808 MFEM_ASSERT(col < width && col >= 0,
1809 "Col " << col << " not in matrix of width " << width);
1810 MFEM_ASSERT(dpolicy != DIAG_KEEP, "Diagonal policy must not be DIAG_KEEP");
1811 MFEM_ASSERT(dpolicy != DIAG_ONE || height == width,
1812 "if dpolicy == DIAG_ONE, matrix must be square, not height = "
1813 << height << ", width = " << width);
1814
1815 if (Rows == NULL)
1816 {
1817 const int nnz = I[height];
1818 for (int jpos = 0; jpos != nnz; ++jpos)
1819 {
1820 if (J[jpos] == col)
1821 {
1822 A[jpos] = 0.0;
1823 }
1824 }
1825 }
1826 else
1827 {
1828 for (int i = 0; i < height; i++)
1829 {
1830 for (RowNode *aux = Rows[i]; aux != NULL; aux = aux->Prev)
1831 {
1832 if (aux->Column == col)
1833 {
1834 aux->Value = 0.0;
1835 break;
1836 }
1837 }
1838 }
1839 }
1840
1841 if (dpolicy == DIAG_ONE)
1842 {
1843 SearchRow(col, col) = 1.0;
1844 }
1845}
1846
1848 Vector *b)
1849{
1850 if (Rows == NULL)
1851 {
1852 for (int i = 0; i < height; i++)
1853 {
1854 for (int jpos = I[i]; jpos != I[i+1]; ++jpos)
1855 {
1856 if (cols[ J[jpos]])
1857 {
1858 if (x && b)
1859 {
1860 (*b)(i) -= A[jpos] * (*x)( J[jpos] );
1861 }
1862 A[jpos] = 0.0;
1863 }
1864 }
1865 }
1866 }
1867 else
1868 {
1869 for (int i = 0; i < height; i++)
1870 {
1871 for (RowNode *aux = Rows[i]; aux != NULL; aux = aux->Prev)
1872 {
1873 if (cols[aux -> Column])
1874 {
1875 if (x && b)
1876 {
1877 (*b)(i) -= aux -> Value * (*x)(aux -> Column);
1878 }
1879 aux->Value = 0.0;
1880 }
1881 }
1882 }
1883 }
1884}
1885
1887{
1888 if (Rows)
1889 {
1890 RowNode *nd;
1891 for (int row = 0; row < height; row++)
1892 {
1893 for (nd = Rows[row]; nd != NULL; nd = nd->Prev)
1894 {
1895 if (col_marker[nd->Column])
1896 {
1897 Ae.Add(row, nd->Column, nd->Value);
1898 nd->Value = 0.0;
1899 }
1900 }
1901 }
1902 }
1903 else
1904 {
1905 for (int row = 0; row < height; row++)
1906 {
1907 for (int j = I[row]; j < I[row+1]; j++)
1908 {
1909 if (col_marker[J[j]])
1910 {
1911 Ae.Add(row, J[j], A[j]);
1912 A[j] = 0.0;
1913 }
1914 }
1915 }
1916 }
1917}
1918
1919
1920void SparseMatrix::EliminateRowCol(int rc, const real_t sol, Vector &rhs,
1921 DiagonalPolicy dpolicy)
1922{
1923 MFEM_ASSERT(rc < height && rc >= 0,
1924 "Row " << rc << " not in matrix of height " << height);
1928
1929 if (Rows == NULL)
1930 {
1931 for (int j = I[rc]; j < I[rc+1]; j++)
1932 {
1933 const int col = J[j];
1934 if (col == rc)
1935 {
1936 switch (dpolicy)
1937 {
1938 case DIAG_KEEP:
1939 rhs(rc) = A[j] * sol;
1940 break;
1941 case DIAG_ONE:
1942 A[j] = 1.0;
1943 rhs(rc) = sol;
1944 break;
1945 case DIAG_ZERO:
1946 A[j] = 0.;
1947 rhs(rc) = 0.;
1948 break;
1949 default:
1950 mfem_error("SparseMatrix::EliminateRowCol () #2");
1951 break;
1952 }
1953 }
1954 else
1955 {
1956 A[j] = 0.0;
1957 for (int k = I[col]; 1; k++)
1958 {
1959 if (k == I[col+1])
1960 {
1961 mfem_error("SparseMatrix::EliminateRowCol () #3");
1962 }
1963 else if (J[k] == rc)
1964 {
1965 rhs(col) -= sol * A[k];
1966 A[k] = 0.0;
1967 break;
1968 }
1969 }
1970 }
1971 }
1972 }
1973 else
1974 {
1975 for (RowNode *aux = Rows[rc]; aux != NULL; aux = aux->Prev)
1976 {
1977 const int col = aux->Column;
1978 if (col == rc)
1979 {
1980 switch (dpolicy)
1981 {
1982 case DIAG_KEEP:
1983 rhs(rc) = aux->Value * sol;
1984 break;
1985 case DIAG_ONE:
1986 aux->Value = 1.0;
1987 rhs(rc) = sol;
1988 break;
1989 case DIAG_ZERO:
1990 aux->Value = 0.;
1991 rhs(rc) = 0.;
1992 break;
1993 default:
1994 mfem_error("SparseMatrix::EliminateRowCol () #4");
1995 break;
1996 }
1997 }
1998 else
1999 {
2000 aux->Value = 0.0;
2001 for (RowNode *node = Rows[col]; 1; node = node->Prev)
2002 {
2003 if (node == NULL)
2004 {
2005 mfem_error("SparseMatrix::EliminateRowCol () #5");
2006 }
2007 else if (node->Column == rc)
2008 {
2009 rhs(col) -= sol * node->Value;
2010 node->Value = 0.0;
2011 break;
2012 }
2013 }
2014 }
2015 }
2016 }
2017}
2018
2020 DenseMatrix &rhs,
2021 DiagonalPolicy dpolicy)
2022{
2023 MFEM_ASSERT(rc < height && rc >= 0,
2024 "Row " << rc << " not in matrix of height " << height);
2025 MFEM_ASSERT(sol.Size() == rhs.Width(), "solution size (" << sol.Size()
2026 << ") must match rhs width (" << rhs.Width() << ")");
2027
2028 const int num_rhs = rhs.Width();
2029 if (Rows == NULL)
2030 {
2031 for (int j = I[rc]; j < I[rc+1]; j++)
2032 {
2033 const int col = J[j];
2034 if (col == rc)
2035 {
2036 switch (dpolicy)
2037 {
2038 case DIAG_KEEP:
2039 for (int r = 0; r < num_rhs; r++)
2040 {
2041 rhs(rc,r) = A[j] * sol(r);
2042 }
2043 break;
2044 case DIAG_ONE:
2045 A[j] = 1.0;
2046 for (int r = 0; r < num_rhs; r++)
2047 {
2048 rhs(rc,r) = sol(r);
2049 }
2050 break;
2051 case DIAG_ZERO:
2052 A[j] = 0.;
2053 for (int r = 0; r < num_rhs; r++)
2054 {
2055 rhs(rc,r) = 0.;
2056 }
2057 break;
2058 default:
2059 mfem_error("SparseMatrix::EliminateRowColMultipleRHS() #3");
2060 break;
2061 }
2062 }
2063 else
2064 {
2065 A[j] = 0.0;
2066 for (int k = I[col]; 1; k++)
2067 {
2068 if (k == I[col+1])
2069 {
2070 mfem_error("SparseMatrix::EliminateRowColMultipleRHS() #4");
2071 }
2072 else if (J[k] == rc)
2073 {
2074 for (int r = 0; r < num_rhs; r++)
2075 {
2076 rhs(col,r) -= sol(r) * A[k];
2077 }
2078 A[k] = 0.0;
2079 break;
2080 }
2081 }
2082 }
2083 }
2084 }
2085 else
2086 {
2087 for (RowNode *aux = Rows[rc]; aux != NULL; aux = aux->Prev)
2088 {
2089 const int col = aux->Column;
2090 if (col == rc)
2091 {
2092 switch (dpolicy)
2093 {
2094 case DIAG_KEEP:
2095 for (int r = 0; r < num_rhs; r++)
2096 {
2097 rhs(rc,r) = aux->Value * sol(r);
2098 }
2099 break;
2100 case DIAG_ONE:
2101 aux->Value = 1.0;
2102 for (int r = 0; r < num_rhs; r++)
2103 {
2104 rhs(rc,r) = sol(r);
2105 }
2106 break;
2107 case DIAG_ZERO:
2108 aux->Value = 0.;
2109 for (int r = 0; r < num_rhs; r++)
2110 {
2111 rhs(rc,r) = 0.;
2112 }
2113 break;
2114 default:
2115 mfem_error("SparseMatrix::EliminateRowColMultipleRHS() #5");
2116 break;
2117 }
2118 }
2119 else
2120 {
2121 aux->Value = 0.0;
2122 for (RowNode *node = Rows[col]; 1; node = node->Prev)
2123 {
2124 if (node == NULL)
2125 {
2126 mfem_error("SparseMatrix::EliminateRowColMultipleRHS() #6");
2127 }
2128 else if (node->Column == rc)
2129 {
2130 for (int r = 0; r < num_rhs; r++)
2131 {
2132 rhs(col,r) -= sol(r) * node->Value;
2133 }
2134 node->Value = 0.0;
2135 break;
2136 }
2137 }
2138 }
2139 }
2140 }
2141}
2142
2144{
2145 MFEM_ASSERT(rc < height && rc >= 0,
2146 "Row " << rc << " not in matrix of height " << height);
2147
2148 if (Rows == NULL)
2149 {
2150 const auto &II = this->I; // only use const access for I
2151 const auto &JJ = this->J; // only use const access for J
2152 for (int j = II[rc]; j < II[rc+1]; j++)
2153 {
2154 const int col = JJ[j];
2155 if (col == rc)
2156 {
2157 if (dpolicy == DIAG_ONE)
2158 {
2159 A[j] = 1.0;
2160 }
2161 else if (dpolicy == DIAG_ZERO)
2162 {
2163 A[j] = 0.0;
2164 }
2165 }
2166 else
2167 {
2168 A[j] = 0.0;
2169 for (int k = II[col]; 1; k++)
2170 {
2171 if (k == II[col+1])
2172 {
2173 mfem_error("SparseMatrix::EliminateRowCol() #2");
2174 }
2175 else if (JJ[k] == rc)
2176 {
2177 A[k] = 0.0;
2178 break;
2179 }
2180 }
2181 }
2182 }
2183 }
2184 else
2185 {
2186 RowNode *aux, *node;
2187
2188 for (aux = Rows[rc]; aux != NULL; aux = aux->Prev)
2189 {
2190 const int col = aux->Column;
2191 if (col == rc)
2192 {
2193 if (dpolicy == DIAG_ONE)
2194 {
2195 aux->Value = 1.0;
2196 }
2197 else if (dpolicy == DIAG_ZERO)
2198 {
2199 aux->Value = 0.;
2200 }
2201 }
2202 else
2203 {
2204 aux->Value = 0.0;
2205 for (node = Rows[col]; 1; node = node->Prev)
2206 {
2207 if (node == NULL)
2208 {
2209 mfem_error("SparseMatrix::EliminateRowCol() #3");
2210 }
2211 else if (node->Column == rc)
2212 {
2213 node->Value = 0.0;
2214 break;
2215 }
2216 }
2217 }
2218 }
2219 }
2220}
2221
2222// This is almost identical to EliminateRowCol(int, int), except for
2223// the A[j] = value; and aux->Value = value; lines.
2225{
2226 MFEM_ASSERT(rc < height && rc >= 0,
2227 "Row " << rc << " not in matrix of height " << height);
2228
2229 if (Rows == NULL)
2230 {
2231 for (int j = I[rc]; j < I[rc+1]; j++)
2232 {
2233 const int col = J[j];
2234 if (col == rc)
2235 {
2236 A[j] = value;
2237 }
2238 else
2239 {
2240 A[j] = 0.0;
2241 for (int k = I[col]; 1; k++)
2242 {
2243 if (k == I[col+1])
2244 {
2245 mfem_error("SparseMatrix::EliminateRowCol() #2");
2246 }
2247 else if (J[k] == rc)
2248 {
2249 A[k] = 0.0;
2250 break;
2251 }
2252 }
2253 }
2254 }
2255 }
2256 else
2257 {
2258 RowNode *aux, *node;
2259
2260 for (aux = Rows[rc]; aux != NULL; aux = aux->Prev)
2261 {
2262 const int col = aux->Column;
2263 if (col == rc)
2264 {
2265 aux->Value = value;
2266 }
2267 else
2268 {
2269 aux->Value = 0.0;
2270 for (node = Rows[col]; 1; node = node->Prev)
2271 {
2272 if (node == NULL)
2273 {
2274 mfem_error("SparseMatrix::EliminateRowCol() #3");
2275 }
2276 else if (node->Column == rc)
2277 {
2278 node->Value = 0.0;
2279 break;
2280 }
2281 }
2282 }
2283 }
2284 }
2285}
2286
2288 DiagonalPolicy dpolicy)
2289{
2290 if (Rows)
2291 {
2292 RowNode *nd, *nd2;
2293 for (nd = Rows[rc]; nd != NULL; nd = nd->Prev)
2294 {
2295 const int col = nd->Column;
2296 if (col == rc)
2297 {
2298 switch (dpolicy)
2299 {
2300 case DIAG_ONE:
2301 Ae.Add(rc, rc, nd->Value - 1.0);
2302 nd->Value = 1.0;
2303 break;
2304 case DIAG_ZERO:
2305 Ae.Add(rc, rc, nd->Value);
2306 nd->Value = 0.;
2307 break;
2308 case DIAG_KEEP:
2309 break;
2310 default:
2311 mfem_error("SparseMatrix::EliminateRowCol #1");
2312 break;
2313 }
2314 }
2315 else
2316 {
2317 Ae.Add(rc, col, nd->Value);
2318 nd->Value = 0.0;
2319 for (nd2 = Rows[col]; 1; nd2 = nd2->Prev)
2320 {
2321 if (nd2 == NULL)
2322 {
2323 mfem_error("SparseMatrix::EliminateRowCol #2");
2324 }
2325 else if (nd2->Column == rc)
2326 {
2327 Ae.Add(col, rc, nd2->Value);
2328 nd2->Value = 0.0;
2329 break;
2330 }
2331 }
2332 }
2333 }
2334 }
2335 else
2336 {
2337 for (int j = I[rc]; j < I[rc+1]; j++)
2338 {
2339 const int col = J[j];
2340 if (col == rc)
2341 {
2342 switch (dpolicy)
2343 {
2344 case DIAG_ONE:
2345 Ae.Add(rc, rc, A[j] - 1.0);
2346 A[j] = 1.0;
2347 break;
2348 case DIAG_ZERO:
2349 Ae.Add(rc, rc, A[j]);
2350 A[j] = 0.;
2351 break;
2352 case DIAG_KEEP:
2353 break;
2354 default:
2355 mfem_error("SparseMatrix::EliminateRowCol #3");
2356 break;
2357 }
2358 }
2359 else
2360 {
2361 Ae.Add(rc, col, A[j]);
2362 A[j] = 0.0;
2363 for (int k = I[col]; true; k++)
2364 {
2365 if (k == I[col+1])
2366 {
2367 mfem_error("SparseMatrix::EliminateRowCol #4");
2368 }
2369 else if (J[k] == rc)
2370 {
2371 Ae.Add(col, rc, A[k]);
2372 A[k] = 0.0;
2373 break;
2374 }
2375 }
2376 }
2377 }
2378 }
2379}
2380
2382 DiagonalPolicy diag_policy)
2383{
2384 const int n_ess_dofs = ess_dofs.Size();
2385 const auto ess_dofs_d = ess_dofs.Read();
2386 const auto dI = ReadI();
2387 const auto dJ = ReadJ();
2388 auto dA = ReadWriteData();
2389
2390 mfem::forall(n_ess_dofs, [=] MFEM_HOST_DEVICE (int i)
2391 {
2392 const int idof = ess_dofs_d[i];
2393 for (int j=dI[idof]; j<dI[idof+1]; ++j)
2394 {
2395 const int jdof = dJ[j];
2396 if (jdof != idof)
2397 {
2398 dA[j] = 0.0;
2399 for (int k=dI[jdof]; k<dI[jdof+1]; ++k)
2400 {
2401 if (dJ[k] == idof)
2402 {
2403 dA[k] = 0.0;
2404 break;
2405 }
2406 }
2407 }
2408 else
2409 {
2410 if (diag_policy == DiagonalPolicy::DIAG_ONE)
2411 {
2412 dA[j] = 1.0;
2413 }
2414 else if (diag_policy == DiagonalPolicy::DIAG_ZERO)
2415 {
2416 dA[j] = 0.0;
2417 }
2418 // else (diag_policy == DiagonalPolicy::DIAG_KEEP)
2419 }
2420 }
2421 });
2422}
2423
2425{
2426 for (int i = 0; i < height; i++)
2427 {
2428 if (I[i+1] == I[i]+1 && fabs(A[I[i]]) < 1e-16)
2429 {
2430 A[I[i]] = 1.0;
2431 }
2432 }
2433}
2434
2436{
2437 for (int i = 0; i < height; i++)
2438 {
2439 real_t zero = 0.0;
2440 for (int j = I[i]; j < I[i+1]; j++)
2441 {
2442 zero += fabs(A[j]);
2443 }
2444 if (zero <= threshold)
2445 {
2446 for (int j = I[i]; j < I[i+1]; j++)
2447 {
2448 A[j] = (J[j] == i) ? 1.0 : 0.0;
2449 }
2450 }
2451 }
2452}
2453
2455{
2456 if (!Finalized())
2457 {
2458 real_t *yp = y.GetData();
2459 const real_t *xp = x.GetData();
2460 RowNode *diag_p, *n_p, **R = Rows;
2461
2462 const int s = height;
2463 for (int i = 0; i < s; i++)
2464 {
2465 real_t sum = 0.0;
2466 diag_p = NULL;
2467 for (n_p = R[i]; n_p != NULL; n_p = n_p->Prev)
2468 {
2469 const int c = n_p->Column;
2470 if (c == i)
2471 {
2472 diag_p = n_p;
2473 }
2474 else
2475 {
2476 sum += n_p->Value * yp[c];
2477 }
2478 }
2479
2480 if (diag_p != NULL && diag_p->Value != 0.0)
2481 {
2482 yp[i] = (xp[i] - sum) / diag_p->Value;
2483 }
2484 else if (xp[i] == sum)
2485 {
2486 yp[i] = sum;
2487 }
2488 else
2489 {
2490 mfem_error("SparseMatrix::Gauss_Seidel_forw()");
2491 }
2492 }
2493 }
2494 else
2495 {
2496 const int s = height;
2497 const int nnz = J.Capacity();
2498 const int *Ip = HostRead(I, s+1);
2499 const int *Jp = HostRead(J, nnz);
2500 const real_t *Ap = HostRead(A, nnz);
2501 real_t *yp = y.HostReadWrite();
2502 const real_t *xp = x.HostRead();
2503
2504 for (int i = 0, j = Ip[0]; i < s; i++)
2505 {
2506 const int end = Ip[i+1];
2507 real_t sum = 0.0;
2508 int d = -1;
2509 for ( ; j < end; j++)
2510 {
2511 const int c = Jp[j];
2512 if (c == i)
2513 {
2514 d = j;
2515 }
2516 else
2517 {
2518 sum += Ap[j] * yp[c];
2519 }
2520 }
2521
2522 if (d >= 0 && Ap[d] != 0.0)
2523 {
2524 yp[i] = (xp[i] - sum) / Ap[d];
2525 }
2526 else if (xp[i] == sum)
2527 {
2528 yp[i] = sum;
2529 }
2530 else
2531 {
2532 mfem_error("SparseMatrix::Gauss_Seidel_forw(...) #2");
2533 }
2534 }
2535 }
2536}
2537
2539{
2540 if (!Finalized())
2541 {
2542 real_t *yp = y.GetData();
2543 const real_t *xp = x.GetData();
2544 RowNode *diag_p, *n_p, **R = Rows;
2545
2546 for (int i = height-1; i >= 0; i--)
2547 {
2548 real_t sum = 0.;
2549 diag_p = NULL;
2550 for (n_p = R[i]; n_p != NULL; n_p = n_p->Prev)
2551 {
2552 const int c = n_p->Column;
2553 if (c == i)
2554 {
2555 diag_p = n_p;
2556 }
2557 else
2558 {
2559 sum += n_p->Value * yp[c];
2560 }
2561 }
2562
2563 if (diag_p != NULL && diag_p->Value != 0.0)
2564 {
2565 yp[i] = (xp[i] - sum) / diag_p->Value;
2566 }
2567 else if (xp[i] == sum)
2568 {
2569 yp[i] = sum;
2570 }
2571 else
2572 {
2573 mfem_error("SparseMatrix::Gauss_Seidel_back()");
2574 }
2575 }
2576 }
2577 else
2578 {
2579 const int s = height;
2580 const int nnz = J.Capacity();
2581 const int *Ip = HostRead(I, s+1);
2582 const int *Jp = HostRead(J, nnz);
2583 const real_t *Ap = HostRead(A, nnz);
2584 real_t *yp = y.HostReadWrite();
2585 const real_t *xp = x.HostRead();
2586
2587 for (int i = s-1, j = Ip[s]-1; i >= 0; i--)
2588 {
2589 const int beg = Ip[i];
2590 real_t sum = 0.;
2591 int d = -1;
2592 for ( ; j >= beg; j--)
2593 {
2594 const int c = Jp[j];
2595 if (c == i)
2596 {
2597 d = j;
2598 }
2599 else
2600 {
2601 sum += Ap[j] * yp[c];
2602 }
2603 }
2604
2605 if (d >= 0 && Ap[d] != 0.0)
2606 {
2607 yp[i] = (xp[i] - sum) / Ap[d];
2608 }
2609 else if (xp[i] == sum)
2610 {
2611 yp[i] = sum;
2612 }
2613 else
2614 {
2615 mfem_error("SparseMatrix::Gauss_Seidel_back(...) #2");
2616 }
2617 }
2618 }
2619}
2620
2622{
2623 MFEM_VERIFY(Finalized(), "Matrix must be finalized.");
2624
2625 real_t sc = 1.0;
2626 for (int i = 0; i < height; i++)
2627 {
2628 int d = -1;
2629 real_t norm = 0.0;
2630 for (int j = I[i]; j < I[i+1]; j++)
2631 {
2632 if (J[j] == i)
2633 {
2634 d = j;
2635 }
2636 norm += fabs(A[j]);
2637 }
2638 if (d >= 0 && A[d] != 0.0)
2639 {
2640 real_t a = 1.8 * fabs(A[d]) / norm;
2641 if (a < sc)
2642 {
2643 sc = a;
2644 }
2645 }
2646 else
2647 {
2648 mfem_error("SparseMatrix::GetJacobiScaling() #2");
2649 }
2650 }
2651 return sc;
2652}
2653
2654void SparseMatrix::Jacobi(const Vector &b, const Vector &x0, Vector &x1,
2655 real_t sc, bool use_abs_diag) const
2656{
2657 MFEM_VERIFY(Finalized(), "Matrix must be finalized.");
2658
2659 for (int i = 0; i < height; i++)
2660 {
2661 int d = -1;
2662 real_t sum = b(i);
2663 for (int j = I[i]; j < I[i+1]; j++)
2664 {
2665 if (J[j] == i)
2666 {
2667 d = j;
2668 }
2669 else
2670 {
2671 sum -= A[j] * x0(J[j]);
2672 }
2673 }
2674 if (d >= 0 && A[d] != 0.0)
2675 {
2676 const real_t diag = (use_abs_diag) ? fabs(A[d]) : A[d];
2677 x1(i) = sc * (sum / diag) + (1.0 - sc) * x0(i);
2678 }
2679 else
2680 {
2681 mfem_error("SparseMatrix::Jacobi(...) #2");
2682 }
2683 }
2684}
2685
2687 real_t sc, bool use_abs_diag) const
2688{
2689 MFEM_VERIFY(Finalized(), "Matrix must be finalized.");
2690
2691 const int H = height;
2692 const int nnz = J.Capacity();
2693 const bool use_dev = b.UseDevice() || x.UseDevice();
2694
2695 const auto Ap = Read(A, nnz, use_dev);
2696 const auto Ip = Read(I, height+1, use_dev);
2697 const auto Jp = Read(J, nnz, use_dev);
2698
2699 const auto bp = b.Read(use_dev);
2700 auto xp = x.Write(use_dev);
2701
2702 mfem::forall_switch(use_dev, H, [=] MFEM_HOST_DEVICE (int i)
2703 {
2704 const int end = Ip[i+1];
2705 for (int j = Ip[i]; true; j++)
2706 {
2707 if (j == end)
2708 {
2709 MFEM_ABORT_KERNEL("Diagonal not found in SparseMatrix::DiagScale");
2710 }
2711 if (Jp[j] == i)
2712 {
2713 const real_t diag = (use_abs_diag) ? fabs(Ap[j]) : Ap[j];
2714 if (diag == 0.0)
2715 {
2716 MFEM_ABORT_KERNEL("Zero diagonal in SparseMatrix::DiagScale");
2717 }
2718 xp[i] = sc * bp[i] / diag;
2719 break;
2720 }
2721 }
2722 });
2723}
2724
2725template <bool useFabs>
2726static void JacobiDispatch(const Vector &b, const Vector &x0, Vector &x1,
2727 const Memory<int> &I, const Memory<int> &J,
2728 const Memory<real_t> &A, const int height,
2729 const real_t sc)
2730{
2731 const bool useDevice = b.UseDevice() || x0.UseDevice() || x1.UseDevice();
2732
2733 const auto bp = b.Read(useDevice);
2734 const auto x0p = x0.Read(useDevice);
2735 auto x1p = x1.Write(useDevice);
2736
2737 const auto Ip = Read(I, height+1, useDevice);
2738 const auto Jp = Read(J, J.Capacity(), useDevice);
2739 const auto Ap = Read(A, J.Capacity(), useDevice);
2740
2741 mfem::forall_switch(useDevice, height, [=] MFEM_HOST_DEVICE (int i)
2742 {
2743 real_t resi = bp[i], norm = 0.0;
2744 for (int j = Ip[i]; j < Ip[i+1]; j++)
2745 {
2746 resi -= Ap[j] * x0p[Jp[j]];
2747 if (useFabs)
2748 {
2749 norm += fabs(Ap[j]);
2750 }
2751 else
2752 {
2753 norm += Ap[j];
2754 }
2755 }
2756 if (norm > 0.0)
2757 {
2758 x1p[i] = x0p[i] + sc * resi / norm;
2759 }
2760 else
2761 {
2762 if (useFabs)
2763 {
2764 MFEM_ABORT_KERNEL("L1 norm of row is zero.");
2765 }
2766 else
2767 {
2768 MFEM_ABORT_KERNEL("sum of row is zero.");
2769 }
2770 }
2771 });
2772}
2773
2774void SparseMatrix::Jacobi2(const Vector &b, const Vector &x0, Vector &x1,
2775 real_t sc) const
2776{
2777 MFEM_VERIFY(Finalized(), "Matrix must be finalized.");
2778 JacobiDispatch<true>(b,x0,x1,I,J,A,height,sc);
2779}
2780
2781void SparseMatrix::Jacobi3(const Vector &b, const Vector &x0, Vector &x1,
2782 real_t sc) const
2783{
2784 MFEM_VERIFY(Finalized(), "Matrix must be finalized.");
2785 JacobiDispatch<false>(b,x0,x1,I,J,A,height,sc);
2786}
2787
2789 const DenseMatrix &subm, int skip_zeros)
2790{
2791 int i, j, gi, gj, s, t;
2792 real_t a;
2793
2794 if (Finalized())
2795 {
2796 HostReadI();
2797 HostReadJ();
2799 }
2800
2801 for (i = 0; i < rows.Size(); i++)
2802 {
2803 if ((gi=rows[i]) < 0) { gi = -1-gi, s = -1; }
2804 else { s = 1; }
2805 MFEM_ASSERT(gi < height,
2806 "Trying to insert a row " << gi << " outside the matrix height "
2807 << height);
2808 SetColPtr(gi);
2809 for (j = 0; j < cols.Size(); j++)
2810 {
2811 if ((gj=cols[j]) < 0) { gj = -1-gj, t = -s; }
2812 else { t = s; }
2813 MFEM_ASSERT(gj < width,
2814 "Trying to insert a column " << gj << " outside the matrix width "
2815 << width);
2816 a = subm(i, j);
2817 if (skip_zeros && a == 0.0)
2818 {
2819 // Skip assembly of zero elements if either:
2820 // (i) user specified to skip zeros regardless of symmetry, or
2821 // (ii) symmetry is not broken.
2822 if (skip_zeros == 2 || &rows != &cols || subm(j, i) == 0.0)
2823 {
2824 continue;
2825 }
2826 }
2827 if (t < 0) { a = -a; }
2828 _Add_(gj, a);
2829 }
2830 ClearColPtr();
2831 }
2832}
2833
2834void SparseMatrix::Set(const int i, const int j, const real_t val)
2835{
2836 real_t a = val;
2837 int gi, gj, s, t;
2838
2839 if ((gi=i) < 0) { gi = -1-gi, s = -1; }
2840 else { s = 1; }
2841 MFEM_ASSERT(gi < height,
2842 "Trying to set a row " << gi << " outside the matrix height "
2843 << height);
2844 if ((gj=j) < 0) { gj = -1-gj, t = -s; }
2845 else { t = s; }
2846 MFEM_ASSERT(gj < width,
2847 "Trying to set a column " << gj << " outside the matrix width "
2848 << width);
2849 if (t < 0) { a = -a; }
2850 _Set_(gi, gj, a);
2851}
2852
2853void SparseMatrix::Add(const int i, const int j, const real_t val)
2854{
2855 int gi, gj, s, t;
2856 real_t a = val;
2857
2858 if ((gi=i) < 0) { gi = -1-gi, s = -1; }
2859 else { s = 1; }
2860 MFEM_ASSERT(gi < height,
2861 "Trying to insert a row " << gi << " outside the matrix height "
2862 << height);
2863 if ((gj=j) < 0) { gj = -1-gj, t = -s; }
2864 else { t = s; }
2865 MFEM_ASSERT(gj < width,
2866 "Trying to insert a column " << gj << " outside the matrix width "
2867 << width);
2868 if (t < 0) { a = -a; }
2869 _Add_(gi, gj, a);
2870}
2871
2873 const DenseMatrix &subm, int skip_zeros)
2874{
2875 int i, j, gi, gj, s, t;
2876 real_t a;
2877
2878 for (i = 0; i < rows.Size(); i++)
2879 {
2880 if ((gi=rows[i]) < 0) { gi = -1-gi, s = -1; }
2881 else { s = 1; }
2882 MFEM_ASSERT(gi < height,
2883 "Trying to set a row " << gi << " outside the matrix height "
2884 << height);
2885 SetColPtr(gi);
2886 for (j = 0; j < cols.Size(); j++)
2887 {
2888 a = subm(i, j);
2889 if (skip_zeros && a == 0.0)
2890 {
2891 // Skip assembly of zero elements if either:
2892 // (i) user specified to skip zeros regardless of symmetry, or
2893 // (ii) symmetry is not broken.
2894 if (skip_zeros == 2 || &rows != &cols || subm(j, i) == 0.0)
2895 {
2896 continue;
2897 }
2898 }
2899 if ((gj=cols[j]) < 0) { gj = -1-gj, t = -s; }
2900 else { t = s; }
2901 MFEM_ASSERT(gj < width,
2902 "Trying to set a column " << gj << " outside the matrix width "
2903 << width);
2904 if (t < 0) { a = -a; }
2905 _Set_(gj, a);
2906 }
2907 ClearColPtr();
2908 }
2909}
2910
2912 const Array<int> &cols,
2913 const DenseMatrix &subm,
2914 int skip_zeros)
2915{
2916 int i, j, gi, gj, s, t;
2917 real_t a;
2918
2919 for (i = 0; i < rows.Size(); i++)
2920 {
2921 if ((gi=rows[i]) < 0) { gi = -1-gi, s = -1; }
2922 else { s = 1; }
2923 MFEM_ASSERT(gi < height,
2924 "Trying to set a row " << gi << " outside the matrix height "
2925 << height);
2926 SetColPtr(gi);
2927 for (j = 0; j < cols.Size(); j++)
2928 {
2929 a = subm(j, i);
2930 if (skip_zeros && a == 0.0)
2931 {
2932 // Skip assembly of zero elements if either:
2933 // (i) user specified to skip zeros regardless of symmetry, or
2934 // (ii) symmetry is not broken.
2935 if (skip_zeros == 2 || &rows != &cols || subm(j, i) == 0.0)
2936 {
2937 continue;
2938 }
2939 }
2940 if ((gj=cols[j]) < 0) { gj = -1-gj, t = -s; }
2941 else { t = s; }
2942 MFEM_ASSERT(gj < width,
2943 "Trying to set a column " << gj << " outside the matrix width "
2944 << width);
2945 if (t < 0) { a = -a; }
2946 _Set_(gj, a);
2947 }
2948 ClearColPtr();
2949 }
2950}
2951
2953 DenseMatrix &subm) const
2954{
2955 int i, j, gi, gj, s, t;
2956 real_t a;
2957
2958 for (i = 0; i < rows.Size(); i++)
2959 {
2960 if ((gi=rows[i]) < 0) { gi = -1-gi, s = -1; }
2961 else { s = 1; }
2962 MFEM_ASSERT(gi < height,
2963 "Trying to read a row " << gi << " outside the matrix height "
2964 << height);
2965 SetColPtr(gi);
2966 for (j = 0; j < cols.Size(); j++)
2967 {
2968 if ((gj=cols[j]) < 0) { gj = -1-gj, t = -s; }
2969 else { t = s; }
2970 MFEM_ASSERT(gj < width,
2971 "Trying to read a column " << gj << " outside the matrix width "
2972 << width);
2973 a = _Get_(gj);
2974 subm(i, j) = (t < 0) ? (-a) : (a);
2975 }
2976 ClearColPtr();
2977 }
2978}
2979
2980bool SparseMatrix::RowIsEmpty(const int row) const
2981{
2982 int gi;
2983
2984 if ((gi=row) < 0)
2985 {
2986 gi = -1-gi;
2987 }
2988 MFEM_ASSERT(gi < height,
2989 "Trying to query a row " << gi << " outside the matrix height "
2990 << height);
2991 if (Rows)
2992 {
2993 return (Rows[gi] == NULL);
2994 }
2995 else
2996 {
2997 return (I[gi] == I[gi+1]);
2998 }
2999}
3000
3001int SparseMatrix::GetRow(const int row, Array<int> &cols, Vector &srow) const
3002{
3003 RowNode *n;
3004 int j, gi;
3005
3006 if ((gi=row) < 0) { gi = -1-gi; }
3007 MFEM_ASSERT(gi < height,
3008 "Trying to read a row " << gi << " outside the matrix height "
3009 << height);
3010 if (Rows)
3011 {
3012 for (n = Rows[gi], j = 0; n; n = n->Prev)
3013 {
3014 j++;
3015 }
3016 cols.SetSize(j);
3017 srow.SetSize(j);
3018 for (n = Rows[gi], j = 0; n; n = n->Prev, j++)
3019 {
3020 cols[j] = n->Column;
3021 srow(j) = n->Value;
3022 }
3023 if (row < 0)
3024 {
3025 srow.Neg();
3026 }
3027
3028 return 0;
3029 }
3030 else
3031 {
3032 j = I[gi];
3033 cols.MakeRef(const_cast<int*>((const int*)J) + j, I[gi+1]-j);
3034 srow.NewDataAndSize(
3035 const_cast<real_t*>((const real_t*)A) + j, cols.Size());
3036 MFEM_ASSERT(row >= 0, "Row not valid: " << row << ", height: " << height);
3037 return 1;
3038 }
3039}
3040
3041void SparseMatrix::SetRow(const int row, const Array<int> &cols,
3042 const Vector &srow)
3043{
3044 int gi, gj, s, t;
3045 real_t a;
3046
3047 if ((gi=row) < 0) { gi = -1-gi, s = -1; }
3048 else { s = 1; }
3049 MFEM_ASSERT(gi < height,
3050 "Trying to set a row " << gi << " outside the matrix height "
3051 << height);
3052
3053 if (!Finalized())
3054 {
3055 SetColPtr(gi);
3056 for (int j = 0; j < cols.Size(); j++)
3057 {
3058 if ((gj=cols[j]) < 0) { gj = -1-gj, t = -s; }
3059 else { t = s; }
3060 MFEM_ASSERT(gj < width,
3061 "Trying to set a column " << gj << " outside the matrix"
3062 " width " << width);
3063 a = srow(j);
3064 if (t < 0) { a = -a; }
3065 _Set_(gj, a);
3066 }
3067 ClearColPtr();
3068 }
3069 else
3070 {
3071 MFEM_ASSERT(cols.Size() == RowSize(gi), "");
3072 MFEM_ASSERT(cols.Size() == srow.Size(), "");
3073
3074 for (int i = I[gi], j = 0; j < cols.Size(); j++, i++)
3075 {
3076 if ((gj=cols[j]) < 0) { gj = -1-gj, t = -s; }
3077 else { t = s; }
3078 MFEM_ASSERT(gj < width,
3079 "Trying to set a column " << gj << " outside the matrix"
3080 " width " << width);
3081
3082 J[i] = gj;
3083 A[i] = srow[j] * t;
3084 }
3085 }
3086}
3087
3088void SparseMatrix::AddRow(const int row, const Array<int> &cols,
3089 const Vector &srow)
3090{
3091 int j, gi, gj, s, t;
3092 real_t a;
3093
3094 MFEM_VERIFY(!Finalized(), "Matrix must NOT be finalized.");
3095
3096 if ((gi=row) < 0) { gi = -1-gi, s = -1; }
3097 else { s = 1; }
3098 MFEM_ASSERT(gi < height,
3099 "Trying to insert a row " << gi << " outside the matrix height "
3100 << height);
3101 SetColPtr(gi);
3102 for (j = 0; j < cols.Size(); j++)
3103 {
3104 if ((gj=cols[j]) < 0) { gj = -1-gj, t = -s; }
3105 else { t = s; }
3106 MFEM_ASSERT(gj < width,
3107 "Trying to insert a column " << gj << " outside the matrix width "
3108 << width);
3109 a = srow(j);
3110 if (a == 0.0)
3111 {
3112 continue;
3113 }
3114 if (t < 0) { a = -a; }
3115 _Add_(gj, a);
3116 }
3117 ClearColPtr();
3118}
3119
3120void SparseMatrix::ScaleRow(const int row, const real_t scale)
3121{
3122 int i;
3123
3124 if ((i=row) < 0)
3125 {
3126 i = -1-i;
3127 }
3128 if (Rows != NULL)
3129 {
3130 RowNode *aux;
3131
3132 for (aux = Rows[i]; aux != NULL; aux = aux -> Prev)
3133 {
3134 aux -> Value *= scale;
3135 }
3136 }
3137 else
3138 {
3139 int j, end = I[i+1];
3140
3141 for (j = I[i]; j < end; j++)
3142 {
3143 A[j] *= scale;
3144 }
3145 }
3146}
3147
3149{
3150 real_t scale;
3151 if (Rows != NULL)
3152 {
3153 RowNode *aux;
3154 for (int i=0; i < height; ++i)
3155 {
3156 scale = sl(i);
3157 for (aux = Rows[i]; aux != NULL; aux = aux -> Prev)
3158 {
3159 aux -> Value *= scale;
3160 }
3161 }
3162 }
3163 else
3164 {
3165 int j, end;
3166
3167 for (int i=0; i < height; ++i)
3168 {
3169 end = I[i+1];
3170 scale = sl(i);
3171 for (j = I[i]; j < end; j++)
3172 {
3173 A[j] *= scale;
3174 }
3175 }
3176 }
3177}
3178
3180{
3181 if (Rows != NULL)
3182 {
3183 RowNode *aux;
3184 for (int i=0; i < height; ++i)
3185 {
3186 for (aux = Rows[i]; aux != NULL; aux = aux -> Prev)
3187 {
3188 aux -> Value *= sr(aux->Column);
3189 }
3190 }
3191 }
3192 else
3193 {
3194 int j, end;
3195
3196 for (int i=0; i < height; ++i)
3197 {
3198 end = I[i+1];
3199 for (j = I[i]; j < end; j++)
3200 {
3201 A[j] *= sr(J[j]);
3202 }
3203 }
3204 }
3205}
3206
3208{
3209 MFEM_ASSERT(height == B.height && width == B.width,
3210 "Mismatch of this matrix size and rhs. This height = "
3211 << height << ", width = " << width << ", B.height = "
3212 << B.height << ", B.width = " << B.width);
3213
3214 for (int i = 0; i < height; i++)
3215 {
3216 SetColPtr(i);
3217 if (B.Rows)
3218 {
3219 for (RowNode *aux = B.Rows[i]; aux != NULL; aux = aux->Prev)
3220 {
3221 _Add_(aux->Column, aux->Value);
3222 }
3223 }
3224 else
3225 {
3226 for (int j = B.I[i]; j < B.I[i+1]; j++)
3227 {
3228 _Add_(B.J[j], B.A[j]);
3229 }
3230 }
3231 ClearColPtr();
3232 }
3233
3234 return (*this);
3235}
3236
3238{
3239 for (int i = 0; i < height; i++)
3240 {
3241 B.SetColPtr(i);
3242 if (Rows)
3243 {
3244 for (RowNode *np = Rows[i]; np != NULL; np = np->Prev)
3245 {
3246 np->Value += a * B._Get_(np->Column);
3247 }
3248 }
3249 else
3250 {
3251 for (int j = I[i]; j < I[i+1]; j++)
3252 {
3253 A[j] += a * B._Get_(J[j]);
3254 }
3255 }
3256 B.ClearColPtr();
3257 }
3258}
3259
3261{
3262 if (Rows == NULL)
3263 {
3264 const int nnz = J.Capacity();
3265 real_t *h_A = HostWrite(A, nnz);
3266 for (int i = 0; i < nnz; i++)
3267 {
3268 h_A[i] = a;
3269 }
3270 }
3271 else
3272 {
3273 for (int i = 0; i < height; i++)
3274 {
3275 for (RowNode *node_p = Rows[i]; node_p != NULL;
3276 node_p = node_p -> Prev)
3277 {
3278 node_p -> Value = a;
3279 }
3280 }
3281 }
3282
3283 return (*this);
3284}
3285
3287{
3288 if (Rows == NULL)
3289 {
3290 for (int i = 0, nnz = I[height]; i < nnz; i++)
3291 {
3292 A[i] *= a;
3293 }
3294 }
3295 else
3296 {
3297 for (int i = 0; i < height; i++)
3298 {
3299 for (RowNode *node_p = Rows[i]; node_p != NULL;
3300 node_p = node_p -> Prev)
3301 {
3302 node_p -> Value *= a;
3303 }
3304 }
3305 }
3306
3307 return (*this);
3308}
3309
3310void SparseMatrix::Print(std::ostream & os, int width_) const
3311{
3312 int i, j;
3313
3314 if (A.Empty())
3315 {
3316 RowNode *nd;
3317 for (i = 0; i < height; i++)
3318 {
3319 os << "[row " << i << "]\n";
3320 for (nd = Rows[i], j = 0; nd != NULL; nd = nd->Prev, j++)
3321 {
3322 os << " (" << nd->Column << "," << nd->Value << ")";
3323 if ( !((j+1) % width_) )
3324 {
3325 os << '\n';
3326 }
3327 }
3328 if (j % width_)
3329 {
3330 os << '\n';
3331 }
3332 }
3333 return;
3334 }
3335
3336 // HostRead forces synchronization
3337 HostReadI();
3338 HostReadJ();
3339 HostReadData();
3340 for (i = 0; i < height; i++)
3341 {
3342 os << "[row " << i << "]\n";
3343 for (j = I[i]; j < I[i+1]; j++)
3344 {
3345 os << " (" << J[j] << "," << A[j] << ")";
3346 if ( !((j+1-I[i]) % width_) )
3347 {
3348 os << '\n';
3349 }
3350 }
3351 if ((j-I[i]) % width_)
3352 {
3353 os << '\n';
3354 }
3355 }
3356}
3357
3358void SparseMatrix::PrintMatlab(std::ostream & os) const
3359{
3360 os << "% size " << height << " " << width << "\n";
3361 os << "% Non Zeros " << NumNonZeroElems() << "\n";
3362
3363 int i, j;
3364 ios::fmtflags old_fmt = os.flags();
3365 os.setf(ios::scientific);
3366 std::streamsize old_prec = os.precision(14);
3367
3368 if (A.Empty())
3369 {
3370 RowNode *nd;
3371 for (i = 0; i < height; i++)
3372 {
3373 for (nd = Rows[i], j = 0; nd != NULL; nd = nd->Prev, j++)
3374 {
3375 os << i+1 << " " << nd->Column+1 << " " << nd->Value << '\n';
3376 }
3377 }
3378 }
3379 else
3380 {
3381 // HostRead forces synchronization
3382 HostReadI();
3383 HostReadJ();
3384 HostReadData();
3385 for (i = 0; i < height; i++)
3386 {
3387 for (j = I[i]; j < I[i+1]; j++)
3388 {
3389 os << i+1 << " " << J[j]+1 << " " << A[j] << '\n';
3390 }
3391 }
3392 }
3393 // Write a zero entry at (m,n) to make sure MATLAB doesn't shrink the matrix
3394 os << height << " " << width << " 0.0\n";
3395 os.precision(old_prec);
3396 os.flags(old_fmt);
3397}
3398
3399void SparseMatrix::PrintMathematica(std::ostream & os) const
3400{
3401 int i, j;
3402 ios::fmtflags old_fmt = os.flags();
3403 os.setf(ios::scientific);
3404 std::streamsize old_prec = os.precision(14);
3405
3406 os << "(* Read file into Mathematica using: "
3407 << "myMat = Get[\"this_file_name\"] *)\n";
3408 os << "SparseArray[";
3409
3410 if (A == NULL)
3411 {
3412 RowNode *nd;
3413 int c = 0;
3414 os << "{\n";
3415 for (i = 0; i < height; i++)
3416 {
3417 for (nd = Rows[i], j = 0; nd != NULL; nd = nd->Prev, j++, c++)
3418 {
3419 os << "{"<< i+1 << ", " << nd->Column+1
3420 << "} -> Internal`StringToMReal[\"" << nd->Value << "\"]";
3421 if (c < NumNonZeroElems() - 1) { os << ","; }
3422 os << '\n';
3423 }
3424 }
3425 os << "}\n";
3426 }
3427 else
3428 {
3429 // HostRead forces synchronization
3430 HostReadI();
3431 HostReadJ();
3432 HostReadData();
3433 int c = 0;
3434 os << "{\n";
3435 for (i = 0; i < height; i++)
3436 {
3437 for (j = I[i]; j < I[i+1]; j++, c++)
3438 {
3439 os << "{" << i+1 << ", " << J[j]+1
3440 << "} -> Internal`StringToMReal[\"" << A[j] << "\"]";
3441 if (c < NumNonZeroElems() - 1) { os << ","; }
3442 os << '\n';
3443 }
3444 }
3445 os << "}";
3446 }
3447
3448 os << ",{" << height << "," << width << "}]\n";
3449
3450 os.precision(old_prec);
3451 os.flags(old_fmt);
3452}
3453
3454void SparseMatrix::PrintMM(std::ostream & os) const
3455{
3456 int i, j;
3457 ios::fmtflags old_fmt = os.flags();
3458 os.setf(ios::scientific);
3459 std::streamsize old_prec = os.precision(14);
3460
3461 os << "%%MatrixMarket matrix coordinate real general" << '\n'
3462 << "% Generated by MFEM" << '\n';
3463
3464 os << height << " " << width << " " << NumNonZeroElems() << '\n';
3465
3466 if (A.Empty())
3467 {
3468 RowNode *nd;
3469 for (i = 0; i < height; i++)
3470 {
3471 for (nd = Rows[i], j = 0; nd != NULL; nd = nd->Prev, j++)
3472 {
3473 os << i+1 << " " << nd->Column+1 << " " << nd->Value << '\n';
3474 }
3475 }
3476 }
3477 else
3478 {
3479 // HostRead forces synchronization
3480 HostReadI();
3481 HostReadJ();
3482 HostReadData();
3483 for (i = 0; i < height; i++)
3484 {
3485 for (j = I[i]; j < I[i+1]; j++)
3486 {
3487 os << i+1 << " " << J[j]+1 << " " << A[j] << '\n';
3488 }
3489 }
3490 }
3491 os.precision(old_prec);
3492 os.flags(old_fmt);
3493}
3494
3495void SparseMatrix::PrintCSR(std::ostream & os) const
3496{
3497 MFEM_VERIFY(Finalized(), "Matrix must be finalized.");
3498
3499 int i;
3500
3501 os << height << '\n'; // number of rows
3502
3503 // HostRead forces synchronization
3504 HostReadI();
3505 HostReadJ();
3506 HostReadData();
3507 for (i = 0; i <= height; i++)
3508 {
3509 os << I[i]+1 << '\n';
3510 }
3511
3512 for (i = 0; i < I[height]; i++)
3513 {
3514 os << J[i]+1 << '\n';
3515 }
3516
3517 for (i = 0; i < I[height]; i++)
3518 {
3519 os << A[i] << '\n';
3520 }
3521}
3522
3523void SparseMatrix::PrintCSR2(std::ostream & os) const
3524{
3525 MFEM_VERIFY(Finalized(), "Matrix must be finalized.");
3526
3527 int i;
3528
3529 os << height << '\n'; // number of rows
3530 os << width << '\n'; // number of columns
3531
3532 // HostRead forces synchronization
3533 HostReadI();
3534 HostReadJ();
3535 HostReadData();
3536 for (i = 0; i <= height; i++)
3537 {
3538 os << I[i] << '\n';
3539 }
3540
3541 for (i = 0; i < I[height]; i++)
3542 {
3543 os << J[i] << '\n';
3544 }
3545
3546 for (i = 0; i < I[height]; i++)
3547 {
3548 os << A[i] << '\n';
3549 }
3550}
3551
3552void SparseMatrix::PrintInfo(std::ostream &os) const
3553{
3554 const real_t MiB = 1024.*1024;
3555 int nnz = NumNonZeroElems();
3556 real_t pz = 100./nnz;
3557 int nz = CountSmallElems(0.0);
3558 real_t max_norm = MaxNorm();
3559 real_t symm = IsSymmetric();
3560 int nnf = CheckFinite();
3561 int ns12 = CountSmallElems(1e-12*max_norm);
3562 int ns15 = CountSmallElems(1e-15*max_norm);
3563 int ns18 = CountSmallElems(1e-18*max_norm);
3564
3565 os <<
3566 "SparseMatrix statistics:\n"
3567 " Format : " <<
3568 (Empty() ? "(empty)" : (Finalized() ? "CSR" : "LIL")) << "\n"
3569 " Dimensions : " << height << " x " << width << "\n"
3570 " Number of entries (total) : " << nnz << "\n"
3571 " Number of entries (per row) : " << 1.*nnz/Height() << "\n"
3572 " Number of stored zeros : " << nz*pz << "% (" << nz << ")\n"
3573 " Number of Inf/Nan entries : " << nnf*pz << "% ("<< nnf << ")\n"
3574 " Norm, max |a_ij| : " << max_norm << "\n"
3575 " Symmetry, max |a_ij-a_ji| : " << symm << "\n"
3576 " Number of small entries:\n"
3577 " |a_ij| <= 1e-12*Norm : " << ns12*pz << "% (" << ns12 << ")\n"
3578 " |a_ij| <= 1e-15*Norm : " << ns15*pz << "% (" << ns15 << ")\n"
3579 " |a_ij| <= 1e-18*Norm : " << ns18*pz << "% (" << ns18 << ")\n";
3580 if (Finalized())
3581 {
3582 os << " Memory used by CSR : " <<
3583 (sizeof(int)*(height+1+nnz)+sizeof(real_t)*nnz)/MiB << " MiB\n";
3584 }
3585 if (Rows != NULL)
3586 {
3587 size_t used_mem = sizeof(RowNode*)*height;
3588#ifdef MFEM_USE_MEMALLOC
3589 used_mem += NodesMem->MemoryUsage();
3590#else
3591 for (int i = 0; i < height; i++)
3592 {
3593 for (RowNode *aux = Rows[i]; aux != NULL; aux = aux->Prev)
3594 {
3595 used_mem += sizeof(RowNode);
3596 }
3597 }
3598#endif
3599 os << " Memory used by LIL : " << used_mem/MiB << " MiB\n";
3600 }
3601}
3602
3604{
3605 I.Delete();
3606 J.Delete();
3607 A.Delete();
3608
3609 if (Rows != NULL)
3610 {
3611#if !defined(MFEM_USE_MEMALLOC)
3612 for (int i = 0; i < height; i++)
3613 {
3614 RowNode *aux, *node_p = Rows[i];
3615 while (node_p != NULL)
3616 {
3617 aux = node_p;
3618 node_p = node_p->Prev;
3619 delete aux;
3620 }
3621 }
3622#endif
3623 delete [] Rows;
3624 }
3625
3626 delete [] ColPtrJ;
3627 delete [] ColPtrNode;
3628#ifdef MFEM_USE_MEMALLOC
3629 delete NodesMem;
3630#endif
3631 delete At;
3632
3634}
3635
3637{
3638 int awidth = 0;
3639 if (A)
3640 {
3641 const int *start_j = J;
3642 const int *end_j = J + I[height];
3643 for (const int *jptr = start_j; jptr != end_j; ++jptr)
3644 {
3645 awidth = std::max(awidth, *jptr + 1);
3646 }
3647 }
3648 else
3649 {
3650 RowNode *aux;
3651 for (int i = 0; i < height; i++)
3652 {
3653 for (aux = Rows[i]; aux != NULL; aux = aux->Prev)
3654 {
3655 awidth = std::max(awidth, aux->Column + 1);
3656 }
3657 }
3658 }
3659 return awidth;
3660}
3661
3663{
3664 int n = S.NumNonZeroElems();
3665 real_t * s = S.GetData();
3666
3667 for (int i = 0; i < n; i++)
3668 {
3669 s[i] = f(s[i]);
3670 }
3671}
3672
3674{
3675 MFEM_VERIFY(
3676 A.Finalized(),
3677 "Finalize must be called before Transpose. Use TransposeRowMatrix instead");
3678
3679 int i, j, end;
3680 const int *A_i, *A_j;
3681 int m, n, nnz, *At_i, *At_j;
3682 const real_t *A_data;
3683 real_t *At_data;
3684
3685 m = A.Height(); // number of rows of A
3686 n = A.Width(); // number of columns of A
3687 nnz = A.NumNonZeroElems();
3688 A_i = A.HostReadI();
3689 A_j = A.HostReadJ();
3690 A_data = A.HostReadData();
3691
3692 At_i = Memory<int>(n+1);
3693 At_j = Memory<int>(nnz);
3694 At_data = Memory<real_t>(nnz);
3695
3696 for (i = 0; i <= n; i++)
3697 {
3698 At_i[i] = 0;
3699 }
3700 for (i = 0; i < nnz; i++)
3701 {
3702 At_i[A_j[i]+1]++;
3703 }
3704 for (i = 1; i < n; i++)
3705 {
3706 At_i[i+1] += At_i[i];
3707 }
3708
3709 for (i = j = 0; i < m; i++)
3710 {
3711 end = A_i[i+1];
3712 for ( ; j < end; j++)
3713 {
3714 At_j[At_i[A_j[j]]] = i;
3715 At_data[At_i[A_j[j]]] = A_data[j];
3716 At_i[A_j[j]]++;
3717 }
3718 }
3719
3720 for (i = n; i > 0; i--)
3721 {
3722 At_i[i] = At_i[i-1];
3723 }
3724 At_i[0] = 0;
3725
3726 return new SparseMatrix(At_i, At_j, At_data, n, m);
3727}
3728
3730 int useActualWidth)
3731{
3732 int i, j;
3733 int m, n, nnz, *At_i, *At_j;
3734 real_t *At_data;
3735 Array<int> Acols;
3736 Vector Avals;
3737
3738 m = A.Height(); // number of rows of A
3739 if (useActualWidth)
3740 {
3741 n = 0;
3742 int tmp;
3743 for (i = 0; i < m; i++)
3744 {
3745 A.GetRow(i, Acols, Avals);
3746 if (Acols.Size())
3747 {
3748 tmp = Acols.Max();
3749 if (tmp > n)
3750 {
3751 n = tmp;
3752 }
3753 }
3754 }
3755 ++n;
3756 }
3757 else
3758 {
3759 n = A.Width(); // number of columns of A
3760 }
3761 nnz = A.NumNonZeroElems();
3762
3763 At_i = Memory<int>(n+1);
3764 At_j = Memory<int>(nnz);
3765 At_data = Memory<real_t>(nnz);
3766
3767 for (i = 0; i <= n; i++)
3768 {
3769 At_i[i] = 0;
3770 }
3771
3772 for (i = 0; i < m; i++)
3773 {
3774 A.GetRow(i, Acols, Avals);
3775 for (j = 0; j<Acols.Size(); ++j)
3776 {
3777 At_i[Acols[j]+1]++;
3778 }
3779 }
3780 for (i = 1; i < n; i++)
3781 {
3782 At_i[i+1] += At_i[i];
3783 }
3784
3785 for (i = 0; i < m; i++)
3786 {
3787 A.GetRow(i, Acols, Avals);
3788 for (j = 0; j<Acols.Size(); ++j)
3789 {
3790 At_j[At_i[Acols[j]]] = i;
3791 At_data[At_i[Acols[j]]] = Avals[j];
3792 At_i[Acols[j]]++;
3793 }
3794 }
3795
3796 for (i = n; i > 0; i--)
3797 {
3798 At_i[i] = At_i[i-1];
3799 }
3800 At_i[0] = 0;
3801
3802 return new SparseMatrix(At_i, At_j, At_data, n, m);
3803}
3804
3805
3807 SparseMatrix *OAB)
3808{
3809 int nrowsA, ncolsA, nrowsB, ncolsB;
3810 const int *A_i, *A_j, *B_i, *B_j;
3811 int *C_i, *C_j, *B_marker;
3812 const real_t *A_data, *B_data;
3813 real_t *C_data;
3814 int ia, ib, ic, ja, jb, num_nonzeros;
3815 int row_start, counter;
3816 real_t a_entry, b_entry;
3817 SparseMatrix *C;
3818
3819 nrowsA = A.Height();
3820 ncolsA = A.Width();
3821 nrowsB = B.Height();
3822 ncolsB = B.Width();
3823
3824 MFEM_VERIFY(ncolsA == nrowsB,
3825 "number of columns of A (" << ncolsA
3826 << ") must equal number of rows of B (" << nrowsB << ")");
3827
3828 A_i = A.HostReadI();
3829 A_j = A.HostReadJ();
3830 A_data = A.HostReadData();
3831 B_i = B.HostReadI();
3832 B_j = B.HostReadJ();
3833 B_data = B.HostReadData();
3834
3835 B_marker = new int[ncolsB];
3836
3837 for (ib = 0; ib < ncolsB; ib++)
3838 {
3839 B_marker[ib] = -1;
3840 }
3841
3842 if (OAB == NULL)
3843 {
3844 C_i = Memory<int>(nrowsA+1);
3845
3846 C_i[0] = num_nonzeros = 0;
3847 for (ic = 0; ic < nrowsA; ic++)
3848 {
3849 for (ia = A_i[ic]; ia < A_i[ic+1]; ia++)
3850 {
3851 ja = A_j[ia];
3852 for (ib = B_i[ja]; ib < B_i[ja+1]; ib++)
3853 {
3854 jb = B_j[ib];
3855 if (B_marker[jb] != ic)
3856 {
3857 B_marker[jb] = ic;
3858 num_nonzeros++;
3859 }
3860 }
3861 }
3862 C_i[ic+1] = num_nonzeros;
3863 }
3864
3865 C_j = Memory<int>(num_nonzeros);
3866 C_data = Memory<real_t>(num_nonzeros);
3867
3868 C = new SparseMatrix(C_i, C_j, C_data, nrowsA, ncolsB);
3869
3870 for (ib = 0; ib < ncolsB; ib++)
3871 {
3872 B_marker[ib] = -1;
3873 }
3874 }
3875 else
3876 {
3877 C = OAB;
3878
3879 MFEM_VERIFY(nrowsA == C->Height() && ncolsB == C->Width(),
3880 "Input matrix sizes do not match output sizes"
3881 << " nrowsA = " << nrowsA
3882 << ", C->Height() = " << C->Height()
3883 << " ncolsB = " << ncolsB
3884 << ", C->Width() = " << C->Width());
3885
3886 // C_i = C->HostReadI(); // not used
3887 C_j = C->HostWriteJ();
3888 C_data = C->HostWriteData();
3889 }
3890
3891 counter = 0;
3892 for (ic = 0; ic < nrowsA; ic++)
3893 {
3894 // row_start = C_i[ic];
3895 row_start = counter;
3896 for (ia = A_i[ic]; ia < A_i[ic+1]; ia++)
3897 {
3898 ja = A_j[ia];
3899 a_entry = A_data[ia];
3900 for (ib = B_i[ja]; ib < B_i[ja+1]; ib++)
3901 {
3902 jb = B_j[ib];
3903 b_entry = B_data[ib];
3904 if (B_marker[jb] < row_start)
3905 {
3906 B_marker[jb] = counter;
3907 if (OAB == NULL)
3908 {
3909 C_j[counter] = jb;
3910 }
3911 C_data[counter] = a_entry*b_entry;
3912 counter++;
3913 }
3914 else
3915 {
3916 C_data[B_marker[jb]] += a_entry*b_entry;
3917 }
3918 }
3919 }
3920 }
3921
3922 MFEM_VERIFY(
3923 OAB == NULL || counter == OAB->NumNonZeroElems(),
3924 "With pre-allocated output matrix, number of non-zeros ("
3925 << OAB->NumNonZeroElems()
3926 << ") did not match number of entries changed from matrix-matrix multiply, "
3927 << counter);
3928
3929 delete [] B_marker;
3930
3931 return C;
3932}
3933
3935{
3936 SparseMatrix *At = Transpose(A);
3937 SparseMatrix *AtB = Mult(*At, B);
3938 delete At;
3939 return AtB;
3940}
3941
3943 const AbstractSparseMatrix &B)
3944{
3945 int nrowsA, ncolsA, nrowsB, ncolsB;
3946 int *C_i, *C_j, *B_marker;
3947 real_t *C_data;
3948 int ia, ib, ic, ja, jb, num_nonzeros;
3949 int row_start, counter;
3950 real_t a_entry, b_entry;
3951 SparseMatrix *C;
3952
3953 nrowsA = A.Height();
3954 ncolsA = A.Width();
3955 nrowsB = B.Height();
3956 ncolsB = B.Width();
3957
3958 MFEM_VERIFY(ncolsA == nrowsB,
3959 "number of columns of A (" << ncolsA
3960 << ") must equal number of rows of B (" << nrowsB << ")");
3961
3962 B_marker = new int[ncolsB];
3963
3964 for (ib = 0; ib < ncolsB; ib++)
3965 {
3966 B_marker[ib] = -1;
3967 }
3968
3969 C_i = Memory<int>(nrowsA+1);
3970
3971 C_i[0] = num_nonzeros = 0;
3972
3973 Array<int> colsA, colsB;
3974 Vector dataA, dataB;
3975 for (ic = 0; ic < nrowsA; ic++)
3976 {
3977 A.GetRow(ic, colsA, dataA);
3978 for (ia = 0; ia < colsA.Size(); ia++)
3979 {
3980 ja = colsA[ia];
3981 B.GetRow(ja, colsB, dataB);
3982 for (ib = 0; ib < colsB.Size(); ib++)
3983 {
3984 jb = colsB[ib];
3985 if (B_marker[jb] != ic)
3986 {
3987 B_marker[jb] = ic;
3988 num_nonzeros++;
3989 }
3990 }
3991 }
3992 C_i[ic+1] = num_nonzeros;
3993 }
3994
3995 C_j = Memory<int>(num_nonzeros);
3996 C_data = Memory<real_t>(num_nonzeros);
3997
3998 C = new SparseMatrix(C_i, C_j, C_data, nrowsA, ncolsB);
3999
4000 for (ib = 0; ib < ncolsB; ib++)
4001 {
4002 B_marker[ib] = -1;
4003 }
4004
4005 counter = 0;
4006 for (ic = 0; ic < nrowsA; ic++)
4007 {
4008 row_start = counter;
4009 A.GetRow(ic, colsA, dataA);
4010 for (ia = 0; ia < colsA.Size(); ia++)
4011 {
4012 ja = colsA[ia];
4013 a_entry = dataA[ia];
4014 B.GetRow(ja, colsB, dataB);
4015 for (ib = 0; ib < colsB.Size(); ib++)
4016 {
4017 jb = colsB[ib];
4018 b_entry = dataB[ib];
4019 if (B_marker[jb] < row_start)
4020 {
4021 B_marker[jb] = counter;
4022 C_j[counter] = jb;
4023 C_data[counter] = a_entry*b_entry;
4024 counter++;
4025 }
4026 else
4027 {
4028 C_data[B_marker[jb]] += a_entry*b_entry;
4029 }
4030 }
4031 }
4032 }
4033
4034 delete [] B_marker;
4035
4036 return C;
4037}
4038
4040{
4041 DenseMatrix *C = new DenseMatrix(A.Height(), B.Width());
4042 Vector columnB, columnC;
4043 for (int j = 0; j < B.Width(); ++j)
4044 {
4045 B.GetColumnReference(j, columnB);
4046 C->GetColumnReference(j, columnC);
4047 A.Mult(columnB, columnC);
4048 }
4049 return C;
4050}
4051
4053{
4054 DenseMatrix R (P, 't'); // R = P^T
4055 DenseMatrix *AP = Mult (A, P);
4056 DenseMatrix *RAP_ = new DenseMatrix(R.Height(), AP->Width());
4057 Mult (R, *AP, *RAP_);
4058 delete AP;
4059 return RAP_;
4060}
4061
4063{
4064 SparseMatrix *R = Transpose(P);
4065 DenseMatrix *RA = Mult(*R, A);
4066 DenseMatrix AtP(*RA, 't');
4067 delete RA;
4068 DenseMatrix *RAtP = Mult(*R, AtP);
4069 delete R;
4070 DenseMatrix * RAP_ = new DenseMatrix(*RAtP, 't');
4071 delete RAtP;
4072 return RAP_;
4073}
4074
4076 SparseMatrix *ORAP)
4077{
4078 SparseMatrix *P = Transpose (R);
4079 SparseMatrix *AP = Mult (A, *P);
4080 delete P;
4081 SparseMatrix *RAP_ = Mult (R, *AP, ORAP);
4082 delete AP;
4083 return RAP_;
4084}
4085
4087 const SparseMatrix &P)
4088{
4089 SparseMatrix * R = Transpose(Rt);
4090 SparseMatrix * RA = Mult(*R,A);
4091 delete R;
4092 SparseMatrix * RAP_ = Mult(*RA, P);
4093 delete RA;
4094 return RAP_;
4095}
4096
4098 SparseMatrix *OAtDA)
4099{
4100 int i, At_nnz, *At_j;
4101 real_t *At_data;
4102
4103 SparseMatrix *At = Transpose (A);
4104 At_nnz = At -> NumNonZeroElems();
4105 At_j = At -> GetJ();
4106 At_data = At -> GetData();
4107 for (i = 0; i < At_nnz; i++)
4108 {
4109 At_data[i] *= D(At_j[i]);
4110 }
4111 SparseMatrix *AtDA = Mult (*At, A, OAtDA);
4112 delete At;
4113 return AtDA;
4114}
4115
4117 const SparseMatrix & B)
4118{
4119 int nrows = A.Height();
4120 int ncols = A.Width();
4121
4122 int * C_i = Memory<int>(nrows+1);
4123 int * C_j;
4124 real_t * C_data;
4125
4126 const int *A_i = A.HostReadI();
4127 const int *A_j = A.HostReadJ();
4128 const real_t *A_data = A.HostReadData();
4129
4130 const int *B_i = B.HostReadI();
4131 const int *B_j = B.HostReadJ();
4132 const real_t *B_data = B.HostReadData();
4133
4134 int * marker = new int[ncols];
4135 std::fill(marker, marker+ncols, -1);
4136
4137 int num_nonzeros = 0, jcol;
4138 C_i[0] = 0;
4139 for (int ic = 0; ic < nrows; ic++)
4140 {
4141 for (int ia = A_i[ic]; ia < A_i[ic+1]; ia++)
4142 {
4143 jcol = A_j[ia];
4144 marker[jcol] = ic;
4145 num_nonzeros++;
4146 }
4147 for (int ib = B_i[ic]; ib < B_i[ic+1]; ib++)
4148 {
4149 jcol = B_j[ib];
4150 if (marker[jcol] != ic)
4151 {
4152 marker[jcol] = ic;
4153 num_nonzeros++;
4154 }
4155 }
4156 C_i[ic+1] = num_nonzeros;
4157 }
4158
4159 C_j = Memory<int>(num_nonzeros);
4160 C_data = Memory<real_t>(num_nonzeros);
4161
4162 for (int ia = 0; ia < ncols; ia++)
4163 {
4164 marker[ia] = -1;
4165 }
4166
4167 int pos = 0;
4168 for (int ic = 0; ic < nrows; ic++)
4169 {
4170 for (int ia = A_i[ic]; ia < A_i[ic+1]; ia++)
4171 {
4172 jcol = A_j[ia];
4173 C_j[pos] = jcol;
4174 C_data[pos] = a*A_data[ia];
4175 marker[jcol] = pos;
4176 pos++;
4177 }
4178 for (int ib = B_i[ic]; ib < B_i[ic+1]; ib++)
4179 {
4180 jcol = B_j[ib];
4181 if (marker[jcol] < C_i[ic])
4182 {
4183 C_j[pos] = jcol;
4184 C_data[pos] = b*B_data[ib];
4185 marker[jcol] = pos;
4186 pos++;
4187 }
4188 else
4189 {
4190 C_data[marker[jcol]] += b*B_data[ib];
4191 }
4192 }
4193 }
4194
4195 delete[] marker;
4196 return new SparseMatrix(C_i, C_j, C_data, nrows, ncols);
4197}
4198
4200{
4201 return Add(1.,A,1.,B);
4202}
4203
4205{
4206 MFEM_ASSERT(Ai.Size() > 0, "invalid size Ai.Size() = " << Ai.Size());
4207
4208 SparseMatrix * accumulate = Ai[0];
4209 SparseMatrix * result = accumulate;
4210
4211 for (int i=1; i < Ai.Size(); ++i)
4212 {
4213 result = Add(*accumulate, *Ai[i]);
4214 if (i != 1)
4215 {
4216 delete accumulate;
4217 }
4218
4219 accumulate = result;
4220 }
4221
4222 return result;
4223}
4224
4225/// B += alpha * A
4226void Add(const SparseMatrix &A,
4228{
4229 for (int r = 0; r < B.Height(); r++)
4230 {
4231 const int * colA = A.GetRowColumns(r);
4232 const real_t * valA = A.GetRowEntries(r);
4233 for (int i=0; i<A.RowSize(r); i++)
4234 {
4235 B(r, colA[i]) += alpha * valA[i];
4236 }
4237 }
4238}
4239
4240/// Produces a block matrix with blocks A_{ij}*B
4242{
4243 int mA = A.Height(), nA = A.Width();
4244 int mB = B.Height(), nB = B.Width();
4245
4246 DenseMatrix *C = new DenseMatrix(mA * mB, nA * nB);
4247 *C = 0.0;
4248 for (int i=0; i<mA; i++)
4249 {
4250 for (int j=0; j<nA; j++)
4251 {
4252 C->AddMatrix(A(i,j), B, i * mB, j * nB);
4253 }
4254 }
4255 return C;
4256}
4257
4258/// Produces a block matrix with blocks A_{ij}*B
4260{
4261 int mA = A.Height(), nA = A.Width();
4262 int mB = B.Height(), nB = B.Width();
4263
4264 SparseMatrix *C = new SparseMatrix(mA * mB, nA * nB);
4265
4266 for (int i=0; i<mA; i++)
4267 {
4268 for (int j=0; j<nA; j++)
4269 {
4270 for (int r=0; r<mB; r++)
4271 {
4272 const int * colB = B.GetRowColumns(r);
4273 const real_t * valB = B.GetRowEntries(r);
4274
4275 for (int cj=0; cj<B.RowSize(r); cj++)
4276 {
4277 C->Set(i * mB + r, j * nB + colB[cj], A(i,j) * valB[cj]);
4278 }
4279 }
4280 }
4281 }
4282 C->Finalize();
4283
4284 return C;
4285}
4286
4287/// Produces a block matrix with blocks A_{ij}*B
4289{
4290 int mA = A.Height(), nA = A.Width();
4291 int mB = B.Height(), nB = B.Width();
4292
4293 SparseMatrix *C = new SparseMatrix(mA * mB, nA * nB);
4294
4295 for (int r=0; r<mA; r++)
4296 {
4297 const int * colA = A.GetRowColumns(r);
4298 const real_t * valA = A.GetRowEntries(r);
4299
4300 for (int aj=0; aj<A.RowSize(r); aj++)
4301 {
4302 for (int i=0; i<mB; i++)
4303 {
4304 for (int j=0; j<nB; j++)
4305 {
4306 C->Set(r * mB + i, colA[aj] * nB + j, valA[aj] * B(i, j));
4307 }
4308 }
4309 }
4310 }
4311 C->Finalize();
4312
4313 return C;
4314}
4315
4316/// Produces a block matrix with blocks A_{ij}*B
4318{
4319 int mA = A.Height(), nA = A.Width();
4320 int mB = B.Height(), nB = B.Width();
4321
4322 SparseMatrix *C = new SparseMatrix(mA * mB, nA * nB);
4323
4324 for (int ar=0; ar<mA; ar++)
4325 {
4326 const int * colA = A.GetRowColumns(ar);
4327 const real_t * valA = A.GetRowEntries(ar);
4328
4329 for (int aj=0; aj<A.RowSize(ar); aj++)
4330 {
4331 for (int br=0; br<mB; br++)
4332 {
4333 const int * colB = B.GetRowColumns(br);
4334 const real_t * valB = B.GetRowEntries(br);
4335
4336 for (int bj=0; bj<B.RowSize(br); bj++)
4337 {
4338 C->Set(ar * mB + br, colA[aj] * nB + colB[bj],
4339 valA[aj] * valB[bj]);
4340 }
4341 }
4342 }
4343 }
4344 C->Finalize();
4345
4346 return C;
4347}
4348
4350{
4351 mfem::Swap(width, other.width);
4352 mfem::Swap(height, other.height);
4353 mfem::Swap(I, other.I);
4354 mfem::Swap(J, other.J);
4355 mfem::Swap(A, other.A);
4356 mfem::Swap(Rows, other.Rows);
4358 mfem::Swap(ColPtrJ, other.ColPtrJ);
4360 mfem::Swap(At, other.At);
4361
4362#ifdef MFEM_USE_MEMALLOC
4364#endif
4365
4367}
4368
4370{
4371 Destroy();
4372#ifdef MFEM_USE_CUDA_OR_HIP
4374 {
4375 if (SparseMatrixCount==1)
4376 {
4377 if (handle)
4378 {
4379 MFEM_cu_or_hip(sparseDestroy)(handle);
4380 handle = nullptr;
4381 }
4382 if (dBuffer)
4383 {
4384 MFEM_Cu_or_Hip(MemFree)(dBuffer);
4385 dBuffer = nullptr;
4386 bufferSize = 0;
4387 }
4388 }
4390 }
4391#endif // MFEM_USE_CUDA_OR_HIP
4392}
4393
4394}
Abstract data type for sparse matrices.
Definition matrix.hpp:74
virtual int NumNonZeroElems() const =0
Returns the number of non-zeros in a matrix.
virtual int GetRow(const int row, Array< int > &cols, Vector &srow) const =0
Gets the columns indexes and values for row row.
Dynamic 2D array using row-major layout.
Definition array.hpp:392
int NumCols() const
Definition array.hpp:408
int NumRows() const
Definition array.hpp:407
Memory< T > & GetMemory()
Return a reference to the Memory object used by the Array.
Definition array.hpp:126
T Max() const
Find the maximal element in the array, using the comparison operator < for class T.
Definition array.cpp:68
void Sort()
Sorts the array in ascending order. This requires operator< to be defined for T.
Definition array.hpp:278
void SetSize(int nsize)
Change the logical size of the array, keep existing entries.
Definition array.hpp:758
int Size() const
Return the logical size of the array.
Definition array.hpp:147
void MakeRef(T *data_, int size_, bool own_data=false)
Make this Array a reference to a pointer.
Definition array.hpp:943
T * Write(bool on_dev=true)
Shortcut for mfem::Write(a.GetMemory(), a.Size(), on_dev).
Definition array.hpp:345
const T * Read(bool on_dev=true) const
Shortcut for mfem::Read(a.GetMemory(), a.Size(), on_dev).
Definition array.hpp:337
Data type dense matrix using column-major storage.
Definition densemat.hpp:24
void GetColumnReference(int c, Vector &col)
Definition densemat.hpp:319
void SetSize(int s)
Change the size of the DenseMatrix to s x s.
Definition densemat.hpp:108
void AddMatrix(DenseMatrix &A, int ro, int co)
Perform (ro+i,co+j)+=A(i,j) for 0<=i.
static bool Allows(unsigned long b_mask)
Return true if any of the backends in the backend mask, b_mask, are allowed.
Definition device.hpp:259
static MemoryType GetDeviceMemoryType()
Get the current Device MemoryType. This is the MemoryType used by most MFEM classes when allocating m...
Definition device.hpp:274
Abstract data type for matrix inverse.
Definition matrix.hpp:63
size_t MemoryUsage() const
Class used by MFEM to store pointers to host and/or device memory.
void SetHostPtrOwner(bool own) const
Set/clear the ownership flag for the host pointer. Ownership indicates whether the pointer will be de...
int Capacity() const
Return the size of the allocated memory.
MemoryType GetMemoryType() const
Return a MemoryType that is currently valid. If both the host and the device pointers are currently v...
bool Empty() const
Return true if the Memory object is empty, see Reset().
void CopyFrom(const Memory &src, int size)
Copy size entries from src to *this.
void Reset()
Reset the memory to be empty, ensuring that Delete() will be a no-op.
void Wrap(T *ptr, int size, bool own)
Wrap an externally allocated host pointer, ptr with the current host memory type returned by MemoryMa...
void Delete()
Delete the owned pointers and reset the Memory object.
void ClearOwnerFlags() const
Clear the ownership flags for the host and device pointers, as well as any internal data allocated by...
void New(int size)
Allocate host memory for size entries with the current host memory type returned by MemoryManager::Ge...
int width
Dimension of the input / number of columns in the matrix.
Definition operator.hpp:28
int Height() const
Get the height (size of output) of the Operator. Synonym with NumRows().
Definition operator.hpp:66
int height
Dimension of the output / number of rows in the matrix.
Definition operator.hpp:27
DiagonalPolicy
Defines operator diagonal policy upon elimination of rows and/or columns.
Definition operator.hpp:48
@ DIAG_ONE
Set the diagonal value to one.
Definition operator.hpp:50
@ DIAG_KEEP
Keep the diagonal value.
Definition operator.hpp:51
@ DIAG_ZERO
Set the diagonal value to zero.
Definition operator.hpp:49
int Width() const
Get the width (size of input) of the Operator. Synonym with NumCols().
Definition operator.hpp:72
RowNode * Prev
Definition sparsemat.hpp:45
Data type sparse matrix.
Definition sparsemat.hpp:51
int GetRow(const int row, Array< int > &cols, Vector &srow) const override
Extract all column indices and values from a given row.
void PrintCSR2(std::ostream &out) const
Prints a sparse matrix to stream out in CSR format.
real_t MaxNorm() const
const int * HostReadJ() const
void GetDiag(Vector &d) const
Returns the Diagonal of A.
real_t GetRowNorml1(int irow) const
For i = irow compute .
void PartMult(const Array< int > &rows, const Vector &x, Vector &y) const
bool isSorted
Are the columns sorted already.
Definition sparsemat.hpp:88
RowNodeAlloc * NodesMem
Definition sparsemat.hpp:84
void Gauss_Seidel_back(const Vector &x, Vector &y) const
bool RowIsEmpty(const int row) const
void ClearColPtr() const
Reset the "current row" set by calling SetColPtr(). This method must be called between any two calls ...
MatrixInverse * Inverse() const override
This virtual method is not supported: it always returns NULL.
void Jacobi3(const Vector &b, const Vector &x0, Vector &x1, real_t sc=1.0) const
real_t * ReadWriteData(bool on_dev=true)
Memory< real_t > A
Array with size I[height], containing the actual entries of the sparse matrix, as indexed by the I ar...
Definition sparsemat.hpp:68
void EliminateRowCol(int rc, const real_t sol, Vector &rhs, DiagonalPolicy dpolicy=DIAG_ONE)
Eliminate row rc and column rc and modify the rhs using sol.
SparseMatrix & operator*=(real_t a)
void MultTranspose(const Vector &x, Vector &y) const override
Multiply a vector with the transposed matrix. y = At * x.
void SetWidth(int width_=-1)
Change the width of a SparseMatrix.
MemAlloc< RowNode, 1024 > RowNodeAlloc
Definition sparsemat.hpp:83
bool Empty() const
Check if the SparseMatrix is empty.
void GetSubMatrix(const Array< int > &rows, const Array< int > &cols, DenseMatrix &subm) const
SparseMatrix * At
Transpose of A. Owned. Used to perform MultTranspose() on devices.
Definition sparsemat.hpp:80
void PartAddMult(const Array< int > &rows, const Vector &x, Vector &y, const real_t a=1.0) const
void GetRowSums(Vector &x) const
For all i compute .
int NumNonZeroElems() const override
Returns the number of the nonzero elements in the matrix.
bool Finalized() const
Returns whether or not CSR format has been finalized.
void EliminateBC(const Array< int > &ess_dofs, DiagonalPolicy diag_policy)
Eliminate essential (Dirichlet) boundary conditions.
void PrintInfo(std::ostream &out) const
Print various sparse matrix statistics.
void MoveDiagonalFirst()
Move the diagonal entry to the first position in each row, preserving the order of the rest of the co...
void Add(const int i, const int j, const real_t val)
void EliminateRowColMultipleRHS(int rc, const Vector &sol, DenseMatrix &rhs, DiagonalPolicy dpolicy=DIAG_ONE)
Similar to EliminateRowCol(int, const double, Vector &, DiagonalPolicy), but multiple values for elim...
void Jacobi(const Vector &b, const Vector &x0, Vector &x1, real_t sc, bool use_abs_diag=false) const
void SetColPtr(const int row) const
Initialize the SparseMatrix for fast access to the entries of the given row which becomes the "curren...
void SetDiagIdentity()
If a row contains only one diag entry of zero, set it to 1.
void AbsMultTranspose(const Vector &x, Vector &y) const
y = |At| * x, using entry-wise absolute values of the transpose of matrix A
void Gauss_Seidel_forw(const Vector &x, Vector &y) const
Gauss-Seidel forward and backward iterations over a vector x.
void MakeRef(const SparseMatrix &master)
Clear the contents of the SparseMatrix and make it a reference to master.
int CheckFinite() const
Count the number of entries that are NOT finite, i.e. Inf or Nan.
Memory< int > J
Array with size I[height], containing the column indices for all matrix entries, as indexed by the I ...
Definition sparsemat.hpp:65
void EliminateCol(int col, DiagonalPolicy dpolicy=DIAG_ZERO)
Eliminates the column col from the matrix.
void EnsureMultTranspose() const
Ensures that the matrix is capable of performing MultTranspose(), AddMultTranspose(),...
void Print(std::ostream &out=mfem::out, int width_=4) const override
Prints matrix to stream out.
void _Add_(const int col, const real_t a)
Add a value to an entry in the "current row". See SetColPtr().
SparseMatrix()
Create an empty SparseMatrix.
cusparseSpMatDescr_t matA_descr
real_t & Elem(int i, int j) override
Returns reference to a_{ij}.
virtual void PrintMathematica(std::ostream &out=mfem::out) const
Prints matrix as a SparseArray for importing into Mathematica.
void AbsMult(const Vector &x, Vector &y) const
y = |A| * x, using entry-wise absolute values of matrix A
void ClearGPUSparse()
Clear the cuSPARSE/hipSPARSE descriptors. This must be called after releasing the device memory of A.
Definition sparsemat.cpp:81
void Threshold(real_t tol, bool fix_empty_rows=false)
Remove entries smaller in absolute value than a given tolerance tol. If fix_empty_rows is true,...
void SetSubMatrix(const Array< int > &rows, const Array< int > &cols, const DenseMatrix &subm, int skip_zeros=1)
void PrintMatlab(std::ostream &out=mfem::out) const override
Prints matrix in matlab format.
const int * ReadI(bool on_dev=true) const
real_t InnerProduct(const Vector &x, const Vector &y) const
Compute y^t A x.
Memory< int > I
Array with size (height+1) containing the row offsets.
Definition sparsemat.hpp:62
real_t & operator()(int i, int j)
Returns reference to A[i][j].
const real_t * HostReadData() const
void PrintMM(std::ostream &out=mfem::out) const
Prints matrix in Matrix Market sparse format.
void ScaleColumns(const Vector &sr)
this = this * diag(sr);
void PrintCSR(std::ostream &out) const
Prints matrix to stream out in hypre_CSRMatrix format.
void Swap(SparseMatrix &other)
int * ReadWriteJ(bool on_dev=true)
cusparseDnVecDescr_t vecY_descr
void BooleanMultTranspose(const Array< int > &x, Array< int > &y) const
y = At * x, treating all entries as booleans (zero=false, nonzero=true).
static size_t bufferSize
RowNode ** Rows
Array of linked lists, one for every row. This array represents the linked list (LIL) storage format.
Definition sparsemat.hpp:73
real_t & SearchRow(const int col)
Perform a fast search for an entry in the "current row". See SetColPtr().
static int SparseMatrixCount
void AddSubMatrix(const Array< int > &rows, const Array< int > &cols, const DenseMatrix &subm, int skip_zeros=1)
void Symmetrize()
(*this) = 1/2 ((*this) + (*this)^t)
void BooleanMult(const Array< int > &x, Array< int > &y) const
y = A * x, treating all entries as booleans (zero=false, nonzero=true).
void AddMultTranspose(const Vector &x, Vector &y, const real_t a=1.0) const override
y += At * x (default) or y += a * At * x
int * GetRowColumns(const int row)
Return a pointer to the column indices in a row.
real_t GetJacobiScaling() const
Determine appropriate scaling for Jacobi iteration.
RowNode ** ColPtrNode
Definition sparsemat.hpp:77
int RowSize(const int i) const
Returns the number of elements in row i.
real_t * HostReadWriteData()
int CountSmallElems(real_t tol) const
Count the number of entries with |a_ij| <= tol.
const int * ReadJ(bool on_dev=true) const
int MaxRowSize() const
Returns the maximum number of elements among all rows.
real_t _Get_(const int col) const
Read the value of an entry in the "current row". See SetColPtr().
void AddMult(const Vector &x, Vector &y, const real_t a=1.0) const override
y += A * x (default) or y += a * A * x
virtual ~SparseMatrix()
Destroys sparse matrix.
SparseMatrix & operator=(const SparseMatrix &rhs)
Assignment operator: deep copy.
void Mult(const Vector &x, Vector &y) const override
Matrix vector multiplication.
void OverrideSize(int height_, int width_)
Sets the height and width of the matrix.
real_t * HostWriteData()
static cusparseHandle_t handle
void EliminateRow(int row, const real_t sol, Vector &rhs)
Eliminates a column from the transpose matrix.
void GetBlocks(Array2D< SparseMatrix * > &blocks) const
SparseMatrix & operator+=(const SparseMatrix &B)
Add the sparse matrix 'B' to '*this'. This operation will cause an error if '*this' is finalized and ...
void Clear()
Clear the contents of the SparseMatrix.
void ResetTranspose() const
void EliminateRowColDiag(int rc, real_t value)
Perform elimination and set the diagonal entry to the given value.
real_t * GetRowEntries(const int row)
Return a pointer to the entries in a row.
void ScaleRows(const Vector &sl)
this = diag(sl) * this;
void SetRow(const int row, const Array< int > &cols, const Vector &srow)
void DiagScale(const Vector &b, Vector &x, real_t sc=1.0, bool use_abs_diag=false) const
x = sc b / A_ii. When use_abs_diag = true, |A_ii| is used.
real_t * GetData()
Return the element data, i.e. the array A.
void SetSubMatrixTranspose(const Array< int > &rows, const Array< int > &cols, const DenseMatrix &subm, int skip_zeros=1)
void BuildTranspose() const
Build and store internally the transpose of this matrix which will be used in the methods AddMultTran...
void SortColumnIndices()
Sort the column indices corresponding to each row.
void Finalize(int skip_zeros=1) override
Finalize the matrix initialization, switching the storage format from LIL to CSR.
real_t IsSymmetric() const
Returns max_{i,j} |(i,j)-(j,i)| for a finalized matrix.
DenseMatrix * ToDenseMatrix() const
Produces a DenseMatrix from a SparseMatrix.
void _Set_(const int col, const real_t a)
Set an entry in the "current row". See SetColPtr().
static void * dBuffer
void EliminateZeroRows(const real_t threshold=1e-12) override
If a row contains only zeros, set its diagonal to 1.
void EliminateCols(const Array< int > &cols, const Vector *x=NULL, Vector *b=NULL)
Eliminate all columns i for which cols[i] != 0.
cusparseDnVecDescr_t vecX_descr
const real_t * ReadData(bool on_dev=true) const
void ScaleRow(const int row, const real_t scale)
void AddRow(const int row, const Array< int > &cols, const Vector &srow)
void Jacobi2(const Vector &b, const Vector &x0, Vector &x1, real_t sc=1.0) const
int ActualWidth() const
Returns the actual Width of the matrix.
const int * HostReadI() const
void Set(const int i, const int j, const real_t val)
Vector data type.
Definition vector.hpp:82
virtual const real_t * HostRead() const
Shortcut for mfem::Read(vec.GetMemory(), vec.Size(), false).
Definition vector.hpp:498
virtual const real_t * Read(bool on_dev=true) const
Shortcut for mfem::Read(vec.GetMemory(), vec.Size(), on_dev).
Definition vector.hpp:494
void Neg()
(*this) = -(*this)
Definition vector.cpp:375
virtual real_t * ReadWrite(bool on_dev=true)
Shortcut for mfem::ReadWrite(vec.GetMemory(), vec.Size(), on_dev).
Definition vector.hpp:510
int Size() const
Returns the size of the vector.
Definition vector.hpp:226
virtual void UseDevice(bool use_dev) const
Enable execution of Vector operations using the mfem::Device.
Definition vector.hpp:144
void SetSize(int s)
Resize the vector to size s.
Definition vector.hpp:558
void NewDataAndSize(real_t *d, int s)
Set the Vector data and size, deleting the old data, if owned.
Definition vector.hpp:189
real_t * GetData() const
Return a pointer to the beginning of the Vector data.
Definition vector.hpp:235
virtual real_t * HostReadWrite()
Shortcut for mfem::ReadWrite(vec.GetMemory(), vec.Size(), false).
Definition vector.hpp:514
virtual real_t * Write(bool on_dev=true)
Shortcut for mfem::Write(vec.GetMemory(), vec.Size(), on_dev).
Definition vector.hpp:502
Vector beta
const real_t alpha
Definition ex15.cpp:369
real_t b
Definition lissajous.cpp:42
real_t a
Definition lissajous.cpp:41
void * CuMemAlloc(void **dptr, size_t bytes)
Allocates device memory and returns destination ptr.
Definition cuda.cpp:34
void * CuMemFree(void *dptr)
Frees device memory and returns destination ptr.
Definition cuda.cpp:79
DenseMatrix * OuterProduct(const DenseMatrix &A, const DenseMatrix &B)
Produces a block matrix with blocks A_{ij}*B.
const T * Read(const Memory< T > &mem, int size, bool on_dev=true)
Get a pointer for read access to mem with the mfem::Device's DeviceMemoryClass, if on_dev = true,...
Definition device.hpp:341
void Swap(Array< T > &, Array< T > &)
Definition array.hpp:672
real_t infinity()
Define a shortcut for std::numeric_limits<double>::infinity()
Definition vector.hpp:47
void mfem_error(const char *msg)
Definition error.cpp:154
void Mult(const Table &A, const Table &B, Table &C)
C = A * B (as boolean matrices)
Definition table.cpp:548
const T * HostRead(const Memory< T > &mem, int size)
Shortcut to Read(const Memory<T> &mem, int size, false)
Definition device.hpp:348
T * Write(Memory< T > &mem, int size, bool on_dev=true)
Get a pointer for write access to mem with the mfem::Device's DeviceMemoryClass, if on_dev = true,...
Definition device.hpp:358
bool IsFinite(const real_t &val)
Definition vector.hpp:527
void Transpose(const Table &A, Table &At, int ncols_A_)
Transpose a Table.
Definition table.cpp:486
void RAP(const DenseMatrix &A, const DenseMatrix &P, DenseMatrix &RAP)
int CheckFinite(const real_t *v, const int n)
Definition vector.hpp:538
T * HostWrite(Memory< T > &mem, int size)
Shortcut to Write(const Memory<T> &mem, int size, false)
Definition device.hpp:365
SparseMatrix * MultAbstractSparseMatrix(const AbstractSparseMatrix &A, const AbstractSparseMatrix &B)
Matrix product of sparse matrices. A and B do not need to be CSR matrices.
void * HipMemFree(void *dptr)
Frees device memory.
Definition hip.cpp:79
SparseMatrix * TransposeAbstractSparseMatrix(const AbstractSparseMatrix &A, int useActualWidth)
Transpose of a sparse matrix. A does not need to be a CSR matrix.
SparseMatrix * Mult_AtDA(const SparseMatrix &A, const Vector &D, SparseMatrix *OAtDA)
Matrix multiplication A^t D A. All matrices must be finalized.
float real_t
Definition config.hpp:43
MemoryType
Memory types supported by MFEM.
std::function< real_t(const Vector &)> f(real_t mass_coeff)
Definition lor_mms.hpp:30
SparseMatrix * TransposeMult(const SparseMatrix &A, const SparseMatrix &B)
C = A^T B.
void * HipMemAlloc(void **dptr, size_t bytes)
Allocates device memory.
Definition hip.cpp:34
void forall(int N, lambda &&body)
Definition forall.hpp:753
void forall_switch(bool use_dev, int N, lambda &&body)
Definition forall.hpp:756
void Add(const DenseMatrix &A, const DenseMatrix &B, real_t alpha, DenseMatrix &C)
C = A + alpha*B.
void SparseMatrixFunction(SparseMatrix &S, real_t(*f)(real_t))
Applies f() to each element of the matrix (after it is finalized).
@ HIP_MASK
Biwise-OR of all HIP backends.
Definition device.hpp:91
@ CPU_MASK
Biwise-OR of all CPU backends.
Definition device.hpp:87
@ CUDA_MASK
Biwise-OR of all CUDA backends.
Definition device.hpp:89