// This file is part of Eigen, a lightweight C++ template library
// for linear algebra.
//
// Copyright (C) 2022 Julian Kent <jkflying@gmail.com>
//
// This Source Code Form is subject to the terms of the Mozilla
// Public License v. 2.0. If a copy of the MPL was not distributed
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.

#ifndef EIGEN_SPARSEINVERSE_H
#define EIGEN_SPARSEINVERSE_H

// IWYU pragma: private
#include "./InternalHeaderCheck.h"

#include "../../../../Eigen/Sparse"
#include "../../../../Eigen/SparseLU"

namespace Eigen {

/**
 * @brief Kahan-algorithm-based accumulator
 *
 * The Kahan summation algorithm bounds the error from floating point
 * accumulation by a fixed value, regardless of the number of accumulations
 * performed. Naive accumulation has error that grows O(N), and pairwise
 * summation O(log N). However, pairwise summation also requires O(log N)
 * memory, while Kahan summation requires O(1) memory, at the cost of roughly
 * 4x the operations / latency.
 *
 * NB! Do not enable associative math optimizations, they may cause the Kahan
 * summation to be optimized out, leaving you with naive summation again.
 *
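 * A minimal usage sketch (illustrative only; the values are made up):
 * @code
 * KahanSum<double> acc;
 * for (int i = 0; i < 1000000; ++i) acc += 0.1;
 * double total = acc.value();  // accumulated rounding error stays O(1), not O(N)
 * @endcode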
 */
template <typename Scalar>
class KahanSum {
  // Straightforward Kahan summation for accurate accumulation of a sum of numbers
  Scalar _sum{};
  Scalar _correction{};

 public:
  Scalar value() { return _sum; }

  void operator+=(Scalar increment) {
    // Re-inject the error that was rounded away on the previous accumulation
    const Scalar correctedIncrement = increment + _correction;
    const Scalar previousSum = _sum;
    _sum += correctedIncrement;
    // Algebraically zero; captures the rounding error of this accumulation
    _correction = correctedIncrement - (_sum - previousSum);
  }
};

template <typename Scalar, Index Width = 16>
class FABSum {
  // https://epubs.siam.org/doi/pdf/10.1137/19M1257780
  // Fast and Accurate Blocked Summation
  // Uses naive summation for the fast sum, and Kahan summation for the accurate sum
  // Theoretically, the SIMD sum could be changed to a tree sum, which would improve
  // accuracy over naive summation
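  //
  // Illustrative usage (a sketch, mirroring KahanSum above): naive summation
  // within each 16-element block, Kahan summation across block totals, giving
  // near-Kahan accuracy at near-naive cost:
  //   FABSum<double> acc;
  //   for (int i = 0; i < 1000000; ++i) acc += 0.1;
  //   double total = acc.value();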
  KahanSum<Scalar> _totalSum;
  Matrix<Scalar, Width, 1> _block;
  Index _blockUsed{};

 public:
  Scalar value() { return _block.topRows(_blockUsed).sum() + _totalSum.value(); }

  void operator+=(Scalar increment) {
    _block(_blockUsed++, 0) = increment;
    if (_blockUsed == Width) {
      // Block is full: fold its naive sum into the accurate Kahan total
      _totalSum += _block.sum();
      _blockUsed = 0;
    }
  }
};

/**
 * @brief Computes an accurate dot product of two sparse vectors
 *
 * Uses an accurate blocked-summation accumulator (FABSum) to reduce rounding
 * error when accumulating the dot product of two sparse vectors.
 *
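 * A minimal usage sketch (illustrative; the matrix and values are made up):
 * @code
 * SparseMatrix<double> M(4, 4);
 * M.insert(2, 0) = 1.5;
 * M.insert(2, 1) = 2.0;
 * M.makeCompressed();
 * double d = accurateDot(M.col(0), M.col(1));  // 1.5 * 2.0 = 3.0
 * @endcode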
 */
template <typename Derived, typename OtherDerived>
typename Derived::Scalar accurateDot(const SparseMatrixBase<Derived>& A, const SparseMatrixBase<OtherDerived>& other) {
  typedef typename Derived::Scalar Scalar;
  EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived)
  EIGEN_STATIC_ASSERT_VECTOR_ONLY(OtherDerived)
  EIGEN_STATIC_ASSERT_SAME_VECTOR_SIZE(Derived, OtherDerived)
  static_assert(internal::is_same<Scalar, typename OtherDerived::Scalar>::value, "mismatched types");

  internal::evaluator<Derived> thisEval(A.derived());
  typename Derived::ReverseInnerIterator i(thisEval, 0);

  internal::evaluator<OtherDerived> otherEval(other.derived());
  typename OtherDerived::ReverseInnerIterator j(otherEval, 0);

  FABSum<Scalar> res;
  while (i && j) {
    if (i.index() == j.index()) {
      // Indices match: accumulate this product into the blocked Kahan sum
      res += numext::conj(i.value()) * j.value();
      --i;
      --j;
    } else if (i.index() > j.index())
      --i;
    else
      --j;
  }
  return res.value();
}

/**
 * @brief Calculate a sparse subset of the inverse of a sparse matrix
 *
 * This class returns a sparse subset of the inverse of the input matrix.
 * The nonzeros correspond to the nonzeros of the input, plus any additional
 * elements required due to fill-in of the internal LU factorization. The
 * fill-in is minimized by applying a fill-reducing permutation as part of
 * the LU factorization.
 *
 * If you need inverse values for specific entries of the input matrix which
 * are zero in the input, you must insert entries for them into the input
 * sparse matrix so that they are calculated.
 *
 * Due to the sensitive nature of matrix inversion, particularly of the large
 * matrices made tractable by sparsity, high-accuracy dot products based on
 * Kahan summation are used to reduce numerical error. If you still encounter
 * numerical errors, you may wish to equilibrate your matrix before
 * calculating the inverse, as well as making sure it is actually full rank.
 *
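 * A usage sketch (illustrative; a trivially invertible diagonal matrix):
 * @code
 * SparseMatrix<double> A(2, 2);
 * A.insert(0, 0) = 2.0;
 * A.insert(1, 1) = 4.0;
 * A.makeCompressed();
 * SparseInverse<double> inv;
 * inv.compute(A);
 * const SparseMatrix<double>& Ainv = inv.inverse();  // diag(0.5, 0.25); 0x0 on failure
 * @endcode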
 */
template <typename Scalar>
class SparseInverse {
 public:
  typedef SparseMatrix<Scalar, ColMajor> MatrixType;
  typedef SparseMatrix<Scalar, RowMajor> RowMatrixType;

  SparseInverse() {}

  /**
   * @brief Constructor for when you already have a factored SparseLU and would like to use it to calculate a
   * sparse inverse.
   *
   * Pass your already-factored SparseLU to this constructor, then call the .inverse()
   * method directly to get the result.
   */
  SparseInverse(const SparseLU<MatrixType>& slu) { _result = computeInverse(slu); }


  /**
   * @brief Calculate the sparse inverse from a given sparse input
   */
  SparseInverse& compute(const SparseMatrix<Scalar>& A) {
    SparseLU<MatrixType> slu;
    slu.compute(A);
    _result = computeInverse(slu);
    return *this;
  }

  /**
   * @brief Return the already-calculated sparse inverse, or a 0x0 matrix if it could not be computed
   */
  const MatrixType& inverse() const { return _result; }

  /**
   * @brief Internal function to calculate the sparse inverse in a functional way
   * @return A sparse inverse representation, or, if the decomposition didn't complete, a 0x0 matrix.
   */
  static MatrixType computeInverse(const SparseLU<MatrixType>& slu) {
    if (slu.info() != Success) {
      return MatrixType(0, 0);
    }

    // Extract from SparseLU and decompose into L, inverse D and U terms
    Matrix<Scalar, Dynamic, 1> invD;
    RowMatrixType Upper;
    {
      RowMatrixType DU = slu.matrixU().toSparse();
      invD = DU.diagonal().cwiseInverse();
      Upper = (invD.asDiagonal() * DU).template triangularView<StrictlyUpper>();
    }
    MatrixType Lower = slu.matrixL().toSparse().template triangularView<StrictlyLower>();

    // Compute the inverse and reapply the permutation matrix from the LU decomposition
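    // (SparseLU factors a row/column-permuted system, roughly Pr * A * Pc^T = L * D * U,
    //  so the permuted inverse is wrapped in the transposed permutations below to
    //  recover A^-1 = Pc^T * (L*D*U)^-1 * Pr.)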
    return slu.colsPermutation().transpose() * computeInverse(Upper, invD, Lower) * slu.rowsPermutation();
  }

  /**
   * @brief Internal function to calculate the inverse from strictly upper, diagonal and strictly lower components
   */
  static MatrixType computeInverse(const RowMatrixType& Upper, const Matrix<Scalar, Dynamic, 1>& inverseDiagonal,
                                   const MatrixType& Lower) {
    // Calculate the 'minimal set', which is the nonzeros of (L+U).transpose()
    // It could be zeroed, but we will overwrite all non-zeros anyways.
    MatrixType colInv = Lower.transpose().template triangularView<UnitUpper>();
    colInv += Upper.transpose();

    // We also need a rowmajor representation in order to do efficient row-wise dot products
    RowMatrixType rowInv = Upper.transpose().template triangularView<UnitLower>();
    rowInv += Lower.transpose();

    // Use the Takahashi algorithm to build the supporting elements of the inverse
    // upwards and to the left, from the bottom right element, 1 col/row at a time
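    // Sketch of the identities driving the sweep (with Z = (L*D*U)^-1 and L, U
    // unit triangular; stated here for orientation, not verbatim from a reference):
    //   Z = inv(D)*inv(L) + (I - U)*Z   -> upper triangle and diagonal of Z
    //   Z = inv(U)*inv(D) + Z*(I - L)   -> lower triangle of Z
    // Each entry Z(i,j) therefore depends only on Z entries below or to the
    // right of it, which is exactly the reverse-order traversal used below.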
    for (Index recurseLevel = Upper.cols() - 1; recurseLevel >= 0; recurseLevel--) {
      const auto& col = Lower.col(recurseLevel);
      const auto& row = Upper.row(recurseLevel);

      // Calculate the inverse values for the nonzeros in this column
      typename MatrixType::ReverseInnerIterator colIter(colInv, recurseLevel);
      for (; recurseLevel < colIter.index(); --colIter) {
        const Scalar element = -accurateDot(col, rowInv.row(colIter.index()));
        colIter.valueRef() = element;
        rowInv.coeffRef(colIter.index(), recurseLevel) = element;
      }

      // Calculate the inverse values for the nonzeros in this row
      typename RowMatrixType::ReverseInnerIterator rowIter(rowInv, recurseLevel);
      for (; recurseLevel < rowIter.index(); --rowIter) {
        const Scalar element = -accurateDot(row, colInv.col(rowIter.index()));
        rowIter.valueRef() = element;
        colInv.coeffRef(recurseLevel, rowIter.index()) = element;
      }

      // And finally the diagonal, which corresponds to both row and col iterator now
      const Scalar diag = inverseDiagonal(recurseLevel) - accurateDot(row, colInv.col(recurseLevel));
      rowIter.valueRef() = diag;
      colIter.valueRef() = diag;
    }

    return colInv;
  }

 private:
  MatrixType _result;
};

}  // namespace Eigen
#endif