IncompleteLUT: static row matching to handle zero-diagonal matrices (#2626) libeigen/eigen!2475 Closes #2626 Co-authored-by: Rasmus Munk Larsen <rmlarsen@gmail.com>
diff --git a/Eigen/src/IterativeLinearSolvers/IncompleteLUT.h b/Eigen/src/IterativeLinearSolvers/IncompleteLUT.h index 3a84e55..61e8d3c 100644 --- a/Eigen/src/IterativeLinearSolvers/IncompleteLUT.h +++ b/Eigen/src/IterativeLinearSolvers/IncompleteLUT.h
@@ -142,7 +142,11 @@ /** \brief Reports whether previous computation was successful. * * \returns \c Success if computation was successful, - * \c NumericalIssue if the matrix.appears to be negative. + * \c NumericalIssue if a zero pivot was encountered during + * factorization (the resulting preconditioner is unlikely to be + * usable; the input matrix typically has zero diagonal entries + * that cannot be moved by a static row permutation, e.g. it is + * structurally singular). */ ComputationInfo info() const { eigen_assert(m_isInitialized && "IncompleteLUT is not initialized."); @@ -157,7 +161,11 @@ /** * Compute an incomplete LU factorization with dual threshold on the matrix mat - * No pivoting is done in this version + * No partial pivoting is done in this version. A static row permutation + * (maximum bipartite matching) is computed in \c analyzePattern so that + * the permuted matrix has a structurally nonzero diagonal whenever one + * exists; without it, the lack of pivoting makes ILUT silently produce + * a useless preconditioner on matrices with zero diagonal entries. * **/ template <typename MatrixType> @@ -172,7 +180,7 @@ template <typename Rhs, typename Dest> void _solve_impl(const Rhs& b, Dest& x) const { - x = m_Pinv * b; + x = m_PinvPr * b; x = m_lu.template triangularView<UnitLower>().solve(x); x = m_lu.template triangularView<Upper>().solve(x); x = m_P * x; @@ -184,6 +192,23 @@ inline bool operator()(const Index& row, const Index& col, const Scalar&) const { return row != col; } }; + template <typename MatrixType> + Index computeRowMatching(const MatrixType& amat); + + // Produce a column-major CSC sparsity pattern for `amat` (integers only — + // the scalar values are never read or copied). When `amat` is already a + // compressed column-major SparseMatrix, `outer`/`inner` point directly into + // its index storage; otherwise the pattern is materialized into the local + // `outer_buf`/`inner_buf` arrays and pointers are set into them. 
+ static void patternColMajor(const SparseMatrix<Scalar, ColMajor, StorageIndex>& amat, + Matrix<StorageIndex, Dynamic, 1>& outer_buf, Matrix<StorageIndex, Dynamic, 1>& inner_buf, + const StorageIndex*& outer, const StorageIndex*& inner); + + template <typename MatrixType> + static void patternColMajor(const MatrixType& amat, Matrix<StorageIndex, Dynamic, 1>& outer_buf, + Matrix<StorageIndex, Dynamic, 1>& inner_buf, const StorageIndex*& outer, + const StorageIndex*& inner); + protected: FactorType m_lu; RealScalar m_droptol; @@ -191,8 +216,10 @@ bool m_analysisIsOk; bool m_factorizationIsOk; ComputationInfo m_info; - PermutationMatrix<Dynamic, Dynamic, StorageIndex> m_P; // Fill-reducing permutation - PermutationMatrix<Dynamic, Dynamic, StorageIndex> m_Pinv; // Inverse permutation + PermutationMatrix<Dynamic, Dynamic, StorageIndex> m_P; // Fill-reducing permutation + PermutationMatrix<Dynamic, Dynamic, StorageIndex> m_Pinv; // Inverse permutation + PermutationMatrix<Dynamic, Dynamic, StorageIndex> m_Pr; // Static row permutation (matching-based) + PermutationMatrix<Dynamic, Dynamic, StorageIndex> m_PinvPr; // Cached composition m_Pinv * m_Pr for solve }; /** @@ -235,21 +262,221 @@ return m_lu.template triangularView<Upper>(); } +// Specialization: amat is already a column-major SparseMatrix. +// Share its index storage directly when compressed; otherwise materialize the +// indices (without copying any scalar values) into outer_buf/inner_buf. 
+template <typename Scalar, typename StorageIndex> +void IncompleteLUT<Scalar, StorageIndex>::patternColMajor(const SparseMatrix<Scalar, ColMajor, StorageIndex>& amat, + Matrix<StorageIndex, Dynamic, 1>& outer_buf, + Matrix<StorageIndex, Dynamic, 1>& inner_buf, + const StorageIndex*& outer, const StorageIndex*& inner) { + if (amat.isCompressed()) { + outer = amat.outerIndexPtr(); + inner = amat.innerIndexPtr(); + return; + } + const Index n = amat.cols(); + const StorageIndex* a_outer = amat.outerIndexPtr(); + const StorageIndex* a_inner_nz = amat.innerNonZeroPtr(); + const StorageIndex* a_inner = amat.innerIndexPtr(); + outer_buf.resize(n + 1); + outer_buf(0) = 0; + for (Index j = 0; j < n; ++j) outer_buf(j + 1) = outer_buf(j) + a_inner_nz[j]; + inner_buf.resize(outer_buf(n)); + for (Index j = 0; j < n; ++j) { + const StorageIndex* src = a_inner + a_outer[j]; + std::copy(src, src + a_inner_nz[j], inner_buf.data() + outer_buf(j)); + } + outer = outer_buf.data(); + inner = inner_buf.data(); +} + +// Generic fallback: any other sparse input (row-major, expressions). Build a +// column-major pattern via inner iterators — no scalar values are read. 
+template <typename Scalar, typename StorageIndex> +template <typename MatrixType_> +void IncompleteLUT<Scalar, StorageIndex>::patternColMajor(const MatrixType_& amat, + Matrix<StorageIndex, Dynamic, 1>& outer_buf, + Matrix<StorageIndex, Dynamic, 1>& inner_buf, + const StorageIndex*& outer, const StorageIndex*& inner) { + using internal::convert_index; + const Index n = amat.cols(); + outer_buf.setZero(n + 1); + for (Index i = 0; i < amat.outerSize(); ++i) + for (typename MatrixType_::InnerIterator it(amat, i); it; ++it) ++outer_buf(it.col() + 1); + for (Index j = 0; j < n; ++j) outer_buf(j + 1) += outer_buf(j); + inner_buf.resize(outer_buf(n)); + Matrix<StorageIndex, Dynamic, 1> head = outer_buf.head(n); + for (Index i = 0; i < amat.outerSize(); ++i) + for (typename MatrixType_::InnerIterator it(amat, i); it; ++it) + inner_buf(head(it.col())++) = convert_index<StorageIndex>(it.row()); + outer = outer_buf.data(); + inner = inner_buf.data(); +} + +// Compute a row permutation m_Pr such that (m_Pr * amat) has a structurally +// nonzero diagonal wherever one exists. Returns the number of matched columns. +// Uses a maximum bipartite cardinality matching on the sparsity pattern, with +// a greedy initialization that prefers the natural diagonal so that matrices +// already having a nonzero diagonal yield the identity permutation. +template <typename Scalar, typename StorageIndex> +template <typename MatrixType_> +Index IncompleteLUT<Scalar, StorageIndex>::computeRowMatching(const MatrixType_& amat) { + using internal::convert_index; + const Index n = amat.rows(); + // We only need the column-major sparsity pattern; never read scalar values. + // Share amat's index storage when it is already a compressed column-major + // SparseMatrix, otherwise build a value-free pattern into local arrays. 
+ Matrix<StorageIndex, Dynamic, 1> outer_buf; + Matrix<StorageIndex, Dynamic, 1> inner_buf; + const StorageIndex* outer = nullptr; + const StorageIndex* inner = nullptr; + patternColMajor(amat, outer_buf, inner_buf, outer, inner); + + const StorageIndex kUnmatched = StorageIndex(-1); + // match_row[j] = original row matched to column j; match_col[i] = column matched to row i. + std::vector<StorageIndex> match_row(n, kUnmatched); + std::vector<StorageIndex> match_col(n, kUnmatched); + + // The matching uses the stored sparsity pattern only and is independent of + // numerical values. This preserves the analyzePattern/factorize contract: + // the same analysis is reusable for any matrix sharing this stored pattern. + // Phase 1: greedy diagonal preference. + for (Index j = 0; j < n; ++j) { + for (Index k = outer[j]; k < outer[j + 1]; ++k) { + if (Index(inner[k]) == j) { + match_row[j] = convert_index<StorageIndex>(j); + match_col[j] = convert_index<StorageIndex>(j); + break; + } + } + } + // Phase 2: greedy off-diagonal pickup of any free row. + for (Index j = 0; j < n; ++j) { + if (match_row[j] != kUnmatched) continue; + for (Index k = outer[j]; k < outer[j + 1]; ++k) { + Index i = inner[k]; + if (match_col[i] == kUnmatched) { + match_row[j] = convert_index<StorageIndex>(i); + match_col[i] = convert_index<StorageIndex>(j); + break; + } + } + } + // Phase 3: augmenting paths for any column still unmatched. + std::vector<StorageIndex> visited(n, kUnmatched); + // Iterative DFS: the stack frames are (column, edge index, chosen row). + // chosen_row[k] is the row that frame k will commit to if a path is found. 
+ std::vector<Index> stack_col; + std::vector<Index> stack_pos; + std::vector<Index> stack_chosen_row; + stack_col.reserve(n); + stack_pos.reserve(n); + stack_chosen_row.reserve(n); + + for (Index start = 0; start < n; ++start) { + if (match_row[start] != kUnmatched) continue; + StorageIndex epoch = convert_index<StorageIndex>(start); + stack_col.clear(); + stack_pos.clear(); + stack_chosen_row.clear(); + stack_col.push_back(start); + stack_pos.push_back(outer[start]); + stack_chosen_row.push_back(-1); + + while (!stack_col.empty()) { + Index j = stack_col.back(); + Index pos = stack_pos.back(); + Index col_end = outer[j + 1]; + bool advanced = false; + + while (pos < col_end) { + Index i = inner[pos]; + ++pos; + if (visited[i] == epoch) continue; + visited[i] = epoch; + + if (match_col[i] == kUnmatched) { + // Found an augmenting path: commit it. + stack_chosen_row.back() = i; + stack_pos.back() = pos; + for (size_t k = 0; k < stack_col.size(); ++k) { + Index col = stack_col[k]; + Index row = stack_chosen_row[k]; + match_row[col] = convert_index<StorageIndex>(row); + match_col[row] = convert_index<StorageIndex>(col); + } + stack_col.clear(); + break; + } else { + // Descend into the column currently matched to row i. + stack_chosen_row.back() = i; + stack_pos.back() = pos; + Index next_col = match_col[i]; + stack_col.push_back(next_col); + stack_pos.push_back(outer[next_col]); + stack_chosen_row.push_back(-1); + advanced = true; + break; + } + } + + if (!advanced && !stack_col.empty()) { + stack_col.pop_back(); + stack_pos.pop_back(); + stack_chosen_row.pop_back(); + } + } + } + + // Build the row permutation. Matched columns get their matching row; + // any leftover columns are filled in identity-fashion with the leftover rows. 
+ m_Pr.resize(n); + std::vector<bool> col_used(n, false), row_used(n, false); + Index matched = 0; + for (Index j = 0; j < n; ++j) { + if (match_row[j] != kUnmatched) { + m_Pr.indices()(match_row[j]) = convert_index<StorageIndex>(j); + col_used[j] = true; + row_used[match_row[j]] = true; + ++matched; + } + } + Index next_col = 0; + for (Index i = 0; i < n; ++i) { + if (row_used[i]) continue; + while (next_col < n && col_used[next_col]) ++next_col; + m_Pr.indices()(i) = convert_index<StorageIndex>(next_col); + ++next_col; + } + return matched; +} + template <typename Scalar, typename StorageIndex> template <typename MatrixType_> void IncompleteLUT<Scalar, StorageIndex>::analyzePattern(const MatrixType_& amat) { - // Compute the Fill-reducing permutation + eigen_assert((amat.rows() == amat.cols()) && "The factorization should be done on a square matrix"); + // 1. Compute a static row permutation that makes the diagonal structurally + // nonzero. This is a workaround for the lack of partial pivoting in ILUT. + // For matrices that already have a nonzero diagonal, this returns the + // identity permutation and is essentially free. + computeRowMatching(amat); + + // 2. Compute the Fill-reducing permutation on the row-permuted matrix. // Since ILUT does not perform any numerical pivoting, // it is highly preferable to keep the diagonal through symmetric permutations. // To this end, let's symmetrize the pattern and perform AMD on it. - SparseMatrix<Scalar, ColMajor, StorageIndex> mat1 = amat; - SparseMatrix<Scalar, ColMajor, StorageIndex> mat2 = amat.transpose(); + SparseMatrix<Scalar, ColMajor, StorageIndex> mat1 = m_Pr * amat; + SparseMatrix<Scalar, ColMajor, StorageIndex> mat2 = mat1.transpose(); // FIXME: for a nearly symmetric pattern, mat2+mat1 is appropriate; // for a highly non-symmetric pattern, mat2*mat1 should be preferred. 
SparseMatrix<Scalar, ColMajor, StorageIndex> AtA = mat2 + mat1; AMDOrdering<StorageIndex> ordering; ordering(AtA, m_P); m_Pinv = m_P.inverse(); // cache the inverse permutation + // Cache the composition m_Pinv * m_Pr so _solve_impl applies a single + // permutation to the RHS instead of two. + m_PinvPr = m_Pinv * m_Pr; m_analysisIsOk = true; m_factorizationIsOk = false; m_isInitialized = true; @@ -271,10 +498,13 @@ VectorI ju(n); // column position of the values in u -- maximum size is n VectorI jr(n); // Indicate the position of the nonzero elements in the vector u -- A zero location is indicated by -1 - // Apply the fill-reducing permutation + // Apply the static row permutation (from analyzePattern), then the + // fill-reducing symmetric permutation. eigen_assert(m_analysisIsOk && "You must first call analyzePattern()"); + SparseMatrix<Scalar, RowMajor, StorageIndex> row_permuted_mat = m_Pr * amat; SparseMatrix<Scalar, RowMajor, StorageIndex> mat; - mat = amat.twistedBy(m_Pinv); + mat = row_permuted_mat.twistedBy(m_Pinv); + Index zero_pivots = 0; // Initialization jr.fill(-1); @@ -415,7 +645,10 @@ // store the diagonal element // apply a shifting rule to avoid zero pivots (we are doing an incomplete factorization) - if (u(ii) == Scalar(0)) u(ii) = sqrt(m_droptol) * rownorm; + if (u(ii) == Scalar(0)) { + u(ii) = sqrt(m_droptol) * rownorm; + ++zero_pivots; + } m_lu.insertBackByOuterInnerUnordered(ii, ii) = u(ii); // sort the U-part of the row @@ -441,7 +674,11 @@ m_lu.makeCompressed(); m_factorizationIsOk = true; - m_info = Success; + // If we had to shift any zero pivot, the factorization is not faithful to + // the input matrix and the resulting preconditioner may be useless. + // Report this to the caller via NumericalIssue rather than silently + // returning Success. + m_info = (zero_pivots == 0) ? Success : NumericalIssue; } } // end namespace Eigen
diff --git a/test/incomplete_LUT.cpp b/test/incomplete_LUT.cpp index 7213ecc..b9c1679 100644 --- a/test/incomplete_LUT.cpp +++ b/test/incomplete_LUT.cpp
@@ -78,6 +78,121 @@ VERIFY_IS_APPROX(expectedMatU, matU); } +// https://gitlab.com/libeigen/eigen/-/issues/2626 +// IncompleteLUT must apply a static row permutation so that matrices whose +// natural diagonal has zeros (but admit a full bipartite matching on the +// sparsity pattern) can still be factorized. Without it, ILUT silently +// produces a useless preconditioner. +template <typename T> +void test_zero_diagonal_2626() { + typedef Eigen::SparseMatrix<T> SparseMatrixT; + typedef Eigen::Matrix<T, Eigen::Dynamic, 1> Vector; + + const int n = 50; + // 1D Laplacian: SPD, fully nonzero diagonal. + std::vector<Eigen::Triplet<T>> triplets; + for (int i = 0; i < n; ++i) { + triplets.emplace_back(i, i, T(2)); + if (i > 0) triplets.emplace_back(i, i - 1, T(-1)); + if (i + 1 < n) triplets.emplace_back(i, i + 1, T(-1)); + } + SparseMatrixT base(n, n); + base.setFromTriplets(triplets.begin(), triplets.end()); + + // Cyclic-shift row permutation by 2: separates the natural diagonal from + // the tridiagonal nonzero band, so all diagonal entries of (Pshift * base) + // are zero. A valid row matching must find the inverse shift. + Eigen::PermutationMatrix<Eigen::Dynamic> Pshift(n); + for (int i = 0; i < n; ++i) Pshift.indices()(i) = (i + 2) % n; + SparseMatrixT A = Pshift * base; + + // Verify the test setup: every diagonal is zero. + int nz_diag = 0; + for (int i = 0; i < n; ++i) + if (A.coeff(i, i) != T(0)) ++nz_diag; + VERIFY(nz_diag == 0); + + // BiCGSTAB + IncompleteLUT must converge to a reasonable residual. + Vector b = Vector::Random(n); + Eigen::BiCGSTAB<SparseMatrixT, Eigen::IncompleteLUT<T>> solver; + solver.setTolerance(typename Eigen::NumTraits<T>::Real(16) * Eigen::NumTraits<T>::epsilon()); + solver.compute(A); + VERIFY(solver.preconditioner().info() == Eigen::Success); + Vector x = solver.solve(b); + VERIFY(solver.info() == Eigen::Success); + Vector residual = b - A * x; + // Solver was set to tol = 16*eps; allow some slack for the residual check. 
+ typename Eigen::NumTraits<T>::Real residual_bound = + typename Eigen::NumTraits<T>::Real(1024) * Eigen::NumTraits<T>::epsilon() * b.norm(); + VERIFY(residual.norm() < residual_bound); +} + +// A structurally singular matrix (empty row) cannot be made diagonal-nonzero +// by any row permutation; the factorization must report NumericalIssue. +// This covers the rownorm == 0 early-return path. +template <typename T> +void test_structurally_singular() { + typedef Eigen::SparseMatrix<T> SparseMatrixT; + std::vector<Eigen::Triplet<T>> triplets; + triplets.emplace_back(0, 0, T(2)); + triplets.emplace_back(0, 1, T(1)); + triplets.emplace_back(1, 0, T(1)); + triplets.emplace_back(1, 1, T(2)); + // row 2 is intentionally empty + triplets.emplace_back(3, 0, T(1)); + triplets.emplace_back(3, 3, T(2)); + SparseMatrixT A(4, 4); + A.setFromTriplets(triplets.begin(), triplets.end()); + + Eigen::IncompleteLUT<T> ilut; + ilut.compute(A); + VERIFY(ilut.info() == Eigen::NumericalIssue); +} + +// A matrix where every row is structurally non-empty (so rownorm != 0) but +// no full bipartite matching exists, forcing at least one pivot to be shifted. +// Rows 0 and 1 only have entries in column 0, and column 1 has no stored +// entries at all, so column 1 can never be matched. This exercises the +// shifted-pivot path that flips info() to +// NumericalIssue. +template <typename T> +void test_zero_pivot_numerical_issue() { + typedef Eigen::SparseMatrix<T> SparseMatrixT; + SparseMatrixT A(3, 3); + std::vector<Eigen::Triplet<T>> triplets; + triplets.emplace_back(0, 0, T(1)); + triplets.emplace_back(1, 0, T(2)); // row 1 competes with row 0 for column 0 + triplets.emplace_back(2, 2, T(3)); + A.setFromTriplets(triplets.begin(), triplets.end()); + + Eigen::IncompleteLUT<T> ilut; + ilut.compute(A); + VERIFY(ilut.info() == Eigen::NumericalIssue); +} + +// analyzePattern() must depend only on the stored sparsity pattern and not on +// numerical values. 
Otherwise, the two-step API contract breaks: the same +// analysis would no longer be reusable for any matrix sharing this stored +// pattern. Repro: analyze a pattern with all-zero placeholder values, then +// factorize a numerically nonzero matrix sharing that pattern. +template <typename T> +void test_pattern_value_separation() { + typedef Eigen::SparseMatrix<T> SparseMatrixT; + SparseMatrixT pattern(2, 2); + pattern.insert(0, 1) = T(0); + pattern.insert(1, 0) = T(0); + pattern.makeCompressed(); + + SparseMatrixT A(2, 2); + A.insert(0, 1) = T(1); + A.insert(1, 0) = T(1); + A.makeCompressed(); + + Eigen::IncompleteLUT<T> ilut; + ilut.analyzePattern(pattern); + ilut.factorize(A); + VERIFY(ilut.info() == Eigen::Success); +} + EIGEN_DECLARE_TEST(incomplete_LUT) { CALL_SUBTEST_1((test_incompleteLUT_T<double, int>())); CALL_SUBTEST_1((test_incompleteLUT_T<float, int>())); @@ -86,4 +201,10 @@ CALL_SUBTEST_4(test_extract_LU<double>()); CALL_SUBTEST_4(test_extract_LU<float>()); + + CALL_SUBTEST_5(test_zero_diagonal_2626<double>()); + CALL_SUBTEST_5(test_zero_diagonal_2626<float>()); + CALL_SUBTEST_5(test_structurally_singular<double>()); + CALL_SUBTEST_5(test_zero_pivot_numerical_issue<double>()); + CALL_SUBTEST_5(test_pattern_value_separation<double>()); }