| // This file is part of Eigen, a lightweight C++ template library |
| // for linear algebra. |
| // |
| // This Source Code Form is subject to the terms of the Mozilla |
| // Public License v. 2.0. If a copy of the MPL was not distributed |
| // with this file, You can obtain one at http://mozilla.org/MPL/2.0/. |
| |
| #ifndef EIGEN_SPARSITY_PATTERN_REF_H |
| #define EIGEN_SPARSITY_PATTERN_REF_H |
| |
| // IWYU pragma: private |
| #include "./InternalHeaderCheck.h" |
| |
| namespace Eigen { |
| namespace internal { |
| |
| /** \internal |
| * \ingroup SparseCore_Module |
| * |
| * Non-owning view of a column-major sparsity pattern. The pattern is encoded |
| * as CSC index arrays: |
| * - \c outer (size \c outerSize+1 when compressed) gives the start of each |
| * column's row-index range in \c inner. |
| * - \c inner holds the row indices. |
| * - \c innerNonZero is non-null only for an uncompressed source: in that |
| * case column \c j's row indices live in |
| * [outer[j], outer[j] + innerNonZero[j]) instead of [outer[j], outer[j+1]). |
| * |
| * Use the helper \ref nonZeros to iterate uniformly over both layouts: |
| * \code |
| * const Index nz = pattern.nonZeros(j); |
| * for (Index k = pattern.outer[j], end = k + nz; k < end; ++k) { |
| * Index row = pattern.inner[k]; |
| * } |
| * \endcode |
| * |
| * Construct via \ref make_col_major_pattern_ref, which shares the source |
| * storage when possible (already column-major SparseMatrix) and otherwise |
| * materializes the pattern into caller-supplied scratch buffers without ever |
| * reading the source's scalar values. |
| */ |
| template <typename StorageIndex> |
| struct SparsityPatternRef { |
| const StorageIndex* outer = nullptr; |
| const StorageIndex* inner = nullptr; |
| // null ⇒ source is compressed: column j is [outer[j], outer[j+1]). |
| // otherwise: column j is [outer[j], outer[j] + innerNonZero[j]). |
| const StorageIndex* innerNonZero = nullptr; |
| Index outerSize = 0; // number of columns |
| Index innerSize = 0; // number of rows |
| |
| EIGEN_DEVICE_FUNC inline bool isCompressed() const { return innerNonZero == nullptr; } |
| |
| EIGEN_DEVICE_FUNC inline Index nonZeros(Index j) const { |
| return isCompressed() ? Index(outer[j + 1] - outer[j]) : Index(innerNonZero[j]); |
| } |
| }; |
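| |
| // Illustrative sketch only: how a view is typically obtained and consumed. |
| // The matrix and buffers below are hypothetical placeholders; the factory |
| // make_col_major_pattern_ref is declared further down in this header. |
| // |
| //   SparseMatrix<double, ColMajor, int> A;         // pattern source |
| //   Matrix<int, Dynamic, 1> outer_buf, inner_buf;  // scratch (unused on the fast path) |
| //   SparsityPatternRef<int> p = make_col_major_pattern_ref(A, outer_buf, inner_buf); |
| //   Index total = 0; |
| //   for (Index j = 0; j < p.outerSize; ++j) total += p.nonZeros(j);  // works for both layouts |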
| |
| /** \internal |
| * Build a column-major sparsity-pattern view of \c amat without copying any |
| * scalar values. The returned view aliases \c amat's index storage when |
| * \c amat is already a column-major \c SparseMatrix; otherwise the pattern is |
| * materialized into the caller-supplied \c outer_buf / \c inner_buf and the |
| * returned view points into them. |
| * |
| * The buffers must outlive every use of the returned view. |
| * |
| * Specialization for column-major \c SparseMatrix (the no-copy fast path). |
| */ |
| template <typename Scalar, typename StorageIndex> |
| SparsityPatternRef<StorageIndex> make_col_major_pattern_ref(const SparseMatrix<Scalar, ColMajor, StorageIndex>& amat, |
| Matrix<StorageIndex, Dynamic, 1>& /*outer_buf*/, |
| Matrix<StorageIndex, Dynamic, 1>& /*inner_buf*/) { |
| SparsityPatternRef<StorageIndex> p; |
| p.outer = amat.outerIndexPtr(); |
| p.inner = amat.innerIndexPtr(); |
| p.innerNonZero = amat.isCompressed() ? nullptr : amat.innerNonZeroPtr(); |
| p.outerSize = amat.cols(); |
| p.innerSize = amat.rows(); |
| return p; |
| } |
| |
| /** \internal |
| * Generic fallback for any other sparse expression (row-major, products, |
| * permutations, etc.). The pattern is materialized into \c outer_buf and |
| * \c inner_buf by a value-free counting-sort pass over the expression |
| * evaluator: entries are counted per destination column, the counts are |
| * prefix-summed into \c outer_buf, and the row indices are then scattered |
| * into \c inner_buf. No scalar values are read here, although some |
| * evaluators (e.g. of sparse products) may materialize a temporary internally. |
| */ |
| template <typename Derived> |
| SparsityPatternRef<typename Derived::StorageIndex> make_col_major_pattern_ref( |
| const SparseMatrixBase<Derived>& amat_base, Matrix<typename Derived::StorageIndex, Dynamic, 1>& outer_buf, |
| Matrix<typename Derived::StorageIndex, Dynamic, 1>& inner_buf) { |
| typedef typename Derived::StorageIndex StorageIndex; |
| const Derived& amat = amat_base.derived(); |
| internal::evaluator<Derived> amat_eval(amat); |
| const Index n_cols = amat.cols(); |
| const Index n_outer = amat.outerSize(); |
| |
| outer_buf.setZero(n_cols + 1); |
| for (Index i = 0; i < n_outer; ++i) |
| for (typename internal::evaluator<Derived>::InnerIterator it(amat_eval, i); it; ++it) ++outer_buf(it.col() + 1); |
| for (Index j = 0; j < n_cols; ++j) outer_buf(j + 1) += outer_buf(j); |
| inner_buf.resize(outer_buf(n_cols)); |
| // Deliberate deep copy of outer_buf.head(n_cols): head is advanced as a |
| // per-column write cursor while outer_buf keeps the final column starts. |
| Matrix<StorageIndex, Dynamic, 1> head = outer_buf.head(n_cols); |
| for (Index i = 0; i < n_outer; ++i) |
| for (typename internal::evaluator<Derived>::InnerIterator it(amat_eval, i); it; ++it) |
| inner_buf(head(it.col())++) = convert_index<StorageIndex>(it.row()); |
| |
| SparsityPatternRef<StorageIndex> p; |
| p.outer = outer_buf.data(); |
| p.inner = inner_buf.data(); |
| p.innerNonZero = nullptr; |
| p.outerSize = n_cols; |
| p.innerSize = amat.rows(); |
| return p; |
| } |
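| |
| // Illustrative sketch only (hypothetical matrix): the fallback path for a |
| // row-major source. After the call the pattern lives in the caller's buffers, |
| // which must stay alive for as long as the view is used. |
| // |
| //   SparseMatrix<double, RowMajor, int> R;         // e.g. assembled row-major |
| //   Matrix<int, Dynamic, 1> outer_buf, inner_buf; |
| //   SparsityPatternRef<int> p = make_col_major_pattern_ref(R, outer_buf, inner_buf); |
| //   // Now p.outer == outer_buf.data(), p.inner == inner_buf.data(), |
| //   // p.isCompressed() == true, and R's values were never read. |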
| |
| /** \internal |
| * Materialize a column-major sparsity pattern view as a compressed |
| * \c SparseMatrix with a 1-byte placeholder \c Scalar. Values are filled with |
| * a fixed nonzero sentinel — downstream pattern consumers (\c transpose(), |
| * sparse \c operator+, \c selfadjointView expansion) read coefficients even |
| * when the algorithm only cares about the pattern, so leaving them |
| * uninitialized would be UB. |
| * |
| * Intended for fill-reducing ordering algorithms (AMD, COLAMD) that read only |
| * the pattern. \b Precondition: \c out's storage must not alias \c pat — |
| * \c resize / \c resizeNonZeros may reallocate and invalidate the view. |
| * |
| * If \c row_perm is non-null, each inner row index is remapped through it |
| * (entry \c (i, j) of \c pat becomes \c (row_perm[i], j) in the result), |
| * implementing a left multiplication by a row permutation without copying any |
| * scalar values from the source. |
| * |
| * The result preserves SparseMatrix's invariant that inner indices are sorted |
| * within each column: the unpermuted paths copy the source's already-sorted |
| * columns verbatim, and the permuted paths re-sort each column after remapping. |
| */ |
| template <typename StorageIndex> |
| void materialize_col_major_pattern(const SparsityPatternRef<StorageIndex>& pat, const StorageIndex* row_perm, |
| SparseMatrix<signed char, ColMajor, StorageIndex>& out) { |
| const Index n_outer = pat.outerSize; |
| const Index n_inner = pat.innerSize; |
| out.resize(n_inner, n_outer); |
| |
| Index total = 0; |
| if (pat.isCompressed()) { |
| total = pat.outer[n_outer]; |
| } else { |
| for (Index j = 0; j < n_outer; ++j) total += pat.nonZeros(j); |
| } |
| out.resizeNonZeros(total); |
| |
| StorageIndex* o_outer = out.outerIndexPtr(); |
| StorageIndex* o_inner = out.innerIndexPtr(); |
| if (pat.isCompressed()) { |
| // The source's outer array is already in CSC form, and SparseMatrix's |
| // outerIndexPtr() always starts at zero (as does the buffer-built fallback |
| // in make_col_major_pattern_ref), so the result's outer indices are a |
| // verbatim copy. For the no-permutation case the inner indices are also a |
| // straight bulk copy. Both reduce to single std::copy / memcpy calls. |
| std::copy(pat.outer, pat.outer + n_outer + 1, o_outer); |
| if (row_perm == nullptr) { |
| std::copy(pat.inner, pat.inner + total, o_inner); |
| } else { |
| // Remap-and-sort per column, reading from the source's inner array |
| // directly to avoid the write-amplification of bulk-copying then |
| // overwriting in place. |
| for (Index j = 0; j < n_outer; ++j) { |
| const StorageIndex* src = pat.inner + o_outer[j]; |
| StorageIndex* dst = o_inner + o_outer[j]; |
| const Index nz = o_outer[j + 1] - o_outer[j]; |
| for (Index k = 0; k < nz; ++k) dst[k] = row_perm[src[k]]; |
| std::sort(dst, dst + nz); |
| } |
| } |
| } else { |
| o_outer[0] = 0; |
| for (Index j = 0; j < n_outer; ++j) { |
| const Index nz = pat.nonZeros(j); |
| o_outer[j + 1] = convert_index<StorageIndex>(o_outer[j] + nz); |
| const StorageIndex* src = pat.inner + pat.outer[j]; |
| StorageIndex* dst = o_inner + o_outer[j]; |
| if (row_perm == nullptr) { |
| std::copy(src, src + nz, dst); |
| } else { |
| for (Index k = 0; k < nz; ++k) dst[k] = row_perm[src[k]]; |
| std::sort(dst, dst + nz); |
| } |
| } |
| } |
| // Fill values with a fixed nonzero sentinel so downstream consumers that |
| // read coefficients (transpose, sparse +, selfadjointView expansion) do not |
| // observe uninitialized memory. |
| std::fill_n(out.valuePtr(), total, static_cast<signed char>(1)); |
| } |
| |
| /** \internal Convenience overload — identity row permutation. */ |
| template <typename StorageIndex> |
| void materialize_col_major_pattern(const SparsityPatternRef<StorageIndex>& pat, |
| SparseMatrix<signed char, ColMajor, StorageIndex>& out) { |
| materialize_col_major_pattern(pat, static_cast<const StorageIndex*>(nullptr), out); |
| } |
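| |
| // Illustrative sketch only (hypothetical inputs): materializing pattern(P * A) |
| // for a row permutation given as an index array perm, where perm[i] is the |
| // destination row of source row i, matching the row_perm contract above. |
| // |
| //   Matrix<int, Dynamic, 1> outer_buf, inner_buf, perm; |
| //   SparsityPatternRef<int> p = make_col_major_pattern_ref(A, outer_buf, inner_buf); |
| //   SparseMatrix<signed char, ColMajor, int> PA_pattern; |
| //   materialize_col_major_pattern(p, perm.data(), PA_pattern); |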
| |
| // The two materializers below, materialize_at_plus_a_pattern and |
| // materialize_selfadjoint_pattern<UpLo>, share a common skeleton: a counting |
| // sort builds A^T, then two linear two-way merge passes walk each column, one |
| // to count the output column sizes and one to write the merged inner indices. |
| // The two-way merge bodies are duplicated rather than extracted into shared |
| // helpers because every attempt to factor them out regressed performance by |
| // 2–5% on medium matrices: unifying the bodies behind a lambda predicate (the |
| // lambda call was not elided across the inner loop), and EIGEN_STRONG_INLINE |
| // function-template helpers (the compiler inlined them, but the |
| // function-template boundary still produced measurably worse codegen than the |
| // explicit inline loops, presumably from differences in register allocation or |
| // vectorization heuristics around the live ranges of the merge's locals). The |
| // merge logic is short enough that the duplication is a small maintenance |
| // burden in exchange for stable codegen on the AMD hot path. If a future |
| // compiler change makes the abstraction free, this can be revisited. |
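| // |
| // A tiny worked example of the skeleton (columns written as sorted row sets): |
| // for A = { col0: {0}, col1: {0, 2}, col2: {1} }, the counting sort yields |
| // A^T = { col0: {0, 1}, col1: {2}, col2: {1} }, and the per-column merges give |
| // pattern(A + A^T) = { col0: {0, 1}, col1: {0, 2}, col2: {1} }. |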
| |
| /** \internal |
| * Materialize the symmetric pattern <tt>pattern(A + A^T)</tt> of a square column-major |
| * sparsity pattern \a A as a compressed \c SparseMatrix with a 1-byte placeholder |
| * \c Scalar. Values are filled with a fixed nonzero sentinel. |
| * |
| * Avoids the <tt>C = A.transpose(); symmat = C + A;</tt> chain previously performed |
| * by Eigen's general-purpose evaluators. Both A's columns and A^T's columns |
| * (built by counting sort) are sorted, so per-column union is a linear two-way |
| * merge. |
| * |
| * Intended for fill-reducing ordering algorithms (AMD) that read only the |
| * pattern. \b Precondition: \a A is square (\c outerSize == innerSize) and |
| * \a out's storage must not alias \a A. |
| */ |
| template <typename StorageIndex> |
| void materialize_at_plus_a_pattern(const SparsityPatternRef<StorageIndex>& A, |
| SparseMatrix<signed char, ColMajor, StorageIndex>& out) { |
| const Index n = A.outerSize; |
| eigen_assert(n == A.innerSize && "materialize_at_plus_a_pattern: A must be square"); |
| |
| // 1. Build A^T's column starts (= A's row counts) via counting sort. |
| Matrix<StorageIndex, Dynamic, 1> AT_p(n + 1); |
| AT_p.setZero(); |
| for (Index j = 0; j < n; ++j) { |
| const Index nz = A.nonZeros(j); |
| const StorageIndex* col = A.inner + A.outer[j]; |
| for (Index k = 0; k < nz; ++k) ++AT_p(col[k] + 1); |
| } |
| for (Index i = 0; i < n; ++i) AT_p(i + 1) += AT_p(i); |
| |
| // 2. Place A^T's inner indices. Visiting A column-by-column in increasing j |
| // means each A^T column accumulates its inner indices in increasing order, |
| // so AT_i is sorted within each column. \a head is a deliberate deep copy |
| // of \c AT_p.head(n) — it is incremented as a per-column write cursor while |
| // \c AT_p remains the immutable column-start array used in passes 3 and 4. |
| const StorageIndex a_nnz = AT_p(n); |
| Matrix<StorageIndex, Dynamic, 1> AT_i(a_nnz); |
| Matrix<StorageIndex, Dynamic, 1> head = AT_p.head(n); |
| for (Index j = 0; j < n; ++j) { |
| const Index nz = A.nonZeros(j); |
| const StorageIndex* col = A.inner + A.outer[j]; |
| for (Index k = 0; k < nz; ++k) AT_i(head(col[k])++) = convert_index<StorageIndex>(j); |
| } |
| |
| // 3. First merge pass: count column sizes of pattern(A + A^T) by two-way |
| // merge of A's and A^T's sorted columns. |
| out.resize(n, n); |
| StorageIndex* out_p = out.outerIndexPtr(); |
| out_p[0] = 0; |
| for (Index j = 0; j < n; ++j) { |
| const StorageIndex* a_col = A.inner + A.outer[j]; |
| const Index a_nz = A.nonZeros(j); |
| const StorageIndex* at_col = AT_i.data() + AT_p(j); |
| const Index at_nz = AT_p(j + 1) - AT_p(j); |
| Index ia = 0, it = 0, count = 0; |
| while (ia < a_nz && it < at_nz) { |
| const StorageIndex va = a_col[ia], vt = at_col[it]; |
| if (va < vt) { |
| ++ia; |
| } else if (va > vt) { |
| ++it; |
| } else { |
| ++ia; |
| ++it; |
| } |
| ++count; |
| } |
| count += (a_nz - ia) + (at_nz - it); |
| out_p[j + 1] = out_p[j] + convert_index<StorageIndex>(count); |
| } |
| |
| const StorageIndex total = out_p[n]; |
| out.resizeNonZeros(total); |
| |
| // 4. Second merge pass: write merged inner indices. |
| StorageIndex* out_i = out.innerIndexPtr(); |
| for (Index j = 0; j < n; ++j) { |
| const StorageIndex* a_col = A.inner + A.outer[j]; |
| const Index a_nz = A.nonZeros(j); |
| const StorageIndex* at_col = AT_i.data() + AT_p(j); |
| const Index at_nz = AT_p(j + 1) - AT_p(j); |
| StorageIndex* dst = out_i + out_p[j]; |
| Index ia = 0, it = 0; |
| while (ia < a_nz && it < at_nz) { |
| const StorageIndex va = a_col[ia], vt = at_col[it]; |
| if (va < vt) { |
| *dst++ = va; |
| ++ia; |
| } else if (va > vt) { |
| *dst++ = vt; |
| ++it; |
| } else { |
| *dst++ = va; |
| ++ia; |
| ++it; |
| } |
| } |
| while (ia < a_nz) *dst++ = a_col[ia++]; |
| while (it < at_nz) *dst++ = at_col[it++]; |
| } |
| std::fill_n(out.valuePtr(), total, static_cast<signed char>(1)); |
| } |
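| |
| // Illustrative sketch only (hypothetical input A): building the pattern-only |
| // AMD input for a generic square matrix without forming A.transpose() + A as |
| // value expressions. The downstream ordering call is not shown here. |
| // |
| //   Matrix<int, Dynamic, 1> outer_buf, inner_buf; |
| //   SparsityPatternRef<int> p = make_col_major_pattern_ref(A, outer_buf, inner_buf); |
| //   SparseMatrix<signed char, ColMajor, int> symm; |
| //   materialize_at_plus_a_pattern(p, symm);  // symm holds pattern(A + A^T) |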
| |
| /** \internal |
| * Materialize the full symmetric pattern of a \c SparseSelfAdjointView<UpLo> |
| * given the underlying matrix's pattern view \a A. \c UpLo must be \c Lower or |
| * \c Upper; entries of \a A outside that triangle (and outside the diagonal) |
| * are ignored, matching \c selfadjointView semantics. |
| * |
| * Output is a compressed column-major \c SparseMatrix<signed char> with |
| * placeholder values, suitable for the AMD ordering pipeline. \b Precondition: |
| * \a A is square; \a out's storage must not alias \a A. |
| */ |
| template <unsigned int UpLo, typename StorageIndex> |
| void materialize_selfadjoint_pattern(const SparsityPatternRef<StorageIndex>& A, |
| SparseMatrix<signed char, ColMajor, StorageIndex>& out) { |
| static_assert(UpLo == static_cast<unsigned>(Lower) || UpLo == static_cast<unsigned>(Upper), |
| "UpLo must be Lower or Upper"); |
| const Index n = A.outerSize; |
| eigen_assert(n == A.innerSize && "materialize_selfadjoint_pattern: A must be square"); |
| constexpr bool IsLower = (UpLo == static_cast<unsigned>(Lower)); |
| |
| // 1. Count filtered row occurrences (A^T's column lengths). For UpLo == Lower |
| // we keep entries with row >= col; for Upper, row <= col. The diagonal is |
| // kept in both cases. |
| Matrix<StorageIndex, Dynamic, 1> AT_p(n + 1); |
| AT_p.setZero(); |
| for (Index j = 0; j < n; ++j) { |
| const Index nz = A.nonZeros(j); |
| const StorageIndex* col = A.inner + A.outer[j]; |
| for (Index k = 0; k < nz; ++k) { |
| const StorageIndex r = col[k]; |
| const bool keep = IsLower ? (Index(r) >= j) : (Index(r) <= j); |
| if (keep) ++AT_p(r + 1); |
| } |
| } |
| for (Index i = 0; i < n; ++i) AT_p(i + 1) += AT_p(i); |
| |
| // 2. Place A^T's inner indices. The kept entries of column j are visited in |
| // sorted row-order, so each A^T column ends up sorted. \a head is a deep |
| // copy used as a per-column write cursor while \c AT_p stays untouched. |
| const StorageIndex a_nnz = AT_p(n); |
| Matrix<StorageIndex, Dynamic, 1> AT_i(a_nnz); |
| Matrix<StorageIndex, Dynamic, 1> head = AT_p.head(n); |
| for (Index j = 0; j < n; ++j) { |
| const Index nz = A.nonZeros(j); |
| const StorageIndex* col = A.inner + A.outer[j]; |
| for (Index k = 0; k < nz; ++k) { |
| const StorageIndex r = col[k]; |
| const bool keep = IsLower ? (Index(r) >= j) : (Index(r) <= j); |
| if (keep) AT_i(head(r)++) = convert_index<StorageIndex>(j); |
| } |
| } |
| |
| // Cache the kept-range bound for each column so passes 3 and 4 don't both |
| // pay the binary search. For UpLo == Lower the kept range starts at |
| // \c lower_bound(j) and runs to the end of the column; for Upper it starts |
| // at 0 and ends at \c upper_bound(j). Storing only the side that requires a |
| // search keeps scratch to one Index per column. |
| Matrix<Index, Dynamic, 1> a_split(n); |
| for (Index j = 0; j < n; ++j) { |
| const StorageIndex* a_col = A.inner + A.outer[j]; |
| const Index a_nz = A.nonZeros(j); |
| if (IsLower) { |
| a_split(j) = std::lower_bound(a_col, a_col + a_nz, StorageIndex(j)) - a_col; |
| } else { |
| a_split(j) = std::upper_bound(a_col, a_col + a_nz, StorageIndex(j)) - a_col; |
| } |
| } |
| |
| // 3. First merge pass: count column sizes of pattern(A + A^T) over the |
| // filtered subrange of each column. |
| out.resize(n, n); |
| StorageIndex* out_p = out.outerIndexPtr(); |
| out_p[0] = 0; |
| for (Index j = 0; j < n; ++j) { |
| const StorageIndex* a_col = A.inner + A.outer[j]; |
| const Index a_nz = A.nonZeros(j); |
| const StorageIndex* a_kept = IsLower ? a_col + a_split(j) : a_col; |
| const Index a_kept_nz = IsLower ? (a_nz - a_split(j)) : a_split(j); |
| const StorageIndex* at_col = AT_i.data() + AT_p(j); |
| const Index at_nz = AT_p(j + 1) - AT_p(j); |
| Index ia = 0, it = 0, count = 0; |
| while (ia < a_kept_nz && it < at_nz) { |
| const StorageIndex va = a_kept[ia], vt = at_col[it]; |
| if (va < vt) { |
| ++ia; |
| } else if (va > vt) { |
| ++it; |
| } else { |
| ++ia; |
| ++it; |
| } |
| ++count; |
| } |
| count += (a_kept_nz - ia) + (at_nz - it); |
| out_p[j + 1] = out_p[j] + convert_index<StorageIndex>(count); |
| } |
| |
| const StorageIndex total = out_p[n]; |
| out.resizeNonZeros(total); |
| |
| // 4. Second merge pass: write merged inner indices. |
| StorageIndex* out_i = out.innerIndexPtr(); |
| for (Index j = 0; j < n; ++j) { |
| const StorageIndex* a_col = A.inner + A.outer[j]; |
| const Index a_nz = A.nonZeros(j); |
| const StorageIndex* a_kept = IsLower ? a_col + a_split(j) : a_col; |
| const Index a_kept_nz = IsLower ? (a_nz - a_split(j)) : a_split(j); |
| const StorageIndex* at_col = AT_i.data() + AT_p(j); |
| const Index at_nz = AT_p(j + 1) - AT_p(j); |
| StorageIndex* dst = out_i + out_p[j]; |
| Index ia = 0, it = 0; |
| while (ia < a_kept_nz && it < at_nz) { |
| const StorageIndex va = a_kept[ia], vt = at_col[it]; |
| if (va < vt) { |
| *dst++ = va; |
| ++ia; |
| } else if (va > vt) { |
| *dst++ = vt; |
| ++it; |
| } else { |
| *dst++ = va; |
| ++ia; |
| ++it; |
| } |
| } |
| while (ia < a_kept_nz) *dst++ = a_kept[ia++]; |
| while (it < at_nz) *dst++ = at_col[it++]; |
| } |
| std::fill_n(out.valuePtr(), total, static_cast<signed char>(1)); |
| } |
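| |
| // Illustrative sketch only (hypothetical input L, the lower triangle of a |
| // symmetric matrix): expanding to the full symmetric pattern, as |
| // selfadjointView<Lower>() would, without reading any values. |
| // |
| //   Matrix<int, Dynamic, 1> outer_buf, inner_buf; |
| //   SparsityPatternRef<int> p = make_col_major_pattern_ref(L, outer_buf, inner_buf); |
| //   SparseMatrix<signed char, ColMajor, int> full; |
| //   materialize_selfadjoint_pattern<Lower>(p, full);  // both triangles present |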
| |
| } // namespace internal |
| } // namespace Eigen |
| |
| #endif // EIGEN_SPARSITY_PATTERN_REF_H |