blob: ecc1de9abf17e8fa92cb79b824d244d9c9b4ba3d [file]
// This file is part of Eigen, a lightweight C++ template library
// for linear algebra.
//
// Copyright (C) 2026 Rasmus Munk Larsen <rmlarsen@gmail.com>
//
// This Source Code Form is subject to the terms of the Mozilla
// Public License v. 2.0. If a copy of the MPL was not distributed
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
// SPDX-License-Identifier: MPL-2.0
#define EIGEN_USE_THREADS 1
#include "sparse.h"
// Pulls in the ThreadedSparseProduct header via the SparseCore module.
// (Eigen/Sparse -> Eigen/SparseCore conditional include under EIGEN_USE_THREADS.)
#include <Eigen/SparseCore>
template <typename SparseMatrixType, typename DenseVectorType>
void verify_threaded_spmv(Index rows, Index cols, double density) {
typedef typename SparseMatrixType::Scalar Scalar;
typedef Matrix<Scalar, Dynamic, Dynamic> DenseMatrix;
DenseMatrix refMat(rows, cols);
SparseMatrixType A(rows, cols);
initSparse<Scalar>(density, refMat, A);
A.makeCompressed();
DenseVectorType x_fwd = DenseVectorType::Random(cols);
DenseVectorType x_adj = DenseVectorType::Random(rows);
// Reference results via the existing dense path.
DenseVectorType y_fwd_ref = refMat * x_fwd;
DenseVectorType y_adj_ref = refMat.adjoint() * x_adj;
ThreadedSparseProduct<SparseMatrixType> op(A);
VERIFY(op.rows() == A.rows());
VERIFY(op.cols() == A.cols());
// Overwriting forms.
{
DenseVectorType y(rows);
y.setRandom();
op.apply(x_fwd, y);
VERIFY_IS_APPROX(y, y_fwd_ref);
}
{
DenseVectorType y(cols);
y.setRandom();
op.applyAdjoint(x_adj, y);
VERIFY_IS_APPROX(y, y_adj_ref);
}
// Accumulating forms: y += alpha * A * x, with both alpha = 1 and a
// non-trivial alpha.
for (Scalar alpha : {Scalar(1), Scalar(internal::random<Scalar>())}) {
{
DenseVectorType y0 = DenseVectorType::Random(rows);
DenseVectorType y_ref = y0 + alpha * y_fwd_ref;
DenseVectorType y = y0;
op.applyAddTo(x_fwd, y, alpha);
VERIFY_IS_APPROX(y, y_ref);
}
{
DenseVectorType y0 = DenseVectorType::Random(cols);
DenseVectorType y_ref = y0 + alpha * y_adj_ref;
DenseVectorType y = y0;
op.applyAdjointAddTo(x_adj, y, alpha);
VERIFY_IS_APPROX(y, y_ref);
}
}
// Calling adjoint must have materialized the mirror; calling it again must
// not rebuild (no easy assertion for "not rebuilt", but verify the path is
// consistent across repeated calls).
VERIFY(op.hasMirror());
{
DenseVectorType y(cols);
op.applyAdjoint(x_adj, y);
VERIFY_IS_APPROX(y, y_adj_ref);
}
// Re-running analyzePattern must invalidate the mirror.
op.analyzePattern(A);
VERIFY(!op.hasMirror());
{
DenseVectorType y(cols);
op.applyAdjoint(x_adj, y);
VERIFY_IS_APPROX(y, y_adj_ref);
}
// Coefficient-only invalidation: mutate the bound matrix's stored values
// (same sparsity pattern), call refreshValues(), and verify the next
// mirror-using direction picks up the new coefficients. The mirror is
// materialized lazily by the direction that needs the transposed view --
// forward for ColMajor A, adjoint for RowMajor A -- so call both to warm
// it regardless of storage order.
{
DenseVectorType y_fwd(rows);
DenseVectorType y_adj(cols);
op.apply(x_fwd, y_fwd);
op.applyAdjoint(x_adj, y_adj);
VERIFY(op.hasMirror());
}
for (Index k = 0; k < A.nonZeros(); ++k) A.valuePtr()[k] *= Scalar(2);
refMat *= Scalar(2);
DenseVectorType y_fwd_ref_updated = refMat * x_fwd;
DenseVectorType y_adj_ref_updated = refMat.adjoint() * x_adj;
op.refreshValues();
VERIFY(!op.hasMirror());
{
DenseVectorType y(rows);
op.apply(x_fwd, y);
VERIFY_IS_APPROX(y, y_fwd_ref_updated);
}
{
DenseVectorType y(cols);
op.applyAdjoint(x_adj, y);
VERIFY_IS_APPROX(y, y_adj_ref_updated);
}
}
// IEEE-754 propagation through the ColMajor SpMV OMP scatter/reduce path
// (Eigen/src/SparseCore/SparseDenseProduct.h). With OpenMP enabled, the
// kernel sums per-thread scratch buffers into y; NaN/Inf in any thread's
// contribution must propagate to the reduced sum. Without OpenMP this still
// exercises the serial scatter, where the same propagation must hold.
template <typename Scalar>
void verify_threaded_scatter_ieee754() {
typedef SparseMatrix<Scalar, ColMajor> SpMat;
typedef Matrix<Scalar, Dynamic, 1> Vec;
typedef typename NumTraits<Scalar>::Real Real;
// Pin the OMP team size so the scatter/reduce gate (nnz >= threads * m)
// is deterministic across runs.
const int prev_threads = Eigen::nbThreads();
Eigen::setNbThreads(4);
// 300x300 with diagonal + (i+j)%4==0 entries -> ~22600 nnz, above the
// 20000-nnz threshold and well above threads*m = 4*300 for the gate.
const Index n = 300;
std::vector<Triplet<Scalar> > triplets;
triplets.reserve(static_cast<std::size_t>(n) * (n / 4 + 1));
// Simple deterministic LCG so we don't depend on <random> being included
// transitively. Modulo-bounded so we don't rely on a specific integer width.
unsigned rng = 0xC0FFEEu;
for (int j = 0; j < int(n); ++j) {
for (int i = 0; i < int(n); ++i) {
if (i == j || (i + j) % 4 == 0) {
rng = (rng * 1664525u + 1013904223u) & 0xFFFFu;
const Real v = Real(0.1) + Real(0.9) * Real(rng) / Real(0xFFFF);
triplets.push_back(Triplet<Scalar>(i, j, Scalar(v)));
}
}
}
SpMat A(n, n);
A.setFromTriplets(triplets.begin(), triplets.end());
A.makeCompressed();
VERIFY(A.nonZeros() > 20000);
// Inject NaN/Inf into three rows of column 0 by overwriting valuePtr().
// Pick rows that are guaranteed in column 0's pattern: row 0 (diagonal),
// row 4 ((4+0)%4==0), row 8 ((8+0)%4==0).
const Real nan_r = std::numeric_limits<Real>::quiet_NaN();
const Real inf_r = std::numeric_limits<Real>::infinity();
const Index i_nan = 0, i_pinf = 4, i_ninf = 8;
bool set_nan = false, set_pinf = false, set_ninf = false;
for (Index k = A.outerIndexPtr()[0]; k < A.outerIndexPtr()[1]; ++k) {
const Index i = A.innerIndexPtr()[k];
if (i == i_nan) {
A.valuePtr()[k] = Scalar(nan_r);
set_nan = true;
} else if (i == i_pinf) {
A.valuePtr()[k] = Scalar(inf_r);
set_pinf = true;
} else if (i == i_ninf) {
A.valuePtr()[k] = Scalar(-inf_r);
set_ninf = true;
}
}
VERIFY(set_nan && set_pinf && set_ninf);
Vec x = Vec::Random(n);
// Ensure x[0] is finite and strictly positive (real part), so the injected
// ±Inf contributions don't cancel against a zero-multiplier or flip sign.
x[0] = Scalar(1);
Vec y = A * x;
VERIFY((numext::isnan)(numext::real(y[i_nan])));
VERIFY((numext::isinf)(numext::real(y[i_pinf])) && numext::real(y[i_pinf]) > Real(0));
VERIFY((numext::isinf)(numext::real(y[i_ninf])) && numext::real(y[i_ninf]) < Real(0));
Eigen::setNbThreads(prev_threads);
}
template <typename Scalar>
void run_grid() {
typedef Matrix<Scalar, Dynamic, 1> Vec;
// Cover both storage orders, square and rectangular, small (serial path,
// nnz < threshold) and larger (threaded path).
for (int order = 0; order < 2; ++order) {
if (order == 0) {
verify_threaded_spmv<SparseMatrix<Scalar, ColMajor>, Vec>(8, 8, 0.5);
verify_threaded_spmv<SparseMatrix<Scalar, ColMajor>, Vec>(13, 21, 0.3);
verify_threaded_spmv<SparseMatrix<Scalar, ColMajor>, Vec>(500, 500, 0.15);
verify_threaded_spmv<SparseMatrix<Scalar, ColMajor>, Vec>(800, 400, 0.10);
// Large enough that nnz > kThreadingThreshold (20000) to exercise the
// multi-threaded dispatch path.
verify_threaded_spmv<SparseMatrix<Scalar, ColMajor>, Vec>(1500, 1500, 0.02);
} else {
verify_threaded_spmv<SparseMatrix<Scalar, RowMajor>, Vec>(8, 8, 0.5);
verify_threaded_spmv<SparseMatrix<Scalar, RowMajor>, Vec>(21, 13, 0.3);
verify_threaded_spmv<SparseMatrix<Scalar, RowMajor>, Vec>(500, 500, 0.15);
verify_threaded_spmv<SparseMatrix<Scalar, RowMajor>, Vec>(400, 800, 0.10);
verify_threaded_spmv<SparseMatrix<Scalar, RowMajor>, Vec>(1500, 1500, 0.02);
}
}
}
EIGEN_DECLARE_TEST(sparse_threaded_product) {
for (int i = 0; i < g_repeat; ++i) {
CALL_SUBTEST_1(run_grid<float>());
CALL_SUBTEST_2(run_grid<double>());
CALL_SUBTEST_3(run_grid<std::complex<double> >());
CALL_SUBTEST_4(verify_threaded_scatter_ieee754<double>());
CALL_SUBTEST_4(verify_threaded_scatter_ieee754<float>());
}
}