test/sparse_threaded_product.cpp - mirror - Git at Google

 // This file is part of Eigen, a lightweight C++ template library
 // for linear algebra.
 //
 // Copyright (C) 2026 Rasmus Munk Larsen <rmlarsen@gmail.com>
 //
 // This Source Code Form is subject to the terms of the Mozilla
 // Public License v. 2.0. If a copy of the MPL was not distributed
 // with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
 // SPDX-License-Identifier: MPL-2.0

 #define EIGEN_USE_THREADS 1
 #include "sparse.h"

 // Pulls in the ThreadedSparseProduct header via the SparseCore module.
 // (Eigen/Sparse -> Eigen/SparseCore conditional include under EIGEN_USE_THREADS.)
 #include <Eigen/SparseCore>

 template <typename SparseMatrixType, typename DenseVectorType>
 void verify_threaded_spmv(Index rows, Index cols, double density) {
   typedef typename SparseMatrixType::Scalar Scalar;
   typedef Matrix<Scalar, Dynamic, Dynamic> DenseMatrix;

   DenseMatrix refMat(rows, cols);
   SparseMatrixType A(rows, cols);
   initSparse<Scalar>(density, refMat, A);
   A.makeCompressed();

   DenseVectorType x_fwd = DenseVectorType::Random(cols);
   DenseVectorType x_adj = DenseVectorType::Random(rows);

   // Reference results via the existing dense path.
   DenseVectorType y_fwd_ref = refMat * x_fwd;
   DenseVectorType y_adj_ref = refMat.adjoint() * x_adj;

   ThreadedSparseProduct<SparseMatrixType> op(A);
   VERIFY(op.rows() == A.rows());
   VERIFY(op.cols() == A.cols());

   // Overwriting forms.
   {
     DenseVectorType y(rows);
     y.setRandom();
     op.apply(x_fwd, y);
     VERIFY_IS_APPROX(y, y_fwd_ref);
   }
   {
     DenseVectorType y(cols);
     y.setRandom();
     op.applyAdjoint(x_adj, y);
     VERIFY_IS_APPROX(y, y_adj_ref);
   }

   // Accumulating forms: y += alpha * A * x, with both alpha = 1 and a
   // non-trivial alpha.
   for (Scalar alpha : {Scalar(1), Scalar(internal::random<Scalar>())}) {
     {
       DenseVectorType y0 = DenseVectorType::Random(rows);
       DenseVectorType y_ref = y0 + alpha * y_fwd_ref;
       DenseVectorType y = y0;
       op.applyAddTo(x_fwd, y, alpha);
       VERIFY_IS_APPROX(y, y_ref);
     }
     {
       DenseVectorType y0 = DenseVectorType::Random(cols);
       DenseVectorType y_ref = y0 + alpha * y_adj_ref;
       DenseVectorType y = y0;
       op.applyAdjointAddTo(x_adj, y, alpha);
       VERIFY_IS_APPROX(y, y_ref);
     }
   }

   // Calling adjoint must have materialized the mirror; calling it again must
   // not rebuild (no easy assertion for "not rebuilt", but verify the path is
   // consistent across repeated calls).
   VERIFY(op.hasMirror());
   {
     DenseVectorType y(cols);
     op.applyAdjoint(x_adj, y);
     VERIFY_IS_APPROX(y, y_adj_ref);
   }

   // Re-running analyzePattern must invalidate the mirror.
   op.analyzePattern(A);
   VERIFY(!op.hasMirror());
   {
     DenseVectorType y(cols);
     op.applyAdjoint(x_adj, y);
     VERIFY_IS_APPROX(y, y_adj_ref);
   }

   // Coefficient-only invalidation: mutate the bound matrix's stored values
   // (same sparsity pattern), call refreshValues(), and verify the next
   // mirror-using direction picks up the new coefficients. The mirror is
   // materialized lazily by the direction that needs the transposed view --
   // forward for ColMajor A, adjoint for RowMajor A -- so call both to warm
   // it regardless of storage order.
   {
     DenseVectorType y_fwd(rows);
     DenseVectorType y_adj(cols);
     op.apply(x_fwd, y_fwd);
     op.applyAdjoint(x_adj, y_adj);
     VERIFY(op.hasMirror());
   }
   for (Index k = 0; k < A.nonZeros(); ++k) A.valuePtr()[k] *= Scalar(2);
   refMat *= Scalar(2);
   DenseVectorType y_fwd_ref_updated = refMat * x_fwd;
   DenseVectorType y_adj_ref_updated = refMat.adjoint() * x_adj;
   op.refreshValues();
   VERIFY(!op.hasMirror());
   {
     DenseVectorType y(rows);
     op.apply(x_fwd, y);
     VERIFY_IS_APPROX(y, y_fwd_ref_updated);
   }
   {
     DenseVectorType y(cols);
     op.applyAdjoint(x_adj, y);
     VERIFY_IS_APPROX(y, y_adj_ref_updated);
   }
 }

 // IEEE-754 propagation through the ColMajor SpMV OMP scatter/reduce path
 // (Eigen/src/SparseCore/SparseDenseProduct.h). With OpenMP enabled, the
 // kernel sums per-thread scratch buffers into y; NaN/Inf in any thread's
 // contribution must propagate to the reduced sum. Without OpenMP this still
 // exercises the serial scatter, where the same propagation must hold.
 template <typename Scalar>
 void verify_threaded_scatter_ieee754() {
   typedef SparseMatrix<Scalar, ColMajor> SpMat;
   typedef Matrix<Scalar, Dynamic, 1> Vec;
   typedef typename NumTraits<Scalar>::Real Real;

   // Pin the OMP team size so the scatter/reduce gate (nnz >= threads * m)
   // is deterministic across runs.
   const int prev_threads = Eigen::nbThreads();
   Eigen::setNbThreads(4);

   // 300x300 with diagonal + (i+j)%4==0 entries -> ~22600 nnz, above the
   // 20000-nnz threshold and well above threads*m = 4*300 for the gate.
   const Index n = 300;
   std::vector<Triplet<Scalar> > triplets;
   triplets.reserve(static_cast<std::size_t>(n) * (n / 4 + 1));
   // Simple deterministic LCG so we don't depend on <random> being included
   // transitively. Modulo-bounded so we don't rely on a specific integer width.
   unsigned rng = 0xC0FFEEu;
   for (int j = 0; j < int(n); ++j) {
     for (int i = 0; i < int(n); ++i) {
       if (i == j || (i + j) % 4 == 0) {
         rng = (rng * 1664525u + 1013904223u) & 0xFFFFu;
         const Real v = Real(0.1) + Real(0.9) * Real(rng) / Real(0xFFFF);
         triplets.push_back(Triplet<Scalar>(i, j, Scalar(v)));
       }
     }
   }
   SpMat A(n, n);
   A.setFromTriplets(triplets.begin(), triplets.end());
   A.makeCompressed();
   VERIFY(A.nonZeros() > 20000);

   // Inject NaN/Inf into three rows of column 0 by overwriting valuePtr().
   // Pick rows that are guaranteed in column 0's pattern: row 0 (diagonal),
   // row 4 ((4+0)%4==0), row 8 ((8+0)%4==0).
   const Real nan_r = std::numeric_limits<Real>::quiet_NaN();
   const Real inf_r = std::numeric_limits<Real>::infinity();
   const Index i_nan = 0, i_pinf = 4, i_ninf = 8;
   bool set_nan = false, set_pinf = false, set_ninf = false;
   for (Index k = A.outerIndexPtr()[0]; k < A.outerIndexPtr()[1]; ++k) {
     const Index i = A.innerIndexPtr()[k];
     if (i == i_nan) {
       A.valuePtr()[k] = Scalar(nan_r);
       set_nan = true;
     } else if (i == i_pinf) {
       A.valuePtr()[k] = Scalar(inf_r);
       set_pinf = true;
     } else if (i == i_ninf) {
       A.valuePtr()[k] = Scalar(-inf_r);
       set_ninf = true;
     }
   }
   VERIFY(set_nan && set_pinf && set_ninf);

   Vec x = Vec::Random(n);
   // Ensure x[0] is finite and strictly positive (real part), so the injected
   // ±Inf contributions don't cancel against a zero-multiplier or flip sign.
   x[0] = Scalar(1);

   Vec y = A * x;

   VERIFY((numext::isnan)(numext::real(y[i_nan])));
   VERIFY((numext::isinf)(numext::real(y[i_pinf])) && numext::real(y[i_pinf]) > Real(0));
   VERIFY((numext::isinf)(numext::real(y[i_ninf])) && numext::real(y[i_ninf]) < Real(0));

   Eigen::setNbThreads(prev_threads);
 }

 template <typename Scalar>
 void run_grid() {
   typedef Matrix<Scalar, Dynamic, 1> Vec;
   // Cover both storage orders, square and rectangular, small (serial path,
   // nnz < threshold) and larger (threaded path).
   for (int order = 0; order < 2; ++order) {
     if (order == 0) {
       verify_threaded_spmv<SparseMatrix<Scalar, ColMajor>, Vec>(8, 8, 0.5);
       verify_threaded_spmv<SparseMatrix<Scalar, ColMajor>, Vec>(13, 21, 0.3);
       verify_threaded_spmv<SparseMatrix<Scalar, ColMajor>, Vec>(500, 500, 0.15);
       verify_threaded_spmv<SparseMatrix<Scalar, ColMajor>, Vec>(800, 400, 0.10);
       // Large enough that nnz > kThreadingThreshold (20000) to exercise the
       // multi-threaded dispatch path.
       verify_threaded_spmv<SparseMatrix<Scalar, ColMajor>, Vec>(1500, 1500, 0.02);
     } else {
       verify_threaded_spmv<SparseMatrix<Scalar, RowMajor>, Vec>(8, 8, 0.5);
       verify_threaded_spmv<SparseMatrix<Scalar, RowMajor>, Vec>(21, 13, 0.3);
       verify_threaded_spmv<SparseMatrix<Scalar, RowMajor>, Vec>(500, 500, 0.15);
       verify_threaded_spmv<SparseMatrix<Scalar, RowMajor>, Vec>(400, 800, 0.10);
       verify_threaded_spmv<SparseMatrix<Scalar, RowMajor>, Vec>(1500, 1500, 0.02);
     }
   }
 }

 EIGEN_DECLARE_TEST(sparse_threaded_product) {
   for (int i = 0; i < g_repeat; ++i) {
     CALL_SUBTEST_1(run_grid<float>());
     CALL_SUBTEST_2(run_grid<double>());
     CALL_SUBTEST_3(run_grid<std::complex<double> >());
     CALL_SUBTEST_4(verify_threaded_scatter_ieee754<double>());
     CALL_SUBTEST_4(verify_threaded_scatter_ieee754<float>());
   }
 }
	// This file is part of Eigen, a lightweight C++ template library
	// for linear algebra.
	//
	// Copyright (C) 2026 Rasmus Munk Larsen <rmlarsen@gmail.com>
	//
	// This Source Code Form is subject to the terms of the Mozilla
	// Public License v. 2.0. If a copy of the MPL was not distributed
	// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
	// SPDX-License-Identifier: MPL-2.0

	#define EIGEN_USE_THREADS 1
	#include "sparse.h"

	// Pulls in the ThreadedSparseProduct header via the SparseCore module.
	// (Eigen/Sparse -> Eigen/SparseCore conditional include under EIGEN_USE_THREADS.)
	#include <Eigen/SparseCore>

	template <typename SparseMatrixType, typename DenseVectorType>
	void verify_threaded_spmv(Index rows, Index cols, double density) {
	typedef typename SparseMatrixType::Scalar Scalar;
	typedef Matrix<Scalar, Dynamic, Dynamic> DenseMatrix;

	DenseMatrix refMat(rows, cols);
	SparseMatrixType A(rows, cols);
	initSparse<Scalar>(density, refMat, A);
	A.makeCompressed();

	DenseVectorType x_fwd = DenseVectorType::Random(cols);
	DenseVectorType x_adj = DenseVectorType::Random(rows);

	// Reference results via the existing dense path.
	DenseVectorType y_fwd_ref = refMat * x_fwd;
	DenseVectorType y_adj_ref = refMat.adjoint() * x_adj;

	ThreadedSparseProduct<SparseMatrixType> op(A);
	VERIFY(op.rows() == A.rows());
	VERIFY(op.cols() == A.cols());

	// Overwriting forms.
	{
	DenseVectorType y(rows);
	y.setRandom();
	op.apply(x_fwd, y);
	VERIFY_IS_APPROX(y, y_fwd_ref);
	}
	{
	DenseVectorType y(cols);
	y.setRandom();
	op.applyAdjoint(x_adj, y);
	VERIFY_IS_APPROX(y, y_adj_ref);
	}

	// Accumulating forms: y += alpha * A * x, with both alpha = 1 and a
	// non-trivial alpha.
	for (Scalar alpha : {Scalar(1), Scalar(internal::random<Scalar>())}) {
	{
	DenseVectorType y0 = DenseVectorType::Random(rows);
	DenseVectorType y_ref = y0 + alpha * y_fwd_ref;
	DenseVectorType y = y0;
	op.applyAddTo(x_fwd, y, alpha);
	VERIFY_IS_APPROX(y, y_ref);
	}
	{
	DenseVectorType y0 = DenseVectorType::Random(cols);
	DenseVectorType y_ref = y0 + alpha * y_adj_ref;
	DenseVectorType y = y0;
	op.applyAdjointAddTo(x_adj, y, alpha);
	VERIFY_IS_APPROX(y, y_ref);
	}
	}

	// Calling adjoint must have materialized the mirror; calling it again must
	// not rebuild (no easy assertion for "not rebuilt", but verify the path is
	// consistent across repeated calls).
	VERIFY(op.hasMirror());
	{
	DenseVectorType y(cols);
	op.applyAdjoint(x_adj, y);
	VERIFY_IS_APPROX(y, y_adj_ref);
	}

	// Re-running analyzePattern must invalidate the mirror.
	op.analyzePattern(A);
	VERIFY(!op.hasMirror());
	{
	DenseVectorType y(cols);
	op.applyAdjoint(x_adj, y);
	VERIFY_IS_APPROX(y, y_adj_ref);
	}

	// Coefficient-only invalidation: mutate the bound matrix's stored values
	// (same sparsity pattern), call refreshValues(), and verify the next
	// mirror-using direction picks up the new coefficients. The mirror is
	// materialized lazily by the direction that needs the transposed view --
	// forward for ColMajor A, adjoint for RowMajor A -- so call both to warm
	// it regardless of storage order.
	{
	DenseVectorType y_fwd(rows);
	DenseVectorType y_adj(cols);
	op.apply(x_fwd, y_fwd);
	op.applyAdjoint(x_adj, y_adj);
	VERIFY(op.hasMirror());
	}
	for (Index k = 0; k < A.nonZeros(); ++k) A.valuePtr()[k] *= Scalar(2);
	refMat *= Scalar(2);
	DenseVectorType y_fwd_ref_updated = refMat * x_fwd;
	DenseVectorType y_adj_ref_updated = refMat.adjoint() * x_adj;
	op.refreshValues();
	VERIFY(!op.hasMirror());
	{
	DenseVectorType y(rows);
	op.apply(x_fwd, y);
	VERIFY_IS_APPROX(y, y_fwd_ref_updated);
	}
	{
	DenseVectorType y(cols);
	op.applyAdjoint(x_adj, y);
	VERIFY_IS_APPROX(y, y_adj_ref_updated);
	}
	}

	// IEEE-754 propagation through the ColMajor SpMV OMP scatter/reduce path
	// (Eigen/src/SparseCore/SparseDenseProduct.h). With OpenMP enabled, the
	// kernel sums per-thread scratch buffers into y; NaN/Inf in any thread's
	// contribution must propagate to the reduced sum. Without OpenMP this still
	// exercises the serial scatter, where the same propagation must hold.
	template <typename Scalar>
	void verify_threaded_scatter_ieee754() {
	typedef SparseMatrix<Scalar, ColMajor> SpMat;
	typedef Matrix<Scalar, Dynamic, 1> Vec;
	typedef typename NumTraits<Scalar>::Real Real;

	// Pin the OMP team size so the scatter/reduce gate (nnz >= threads * m)
	// is deterministic across runs.
	const int prev_threads = Eigen::nbThreads();
	Eigen::setNbThreads(4);

	// 300x300 with diagonal + (i+j)%4==0 entries -> ~22600 nnz, above the
	// 20000-nnz threshold and well above threadsm = 4300 for the gate.
	const Index n = 300;
	std::vector<Triplet<Scalar> > triplets;
	triplets.reserve(static_cast<std::size_t>(n) * (n / 4 + 1));
	// Simple deterministic LCG so we don't depend on <random> being included
	// transitively. Modulo-bounded so we don't rely on a specific integer width.
	unsigned rng = 0xC0FFEEu;
	for (int j = 0; j < int(n); ++j) {
	for (int i = 0; i < int(n); ++i) {
	if (i == j \|\| (i + j) % 4 == 0) {
	rng = (rng * 1664525u + 1013904223u) & 0xFFFFu;
	const Real v = Real(0.1) + Real(0.9) * Real(rng) / Real(0xFFFF);
	triplets.push_back(Triplet<Scalar>(i, j, Scalar(v)));
	}
	}
	}
	SpMat A(n, n);
	A.setFromTriplets(triplets.begin(), triplets.end());
	A.makeCompressed();
	VERIFY(A.nonZeros() > 20000);

	// Inject NaN/Inf into three rows of column 0 by overwriting valuePtr().
	// Pick rows that are guaranteed in column 0's pattern: row 0 (diagonal),
	// row 4 ((4+0)%4==0), row 8 ((8+0)%4==0).
	const Real nan_r = std::numeric_limits<Real>::quiet_NaN();
	const Real inf_r = std::numeric_limits<Real>::infinity();
	const Index i_nan = 0, i_pinf = 4, i_ninf = 8;
	bool set_nan = false, set_pinf = false, set_ninf = false;
	for (Index k = A.outerIndexPtr()[0]; k < A.outerIndexPtr()[1]; ++k) {
	const Index i = A.innerIndexPtr()[k];
	if (i == i_nan) {
	A.valuePtr()[k] = Scalar(nan_r);
	set_nan = true;
	} else if (i == i_pinf) {
	A.valuePtr()[k] = Scalar(inf_r);
	set_pinf = true;
	} else if (i == i_ninf) {
	A.valuePtr()[k] = Scalar(-inf_r);
	set_ninf = true;
	}
	}
	VERIFY(set_nan && set_pinf && set_ninf);

	Vec x = Vec::Random(n);
	// Ensure x[0] is finite and strictly positive (real part), so the injected
	// ±Inf contributions don't cancel against a zero-multiplier or flip sign.
	x[0] = Scalar(1);

	Vec y = A * x;

	VERIFY((numext::isnan)(numext::real(y[i_nan])));
	VERIFY((numext::isinf)(numext::real(y[i_pinf])) && numext::real(y[i_pinf]) > Real(0));
	VERIFY((numext::isinf)(numext::real(y[i_ninf])) && numext::real(y[i_ninf]) < Real(0));

	Eigen::setNbThreads(prev_threads);
	}

	template <typename Scalar>
	void run_grid() {
	typedef Matrix<Scalar, Dynamic, 1> Vec;
	// Cover both storage orders, square and rectangular, small (serial path,
	// nnz < threshold) and larger (threaded path).
	for (int order = 0; order < 2; ++order) {
	if (order == 0) {
	verify_threaded_spmv<SparseMatrix<Scalar, ColMajor>, Vec>(8, 8, 0.5);
	verify_threaded_spmv<SparseMatrix<Scalar, ColMajor>, Vec>(13, 21, 0.3);
	verify_threaded_spmv<SparseMatrix<Scalar, ColMajor>, Vec>(500, 500, 0.15);
	verify_threaded_spmv<SparseMatrix<Scalar, ColMajor>, Vec>(800, 400, 0.10);
	// Large enough that nnz > kThreadingThreshold (20000) to exercise the
	// multi-threaded dispatch path.
	verify_threaded_spmv<SparseMatrix<Scalar, ColMajor>, Vec>(1500, 1500, 0.02);
	} else {
	verify_threaded_spmv<SparseMatrix<Scalar, RowMajor>, Vec>(8, 8, 0.5);
	verify_threaded_spmv<SparseMatrix<Scalar, RowMajor>, Vec>(21, 13, 0.3);
	verify_threaded_spmv<SparseMatrix<Scalar, RowMajor>, Vec>(500, 500, 0.15);
	verify_threaded_spmv<SparseMatrix<Scalar, RowMajor>, Vec>(400, 800, 0.10);
	verify_threaded_spmv<SparseMatrix<Scalar, RowMajor>, Vec>(1500, 1500, 0.02);
	}
	}
	}

	EIGEN_DECLARE_TEST(sparse_threaded_product) {
	for (int i = 0; i < g_repeat; ++i) {
	CALL_SUBTEST_1(run_grid<float>());
	CALL_SUBTEST_2(run_grid<double>());
	CALL_SUBTEST_3(run_grid<std::complex<double> >());
	CALL_SUBTEST_4(verify_threaded_scatter_ieee754<double>());
	CALL_SUBTEST_4(verify_threaded_scatter_ieee754<float>());
	}
	}