unsupported/test/GPU/cusparse_spmv.cpp - mirror - Git at Google

 // This file is part of Eigen, a lightweight C++ template library
 // for linear algebra.
 //
 // Copyright (C) 2026 Rasmus Munk Larsen <rmlarsen@gmail.com>
 //
 // This Source Code Form is subject to the terms of the Mozilla
 // Public License v. 2.0. If a copy of the MPL was not distributed
 // with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
 // SPDX-License-Identifier: MPL-2.0

 // Tests for gpu::SparseContext: GPU SpMV/SpMM via cuSPARSE.

 #define EIGEN_USE_GPU
 #include "main.h"
 #include <Eigen/Sparse>
 #include <unsupported/Eigen/GPU>
 #include "gpu_test_helpers.h"

 using namespace Eigen;

 // ---- Helper: build a random sparse matrix -----------------------------------

 template <typename Scalar>
 SparseMatrix<Scalar, ColMajor, int> make_sparse(Index rows, Index cols, double density = 0.1) {
   using SpMat = SparseMatrix<Scalar, ColMajor, int>;
   using RealScalar = typename NumTraits<Scalar>::Real;

   SpMat R(rows, cols);
   R.reserve(VectorXi::Constant(cols, static_cast<int>(rows * density) + 1));
   for (Index j = 0; j < cols; ++j) {
     for (Index i = 0; i < rows; ++i) {
       if ((std::rand() / double(RAND_MAX)) < density) {
         const RealScalar re = RealScalar(std::rand() / double(RAND_MAX) - 0.5);
         const RealScalar im = RealScalar(std::rand() / double(RAND_MAX) - 0.5);
         R.insert(i, j) = gpu_test::make_test_value<Scalar>(re, im);
       }
     }
   }
   R.makeCompressed();
   return R;
 }

 // ---- SpMV: y = A * x -------------------------------------------------------

 template <typename Scalar>
 void test_spmv(Index rows, Index cols) {
   using SpMat = SparseMatrix<Scalar, ColMajor, int>;
   using Vec = Matrix<Scalar, Dynamic, 1>;
   using RealScalar = typename NumTraits<Scalar>::Real;

   SpMat A = make_sparse<Scalar>(rows, cols);
   Vec x = Vec::Random(cols);

   gpu::SparseContext<Scalar> ctx;
   Vec y_gpu = ctx.multiply(A, x);
   Vec y_cpu = A * x;

   RealScalar tol = RealScalar(10) * RealScalar((std::max)(rows, cols)) * NumTraits<Scalar>::epsilon();
   VERIFY_IS_EQUAL(y_gpu.size(), rows);
   VERIFY((y_gpu - y_cpu).norm() / (y_cpu.norm() + RealScalar(1)) < tol);
 }

 // ---- SpMV with alpha/beta: y = alpha*A*x + beta*y ---------------------------

 template <typename Scalar>
 void test_spmv_alpha_beta(Index n) {
   using SpMat = SparseMatrix<Scalar, ColMajor, int>;
   using Vec = Matrix<Scalar, Dynamic, 1>;
   using RealScalar = typename NumTraits<Scalar>::Real;

   SpMat A = make_sparse<Scalar>(n, n);
   Vec x = Vec::Random(n);
   Vec y_init = Vec::Random(n);

   Scalar alpha(2);
   Scalar beta(3);

   Vec y_cpu = alpha * (A * x) + beta * y_init;

   gpu::SparseContext<Scalar> ctx;
   Vec y_gpu = y_init;
   ctx.multiply(A, x, y_gpu, alpha, beta);

   RealScalar tol = RealScalar(10) * RealScalar(n) * NumTraits<Scalar>::epsilon();
   VERIFY((y_gpu - y_cpu).norm() / (y_cpu.norm() + RealScalar(1)) < tol);
 }

 // ---- Transpose: y = A^T * x ------------------------------------------------

 template <typename Scalar>
 void test_spmv_transpose(Index rows, Index cols) {
   using SpMat = SparseMatrix<Scalar, ColMajor, int>;
   using Vec = Matrix<Scalar, Dynamic, 1>;
   using RealScalar = typename NumTraits<Scalar>::Real;

   SpMat A = make_sparse<Scalar>(rows, cols);
   Vec x = Vec::Random(rows);

   gpu::SparseContext<Scalar> ctx;
   Vec y_gpu = ctx.multiplyT(A, x);
   Vec y_cpu = A.transpose() * x;

   RealScalar tol = RealScalar(10) * RealScalar((std::max)(rows, cols)) * NumTraits<Scalar>::epsilon();
   VERIFY_IS_EQUAL(y_gpu.size(), cols);
   VERIFY((y_gpu - y_cpu).norm() / (y_cpu.norm() + RealScalar(1)) < tol);
 }

 // ---- SpMV adjoint: y = A^H * x ----------------------------------------------

 template <typename Scalar>
 void test_spmv_adjoint(Index rows, Index cols) {
   using SpMat = SparseMatrix<Scalar, ColMajor, int>;
   using Vec = Matrix<Scalar, Dynamic, 1>;
   using RealScalar = typename NumTraits<Scalar>::Real;

   SpMat A = make_sparse<Scalar>(rows, cols);
   Vec x = Vec::Random(rows);

   gpu::SparseContext<Scalar> ctx;
   Vec y_gpu = ctx.multiplyAdjoint(A, x);
   Vec y_cpu = A.adjoint() * x;

   RealScalar tol = RealScalar(10) * RealScalar((std::max)(rows, cols)) * NumTraits<Scalar>::epsilon();
   VERIFY_IS_EQUAL(y_gpu.size(), cols);
   VERIFY((y_gpu - y_cpu).norm() / (y_cpu.norm() + RealScalar(1)) < tol);
 }

 // ---- SpMM: Y = A * X (multiple RHS) ----------------------------------------

 template <typename Scalar>
 void test_spmm(Index rows, Index cols, Index nrhs) {
   using SpMat = SparseMatrix<Scalar, ColMajor, int>;
   using Mat = Matrix<Scalar, Dynamic, Dynamic>;
   using RealScalar = typename NumTraits<Scalar>::Real;

   SpMat A = make_sparse<Scalar>(rows, cols);
   Mat X = Mat::Random(cols, nrhs);

   gpu::SparseContext<Scalar> ctx;
   Mat Y_gpu = ctx.multiplyMat(A, X);
   Mat Y_cpu = A * X;

   RealScalar tol = RealScalar(10) * RealScalar((std::max)(rows, cols)) * NumTraits<Scalar>::epsilon();
   VERIFY_IS_EQUAL(Y_gpu.rows(), rows);
   VERIFY_IS_EQUAL(Y_gpu.cols(), nrhs);
   VERIFY((Y_gpu - Y_cpu).norm() / (Y_cpu.norm() + RealScalar(1)) < tol);
 }

 // ---- SpMM transpose: Y = A^T * X --------------------------------------------

 template <typename Scalar>
 void test_spmm_transpose(Index rows, Index cols, Index nrhs) {
   using SpMat = SparseMatrix<Scalar, ColMajor, int>;
   using Mat = Matrix<Scalar, Dynamic, Dynamic>;
   using RealScalar = typename NumTraits<Scalar>::Real;

   SpMat A = make_sparse<Scalar>(rows, cols);
   Mat X = Mat::Random(rows, nrhs);

   gpu::SparseContext<Scalar> ctx;
   Mat Y_gpu = ctx.multiplyMat(A, X, gpu::GpuOp::Trans);
   Mat Y_cpu = A.transpose() * X;

   RealScalar tol = RealScalar(10) * RealScalar((std::max)(rows, cols)) * NumTraits<Scalar>::epsilon();
   VERIFY_IS_EQUAL(Y_gpu.rows(), cols);
   VERIFY_IS_EQUAL(Y_gpu.cols(), nrhs);
   VERIFY((Y_gpu - Y_cpu).norm() / (Y_cpu.norm() + RealScalar(1)) < tol);
 }

 // ---- Identity matrix: I * x = x --------------------------------------------

 template <typename Scalar>
 void test_identity(Index n) {
   using SpMat = SparseMatrix<Scalar, ColMajor, int>;
   using Vec = Matrix<Scalar, Dynamic, 1>;
   using RealScalar = typename NumTraits<Scalar>::Real;

   // Build sparse identity.
   SpMat eye(n, n);
   eye.setIdentity();
   eye.makeCompressed();

   Vec x = Vec::Random(n);

   gpu::SparseContext<Scalar> ctx;
   Vec y = ctx.multiply(eye, x);

   RealScalar tol = NumTraits<Scalar>::epsilon();
   VERIFY((y - x).norm() < tol);
 }

 // ---- Context reuse ----------------------------------------------------------

 template <typename Scalar>
 void test_reuse(Index n) {
   using SpMat = SparseMatrix<Scalar, ColMajor, int>;
   using Vec = Matrix<Scalar, Dynamic, 1>;
   using RealScalar = typename NumTraits<Scalar>::Real;

   gpu::SparseContext<Scalar> ctx;
   RealScalar tol = RealScalar(10) * RealScalar(n) * NumTraits<Scalar>::epsilon();

   for (int trial = 0; trial < 3; ++trial) {
     SpMat A = make_sparse<Scalar>(n, n);
     Vec x = Vec::Random(n);
     Vec y_gpu = ctx.multiply(A, x);
     Vec y_cpu = A * x;
     VERIFY((y_gpu - y_cpu).norm() / (y_cpu.norm() + RealScalar(1)) < tol);
   }
 }

 // ---- Empty ------------------------------------------------------------------

 template <typename Scalar>
 void test_empty() {
   using SpMat = SparseMatrix<Scalar, ColMajor, int>;
   using Vec = Matrix<Scalar, Dynamic, 1>;

   SpMat A(0, 0);
   A.makeCompressed();
   Vec x(0);

   gpu::SparseContext<Scalar> ctx;
   Vec y = ctx.multiply(A, x);
   VERIFY_IS_EQUAL(y.size(), 0);
 }

 // ---- Per-scalar driver ------------------------------------------------------

 template <typename Scalar>
 void test_scalar() {
   CALL_SUBTEST(test_spmv<Scalar>(64, 64));
   CALL_SUBTEST(test_spmv<Scalar>(128, 64));  // non-square
   CALL_SUBTEST(test_spmv<Scalar>(64, 128));  // wide
   CALL_SUBTEST(test_spmv_alpha_beta<Scalar>(64));
   CALL_SUBTEST(test_spmv_transpose<Scalar>(128, 64));
   CALL_SUBTEST(test_spmv_adjoint<Scalar>(128, 64));
   CALL_SUBTEST(test_spmm<Scalar>(64, 64, 4));
   CALL_SUBTEST(test_spmm_transpose<Scalar>(128, 64, 4));
   CALL_SUBTEST(test_identity<Scalar>(64));
   CALL_SUBTEST(test_reuse<Scalar>(64));
   CALL_SUBTEST(test_empty<Scalar>());
 }

 EIGEN_DECLARE_TEST(gpu_cusparse_spmv) {
   gpu_test::require_cusparse_context();

   // Split by scalar so each part compiles in parallel.
   CALL_SUBTEST_1(test_scalar<float>());
   CALL_SUBTEST_2(test_scalar<double>());
   CALL_SUBTEST_3(test_scalar<std::complex<float>>());
   CALL_SUBTEST_4(test_scalar<std::complex<double>>());
 }
	// This file is part of Eigen, a lightweight C++ template library
	// for linear algebra.
	//
	// Copyright (C) 2026 Rasmus Munk Larsen <rmlarsen@gmail.com>
	//
	// This Source Code Form is subject to the terms of the Mozilla
	// Public License v. 2.0. If a copy of the MPL was not distributed
	// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
	// SPDX-License-Identifier: MPL-2.0

	// Tests for gpu::SparseContext: GPU SpMV/SpMM via cuSPARSE.

	#define EIGEN_USE_GPU
	#include "main.h"
	#include <Eigen/Sparse>
	#include <unsupported/Eigen/GPU>
	#include "gpu_test_helpers.h"

	using namespace Eigen;

	// ---- Helper: build a random sparse matrix -----------------------------------

	template <typename Scalar>
	SparseMatrix<Scalar, ColMajor, int> make_sparse(Index rows, Index cols, double density = 0.1) {
	using SpMat = SparseMatrix<Scalar, ColMajor, int>;
	using RealScalar = typename NumTraits<Scalar>::Real;

	SpMat R(rows, cols);
	R.reserve(VectorXi::Constant(cols, static_cast<int>(rows * density) + 1));
	for (Index j = 0; j < cols; ++j) {
	for (Index i = 0; i < rows; ++i) {
	if ((std::rand() / double(RAND_MAX)) < density) {
	const RealScalar re = RealScalar(std::rand() / double(RAND_MAX) - 0.5);
	const RealScalar im = RealScalar(std::rand() / double(RAND_MAX) - 0.5);
	R.insert(i, j) = gpu_test::make_test_value<Scalar>(re, im);
	}
	}
	}
	R.makeCompressed();
	return R;
	}

	// ---- SpMV: y = A * x -------------------------------------------------------

	template <typename Scalar>
	void test_spmv(Index rows, Index cols) {
	using SpMat = SparseMatrix<Scalar, ColMajor, int>;
	using Vec = Matrix<Scalar, Dynamic, 1>;
	using RealScalar = typename NumTraits<Scalar>::Real;

	SpMat A = make_sparse<Scalar>(rows, cols);
	Vec x = Vec::Random(cols);

	gpu::SparseContext<Scalar> ctx;
	Vec y_gpu = ctx.multiply(A, x);
	Vec y_cpu = A * x;

	RealScalar tol = RealScalar(10) * RealScalar((std::max)(rows, cols)) * NumTraits<Scalar>::epsilon();
	VERIFY_IS_EQUAL(y_gpu.size(), rows);
	VERIFY((y_gpu - y_cpu).norm() / (y_cpu.norm() + RealScalar(1)) < tol);
	}

	// ---- SpMV with alpha/beta: y = alphaAx + beta*y ---------------------------

	template <typename Scalar>
	void test_spmv_alpha_beta(Index n) {
	using SpMat = SparseMatrix<Scalar, ColMajor, int>;
	using Vec = Matrix<Scalar, Dynamic, 1>;
	using RealScalar = typename NumTraits<Scalar>::Real;

	SpMat A = make_sparse<Scalar>(n, n);
	Vec x = Vec::Random(n);
	Vec y_init = Vec::Random(n);

	Scalar alpha(2);
	Scalar beta(3);

	Vec y_cpu = alpha * (A * x) + beta * y_init;

	gpu::SparseContext<Scalar> ctx;
	Vec y_gpu = y_init;
	ctx.multiply(A, x, y_gpu, alpha, beta);

	RealScalar tol = RealScalar(10) * RealScalar(n) * NumTraits<Scalar>::epsilon();
	VERIFY((y_gpu - y_cpu).norm() / (y_cpu.norm() + RealScalar(1)) < tol);
	}

	// ---- Transpose: y = A^T * x ------------------------------------------------

	template <typename Scalar>
	void test_spmv_transpose(Index rows, Index cols) {
	using SpMat = SparseMatrix<Scalar, ColMajor, int>;
	using Vec = Matrix<Scalar, Dynamic, 1>;
	using RealScalar = typename NumTraits<Scalar>::Real;

	SpMat A = make_sparse<Scalar>(rows, cols);
	Vec x = Vec::Random(rows);

	gpu::SparseContext<Scalar> ctx;
	Vec y_gpu = ctx.multiplyT(A, x);
	Vec y_cpu = A.transpose() * x;

	RealScalar tol = RealScalar(10) * RealScalar((std::max)(rows, cols)) * NumTraits<Scalar>::epsilon();
	VERIFY_IS_EQUAL(y_gpu.size(), cols);
	VERIFY((y_gpu - y_cpu).norm() / (y_cpu.norm() + RealScalar(1)) < tol);
	}

	// ---- SpMV adjoint: y = A^H * x ----------------------------------------------

	template <typename Scalar>
	void test_spmv_adjoint(Index rows, Index cols) {
	using SpMat = SparseMatrix<Scalar, ColMajor, int>;
	using Vec = Matrix<Scalar, Dynamic, 1>;
	using RealScalar = typename NumTraits<Scalar>::Real;

	SpMat A = make_sparse<Scalar>(rows, cols);
	Vec x = Vec::Random(rows);

	gpu::SparseContext<Scalar> ctx;
	Vec y_gpu = ctx.multiplyAdjoint(A, x);
	Vec y_cpu = A.adjoint() * x;

	RealScalar tol = RealScalar(10) * RealScalar((std::max)(rows, cols)) * NumTraits<Scalar>::epsilon();
	VERIFY_IS_EQUAL(y_gpu.size(), cols);
	VERIFY((y_gpu - y_cpu).norm() / (y_cpu.norm() + RealScalar(1)) < tol);
	}

	// ---- SpMM: Y = A * X (multiple RHS) ----------------------------------------

	template <typename Scalar>
	void test_spmm(Index rows, Index cols, Index nrhs) {
	using SpMat = SparseMatrix<Scalar, ColMajor, int>;
	using Mat = Matrix<Scalar, Dynamic, Dynamic>;
	using RealScalar = typename NumTraits<Scalar>::Real;

	SpMat A = make_sparse<Scalar>(rows, cols);
	Mat X = Mat::Random(cols, nrhs);

	gpu::SparseContext<Scalar> ctx;
	Mat Y_gpu = ctx.multiplyMat(A, X);
	Mat Y_cpu = A * X;

	RealScalar tol = RealScalar(10) * RealScalar((std::max)(rows, cols)) * NumTraits<Scalar>::epsilon();
	VERIFY_IS_EQUAL(Y_gpu.rows(), rows);
	VERIFY_IS_EQUAL(Y_gpu.cols(), nrhs);
	VERIFY((Y_gpu - Y_cpu).norm() / (Y_cpu.norm() + RealScalar(1)) < tol);
	}

	// ---- SpMM transpose: Y = A^T * X --------------------------------------------

	template <typename Scalar>
	void test_spmm_transpose(Index rows, Index cols, Index nrhs) {
	using SpMat = SparseMatrix<Scalar, ColMajor, int>;
	using Mat = Matrix<Scalar, Dynamic, Dynamic>;
	using RealScalar = typename NumTraits<Scalar>::Real;

	SpMat A = make_sparse<Scalar>(rows, cols);
	Mat X = Mat::Random(rows, nrhs);

	gpu::SparseContext<Scalar> ctx;
	Mat Y_gpu = ctx.multiplyMat(A, X, gpu::GpuOp::Trans);
	Mat Y_cpu = A.transpose() * X;

	RealScalar tol = RealScalar(10) * RealScalar((std::max)(rows, cols)) * NumTraits<Scalar>::epsilon();
	VERIFY_IS_EQUAL(Y_gpu.rows(), cols);
	VERIFY_IS_EQUAL(Y_gpu.cols(), nrhs);
	VERIFY((Y_gpu - Y_cpu).norm() / (Y_cpu.norm() + RealScalar(1)) < tol);
	}

	// ---- Identity matrix: I * x = x --------------------------------------------

	template <typename Scalar>
	void test_identity(Index n) {
	using SpMat = SparseMatrix<Scalar, ColMajor, int>;
	using Vec = Matrix<Scalar, Dynamic, 1>;
	using RealScalar = typename NumTraits<Scalar>::Real;

	// Build sparse identity.
	SpMat eye(n, n);
	eye.setIdentity();
	eye.makeCompressed();

	Vec x = Vec::Random(n);

	gpu::SparseContext<Scalar> ctx;
	Vec y = ctx.multiply(eye, x);

	RealScalar tol = NumTraits<Scalar>::epsilon();
	VERIFY((y - x).norm() < tol);
	}

	// ---- Context reuse ----------------------------------------------------------

	template <typename Scalar>
	void test_reuse(Index n) {
	using SpMat = SparseMatrix<Scalar, ColMajor, int>;
	using Vec = Matrix<Scalar, Dynamic, 1>;
	using RealScalar = typename NumTraits<Scalar>::Real;

	gpu::SparseContext<Scalar> ctx;
	RealScalar tol = RealScalar(10) * RealScalar(n) * NumTraits<Scalar>::epsilon();

	for (int trial = 0; trial < 3; ++trial) {
	SpMat A = make_sparse<Scalar>(n, n);
	Vec x = Vec::Random(n);
	Vec y_gpu = ctx.multiply(A, x);
	Vec y_cpu = A * x;
	VERIFY((y_gpu - y_cpu).norm() / (y_cpu.norm() + RealScalar(1)) < tol);
	}
	}

	// ---- Empty ------------------------------------------------------------------

	template <typename Scalar>
	void test_empty() {
	using SpMat = SparseMatrix<Scalar, ColMajor, int>;
	using Vec = Matrix<Scalar, Dynamic, 1>;

	SpMat A(0, 0);
	A.makeCompressed();
	Vec x(0);

	gpu::SparseContext<Scalar> ctx;
	Vec y = ctx.multiply(A, x);
	VERIFY_IS_EQUAL(y.size(), 0);
	}

	// ---- Per-scalar driver ------------------------------------------------------

	template <typename Scalar>
	void test_scalar() {
	CALL_SUBTEST(test_spmv<Scalar>(64, 64));
	CALL_SUBTEST(test_spmv<Scalar>(128, 64)); // non-square
	CALL_SUBTEST(test_spmv<Scalar>(64, 128)); // wide
	CALL_SUBTEST(test_spmv_alpha_beta<Scalar>(64));
	CALL_SUBTEST(test_spmv_transpose<Scalar>(128, 64));
	CALL_SUBTEST(test_spmv_adjoint<Scalar>(128, 64));
	CALL_SUBTEST(test_spmm<Scalar>(64, 64, 4));
	CALL_SUBTEST(test_spmm_transpose<Scalar>(128, 64, 4));
	CALL_SUBTEST(test_identity<Scalar>(64));
	CALL_SUBTEST(test_reuse<Scalar>(64));
	CALL_SUBTEST(test_empty<Scalar>());
	}

	EIGEN_DECLARE_TEST(gpu_cusparse_spmv) {
	gpu_test::require_cusparse_context();

	// Split by scalar so each part compiles in parallel.
	CALL_SUBTEST_1(test_scalar<float>());
	CALL_SUBTEST_2(test_scalar<double>());
	CALL_SUBTEST_3(test_scalar<std::complex<float>>());
	CALL_SUBTEST_4(test_scalar<std::complex<double>>());
	}