unsupported/test/GPU/device_matrix.cpp - mirror - Git at Google

 // This file is part of Eigen, a lightweight C++ template library
 // for linear algebra.
 //
 // Copyright (C) 2026 Rasmus Munk Larsen <rmlarsen@gmail.com>
 //
 // This Source Code Form is subject to the terms of the Mozilla
 // Public License v. 2.0. If a copy of the MPL was not distributed
 // with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
 // SPDX-License-Identifier: MPL-2.0

 // Tests for DeviceMatrix and HostTransfer: typed RAII GPU memory wrapper.
 // No cuSOLVER dependency — only CUDA runtime.

 #define EIGEN_USE_GPU
 #include "main.h"
 #include <unsupported/Eigen/GPU>

 using namespace Eigen;

 // ---- Default construction ---------------------------------------------------

 void test_default_construct() {
   gpu::DeviceMatrix<double> dm;
   VERIFY(dm.empty());
   VERIFY_IS_EQUAL(dm.rows(), 0);
   VERIFY_IS_EQUAL(dm.cols(), 0);
   VERIFY(dm.data() == nullptr);
   VERIFY_IS_EQUAL(dm.sizeInBytes(), size_t(0));
 }

 // ---- Allocate uninitialized -------------------------------------------------

 // Constructing a DeviceMatrix<Scalar> from explicit dimensions must give a
 // non-empty object that reports those dimensions, exposes a non-null data
 // pointer, and whose byte size equals rows * cols * sizeof(Scalar).
 template <typename Scalar>
 void test_allocate(Index rows, Index cols) {
   gpu::DeviceMatrix<Scalar> buffer(rows, cols);
   const size_t expected_bytes = size_t(rows) * size_t(cols) * sizeof(Scalar);

   // Shape.
   VERIFY_IS_EQUAL(buffer.rows(), rows);
   VERIFY_IS_EQUAL(buffer.cols(), cols);
   VERIFY(!buffer.empty());

   // Storage.
   VERIFY(buffer.data() != nullptr);
   VERIFY_IS_EQUAL(buffer.sizeInBytes(), expected_bytes);
 }

 // ---- fromHost / toHost roundtrip (synchronous) ------------------------------

 // Uploads a random rows x cols host matrix with fromHost(), downloads it
 // again with toHost(), and checks that both the shape and the contents
 // match the host original.
 template <typename Scalar>
 void test_roundtrip(Index rows, Index cols) {
   typedef Eigen::Matrix<Scalar, Dynamic, Dynamic> HostMatrix;
   HostMatrix reference = HostMatrix::Random(rows, cols);

   // Host -> device.
   auto uploaded = gpu::DeviceMatrix<Scalar>::fromHost(reference);
   VERIFY(!uploaded.empty());
   VERIFY_IS_EQUAL(uploaded.rows(), rows);
   VERIFY_IS_EQUAL(uploaded.cols(), cols);

   // Device -> host.
   HostMatrix downloaded = uploaded.toHost();
   VERIFY_IS_EQUAL(downloaded.rows(), rows);
   VERIFY_IS_EQUAL(downloaded.cols(), cols);
   VERIFY_IS_APPROX(downloaded, reference);
 }

 // ---- fromHostAsync / toHostAsync roundtrip -----------------------------------

 // Round-trips a random rows x cols matrix through the asynchronous transfer
 // API and checks that the downloaded data matches the host original.
 //
 // Two scenarios are exercised:
 //   1. fromHostAsync() and toHostAsync() issued on the same stream.
 //   2. fromHostAsync() on one stream, toHostAsync() on a different stream.
 //
 // Device-side objects live in an inner scope that closes before the stream(s)
 // they were handed are destroyed, so none of their destructors run after
 // cudaStreamDestroy().
 // NOTE(review): whether those destructors actually touch the stream is not
 // visible from this file; the scoping makes the test independent of that.
 template <typename Scalar>
 void test_roundtrip_async(Index rows, Index cols) {
   using MatrixType = Eigen::Matrix<Scalar, Dynamic, Dynamic>;
   MatrixType host = MatrixType::Random(rows, cols);

   // ---- Scenario 1: upload and download on a single stream ----
   cudaStream_t stream;
   EIGEN_CUDA_RUNTIME_CHECK(cudaStreamCreate(&stream));
   {
     // Async upload from raw pointer.
     auto dm = gpu::DeviceMatrix<Scalar>::fromHostAsync(host.data(), rows, cols, stream);
     VERIFY_IS_EQUAL(dm.rows(), rows);
     VERIFY_IS_EQUAL(dm.cols(), cols);

     // Async download via HostTransfer future.
     auto transfer = dm.toHostAsync(stream);

     // get() blocks and returns the matrix.
     MatrixType result = transfer.get();
     VERIFY_IS_APPROX(result, host);
   }  // dm and transfer are destroyed here, while `stream` is still valid.
   EIGEN_CUDA_RUNTIME_CHECK(cudaStreamDestroy(stream));

   // ---- Scenario 2: upload on one stream, download on another ----
   cudaStream_t producer_stream;
   cudaStream_t consumer_stream;
   EIGEN_CUDA_RUNTIME_CHECK(cudaStreamCreate(&producer_stream));
   EIGEN_CUDA_RUNTIME_CHECK(cudaStreamCreate(&consumer_stream));
   {
     auto cross_stream_dm = gpu::DeviceMatrix<Scalar>::fromHostAsync(host.data(), rows, cols, producer_stream);
     auto cross_stream_transfer = cross_stream_dm.toHostAsync(consumer_stream);
     MatrixType cross_stream_result = cross_stream_transfer.get();
     VERIFY_IS_APPROX(cross_stream_result, host);
   }  // Both device-side objects are destroyed before either stream.
   EIGEN_CUDA_RUNTIME_CHECK(cudaStreamDestroy(consumer_stream));
   EIGEN_CUDA_RUNTIME_CHECK(cudaStreamDestroy(producer_stream));
 }

 // ---- HostTransfer::ready() and idempotent get() -----------------------------

 void test_host_transfer_ready() {
   using MatrixType = Eigen::Matrix<double, Dynamic, Dynamic>;
   MatrixType host = MatrixType::Random(100, 100);

   auto dm = gpu::DeviceMatrix<double>::fromHost(host);
   auto transfer = dm.toHostAsync();

   // After get(), ready() must return true.
   MatrixType result = transfer.get();
   VERIFY(transfer.ready());
   VERIFY_IS_APPROX(result, host);

   // get() is idempotent.
   MatrixType& result2 = transfer.get();
   VERIFY_IS_APPROX(result2, host);
 }

 // ---- HostTransfer move ------------------------------------------------------

 void test_host_transfer_move() {
   using MatrixType = Eigen::Matrix<double, Dynamic, Dynamic>;
   MatrixType host = MatrixType::Random(50, 50);

   auto dm = gpu::DeviceMatrix<double>::fromHost(host);
   auto transfer = dm.toHostAsync();

   gpu::HostTransfer<double> moved(std::move(transfer));
   MatrixType result = moved.get();
   VERIFY_IS_APPROX(result, host);
 }

 // ---- clone() produces independent copy --------------------------------------

 // Checks that clone() yields a copy that is unaffected when the source
 // DeviceMatrix is subsequently reassigned to different data.
 template <typename Scalar>
 void test_clone(Index rows, Index cols) {
   typedef Eigen::Matrix<Scalar, Dynamic, Dynamic> HostMatrix;
   HostMatrix first_payload = HostMatrix::Random(rows, cols);

   auto source = gpu::DeviceMatrix<Scalar>::fromHost(first_payload);
   auto snapshot = source.clone();

   // Replace the source's contents with a second, different random matrix.
   HostMatrix second_payload = HostMatrix::Random(rows, cols);
   source = gpu::DeviceMatrix<Scalar>::fromHost(second_payload);

   // The snapshot must still read back as the first payload.
   HostMatrix snapshot_contents = snapshot.toHost();
   VERIFY_IS_APPROX(snapshot_contents, first_payload);

   // The source must read back as the second payload.
   HostMatrix source_contents = source.toHost();
   VERIFY_IS_APPROX(source_contents, second_payload);
 }

 // ---- Move construct ---------------------------------------------------------

 // Move-constructs a DeviceMatrix from an uploaded one and checks that the
 // moved-from object reports empty with a null data pointer, while the new
 // object reports the original shape and reads back the original contents.
 template <typename Scalar>
 void test_move_construct(Index rows, Index cols) {
   typedef Eigen::Matrix<Scalar, Dynamic, Dynamic> HostMatrix;
   HostMatrix reference = HostMatrix::Random(rows, cols);

   auto source = gpu::DeviceMatrix<Scalar>::fromHost(reference);
   gpu::DeviceMatrix<Scalar> target(std::move(source));

   // State of the moved-from object.
   VERIFY(source.empty());
   VERIFY(source.data() == nullptr);

   // State of the newly constructed object.
   VERIFY_IS_EQUAL(target.rows(), rows);
   VERIFY_IS_EQUAL(target.cols(), cols);
   HostMatrix downloaded = target.toHost();
   VERIFY_IS_APPROX(downloaded, reference);
 }

 // ---- Move assign ------------------------------------------------------------

 // Move-assigns an uploaded DeviceMatrix into a default-constructed one and
 // checks both sides of the assignment: the moved-from object must report
 // empty with a null data pointer, and the destination must report the
 // original shape (rows and columns) and read back the original contents.
 template <typename Scalar>
 void test_move_assign(Index rows, Index cols) {
   using MatrixType = Eigen::Matrix<Scalar, Dynamic, Dynamic>;
   MatrixType host = MatrixType::Random(rows, cols);

   auto dm = gpu::DeviceMatrix<Scalar>::fromHost(host);
   gpu::DeviceMatrix<Scalar> dest;
   dest = std::move(dm);

   // Moved-from source: same expectations as the move-construction test.
   VERIFY(dm.empty());
   VERIFY(dm.data() == nullptr);

   // Destination: both dimensions and the contents must have carried over.
   VERIFY_IS_EQUAL(dest.rows(), rows);
   VERIFY_IS_EQUAL(dest.cols(), cols);
   MatrixType result = dest.toHost();
   VERIFY_IS_APPROX(result, host);
 }

 // ---- resize() ---------------------------------------------------------------

 void test_resize() {
   gpu::DeviceMatrix<double> dm(10, 20);
   VERIFY_IS_EQUAL(dm.rows(), 10);
   VERIFY_IS_EQUAL(dm.cols(), 20);

   dm.resize(50, 30);
   VERIFY_IS_EQUAL(dm.rows(), 50);
   VERIFY_IS_EQUAL(dm.cols(), 30);
   VERIFY(dm.data() != nullptr);

   // Resize to same dimensions is a no-op.
   double* ptr_before = dm.data();
   dm.resize(50, 30);
   VERIFY(dm.data() == ptr_before);
 }

 // ---- Empty / 0x0 matrix -----------------------------------------------------

 void test_empty() {
   using MatrixType = Eigen::Matrix<double, Dynamic, Dynamic>;
   MatrixType empty_mat(0, 0);

   auto dm = gpu::DeviceMatrix<double>::fromHost(empty_mat);
   VERIFY(dm.empty());
   VERIFY_IS_EQUAL(dm.rows(), 0);
   VERIFY_IS_EQUAL(dm.cols(), 0);

   MatrixType result = dm.toHost();
   VERIFY_IS_EQUAL(result.rows(), 0);
   VERIFY_IS_EQUAL(result.cols(), 0);
 }

 // ---- Per-scalar driver ------------------------------------------------------

 // Runs every Scalar-templated subtest in this file, except test_allocate,
 // for one scalar type: synchronous round trips at several shapes,
 // asynchronous round trips, then clone / move-construct / move-assign.
 template <typename Scalar>
 void test_scalar() {
   // Synchronous round trip, square shapes (including the 1x1 case).
   CALL_SUBTEST(test_roundtrip<Scalar>(1, 1));
   CALL_SUBTEST(test_roundtrip<Scalar>(64, 64));
   CALL_SUBTEST(test_roundtrip<Scalar>(256, 256));

   // Synchronous round trip, rectangular shapes (tall and wide).
   CALL_SUBTEST(test_roundtrip<Scalar>(100, 7));
   CALL_SUBTEST(test_roundtrip<Scalar>(7, 100));

   // Async roundtrip.
   CALL_SUBTEST(test_roundtrip_async<Scalar>(64, 64));
   CALL_SUBTEST(test_roundtrip_async<Scalar>(100, 7));

   // Copy and move behavior on a 64x64 matrix.
   CALL_SUBTEST(test_clone<Scalar>(64, 64));
   CALL_SUBTEST(test_move_construct<Scalar>(64, 64));
   CALL_SUBTEST(test_move_assign<Scalar>(64, 64));
 }

 // Test entry point. Runs the non-templated checks first, then the allocation
 // check for float and double, then the per-scalar suite for float, double
 // and their std::complex counterparts.
 // NOTE(review): test_allocate is not run for the complex scalar types.
 EIGEN_DECLARE_TEST(gpu_device_matrix) {
   CALL_SUBTEST(test_default_construct());
   CALL_SUBTEST(test_empty());
   CALL_SUBTEST(test_resize());
   CALL_SUBTEST(test_host_transfer_ready());
   CALL_SUBTEST(test_host_transfer_move());
   // Extra parentheses wrap the whole call expression passed to the macro.
   CALL_SUBTEST((test_allocate<float>(100, 50)));
   CALL_SUBTEST((test_allocate<double>(100, 50)));
   CALL_SUBTEST(test_scalar<float>());
   CALL_SUBTEST(test_scalar<double>());
   CALL_SUBTEST(test_scalar<std::complex<float>>());
   CALL_SUBTEST(test_scalar<std::complex<double>>());
 }
	// This file is part of Eigen, a lightweight C++ template library
	// for linear algebra.
	//
	// Copyright (C) 2026 Rasmus Munk Larsen <rmlarsen@gmail.com>
	//
	// This Source Code Form is subject to the terms of the Mozilla
	// Public License v. 2.0. If a copy of the MPL was not distributed
	// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
	// SPDX-License-Identifier: MPL-2.0

	// Tests for DeviceMatrix and HostTransfer: typed RAII GPU memory wrapper.
	// No cuSOLVER dependency — only CUDA runtime.

	#define EIGEN_USE_GPU
	#include "main.h"
	#include <unsupported/Eigen/GPU>

	using namespace Eigen;

	// ---- Default construction ---------------------------------------------------

	void test_default_construct() {
	gpu::DeviceMatrix<double> dm;
	VERIFY(dm.empty());
	VERIFY_IS_EQUAL(dm.rows(), 0);
	VERIFY_IS_EQUAL(dm.cols(), 0);
	VERIFY(dm.data() == nullptr);
	VERIFY_IS_EQUAL(dm.sizeInBytes(), size_t(0));
	}

	// ---- Allocate uninitialized -------------------------------------------------

	// Constructing a DeviceMatrix<Scalar> from explicit dimensions must give a
	// non-empty object that reports those dimensions, exposes a non-null data
	// pointer, and whose byte size equals rows * cols * sizeof(Scalar).
	template <typename Scalar>
	void test_allocate(Index rows, Index cols) {
	  gpu::DeviceMatrix<Scalar> buffer(rows, cols);
	  const size_t expected_bytes = size_t(rows) * size_t(cols) * sizeof(Scalar);

	  // Shape.
	  VERIFY_IS_EQUAL(buffer.rows(), rows);
	  VERIFY_IS_EQUAL(buffer.cols(), cols);
	  VERIFY(!buffer.empty());

	  // Storage.
	  VERIFY(buffer.data() != nullptr);
	  VERIFY_IS_EQUAL(buffer.sizeInBytes(), expected_bytes);
	}

	// ---- fromHost / toHost roundtrip (synchronous) ------------------------------

	// Uploads a random rows x cols host matrix with fromHost(), downloads it
	// again with toHost(), and checks that both the shape and the contents
	// match the host original.
	template <typename Scalar>
	void test_roundtrip(Index rows, Index cols) {
	  typedef Eigen::Matrix<Scalar, Dynamic, Dynamic> HostMatrix;
	  HostMatrix reference = HostMatrix::Random(rows, cols);

	  // Host -> device.
	  auto uploaded = gpu::DeviceMatrix<Scalar>::fromHost(reference);
	  VERIFY(!uploaded.empty());
	  VERIFY_IS_EQUAL(uploaded.rows(), rows);
	  VERIFY_IS_EQUAL(uploaded.cols(), cols);

	  // Device -> host.
	  HostMatrix downloaded = uploaded.toHost();
	  VERIFY_IS_EQUAL(downloaded.rows(), rows);
	  VERIFY_IS_EQUAL(downloaded.cols(), cols);
	  VERIFY_IS_APPROX(downloaded, reference);
	}

	// ---- fromHostAsync / toHostAsync roundtrip -----------------------------------

	// Round-trips a random rows x cols matrix through the asynchronous transfer
	// API and checks that the downloaded data matches the host original.
	//
	// Two scenarios are exercised:
	//   1. fromHostAsync() and toHostAsync() issued on the same stream.
	//   2. fromHostAsync() on one stream, toHostAsync() on a different stream.
	//
	// Device-side objects live in an inner scope that closes before the stream(s)
	// they were handed are destroyed, so none of their destructors run after
	// cudaStreamDestroy().
	// NOTE(review): whether those destructors actually touch the stream is not
	// visible from this file; the scoping makes the test independent of that.
	template <typename Scalar>
	void test_roundtrip_async(Index rows, Index cols) {
	  using MatrixType = Eigen::Matrix<Scalar, Dynamic, Dynamic>;
	  MatrixType host = MatrixType::Random(rows, cols);

	  // ---- Scenario 1: upload and download on a single stream ----
	  cudaStream_t stream;
	  EIGEN_CUDA_RUNTIME_CHECK(cudaStreamCreate(&stream));
	  {
	    // Async upload from raw pointer.
	    auto dm = gpu::DeviceMatrix<Scalar>::fromHostAsync(host.data(), rows, cols, stream);
	    VERIFY_IS_EQUAL(dm.rows(), rows);
	    VERIFY_IS_EQUAL(dm.cols(), cols);

	    // Async download via HostTransfer future.
	    auto transfer = dm.toHostAsync(stream);

	    // get() blocks and returns the matrix.
	    MatrixType result = transfer.get();
	    VERIFY_IS_APPROX(result, host);
	  }  // dm and transfer are destroyed here, while `stream` is still valid.
	  EIGEN_CUDA_RUNTIME_CHECK(cudaStreamDestroy(stream));

	  // ---- Scenario 2: upload on one stream, download on another ----
	  cudaStream_t producer_stream;
	  cudaStream_t consumer_stream;
	  EIGEN_CUDA_RUNTIME_CHECK(cudaStreamCreate(&producer_stream));
	  EIGEN_CUDA_RUNTIME_CHECK(cudaStreamCreate(&consumer_stream));
	  {
	    auto cross_stream_dm = gpu::DeviceMatrix<Scalar>::fromHostAsync(host.data(), rows, cols, producer_stream);
	    auto cross_stream_transfer = cross_stream_dm.toHostAsync(consumer_stream);
	    MatrixType cross_stream_result = cross_stream_transfer.get();
	    VERIFY_IS_APPROX(cross_stream_result, host);
	  }  // Both device-side objects are destroyed before either stream.
	  EIGEN_CUDA_RUNTIME_CHECK(cudaStreamDestroy(consumer_stream));
	  EIGEN_CUDA_RUNTIME_CHECK(cudaStreamDestroy(producer_stream));
	}

	// ---- HostTransfer::ready() and idempotent get() -----------------------------

	void test_host_transfer_ready() {
	using MatrixType = Eigen::Matrix<double, Dynamic, Dynamic>;
	MatrixType host = MatrixType::Random(100, 100);

	auto dm = gpu::DeviceMatrix<double>::fromHost(host);
	auto transfer = dm.toHostAsync();

	// After get(), ready() must return true.
	MatrixType result = transfer.get();
	VERIFY(transfer.ready());
	VERIFY_IS_APPROX(result, host);

	// get() is idempotent.
	MatrixType& result2 = transfer.get();
	VERIFY_IS_APPROX(result2, host);
	}

	// ---- HostTransfer move ------------------------------------------------------

	void test_host_transfer_move() {
	using MatrixType = Eigen::Matrix<double, Dynamic, Dynamic>;
	MatrixType host = MatrixType::Random(50, 50);

	auto dm = gpu::DeviceMatrix<double>::fromHost(host);
	auto transfer = dm.toHostAsync();

	gpu::HostTransfer<double> moved(std::move(transfer));
	MatrixType result = moved.get();
	VERIFY_IS_APPROX(result, host);
	}

	// ---- clone() produces independent copy --------------------------------------

	// Checks that clone() yields a copy that is unaffected when the source
	// DeviceMatrix is subsequently reassigned to different data.
	template <typename Scalar>
	void test_clone(Index rows, Index cols) {
	  typedef Eigen::Matrix<Scalar, Dynamic, Dynamic> HostMatrix;
	  HostMatrix first_payload = HostMatrix::Random(rows, cols);

	  auto source = gpu::DeviceMatrix<Scalar>::fromHost(first_payload);
	  auto snapshot = source.clone();

	  // Replace the source's contents with a second, different random matrix.
	  HostMatrix second_payload = HostMatrix::Random(rows, cols);
	  source = gpu::DeviceMatrix<Scalar>::fromHost(second_payload);

	  // The snapshot must still read back as the first payload.
	  HostMatrix snapshot_contents = snapshot.toHost();
	  VERIFY_IS_APPROX(snapshot_contents, first_payload);

	  // The source must read back as the second payload.
	  HostMatrix source_contents = source.toHost();
	  VERIFY_IS_APPROX(source_contents, second_payload);
	}

	// ---- Move construct ---------------------------------------------------------

	// Move-constructs a DeviceMatrix from an uploaded one and checks that the
	// moved-from object reports empty with a null data pointer, while the new
	// object reports the original shape and reads back the original contents.
	template <typename Scalar>
	void test_move_construct(Index rows, Index cols) {
	  typedef Eigen::Matrix<Scalar, Dynamic, Dynamic> HostMatrix;
	  HostMatrix reference = HostMatrix::Random(rows, cols);

	  auto source = gpu::DeviceMatrix<Scalar>::fromHost(reference);
	  gpu::DeviceMatrix<Scalar> target(std::move(source));

	  // State of the moved-from object.
	  VERIFY(source.empty());
	  VERIFY(source.data() == nullptr);

	  // State of the newly constructed object.
	  VERIFY_IS_EQUAL(target.rows(), rows);
	  VERIFY_IS_EQUAL(target.cols(), cols);
	  HostMatrix downloaded = target.toHost();
	  VERIFY_IS_APPROX(downloaded, reference);
	}

	// ---- Move assign ------------------------------------------------------------

	// Move-assigns an uploaded DeviceMatrix into a default-constructed one and
	// checks both sides of the assignment: the moved-from object must report
	// empty with a null data pointer, and the destination must report the
	// original shape (rows and columns) and read back the original contents.
	template <typename Scalar>
	void test_move_assign(Index rows, Index cols) {
	  using MatrixType = Eigen::Matrix<Scalar, Dynamic, Dynamic>;
	  MatrixType host = MatrixType::Random(rows, cols);

	  auto dm = gpu::DeviceMatrix<Scalar>::fromHost(host);
	  gpu::DeviceMatrix<Scalar> dest;
	  dest = std::move(dm);

	  // Moved-from source: same expectations as the move-construction test.
	  VERIFY(dm.empty());
	  VERIFY(dm.data() == nullptr);

	  // Destination: both dimensions and the contents must have carried over.
	  VERIFY_IS_EQUAL(dest.rows(), rows);
	  VERIFY_IS_EQUAL(dest.cols(), cols);
	  MatrixType result = dest.toHost();
	  VERIFY_IS_APPROX(result, host);
	}

	// ---- resize() ---------------------------------------------------------------

	void test_resize() {
	gpu::DeviceMatrix<double> dm(10, 20);
	VERIFY_IS_EQUAL(dm.rows(), 10);
	VERIFY_IS_EQUAL(dm.cols(), 20);

	dm.resize(50, 30);
	VERIFY_IS_EQUAL(dm.rows(), 50);
	VERIFY_IS_EQUAL(dm.cols(), 30);
	VERIFY(dm.data() != nullptr);

	// Resize to same dimensions is a no-op.
	double* ptr_before = dm.data();
	dm.resize(50, 30);
	VERIFY(dm.data() == ptr_before);
	}

	// ---- Empty / 0x0 matrix -----------------------------------------------------

	void test_empty() {
	using MatrixType = Eigen::Matrix<double, Dynamic, Dynamic>;
	MatrixType empty_mat(0, 0);

	auto dm = gpu::DeviceMatrix<double>::fromHost(empty_mat);
	VERIFY(dm.empty());
	VERIFY_IS_EQUAL(dm.rows(), 0);
	VERIFY_IS_EQUAL(dm.cols(), 0);

	MatrixType result = dm.toHost();
	VERIFY_IS_EQUAL(result.rows(), 0);
	VERIFY_IS_EQUAL(result.cols(), 0);
	}

	// ---- Per-scalar driver ------------------------------------------------------

	// Runs every Scalar-templated subtest in this file, except test_allocate,
	// for one scalar type: synchronous round trips at several shapes,
	// asynchronous round trips, then clone / move-construct / move-assign.
	template <typename Scalar>
	void test_scalar() {
	// Synchronous round trip, square shapes (including the 1x1 case).
	CALL_SUBTEST(test_roundtrip<Scalar>(1, 1));
	CALL_SUBTEST(test_roundtrip<Scalar>(64, 64));
	CALL_SUBTEST(test_roundtrip<Scalar>(256, 256));

	// Synchronous round trip, rectangular shapes (tall and wide).
	CALL_SUBTEST(test_roundtrip<Scalar>(100, 7));
	CALL_SUBTEST(test_roundtrip<Scalar>(7, 100));

	// Async roundtrip.
	CALL_SUBTEST(test_roundtrip_async<Scalar>(64, 64));
	CALL_SUBTEST(test_roundtrip_async<Scalar>(100, 7));

	// Copy and move behavior on a 64x64 matrix.
	CALL_SUBTEST(test_clone<Scalar>(64, 64));
	CALL_SUBTEST(test_move_construct<Scalar>(64, 64));
	CALL_SUBTEST(test_move_assign<Scalar>(64, 64));
	}

	// Test entry point. Runs the non-templated checks first, then the allocation
	// check for float and double, then the per-scalar suite for float, double
	// and their std::complex counterparts.
	// NOTE(review): test_allocate is not run for the complex scalar types.
	EIGEN_DECLARE_TEST(gpu_device_matrix) {
	CALL_SUBTEST(test_default_construct());
	CALL_SUBTEST(test_empty());
	CALL_SUBTEST(test_resize());
	CALL_SUBTEST(test_host_transfer_ready());
	CALL_SUBTEST(test_host_transfer_move());
	// Extra parentheses wrap the whole call expression passed to the macro.
	CALL_SUBTEST((test_allocate<float>(100, 50)));
	CALL_SUBTEST((test_allocate<double>(100, 50)));
	CALL_SUBTEST(test_scalar<float>());
	CALL_SUBTEST(test_scalar<double>());
	CALL_SUBTEST(test_scalar<std::complex<float>>());
	CALL_SUBTEST(test_scalar<std::complex<double>>());
	}