unsupported/test/GPU/cusolver_llt.cpp - mirror - Git at Google

 // This file is part of Eigen, a lightweight C++ template library
 // for linear algebra.
 //
 // Copyright (C) 2026 Eigen Authors
 //
 // This Source Code Form is subject to the terms of the Mozilla
 // Public License v. 2.0. If a copy of the MPL was not distributed
 // with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
 // SPDX-License-Identifier: MPL-2.0

 // Tests for GpuLLT: GPU Cholesky (LL^T) using cuSOLVER.
 // Covers cusolverDnXpotrf (factorization) and cusolverDnXpotrs (solve)
 // for float, double, complex<float>, complex<double>, Lower and Upper.

 #define EIGEN_USE_GPU
 #include "main.h"
 #include <Eigen/Cholesky>
 #include <unsupported/Eigen/GPU>

 // Identifier convention throughout this file:
 //   h_ prefix for host-resident Eigen::Matrix values
 //   d_ prefix for device-resident Eigen::gpu::DeviceMatrix values
 // We also keep namespaces explicit (Eigen::, Eigen::gpu::) so the CPU vs GPU
 // path is obvious at every call site.

 // Build a random symmetric positive-definite matrix: A = M^H*M + n*I.
 template <typename MatrixType>
 MatrixType make_spd(Eigen::Index n) {
   using Scalar = typename MatrixType::Scalar;
   MatrixType M = MatrixType::Random(n, n);
   return M.adjoint() * M + MatrixType::Identity(n, n) * static_cast<Scalar>(n);
 }

 // Test factorization: L*L^H must reconstruct A to within floating-point tolerance.
 template <typename Scalar, int UpLo>
 void test_potrf(Eigen::Index n) {
   using MatrixType = Eigen::Matrix<Scalar, Eigen::Dynamic, Eigen::Dynamic>;
   using RealScalar = typename Eigen::NumTraits<Scalar>::Real;

   MatrixType h_A = make_spd<MatrixType>(n);

   // GPU factorization under test: factor stays on device.
   Eigen::gpu::LLT<Scalar, UpLo> gpu_llt(h_A);
   VERIFY_IS_EQUAL(gpu_llt.info(), Eigen::Success);

   // CPU LLT is the oracle. GpuLLT does not expose the device-resident factor,
   // so we validate correctness via the CPU-reconstructed matrix.
   Eigen::LLT<MatrixType, UpLo> cpu_llt(h_A);
   VERIFY_IS_EQUAL(cpu_llt.info(), Eigen::Success);
   MatrixType h_A_reconstructed = cpu_llt.reconstructedMatrix();

   RealScalar tol = RealScalar(4) * RealScalar(n) * Eigen::NumTraits<Scalar>::epsilon() * h_A.norm();
   VERIFY((h_A_reconstructed - h_A).norm() < tol);

   // Cross-check: GPU and CPU solves must agree on the same RHS. `solve(Matrix)`
   // uploads/downloads internally, so both outputs end up on the host.
   MatrixType h_b = MatrixType::Random(n, 1);
   MatrixType h_x_gpu = gpu_llt.solve(h_b);
   MatrixType h_x_cpu = cpu_llt.solve(h_b);
   VERIFY((h_x_gpu - h_x_cpu).norm() < tol);
 }

 // Test solve: residual ||A*X - B|| / ||B|| must be small.
 template <typename Scalar, int UpLo>
 void test_potrs(Eigen::Index n, Eigen::Index nrhs) {
   using MatrixType = Eigen::Matrix<Scalar, Eigen::Dynamic, Eigen::Dynamic>;
   using RealScalar = typename Eigen::NumTraits<Scalar>::Real;

   MatrixType h_A = make_spd<MatrixType>(n);
   MatrixType h_B = MatrixType::Random(n, nrhs);

   Eigen::gpu::LLT<Scalar, UpLo> gpu_llt(h_A);
   VERIFY_IS_EQUAL(gpu_llt.info(), Eigen::Success);

   MatrixType h_X = gpu_llt.solve(h_B);

   RealScalar residual = (h_A * h_X - h_B).norm() / h_B.norm();
   RealScalar tol = RealScalar(n) * Eigen::NumTraits<Scalar>::epsilon();
   VERIFY(residual < tol);
 }

 // Test that multiple solves against the same factor all produce correct results.
 // This exercises the key design property: L stays on device across calls.
 template <typename Scalar>
 void test_multiple_solves(Eigen::Index n) {
   using MatrixType = Eigen::Matrix<Scalar, Eigen::Dynamic, Eigen::Dynamic>;
   using RealScalar = typename Eigen::NumTraits<Scalar>::Real;

   MatrixType h_A = make_spd<MatrixType>(n);
   Eigen::gpu::LLT<Scalar, Eigen::Lower> gpu_llt(h_A);
   VERIFY_IS_EQUAL(gpu_llt.info(), Eigen::Success);

   RealScalar tol = RealScalar(n) * Eigen::NumTraits<Scalar>::epsilon();
   for (int k = 0; k < 5; ++k) {
     MatrixType h_B = MatrixType::Random(n, 3);
     MatrixType h_X = gpu_llt.solve(h_B);
     RealScalar residual = (h_A * h_X - h_B).norm() / h_B.norm();
     VERIFY(residual < tol);
   }
 }

 // Test that GpuLLT correctly detects a non-SPD matrix.
 void test_not_spd() {
   Eigen::MatrixXd h_A = -Eigen::MatrixXd::Identity(8, 8);  // negative definite
   Eigen::gpu::LLT<double> gpu_llt(h_A);
   VERIFY_IS_EQUAL(gpu_llt.info(), Eigen::NumericalIssue);
 }

 // solve(DeviceMatrix) must not silently return garbage when the factorization
 // failed: it must sync the info word and assert just like solve(MatrixBase).
 void test_not_spd_device_solve_asserts() {
   Eigen::MatrixXd h_A = -Eigen::MatrixXd::Identity(8, 8);
   Eigen::MatrixXd h_B = Eigen::MatrixXd::Random(8, 4);
   Eigen::gpu::LLT<double> gpu_llt(h_A);
   VERIFY_IS_EQUAL(gpu_llt.info(), Eigen::NumericalIssue);
   auto d_B = Eigen::gpu::DeviceMatrix<double>::fromHost(h_B);
   VERIFY_RAISES_ASSERT(gpu_llt.solve(d_B));
 }

 // ---- DeviceMatrix-native API --------------------------------------------
 // These tests exercise the device-resident path: compute(DeviceMatrix) +
 // solve(DeviceMatrix) -> DeviceMatrix, with the user explicitly managing
 // upload/download. The tests above use the host-Matrix overloads which do
 // the transfers internally; this section covers the no-implicit-transfer
 // surface that keeps data on device across a chain of calls.

 // compute(DeviceMatrix) + solve(DeviceMatrix) → toHost
 template <typename Scalar, int UpLo>
 void test_device_matrix_solve(Eigen::Index n, Eigen::Index nrhs) {
   using MatrixType = Eigen::Matrix<Scalar, Eigen::Dynamic, Eigen::Dynamic>;
   using RealScalar = typename Eigen::NumTraits<Scalar>::Real;

   MatrixType h_A = make_spd<MatrixType>(n);
   MatrixType h_B = MatrixType::Random(n, nrhs);

   auto d_A = Eigen::gpu::DeviceMatrix<Scalar>::fromHost(h_A);
   auto d_B = Eigen::gpu::DeviceMatrix<Scalar>::fromHost(h_B);

   Eigen::gpu::LLT<Scalar, UpLo> gpu_llt;
   gpu_llt.compute(d_A);
   VERIFY_IS_EQUAL(gpu_llt.info(), Eigen::Success);

   Eigen::gpu::DeviceMatrix<Scalar> d_X = gpu_llt.solve(d_B);
   MatrixType h_X = d_X.toHost();

   RealScalar residual = (h_A * h_X - h_B).norm() / h_B.norm();
   VERIFY(residual < RealScalar(n) * Eigen::NumTraits<Scalar>::epsilon());
 }

 // compute(DeviceMatrix&&) — move path
 template <typename Scalar>
 void test_device_matrix_move_compute(Eigen::Index n) {
   using MatrixType = Eigen::Matrix<Scalar, Eigen::Dynamic, Eigen::Dynamic>;
   using RealScalar = typename Eigen::NumTraits<Scalar>::Real;

   MatrixType h_A = make_spd<MatrixType>(n);
   MatrixType h_B = MatrixType::Random(n, 1);

   auto d_A = Eigen::gpu::DeviceMatrix<Scalar>::fromHost(h_A);
   Eigen::gpu::LLT<Scalar, Eigen::Lower> gpu_llt;
   gpu_llt.compute(std::move(d_A));
   VERIFY_IS_EQUAL(gpu_llt.info(), Eigen::Success);

   // d_A should be empty after move.
   VERIFY(d_A.empty());

   MatrixType h_X = gpu_llt.solve(h_B);
   RealScalar residual = (h_A * h_X - h_B).norm() / h_B.norm();
   VERIFY(residual < RealScalar(n) * Eigen::NumTraits<Scalar>::epsilon());
 }

 // Full async chain: compute → solve → solve again with result as RHS → toHost
 template <typename Scalar>
 void test_chaining(Eigen::Index n) {
   using MatrixType = Eigen::Matrix<Scalar, Eigen::Dynamic, Eigen::Dynamic>;
   using RealScalar = typename Eigen::NumTraits<Scalar>::Real;

   MatrixType h_A = make_spd<MatrixType>(n);
   MatrixType h_B = MatrixType::Random(n, 3);

   auto d_A = Eigen::gpu::DeviceMatrix<Scalar>::fromHost(h_A);
   auto d_B = Eigen::gpu::DeviceMatrix<Scalar>::fromHost(h_B);

   Eigen::gpu::LLT<Scalar, Eigen::Lower> gpu_llt;
   gpu_llt.compute(d_A);
   VERIFY_IS_EQUAL(gpu_llt.info(), Eigen::Success);

   // Chain: solve → use result as RHS for another solve. Everything stays on
   // device until the final toHost() below; that's the only sync point.
   Eigen::gpu::DeviceMatrix<Scalar> d_X = gpu_llt.solve(d_B);
   Eigen::gpu::DeviceMatrix<Scalar> d_Y = gpu_llt.solve(d_X);

   MatrixType h_Y = d_Y.toHost();

   // Verify: Y = A^{-2} * B using CPU oracle.
   MatrixType h_X_ref = Eigen::LLT<MatrixType, Eigen::Lower>(h_A).solve(h_B);
   MatrixType h_Y_ref = Eigen::LLT<MatrixType, Eigen::Lower>(h_A).solve(h_X_ref);

   RealScalar tol = RealScalar(4) * RealScalar(n) * Eigen::NumTraits<Scalar>::epsilon() * h_Y_ref.norm();
   VERIFY((h_Y - h_Y_ref).norm() < tol);
 }

 template <typename Scalar>
 void test_scalar() {
   CALL_SUBTEST((test_potrf<Scalar, Eigen::Lower>(1)));
   CALL_SUBTEST((test_potrf<Scalar, Eigen::Lower>(64)));
   CALL_SUBTEST((test_potrf<Scalar, Eigen::Lower>(256)));
   CALL_SUBTEST((test_potrf<Scalar, Eigen::Upper>(64)));
   CALL_SUBTEST((test_potrf<Scalar, Eigen::Upper>(256)));

   CALL_SUBTEST((test_potrs<Scalar, Eigen::Lower>(64, 1)));
   CALL_SUBTEST((test_potrs<Scalar, Eigen::Lower>(64, 4)));
   CALL_SUBTEST((test_potrs<Scalar, Eigen::Lower>(256, 8)));
   CALL_SUBTEST((test_potrs<Scalar, Eigen::Upper>(64, 1)));
   CALL_SUBTEST((test_potrs<Scalar, Eigen::Upper>(256, 4)));

   CALL_SUBTEST(test_multiple_solves<Scalar>(128));

   CALL_SUBTEST((test_device_matrix_solve<Scalar, Eigen::Lower>(64, 4)));
   CALL_SUBTEST((test_device_matrix_solve<Scalar, Eigen::Upper>(128, 1)));
   CALL_SUBTEST(test_device_matrix_move_compute<Scalar>(64));
   CALL_SUBTEST(test_chaining<Scalar>(64));
 }

 EIGEN_DECLARE_TEST(gpu_cusolver_llt) {
   // Split by scalar so each part compiles in parallel.
   CALL_SUBTEST_1(test_scalar<float>());
   CALL_SUBTEST_2(test_scalar<double>());
   CALL_SUBTEST_3(test_scalar<std::complex<float>>());
   CALL_SUBTEST_4(test_scalar<std::complex<double>>());
   CALL_SUBTEST_5(test_not_spd());
   CALL_SUBTEST_5(test_not_spd_device_solve_asserts());
 }
	// This file is part of Eigen, a lightweight C++ template library
	// for linear algebra.
	//
	// Copyright (C) 2026 Eigen Authors
	//
	// This Source Code Form is subject to the terms of the Mozilla
	// Public License v. 2.0. If a copy of the MPL was not distributed
	// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
	// SPDX-License-Identifier: MPL-2.0

	// Tests for GpuLLT: GPU Cholesky (LL^T) using cuSOLVER.
	// Covers cusolverDnXpotrf (factorization) and cusolverDnXpotrs (solve)
	// for float, double, complex<float>, complex<double>, Lower and Upper.

	#define EIGEN_USE_GPU
	#include "main.h"
	#include <Eigen/Cholesky>
	#include <unsupported/Eigen/GPU>

	// Identifier convention throughout this file:
	// h_ prefix for host-resident Eigen::Matrix values
	// d_ prefix for device-resident Eigen::gpu::DeviceMatrix values
	// We also keep namespaces explicit (Eigen::, Eigen::gpu::) so the CPU vs GPU
	// path is obvious at every call site.

	// Build a random symmetric positive-definite matrix: A = M^HM + nI.
	template <typename MatrixType>
	MatrixType make_spd(Eigen::Index n) {
	using Scalar = typename MatrixType::Scalar;
	MatrixType M = MatrixType::Random(n, n);
	return M.adjoint() * M + MatrixType::Identity(n, n) * static_cast<Scalar>(n);
	}

	// Test factorization: L*L^H must reconstruct A to within floating-point tolerance.
	template <typename Scalar, int UpLo>
	void test_potrf(Eigen::Index n) {
	using MatrixType = Eigen::Matrix<Scalar, Eigen::Dynamic, Eigen::Dynamic>;
	using RealScalar = typename Eigen::NumTraits<Scalar>::Real;

	MatrixType h_A = make_spd<MatrixType>(n);

	// GPU factorization under test: factor stays on device.
	Eigen::gpu::LLT<Scalar, UpLo> gpu_llt(h_A);
	VERIFY_IS_EQUAL(gpu_llt.info(), Eigen::Success);

	// CPU LLT is the oracle. GpuLLT does not expose the device-resident factor,
	// so we validate correctness via the CPU-reconstructed matrix.
	Eigen::LLT<MatrixType, UpLo> cpu_llt(h_A);
	VERIFY_IS_EQUAL(cpu_llt.info(), Eigen::Success);
	MatrixType h_A_reconstructed = cpu_llt.reconstructedMatrix();

	RealScalar tol = RealScalar(4) * RealScalar(n) * Eigen::NumTraits<Scalar>::epsilon() * h_A.norm();
	VERIFY((h_A_reconstructed - h_A).norm() < tol);

	// Cross-check: GPU and CPU solves must agree on the same RHS. `solve(Matrix)`
	// uploads/downloads internally, so both outputs end up on the host.
	MatrixType h_b = MatrixType::Random(n, 1);
	MatrixType h_x_gpu = gpu_llt.solve(h_b);
	MatrixType h_x_cpu = cpu_llt.solve(h_b);
	VERIFY((h_x_gpu - h_x_cpu).norm() < tol);
	}

	// Test solve: residual \|\|A*X - B\|\| / \|\|B\|\| must be small.
	template <typename Scalar, int UpLo>
	void test_potrs(Eigen::Index n, Eigen::Index nrhs) {
	using MatrixType = Eigen::Matrix<Scalar, Eigen::Dynamic, Eigen::Dynamic>;
	using RealScalar = typename Eigen::NumTraits<Scalar>::Real;

	MatrixType h_A = make_spd<MatrixType>(n);
	MatrixType h_B = MatrixType::Random(n, nrhs);

	Eigen::gpu::LLT<Scalar, UpLo> gpu_llt(h_A);
	VERIFY_IS_EQUAL(gpu_llt.info(), Eigen::Success);

	MatrixType h_X = gpu_llt.solve(h_B);

	RealScalar residual = (h_A * h_X - h_B).norm() / h_B.norm();
	RealScalar tol = RealScalar(n) * Eigen::NumTraits<Scalar>::epsilon();
	VERIFY(residual < tol);
	}

	// Test that multiple solves against the same factor all produce correct results.
	// This exercises the key design property: L stays on device across calls.
	template <typename Scalar>
	void test_multiple_solves(Eigen::Index n) {
	using MatrixType = Eigen::Matrix<Scalar, Eigen::Dynamic, Eigen::Dynamic>;
	using RealScalar = typename Eigen::NumTraits<Scalar>::Real;

	MatrixType h_A = make_spd<MatrixType>(n);
	Eigen::gpu::LLT<Scalar, Eigen::Lower> gpu_llt(h_A);
	VERIFY_IS_EQUAL(gpu_llt.info(), Eigen::Success);

	RealScalar tol = RealScalar(n) * Eigen::NumTraits<Scalar>::epsilon();
	for (int k = 0; k < 5; ++k) {
	MatrixType h_B = MatrixType::Random(n, 3);
	MatrixType h_X = gpu_llt.solve(h_B);
	RealScalar residual = (h_A * h_X - h_B).norm() / h_B.norm();
	VERIFY(residual < tol);
	}
	}

	// Test that GpuLLT correctly detects a non-SPD matrix.
	void test_not_spd() {
	Eigen::MatrixXd h_A = -Eigen::MatrixXd::Identity(8, 8); // negative definite
	Eigen::gpu::LLT<double> gpu_llt(h_A);
	VERIFY_IS_EQUAL(gpu_llt.info(), Eigen::NumericalIssue);
	}

	// solve(DeviceMatrix) must not silently return garbage when the factorization
	// failed: it must sync the info word and assert just like solve(MatrixBase).
	void test_not_spd_device_solve_asserts() {
	Eigen::MatrixXd h_A = -Eigen::MatrixXd::Identity(8, 8);
	Eigen::MatrixXd h_B = Eigen::MatrixXd::Random(8, 4);
	Eigen::gpu::LLT<double> gpu_llt(h_A);
	VERIFY_IS_EQUAL(gpu_llt.info(), Eigen::NumericalIssue);
	auto d_B = Eigen::gpu::DeviceMatrix<double>::fromHost(h_B);
	VERIFY_RAISES_ASSERT(gpu_llt.solve(d_B));
	}

	// ---- DeviceMatrix-native API --------------------------------------------
	// These tests exercise the device-resident path: compute(DeviceMatrix) +
	// solve(DeviceMatrix) -> DeviceMatrix, with the user explicitly managing
	// upload/download. The tests above use the host-Matrix overloads which do
	// the transfers internally; this section covers the no-implicit-transfer
	// surface that keeps data on device across a chain of calls.

	// compute(DeviceMatrix) + solve(DeviceMatrix) → toHost
	template <typename Scalar, int UpLo>
	void test_device_matrix_solve(Eigen::Index n, Eigen::Index nrhs) {
	using MatrixType = Eigen::Matrix<Scalar, Eigen::Dynamic, Eigen::Dynamic>;
	using RealScalar = typename Eigen::NumTraits<Scalar>::Real;

	MatrixType h_A = make_spd<MatrixType>(n);
	MatrixType h_B = MatrixType::Random(n, nrhs);

	auto d_A = Eigen::gpu::DeviceMatrix<Scalar>::fromHost(h_A);
	auto d_B = Eigen::gpu::DeviceMatrix<Scalar>::fromHost(h_B);

	Eigen::gpu::LLT<Scalar, UpLo> gpu_llt;
	gpu_llt.compute(d_A);
	VERIFY_IS_EQUAL(gpu_llt.info(), Eigen::Success);

	Eigen::gpu::DeviceMatrix<Scalar> d_X = gpu_llt.solve(d_B);
	MatrixType h_X = d_X.toHost();

	RealScalar residual = (h_A * h_X - h_B).norm() / h_B.norm();
	VERIFY(residual < RealScalar(n) * Eigen::NumTraits<Scalar>::epsilon());
	}

	// compute(DeviceMatrix&&) — move path
	template <typename Scalar>
	void test_device_matrix_move_compute(Eigen::Index n) {
	using MatrixType = Eigen::Matrix<Scalar, Eigen::Dynamic, Eigen::Dynamic>;
	using RealScalar = typename Eigen::NumTraits<Scalar>::Real;

	MatrixType h_A = make_spd<MatrixType>(n);
	MatrixType h_B = MatrixType::Random(n, 1);

	auto d_A = Eigen::gpu::DeviceMatrix<Scalar>::fromHost(h_A);
	Eigen::gpu::LLT<Scalar, Eigen::Lower> gpu_llt;
	gpu_llt.compute(std::move(d_A));
	VERIFY_IS_EQUAL(gpu_llt.info(), Eigen::Success);

	// d_A should be empty after move.
	VERIFY(d_A.empty());

	MatrixType h_X = gpu_llt.solve(h_B);
	RealScalar residual = (h_A * h_X - h_B).norm() / h_B.norm();
	VERIFY(residual < RealScalar(n) * Eigen::NumTraits<Scalar>::epsilon());
	}

	// Full async chain: compute → solve → solve again with result as RHS → toHost
	template <typename Scalar>
	void test_chaining(Eigen::Index n) {
	using MatrixType = Eigen::Matrix<Scalar, Eigen::Dynamic, Eigen::Dynamic>;
	using RealScalar = typename Eigen::NumTraits<Scalar>::Real;

	MatrixType h_A = make_spd<MatrixType>(n);
	MatrixType h_B = MatrixType::Random(n, 3);

	auto d_A = Eigen::gpu::DeviceMatrix<Scalar>::fromHost(h_A);
	auto d_B = Eigen::gpu::DeviceMatrix<Scalar>::fromHost(h_B);

	Eigen::gpu::LLT<Scalar, Eigen::Lower> gpu_llt;
	gpu_llt.compute(d_A);
	VERIFY_IS_EQUAL(gpu_llt.info(), Eigen::Success);

	// Chain: solve → use result as RHS for another solve. Everything stays on
	// device until the final toHost() below; that's the only sync point.
	Eigen::gpu::DeviceMatrix<Scalar> d_X = gpu_llt.solve(d_B);
	Eigen::gpu::DeviceMatrix<Scalar> d_Y = gpu_llt.solve(d_X);

	MatrixType h_Y = d_Y.toHost();

	// Verify: Y = A^{-2} * B using CPU oracle.
	MatrixType h_X_ref = Eigen::LLT<MatrixType, Eigen::Lower>(h_A).solve(h_B);
	MatrixType h_Y_ref = Eigen::LLT<MatrixType, Eigen::Lower>(h_A).solve(h_X_ref);

	RealScalar tol = RealScalar(4) * RealScalar(n) * Eigen::NumTraits<Scalar>::epsilon() * h_Y_ref.norm();
	VERIFY((h_Y - h_Y_ref).norm() < tol);
	}

	template <typename Scalar>
	void test_scalar() {
	CALL_SUBTEST((test_potrf<Scalar, Eigen::Lower>(1)));
	CALL_SUBTEST((test_potrf<Scalar, Eigen::Lower>(64)));
	CALL_SUBTEST((test_potrf<Scalar, Eigen::Lower>(256)));
	CALL_SUBTEST((test_potrf<Scalar, Eigen::Upper>(64)));
	CALL_SUBTEST((test_potrf<Scalar, Eigen::Upper>(256)));

	CALL_SUBTEST((test_potrs<Scalar, Eigen::Lower>(64, 1)));
	CALL_SUBTEST((test_potrs<Scalar, Eigen::Lower>(64, 4)));
	CALL_SUBTEST((test_potrs<Scalar, Eigen::Lower>(256, 8)));
	CALL_SUBTEST((test_potrs<Scalar, Eigen::Upper>(64, 1)));
	CALL_SUBTEST((test_potrs<Scalar, Eigen::Upper>(256, 4)));

	CALL_SUBTEST(test_multiple_solves<Scalar>(128));

	CALL_SUBTEST((test_device_matrix_solve<Scalar, Eigen::Lower>(64, 4)));
	CALL_SUBTEST((test_device_matrix_solve<Scalar, Eigen::Upper>(128, 1)));
	CALL_SUBTEST(test_device_matrix_move_compute<Scalar>(64));
	CALL_SUBTEST(test_chaining<Scalar>(64));
	}

	EIGEN_DECLARE_TEST(gpu_cusolver_llt) {
	// Split by scalar so each part compiles in parallel.
	CALL_SUBTEST_1(test_scalar<float>());
	CALL_SUBTEST_2(test_scalar<double>());
	CALL_SUBTEST_3(test_scalar<std::complex<float>>());
	CALL_SUBTEST_4(test_scalar<std::complex<double>>());
	CALL_SUBTEST_5(test_not_spd());
	CALL_SUBTEST_5(test_not_spd_device_solve_asserts());
	}