blob: 4b02869fa697867e55d43f2d00cd9285002db61e [file] [edit]
// This file is part of Eigen, a lightweight C++ template library
// for linear algebra.
//
// Copyright (C) 2026 Rasmus Munk Larsen <rmlarsen@gmail.com>
//
// This Source Code Form is subject to the terms of the Mozilla
// Public License v. 2.0. If a copy of the MPL was not distributed
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
// SPDX-License-Identifier: MPL-2.0
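// Generic CUDA runtime support shared across all GPU library integrations
// (cuSOLVER and cuBLAS):
// - Generic transpose/adjoint operation flag (GpuOp)
// - Error-checking macros
// - Bounds-checked narrowing to `int` for BLAS-style arguments
// - RAII device buffer and RAII pinned host buffer
// - Scalar -> cudaDataType_t mapping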
//
// Only depends on <cuda_runtime.h>. No NVIDIA library headers.
#ifndef EIGEN_GPU_SUPPORT_H
#define EIGEN_GPU_SUPPORT_H
// IWYU pragma: private
#include "./InternalHeaderCheck.h"
#include <cuda_runtime.h>
#include <limits>
#include <memory>
namespace Eigen {
namespace gpu {
// ---- Generic operation flag -------------------------------------------------
// Public flag for transpose/adjoint in BLAS-, solver-, and sparse-style calls.
// Each library's support header maps this to its own enum (cublasOperation_t,
// cusparseOperation_t, etc.) via a small to_<lib>_op() helper.
enum class GpuOp {
  NoTrans,   // Use the operand as stored.
  Trans,     // Use the transpose of the operand.
  ConjTrans  // Use the conjugate transpose (adjoint) of the operand.
};
namespace internal {
// ---- Error-checking macros --------------------------------------------------
// EIGEN_CUDA_RUNTIME_CHECK(expr) evaluates `expr` exactly once and asserts
// (via eigen_assert) that it returned cudaSuccess. The call is always
// executed; the check itself only fires while eigen_assert is active, so a
// build that compiles eigen_assert out ignores the returned status.
// Not for use in destructors.
//
// The status is stored under a long, macro-specific name so it cannot capture
// an identifier appearing inside `expr`, and it is explicitly void-used so
// that builds with eigen_assert disabled do not warn about an unused variable.
#define EIGEN_CUDA_RUNTIME_CHECK(expr)                                              \
  do {                                                                              \
    const cudaError_t eigen_cuda_runtime_status_ = (expr);                          \
    static_cast<void>(eigen_cuda_runtime_status_);                                  \
    eigen_assert(eigen_cuda_runtime_status_ == cudaSuccess && "CUDA runtime call failed"); \
  } while (0)
// ---- Bounds-checked narrowing for cuBLAS/cuSOLVER int parameters ------------
// cuBLAS and the legacy cuSOLVER APIs take dimensions and leading dimensions as
// `int` (32-bit signed). Modern GPUs can host allocations whose dimensions
// exceed INT_MAX, and Eigen's Index is 64-bit by default. Use this helper at
// every narrowing call site so an out-of-range value triggers an assert
// instead of silently overflowing the BLAS argument.
// Narrows a 64-bit dimension to the `int` expected by BLAS-style APIs.
// Asserts (via eigen_assert) that `v` lies in [0, INT_MAX]; the cast itself is
// performed unconditionally.
inline int to_blas_int(int64_t v) {
  // `(max)` is parenthesised so a function-like `max` macro cannot expand here.
  eigen_assert(!(v < 0 || v > static_cast<int64_t>((std::numeric_limits<int>::max)())) &&
               "dimension exceeds the int range supported by cuBLAS / cuSOLVER");
  const int narrowed = static_cast<int>(v);
  return narrowed;
}
// ---- Custom deleters for CUDA-allocated memory ------------------------------
// Used with std::unique_ptr to give CUDA allocations RAII semantics with no
// hand-rolled move/dtor boilerplate.
// Deleter that hands a pointer to cudaFree unless it is marked as borrowed.
struct CudaFreeDeleter {
  // When `borrow == true`, the unique_ptr does not free the pointer. Used by
  // DeviceMatrix::view() to wrap a non-owning device pointer with the same
  // smart-pointer machinery as owning storage, without changing the type.
  bool borrow = false;

  // Frees `p` with cudaFree when it is non-null and not borrowed. The status
  // returned by cudaFree is intentionally discarded.
  void operator()(void* p) const noexcept {
    if (borrow || p == nullptr) return;
    static_cast<void>(cudaFree(p));
  }
};
// Deleter that hands a non-null pointer to cudaFreeHost.
struct CudaFreeHostDeleter {
  // Null pointers are skipped. The status returned by cudaFreeHost is
  // intentionally discarded.
  void operator()(void* p) const noexcept {
    if (p == nullptr) return;
    static_cast<void>(cudaFreeHost(p));
  }
};
// ---- RAII: device buffer ----------------------------------------------------
class DeviceBuffer {
public:
DeviceBuffer() = default;
explicit DeviceBuffer(size_t bytes) {
if (bytes > 0) {
void* p = nullptr;
EIGEN_CUDA_RUNTIME_CHECK(cudaMalloc(&p, bytes));
ptr_.reset(p);
}
}
void* get() const noexcept { return ptr_.get(); }
void* release() noexcept { return ptr_.release(); }
explicit operator bool() const noexcept { return static_cast<bool>(ptr_); }
// Adopt an existing device pointer. Caller relinquishes ownership.
static DeviceBuffer adopt(void* p) noexcept {
DeviceBuffer b;
b.ptr_.reset(p);
return b;
}
private:
std::unique_ptr<void, CudaFreeDeleter> ptr_;
};
// ---- RAII: pinned host buffer -----------------------------------------------
// For async D2H copies (cudaMemcpyAsync requires pinned host memory for true
// asynchrony and to avoid compute-sanitizer warnings).
// Owning handle around a pointer obtained from cudaMallocHost. The pointer is
// held in a std::unique_ptr with CudaFreeHostDeleter, so it is passed to
// cudaFreeHost when the buffer is destroyed.
class PinnedHostBuffer {
 public:
  // Creates an empty buffer holding no pointer.
  PinnedHostBuffer() = default;

  // Requests `bytes` bytes from cudaMallocHost and takes ownership of the
  // result. A zero-byte request makes no CUDA call and leaves the buffer empty.
  explicit PinnedHostBuffer(size_t bytes) {
    if (bytes == 0) return;
    void* host_mem = nullptr;
    EIGEN_CUDA_RUNTIME_CHECK(cudaMallocHost(&host_mem, bytes));
    ptr_.reset(host_mem);
  }

  // Returns the held pointer (nullptr when empty) without giving it up.
  void* get() const noexcept { return ptr_.get(); }

  // True when a non-null pointer is held.
  explicit operator bool() const noexcept { return ptr_.get() != nullptr; }

 private:
  std::unique_ptr<void, CudaFreeHostDeleter> ptr_;
};
// ---- Scalar → cudaDataType_t ------------------------------------------------
// Shared by cuBLAS and cuSOLVER. cudaDataType_t is defined in library_types.h
// which is included transitively by cuda_runtime.h.
template <typename Scalar>
struct cuda_data_type;
template <>
struct cuda_data_type<float> {
static constexpr cudaDataType_t value = CUDA_R_32F;
};
template <>
struct cuda_data_type<double> {
static constexpr cudaDataType_t value = CUDA_R_64F;
};
template <>
struct cuda_data_type<std::complex<float>> {
static constexpr cudaDataType_t value = CUDA_C_32F;
};
template <>
struct cuda_data_type<std::complex<double>> {
static constexpr cudaDataType_t value = CUDA_C_64F;
};
} // namespace internal
} // namespace gpu
} // namespace Eigen
#endif // EIGEN_GPU_SUPPORT_H