blob: 4bdec4face80b48894627032dadd15eae81220a9 [file] [edit]
// This file is part of Eigen, a lightweight C++ template library
// for linear algebra.
//
// Copyright (C) 2026 Rasmus Munk Larsen <rmlarsen@gmail.com>
//
// This Source Code Form is subject to the terms of the Mozilla
// Public License v. 2.0. If a copy of the MPL was not distributed
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
// SPDX-License-Identifier: MPL-2.0
// Unified GPU execution context.
//
// gpu::Context owns a CUDA stream and NVIDIA library handles (cuBLAS
// eagerly, cuSOLVER lazily on first use). It is the entry point for all
// GPU operations on gpu::DeviceMatrix.
//
// The cuSOLVER handle is created on the first call to cusolverHandle()
// so that translation units which only use cuFFT or cuBLAS paths (e.g.
// the cufft test) do not pull cusolverDn* symbols into the link.
//
// Usage:
// gpu::Context ctx; // explicit context
// d_C.device(ctx) = d_A * d_B; // GEMM on ctx's stream
//
// d_C = d_A * d_B; // thread-local default context
// gpu::Context& ctx = gpu::Context::threadLocal();
#ifndef EIGEN_GPU_CONTEXT_H
#define EIGEN_GPU_CONTEXT_H
// IWYU pragma: private
#include "./InternalHeaderCheck.h"
#include "./CuBlasSupport.h"
#include "./CuSolverSupport.h"
namespace Eigen {
namespace gpu {
/** \ingroup GPU_Module
* \class Context
* \brief Unified GPU execution context owning a CUDA stream and library handles.
*
* Each Context instance creates a dedicated CUDA stream and a cuBLAS handle
* bound to that stream. The cuSOLVER handle is created on first use via
* cusolverHandle(); translation units that never call it do not require
* cuSOLVER at link time. Multiple contexts enable concurrent execution on
* independent streams.
*
* A lazily-created thread-local default is available via threadLocal() for
* simple single-stream usage. A single Context is not thread-safe — use one
* per thread, or external synchronization, since cuBLAS / cuSOLVER handles
* are not thread-safe per handle and cusolverHandle() lazy-init is racy.
*/
class Context {
public:
Context() {
EIGEN_CUDA_RUNTIME_CHECK(cudaStreamCreate(&stream_));
EIGEN_CUBLAS_CHECK(cublasCreate(&cublas_));
EIGEN_CUBLAS_CHECK(cublasSetStream(cublas_, stream_));
}
~Context() {
// Indirect call keeps cusolverDnDestroy out of TUs that never call cusolverHandle().
if (cusolver_destroyer_) (void)cusolver_destroyer_(cusolver_);
if (cublas_) (void)cublasDestroy(cublas_);
if (stream_) (void)cudaStreamDestroy(stream_);
}
// Non-copyable, non-movable (owns library handles).
Context(const Context&) = delete;
Context& operator=(const Context&) = delete;
Context(Context&&) = delete;
Context& operator=(Context&&) = delete;
/** Lazily-created thread-local default context.
*
* \note The thread-local instance is destroyed when the thread exits (or at
* static destruction time for the main thread). On some CUDA driver
* configurations this may print "CUDA_ERROR_DEINITIALIZED" to stderr if the
* CUDA context has already been torn down. These errors are harmless and are
* suppressed in the destructor, but they can produce noise in test output.
* To avoid this, call cudaDeviceReset() only after all Context instances
* (including thread-local ones) have been destroyed. */
static Context& threadLocal() {
thread_local Context ctx;
return ctx;
}
cudaStream_t stream() const { return stream_; }
cublasHandle_t cublasHandle() const { return cublas_; }
/** Returns the cuSOLVER handle, creating it on first call. */
cusolverDnHandle_t cusolverHandle() {
if (!cusolver_) {
EIGEN_CUSOLVER_CHECK(cusolverDnCreate(&cusolver_));
EIGEN_CUSOLVER_CHECK(cusolverDnSetStream(cusolver_, stream_));
cusolver_destroyer_ = &destroyCusolver;
}
return cusolver_;
}
private:
static cusolverStatus_t destroyCusolver(cusolverDnHandle_t h) { return cusolverDnDestroy(h); }
cudaStream_t stream_ = nullptr;
cublasHandle_t cublas_ = nullptr;
cusolverDnHandle_t cusolver_ = nullptr;
cusolverStatus_t (*cusolver_destroyer_)(cusolverDnHandle_t) = nullptr;
};
} // namespace gpu
} // namespace Eigen
#endif // EIGEN_GPU_CONTEXT_H