blob: 2c3511f28f07625e22ecb8f80deaa50ae64749a1 [file]
# GPU benchmarks require CUDA runtime + cuSOLVER.
# Build separately from the main benchmark tree since they need CUDA toolchain.
#
# Usage:
# cmake -G Ninja -B build-bench-gpu -S unsupported/benchmarks/GPU \
# -DCMAKE_CUDA_ARCHITECTURES=89
# cmake --build build-bench-gpu
#
# Profiling:
# nsys profile --trace=cuda ./build-bench-gpu/bench_solvers
# ncu --set full -o profile ./build-bench-gpu/bench_solvers --benchmark_filter=BM_GpuLLT_Compute/4096
cmake_minimum_required(VERSION 3.17)
project(EigenGpuBenchmarks CXX)
find_package(benchmark REQUIRED)
find_package(CUDAToolkit REQUIRED)
set(EIGEN_SOURCE_DIR "${CMAKE_CURRENT_SOURCE_DIR}/../../..")
function(eigen_add_gpu_benchmark name source)
cmake_parse_arguments(BENCH "" "" "LIBRARIES;DEFINITIONS" ${ARGN})
if(NOT IS_ABSOLUTE "${source}")
set(source "${CMAKE_CURRENT_SOURCE_DIR}/${source}")
endif()
add_executable(${name} ${source})
target_compile_features(${name} PRIVATE cxx_std_14)
target_include_directories(${name} PRIVATE
${EIGEN_SOURCE_DIR}
${CUDAToolkit_INCLUDE_DIRS})
target_link_libraries(${name} PRIVATE
benchmark::benchmark benchmark::benchmark_main
CUDA::cudart CUDA::cusolver CUDA::cublas)
if(BENCH_LIBRARIES)
target_link_libraries(${name} PRIVATE ${BENCH_LIBRARIES})
endif()
target_compile_options(${name} PRIVATE -O3 -DNDEBUG)
target_compile_definitions(${name} PRIVATE EIGEN_USE_GPU)
if(BENCH_DEFINITIONS)
target_compile_definitions(${name} PRIVATE ${BENCH_DEFINITIONS})
endif()
endfunction()
# Solver benchmarks: LLT/LU compute + solve, host vs device paths, CPU baselines.
eigen_add_gpu_benchmark(bench_solvers bench_solvers.cpp)
eigen_add_gpu_benchmark(bench_solvers_float bench_solvers.cpp DEFINITIONS SCALAR=float)
# Chaining benchmarks: async pipeline efficiency, host-roundtrip vs device chain.
eigen_add_gpu_benchmark(bench_chaining bench_chaining.cpp)
eigen_add_gpu_benchmark(bench_chaining_float bench_chaining.cpp DEFINITIONS SCALAR=float)
# Batching benchmarks: multi-stream concurrency for many small systems.
eigen_add_gpu_benchmark(bench_batching bench_batching.cpp)
eigen_add_gpu_benchmark(bench_batching_float bench_batching.cpp DEFINITIONS SCALAR=float)