GPU: Fix size_t/int mismatch in NPP stream context attribute query

cudaDeviceGetAttribute writes its result through an int*, but NppStreamContext::nSharedMemPerBlock is a size_t. Query the attribute into a local int and cast the result to size_t.

The CI failure on the previous commit affected all five GPU builds (cuda-11.5 gcc-10/clang-14, cuda-12.6 gcc-13/clang-19, msvc-14.29).

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
diff --git a/unsupported/Eigen/src/GPU/DeviceScalarOps.h b/unsupported/Eigen/src/GPU/DeviceScalarOps.h index cd1171d..d6bc46a 100644 --- a/unsupported/Eigen/src/GPU/DeviceScalarOps.h +++ b/unsupported/Eigen/src/GPU/DeviceScalarOps.h
@@ -36,7 +36,9 @@ cudaDeviceGetAttribute(&ctx.nMultiProcessorCount, cudaDevAttrMultiProcessorCount, ctx.nCudaDeviceId); cudaDeviceGetAttribute(&ctx.nMaxThreadsPerMultiProcessor, cudaDevAttrMaxThreadsPerMultiProcessor, ctx.nCudaDeviceId); cudaDeviceGetAttribute(&ctx.nMaxThreadsPerBlock, cudaDevAttrMaxThreadsPerBlock, ctx.nCudaDeviceId); - cudaDeviceGetAttribute(&ctx.nSharedMemPerBlock, cudaDevAttrMaxSharedMemoryPerBlock, ctx.nCudaDeviceId); + int shared_mem_per_block = 0; + cudaDeviceGetAttribute(&shared_mem_per_block, cudaDevAttrMaxSharedMemoryPerBlock, ctx.nCudaDeviceId); + ctx.nSharedMemPerBlock = static_cast<size_t>(shared_mem_per_block); cudaStreamGetFlags(stream, &ctx.nStreamFlags); return ctx; }