Updates corresponding to the latest round of PR feedback
The major changes are
1. Moving CUDA/PacketMath.h to GPU/PacketMath.h
2. Moving CUDA/MathFunctions.h to GPU/MathFunction.h
3. Moving CUDA/CudaSpecialFunctions.h to GPU/GpuSpecialFunctions.h
The above three changes effectively enable the Eigen "Packet" layer for the HIP platform
4. Merging the "hip_basic" and "cuda_basic" unit tests into one ("gpu_basic")
5. Updating the "EIGEN_DEVICE_FUNC" marking in some places
The change has been tested on the HIP and CUDA platforms.
diff --git a/Eigen/src/Core/MathFunctions.h b/Eigen/src/Core/MathFunctions.h
index 72aa68d..f16476a 100644
--- a/Eigen/src/Core/MathFunctions.h
+++ b/Eigen/src/Core/MathFunctions.h
@@ -982,7 +982,12 @@
EIGEN_DEVICE_FUNC
EIGEN_ALWAYS_INLINE long double mini(const long double& x, const long double& y)
{
+#if defined(EIGEN_HIPCC)
+ // no "fminl" on HIP yet
+ return (x < y) ? x : y;
+#else
return fminl(x, y);
+#endif
}
template<typename T>
@@ -1007,7 +1012,12 @@
EIGEN_DEVICE_FUNC
EIGEN_ALWAYS_INLINE long double maxi(const long double& x, const long double& y)
{
+#if defined(EIGEN_HIPCC)
+ // no "fmaxl" on HIP yet
+ return (x > y) ? x : y;
+#else
return fmaxl(x, y);
+#endif
}
#endif
diff --git a/Eigen/src/Core/arch/GPU/MathFunctions.h b/Eigen/src/Core/arch/GPU/MathFunctions.h
index ff6256c..d2b3a25 100644
--- a/Eigen/src/Core/arch/GPU/MathFunctions.h
+++ b/Eigen/src/Core/arch/GPU/MathFunctions.h
@@ -7,8 +7,8 @@
// Public License v. 2.0. If a copy of the MPL was not distributed
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
-#ifndef EIGEN_MATH_FUNCTIONS_CUDA_H
-#define EIGEN_MATH_FUNCTIONS_CUDA_H
+#ifndef EIGEN_MATH_FUNCTIONS_GPU_H
+#define EIGEN_MATH_FUNCTIONS_GPU_H
namespace Eigen {
@@ -17,7 +17,7 @@
// Make sure this is only available when targeting a GPU: we don't want to
// introduce conflicts between these packet_traits definitions and the ones
// we'll use on the host side (SSE, AVX, ...)
-#if defined(EIGEN_CUDACC) && defined(EIGEN_USE_GPU)
+#if defined(EIGEN_GPUCC) && defined(EIGEN_USE_GPU)
template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
float4 plog<float4>(const float4& a)
{
@@ -100,4 +100,4 @@
} // end namespace Eigen
-#endif // EIGEN_MATH_FUNCTIONS_CUDA_H
+#endif // EIGEN_MATH_FUNCTIONS_GPU_H
diff --git a/Eigen/src/Core/arch/GPU/PacketMath.h b/Eigen/src/Core/arch/GPU/PacketMath.h
index ab8e477..ddf37b9 100644
--- a/Eigen/src/Core/arch/GPU/PacketMath.h
+++ b/Eigen/src/Core/arch/GPU/PacketMath.h
@@ -7,8 +7,8 @@
// Public License v. 2.0. If a copy of the MPL was not distributed
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
-#ifndef EIGEN_PACKET_MATH_CUDA_H
-#define EIGEN_PACKET_MATH_CUDA_H
+#ifndef EIGEN_PACKET_MATH_GPU_H
+#define EIGEN_PACKET_MATH_GPU_H
namespace Eigen {
@@ -17,7 +17,7 @@
// Make sure this is only available when targeting a GPU: we don't want to
// introduce conflicts between these packet_traits definitions and the ones
// we'll use on the host side (SSE, AVX, ...)
-#if defined(EIGEN_CUDACC) && defined(EIGEN_USE_GPU)
+#if defined(EIGEN_GPUCC) && defined(EIGEN_USE_GPU)
template<> struct is_arithmetic<float4> { enum { value = true }; };
template<> struct is_arithmetic<double2> { enum { value = true }; };
@@ -338,4 +338,4 @@
} // end namespace Eigen
-#endif // EIGEN_PACKET_MATH_CUDA_H
+#endif // EIGEN_PACKET_MATH_GPU_H
diff --git a/Eigen/src/Core/products/GeneralMatrixVector.h b/Eigen/src/Core/products/GeneralMatrixVector.h
index b2a71bc..767feb9 100644
--- a/Eigen/src/Core/products/GeneralMatrixVector.h
+++ b/Eigen/src/Core/products/GeneralMatrixVector.h
@@ -48,7 +48,7 @@
typedef typename conditional<Vectorizable,_RhsPacket,RhsScalar>::type RhsPacket;
typedef typename conditional<Vectorizable,_ResPacket,ResScalar>::type ResPacket;
-EIGEN_DONT_INLINE static void run(
+EIGEN_DEVICE_FUNC EIGEN_DONT_INLINE static void run(
Index rows, Index cols,
const LhsMapper& lhs,
const RhsMapper& rhs,
@@ -57,7 +57,7 @@
};
template<typename Index, typename LhsScalar, typename LhsMapper, bool ConjugateLhs, typename RhsScalar, typename RhsMapper, bool ConjugateRhs, int Version>
-EIGEN_DONT_INLINE void general_matrix_vector_product<Index,LhsScalar,LhsMapper,ColMajor,ConjugateLhs,RhsScalar,RhsMapper,ConjugateRhs,Version>::run(
+EIGEN_DEVICE_FUNC EIGEN_DONT_INLINE void general_matrix_vector_product<Index,LhsScalar,LhsMapper,ColMajor,ConjugateLhs,RhsScalar,RhsMapper,ConjugateRhs,Version>::run(
Index rows, Index cols,
const LhsMapper& alhs,
const RhsMapper& rhs,
@@ -231,7 +231,7 @@
typedef typename conditional<Vectorizable,_RhsPacket,RhsScalar>::type RhsPacket;
typedef typename conditional<Vectorizable,_ResPacket,ResScalar>::type ResPacket;
-EIGEN_DONT_INLINE static void run(
+EIGEN_DEVICE_FUNC EIGEN_DONT_INLINE static void run(
Index rows, Index cols,
const LhsMapper& lhs,
const RhsMapper& rhs,
@@ -240,7 +240,7 @@
};
template<typename Index, typename LhsScalar, typename LhsMapper, bool ConjugateLhs, typename RhsScalar, typename RhsMapper, bool ConjugateRhs, int Version>
-EIGEN_DONT_INLINE void general_matrix_vector_product<Index,LhsScalar,LhsMapper,RowMajor,ConjugateLhs,RhsScalar,RhsMapper,ConjugateRhs,Version>::run(
+EIGEN_DEVICE_FUNC EIGEN_DONT_INLINE void general_matrix_vector_product<Index,LhsScalar,LhsMapper,RowMajor,ConjugateLhs,RhsScalar,RhsMapper,ConjugateRhs,Version>::run(
Index rows, Index cols,
const LhsMapper& alhs,
const RhsMapper& rhs,