Apply clang-format
diff --git a/Eigen/src/Core/arch/GPU/Complex.h b/Eigen/src/Core/arch/GPU/Complex.h
index 8a7869c..fa46aec 100644
--- a/Eigen/src/Core/arch/GPU/Complex.h
+++ b/Eigen/src/Core/arch/GPU/Complex.h
@@ -31,7 +31,7 @@
// to the first inclusion of <complex>.
#if defined(EIGEN_GPUCC) && defined(EIGEN_GPU_COMPILE_PHASE)
-
+
// ICC already specializes std::complex<float> and std::complex<double>
// operators, preventing us from making them device functions here.
// This will lead to silent runtime errors if the operators are used on device.
@@ -62,33 +62,30 @@
// Specialized std::complex overloads.
namespace complex_operator_detail {
-template<typename T>
-EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
-std::complex<T> complex_multiply(const std::complex<T>& a, const std::complex<T>& b) {
+template <typename T>
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE std::complex<T> complex_multiply(const std::complex<T>& a,
+ const std::complex<T>& b) {
const T a_real = numext::real(a);
const T a_imag = numext::imag(a);
const T b_real = numext::real(b);
const T b_imag = numext::imag(b);
- return std::complex<T>(
- a_real * b_real - a_imag * b_imag,
- a_imag * b_real + a_real * b_imag);
+ return std::complex<T>(a_real * b_real - a_imag * b_imag, a_imag * b_real + a_real * b_imag);
}
-template<typename T>
-EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
-std::complex<T> complex_divide_fast(const std::complex<T>& a, const std::complex<T>& b) {
+template <typename T>
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE std::complex<T> complex_divide_fast(const std::complex<T>& a,
+ const std::complex<T>& b) {
const T a_real = numext::real(a);
const T a_imag = numext::imag(a);
const T b_real = numext::real(b);
const T b_imag = numext::imag(b);
const T norm = (b_real * b_real + b_imag * b_imag);
- return std::complex<T>((a_real * b_real + a_imag * b_imag) / norm,
- (a_imag * b_real - a_real * b_imag) / norm);
+ return std::complex<T>((a_real * b_real + a_imag * b_imag) / norm, (a_imag * b_real - a_real * b_imag) / norm);
}
-template<typename T>
-EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
-std::complex<T> complex_divide_stable(const std::complex<T>& a, const std::complex<T>& b) {
+template <typename T>
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE std::complex<T> complex_divide_stable(const std::complex<T>& a,
+ const std::complex<T>& b) {
const T a_real = numext::real(a);
const T a_imag = numext::imag(a);
const T b_real = numext::real(b);
@@ -99,13 +96,13 @@
const T rscale = scale_imag ? T(1) : b_real / b_imag;
const T iscale = scale_imag ? b_imag / b_real : T(1);
const T denominator = b_real * rscale + b_imag * iscale;
- return std::complex<T>((a_real * rscale + a_imag * iscale) / denominator,
+ return std::complex<T>((a_real * rscale + a_imag * iscale) / denominator,
(a_imag * rscale - a_real * iscale) / denominator);
}
-template<typename T>
-EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
-std::complex<T> complex_divide(const std::complex<T>& a, const std::complex<T>& b) {
+template <typename T>
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE std::complex<T> complex_divide(const std::complex<T>& a,
+ const std::complex<T>& b) {
#if EIGEN_FAST_MATH
return complex_divide_fast(a, b);
#else
@@ -118,131 +115,107 @@
// since they are already specialized for float/double/long double within
// the standard <complex> header. We also do not specialize the stream
// operators.
-#define EIGEN_CREATE_STD_COMPLEX_OPERATOR_SPECIALIZATIONS(T) \
- \
-EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE \
-std::complex<T> operator+(const std::complex<T>& a) { return a; } \
- \
-EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE \
-std::complex<T> operator-(const std::complex<T>& a) { \
- return std::complex<T>(-numext::real(a), -numext::imag(a)); \
-} \
- \
-EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE \
-std::complex<T> operator+(const std::complex<T>& a, const std::complex<T>& b) { \
- return std::complex<T>(numext::real(a) + numext::real(b), numext::imag(a) + numext::imag(b)); \
-} \
- \
-EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE \
-std::complex<T> operator+(const std::complex<T>& a, const T& b) { \
- return std::complex<T>(numext::real(a) + b, numext::imag(a)); \
-} \
- \
-EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE \
-std::complex<T> operator+(const T& a, const std::complex<T>& b) { \
- return std::complex<T>(a + numext::real(b), numext::imag(b)); \
-} \
- \
-EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE \
-std::complex<T> operator-(const std::complex<T>& a, const std::complex<T>& b) { \
- return std::complex<T>(numext::real(a) - numext::real(b), numext::imag(a) - numext::imag(b)); \
-} \
- \
-EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE \
-std::complex<T> operator-(const std::complex<T>& a, const T& b) { \
- return std::complex<T>(numext::real(a) - b, numext::imag(a)); \
-} \
- \
-EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE \
-std::complex<T> operator-(const T& a, const std::complex<T>& b) { \
- return std::complex<T>(a - numext::real(b), -numext::imag(b)); \
-} \
- \
-EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE \
-std::complex<T> operator*(const std::complex<T>& a, const std::complex<T>& b) { \
- return complex_multiply(a, b); \
-} \
- \
-EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE \
-std::complex<T> operator*(const std::complex<T>& a, const T& b) { \
- return std::complex<T>(numext::real(a) * b, numext::imag(a) * b); \
-} \
- \
-EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE \
-std::complex<T> operator*(const T& a, const std::complex<T>& b) { \
- return std::complex<T>(a * numext::real(b), a * numext::imag(b)); \
-} \
- \
-EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE \
-std::complex<T> operator/(const std::complex<T>& a, const std::complex<T>& b) { \
- return complex_divide(a, b); \
-} \
- \
-EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE \
-std::complex<T> operator/(const std::complex<T>& a, const T& b) { \
- return std::complex<T>(numext::real(a) / b, numext::imag(a) / b); \
-} \
- \
-EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE \
-std::complex<T> operator/(const T& a, const std::complex<T>& b) { \
- return complex_divide(std::complex<T>(a, 0), b); \
-} \
- \
-EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE \
-std::complex<T>& operator+=(std::complex<T>& a, const std::complex<T>& b) { \
- numext::real_ref(a) += numext::real(b); \
- numext::imag_ref(a) += numext::imag(b); \
- return a; \
-} \
- \
-EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE \
-std::complex<T>& operator-=(std::complex<T>& a, const std::complex<T>& b) { \
- numext::real_ref(a) -= numext::real(b); \
- numext::imag_ref(a) -= numext::imag(b); \
- return a; \
-} \
- \
-EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE \
-std::complex<T>& operator*=(std::complex<T>& a, const std::complex<T>& b) { \
- a = complex_multiply(a, b); \
- return a; \
-} \
- \
-EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE \
-std::complex<T>& operator/=(std::complex<T>& a, const std::complex<T>& b) { \
- a = complex_divide(a, b); \
- return a; \
-} \
- \
-EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE \
-bool operator==(const std::complex<T>& a, const std::complex<T>& b) { \
- return numext::real(a) == numext::real(b) && numext::imag(a) == numext::imag(b); \
-} \
- \
-EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE \
-bool operator==(const std::complex<T>& a, const T& b) { \
- return numext::real(a) == b && numext::imag(a) == 0; \
-} \
- \
-EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE \
-bool operator==(const T& a, const std::complex<T>& b) { \
- return a == numext::real(b) && 0 == numext::imag(b); \
-} \
- \
-EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE \
-bool operator!=(const std::complex<T>& a, const std::complex<T>& b) { \
- return !(a == b); \
-} \
- \
-EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE \
-bool operator!=(const std::complex<T>& a, const T& b) { \
- return !(a == b); \
-} \
- \
-EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE \
-bool operator!=(const T& a, const std::complex<T>& b) { \
- return !(a == b); \
-}
+#define EIGEN_CREATE_STD_COMPLEX_OPERATOR_SPECIALIZATIONS(T) \
+ \
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE std::complex<T> operator+(const std::complex<T>& a) { return a; } \
+ \
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE std::complex<T> operator-(const std::complex<T>& a) { \
+ return std::complex<T>(-numext::real(a), -numext::imag(a)); \
+ } \
+ \
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE std::complex<T> operator+(const std::complex<T>& a, \
+ const std::complex<T>& b) { \
+ return std::complex<T>(numext::real(a) + numext::real(b), numext::imag(a) + numext::imag(b)); \
+ } \
+ \
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE std::complex<T> operator+(const std::complex<T>& a, const T& b) { \
+ return std::complex<T>(numext::real(a) + b, numext::imag(a)); \
+ } \
+ \
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE std::complex<T> operator+(const T& a, const std::complex<T>& b) { \
+ return std::complex<T>(a + numext::real(b), numext::imag(b)); \
+ } \
+ \
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE std::complex<T> operator-(const std::complex<T>& a, \
+ const std::complex<T>& b) { \
+ return std::complex<T>(numext::real(a) - numext::real(b), numext::imag(a) - numext::imag(b)); \
+ } \
+ \
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE std::complex<T> operator-(const std::complex<T>& a, const T& b) { \
+ return std::complex<T>(numext::real(a) - b, numext::imag(a)); \
+ } \
+ \
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE std::complex<T> operator-(const T& a, const std::complex<T>& b) { \
+ return std::complex<T>(a - numext::real(b), -numext::imag(b)); \
+ } \
+ \
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE std::complex<T> operator*(const std::complex<T>& a, \
+ const std::complex<T>& b) { \
+ return complex_multiply(a, b); \
+ } \
+ \
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE std::complex<T> operator*(const std::complex<T>& a, const T& b) { \
+ return std::complex<T>(numext::real(a) * b, numext::imag(a) * b); \
+ } \
+ \
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE std::complex<T> operator*(const T& a, const std::complex<T>& b) { \
+ return std::complex<T>(a * numext::real(b), a * numext::imag(b)); \
+ } \
+ \
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE std::complex<T> operator/(const std::complex<T>& a, \
+ const std::complex<T>& b) { \
+ return complex_divide(a, b); \
+ } \
+ \
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE std::complex<T> operator/(const std::complex<T>& a, const T& b) { \
+ return std::complex<T>(numext::real(a) / b, numext::imag(a) / b); \
+ } \
+ \
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE std::complex<T> operator/(const T& a, const std::complex<T>& b) { \
+ return complex_divide(std::complex<T>(a, 0), b); \
+ } \
+ \
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE std::complex<T>& operator+=(std::complex<T>& a, const std::complex<T>& b) { \
+ numext::real_ref(a) += numext::real(b); \
+ numext::imag_ref(a) += numext::imag(b); \
+ return a; \
+ } \
+ \
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE std::complex<T>& operator-=(std::complex<T>& a, const std::complex<T>& b) { \
+ numext::real_ref(a) -= numext::real(b); \
+ numext::imag_ref(a) -= numext::imag(b); \
+ return a; \
+ } \
+ \
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE std::complex<T>& operator*=(std::complex<T>& a, const std::complex<T>& b) { \
+ a = complex_multiply(a, b); \
+ return a; \
+ } \
+ \
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE std::complex<T>& operator/=(std::complex<T>& a, const std::complex<T>& b) { \
+ a = complex_divide(a, b); \
+ return a; \
+ } \
+ \
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool operator==(const std::complex<T>& a, const std::complex<T>& b) { \
+ return numext::real(a) == numext::real(b) && numext::imag(a) == numext::imag(b); \
+ } \
+ \
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool operator==(const std::complex<T>& a, const T& b) { \
+ return numext::real(a) == b && numext::imag(a) == 0; \
+ } \
+ \
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool operator==(const T& a, const std::complex<T>& b) { \
+ return a == numext::real(b) && 0 == numext::imag(b); \
+ } \
+ \
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool operator!=(const std::complex<T>& a, const std::complex<T>& b) { \
+ return !(a == b); \
+ } \
+ \
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool operator!=(const std::complex<T>& a, const T& b) { return !(a == b); } \
+ \
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool operator!=(const T& a, const std::complex<T>& b) { return !(a == b); }
// Do not specialize for long double, since that reduces to double on device.
EIGEN_CREATE_STD_COMPLEX_OPERATOR_SPECIALIZATIONS(float)
@@ -250,7 +223,6 @@
#undef EIGEN_CREATE_STD_COMPLEX_OPERATOR_SPECIALIZATIONS
-
} // namespace complex_operator_detail
EIGEN_USING_STD_COMPLEX_OPERATORS
diff --git a/Eigen/src/Core/arch/GPU/MathFunctions.h b/Eigen/src/Core/arch/GPU/MathFunctions.h
index f8191db..606215f 100644
--- a/Eigen/src/Core/arch/GPU/MathFunctions.h
+++ b/Eigen/src/Core/arch/GPU/MathFunctions.h
@@ -21,86 +21,73 @@
// introduce conflicts between these packet_traits definitions and the ones
// we'll use on the host side (SSE, AVX, ...)
#if defined(EIGEN_GPUCC) && defined(EIGEN_USE_GPU)
-template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
-float4 plog<float4>(const float4& a)
-{
+template <>
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE float4 plog<float4>(const float4& a) {
return make_float4(logf(a.x), logf(a.y), logf(a.z), logf(a.w));
}
-template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
-double2 plog<double2>(const double2& a)
-{
+template <>
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE double2 plog<double2>(const double2& a) {
using ::log;
return make_double2(log(a.x), log(a.y));
}
-template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
-float4 plog1p<float4>(const float4& a)
-{
+template <>
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE float4 plog1p<float4>(const float4& a) {
return make_float4(log1pf(a.x), log1pf(a.y), log1pf(a.z), log1pf(a.w));
}
-template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
-double2 plog1p<double2>(const double2& a)
-{
+template <>
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE double2 plog1p<double2>(const double2& a) {
return make_double2(log1p(a.x), log1p(a.y));
}
-template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
-float4 pexp<float4>(const float4& a)
-{
+template <>
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE float4 pexp<float4>(const float4& a) {
return make_float4(expf(a.x), expf(a.y), expf(a.z), expf(a.w));
}
-template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
-double2 pexp<double2>(const double2& a)
-{
+template <>
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE double2 pexp<double2>(const double2& a) {
using ::exp;
return make_double2(exp(a.x), exp(a.y));
}
-template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
-float4 pexpm1<float4>(const float4& a)
-{
+template <>
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE float4 pexpm1<float4>(const float4& a) {
return make_float4(expm1f(a.x), expm1f(a.y), expm1f(a.z), expm1f(a.w));
}
-template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
-double2 pexpm1<double2>(const double2& a)
-{
+template <>
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE double2 pexpm1<double2>(const double2& a) {
return make_double2(expm1(a.x), expm1(a.y));
}
-template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
-float4 psqrt<float4>(const float4& a)
-{
+template <>
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE float4 psqrt<float4>(const float4& a) {
return make_float4(sqrtf(a.x), sqrtf(a.y), sqrtf(a.z), sqrtf(a.w));
}
-template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
-double2 psqrt<double2>(const double2& a)
-{
+template <>
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE double2 psqrt<double2>(const double2& a) {
using ::sqrt;
return make_double2(sqrt(a.x), sqrt(a.y));
}
-template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
-float4 prsqrt<float4>(const float4& a)
-{
+template <>
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE float4 prsqrt<float4>(const float4& a) {
return make_float4(rsqrtf(a.x), rsqrtf(a.y), rsqrtf(a.z), rsqrtf(a.w));
}
-template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
-double2 prsqrt<double2>(const double2& a)
-{
+template <>
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE double2 prsqrt<double2>(const double2& a) {
return make_double2(rsqrt(a.x), rsqrt(a.y));
}
-
#endif
-} // end namespace internal
+} // end namespace internal
-} // end namespace Eigen
+} // end namespace Eigen
-#endif // EIGEN_MATH_FUNCTIONS_GPU_H
+#endif // EIGEN_MATH_FUNCTIONS_GPU_H
diff --git a/Eigen/src/Core/arch/GPU/PacketMath.h b/Eigen/src/Core/arch/GPU/PacketMath.h
index 5c959ed..7900b0e 100644
--- a/Eigen/src/Core/arch/GPU/PacketMath.h
+++ b/Eigen/src/Core/arch/GPU/PacketMath.h
@@ -36,23 +36,29 @@
// we'll use on the host side (SSE, AVX, ...)
#if defined(EIGEN_GPUCC) && defined(EIGEN_USE_GPU)
-template<> struct is_arithmetic<float4> { enum { value = true }; };
-template<> struct is_arithmetic<double2> { enum { value = true }; };
+template <>
+struct is_arithmetic<float4> {
+ enum { value = true };
+};
+template <>
+struct is_arithmetic<double2> {
+ enum { value = true };
+};
-template<> struct packet_traits<float> : default_packet_traits
-{
+template <>
+struct packet_traits<float> : default_packet_traits {
typedef float4 type;
typedef float4 half;
enum {
Vectorizable = 1,
AlignedOnScalar = 1,
- size=4,
+ size = 4,
- HasDiv = 1,
- HasSin = 0,
- HasCos = 0,
- HasLog = 1,
- HasExp = 1,
+ HasDiv = 1,
+ HasSin = 0,
+ HasCos = 0,
+ HasLog = 1,
+ HasExp = 1,
HasSqrt = 1,
HasRsqrt = 1,
HasLGamma = 1,
@@ -74,18 +80,18 @@
};
};
-template<> struct packet_traits<double> : default_packet_traits
-{
+template <>
+struct packet_traits<double> : default_packet_traits {
typedef double2 type;
typedef double2 half;
enum {
Vectorizable = 1,
AlignedOnScalar = 1,
- size=2,
+ size = 2,
- HasDiv = 1,
- HasLog = 1,
- HasExp = 1,
+ HasDiv = 1,
+ HasLog = 1,
+ HasExp = 1,
HasSqrt = 1,
HasRsqrt = 1,
HasLGamma = 1,
@@ -107,14 +113,37 @@
};
};
+template <>
+struct unpacket_traits<float4> {
+ typedef float type;
+ enum {
+ size = 4,
+ alignment = Aligned16,
+ vectorizable = true,
+ masked_load_available = false,
+ masked_store_available = false
+ };
+ typedef float4 half;
+};
+template <>
+struct unpacket_traits<double2> {
+ typedef double type;
+ enum {
+ size = 2,
+ alignment = Aligned16,
+ vectorizable = true,
+ masked_load_available = false,
+ masked_store_available = false
+ };
+ typedef double2 half;
+};
-template<> struct unpacket_traits<float4> { typedef float type; enum {size=4, alignment=Aligned16, vectorizable=true, masked_load_available=false, masked_store_available=false}; typedef float4 half; };
-template<> struct unpacket_traits<double2> { typedef double type; enum {size=2, alignment=Aligned16, vectorizable=true, masked_load_available=false, masked_store_available=false}; typedef double2 half; };
-
-template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE float4 pset1<float4>(const float& from) {
+template <>
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE float4 pset1<float4>(const float& from) {
return make_float4(from, from, from, from);
}
-template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE double2 pset1<double2>(const double& from) {
+template <>
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE double2 pset1<double2>(const double& from) {
return make_double2(from, from);
}
@@ -123,259 +152,254 @@
// of the functions, while the latter can only deal with one of them.
#if defined(EIGEN_CUDA_ARCH) || defined(EIGEN_HIPCC) || (defined(EIGEN_CUDACC) && EIGEN_COMP_CLANG && !EIGEN_COMP_NVCC)
-EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE float bitwise_and(const float& a,
- const float& b) {
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE float bitwise_and(const float& a, const float& b) {
return __int_as_float(__float_as_int(a) & __float_as_int(b));
}
-EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE double bitwise_and(const double& a,
- const double& b) {
- return __longlong_as_double(__double_as_longlong(a) &
- __double_as_longlong(b));
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE double bitwise_and(const double& a, const double& b) {
+ return __longlong_as_double(__double_as_longlong(a) & __double_as_longlong(b));
}
-EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE float bitwise_or(const float& a,
- const float& b) {
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE float bitwise_or(const float& a, const float& b) {
return __int_as_float(__float_as_int(a) | __float_as_int(b));
}
-EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE double bitwise_or(const double& a,
- const double& b) {
- return __longlong_as_double(__double_as_longlong(a) |
- __double_as_longlong(b));
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE double bitwise_or(const double& a, const double& b) {
+ return __longlong_as_double(__double_as_longlong(a) | __double_as_longlong(b));
}
-EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE float bitwise_xor(const float& a,
- const float& b) {
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE float bitwise_xor(const float& a, const float& b) {
return __int_as_float(__float_as_int(a) ^ __float_as_int(b));
}
-EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE double bitwise_xor(const double& a,
- const double& b) {
- return __longlong_as_double(__double_as_longlong(a) ^
- __double_as_longlong(b));
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE double bitwise_xor(const double& a, const double& b) {
+ return __longlong_as_double(__double_as_longlong(a) ^ __double_as_longlong(b));
}
-EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE float bitwise_andnot(const float& a,
- const float& b) {
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE float bitwise_andnot(const float& a, const float& b) {
return __int_as_float(__float_as_int(a) & ~__float_as_int(b));
}
-EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE double bitwise_andnot(const double& a,
- const double& b) {
- return __longlong_as_double(__double_as_longlong(a) &
- ~__double_as_longlong(b));
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE double bitwise_andnot(const double& a, const double& b) {
+ return __longlong_as_double(__double_as_longlong(a) & ~__double_as_longlong(b));
}
-EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE float eq_mask(const float& a,
- const float& b) {
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE float eq_mask(const float& a, const float& b) {
return __int_as_float(a == b ? 0xffffffffu : 0u);
}
-EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE double eq_mask(const double& a,
- const double& b) {
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE double eq_mask(const double& a, const double& b) {
return __longlong_as_double(a == b ? 0xffffffffffffffffull : 0ull);
}
-EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE float lt_mask(const float& a,
- const float& b) {
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE float lt_mask(const float& a, const float& b) {
return __int_as_float(a < b ? 0xffffffffu : 0u);
}
-EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE double lt_mask(const double& a,
- const double& b) {
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE double lt_mask(const double& a, const double& b) {
return __longlong_as_double(a < b ? 0xffffffffffffffffull : 0ull);
}
-EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE float le_mask(const float& a,
- const float& b) {
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE float le_mask(const float& a, const float& b) {
return __int_as_float(a <= b ? 0xffffffffu : 0u);
}
-EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE double le_mask(const double& a,
- const double& b) {
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE double le_mask(const double& a, const double& b) {
return __longlong_as_double(a <= b ? 0xffffffffffffffffull : 0ull);
}
template <>
-EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE float4 pand<float4>(const float4& a,
- const float4& b) {
- return make_float4(bitwise_and(a.x, b.x), bitwise_and(a.y, b.y),
- bitwise_and(a.z, b.z), bitwise_and(a.w, b.w));
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE float4 pand<float4>(const float4& a, const float4& b) {
+ return make_float4(bitwise_and(a.x, b.x), bitwise_and(a.y, b.y), bitwise_and(a.z, b.z), bitwise_and(a.w, b.w));
}
template <>
-EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE double2 pand<double2>(const double2& a,
- const double2& b) {
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE double2 pand<double2>(const double2& a, const double2& b) {
return make_double2(bitwise_and(a.x, b.x), bitwise_and(a.y, b.y));
}
template <>
-EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE float4 por<float4>(const float4& a,
- const float4& b) {
- return make_float4(bitwise_or(a.x, b.x), bitwise_or(a.y, b.y),
- bitwise_or(a.z, b.z), bitwise_or(a.w, b.w));
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE float4 por<float4>(const float4& a, const float4& b) {
+ return make_float4(bitwise_or(a.x, b.x), bitwise_or(a.y, b.y), bitwise_or(a.z, b.z), bitwise_or(a.w, b.w));
}
template <>
-EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE double2 por<double2>(const double2& a,
- const double2& b) {
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE double2 por<double2>(const double2& a, const double2& b) {
return make_double2(bitwise_or(a.x, b.x), bitwise_or(a.y, b.y));
}
template <>
-EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE float4 pxor<float4>(const float4& a,
- const float4& b) {
- return make_float4(bitwise_xor(a.x, b.x), bitwise_xor(a.y, b.y),
- bitwise_xor(a.z, b.z), bitwise_xor(a.w, b.w));
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE float4 pxor<float4>(const float4& a, const float4& b) {
+ return make_float4(bitwise_xor(a.x, b.x), bitwise_xor(a.y, b.y), bitwise_xor(a.z, b.z), bitwise_xor(a.w, b.w));
}
template <>
-EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE double2 pxor<double2>(const double2& a,
- const double2& b) {
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE double2 pxor<double2>(const double2& a, const double2& b) {
return make_double2(bitwise_xor(a.x, b.x), bitwise_xor(a.y, b.y));
}
template <>
-EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE float4 pandnot<float4>(const float4& a,
- const float4& b) {
- return make_float4(bitwise_andnot(a.x, b.x), bitwise_andnot(a.y, b.y),
- bitwise_andnot(a.z, b.z), bitwise_andnot(a.w, b.w));
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE float4 pandnot<float4>(const float4& a, const float4& b) {
+ return make_float4(bitwise_andnot(a.x, b.x), bitwise_andnot(a.y, b.y), bitwise_andnot(a.z, b.z),
+ bitwise_andnot(a.w, b.w));
}
template <>
-EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE double2
-pandnot<double2>(const double2& a, const double2& b) {
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE double2 pandnot<double2>(const double2& a, const double2& b) {
return make_double2(bitwise_andnot(a.x, b.x), bitwise_andnot(a.y, b.y));
}
template <>
-EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE float4 pcmp_eq<float4>(const float4& a,
- const float4& b) {
- return make_float4(eq_mask(a.x, b.x), eq_mask(a.y, b.y), eq_mask(a.z, b.z),
- eq_mask(a.w, b.w));
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE float4 pcmp_eq<float4>(const float4& a, const float4& b) {
+ return make_float4(eq_mask(a.x, b.x), eq_mask(a.y, b.y), eq_mask(a.z, b.z), eq_mask(a.w, b.w));
}
template <>
-EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE float4 pcmp_lt<float4>(const float4& a,
- const float4& b) {
- return make_float4(lt_mask(a.x, b.x), lt_mask(a.y, b.y), lt_mask(a.z, b.z),
- lt_mask(a.w, b.w));
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE float4 pcmp_lt<float4>(const float4& a, const float4& b) {
+ return make_float4(lt_mask(a.x, b.x), lt_mask(a.y, b.y), lt_mask(a.z, b.z), lt_mask(a.w, b.w));
}
template <>
-EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE float4 pcmp_le<float4>(const float4& a,
- const float4& b) {
- return make_float4(le_mask(a.x, b.x), le_mask(a.y, b.y), le_mask(a.z, b.z),
- le_mask(a.w, b.w));
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE float4 pcmp_le<float4>(const float4& a, const float4& b) {
+ return make_float4(le_mask(a.x, b.x), le_mask(a.y, b.y), le_mask(a.z, b.z), le_mask(a.w, b.w));
}
template <>
-EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE double2
-pcmp_eq<double2>(const double2& a, const double2& b) {
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE double2 pcmp_eq<double2>(const double2& a, const double2& b) {
return make_double2(eq_mask(a.x, b.x), eq_mask(a.y, b.y));
}
template <>
-EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE double2
-pcmp_lt<double2>(const double2& a, const double2& b) {
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE double2 pcmp_lt<double2>(const double2& a, const double2& b) {
return make_double2(lt_mask(a.x, b.x), lt_mask(a.y, b.y));
}
template <>
-EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE double2
-pcmp_le<double2>(const double2& a, const double2& b) {
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE double2 pcmp_le<double2>(const double2& a, const double2& b) {
return make_double2(le_mask(a.x, b.x), le_mask(a.y, b.y));
}
-#endif // defined(EIGEN_CUDA_ARCH) || defined(EIGEN_HIPCC) || (defined(EIGEN_CUDACC) && EIGEN_COMP_CLANG && !EIGEN_COMP_NVCC)
+#endif // defined(EIGEN_CUDA_ARCH) || defined(EIGEN_HIPCC) || (defined(EIGEN_CUDACC) && EIGEN_COMP_CLANG &&
+ // !EIGEN_COMP_NVCC)
-template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE float4 plset<float4>(const float& a) {
- return make_float4(a, a+1, a+2, a+3);
+template <>
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE float4 plset<float4>(const float& a) {
+ return make_float4(a, a + 1, a + 2, a + 3);
}
-template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE double2 plset<double2>(const double& a) {
- return make_double2(a, a+1);
+template <>
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE double2 plset<double2>(const double& a) {
+ return make_double2(a, a + 1);
}
-template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE float4 padd<float4>(const float4& a, const float4& b) {
- return make_float4(a.x+b.x, a.y+b.y, a.z+b.z, a.w+b.w);
+template <>
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE float4 padd<float4>(const float4& a, const float4& b) {
+ return make_float4(a.x + b.x, a.y + b.y, a.z + b.z, a.w + b.w);
}
-template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE double2 padd<double2>(const double2& a, const double2& b) {
- return make_double2(a.x+b.x, a.y+b.y);
+template <>
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE double2 padd<double2>(const double2& a, const double2& b) {
+ return make_double2(a.x + b.x, a.y + b.y);
}
-template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE float4 psub<float4>(const float4& a, const float4& b) {
- return make_float4(a.x-b.x, a.y-b.y, a.z-b.z, a.w-b.w);
+template <>
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE float4 psub<float4>(const float4& a, const float4& b) {
+ return make_float4(a.x - b.x, a.y - b.y, a.z - b.z, a.w - b.w);
}
-template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE double2 psub<double2>(const double2& a, const double2& b) {
- return make_double2(a.x-b.x, a.y-b.y);
+template <>
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE double2 psub<double2>(const double2& a, const double2& b) {
+ return make_double2(a.x - b.x, a.y - b.y);
}
-template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE float4 pnegate(const float4& a) {
+template <>
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE float4 pnegate(const float4& a) {
return make_float4(-a.x, -a.y, -a.z, -a.w);
}
-template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE double2 pnegate(const double2& a) {
+template <>
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE double2 pnegate(const double2& a) {
return make_double2(-a.x, -a.y);
}
-template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE float4 pconj(const float4& a) { return a; }
-template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE double2 pconj(const double2& a) { return a; }
-
-template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE float4 pmul<float4>(const float4& a, const float4& b) {
- return make_float4(a.x*b.x, a.y*b.y, a.z*b.z, a.w*b.w);
+template <>
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE float4 pconj(const float4& a) {
+ return a;
}
-template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE double2 pmul<double2>(const double2& a, const double2& b) {
- return make_double2(a.x*b.x, a.y*b.y);
+template <>
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE double2 pconj(const double2& a) {
+ return a;
}
-template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE float4 pdiv<float4>(const float4& a, const float4& b) {
- return make_float4(a.x/b.x, a.y/b.y, a.z/b.z, a.w/b.w);
+template <>
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE float4 pmul<float4>(const float4& a, const float4& b) {
+ return make_float4(a.x * b.x, a.y * b.y, a.z * b.z, a.w * b.w);
}
-template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE double2 pdiv<double2>(const double2& a, const double2& b) {
- return make_double2(a.x/b.x, a.y/b.y);
+template <>
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE double2 pmul<double2>(const double2& a, const double2& b) {
+ return make_double2(a.x * b.x, a.y * b.y);
}
-template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE float4 pmin<float4>(const float4& a, const float4& b) {
+template <>
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE float4 pdiv<float4>(const float4& a, const float4& b) {
+ return make_float4(a.x / b.x, a.y / b.y, a.z / b.z, a.w / b.w);
+}
+template <>
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE double2 pdiv<double2>(const double2& a, const double2& b) {
+ return make_double2(a.x / b.x, a.y / b.y);
+}
+
+template <>
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE float4 pmin<float4>(const float4& a, const float4& b) {
return make_float4(fminf(a.x, b.x), fminf(a.y, b.y), fminf(a.z, b.z), fminf(a.w, b.w));
}
-template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE double2 pmin<double2>(const double2& a, const double2& b) {
+template <>
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE double2 pmin<double2>(const double2& a, const double2& b) {
return make_double2(fmin(a.x, b.x), fmin(a.y, b.y));
}
-template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE float4 pmax<float4>(const float4& a, const float4& b) {
+template <>
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE float4 pmax<float4>(const float4& a, const float4& b) {
return make_float4(fmaxf(a.x, b.x), fmaxf(a.y, b.y), fmaxf(a.z, b.z), fmaxf(a.w, b.w));
}
-template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE double2 pmax<double2>(const double2& a, const double2& b) {
+template <>
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE double2 pmax<double2>(const double2& a, const double2& b) {
return make_double2(fmax(a.x, b.x), fmax(a.y, b.y));
}
-template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE float4 pload<float4>(const float* from) {
+template <>
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE float4 pload<float4>(const float* from) {
return *reinterpret_cast<const float4*>(from);
}
-template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE double2 pload<double2>(const double* from) {
+template <>
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE double2 pload<double2>(const double* from) {
return *reinterpret_cast<const double2*>(from);
}
-template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE float4 ploadu<float4>(const float* from) {
+template <>
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE float4 ploadu<float4>(const float* from) {
return make_float4(from[0], from[1], from[2], from[3]);
}
-template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE double2 ploadu<double2>(const double* from) {
+template <>
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE double2 ploadu<double2>(const double* from) {
return make_double2(from[0], from[1]);
}
-template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE float4 ploaddup<float4>(const float* from) {
+template <>
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE float4 ploaddup<float4>(const float* from) {
return make_float4(from[0], from[0], from[1], from[1]);
}
-template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE double2 ploaddup<double2>(const double* from) {
+template <>
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE double2 ploaddup<double2>(const double* from) {
return make_double2(from[0], from[0]);
}
-template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void pstore<float>(float* to, const float4& from) {
+template <>
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void pstore<float>(float* to, const float4& from) {
*reinterpret_cast<float4*>(to) = from;
}
-template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void pstore<double>(double* to, const double2& from) {
+template <>
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void pstore<double>(double* to, const double2& from) {
*reinterpret_cast<double2*>(to) = from;
}
-template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void pstoreu<float>(float* to, const float4& from) {
+template <>
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void pstoreu<float>(float* to, const float4& from) {
to[0] = from.x;
to[1] = from.y;
to[2] = from.z;
to[3] = from.w;
}
-template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void pstoreu<double>(double* to, const double2& from) {
+template <>
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void pstoreu<double>(double* to, const double2& from) {
to[0] = from.x;
to[1] = from.y;
}
-template<>
+template <>
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE float4 ploadt_ro<float4, Aligned>(const float* from) {
#if defined(EIGEN_GPU_HAS_LDG)
return __ldg(reinterpret_cast<const float4*>(from));
@@ -383,7 +407,7 @@
return make_float4(from[0], from[1], from[2], from[3]);
#endif
}
-template<>
+template <>
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE double2 ploadt_ro<double2, Aligned>(const double* from) {
#if defined(EIGEN_GPU_HAS_LDG)
return __ldg(reinterpret_cast<const double2*>(from));
@@ -392,93 +416,110 @@
#endif
}
-template<>
+template <>
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE float4 ploadt_ro<float4, Unaligned>(const float* from) {
#if defined(EIGEN_GPU_HAS_LDG)
- return make_float4(__ldg(from+0), __ldg(from+1), __ldg(from+2), __ldg(from+3));
+ return make_float4(__ldg(from + 0), __ldg(from + 1), __ldg(from + 2), __ldg(from + 3));
#else
return make_float4(from[0], from[1], from[2], from[3]);
#endif
}
-template<>
+template <>
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE double2 ploadt_ro<double2, Unaligned>(const double* from) {
#if defined(EIGEN_GPU_HAS_LDG)
- return make_double2(__ldg(from+0), __ldg(from+1));
+ return make_double2(__ldg(from + 0), __ldg(from + 1));
#else
return make_double2(from[0], from[1]);
#endif
}
-template<> EIGEN_DEVICE_FUNC inline float4 pgather<float, float4>(const float* from, Index stride) {
- return make_float4(from[0*stride], from[1*stride], from[2*stride], from[3*stride]);
+template <>
+EIGEN_DEVICE_FUNC inline float4 pgather<float, float4>(const float* from, Index stride) {
+ return make_float4(from[0 * stride], from[1 * stride], from[2 * stride], from[3 * stride]);
}
-template<> EIGEN_DEVICE_FUNC inline double2 pgather<double, double2>(const double* from, Index stride) {
- return make_double2(from[0*stride], from[1*stride]);
+template <>
+EIGEN_DEVICE_FUNC inline double2 pgather<double, double2>(const double* from, Index stride) {
+ return make_double2(from[0 * stride], from[1 * stride]);
}
-template<> EIGEN_DEVICE_FUNC inline void pscatter<float, float4>(float* to, const float4& from, Index stride) {
- to[stride*0] = from.x;
- to[stride*1] = from.y;
- to[stride*2] = from.z;
- to[stride*3] = from.w;
+template <>
+EIGEN_DEVICE_FUNC inline void pscatter<float, float4>(float* to, const float4& from, Index stride) {
+ to[stride * 0] = from.x;
+ to[stride * 1] = from.y;
+ to[stride * 2] = from.z;
+ to[stride * 3] = from.w;
}
-template<> EIGEN_DEVICE_FUNC inline void pscatter<double, double2>(double* to, const double2& from, Index stride) {
- to[stride*0] = from.x;
- to[stride*1] = from.y;
+template <>
+EIGEN_DEVICE_FUNC inline void pscatter<double, double2>(double* to, const double2& from, Index stride) {
+ to[stride * 0] = from.x;
+ to[stride * 1] = from.y;
}
-template<> EIGEN_DEVICE_FUNC inline float pfirst<float4>(const float4& a) {
+template <>
+EIGEN_DEVICE_FUNC inline float pfirst<float4>(const float4& a) {
return a.x;
}
-template<> EIGEN_DEVICE_FUNC inline double pfirst<double2>(const double2& a) {
+template <>
+EIGEN_DEVICE_FUNC inline double pfirst<double2>(const double2& a) {
return a.x;
}
-template<> EIGEN_DEVICE_FUNC inline float predux<float4>(const float4& a) {
+template <>
+EIGEN_DEVICE_FUNC inline float predux<float4>(const float4& a) {
return a.x + a.y + a.z + a.w;
}
-template<> EIGEN_DEVICE_FUNC inline double predux<double2>(const double2& a) {
+template <>
+EIGEN_DEVICE_FUNC inline double predux<double2>(const double2& a) {
return a.x + a.y;
}
-template<> EIGEN_DEVICE_FUNC inline float predux_max<float4>(const float4& a) {
+template <>
+EIGEN_DEVICE_FUNC inline float predux_max<float4>(const float4& a) {
return fmaxf(fmaxf(a.x, a.y), fmaxf(a.z, a.w));
}
-template<> EIGEN_DEVICE_FUNC inline double predux_max<double2>(const double2& a) {
+template <>
+EIGEN_DEVICE_FUNC inline double predux_max<double2>(const double2& a) {
return fmax(a.x, a.y);
}
-template<> EIGEN_DEVICE_FUNC inline float predux_min<float4>(const float4& a) {
+template <>
+EIGEN_DEVICE_FUNC inline float predux_min<float4>(const float4& a) {
return fminf(fminf(a.x, a.y), fminf(a.z, a.w));
}
-template<> EIGEN_DEVICE_FUNC inline double predux_min<double2>(const double2& a) {
+template <>
+EIGEN_DEVICE_FUNC inline double predux_min<double2>(const double2& a) {
return fmin(a.x, a.y);
}
-template<> EIGEN_DEVICE_FUNC inline float predux_mul<float4>(const float4& a) {
+template <>
+EIGEN_DEVICE_FUNC inline float predux_mul<float4>(const float4& a) {
return a.x * a.y * a.z * a.w;
}
-template<> EIGEN_DEVICE_FUNC inline double predux_mul<double2>(const double2& a) {
+template <>
+EIGEN_DEVICE_FUNC inline double predux_mul<double2>(const double2& a) {
return a.x * a.y;
}
-template<> EIGEN_DEVICE_FUNC inline float4 pabs<float4>(const float4& a) {
+template <>
+EIGEN_DEVICE_FUNC inline float4 pabs<float4>(const float4& a) {
return make_float4(fabsf(a.x), fabsf(a.y), fabsf(a.z), fabsf(a.w));
}
-template<> EIGEN_DEVICE_FUNC inline double2 pabs<double2>(const double2& a) {
+template <>
+EIGEN_DEVICE_FUNC inline double2 pabs<double2>(const double2& a) {
return make_double2(fabs(a.x), fabs(a.y));
}
-template<> EIGEN_DEVICE_FUNC inline float4 pfloor<float4>(const float4& a) {
+template <>
+EIGEN_DEVICE_FUNC inline float4 pfloor<float4>(const float4& a) {
return make_float4(floorf(a.x), floorf(a.y), floorf(a.z), floorf(a.w));
}
-template<> EIGEN_DEVICE_FUNC inline double2 pfloor<double2>(const double2& a) {
+template <>
+EIGEN_DEVICE_FUNC inline double2 pfloor<double2>(const double2& a) {
return make_double2(floor(a.x), floor(a.y));
}
-EIGEN_DEVICE_FUNC inline void
-ptranspose(PacketBlock<float4,4>& kernel) {
+EIGEN_DEVICE_FUNC inline void ptranspose(PacketBlock<float4, 4>& kernel) {
float tmp = kernel.packet[0].y;
kernel.packet[0].y = kernel.packet[1].x;
kernel.packet[1].x = tmp;
@@ -504,14 +545,13 @@
kernel.packet[3].z = tmp;
}
-EIGEN_DEVICE_FUNC inline void
-ptranspose(PacketBlock<double2,2>& kernel) {
+EIGEN_DEVICE_FUNC inline void ptranspose(PacketBlock<double2, 2>& kernel) {
double tmp = kernel.packet[0].y;
kernel.packet[0].y = kernel.packet[1].x;
kernel.packet[1].x = tmp;
}
-#endif // defined(EIGEN_GPUCC) && defined(EIGEN_USE_GPU)
+#endif // defined(EIGEN_GPUCC) && defined(EIGEN_USE_GPU)
// Half-packet functions are not available on the host for CUDA 9.0-9.2, only
// on device. There is no benefit to using them on the host anyways, since they are
@@ -519,41 +559,68 @@
#if (defined(EIGEN_HAS_CUDA_FP16) || defined(EIGEN_HAS_HIP_FP16)) && defined(EIGEN_GPU_COMPILE_PHASE)
typedef ulonglong2 Packet4h2;
-template<> struct unpacket_traits<Packet4h2> { typedef Eigen::half type; enum {size=8, alignment=Aligned16, vectorizable=true, masked_load_available=false, masked_store_available=false}; typedef Packet4h2 half; };
-template<> struct is_arithmetic<Packet4h2> { enum { value = true }; };
+template <>
+struct unpacket_traits<Packet4h2> {
+ typedef Eigen::half type;
+ enum {
+ size = 8,
+ alignment = Aligned16,
+ vectorizable = true,
+ masked_load_available = false,
+ masked_store_available = false
+ };
+ typedef Packet4h2 half;
+};
+template <>
+struct is_arithmetic<Packet4h2> {
+ enum { value = true };
+};
-template<> struct unpacket_traits<half2> { typedef Eigen::half type; enum {size=2, alignment=Aligned16, vectorizable=true, masked_load_available=false, masked_store_available=false}; typedef half2 half; };
-template<> struct is_arithmetic<half2> { enum { value = true }; };
+template <>
+struct unpacket_traits<half2> {
+ typedef Eigen::half type;
+ enum {
+ size = 2,
+ alignment = Aligned16,
+ vectorizable = true,
+ masked_load_available = false,
+ masked_store_available = false
+ };
+ typedef half2 half;
+};
+template <>
+struct is_arithmetic<half2> {
+ enum { value = true };
+};
-template<> struct packet_traits<Eigen::half> : default_packet_traits
-{
+template <>
+struct packet_traits<Eigen::half> : default_packet_traits {
typedef Packet4h2 type;
typedef Packet4h2 half;
enum {
Vectorizable = 1,
AlignedOnScalar = 1,
- size=8,
- HasAdd = 1,
- HasSub = 1,
- HasMul = 1,
- HasDiv = 1,
- HasSqrt = 1,
- HasRsqrt = 1,
- HasExp = 1,
- HasExpm1 = 1,
- HasLog = 1,
- HasLog1p = 1
+ size = 8,
+ HasAdd = 1,
+ HasSub = 1,
+ HasMul = 1,
+ HasDiv = 1,
+ HasSqrt = 1,
+ HasRsqrt = 1,
+ HasExp = 1,
+ HasExpm1 = 1,
+ HasLog = 1,
+ HasLog1p = 1
};
};
-template<>
+template <>
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 pset1<half2>(const Eigen::half& from) {
return __half2half2(from);
}
template <>
-EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet4h2
-pset1<Packet4h2>(const Eigen::half& from) {
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet4h2 pset1<Packet4h2>(const Eigen::half& from) {
Packet4h2 r;
half2* p_alias = reinterpret_cast<half2*>(&r);
p_alias[0] = pset1<half2>(from);
@@ -569,59 +636,48 @@
return *reinterpret_cast<const half2*>(from);
}
-EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 ploadu(const Eigen::half* from) {
- return __halves2half2(from[0], from[1]);
-}
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 ploadu(const Eigen::half* from) { return __halves2half2(from[0], from[1]); }
-EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 ploaddup(const Eigen::half* from) {
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 ploaddup(const Eigen::half* from) {
return __halves2half2(from[0], from[0]);
}
-EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void pstore(Eigen::half* to,
- const half2& from) {
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void pstore(Eigen::half* to, const half2& from) {
*reinterpret_cast<half2*>(to) = from;
}
-EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void pstoreu(Eigen::half* to,
- const half2& from) {
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void pstoreu(Eigen::half* to, const half2& from) {
to[0] = __low2half(from);
to[1] = __high2half(from);
}
-
-EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE half2 ploadt_ro_aligned(
- const Eigen::half* from) {
+EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE half2 ploadt_ro_aligned(const Eigen::half* from) {
#if defined(EIGEN_GPU_HAS_LDG)
// Input is guaranteed to be properly aligned.
return __ldg(reinterpret_cast<const half2*>(from));
#else
- return __halves2half2(*(from+0), *(from+1));
+ return __halves2half2(*(from + 0), *(from + 1));
#endif
}
-EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE half2 ploadt_ro_unaligned(
- const Eigen::half* from) {
+EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE half2 ploadt_ro_unaligned(const Eigen::half* from) {
#if defined(EIGEN_GPU_HAS_LDG)
- return __halves2half2(__ldg(from+0), __ldg(from+1));
+ return __halves2half2(__ldg(from + 0), __ldg(from + 1));
#else
- return __halves2half2(*(from+0), *(from+1));
+ return __halves2half2(*(from + 0), *(from + 1));
#endif
}
-EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 pgather(const Eigen::half* from,
- Index stride) {
- return __halves2half2(from[0*stride], from[1*stride]);
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 pgather(const Eigen::half* from, Index stride) {
+ return __halves2half2(from[0 * stride], from[1 * stride]);
}
-EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void pscatter(
- Eigen::half* to, const half2& from, Index stride) {
- to[stride*0] = __low2half(from);
- to[stride*1] = __high2half(from);
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void pscatter(Eigen::half* to, const half2& from, Index stride) {
+ to[stride * 0] = __low2half(from);
+ to[stride * 1] = __high2half(from);
}
-EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Eigen::half pfirst(const half2& a) {
- return __low2half(a);
-}
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Eigen::half pfirst(const half2& a) { return __low2half(a); }
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 pabs(const half2& a) {
half a1 = __low2half(a);
@@ -641,8 +697,7 @@
return pset1<half2>(false_half);
}
-EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void
-ptranspose(PacketBlock<half2,2>& kernel) {
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void ptranspose(PacketBlock<half2, 2>& kernel) {
__half a1 = __low2half(kernel.packet[0]);
__half a2 = __high2half(kernel.packet[0]);
__half b1 = __low2half(kernel.packet[1]);
@@ -660,9 +715,7 @@
#endif
}
-EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 pselect(const half2& mask,
- const half2& a,
- const half2& b) {
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 pselect(const half2& mask, const half2& a, const half2& b) {
half mask_low = __low2half(mask);
half mask_high = __high2half(mask);
half result_low = mask_low == half(0) ? __low2half(b) : __low2half(a);
@@ -670,8 +723,7 @@
return __halves2half2(result_low, result_high);
}
-EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 pcmp_eq(const half2& a,
- const half2& b) {
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 pcmp_eq(const half2& a, const half2& b) {
half true_half = half_impl::raw_uint16_to_half(0xffffu);
half false_half = half_impl::raw_uint16_to_half(0x0000u);
half a1 = __low2half(a);
@@ -683,8 +735,7 @@
return __halves2half2(eq1, eq2);
}
-EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 pcmp_lt(const half2& a,
- const half2& b) {
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 pcmp_lt(const half2& a, const half2& b) {
half true_half = half_impl::raw_uint16_to_half(0xffffu);
half false_half = half_impl::raw_uint16_to_half(0x0000u);
half a1 = __low2half(a);
@@ -696,8 +747,7 @@
return __halves2half2(eq1, eq2);
}
-EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 pcmp_le(const half2& a,
- const half2& b) {
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 pcmp_le(const half2& a, const half2& b) {
half true_half = half_impl::raw_uint16_to_half(0xffffu);
half false_half = half_impl::raw_uint16_to_half(0x0000u);
half a1 = __low2half(a);
@@ -709,8 +759,7 @@
return __halves2half2(eq1, eq2);
}
-EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 pand(const half2& a,
- const half2& b) {
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 pand(const half2& a, const half2& b) {
half a1 = __low2half(a);
half a2 = __high2half(a);
half b1 = __low2half(b);
@@ -720,8 +769,7 @@
return __halves2half2(result1, result2);
}
-EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 por(const half2& a,
- const half2& b) {
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 por(const half2& a, const half2& b) {
half a1 = __low2half(a);
half a2 = __high2half(a);
half b1 = __low2half(b);
@@ -731,8 +779,7 @@
return __halves2half2(result1, result2);
}
-EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 pxor(const half2& a,
- const half2& b) {
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 pxor(const half2& a, const half2& b) {
half a1 = __low2half(a);
half a2 = __high2half(a);
half b1 = __low2half(b);
@@ -742,8 +789,7 @@
return __halves2half2(result1, result2);
}
-EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 pandnot(const half2& a,
- const half2& b) {
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 pandnot(const half2& a, const half2& b) {
half a1 = __low2half(a);
half a2 = __high2half(a);
half b1 = __low2half(b);
@@ -753,8 +799,7 @@
return __halves2half2(result1, result2);
}
-EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 padd(const half2& a,
- const half2& b) {
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 padd(const half2& a, const half2& b) {
#if defined(EIGEN_GPU_HAS_FP16_ARITHMETIC)
return __hadd2(a, b);
#else
@@ -768,8 +813,7 @@
#endif
}
-EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 psub(const half2& a,
- const half2& b) {
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 psub(const half2& a, const half2& b) {
#if defined(EIGEN_GPU_HAS_FP16_ARITHMETIC)
return __hsub2(a, b);
#else
@@ -795,8 +839,7 @@
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 pconj(const half2& a) { return a; }
-EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 pmul(const half2& a,
- const half2& b) {
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 pmul(const half2& a, const half2& b) {
#if defined(EIGEN_GPU_HAS_FP16_ARITHMETIC)
return __hmul2(a, b);
#else
@@ -810,11 +853,9 @@
#endif
}
-EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 pmadd(const half2& a,
- const half2& b,
- const half2& c) {
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 pmadd(const half2& a, const half2& b, const half2& c) {
#if defined(EIGEN_GPU_HAS_FP16_ARITHMETIC)
- return __hfma2(a, b, c);
+ return __hfma2(a, b, c);
#else
float a1 = __low2float(a);
float a2 = __high2float(a);
@@ -828,8 +869,7 @@
#endif
}
-EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 pdiv(const half2& a,
- const half2& b) {
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 pdiv(const half2& a, const half2& b) {
#if defined(EIGEN_GPU_HAS_FP16_ARITHMETIC)
return __h2div(a, b);
#else
@@ -843,8 +883,7 @@
#endif
}
-EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 pmin(const half2& a,
- const half2& b) {
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 pmin(const half2& a, const half2& b) {
float a1 = __low2float(a);
float a2 = __high2float(a);
float b1 = __low2float(b);
@@ -854,8 +893,7 @@
return __halves2half2(r1, r2);
}
-EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 pmax(const half2& a,
- const half2& b) {
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 pmax(const half2& a, const half2& b) {
float a1 = __low2float(a);
float a2 = __high2float(a);
float b1 = __low2float(b);
@@ -925,28 +963,15 @@
return __floats2half2_rn(r1, r2);
}
-#if (EIGEN_CUDA_SDK_VER >= 80000 && defined(EIGEN_CUDA_HAS_FP16_ARITHMETIC)) || \
- defined(EIGEN_HIP_DEVICE_COMPILE)
+#if (EIGEN_CUDA_SDK_VER >= 80000 && defined(EIGEN_CUDA_HAS_FP16_ARITHMETIC)) || defined(EIGEN_HIP_DEVICE_COMPILE)
-EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
-half2 plog(const half2& a) {
- return h2log(a);
-}
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 plog(const half2& a) { return h2log(a); }
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
-half2 pexp(const half2& a) {
- return h2exp(a);
-}
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 pexp(const half2& a) { return h2exp(a); }
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
-half2 psqrt(const half2& a) {
- return h2sqrt(a);
-}
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 psqrt(const half2& a) { return h2sqrt(a); }
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
-half2 prsqrt(const half2& a) {
- return h2rsqrt(a);
-}
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 prsqrt(const half2& a) { return h2rsqrt(a); }
#else
@@ -982,18 +1007,16 @@
return __floats2half2_rn(r1, r2);
}
#endif
-} // namespace
+} // namespace
template <>
-EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet4h2
-pload<Packet4h2>(const Eigen::half* from) {
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet4h2 pload<Packet4h2>(const Eigen::half* from) {
return *reinterpret_cast<const Packet4h2*>(from);
}
// unaligned load;
template <>
-EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet4h2
-ploadu<Packet4h2>(const Eigen::half* from) {
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet4h2 ploadu<Packet4h2>(const Eigen::half* from) {
Packet4h2 r;
half2* p_alias = reinterpret_cast<half2*>(&r);
p_alias[0] = ploadu(from + 0);
@@ -1004,8 +1027,7 @@
}
template <>
-EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet4h2
-ploaddup<Packet4h2>(const Eigen::half* from) {
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet4h2 ploaddup<Packet4h2>(const Eigen::half* from) {
Packet4h2 r;
half2* p_alias = reinterpret_cast<half2*>(&r);
p_alias[0] = ploaddup(from + 0);
@@ -1016,24 +1038,21 @@
}
template <>
-EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void pstore<Eigen::half>(
- Eigen::half* to, const Packet4h2& from) {
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void pstore<Eigen::half>(Eigen::half* to, const Packet4h2& from) {
*reinterpret_cast<Packet4h2*>(to) = from;
}
template <>
-EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void pstoreu<Eigen::half>(
- Eigen::half* to, const Packet4h2& from) {
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void pstoreu<Eigen::half>(Eigen::half* to, const Packet4h2& from) {
const half2* from_alias = reinterpret_cast<const half2*>(&from);
- pstoreu(to + 0,from_alias[0]);
- pstoreu(to + 2,from_alias[1]);
- pstoreu(to + 4,from_alias[2]);
- pstoreu(to + 6,from_alias[3]);
+ pstoreu(to + 0, from_alias[0]);
+ pstoreu(to + 2, from_alias[1]);
+ pstoreu(to + 4, from_alias[2]);
+ pstoreu(to + 6, from_alias[3]);
}
template <>
-EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Packet4h2
-ploadt_ro<Packet4h2, Aligned>(const Eigen::half* from) {
+EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Packet4h2 ploadt_ro<Packet4h2, Aligned>(const Eigen::half* from) {
#if defined(EIGEN_GPU_HAS_LDG)
Packet4h2 r;
r = __ldg(reinterpret_cast<const Packet4h2*>(from));
@@ -1050,8 +1069,7 @@
}
template <>
-EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Packet4h2
-ploadt_ro<Packet4h2, Unaligned>(const Eigen::half* from) {
+EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Packet4h2 ploadt_ro<Packet4h2, Unaligned>(const Eigen::half* from) {
Packet4h2 r;
half2* r_alias = reinterpret_cast<half2*>(&r);
r_alias[0] = ploadt_ro_unaligned(from + 0);
@@ -1062,8 +1080,7 @@
}
template <>
-EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet4h2
-pgather<Eigen::half, Packet4h2>(const Eigen::half* from, Index stride) {
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet4h2 pgather<Eigen::half, Packet4h2>(const Eigen::half* from, Index stride) {
Packet4h2 r;
half2* p_alias = reinterpret_cast<half2*>(&r);
p_alias[0] = __halves2half2(from[0 * stride], from[1 * stride]);
@@ -1074,8 +1091,8 @@
}
template <>
-EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void pscatter<Eigen::half, Packet4h2>(
- Eigen::half* to, const Packet4h2& from, Index stride) {
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void pscatter<Eigen::half, Packet4h2>(Eigen::half* to, const Packet4h2& from,
+ Index stride) {
const half2* from_alias = reinterpret_cast<const half2*>(&from);
pscatter(to + stride * 0, from_alias[0], stride);
pscatter(to + stride * 2, from_alias[1], stride);
@@ -1084,14 +1101,12 @@
}
template <>
-EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Eigen::half pfirst<Packet4h2>(
- const Packet4h2& a) {
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Eigen::half pfirst<Packet4h2>(const Packet4h2& a) {
return pfirst(*(reinterpret_cast<const half2*>(&a)));
}
template <>
-EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet4h2 pabs<Packet4h2>(
- const Packet4h2& a) {
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet4h2 pabs<Packet4h2>(const Packet4h2& a) {
Packet4h2 r;
half2* p_alias = reinterpret_cast<half2*>(&r);
const half2* a_alias = reinterpret_cast<const half2*>(&a);
@@ -1103,8 +1118,7 @@
}
template <>
-EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet4h2 ptrue<Packet4h2>(
- const Packet4h2& /*a*/) {
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet4h2 ptrue<Packet4h2>(const Packet4h2& /*a*/) {
half true_half = half_impl::raw_uint16_to_half(0xffffu);
return pset1<Packet4h2>(true_half);
}
@@ -1115,9 +1129,9 @@
return pset1<Packet4h2>(false_half);
}
-EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void ptranspose_double(
- double* d_row0, double* d_row1, double* d_row2, double* d_row3,
- double* d_row4, double* d_row5, double* d_row6, double* d_row7) {
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void ptranspose_double(double* d_row0, double* d_row1, double* d_row2,
+ double* d_row3, double* d_row4, double* d_row5,
+ double* d_row6, double* d_row7) {
double d_tmp;
d_tmp = d_row0[1];
d_row0[1] = d_row4[0];
@@ -1136,8 +1150,8 @@
d_row7[0] = d_tmp;
}
-EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void ptranspose_half2(
- half2* f_row0, half2* f_row1, half2* f_row2, half2* f_row3) {
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void ptranspose_half2(half2* f_row0, half2* f_row1, half2* f_row2,
+ half2* f_row3) {
half2 f_tmp;
f_tmp = f_row0[1];
f_row0[1] = f_row2[0];
@@ -1148,8 +1162,7 @@
f_row3[0] = f_tmp;
}
-EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void
-ptranspose_half(half2& f0, half2& f1) {
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void ptranspose_half(half2& f0, half2& f1) {
__half a1 = __low2half(f0);
__half a2 = __high2half(f0);
__half b1 = __low2half(f1);
@@ -1158,8 +1171,7 @@
f1 = __halves2half2(a2, b2);
}
-EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void
-ptranspose(PacketBlock<Packet4h2,8>& kernel) {
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void ptranspose(PacketBlock<Packet4h2, 8>& kernel) {
double* d_row0 = reinterpret_cast<double*>(&kernel.packet[0]);
double* d_row1 = reinterpret_cast<double*>(&kernel.packet[1]);
double* d_row2 = reinterpret_cast<double*>(&kernel.packet[2]);
@@ -1168,9 +1180,7 @@
double* d_row5 = reinterpret_cast<double*>(&kernel.packet[5]);
double* d_row6 = reinterpret_cast<double*>(&kernel.packet[6]);
double* d_row7 = reinterpret_cast<double*>(&kernel.packet[7]);
- ptranspose_double(d_row0, d_row1, d_row2, d_row3,
- d_row4, d_row5, d_row6, d_row7);
-
+ ptranspose_double(d_row0, d_row1, d_row2, d_row3, d_row4, d_row5, d_row6, d_row7);
half2* f_row0 = reinterpret_cast<half2*>(d_row0);
half2* f_row1 = reinterpret_cast<half2*>(d_row1);
@@ -1211,23 +1221,18 @@
ptranspose_half(f_row0[1], f_row1[1]);
ptranspose_half(f_row2[0], f_row3[0]);
ptranspose_half(f_row2[1], f_row3[1]);
-
}
template <>
-EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet4h2
-plset<Packet4h2>(const Eigen::half& a) {
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet4h2 plset<Packet4h2>(const Eigen::half& a) {
#if defined(EIGEN_HIP_DEVICE_COMPILE)
Packet4h2 r;
half2* p_alias = reinterpret_cast<half2*>(&r);
p_alias[0] = __halves2half2(a, __hadd(a, __float2half(1.0f)));
- p_alias[1] = __halves2half2(__hadd(a, __float2half(2.0f)),
- __hadd(a, __float2half(3.0f)));
- p_alias[2] = __halves2half2(__hadd(a, __float2half(4.0f)),
- __hadd(a, __float2half(5.0f)));
- p_alias[3] = __halves2half2(__hadd(a, __float2half(6.0f)),
- __hadd(a, __float2half(7.0f)));
+ p_alias[1] = __halves2half2(__hadd(a, __float2half(2.0f)), __hadd(a, __float2half(3.0f)));
+ p_alias[2] = __halves2half2(__hadd(a, __float2half(4.0f)), __hadd(a, __float2half(5.0f)));
+ p_alias[3] = __halves2half2(__hadd(a, __float2half(6.0f)), __hadd(a, __float2half(7.0f)));
return r;
#elif defined(EIGEN_CUDA_HAS_FP16_ARITHMETIC)
Packet4h2 r;
@@ -1235,8 +1240,8 @@
half2 b = pset1<half2>(a);
half2 c;
- half2 half_offset0 = __halves2half2(__float2half(0.0f),__float2half(2.0f));
- half2 half_offset1 = __halves2half2(__float2half(4.0f),__float2half(6.0f));
+ half2 half_offset0 = __halves2half2(__float2half(0.0f), __float2half(2.0f));
+ half2 half_offset1 = __halves2half2(__float2half(4.0f), __float2half(6.0f));
c = __hadd2(b, half_offset0);
r_alias[0] = plset(__low2half(c));
@@ -1261,9 +1266,8 @@
}
template <>
-EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet4h2
-pselect<Packet4h2>(const Packet4h2& mask, const Packet4h2& a,
- const Packet4h2& b) {
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet4h2 pselect<Packet4h2>(const Packet4h2& mask, const Packet4h2& a,
+ const Packet4h2& b) {
Packet4h2 r;
half2* r_alias = reinterpret_cast<half2*>(&r);
const half2* mask_alias = reinterpret_cast<const half2*>(&mask);
@@ -1277,8 +1281,7 @@
}
template <>
-EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet4h2
-pcmp_eq<Packet4h2>(const Packet4h2& a, const Packet4h2& b) {
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet4h2 pcmp_eq<Packet4h2>(const Packet4h2& a, const Packet4h2& b) {
Packet4h2 r;
half2* r_alias = reinterpret_cast<half2*>(&r);
const half2* a_alias = reinterpret_cast<const half2*>(&a);
@@ -1291,8 +1294,7 @@
}
template <>
-EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet4h2
-pcmp_lt<Packet4h2>(const Packet4h2& a, const Packet4h2& b) {
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet4h2 pcmp_lt<Packet4h2>(const Packet4h2& a, const Packet4h2& b) {
Packet4h2 r;
half2* r_alias = reinterpret_cast<half2*>(&r);
const half2* a_alias = reinterpret_cast<const half2*>(&a);
@@ -1305,8 +1307,7 @@
}
template <>
-EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet4h2
-pcmp_le<Packet4h2>(const Packet4h2& a, const Packet4h2& b) {
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet4h2 pcmp_le<Packet4h2>(const Packet4h2& a, const Packet4h2& b) {
Packet4h2 r;
half2* r_alias = reinterpret_cast<half2*>(&r);
const half2* a_alias = reinterpret_cast<const half2*>(&a);
@@ -1319,8 +1320,7 @@
}
template <>
-EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet4h2 pand<Packet4h2>(
- const Packet4h2& a, const Packet4h2& b) {
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet4h2 pand<Packet4h2>(const Packet4h2& a, const Packet4h2& b) {
Packet4h2 r;
half2* r_alias = reinterpret_cast<half2*>(&r);
const half2* a_alias = reinterpret_cast<const half2*>(&a);
@@ -1333,8 +1333,7 @@
}
template <>
-EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet4h2 por<Packet4h2>(
- const Packet4h2& a, const Packet4h2& b) {
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet4h2 por<Packet4h2>(const Packet4h2& a, const Packet4h2& b) {
Packet4h2 r;
half2* r_alias = reinterpret_cast<half2*>(&r);
const half2* a_alias = reinterpret_cast<const half2*>(&a);
@@ -1347,8 +1346,7 @@
}
template <>
-EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet4h2 pxor<Packet4h2>(
- const Packet4h2& a, const Packet4h2& b) {
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet4h2 pxor<Packet4h2>(const Packet4h2& a, const Packet4h2& b) {
Packet4h2 r;
half2* r_alias = reinterpret_cast<half2*>(&r);
const half2* a_alias = reinterpret_cast<const half2*>(&a);
@@ -1361,8 +1359,7 @@
}
template <>
-EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet4h2
-pandnot<Packet4h2>(const Packet4h2& a, const Packet4h2& b) {
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet4h2 pandnot<Packet4h2>(const Packet4h2& a, const Packet4h2& b) {
Packet4h2 r;
half2* r_alias = reinterpret_cast<half2*>(&r);
const half2* a_alias = reinterpret_cast<const half2*>(&a);
@@ -1375,8 +1372,7 @@
}
template <>
-EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet4h2 padd<Packet4h2>(
- const Packet4h2& a, const Packet4h2& b) {
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet4h2 padd<Packet4h2>(const Packet4h2& a, const Packet4h2& b) {
Packet4h2 r;
half2* r_alias = reinterpret_cast<half2*>(&r);
const half2* a_alias = reinterpret_cast<const half2*>(&a);
@@ -1389,8 +1385,7 @@
}
template <>
-EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet4h2 psub<Packet4h2>(
- const Packet4h2& a, const Packet4h2& b) {
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet4h2 psub<Packet4h2>(const Packet4h2& a, const Packet4h2& b) {
Packet4h2 r;
half2* r_alias = reinterpret_cast<half2*>(&r);
const half2* a_alias = reinterpret_cast<const half2*>(&a);
@@ -1420,8 +1415,7 @@
}
template <>
-EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet4h2 pmul<Packet4h2>(
- const Packet4h2& a, const Packet4h2& b) {
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet4h2 pmul<Packet4h2>(const Packet4h2& a, const Packet4h2& b) {
Packet4h2 r;
half2* r_alias = reinterpret_cast<half2*>(&r);
const half2* a_alias = reinterpret_cast<const half2*>(&a);
@@ -1434,8 +1428,8 @@
}
template <>
-EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet4h2 pmadd<Packet4h2>(
- const Packet4h2& a, const Packet4h2& b, const Packet4h2& c) {
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet4h2 pmadd<Packet4h2>(const Packet4h2& a, const Packet4h2& b,
+ const Packet4h2& c) {
Packet4h2 r;
half2* r_alias = reinterpret_cast<half2*>(&r);
const half2* a_alias = reinterpret_cast<const half2*>(&a);
@@ -1449,8 +1443,7 @@
}
template <>
-EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet4h2 pdiv<Packet4h2>(
- const Packet4h2& a, const Packet4h2& b) {
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet4h2 pdiv<Packet4h2>(const Packet4h2& a, const Packet4h2& b) {
Packet4h2 r;
half2* r_alias = reinterpret_cast<half2*>(&r);
const half2* a_alias = reinterpret_cast<const half2*>(&a);
@@ -1463,8 +1456,7 @@
}
template <>
-EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet4h2 pmin<Packet4h2>(
- const Packet4h2& a, const Packet4h2& b) {
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet4h2 pmin<Packet4h2>(const Packet4h2& a, const Packet4h2& b) {
Packet4h2 r;
half2* r_alias = reinterpret_cast<half2*>(&r);
const half2* a_alias = reinterpret_cast<const half2*>(&a);
@@ -1477,8 +1469,7 @@
}
template <>
-EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet4h2 pmax<Packet4h2>(
- const Packet4h2& a, const Packet4h2& b) {
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet4h2 pmax<Packet4h2>(const Packet4h2& a, const Packet4h2& b) {
Packet4h2 r;
half2* r_alias = reinterpret_cast<half2*>(&r);
const half2* a_alias = reinterpret_cast<const half2*>(&a);
@@ -1491,64 +1482,53 @@
}
template <>
-EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Eigen::half predux<Packet4h2>(
- const Packet4h2& a) {
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Eigen::half predux<Packet4h2>(const Packet4h2& a) {
const half2* a_alias = reinterpret_cast<const half2*>(&a);
- return predux(a_alias[0]) + predux(a_alias[1]) +
- predux(a_alias[2]) + predux(a_alias[3]);
+ return predux(a_alias[0]) + predux(a_alias[1]) + predux(a_alias[2]) + predux(a_alias[3]);
}
template <>
-EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Eigen::half predux_max<Packet4h2>(
- const Packet4h2& a) {
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Eigen::half predux_max<Packet4h2>(const Packet4h2& a) {
const half2* a_alias = reinterpret_cast<const half2*>(&a);
- half2 m0 = __halves2half2(predux_max(a_alias[0]),
- predux_max(a_alias[1]));
- half2 m1 = __halves2half2(predux_max(a_alias[2]),
- predux_max(a_alias[3]));
- __half first = predux_max(m0);
+ half2 m0 = __halves2half2(predux_max(a_alias[0]), predux_max(a_alias[1]));
+ half2 m1 = __halves2half2(predux_max(a_alias[2]), predux_max(a_alias[3]));
+ __half first = predux_max(m0);
__half second = predux_max(m1);
#if defined(EIGEN_CUDA_HAS_FP16_ARITHMETIC)
return (__hgt(first, second) ? first : second);
#else
- float ffirst = __half2float(first);
+ float ffirst = __half2float(first);
float fsecond = __half2float(second);
- return (ffirst > fsecond)? first: second;
+ return (ffirst > fsecond) ? first : second;
#endif
}
template <>
-EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Eigen::half predux_min<Packet4h2>(
- const Packet4h2& a) {
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Eigen::half predux_min<Packet4h2>(const Packet4h2& a) {
const half2* a_alias = reinterpret_cast<const half2*>(&a);
- half2 m0 = __halves2half2(predux_min(a_alias[0]),
- predux_min(a_alias[1]));
- half2 m1 = __halves2half2(predux_min(a_alias[2]),
- predux_min(a_alias[3]));
- __half first = predux_min(m0);
+ half2 m0 = __halves2half2(predux_min(a_alias[0]), predux_min(a_alias[1]));
+ half2 m1 = __halves2half2(predux_min(a_alias[2]), predux_min(a_alias[3]));
+ __half first = predux_min(m0);
__half second = predux_min(m1);
#if defined(EIGEN_CUDA_HAS_FP16_ARITHMETIC)
return (__hlt(first, second) ? first : second);
#else
- float ffirst = __half2float(first);
+ float ffirst = __half2float(first);
float fsecond = __half2float(second);
- return (ffirst < fsecond)? first: second;
+ return (ffirst < fsecond) ? first : second;
#endif
}
// likely overflow/underflow
template <>
-EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Eigen::half predux_mul<Packet4h2>(
- const Packet4h2& a) {
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Eigen::half predux_mul<Packet4h2>(const Packet4h2& a) {
const half2* a_alias = reinterpret_cast<const half2*>(&a);
- return predux_mul(pmul(pmul(a_alias[0], a_alias[1]),
- pmul(a_alias[2], a_alias[3])));
+ return predux_mul(pmul(pmul(a_alias[0], a_alias[1]), pmul(a_alias[2], a_alias[3])));
}
template <>
-EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet4h2
-plog1p<Packet4h2>(const Packet4h2& a) {
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet4h2 plog1p<Packet4h2>(const Packet4h2& a) {
Packet4h2 r;
half2* r_alias = reinterpret_cast<half2*>(&r);
const half2* a_alias = reinterpret_cast<const half2*>(&a);
@@ -1560,8 +1540,7 @@
}
template <>
-EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet4h2
-pexpm1<Packet4h2>(const Packet4h2& a) {
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet4h2 pexpm1<Packet4h2>(const Packet4h2& a) {
Packet4h2 r;
half2* r_alias = reinterpret_cast<half2*>(&r);
const half2* a_alias = reinterpret_cast<const half2*>(&a);
@@ -1609,8 +1588,7 @@
}
template <>
-EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet4h2
-prsqrt<Packet4h2>(const Packet4h2& a) {
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet4h2 prsqrt<Packet4h2>(const Packet4h2& a) {
Packet4h2 r;
half2* r_alias = reinterpret_cast<half2*>(&r);
const half2* a_alias = reinterpret_cast<const half2*>(&a);
@@ -1623,9 +1601,8 @@
// The following specialized padd, pmul, pdiv, pmin, pmax, pset1 are needed for
// the implementation of GPU half reduction.
-template<>
-EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 padd<half2>(const half2& a,
- const half2& b) {
+template <>
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 padd<half2>(const half2& a, const half2& b) {
#if defined(EIGEN_GPU_HAS_FP16_ARITHMETIC)
return __hadd2(a, b);
#else
@@ -1639,9 +1616,8 @@
#endif
}
-template<>
-EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 pmul<half2>(const half2& a,
- const half2& b) {
+template <>
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 pmul<half2>(const half2& a, const half2& b) {
#if defined(EIGEN_GPU_HAS_FP16_ARITHMETIC)
return __hmul2(a, b);
#else
@@ -1655,9 +1631,8 @@
#endif
}
-template<>
-EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 pdiv<half2>(const half2& a,
- const half2& b) {
+template <>
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 pdiv<half2>(const half2& a, const half2& b) {
#if defined(EIGEN_GPU_HAS_FP16_ARITHMETIC)
return __h2div(a, b);
#else
@@ -1671,9 +1646,8 @@
#endif
}
-template<>
-EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 pmin<half2>(const half2& a,
- const half2& b) {
+template <>
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 pmin<half2>(const half2& a, const half2& b) {
float a1 = __low2float(a);
float a2 = __high2float(a);
float b1 = __low2float(b);
@@ -1683,9 +1657,8 @@
return __halves2half2(r1, r2);
}
-template<>
-EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 pmax<half2>(const half2& a,
- const half2& b) {
+template <>
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 pmax<half2>(const half2& a, const half2& b) {
float a1 = __low2float(a);
float a2 = __high2float(a);
float b1 = __low2float(b);
@@ -1695,15 +1668,14 @@
return __halves2half2(r1, r2);
}
-#endif // (defined(EIGEN_HAS_CUDA_FP16) || defined(EIGEN_HAS_HIP_FP16)) && defined(EIGEN_GPU_COMPILE_PHASE)
+#endif // (defined(EIGEN_HAS_CUDA_FP16) || defined(EIGEN_HAS_HIP_FP16)) && defined(EIGEN_GPU_COMPILE_PHASE)
#undef EIGEN_GPU_HAS_LDG
#undef EIGEN_CUDA_HAS_FP16_ARITHMETIC
#undef EIGEN_GPU_HAS_FP16_ARITHMETIC
-} // end namespace internal
+} // end namespace internal
-} // end namespace Eigen
+} // end namespace Eigen
-
-#endif // EIGEN_PACKET_MATH_GPU_H
+#endif // EIGEN_PACKET_MATH_GPU_H
diff --git a/Eigen/src/Core/arch/GPU/Tuple.h b/Eigen/src/Core/arch/GPU/Tuple.h
index e223ca1..6bea9ac 100644
--- a/Eigen/src/Core/arch/GPU/Tuple.h
+++ b/Eigen/src/Core/arch/GPU/Tuple.h
@@ -20,196 +20,173 @@
namespace tuple_impl {
// Internal tuple implementation.
-template<size_t N, typename... Types>
+template <size_t N, typename... Types>
class TupleImpl;
// Generic recursive tuple.
-template<size_t N, typename T1, typename... Ts>
+template <size_t N, typename T1, typename... Ts>
class TupleImpl<N, T1, Ts...> {
public:
// Tuple may contain Eigen types.
EIGEN_MAKE_ALIGNED_OPERATOR_NEW
-
+
// Default constructor, enable if all types are default-constructible.
- template<typename U1 = T1, typename EnableIf = std::enable_if_t<
- std::is_default_constructible<U1>::value
- && reduce_all<std::is_default_constructible<Ts>::value...>::value
- >>
- EIGEN_CONSTEXPR EIGEN_DEVICE_FUNC
- TupleImpl() : head_{}, tail_{} {}
-
+ template <typename U1 = T1,
+ typename EnableIf = std::enable_if_t<std::is_default_constructible<U1>::value &&
+ reduce_all<std::is_default_constructible<Ts>::value...>::value>>
+ EIGEN_CONSTEXPR EIGEN_DEVICE_FUNC TupleImpl() : head_{}, tail_{} {}
+
// Element constructor.
- template<typename U1, typename... Us,
- // Only enable if...
- typename EnableIf = std::enable_if_t<
- // the number of input arguments match, and ...
- sizeof...(Us) == sizeof...(Ts) && (
- // this does not look like a copy/move constructor.
- N > 1 || std::is_convertible<U1, T1>::value)
- >>
- EIGEN_CONSTEXPR EIGEN_DEVICE_FUNC
- TupleImpl(U1&& arg1, Us&&... args)
- : head_(std::forward<U1>(arg1)), tail_(std::forward<Us>(args)...) {}
-
- // The first stored value.
- EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
- T1& head() {
- return head_;
- }
-
- EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
- const T1& head() const {
- return head_;
- }
-
+ template <typename U1, typename... Us,
+ // Only enable if...
+ typename EnableIf = std::enable_if_t<
+ // the number of input arguments match, and ...
+ sizeof...(Us) == sizeof...(Ts) && (
+ // this does not look like a copy/move constructor.
+ N > 1 || std::is_convertible<U1, T1>::value)>>
+ EIGEN_CONSTEXPR EIGEN_DEVICE_FUNC TupleImpl(U1&& arg1, Us&&... args)
+ : head_(std::forward<U1>(arg1)), tail_(std::forward<Us>(args)...) {}
+
+ // The first stored value.
+ EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE T1& head() { return head_; }
+
+ EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE const T1& head() const { return head_; }
+
// The tail values.
- EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
- TupleImpl<N-1, Ts...>& tail() {
- return tail_;
- }
-
- EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
- const TupleImpl<N-1, Ts...>& tail() const {
- return tail_;
- }
-
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
- void swap(TupleImpl& other) {
+ EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE TupleImpl<N - 1, Ts...>& tail() { return tail_; }
+
+ EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE const TupleImpl<N - 1, Ts...>& tail() const { return tail_; }
+
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void swap(TupleImpl& other) {
using numext::swap;
swap(head_, other.head_);
swap(tail_, other.tail_);
}
-
- template<typename... UTypes>
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
- TupleImpl& operator=(const TupleImpl<N, UTypes...>& other) {
+
+ template <typename... UTypes>
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TupleImpl& operator=(const TupleImpl<N, UTypes...>& other) {
head_ = other.head_;
tail_ = other.tail_;
return *this;
}
-
- template<typename... UTypes>
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
- TupleImpl& operator=(TupleImpl<N, UTypes...>&& other) {
+
+ template <typename... UTypes>
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TupleImpl& operator=(TupleImpl<N, UTypes...>&& other) {
head_ = std::move(other.head_);
tail_ = std::move(other.tail_);
return *this;
}
-
+
private:
// Allow related tuples to reference head_/tail_.
- template<size_t M, typename... UTypes>
+ template <size_t M, typename... UTypes>
friend class TupleImpl;
-
+
T1 head_;
- TupleImpl<N-1, Ts...> tail_;
+ TupleImpl<N - 1, Ts...> tail_;
};
// Empty tuple specialization.
-template<>
+template <>
class TupleImpl<size_t(0)> {};
-template<typename TupleType>
+template <typename TupleType>
struct is_tuple : std::false_type {};
-template<typename... Types>
-struct is_tuple< TupleImpl<sizeof...(Types), Types...> > : std::true_type {};
+template <typename... Types>
+struct is_tuple<TupleImpl<sizeof...(Types), Types...>> : std::true_type {};
// Gets an element from a tuple.
-template<size_t Idx, typename T1, typename... Ts>
+template <size_t Idx, typename T1, typename... Ts>
struct tuple_get_impl {
using TupleType = TupleImpl<sizeof...(Ts) + 1, T1, Ts...>;
using ReturnType = typename tuple_get_impl<Idx - 1, Ts...>::ReturnType;
-
- static EIGEN_CONSTEXPR EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
- ReturnType& run(TupleType& tuple) {
- return tuple_get_impl<Idx-1, Ts...>::run(tuple.tail());
+
+ static EIGEN_CONSTEXPR EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE ReturnType& run(TupleType& tuple) {
+ return tuple_get_impl<Idx - 1, Ts...>::run(tuple.tail());
}
- static EIGEN_CONSTEXPR EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
- const ReturnType& run(const TupleType& tuple) {
- return tuple_get_impl<Idx-1, Ts...>::run(tuple.tail());
+ static EIGEN_CONSTEXPR EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE const ReturnType& run(const TupleType& tuple) {
+ return tuple_get_impl<Idx - 1, Ts...>::run(tuple.tail());
}
};
// Base case, getting the head element.
-template<typename T1, typename... Ts>
+template <typename T1, typename... Ts>
struct tuple_get_impl<0, T1, Ts...> {
using TupleType = TupleImpl<sizeof...(Ts) + 1, T1, Ts...>;
using ReturnType = T1;
- static EIGEN_CONSTEXPR EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
- T1& run(TupleType& tuple) {
- return tuple.head();
- }
+ static EIGEN_CONSTEXPR EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE T1& run(TupleType& tuple) { return tuple.head(); }
- static EIGEN_CONSTEXPR EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
- const T1& run(const TupleType& tuple) {
+ static EIGEN_CONSTEXPR EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE const T1& run(const TupleType& tuple) {
return tuple.head();
}
};
// Concatenates N Tuples.
-template<size_t NTuples, typename... Tuples>
+template <size_t NTuples, typename... Tuples>
struct tuple_cat_impl;
-template<size_t NTuples, size_t N1, typename... Args1, size_t N2, typename... Args2, typename... Tuples>
+template <size_t NTuples, size_t N1, typename... Args1, size_t N2, typename... Args2, typename... Tuples>
struct tuple_cat_impl<NTuples, TupleImpl<N1, Args1...>, TupleImpl<N2, Args2...>, Tuples...> {
using TupleType1 = TupleImpl<N1, Args1...>;
using TupleType2 = TupleImpl<N2, Args2...>;
using MergedTupleType = TupleImpl<N1 + N2, Args1..., Args2...>;
-
- using ReturnType = typename tuple_cat_impl<NTuples-1, MergedTupleType, Tuples...>::ReturnType;
-
+
+ using ReturnType = typename tuple_cat_impl<NTuples - 1, MergedTupleType, Tuples...>::ReturnType;
+
// Uses the index sequences to extract and merge elements from tuple1 and tuple2,
// then recursively calls again.
- template<typename Tuple1, size_t... I1s, typename Tuple2, size_t... I2s, typename... MoreTuples>
- static EIGEN_CONSTEXPR EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
- ReturnType run(Tuple1&& tuple1, std::index_sequence<I1s...>,
- Tuple2&& tuple2, std::index_sequence<I2s...>,
- MoreTuples&&... tuples) {
- return tuple_cat_impl<NTuples-1, MergedTupleType, Tuples...>::run(
+ template <typename Tuple1, size_t... I1s, typename Tuple2, size_t... I2s, typename... MoreTuples>
+ static EIGEN_CONSTEXPR EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE ReturnType run(Tuple1&& tuple1,
+ std::index_sequence<I1s...>,
+ Tuple2&& tuple2,
+ std::index_sequence<I2s...>,
+ MoreTuples&&... tuples) {
+ return tuple_cat_impl<NTuples - 1, MergedTupleType, Tuples...>::run(
MergedTupleType(tuple_get_impl<I1s, Args1...>::run(std::forward<Tuple1>(tuple1))...,
tuple_get_impl<I2s, Args2...>::run(std::forward<Tuple2>(tuple2))...),
std::forward<MoreTuples>(tuples)...);
}
-
+
// Concatenates the first two tuples.
- template<typename Tuple1, typename Tuple2, typename... MoreTuples>
- static EIGEN_CONSTEXPR EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
- ReturnType run(Tuple1&& tuple1, Tuple2&& tuple2, MoreTuples&&... tuples) {
- return run(std::forward<Tuple1>(tuple1), std::make_index_sequence<N1>{},
- std::forward<Tuple2>(tuple2), std::make_index_sequence<N2>{},
- std::forward<MoreTuples>(tuples)...);
+ template <typename Tuple1, typename Tuple2, typename... MoreTuples>
+ static EIGEN_CONSTEXPR EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE ReturnType run(Tuple1&& tuple1, Tuple2&& tuple2,
+ MoreTuples&&... tuples) {
+ return run(std::forward<Tuple1>(tuple1), std::make_index_sequence<N1>{}, std::forward<Tuple2>(tuple2),
+ std::make_index_sequence<N2>{}, std::forward<MoreTuples>(tuples)...);
}
};
// Base case with a single tuple.
-template<size_t N, typename... Args>
-struct tuple_cat_impl<1, TupleImpl<N, Args...> > {
+template <size_t N, typename... Args>
+struct tuple_cat_impl<1, TupleImpl<N, Args...>> {
using ReturnType = TupleImpl<N, Args...>;
-
- template<typename Tuple1>
- static EIGEN_CONSTEXPR EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
- ReturnType run(Tuple1&& tuple1) {
+
+ template <typename Tuple1>
+ static EIGEN_CONSTEXPR EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE ReturnType run(Tuple1&& tuple1) {
return tuple1;
}
};
// Special case of no tuples.
-template<>
-struct tuple_cat_impl<0> {
+template <>
+struct tuple_cat_impl<0> {
using ReturnType = TupleImpl<0>;
- static EIGEN_CONSTEXPR EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
- ReturnType run() {return ReturnType{}; }
+ static EIGEN_CONSTEXPR EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE ReturnType run() { return ReturnType{}; }
};
// For use in make_tuple, unwraps a reference_wrapper.
template <typename T>
-struct unwrap_reference_wrapper { using type = T; };
-
+struct unwrap_reference_wrapper {
+ using type = T;
+};
+
template <typename T>
-struct unwrap_reference_wrapper<std::reference_wrapper<T> > { using type = T&; };
+struct unwrap_reference_wrapper<std::reference_wrapper<T>> {
+ using type = T&;
+};
// For use in make_tuple, decays a type and unwraps a reference_wrapper.
template <typename T>
@@ -220,11 +197,11 @@
/**
* Utility for determining a tuple's size.
*/
-template<typename Tuple>
+template <typename Tuple>
struct tuple_size;
-template<typename... Types >
-struct tuple_size< TupleImpl<sizeof...(Types), Types...> > : std::integral_constant<size_t, sizeof...(Types)> {};
+template <typename... Types>
+struct tuple_size<TupleImpl<sizeof...(Types), Types...>> : std::integral_constant<size_t, sizeof...(Types)> {};
/**
* Gets an element of a tuple.
@@ -233,17 +210,15 @@
* \param tuple the tuple.
* \return a reference to the desired element.
*/
-template<size_t Idx, typename... Types>
-EIGEN_CONSTEXPR EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
-const typename tuple_get_impl<Idx, Types...>::ReturnType&
-get(const TupleImpl<sizeof...(Types), Types...>& tuple) {
+template <size_t Idx, typename... Types>
+EIGEN_CONSTEXPR EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const typename tuple_get_impl<Idx, Types...>::ReturnType& get(
+ const TupleImpl<sizeof...(Types), Types...>& tuple) {
return tuple_get_impl<Idx, Types...>::run(tuple);
}
-template<size_t Idx, typename... Types>
-EIGEN_CONSTEXPR EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
-typename tuple_get_impl<Idx, Types...>::ReturnType&
-get(TupleImpl<sizeof...(Types), Types...>& tuple) {
+template <size_t Idx, typename... Types>
+EIGEN_CONSTEXPR EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE typename tuple_get_impl<Idx, Types...>::ReturnType& get(
+ TupleImpl<sizeof...(Types), Types...>& tuple) {
return tuple_get_impl<Idx, Types...>::run(tuple);
}
@@ -252,31 +227,27 @@
* \param tuples ... list of tuples.
* \return concatenated tuple.
*/
-template<typename... Tuples,
- typename EnableIf = std::enable_if_t<
- internal::reduce_all<
- is_tuple<typename std::decay<Tuples>::type>::value...>::value>>
+template <typename... Tuples, typename EnableIf = std::enable_if_t<
+ internal::reduce_all<is_tuple<typename std::decay<Tuples>::type>::value...>::value>>
EIGEN_CONSTEXPR EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
-typename tuple_cat_impl<sizeof...(Tuples), typename std::decay<Tuples>::type...>::ReturnType
-tuple_cat(Tuples&&... tuples) {
+ typename tuple_cat_impl<sizeof...(Tuples), typename std::decay<Tuples>::type...>::ReturnType
+ tuple_cat(Tuples&&... tuples) {
return tuple_cat_impl<sizeof...(Tuples), typename std::decay<Tuples>::type...>::run(std::forward<Tuples>(tuples)...);
}
/**
* Tie arguments together into a tuple.
*/
-template <typename... Args, typename ReturnType = TupleImpl<sizeof...(Args), Args&...> >
-EIGEN_CONSTEXPR EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
-ReturnType tie(Args&... args) EIGEN_NOEXCEPT {
- return ReturnType{args...};
+template <typename... Args, typename ReturnType = TupleImpl<sizeof...(Args), Args&...>>
+EIGEN_CONSTEXPR EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE ReturnType tie(Args&... args) EIGEN_NOEXCEPT {
+ return ReturnType{args...};
}
/**
* Create a tuple of l-values with the supplied arguments.
*/
-template <typename... Args, typename ReturnType = TupleImpl<sizeof...(Args), typename unwrap_decay<Args>::type...> >
-EIGEN_CONSTEXPR EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
-ReturnType make_tuple(Args&&... args) {
+template <typename... Args, typename ReturnType = TupleImpl<sizeof...(Args), typename unwrap_decay<Args>::type...>>
+EIGEN_CONSTEXPR EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE ReturnType make_tuple(Args&&... args) {
return ReturnType{std::forward<Args>(args)...};
}
@@ -284,15 +255,15 @@
* Forward a set of arguments as a tuple.
*/
template <typename... Args>
-EIGEN_CONSTEXPR EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
-TupleImpl<sizeof...(Args), Args...> forward_as_tuple(Args&&... args) {
+EIGEN_CONSTEXPR EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TupleImpl<sizeof...(Args), Args...> forward_as_tuple(
+ Args&&... args) {
return TupleImpl<sizeof...(Args), Args...>(std::forward<Args>(args)...);
}
/**
* Alternative to std::tuple that can be used on device.
*/
-template<typename... Types>
+template <typename... Types>
using tuple = TupleImpl<sizeof...(Types), Types...>;
} // namespace tuple_impl
diff --git a/Eigen/src/Core/arch/GPU/TypeCasting.h b/Eigen/src/Core/arch/GPU/TypeCasting.h
index aa89cd2..ae43f8e 100644
--- a/Eigen/src/Core/arch/GPU/TypeCasting.h
+++ b/Eigen/src/Core/arch/GPU/TypeCasting.h
@@ -22,61 +22,56 @@
template <>
struct type_casting_traits<Eigen::half, float> {
- enum {
- VectorizedCast = 1,
- SrcCoeffRatio = 1,
- TgtCoeffRatio = 2
- };
+ enum { VectorizedCast = 1, SrcCoeffRatio = 1, TgtCoeffRatio = 2 };
};
-template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE float4 pcast<half2, float4>(const half2& a, const half2& b) {
+template <>
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE float4 pcast<half2, float4>(const half2& a, const half2& b) {
float2 r1 = __half22float2(a);
float2 r2 = __half22float2(b);
return make_float4(r1.x, r1.y, r2.x, r2.y);
}
-
-template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet4h2 pcast<float4, Packet4h2>(const float4& a, const float4& b) {
+template <>
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet4h2 pcast<float4, Packet4h2>(const float4& a, const float4& b) {
Packet4h2 r;
- half2* r_alias=reinterpret_cast<half2*>(&r);
- r_alias[0]=__floats2half2_rn(a.x,a.y);
- r_alias[1]=__floats2half2_rn(a.z,a.w);
- r_alias[2]=__floats2half2_rn(b.x,b.y);
- r_alias[3]=__floats2half2_rn(b.z,b.w);
+ half2* r_alias = reinterpret_cast<half2*>(&r);
+ r_alias[0] = __floats2half2_rn(a.x, a.y);
+ r_alias[1] = __floats2half2_rn(a.z, a.w);
+ r_alias[2] = __floats2half2_rn(b.x, b.y);
+ r_alias[3] = __floats2half2_rn(b.z, b.w);
return r;
}
template <>
struct type_casting_traits<float, Eigen::half> {
- enum {
- VectorizedCast = 1,
- SrcCoeffRatio = 2,
- TgtCoeffRatio = 1
- };
+ enum { VectorizedCast = 1, SrcCoeffRatio = 2, TgtCoeffRatio = 1 };
};
-template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE float4 pcast<Packet4h2, float4>(const Packet4h2& a) {
+template <>
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE float4 pcast<Packet4h2, float4>(const Packet4h2& a) {
// Simply discard the second half of the input
float4 r;
- const half2* a_alias=reinterpret_cast<const half2*>(&a);
+ const half2* a_alias = reinterpret_cast<const half2*>(&a);
float2 r1 = __half22float2(a_alias[0]);
float2 r2 = __half22float2(a_alias[1]);
- r.x=static_cast<float>(r1.x);
- r.y=static_cast<float>(r1.y);
- r.z=static_cast<float>(r2.x);
- r.w=static_cast<float>(r2.y);
+ r.x = static_cast<float>(r1.x);
+ r.y = static_cast<float>(r1.y);
+ r.z = static_cast<float>(r2.x);
+ r.w = static_cast<float>(r2.y);
return r;
}
-template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 pcast<float4, half2>(const float4& a) {
+template <>
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 pcast<float4, half2>(const float4& a) {
// Simply discard the second half of the input
return __floats2half2_rn(a.x, a.y);
}
#endif
-} // end namespace internal
+} // end namespace internal
-} // end namespace Eigen
+} // end namespace Eigen
-#endif // EIGEN_TYPE_CASTING_GPU_H
+#endif // EIGEN_TYPE_CASTING_GPU_H