Vectorize atanh & add a missing definition and unit test for atan.
diff --git a/Eigen/src/Core/GenericPacketMath.h b/Eigen/src/Core/GenericPacketMath.h
index 499a94b..8b5f030 100644
--- a/Eigen/src/Core/GenericPacketMath.h
+++ b/Eigen/src/Core/GenericPacketMath.h
@@ -82,6 +82,7 @@
HasASin = 0,
HasACos = 0,
HasATan = 0,
+ HasATanh = 0,
HasSinh = 0,
HasCosh = 0,
HasTanh = 0,
@@ -857,9 +858,6 @@
template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
Packet pacos(const Packet& a) { EIGEN_USING_STD(acos); return acos(a); }
-/** \internal \returns the arc tangent of \a a (coeff-wise) */
-template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
-Packet patan(const Packet& a) { EIGEN_USING_STD(atan); return atan(a); }
/** \internal \returns the hyperbolic sine of \a a (coeff-wise) */
template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
@@ -869,10 +867,18 @@
template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
Packet pcosh(const Packet& a) { EIGEN_USING_STD(cosh); return cosh(a); }
+/** \internal \returns the arc tangent of \a a (coeff-wise) */
+template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
+Packet patan(const Packet& a) { EIGEN_USING_STD(atan); return atan(a); }
+
/** \internal \returns the hyperbolic tan of \a a (coeff-wise) */
template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
Packet ptanh(const Packet& a) { EIGEN_USING_STD(tanh); return tanh(a); }
+/** \internal \returns the arc tangent of \a a (coeff-wise) */
+template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
+Packet patanh(const Packet& a) { EIGEN_USING_STD(atanh); return atanh(a); }
+
/** \internal \returns the exp of \a a (coeff-wise) */
template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
Packet pexp(const Packet& a) { EIGEN_USING_STD(exp); return exp(a); }
diff --git a/Eigen/src/Core/arch/AVX/MathFunctions.h b/Eigen/src/Core/arch/AVX/MathFunctions.h
index cb7d7b8..8ce181e 100644
--- a/Eigen/src/Core/arch/AVX/MathFunctions.h
+++ b/Eigen/src/Core/arch/AVX/MathFunctions.h
@@ -58,6 +58,12 @@
template <>
EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet8f
+patanh<Packet8f>(const Packet8f& _x) {
+ return patanh_float(_x);
+}
+
+template <>
+EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet8f
plog<Packet8f>(const Packet8f& _x) {
return plog_float(_x);
}
diff --git a/Eigen/src/Core/arch/AVX/PacketMath.h b/Eigen/src/Core/arch/AVX/PacketMath.h
index 60e1ff4..89963eb 100644
--- a/Eigen/src/Core/arch/AVX/PacketMath.h
+++ b/Eigen/src/Core/arch/AVX/PacketMath.h
@@ -76,6 +76,7 @@
HasACos = 1,
HasASin = 1,
HasATan = 1,
+ HasATanh = 1,
HasLog = 1,
HasLog1p = 1,
HasExpm1 = 1,
diff --git a/Eigen/src/Core/arch/AVX512/MathFunctions.h b/Eigen/src/Core/arch/AVX512/MathFunctions.h
index af47a85..be0e4dd 100644
--- a/Eigen/src/Core/arch/AVX512/MathFunctions.h
+++ b/Eigen/src/Core/arch/AVX512/MathFunctions.h
@@ -276,6 +276,12 @@
}
template <>
+EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet16f
+patanh<Packet16f>(const Packet16f& _x) {
+ return patanh_float(_x);
+}
+
+template <>
EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet8d
patan<Packet8d>(const Packet8d& _x) {
return patan_double(_x);
diff --git a/Eigen/src/Core/arch/AVX512/PacketMath.h b/Eigen/src/Core/arch/AVX512/PacketMath.h
index 916babf..4628b21 100644
--- a/Eigen/src/Core/arch/AVX512/PacketMath.h
+++ b/Eigen/src/Core/arch/AVX512/PacketMath.h
@@ -125,6 +125,7 @@
HasACos = 1,
HasASin = 1,
HasATan = 1,
+ HasATanh = 1,
#if EIGEN_HAS_AVX512_MATH
HasLog = 1,
HasLog1p = 1,
diff --git a/Eigen/src/Core/arch/AltiVec/MathFunctions.h b/Eigen/src/Core/arch/AltiVec/MathFunctions.h
index 45761e2..7648b40 100644
--- a/Eigen/src/Core/arch/AltiVec/MathFunctions.h
+++ b/Eigen/src/Core/arch/AltiVec/MathFunctions.h
@@ -60,6 +60,12 @@
return patan_float(_x);
}
+template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
+Packet4f patanh<Packet4f>(const Packet4f& _x)
+{
+ return patanh_float(_x);
+}
+
#ifdef EIGEN_VECTORIZE_VSX
template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
Packet4f psqrt<Packet4f>(const Packet4f& x)
diff --git a/Eigen/src/Core/arch/AltiVec/PacketMath.h b/Eigen/src/Core/arch/AltiVec/PacketMath.h
index 4cebd8f..f1676af 100644
--- a/Eigen/src/Core/arch/AltiVec/PacketMath.h
+++ b/Eigen/src/Core/arch/AltiVec/PacketMath.h
@@ -171,6 +171,7 @@
HasACos = 1,
HasASin = 1,
HasATan = 1,
+ HasATanh = 1,
HasLog = 1,
HasExp = 1,
#ifdef EIGEN_VECTORIZE_VSX
diff --git a/Eigen/src/Core/arch/Default/GenericPacketMathFunctions.h b/Eigen/src/Core/arch/Default/GenericPacketMathFunctions.h
index a322bc4..52d076a 100644
--- a/Eigen/src/Core/arch/Default/GenericPacketMathFunctions.h
+++ b/Eigen/src/Core/arch/Default/GenericPacketMathFunctions.h
@@ -969,6 +969,34 @@
template<typename Packet>
EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
+Packet patanh_float(const Packet& x) {
+ typedef typename unpacket_traits<Packet>::type Scalar;
+ static_assert(std::is_same<Scalar, float>::value, "Scalar type must be float");
+ const Packet half = pset1<Packet>(0.5f);
+ const Packet x_gt_half = pcmp_le(half, pabs(x));
+ // For |x| in [0:0.5] we use a polynomial approximation of the form
+ // P(x) = x + x^3*(c3 + x^2 * (c5 + x^2 * (... x^2 * c11) ... )).
+ const Packet C3 = pset1<Packet>(0.3333373963832855224609375f);
+ const Packet C5 = pset1<Packet>(0.1997792422771453857421875f);
+ const Packet C7 = pset1<Packet>(0.14672131836414337158203125f);
+ const Packet C9 = pset1<Packet>(8.2311116158962249755859375e-2f);
+ const Packet C11 = pset1<Packet>(0.1819281280040740966796875f);
+ const Packet x2 = pmul(x,x);
+ Packet p = pmadd(C11, x2, C9);
+ p = pmadd(x2, p, C7);
+ p = pmadd(x2, p, C5);
+ p = pmadd(x2, p, C3);
+ p = pmadd(pmul(x,x2), p, x);
+
+ // For |x| in ]0.5:1.0] we use atanh = 0.5*ln((1+x)/(1-x));
+ const Packet one = pset1<Packet>(1.0f);
+ Packet r = pdiv(padd(one, x), psub(one, x));
+ r = pmul(half, plog(r));
+ return pselect(x_gt_half, r, p);
+}
+
+template<typename Packet>
+EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
Packet pdiv_complex(const Packet& x, const Packet& y) {
typedef typename unpacket_traits<Packet>::as_real RealPacket;
// In the following we annotate the code for the case where the inputs
diff --git a/Eigen/src/Core/arch/Default/GenericPacketMathFunctionsFwd.h b/Eigen/src/Core/arch/Default/GenericPacketMathFunctionsFwd.h
index 179c55c..d733579 100644
--- a/Eigen/src/Core/arch/Default/GenericPacketMathFunctionsFwd.h
+++ b/Eigen/src/Core/arch/Default/GenericPacketMathFunctionsFwd.h
@@ -109,6 +109,11 @@
EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
Packet patan_double(const Packet& x);
+/** \internal \returns atan(x) for single precision float */
+template<typename Packet>
+EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
+Packet patanh_float(const Packet& x);
+
/** \internal \returns sqrt(x) for complex types */
template<typename Packet>
EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
diff --git a/Eigen/src/Core/arch/Default/Half.h b/Eigen/src/Core/arch/Default/Half.h
index 6ff3e20..c8ca33a 100644
--- a/Eigen/src/Core/arch/Default/Half.h
+++ b/Eigen/src/Core/arch/Default/Half.h
@@ -779,6 +779,12 @@
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half acos(const half& a) {
return half(::acosf(float(a)));
}
+EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half atan(const half& a) {
+ return half(::atanf(float(a)));
+}
+EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half atanh(const half& a) {
+ return half(::atanhf(float(a)));
+}
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half floor(const half& a) {
#if (EIGEN_CUDA_SDK_VER >= 80000 && defined EIGEN_CUDA_ARCH && EIGEN_CUDA_ARCH >= 300) || \
defined(EIGEN_HIP_DEVICE_COMPILE)
diff --git a/Eigen/src/Core/arch/NEON/MathFunctions.h b/Eigen/src/Core/arch/NEON/MathFunctions.h
index aea5149..f4ea6f9 100644
--- a/Eigen/src/Core/arch/NEON/MathFunctions.h
+++ b/Eigen/src/Core/arch/NEON/MathFunctions.h
@@ -55,6 +55,12 @@
template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet4f ptanh<Packet4f>(const Packet4f& x)
{ return internal::generic_fast_tanh_float(x); }
+template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet2f patanh<Packet2f>(const Packet2f& x)
+{ return patanh_float(x); }
+template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet4f patanh<Packet4f>(const Packet4f& x)
+{ return patanh_float(x); }
+
+
#if EIGEN_HAS_ARM64_FP16_VECTOR_ARITHMETIC
template <>
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC
diff --git a/Eigen/src/Core/arch/NEON/PacketMath.h b/Eigen/src/Core/arch/NEON/PacketMath.h
index aa873e5..e31f717 100644
--- a/Eigen/src/Core/arch/NEON/PacketMath.h
+++ b/Eigen/src/Core/arch/NEON/PacketMath.h
@@ -215,6 +215,7 @@
HasACos = 1,
HasASin = 1,
HasATan = 1,
+ HasATanh = 1,
HasLog = 1,
HasExp = 1,
HasSqrt = 1,
diff --git a/Eigen/src/Core/arch/SSE/MathFunctions.h b/Eigen/src/Core/arch/SSE/MathFunctions.h
index f98fb7a..b0cd13a 100644
--- a/Eigen/src/Core/arch/SSE/MathFunctions.h
+++ b/Eigen/src/Core/arch/SSE/MathFunctions.h
@@ -81,11 +81,6 @@
return pacos_float(_x);
}
-template <> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
-Packet2d patan<Packet2d>(const Packet2d& _x) {
- return patan_double(_x);
-}
-
template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
Packet4f pasin<Packet4f>(const Packet4f& _x)
{
@@ -98,6 +93,17 @@
return patan_float(_x);
}
+template <> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
+Packet2d patan<Packet2d>(const Packet2d& _x) {
+ return patan_double(_x);
+}
+
+template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
+Packet4f patanh<Packet4f>(const Packet4f& _x)
+{
+ return patanh_float(_x);
+}
+
// Notice that for newer processors, it is counterproductive to use Newton
// iteration for square root. In particular, Skylake and Zen2 processors
// have approximately doubled throughput of the _mm_sqrt_ps instruction
diff --git a/Eigen/src/Core/arch/SSE/PacketMath.h b/Eigen/src/Core/arch/SSE/PacketMath.h
index 62646a1..7d608bb 100644
--- a/Eigen/src/Core/arch/SSE/PacketMath.h
+++ b/Eigen/src/Core/arch/SSE/PacketMath.h
@@ -142,6 +142,7 @@
HasACos = 1,
HasASin = 1,
HasATan = 1,
+ HasATanh = 1,
HasLog = 1,
HasLog1p = 1,
HasExpm1 = 1,
diff --git a/Eigen/src/Core/functors/UnaryFunctors.h b/Eigen/src/Core/functors/UnaryFunctors.h
index e518f98..1faed10 100644
--- a/Eigen/src/Core/functors/UnaryFunctors.h
+++ b/Eigen/src/Core/functors/UnaryFunctors.h
@@ -622,11 +622,16 @@
template <typename Scalar>
struct scalar_atanh_op {
EIGEN_DEVICE_FUNC inline const Scalar operator()(const Scalar& a) const { return numext::atanh(a); }
+ template <typename Packet>
+ EIGEN_DEVICE_FUNC inline Packet packetOp(const Packet& x) const { return patanh(x); }
};
template <typename Scalar>
struct functor_traits<scalar_atanh_op<Scalar> > {
- enum { Cost = 5 * NumTraits<Scalar>::MulCost, PacketAccess = false };
+ enum {
+ Cost = 5 * NumTraits<Scalar>::MulCost,
+ PacketAccess = packet_traits<Scalar>::HasATanh
+ };
};
/** \internal
diff --git a/test/array_cwise.cpp b/test/array_cwise.cpp
index 6090576..44a6a50 100644
--- a/test/array_cwise.cpp
+++ b/test/array_cwise.cpp
@@ -93,27 +93,91 @@
VERIFY(all_pass);
}
+#define BINARY_FUNCTOR_TEST_ARGS(fun) #fun, \
+ [](const auto& x, const auto& y) { return (Eigen::fun)(x, y); }, \
+ [](const auto& x, const auto& y) { return (std::fun)(x, y); }
+
+
template <typename Scalar>
void binary_ops_test() {
- binary_op_test<Scalar>("pow",
- [](const auto& x, const auto& y) { return Eigen::pow(x, y); },
- [](const auto& x, const auto& y) { return std::pow(x, y); });
- binary_op_test<Scalar>("atan2",
- [](const auto& x, const auto& y) { return Eigen::atan2(x, y); },
- [](const auto& x, const auto& y) {
- auto t = std::atan2(x, y);
-#if EIGEN_COMP_MSVC
- // Work around MSVC return value on underflow.
- // |atan(y/x)| is bounded above by |y/x|, so on underflow return y/x according to POSIX spec.
- // MSVC otherwise returns denorm_min.
- if (EIGEN_PREDICT_FALSE(std::abs(t) == std::numeric_limits<decltype(t)>::denorm_min())) {
- return x/y;
- }
+ binary_op_test<Scalar>(BINARY_FUNCTOR_TEST_ARGS(pow));
+#ifndef EIGEN_COMP_MSVC
+ binary_op_test<Scalar>(BINARY_FUNCTOR_TEST_ARGS(atan2));
+#else
+ binary_op_test<Scalar>(
+ "atan2", [](const auto& x, const auto& y) { return Eigen::atan2(x, y); },
+ [](Scalar x, Scalar y) {
+ auto t = Scalar(std::atan2(x, y));
+ // Work around MSVC return value on underflow.
+ // |atan(y/x)| is bounded above by |y/x|, so on underflow return y/x according to POSIX spec.
+ // MSVC otherwise returns denorm_min.
+ if (EIGEN_PREDICT_FALSE(std::abs(t) == std::numeric_limits<decltype(t)>::denorm_min())) {
+ return x / y;
+ }
+ return t;
+ });
#endif
- return t;
- });
}
+
+template <typename Scalar, typename Fn, typename RefFn>
+void unary_op_test(std::string name, Fn fun, RefFn ref) {
+ const Scalar tol = test_precision<Scalar>();
+ auto values = special_values<Scalar>();
+ Map<Array<Scalar, Dynamic, 1>> x(values.data(), values.size());
+
+ Array<Scalar, Dynamic, Dynamic> actual = fun(x);
+ bool all_pass = true;
+ for (Index i = 0; i < x.size(); ++i) {
+ Scalar e = static_cast<Scalar>(ref(x(i)));
+ Scalar a = actual(i);
+ bool success = (a == e) || ((numext::isfinite)(e) && internal::isApprox(a, e, tol)) ||
+ ((numext::isnan)(a) && (numext::isnan)(e));
+ if ((a == a) && (e == e)) success &= (bool)numext::signbit(e) == (bool)numext::signbit(a);
+ all_pass &= success;
+ if (!success) {
+ std::cout << name << "(" << x(i) << ") = " << a << " != " << e << std::endl;
+ }
+ }
+ VERIFY(all_pass);
+}
+
+#define UNARY_FUNCTOR_TEST_ARGS(fun) #fun, \
+ [](const auto& x) { return (Eigen::fun)(x); }, \
+ [](const auto& x) { return (std::fun)(x); }
+
+template <typename Scalar>
+void unary_ops_test() {
+ unary_op_test<Scalar>(UNARY_FUNCTOR_TEST_ARGS(sqrt));
+ unary_op_test<Scalar>(UNARY_FUNCTOR_TEST_ARGS(exp));
+ unary_op_test<Scalar>(UNARY_FUNCTOR_TEST_ARGS(log));
+ unary_op_test<Scalar>(UNARY_FUNCTOR_TEST_ARGS(sin));
+ unary_op_test<Scalar>(UNARY_FUNCTOR_TEST_ARGS(cos));
+ unary_op_test<Scalar>(UNARY_FUNCTOR_TEST_ARGS(tan));
+ unary_op_test<Scalar>(UNARY_FUNCTOR_TEST_ARGS(asin));
+ unary_op_test<Scalar>(UNARY_FUNCTOR_TEST_ARGS(acos));
+ unary_op_test<Scalar>(UNARY_FUNCTOR_TEST_ARGS(atan));
+ unary_op_test<Scalar>(UNARY_FUNCTOR_TEST_ARGS(sinh));
+ unary_op_test<Scalar>(UNARY_FUNCTOR_TEST_ARGS(cosh));
+ unary_op_test<Scalar>(UNARY_FUNCTOR_TEST_ARGS(tanh));
+ unary_op_test<Scalar>(UNARY_FUNCTOR_TEST_ARGS(asinh));
+ unary_op_test<Scalar>(UNARY_FUNCTOR_TEST_ARGS(acosh));
+ unary_op_test<Scalar>(UNARY_FUNCTOR_TEST_ARGS(atanh));
+ /* FIXME: Enable when the behavior of rsqrt on denormals for half and double is fixed.
+ unary_op_test<Scalar>("rsqrt",
+ [](const auto& x) { return Eigen::rsqrt(x); },
+ [](Scalar x) {
+ if (x >= 0 && x < (std::numeric_limits<Scalar>::min)()) {
+ // rsqrt return +inf for positive subnormals.
+ return NumTraits<Scalar>::infinity();
+ } else {
+ return Scalar(std::sqrt(Scalar(1)/x));
+ }
+ });
+ */
+}
+
+
template <typename Scalar>
void pow_scalar_exponent_test() {
using Int_t = typename internal::make_integer<Scalar>::type;
@@ -630,6 +694,7 @@
VERIFY_IS_APPROX(m1.tanh().atanh(), atanh(tanh(m1)));
VERIFY_IS_APPROX(m1.sinh().asinh(), asinh(sinh(m1)));
VERIFY_IS_APPROX(m1.cosh().acosh(), acosh(cosh(m1)));
+ VERIFY_IS_APPROX(m1.tanh().atanh(), atanh(tanh(m1)));
VERIFY_IS_APPROX(m1.logistic(), logistic(m1));
VERIFY_IS_APPROX(m1.arg(), arg(m1));
@@ -722,6 +787,7 @@
VERIFY_IS_APPROX(m3.pow(RealScalar(-2)), m3.square().inverse());
// Test pow and atan2 on special IEEE values.
+ unary_ops_test<Scalar>();
binary_ops_test<Scalar>();
pow_scalar_exponent_test<Scalar>();
diff --git a/test/packetmath.cpp b/test/packetmath.cpp
index f082985..76332d7 100644
--- a/test/packetmath.cpp
+++ b/test/packetmath.cpp
@@ -879,6 +879,9 @@
}
CHECK_CWISE1_IF(PacketTraits::HasASin, std::asin, internal::pasin);
CHECK_CWISE1_IF(PacketTraits::HasACos, std::acos, internal::pacos);
+ CHECK_CWISE1_IF(PacketTraits::HasATan, std::atan, internal::patan);
+ CHECK_CWISE1_IF(PacketTraits::HasATanh, std::atanh, internal::patanh);
+
for (int i = 0; i < size; ++i) {
data1[i] = Scalar(internal::random<double>(-87, 88));