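Fix arm32 issues: gate FMA-dependent code paths on EIGEN_VECTORIZE_FMA and replace std::fpclassify in the array_cwise subnormal check.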

commit: a73970a8640330c4908d68ef9257fd31a4fdae93 [log] [tgz]
author: Antonio Sánchez <cantonios@google.com> Tue Jan 23 22:04:55 2024 +0000
committer: Antonio Sánchez <cantonios@google.com> Tue Jan 23 22:04:55 2024 +0000
tree: 3aec13bb4056e8f728f56cb0f72699bd185a7bf1
parent: 5808122017ba45edbbad5b85fb17726e307b5a75 [diff]
diff --git a/Eigen/src/Core/arch/Default/GenericPacketMathFunctions.h b/Eigen/src/Core/arch/Default/GenericPacketMathFunctions.h
index 8fb5b68..d84b1cc 100644
--- a/Eigen/src/Core/arch/Default/GenericPacketMathFunctions.h
+++ b/Eigen/src/Core/arch/Default/GenericPacketMathFunctions.h

@@ -582,8 +582,8 @@
 
 // Subtract y * Pi/2 to reduce x to the interval -Pi/4 <= x <= +Pi/4
 // using "Extended precision modular arithmetic"
-#if defined(EIGEN_HAS_SINGLE_INSTRUCTION_MADD)
-  // This version requires true FMA for high accuracy
+#if defined(EIGEN_VECTORIZE_FMA)
+  // This version requires true FMA for high accuracy.
   // It provides a max error of 1ULP up to (with absolute_error < 5.9605e-08):
   const float huge_th = ComputeSine ? 117435.992f : 71476.0625f;
   x = pmadd(y, pset1<Packet>(-1.57079601287841796875f), x);
@@ -1181,7 +1181,7 @@
   s_lo = psub(y, t);
 }
 
-#ifdef EIGEN_HAS_SINGLE_INSTRUCTION_MADD
+#ifdef EIGEN_VECTORIZE_FMA
 // This function implements the extended precision product of
 // a pair of floating point numbers. Given {x, y}, it computes the pair
 // {p_hi, p_lo} such that x * y = p_hi + p_lo holds exactly and
@@ -1227,7 +1227,7 @@
   p_lo = pmadd(x_lo, y_lo, p_lo);
 }
 
-#endif  // EIGEN_HAS_SINGLE_INSTRUCTION_MADD
+#endif  // EIGEN_VECTORIZE_FMA
 
 // This function implements Dekker's algorithm for the addition
 // of two double word numbers represented by {x_hi, x_lo} and {y_hi, y_lo}.

diff --git a/Eigen/src/Core/arch/NEON/PacketMath.h b/Eigen/src/Core/arch/NEON/PacketMath.h
index 4e3a14d..71e5f5f 100644
--- a/Eigen/src/Core/arch/NEON/PacketMath.h
+++ b/Eigen/src/Core/arch/NEON/PacketMath.h

@@ -1271,7 +1271,7 @@
   return pset1<Packet2ul>(0ULL);
 }
 
-#ifdef __ARM_FEATURE_FMA
+#ifdef EIGEN_VECTORIZE_FMA
 template <>
 EIGEN_STRONG_INLINE Packet4f pmadd(const Packet4f& a, const Packet4f& b, const Packet4f& c) {
   return vfmaq_f32(c, a, b);
@@ -5249,7 +5249,7 @@
   return vdivq_f64(a, b);
 }
 
-#ifdef __ARM_FEATURE_FMA
+#ifdef EIGEN_VECTORIZE_FMA
 // See bug 936. See above comment about FMA for float.
 template <>
 EIGEN_STRONG_INLINE Packet2d pmadd(const Packet2d& a, const Packet2d& b, const Packet2d& c) {

diff --git a/Eigen/src/Core/util/ConfigureVectorization.h b/Eigen/src/Core/util/ConfigureVectorization.h
index b16952a..e692438 100644
--- a/Eigen/src/Core/util/ConfigureVectorization.h
+++ b/Eigen/src/Core/util/ConfigureVectorization.h

@@ -354,6 +354,7 @@
 
 #define EIGEN_VECTORIZE
 #define EIGEN_VECTORIZE_VSX 1
+#define EIGEN_VECTORIZE_FMA
 #include <altivec.h>
 // We need to #undef all these ugly tokens defined in <altivec.h>
 // => use __vector instead of vector
@@ -365,6 +366,7 @@
 
 #define EIGEN_VECTORIZE
 #define EIGEN_VECTORIZE_ALTIVEC
+#define EIGEN_VECTORIZE_FMA
 #include <altivec.h>
 // We need to #undef all these ugly tokens defined in <altivec.h>
 // => use __vector instead of vector
@@ -431,6 +433,11 @@
 #include <arm_fp16.h>
 #endif
 
+// Enable FMA for ARM.
+#if defined(__ARM_FEATURE_FMA)
+#define EIGEN_VECTORIZE_FMA
+#endif
+
 #if defined(__F16C__) && !defined(EIGEN_GPUCC) && (!EIGEN_COMP_CLANG_STRICT || EIGEN_CLANG_STRICT_AT_LEAST(3, 8, 0))
 // We can use the optimized fp16 to float and float to fp16 conversion routines
 #define EIGEN_HAS_FP16_C

diff --git a/test/array_cwise.cpp b/test/array_cwise.cpp
index 0cfea8b..91db3f9 100644
--- a/test/array_cwise.cpp
+++ b/test/array_cwise.cpp

@@ -98,9 +98,12 @@
       Scalar a = actual(i, j);
 #if EIGEN_ARCH_ARM
       // Work around NEON flush-to-zero mode
-      // if ref returns denormalized value and Eigen returns 0, then skip the test
-      int ref_fpclass = std::fpclassify(e);
-      if (a == Scalar(0) && ref_fpclass == FP_SUBNORMAL) continue;
+      // if ref returns a subnormal value and Eigen returns 0, then skip the test
+      if (a == Scalar(0) &&
+          (e > -(std::numeric_limits<Scalar>::min)() && e < (std::numeric_limits<Scalar>::min)() &&
+           e >= -std::numeric_limits<Scalar>::denorm_min() && e <= std::numeric_limits<Scalar>::denorm_min())) {
+        continue;
+      }
 #endif
       bool success = (a == e) || ((numext::isfinite)(e) && internal::isApprox(a, e, tol)) ||
                      ((numext::isnan)(a) && (numext::isnan)(e));
commit	a73970a8640330c4908d68ef9257fd31a4fdae93	[log] [tgz]
author	Antonio Sánchez <cantonios@google.com>	Tue Jan 23 22:04:55 2024 +0000
committer	Antonio Sánchez <cantonios@google.com>	Tue Jan 23 22:04:55 2024 +0000
tree	3aec13bb4056e8f728f56cb0f72699bd185a7bf1
parent	5808122017ba45edbbad5b85fb17726e307b5a75 [diff]