Prevent premature overflow to infinity in exp(x). The changes also provide a 3-4% speedup.

commit: e7c799b7c984f9b8bea27967bb04a97c52e62582 [log] [tgz]
author: Rasmus Munk Larsen <rmlarsen@google.com> Tue Nov 19 13:08:18 2024 -0800
committer: Rasmus Munk Larsen <rmlarsen@google.com> Tue Nov 19 13:08:18 2024 -0800
tree: f3dc43841a861a06a69c2f328f90138ef4fe5b03
parent: 00af47102d910ef7709e564398c98b763a324368 [diff]
diff --git a/Eigen/src/Core/arch/Default/GenericPacketMathFunctions.h b/Eigen/src/Core/arch/Default/GenericPacketMathFunctions.h
index 8b7d762..e21d3ef 100644
--- a/Eigen/src/Core/arch/Default/GenericPacketMathFunctions.h
+++ b/Eigen/src/Core/arch/Default/GenericPacketMathFunctions.h
@@ -512,6 +512,7 @@
   const Packet cst_half = pset1<Packet>(0.5f);
   const Packet cst_exp_hi = pset1<Packet>(88.723f);
   const Packet cst_exp_lo = pset1<Packet>(-104.f);
+  const Packet cst_pldexp_threshold = pset1<Packet>(87.0);
 
   const Packet cst_cephes_LOG2EF = pset1<Packet>(1.44269504088896341f);
   const Packet cst_p2 = pset1<Packet>(0.49999988079071044921875f);
@@ -547,10 +548,11 @@
   y = pmadd(r2, y, p_low);
 
   // Return 2^m * exp(r).
-  const Packet fast_pldexp_unsafe = pandnot(pcmp_lt(x, pset1<Packet>(-87.0)), zero_mask);
+  const Packet fast_pldexp_unsafe = pcmp_lt(cst_pldexp_threshold, pabs(x));
   if (!predux_any(fast_pldexp_unsafe)) {
-    // For x >= -87, we can safely use the fast version of pldexp.
-    return pselect(zero_mask, cst_zero, pmax(pldexp_fast(y, m), _x));
+    // For |x| <= 87, we know the result is not zero or inf, and we can safely use
+    // the fast version of pldexp.
+    return pmax(pldexp_fast(y, m), _x);
   }
   return pselect(zero_mask, cst_zero, pmax(pldexp(y, m), _x));
 }
@@ -565,7 +567,7 @@
 
   const Packet cst_exp_hi = pset1<Packet>(709.784);
   const Packet cst_exp_lo = pset1<Packet>(-745.519);
-
+  const Packet cst_pldexp_threshold = pset1<Packet>(708.0);
   const Packet cst_cephes_LOG2EF = pset1<Packet>(1.4426950408889634073599);
   const Packet cst_cephes_exp_p0 = pset1<Packet>(1.26177193074810590878e-4);
   const Packet cst_cephes_exp_p1 = pset1<Packet>(3.02994407707441961300e-2);
@@ -618,10 +620,11 @@
 
   // Construct the result 2^n * exp(g) = e * x. The max is used to catch
   // non-finite values in the input.
-  const Packet fast_pldexp_unsafe = pandnot(pcmp_lt(_x, pset1<Packet>(-708.0)), zero_mask);
+  const Packet fast_pldexp_unsafe = pcmp_lt(cst_pldexp_threshold, pabs(_x));
   if (!predux_any(fast_pldexp_unsafe)) {
-    // For x >= -708, we can safely use the fast version of pldexp.
-    return pselect(zero_mask, cst_zero, pmax(pldexp_fast(x, fx), _x));
+    // For |x| <= 708, we know the result is not zero or inf, and we can safely use
+    // the fast version of pldexp.
+    return pmax(pldexp_fast(x, fx), _x);
   }
   return pselect(zero_mask, cst_zero, pmax(pldexp(x, fx), _x));
 }
commit	e7c799b7c984f9b8bea27967bb04a97c52e62582	[log] [tgz]
author	Rasmus Munk Larsen <rmlarsen@google.com>	Tue Nov 19 13:08:18 2024 -0800
committer	Rasmus Munk Larsen <rmlarsen@google.com>	Tue Nov 19 13:08:18 2024 -0800
tree	f3dc43841a861a06a69c2f328f90138ef4fe5b03
parent	00af47102d910ef7709e564398c98b763a324368 [diff]