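Prevent premature overflow to infinity in exp(x): take the fast pldexp path only when no lane has |x| above 87 (float) or 708 (double), and fall back to the full pldexp otherwise. The changes also provide a 3-4% speedup.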
diff --git a/Eigen/src/Core/arch/Default/GenericPacketMathFunctions.h b/Eigen/src/Core/arch/Default/GenericPacketMathFunctions.h
index 8b7d762..e21d3ef 100644
--- a/Eigen/src/Core/arch/Default/GenericPacketMathFunctions.h
+++ b/Eigen/src/Core/arch/Default/GenericPacketMathFunctions.h
@@ -512,6 +512,7 @@
const Packet cst_half = pset1<Packet>(0.5f);
const Packet cst_exp_hi = pset1<Packet>(88.723f);
const Packet cst_exp_lo = pset1<Packet>(-104.f);
+ const Packet cst_pldexp_threshold = pset1<Packet>(87.0);
const Packet cst_cephes_LOG2EF = pset1<Packet>(1.44269504088896341f);
const Packet cst_p2 = pset1<Packet>(0.49999988079071044921875f);
@@ -547,10 +548,11 @@
y = pmadd(r2, y, p_low);
// Return 2^m * exp(r).
- const Packet fast_pldexp_unsafe = pandnot(pcmp_lt(x, pset1<Packet>(-87.0)), zero_mask);
+ const Packet fast_pldexp_unsafe = pcmp_lt(cst_pldexp_threshold, pabs(x));
if (!predux_any(fast_pldexp_unsafe)) {
- // For x >= -87, we can safely use the fast version of pldexp.
- return pselect(zero_mask, cst_zero, pmax(pldexp_fast(y, m), _x));
+ // For |x| <= 87, we know the result is not zero or inf, and we can safely use
+ // the fast version of pldexp.
+ return pmax(pldexp_fast(y, m), _x);
}
return pselect(zero_mask, cst_zero, pmax(pldexp(y, m), _x));
}
@@ -565,7 +567,7 @@
const Packet cst_exp_hi = pset1<Packet>(709.784);
const Packet cst_exp_lo = pset1<Packet>(-745.519);
-
+ const Packet cst_pldexp_threshold = pset1<Packet>(708.0);
const Packet cst_cephes_LOG2EF = pset1<Packet>(1.4426950408889634073599);
const Packet cst_cephes_exp_p0 = pset1<Packet>(1.26177193074810590878e-4);
const Packet cst_cephes_exp_p1 = pset1<Packet>(3.02994407707441961300e-2);
@@ -618,10 +620,11 @@
// Construct the result 2^n * exp(g) = e * x. The max is used to catch
// non-finite values in the input.
- const Packet fast_pldexp_unsafe = pandnot(pcmp_lt(_x, pset1<Packet>(-708.0)), zero_mask);
+ const Packet fast_pldexp_unsafe = pcmp_lt(cst_pldexp_threshold, pabs(_x));
if (!predux_any(fast_pldexp_unsafe)) {
- // For x >= -708, we can safely use the fast version of pldexp.
- return pselect(zero_mask, cst_zero, pmax(pldexp_fast(x, fx), _x));
+ // For |x| <= 708, we know the result is not zero or inf, and we can safely use
+ // the fast version of pldexp.
+ return pmax(pldexp_fast(x, fx), _x);
}
return pselect(zero_mask, cst_zero, pmax(pldexp(x, fx), _x));
}