Speed up pldexp_generic.
diff --git a/Eigen/src/Core/GenericPacketMath.h b/Eigen/src/Core/GenericPacketMath.h
index 58a197f..eab717f 100644
--- a/Eigen/src/Core/GenericPacketMath.h
+++ b/Eigen/src/Core/GenericPacketMath.h
@@ -1293,13 +1293,13 @@
/** \internal \returns -(a * b) + c (coeff-wise) */
template <typename Packet>
EIGEN_DEVICE_FUNC inline Packet pnmadd(const Packet& a, const Packet& b, const Packet& c) {
- return padd(pnegate(pmul(a, b)), c);
+ return psub(c, pmul(a, b));
}
-/** \internal \returns -(a * b) - c (coeff-wise) */
+/** \internal \returns -((a * b + c) (coeff-wise) */
template <typename Packet>
EIGEN_DEVICE_FUNC inline Packet pnmsub(const Packet& a, const Packet& b, const Packet& c) {
- return psub(pnegate(pmul(a, b)), c);
+ return pnegate(pmadd(a, b, c));
}
/** \internal copy a packet with constant coefficient \a a (e.g., [a,a,a,a]) to \a *to. \a to must be 16 bytes aligned
diff --git a/Eigen/src/Core/arch/Default/GenericPacketMathFunctions.h b/Eigen/src/Core/arch/Default/GenericPacketMathFunctions.h
index 78dbf20..c973efd 100644
--- a/Eigen/src/Core/arch/Default/GenericPacketMathFunctions.h
+++ b/Eigen/src/Core/arch/Default/GenericPacketMathFunctions.h
@@ -129,8 +129,8 @@
const PacketI e = pcast<Packet, PacketI>(pmin(pmax(exponent, pnegate(max_exponent)), max_exponent));
PacketI b = parithmetic_shift_right<2>(e); // floor(e/4);
Packet c = preinterpret<Packet>(plogical_shift_left<MantissaBits>(padd(b, bias))); // 2^b
- Packet out = pmul(pmul(pmul(a, c), c), c); // a * 2^(3b)
- b = psub(psub(psub(e, b), b), b); // e - 3b
+ Packet out = pmul(pmul(a, c), pmul(c, c)); // a * 2^(3b)
+ b = pnmadd(pset1<PacketI>(3), b, e); // e - 3b
c = preinterpret<Packet>(plogical_shift_left<MantissaBits>(padd(b, bias))); // 2^(e-3*b)
out = pmul(out, c);
return out;