Speed up pldexp_generic.

commit: 5226566a14ddb4d84214c40809531038f087d187 [log] [tgz]
author: Rasmus Munk Larsen <rmlarsen@google.com> Fri Apr 12 01:32:17 2024 +0000
committer: Rasmus Munk Larsen <rmlarsen@google.com> Fri Apr 12 01:32:17 2024 +0000
tree: 558b7d530dc865330c87c179126654f498103861
parent: 3c6521ed90ad02dcd1ec528b060eb74b93a8a079 [diff]
diff --git a/Eigen/src/Core/GenericPacketMath.h b/Eigen/src/Core/GenericPacketMath.h
index 58a197f..eab717f 100644
--- a/Eigen/src/Core/GenericPacketMath.h
+++ b/Eigen/src/Core/GenericPacketMath.h

@@ -1293,13 +1293,13 @@
 /** \internal \returns -(a * b) + c (coeff-wise) */
 template <typename Packet>
 EIGEN_DEVICE_FUNC inline Packet pnmadd(const Packet& a, const Packet& b, const Packet& c) {
-  return padd(pnegate(pmul(a, b)), c);
+  return psub(c, pmul(a, b));
 }
 
-/** \internal \returns -(a * b) - c (coeff-wise) */
+/** \internal \returns -((a * b) + c) (coeff-wise) */
 template <typename Packet>
 EIGEN_DEVICE_FUNC inline Packet pnmsub(const Packet& a, const Packet& b, const Packet& c) {
-  return psub(pnegate(pmul(a, b)), c);
+  return pnegate(pmadd(a, b, c));
 }
 
 /** \internal copy a packet with constant coefficient \a a (e.g., [a,a,a,a]) to \a *to. \a to must be 16 bytes aligned

diff --git a/Eigen/src/Core/arch/Default/GenericPacketMathFunctions.h b/Eigen/src/Core/arch/Default/GenericPacketMathFunctions.h
index 78dbf20..c973efd 100644
--- a/Eigen/src/Core/arch/Default/GenericPacketMathFunctions.h
+++ b/Eigen/src/Core/arch/Default/GenericPacketMathFunctions.h

@@ -129,8 +129,8 @@
   const PacketI e = pcast<Packet, PacketI>(pmin(pmax(exponent, pnegate(max_exponent)), max_exponent));
   PacketI b = parithmetic_shift_right<2>(e);                                          // floor(e/4);
   Packet c = preinterpret<Packet>(plogical_shift_left<MantissaBits>(padd(b, bias)));  // 2^b
-  Packet out = pmul(pmul(pmul(a, c), c), c);                                          // a * 2^(3b)
-  b = psub(psub(psub(e, b), b), b);                                                   // e - 3b
+  Packet out = pmul(pmul(a, c), pmul(c, c));                                          // a * 2^(3b)
+  b = pnmadd(pset1<PacketI>(3), b, e);                                                // e - 3b
   c = preinterpret<Packet>(plogical_shift_left<MantissaBits>(padd(b, bias)));         // 2^(e-3*b)
   out = pmul(out, c);
   return out;
commit	5226566a14ddb4d84214c40809531038f087d187	[log] [tgz]
author	Rasmus Munk Larsen <rmlarsen@google.com>	Fri Apr 12 01:32:17 2024 +0000
committer	Rasmus Munk Larsen <rmlarsen@google.com>	Fri Apr 12 01:32:17 2024 +0000
tree	558b7d530dc865330c87c179126654f498103861
parent	3c6521ed90ad02dcd1ec528b060eb74b93a8a079 [diff]