Add support for complex numbers in the generic clang backend
libeigen/eigen!2078
Co-authored-by: Rasmus Munk Larsen <rmlarsen@google.com>
diff --git a/Eigen/Core b/Eigen/Core
index 59f12eb..5f46dde 100644
--- a/Eigen/Core
+++ b/Eigen/Core
@@ -208,6 +208,7 @@
#if defined(EIGEN_VECTORIZE_GENERIC) && !defined(EIGEN_DONT_VECTORIZE)
#include "src/Core/arch/clang/PacketMath.h"
#include "src/Core/arch/clang/TypeCasting.h"
+#include "src/Core/arch/clang/Complex.h"
#include "src/Core/arch/clang/Reductions.h"
#include "src/Core/arch/clang/MathFunctions.h"
#else
diff --git a/Eigen/src/Core/GenericPacketMath.h b/Eigen/src/Core/GenericPacketMath.h
index ba38cd1..dc3e03d 100644
--- a/Eigen/src/Core/GenericPacketMath.h
+++ b/Eigen/src/Core/GenericPacketMath.h
@@ -1010,12 +1010,26 @@
return a;
}
-/** \internal \returns \a a with real and imaginary part flipped (for complex type only) */
+/** \internal \returns \a a with real and imaginary parts flipped (for complex types only) */
template <typename Packet>
EIGEN_DEVICE_FUNC inline Packet pcplxflip(const Packet& a) {
return Packet(numext::imag(a), numext::real(a));
}
+/** \internal \returns \a a with real part duplicated (for complex types only) */
+// TODO(rmlarsen): Define and use in all complex backends.
+template <typename Packet>
+EIGEN_DEVICE_FUNC inline Packet pdupreal(const Packet& a) {
+ return Packet(numext::real(a), numext::real(a));
+}
+
+/** \internal \returns \a a with imaginary part duplicated (for complex types only) */
+// TODO(rmlarsen): Define and use in all complex backends.
+template <typename Packet>
+EIGEN_DEVICE_FUNC inline Packet pdupimag(const Packet& a) {
+ return Packet(numext::imag(a), numext::imag(a));
+}
+
/**************************
* Special math functions
***************************/
diff --git a/Eigen/src/Core/arch/Default/GenericPacketMathFunctions.h b/Eigen/src/Core/arch/Default/GenericPacketMathFunctions.h
index 827386f..96d2783 100644
--- a/Eigen/src/Core/arch/Default/GenericPacketMathFunctions.h
+++ b/Eigen/src/Core/arch/Default/GenericPacketMathFunctions.h
@@ -1130,6 +1130,20 @@
return psincos_double<false>(x);
}
+template <bool ComputeSin, typename Packet>
+EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
+ std::enable_if_t<std::is_same<typename unpacket_traits<Packet>::type, float>::value, Packet>
+ psincos_selector(const Packet& x) {
+ return psincos_float<ComputeSin, Packet, true>(x);
+}
+
+template <bool ComputeSin, typename Packet>
+EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
+ std::enable_if_t<std::is_same<typename unpacket_traits<Packet>::type, double>::value, Packet>
+ psincos_selector(const Packet& x) {
+ return psincos_double<ComputeSin, Packet, true>(x);
+}
+
// Generic implementation of acos(x).
template <typename Packet>
EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet pacos_float(const Packet& x_in) {
@@ -1470,14 +1484,24 @@
}
template <typename Packet>
+EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet pmul_complex(const Packet& x, const Packet& y) {
+ // In the following we annotate the code for the case where the inputs
+ // are a pair length-2 SIMD vectors representing a single pair of complex
+ // numbers x = a + i*b, y = c + i*d.
+ Packet x_re = pdupreal(x); // a, a
+ Packet x_im = pdupimag(x); // b, b
+ Packet tmp_re = Packet(pmul(x_re.v, y.v)); // a*c, a*d
+ Packet tmp_im = Packet(pmul(x_im.v, y.v)); // b*c, b*d
+ tmp_im = pcplxflip(pconj(tmp_im)); // -b*d, d*c
+ return padd(tmp_im, tmp_re); // a*c - b*d, a*d + b*c
+}
+
+template <typename Packet>
EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet plog_complex(const Packet& x) {
typedef typename unpacket_traits<Packet>::type Scalar;
typedef typename Scalar::value_type RealScalar;
typedef typename unpacket_traits<Packet>::as_real RealPacket;
- RealPacket real_mask_rp = peven_mask(x.v);
- Packet real_mask(real_mask_rp);
-
// Real part
RealPacket x_flip = pcplxflip(x).v; // b, a
Packet x_norm = phypot_complex(x); // sqrt(a^2 + b^2), sqrt(a^2 + b^2)
@@ -1493,12 +1517,12 @@
RealPacket is_any_inf = por(is_x_pos_inf, is_y_pos_inf);
RealPacket xreal = pselect(is_any_inf, cst_pos_inf, xlogr);
- Packet xres = pselect(real_mask, Packet(xreal), Packet(ximg)); // log(sqrt(a^2 + b^2)), atan2(b, a)
- return xres;
+ return Packet(pselect(peven_mask(xreal), xreal, ximg)); // log(sqrt(a^2 + b^2)), atan2(b, a)
}
template <typename Packet>
EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet pexp_complex(const Packet& a) {
+ // FIXME(rmlarsen): This does not work correctly for Packets of std::complex<double>.
typedef typename unpacket_traits<Packet>::as_real RealPacket;
typedef typename unpacket_traits<Packet>::type Scalar;
typedef typename Scalar::value_type RealScalar;
@@ -1516,7 +1540,7 @@
// cis(y):
RealPacket y = pand(odd_mask, a.v);
y = por(y, pcplxflip(Packet(y)).v);
- RealPacket cisy = psincos_float<false, RealPacket, true>(y);
+ RealPacket cisy = psincos_selector<false, RealPacket>(y);
cisy = pcplxflip(Packet(cisy)).v; // cos(y) + i * sin(y)
const RealPacket cst_pos_inf = pset1<RealPacket>(NumTraits<RealScalar>::infinity());
diff --git a/Eigen/src/Core/arch/Default/GenericPacketMathFunctionsFwd.h b/Eigen/src/Core/arch/Default/GenericPacketMathFunctionsFwd.h
index de30bcb..44ccb74 100644
--- a/Eigen/src/Core/arch/Default/GenericPacketMathFunctionsFwd.h
+++ b/Eigen/src/Core/arch/Default/GenericPacketMathFunctionsFwd.h
@@ -150,6 +150,10 @@
template <typename Packet>
EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet pdiv_complex(const Packet& x, const Packet& y);
+/** \internal \returns x * y for complex types */
+template <typename Packet>
+EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet pmul_complex(const Packet& x, const Packet& y);
+
template <typename Packet, int N>
struct ppolevl;
diff --git a/Eigen/src/Core/arch/clang/Complex.h b/Eigen/src/Core/arch/clang/Complex.h
new file mode 100644
index 0000000..d6cc435
--- /dev/null
+++ b/Eigen/src/Core/arch/clang/Complex.h
@@ -0,0 +1,404 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2025 Rasmus Munk Larsen
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+#ifndef EIGEN_COMPLEX_CLANG_H
+#define EIGEN_COMPLEX_CLANG_H
+
+// IWYU pragma: private
+#include "../../InternalHeaderCheck.h"
+
+namespace Eigen {
+namespace internal {
+
+template <typename RealScalar, int N>
+struct complex_packet_wrapper {
+ using RealPacketT = detail::VectorType<RealScalar, 2 * N>;
+ EIGEN_STRONG_INLINE complex_packet_wrapper() = default;
+ EIGEN_STRONG_INLINE explicit complex_packet_wrapper(const RealPacketT& a) : v(a) {}
+ EIGEN_STRONG_INLINE constexpr std::complex<RealScalar> operator[](Index i) const {
+ return std::complex<RealScalar>(v[2 * i], v[2 * i + 1]);
+ }
+ RealPacketT v;
+};
+
+using Packet8cf = complex_packet_wrapper<float, 8>;
+using Packet4cf = complex_packet_wrapper<float, 4>;
+using Packet2cf = complex_packet_wrapper<float, 2>;
+using Packet4cd = complex_packet_wrapper<double, 4>;
+using Packet2cd = complex_packet_wrapper<double, 2>;
+
+struct generic_complex_packet_traits : default_packet_traits {
+ enum {
+ Vectorizable = 1,
+ AlignedOnScalar = 1,
+ HasAdd = 1,
+ HasSub = 1,
+ HasMul = 1,
+ HasDiv = 1,
+ HasNegate = 1,
+ HasAbs = 0,
+ HasAbs2 = 0,
+ HasMin = 0,
+ HasMax = 0,
+ HasArg = 0,
+ HasSetLinear = 0,
+ HasConj = 1,
+ // Math functions
+ HasLog = 1,
+ HasExp = 1,
+ HasSqrt = 1,
+ };
+};
+
+template <>
+struct packet_traits<std::complex<float>> : generic_complex_packet_traits {
+ using type = Packet8cf;
+ using half = Packet8cf;
+ enum {
+ size = 8,
+ };
+};
+
+template <>
+struct unpacket_traits<Packet8cf> : generic_unpacket_traits {
+ using type = std::complex<float>;
+ using half = Packet8cf;
+ using as_real = Packet16f;
+ enum {
+ size = 8,
+ };
+};
+
+template <>
+struct packet_traits<std::complex<double>> : generic_complex_packet_traits {
+ using type = Packet4cd;
+ using half = Packet4cd;
+ enum {
+ size = 4,
+ HasExp = 0, // FIXME(rmlarsen): pexp_complex is broken for double.
+ };
+};
+
+template <>
+struct unpacket_traits<Packet4cd> : generic_unpacket_traits {
+ using type = std::complex<double>;
+ using half = Packet4cd;
+ using as_real = Packet8d;
+ enum {
+ size = 4,
+ };
+};
+
+// ------------ Load and store ops ----------
+#define EIGEN_CLANG_COMPLEX_LOAD_STORE(PACKET_TYPE) \
+ template <> \
+ EIGEN_STRONG_INLINE PACKET_TYPE ploadu<PACKET_TYPE>(const unpacket_traits<PACKET_TYPE>::type* from) { \
+ return PACKET_TYPE(ploadu<typename unpacket_traits<PACKET_TYPE>::as_real>(&numext::real_ref(*from))); \
+ } \
+ template <> \
+ EIGEN_STRONG_INLINE PACKET_TYPE pload<PACKET_TYPE>(const unpacket_traits<PACKET_TYPE>::type* from) { \
+ return PACKET_TYPE(pload<typename unpacket_traits<PACKET_TYPE>::as_real>(&numext::real_ref(*from))); \
+ } \
+ template <> \
+ EIGEN_STRONG_INLINE void pstoreu<typename unpacket_traits<PACKET_TYPE>::type, PACKET_TYPE>( \
+ typename unpacket_traits<PACKET_TYPE>::type * to, const PACKET_TYPE& from) { \
+ pstoreu(&numext::real_ref(*to), from.v); \
+ } \
+ template <> \
+ EIGEN_STRONG_INLINE void pstore<typename unpacket_traits<PACKET_TYPE>::type, PACKET_TYPE>( \
+ typename unpacket_traits<PACKET_TYPE>::type * to, const PACKET_TYPE& from) { \
+ pstore(&numext::real_ref(*to), from.v); \
+ }
+
+EIGEN_CLANG_COMPLEX_LOAD_STORE(Packet8cf);
+EIGEN_CLANG_COMPLEX_LOAD_STORE(Packet4cd);
+#undef EIGEN_CLANG_COMPLEX_LOAD_STORE
+
+template <>
+EIGEN_STRONG_INLINE Packet8cf pset1<Packet8cf>(const std::complex<float>& from) {
+ const float re = numext::real(from);
+ const float im = numext::imag(from);
+ return Packet8cf(Packet16f{re, im, re, im, re, im, re, im, re, im, re, im, re, im, re, im});
+}
+
+template <>
+EIGEN_STRONG_INLINE Packet4cd pset1<Packet4cd>(const std::complex<double>& from) {
+ const double re = numext::real(from);
+ const double im = numext::imag(from);
+ return Packet4cd(Packet8d{re, im, re, im, re, im, re, im});
+}
+
+// ----------- Unary ops ------------------
+#define DELEGATE_UNARY_TO_REAL_OP(PACKET_TYPE, OP) \
+ template <> \
+ EIGEN_STRONG_INLINE PACKET_TYPE OP<PACKET_TYPE>(const PACKET_TYPE& a) { \
+ return PACKET_TYPE(OP(a.v)); \
+ }
+
+#define EIGEN_CLANG_COMPLEX_UNARY_CWISE_OPS(PACKET_TYPE) \
+ DELEGATE_UNARY_TO_REAL_OP(PACKET_TYPE, pnegate) \
+ DELEGATE_UNARY_TO_REAL_OP(PACKET_TYPE, pzero) \
+ template <> \
+ EIGEN_STRONG_INLINE unpacket_traits<PACKET_TYPE>::type pfirst<PACKET_TYPE>(const PACKET_TYPE& a) { \
+ return a[0]; \
+ } \
+ template <> \
+ EIGEN_STRONG_INLINE PACKET_TYPE pexp<PACKET_TYPE>(const PACKET_TYPE& a) { \
+ return pexp_complex(a); \
+ } \
+ template <> \
+ EIGEN_STRONG_INLINE PACKET_TYPE plog<PACKET_TYPE>(const PACKET_TYPE& a) { \
+ return plog_complex(a); \
+ } \
+ template <> \
+ EIGEN_STRONG_INLINE PACKET_TYPE psqrt<PACKET_TYPE>(const PACKET_TYPE& a) { \
+ return psqrt_complex(a); \
+ }
+
+EIGEN_CLANG_COMPLEX_UNARY_CWISE_OPS(Packet8cf);
+EIGEN_CLANG_COMPLEX_UNARY_CWISE_OPS(Packet4cd);
+
+template <>
+EIGEN_STRONG_INLINE Packet8cf pconj<Packet8cf>(const Packet8cf& a) {
+ return Packet8cf(__builtin_shufflevector(a.v, -a.v, 0, 17, 2, 19, 4, 21, 6, 23, 8, 25, 10, 27, 12, 29, 14, 31));
+}
+template <>
+EIGEN_STRONG_INLINE Packet4cf pconj<Packet4cf>(const Packet4cf& a) {
+ return Packet4cf(__builtin_shufflevector(a.v, -a.v, 0, 9, 2, 11, 4, 13, 6, 15));
+}
+template <>
+EIGEN_STRONG_INLINE Packet2cf pconj<Packet2cf>(const Packet2cf& a) {
+ return Packet2cf(__builtin_shufflevector(a.v, -a.v, 0, 5, 2, 7));
+}
+
+template <>
+EIGEN_STRONG_INLINE Packet4cd pconj<Packet4cd>(const Packet4cd& a) {
+ return Packet4cd(__builtin_shufflevector(a.v, -a.v, 0, 9, 2, 11, 4, 13, 6, 15));
+}
+template <>
+EIGEN_STRONG_INLINE Packet2cd pconj<Packet2cd>(const Packet2cd& a) {
+ return Packet2cd(__builtin_shufflevector(a.v, -a.v, 0, 5, 2, 7));
+}
+
+#undef DELEGATE_UNARY_TO_REAL_OP
+#undef EIGEN_CLANG_COMPLEX_UNARY_CWISE_OPS
+
+// Flip real and imaginary parts, i.e. {re(a), im(a)} -> {im(a), re(a)}.
+template <>
+EIGEN_STRONG_INLINE Packet8cf pcplxflip<Packet8cf>(const Packet8cf& a) {
+ return Packet8cf(__builtin_shufflevector(a.v, a.v, 1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14));
+}
+template <>
+EIGEN_STRONG_INLINE Packet4cf pcplxflip<Packet4cf>(const Packet4cf& a) {
+ return Packet4cf(__builtin_shufflevector(a.v, a.v, 1, 0, 3, 2, 5, 4, 7, 6));
+}
+template <>
+EIGEN_STRONG_INLINE Packet2cf pcplxflip<Packet2cf>(const Packet2cf& a) {
+ return Packet2cf(__builtin_shufflevector(a.v, a.v, 1, 0, 3, 2));
+}
+template <>
+EIGEN_STRONG_INLINE Packet4cd pcplxflip<Packet4cd>(const Packet4cd& a) {
+ return Packet4cd(__builtin_shufflevector(a.v, a.v, 1, 0, 3, 2, 5, 4, 7, 6));
+}
+template <>
+EIGEN_STRONG_INLINE Packet2cd pcplxflip<Packet2cd>(const Packet2cd& a) {
+ return Packet2cd(__builtin_shufflevector(a.v, a.v, 1, 0, 3, 2));
+}
+
+// Copy real to imaginary part, i.e. {re(a), im(a)} -> {re(a), re(a)}.
+template <>
+EIGEN_STRONG_INLINE Packet8cf pdupreal<Packet8cf>(const Packet8cf& a) {
+ return Packet8cf(__builtin_shufflevector(a.v, a.v, 0, 0, 2, 2, 4, 4, 6, 6, 8, 8, 10, 10, 12, 12, 14, 14));
+}
+template <>
+EIGEN_STRONG_INLINE Packet4cf pdupreal<Packet4cf>(const Packet4cf& a) {
+ return Packet4cf(__builtin_shufflevector(a.v, a.v, 0, 0, 2, 2, 4, 4, 6, 6));
+}
+template <>
+EIGEN_STRONG_INLINE Packet2cf pdupreal<Packet2cf>(const Packet2cf& a) {
+ return Packet2cf(__builtin_shufflevector(a.v, a.v, 0, 0, 2, 2));
+}
+template <>
+EIGEN_STRONG_INLINE Packet4cd pdupreal<Packet4cd>(const Packet4cd& a) {
+ return Packet4cd(__builtin_shufflevector(a.v, a.v, 0, 0, 2, 2, 4, 4, 6, 6));
+}
+template <>
+EIGEN_STRONG_INLINE Packet2cd pdupreal<Packet2cd>(const Packet2cd& a) {
+ return Packet2cd(__builtin_shufflevector(a.v, a.v, 0, 0, 2, 2));
+}
+
+// Copy imaginary to real part, i.e. {re(a), im(a)} -> {im(a), im(a)}.
+template <>
+EIGEN_STRONG_INLINE Packet8cf pdupimag<Packet8cf>(const Packet8cf& a) {
+ return Packet8cf(__builtin_shufflevector(a.v, a.v, 1, 1, 3, 3, 5, 5, 7, 7, 9, 9, 11, 11, 13, 13, 15, 15));
+}
+template <>
+EIGEN_STRONG_INLINE Packet4cf pdupimag<Packet4cf>(const Packet4cf& a) {
+ return Packet4cf(__builtin_shufflevector(a.v, a.v, 1, 1, 3, 3, 5, 5, 7, 7));
+}
+template <>
+EIGEN_STRONG_INLINE Packet2cf pdupimag<Packet2cf>(const Packet2cf& a) {
+ return Packet2cf(__builtin_shufflevector(a.v, a.v, 1, 1, 3, 3));
+}
+template <>
+EIGEN_STRONG_INLINE Packet4cd pdupimag<Packet4cd>(const Packet4cd& a) {
+ return Packet4cd(__builtin_shufflevector(a.v, a.v, 1, 1, 3, 3, 5, 5, 7, 7));
+}
+template <>
+EIGEN_STRONG_INLINE Packet2cd pdupimag<Packet2cd>(const Packet2cd& a) {
+ return Packet2cd(__builtin_shufflevector(a.v, a.v, 1, 1, 3, 3));
+}
+
+template <>
+EIGEN_STRONG_INLINE Packet8cf ploaddup<Packet8cf>(const std::complex<float>* from) {
+ return Packet8cf(Packet16f{std::real(from[0]), std::imag(from[0]), std::real(from[0]), std::imag(from[0]),
+ std::real(from[1]), std::imag(from[1]), std::real(from[1]), std::imag(from[1]),
+ std::real(from[2]), std::imag(from[2]), std::real(from[2]), std::imag(from[2]),
+ std::real(from[3]), std::imag(from[3]), std::real(from[3]), std::imag(from[3])});
+}
+template <>
+EIGEN_STRONG_INLINE Packet4cd ploaddup<Packet4cd>(const std::complex<double>* from) {
+ return Packet4cd(Packet8d{std::real(from[0]), std::imag(from[0]), std::real(from[0]), std::imag(from[0]),
+ std::real(from[1]), std::imag(from[1]), std::real(from[1]), std::imag(from[1])});
+}
+
+template <>
+EIGEN_STRONG_INLINE Packet8cf ploadquad<Packet8cf>(const std::complex<float>* from) {
+ return Packet8cf(Packet16f{std::real(from[0]), std::imag(from[0]), std::real(from[0]), std::imag(from[0]),
+ std::real(from[0]), std::imag(from[0]), std::real(from[0]), std::imag(from[0]),
+ std::real(from[1]), std::imag(from[1]), std::real(from[1]), std::imag(from[1]),
+ std::real(from[1]), std::imag(from[1]), std::real(from[1]), std::imag(from[1])});
+}
+template <>
+EIGEN_STRONG_INLINE Packet4cd ploadquad<Packet4cd>(const std::complex<double>* from) {
+ return pset1<Packet4cd>(*from);
+}
+
+template <>
+EIGEN_STRONG_INLINE Packet8cf preverse<Packet8cf>(const Packet8cf& a) {
+ return Packet8cf(reinterpret_cast<Packet16f>(preverse(reinterpret_cast<Packet8d>(a.v))));
+}
+template <>
+EIGEN_STRONG_INLINE Packet4cd preverse<Packet4cd>(const Packet4cd& a) {
+ return Packet4cd(__builtin_shufflevector(a.v, a.v, 6, 7, 4, 5, 2, 3, 0, 1));
+}
+
+// ----------- Binary ops ------------------
+#define DELEGATE_BINARY_TO_REAL_OP(PACKET_TYPE, OP) \
+ template <> \
+ EIGEN_STRONG_INLINE PACKET_TYPE OP<PACKET_TYPE>(const PACKET_TYPE& a, const PACKET_TYPE& b) { \
+ return PACKET_TYPE(OP(a.v, b.v)); \
+ }
+
+#define EIGEN_CLANG_COMPLEX_BINARY_CWISE_OPS(PACKET_TYPE) \
+ DELEGATE_BINARY_TO_REAL_OP(PACKET_TYPE, psub) \
+ DELEGATE_BINARY_TO_REAL_OP(PACKET_TYPE, pand) \
+ DELEGATE_BINARY_TO_REAL_OP(PACKET_TYPE, por) \
+ DELEGATE_BINARY_TO_REAL_OP(PACKET_TYPE, pxor) \
+ DELEGATE_BINARY_TO_REAL_OP(PACKET_TYPE, pandnot) \
+ template <> \
+ EIGEN_STRONG_INLINE PACKET_TYPE pdiv<PACKET_TYPE>(const PACKET_TYPE& a, const PACKET_TYPE& b) { \
+ return pdiv_complex(a, b); \
+ } \
+ template <> \
+ EIGEN_STRONG_INLINE PACKET_TYPE pcmp_eq<PACKET_TYPE>(const PACKET_TYPE& a, const PACKET_TYPE& b) { \
+ const PACKET_TYPE t = PACKET_TYPE(pcmp_eq(a.v, b.v)); \
+ return PACKET_TYPE(pand(pdupreal(t).v, pdupimag(t).v)); \
+ }
+
+EIGEN_CLANG_COMPLEX_BINARY_CWISE_OPS(Packet8cf);
+EIGEN_CLANG_COMPLEX_BINARY_CWISE_OPS(Packet4cd);
+
+// Binary ops that are needed on sub-packets for predux and predux_mul.
+#define EIGEN_CLANG_COMPLEX_REDUCER_BINARY_CWISE_OPS(PACKET_TYPE) \
+ DELEGATE_BINARY_TO_REAL_OP(PACKET_TYPE, padd) \
+ template <> \
+ EIGEN_STRONG_INLINE PACKET_TYPE pmul<PACKET_TYPE>(const PACKET_TYPE& a, const PACKET_TYPE& b) { \
+ return pmul_complex(a, b); \
+ }
+
+EIGEN_CLANG_COMPLEX_REDUCER_BINARY_CWISE_OPS(Packet8cf);
+EIGEN_CLANG_COMPLEX_REDUCER_BINARY_CWISE_OPS(Packet4cf);
+EIGEN_CLANG_COMPLEX_REDUCER_BINARY_CWISE_OPS(Packet2cf);
+EIGEN_CLANG_COMPLEX_REDUCER_BINARY_CWISE_OPS(Packet4cd);
+EIGEN_CLANG_COMPLEX_REDUCER_BINARY_CWISE_OPS(Packet2cd);
+
+#define EIGEN_CLANG_PACKET_SCATTER_GATHER(PACKET_TYPE) \
+ template <> \
+ EIGEN_STRONG_INLINE void pscatter(unpacket_traits<PACKET_TYPE>::type* to, const PACKET_TYPE& from, Index stride) { \
+ constexpr int size = unpacket_traits<PACKET_TYPE>::size; \
+ for (int i = 0; i < size; ++i) { \
+ to[i * stride] = from[i]; \
+ } \
+ } \
+ template <> \
+ EIGEN_STRONG_INLINE PACKET_TYPE pgather<typename unpacket_traits<PACKET_TYPE>::type, PACKET_TYPE>( \
+ const unpacket_traits<PACKET_TYPE>::type* from, Index stride) { \
+ constexpr int size = unpacket_traits<PACKET_TYPE>::size; \
+ PACKET_TYPE result; \
+ for (int i = 0; i < size; ++i) { \
+ const unpacket_traits<PACKET_TYPE>::type from_i = from[i * stride]; \
+ result.v[2 * i] = numext::real(from_i); \
+ result.v[2 * i + 1] = numext::imag(from_i); \
+ } \
+ return result; \
+ }
+
+EIGEN_CLANG_PACKET_SCATTER_GATHER(Packet8cf);
+EIGEN_CLANG_PACKET_SCATTER_GATHER(Packet4cd);
+#undef EIGEN_CLANG_PACKET_SCATTER_GATHER
+
+#undef DELEGATE_BINARY_TO_REAL_OP
+#undef EIGEN_CLANG_COMPLEX_BINARY_CWISE_OPS
+#undef EIGEN_CLANG_COMPLEX_REDUCER_BINARY_CWISE_OPS
+
+// ------------ ternary ops -------------
+template <>
+EIGEN_STRONG_INLINE Packet8cf pselect<Packet8cf>(const Packet8cf& mask, const Packet8cf& a, const Packet8cf& b) {
+ return Packet8cf(reinterpret_cast<Packet16f>(
+ pselect(reinterpret_cast<Packet8d>(mask.v), reinterpret_cast<Packet8d>(a.v), reinterpret_cast<Packet8d>(b.v))));
+}
+
+namespace detail {
+template <>
+EIGEN_ALWAYS_INLINE void zip_in_place<Packet8cf>(Packet8cf& p1, Packet8cf& p2) {
+ Packet16f tmp = __builtin_shufflevector(p1.v, p2.v, 0, 1, 16, 17, 2, 3, 18, 19, 4, 5, 20, 21, 6, 7, 22, 23);
+ p2.v = __builtin_shufflevector(p1.v, p2.v, 8, 9, 24, 25, 10, 11, 26, 27, 12, 13, 28, 29, 14, 15, 30, 31);
+ p1.v = tmp;
+}
+
+template <>
+EIGEN_ALWAYS_INLINE void zip_in_place<Packet4cd>(Packet4cd& p1, Packet4cd& p2) {
+ Packet8d tmp = __builtin_shufflevector(p1.v, p2.v, 0, 1, 8, 9, 2, 3, 10, 11);
+ p2.v = __builtin_shufflevector(p1.v, p2.v, 4, 5, 12, 13, 6, 7, 14, 15);
+ p1.v = tmp;
+}
+} // namespace detail
+
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void ptranspose(PacketBlock<Packet8cf, 8>& kernel) {
+ detail::ptranspose_impl(kernel);
+}
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void ptranspose(PacketBlock<Packet8cf, 4>& kernel) {
+ detail::ptranspose_impl(kernel);
+}
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void ptranspose(PacketBlock<Packet8cf, 2>& kernel) {
+ detail::ptranspose_impl(kernel);
+}
+
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void ptranspose(PacketBlock<Packet4cd, 4>& kernel) {
+ detail::ptranspose_impl(kernel);
+}
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void ptranspose(PacketBlock<Packet4cd, 2>& kernel) {
+ detail::ptranspose_impl(kernel);
+}
+
+} // end namespace internal
+} // end namespace Eigen
+
+#endif // EIGEN_COMPLEX_CLANG_H
diff --git a/Eigen/src/Core/arch/clang/PacketMath.h b/Eigen/src/Core/arch/clang/PacketMath.h
index 55ddc1c..e142264 100644
--- a/Eigen/src/Core/arch/clang/PacketMath.h
+++ b/Eigen/src/Core/arch/clang/PacketMath.h
@@ -245,28 +245,30 @@
// --- Intrinsic-like specializations ---
// --- Load/Store operations ---
-#define EIGEN_CLANG_PACKET_LOAD_STORE_PACKET(PACKET_TYPE, SCALAR_TYPE) \
- template <> \
- EIGEN_STRONG_INLINE PACKET_TYPE ploadu<PACKET_TYPE>(const SCALAR_TYPE* from) { \
- return detail::load_vector_unaligned<PACKET_TYPE>(from); \
- } \
- template <> \
- EIGEN_STRONG_INLINE PACKET_TYPE pload<PACKET_TYPE>(const SCALAR_TYPE* from) { \
- return detail::load_vector_aligned<PACKET_TYPE>(from); \
- } \
- template <> \
- EIGEN_STRONG_INLINE void pstoreu<SCALAR_TYPE, PACKET_TYPE>(SCALAR_TYPE * to, const PACKET_TYPE& from) { \
- detail::store_vector_unaligned<PACKET_TYPE>(to, from); \
- } \
- template <> \
- EIGEN_STRONG_INLINE void pstore<SCALAR_TYPE, PACKET_TYPE>(SCALAR_TYPE * to, const PACKET_TYPE& from) { \
- detail::store_vector_aligned<PACKET_TYPE>(to, from); \
+#define EIGEN_CLANG_PACKET_LOAD_STORE_PACKET(PACKET_TYPE) \
+ template <> \
+ EIGEN_STRONG_INLINE PACKET_TYPE ploadu<PACKET_TYPE>(const detail::scalar_type_of_vector_t<PACKET_TYPE>* from) { \
+ return detail::load_vector_unaligned<PACKET_TYPE>(from); \
+ } \
+ template <> \
+ EIGEN_STRONG_INLINE PACKET_TYPE pload<PACKET_TYPE>(const detail::scalar_type_of_vector_t<PACKET_TYPE>* from) { \
+ return detail::load_vector_aligned<PACKET_TYPE>(from); \
+ } \
+ template <> \
+ EIGEN_STRONG_INLINE void pstoreu<detail::scalar_type_of_vector_t<PACKET_TYPE>, PACKET_TYPE>( \
+ detail::scalar_type_of_vector_t<PACKET_TYPE> * to, const PACKET_TYPE& from) { \
+ detail::store_vector_unaligned<PACKET_TYPE>(to, from); \
+ } \
+ template <> \
+ EIGEN_STRONG_INLINE void pstore<detail::scalar_type_of_vector_t<PACKET_TYPE>, PACKET_TYPE>( \
+ detail::scalar_type_of_vector_t<PACKET_TYPE> * to, const PACKET_TYPE& from) { \
+ detail::store_vector_aligned<PACKET_TYPE>(to, from); \
}
-EIGEN_CLANG_PACKET_LOAD_STORE_PACKET(Packet16f, float)
-EIGEN_CLANG_PACKET_LOAD_STORE_PACKET(Packet8d, double)
-EIGEN_CLANG_PACKET_LOAD_STORE_PACKET(Packet16i, int32_t)
-EIGEN_CLANG_PACKET_LOAD_STORE_PACKET(Packet8l, int64_t)
+EIGEN_CLANG_PACKET_LOAD_STORE_PACKET(Packet16f)
+EIGEN_CLANG_PACKET_LOAD_STORE_PACKET(Packet8d)
+EIGEN_CLANG_PACKET_LOAD_STORE_PACKET(Packet16i)
+EIGEN_CLANG_PACKET_LOAD_STORE_PACKET(Packet8l)
#undef EIGEN_CLANG_PACKET_LOAD_STORE_PACKET
// --- Broadcast operation ---
@@ -330,6 +332,10 @@
// Bitwise ops for integer packets
#define EIGEN_CLANG_PACKET_BITWISE_INT(PACKET_TYPE) \
template <> \
+ constexpr EIGEN_STRONG_INLINE PACKET_TYPE pzero<PACKET_TYPE>(const PACKET_TYPE& /*unused*/) { \
+ return PACKET_TYPE(0); \
+ } \
+ template <> \
constexpr EIGEN_STRONG_INLINE PACKET_TYPE ptrue<PACKET_TYPE>(const PACKET_TYPE& /*unused*/) { \
return PACKET_TYPE(0) == PACKET_TYPE(0); \
} \
@@ -370,8 +376,9 @@
// Bitwise ops for floating point packets
#define EIGEN_CLANG_PACKET_BITWISE_FLOAT(PACKET_TYPE, CAST_TO_INT, CAST_FROM_INT) \
template <> \
- EIGEN_STRONG_INLINE PACKET_TYPE ptrue<PACKET_TYPE>(const PACKET_TYPE& a) { \
- return CAST_FROM_INT(CAST_TO_INT(a) == CAST_TO_INT(a)); \
+ EIGEN_STRONG_INLINE PACKET_TYPE ptrue<PACKET_TYPE>(const PACKET_TYPE& /* unused */) { \
+ using Scalar = detail::scalar_type_of_vector_t<PACKET_TYPE>; \
+ return CAST_FROM_INT(PACKET_TYPE(Scalar(0)) == PACKET_TYPE(Scalar(0))); \
} \
template <> \
EIGEN_STRONG_INLINE PACKET_TYPE pand<PACKET_TYPE>(const PACKET_TYPE& a, const PACKET_TYPE& b) { \
@@ -537,6 +544,7 @@
EIGEN_CLANG_PACKET_SCATTER_GATHER(Packet8d)
EIGEN_CLANG_PACKET_SCATTER_GATHER(Packet16i)
EIGEN_CLANG_PACKET_SCATTER_GATHER(Packet8l)
+
#undef EIGEN_CLANG_PACKET_SCATTER_GATHER
// ---- Various operations that depend on __builtin_shufflevector.
@@ -653,6 +661,21 @@
return Packet8l{a + 0, a + 1, a + 2, a + 3, a + 4, a + 5, a + 6, a + 7};
}
+template <>
+EIGEN_STRONG_INLINE Packet16f peven_mask(const Packet16f& /* unused */) {
+ float kTrue = numext::bit_cast<float>(int32_t(-1));
+ float kFalse = 0.0f;
+ return Packet16f{kTrue, kFalse, kTrue, kFalse, kTrue, kFalse, kTrue, kFalse,
+ kTrue, kFalse, kTrue, kFalse, kTrue, kFalse, kTrue, kFalse};
+}
+
+template <>
+EIGEN_STRONG_INLINE Packet8d peven_mask(const Packet8d& /* unused */) {
+ double kTrue = numext::bit_cast<double>(int64_t(-1l));
+ double kFalse = 0.0;
+ return Packet8d{kTrue, kFalse, kTrue, kFalse, kTrue, kFalse, kTrue, kFalse};
+}
+
// Helpers for ptranspose.
namespace detail {
diff --git a/Eigen/src/Core/arch/clang/Reductions.h b/Eigen/src/Core/arch/clang/Reductions.h
index d9cc558..1a6387a 100644
--- a/Eigen/src/Core/arch/clang/Reductions.h
+++ b/Eigen/src/Core/arch/clang/Reductions.h
@@ -57,54 +57,100 @@
#if EIGEN_HAS_BUILTIN(__builtin_shufflevector)
namespace detail {
template <typename VectorT>
-EIGEN_STRONG_INLINE scalar_type_of_vector_t<VectorT> ReduceAdd16(const VectorT& a) {
- auto t1 = __builtin_shufflevector(a, a, 0, 2, 4, 6, 8, 10, 12, 14) +
- __builtin_shufflevector(a, a, 1, 3, 5, 7, 9, 11, 13, 15);
- auto t2 = __builtin_shufflevector(t1, t1, 0, 2, 4, 6) + __builtin_shufflevector(t1, t1, 1, 3, 5, 7);
- auto t3 = __builtin_shufflevector(t2, t2, 0, 2) + __builtin_shufflevector(t2, t2, 1, 3);
- return t3[0] + t3[1];
+EIGEN_STRONG_INLINE std::pair<scalar_type_of_vector_t<VectorT>, scalar_type_of_vector_t<VectorT>> ReduceAdd16(
+ const VectorT& a) {
+ const auto t1 = __builtin_shufflevector(a, a, 0, 1, 2, 3, 4, 5, 6, 7) +
+ __builtin_shufflevector(a, a, 8, 9, 10, 11, 12, 13, 14, 15);
+ const auto t2 = __builtin_shufflevector(t1, t1, 0, 1, 2, 3) + __builtin_shufflevector(t1, t1, 4, 5, 6, 7);
+ const auto t3 = __builtin_shufflevector(t2, t2, 0, 1) + __builtin_shufflevector(t2, t2, 2, 3);
+ return {t3[0], t3[1]};
}
template <typename VectorT>
-EIGEN_STRONG_INLINE scalar_type_of_vector_t<VectorT> ReduceAdd8(const VectorT& a) {
- auto t1 = __builtin_shufflevector(a, a, 0, 2, 4, 6) + __builtin_shufflevector(a, a, 1, 3, 5, 7);
- auto t2 = __builtin_shufflevector(t1, t1, 0, 2) + __builtin_shufflevector(t1, t1, 1, 3);
- return t2[0] + t2[1];
+EIGEN_STRONG_INLINE std::pair<scalar_type_of_vector_t<VectorT>, scalar_type_of_vector_t<VectorT>> ReduceAdd8(
+ const VectorT& a) {
+ const auto t1 = __builtin_shufflevector(a, a, 0, 1, 2, 3) + __builtin_shufflevector(a, a, 4, 5, 6, 7);
+ const auto t2 = __builtin_shufflevector(t1, t1, 0, 1) + __builtin_shufflevector(t1, t1, 2, 3);
+ return {t2[0], t2[1]};
}
template <typename VectorT>
-EIGEN_STRONG_INLINE scalar_type_of_vector_t<VectorT> ReduceMul16(const VectorT& a) {
- auto t1 = __builtin_shufflevector(a, a, 0, 2, 4, 6, 8, 10, 12, 14) *
- __builtin_shufflevector(a, a, 1, 3, 5, 7, 9, 11, 13, 15);
- auto t2 = __builtin_shufflevector(t1, t1, 0, 2, 4, 6) * __builtin_shufflevector(t1, t1, 1, 3, 5, 7);
- auto t3 = __builtin_shufflevector(t2, t2, 0, 2) * __builtin_shufflevector(t2, t2, 1, 3);
- return t3[0] * t3[1];
+EIGEN_STRONG_INLINE std::pair<scalar_type_of_vector_t<VectorT>, scalar_type_of_vector_t<VectorT>> ReduceMul16(
+ const VectorT& a) {
+ const auto t1 = __builtin_shufflevector(a, a, 0, 1, 2, 3, 4, 5, 6, 7) *
+ __builtin_shufflevector(a, a, 8, 9, 10, 11, 12, 13, 14, 15);
+ const auto t2 = __builtin_shufflevector(t1, t1, 0, 1, 2, 3) * __builtin_shufflevector(t1, t1, 4, 5, 6, 7);
+ const auto t3 = __builtin_shufflevector(t2, t2, 0, 1) * __builtin_shufflevector(t2, t2, 2, 3);
+ return {t3[0], t3[1]};
}
template <typename VectorT>
-EIGEN_STRONG_INLINE scalar_type_of_vector_t<VectorT> ReduceMul8(const VectorT& a) {
- auto t1 = __builtin_shufflevector(a, a, 0, 2, 4, 6) * __builtin_shufflevector(a, a, 1, 3, 5, 7);
- auto t2 = __builtin_shufflevector(t1, t1, 0, 2) * __builtin_shufflevector(t1, t1, 1, 3);
- return t2[0] * t2[1];
+EIGEN_STRONG_INLINE std::pair<scalar_type_of_vector_t<VectorT>, scalar_type_of_vector_t<VectorT>> ReduceMul8(
+ const VectorT& a) {
+ const auto t1 = __builtin_shufflevector(a, a, 0, 1, 2, 3) * __builtin_shufflevector(a, a, 4, 5, 6, 7);
+ const auto t2 = __builtin_shufflevector(t1, t1, 0, 1) * __builtin_shufflevector(t1, t1, 2, 3);
+ return {t2[0], t2[1]};
}
} // namespace detail
template <>
EIGEN_STRONG_INLINE float predux<Packet16f>(const Packet16f& a) {
- return detail::ReduceAdd16(a);
+ float even, odd;
+ std::tie(even, odd) = detail::ReduceAdd16(a);
+ return even + odd;
}
template <>
EIGEN_STRONG_INLINE double predux<Packet8d>(const Packet8d& a) {
- return detail::ReduceAdd8(a);
+ double even, odd;
+ std::tie(even, odd) = detail::ReduceAdd8(a);
+ return even + odd;
}
template <>
EIGEN_STRONG_INLINE float predux_mul<Packet16f>(const Packet16f& a) {
- return detail::ReduceMul16(a);
+ float even, odd;
+ std::tie(even, odd) = detail::ReduceMul16(a);
+ return even * odd;
}
template <>
EIGEN_STRONG_INLINE double predux_mul<Packet8d>(const Packet8d& a) {
- return detail::ReduceMul8(a);
+ double even, odd;
+ std::tie(even, odd) = detail::ReduceMul8(a);
+ return even * odd;
}
+
+template <>
+EIGEN_STRONG_INLINE std::complex<float> predux<Packet8cf>(const Packet8cf& a) {
+ float re, im;
+ std::tie(re, im) = detail::ReduceAdd16(a.v);
+ return std::complex<float>(re, im);
+}
+
+template <>
+EIGEN_STRONG_INLINE std::complex<double> predux<Packet4cd>(const Packet4cd& a) {
+ double re, im;
+ std::tie(re, im) = detail::ReduceAdd8(a.v);
+ return std::complex<double>(re, im);
+}
+
+template <>
+EIGEN_STRONG_INLINE std::complex<float> predux_mul<Packet8cf>(const Packet8cf& a) {
+ const Packet4cf lower4 = Packet4cf(__builtin_shufflevector(a.v, a.v, 0, 1, 2, 3, 4, 5, 6, 7));
+ const Packet4cf upper4 = Packet4cf(__builtin_shufflevector(a.v, a.v, 8, 9, 10, 11, 12, 13, 14, 15));
+ const Packet4cf prod4 = pmul<Packet4cf>(lower4, upper4);
+ const Packet2cf lower2 = Packet2cf(__builtin_shufflevector(prod4.v, prod4.v, 0, 1, 2, 3));
+ const Packet2cf upper2 = Packet2cf(__builtin_shufflevector(prod4.v, prod4.v, 4, 5, 6, 7));
+ const Packet2cf prod2 = pmul<Packet2cf>(lower2, upper2);
+ return prod2[0] * prod2[1];
+}
+
+template <>
+EIGEN_STRONG_INLINE std::complex<double> predux_mul<Packet4cd>(const Packet4cd& a) {
+ const Packet2cd lower2 = Packet2cd(__builtin_shufflevector(a.v, a.v, 0, 1, 2, 3));
+ const Packet2cd upper2 = Packet2cd(__builtin_shufflevector(a.v, a.v, 4, 5, 6, 7));
+ const Packet2cd prod2 = pmul<Packet2cd>(lower2, upper2);
+ return prod2[0] * prod2[1];
+}
+
#endif
} // end namespace internal