Introducing "vectorized" byte on unpacket_traits structs
This is a preparation to a change on gebp_traits, where a new template
argument will be introduced to dictate the packet size, so it won't be
bound to the current/max packet size only anymore.
By having packet types defined early on gebp_traits, one has now to
act on packet types, not scalars anymore, for the enum values defined
on that class. One approach for reaching the vectorizable/size
properties one needs there could be getting the packet's scalar again
with unpacket_traits<>, then the size/Vectorizable enum entries from
packet_traits<>. It turns out guards like "#ifndef
EIGEN_VECTORIZE_AVX512" at AVX/PacketMath.h will hide smaller packet
variations of packet_traits<> for some types (and it makes sense to
keep that). In other words, one can't go back to the scalar and create
a new PacketType, as this will always lead to the maximum packet type
for the architecture.
The less costly/invasive solution for that, thus, is to add the
vectorizable info on every unpacket_traits struct as well.
diff --git a/Eigen/src/Core/arch/AVX/Complex.h b/Eigen/src/Core/arch/AVX/Complex.h
index 08d021b..e7e2a10 100644
--- a/Eigen/src/Core/arch/AVX/Complex.h
+++ b/Eigen/src/Core/arch/AVX/Complex.h
@@ -47,7 +47,7 @@
};
#endif
-template<> struct unpacket_traits<Packet4cf> { typedef std::complex<float> type; enum {size=4, alignment=Aligned32}; typedef Packet2cf half; };
+template<> struct unpacket_traits<Packet4cf> { typedef std::complex<float> type; enum {size=4, alignment=Aligned32, vectorizable=true}; typedef Packet2cf half; };
template<> EIGEN_STRONG_INLINE Packet4cf padd<Packet4cf>(const Packet4cf& a, const Packet4cf& b) { return Packet4cf(_mm256_add_ps(a.v,b.v)); }
template<> EIGEN_STRONG_INLINE Packet4cf psub<Packet4cf>(const Packet4cf& a, const Packet4cf& b) { return Packet4cf(_mm256_sub_ps(a.v,b.v)); }
@@ -255,7 +255,7 @@
};
#endif
-template<> struct unpacket_traits<Packet2cd> { typedef std::complex<double> type; enum {size=2, alignment=Aligned32}; typedef Packet1cd half; };
+template<> struct unpacket_traits<Packet2cd> { typedef std::complex<double> type; enum {size=2, alignment=Aligned32, vectorizable=true}; typedef Packet1cd half; };
template<> EIGEN_STRONG_INLINE Packet2cd padd<Packet2cd>(const Packet2cd& a, const Packet2cd& b) { return Packet2cd(_mm256_add_pd(a.v,b.v)); }
template<> EIGEN_STRONG_INLINE Packet2cd psub<Packet2cd>(const Packet2cd& a, const Packet2cd& b) { return Packet2cd(_mm256_sub_pd(a.v,b.v)); }
diff --git a/Eigen/src/Core/arch/AVX/PacketMath.h b/Eigen/src/Core/arch/AVX/PacketMath.h
index e5aeb63..e771c0f 100644
--- a/Eigen/src/Core/arch/AVX/PacketMath.h
+++ b/Eigen/src/Core/arch/AVX/PacketMath.h
@@ -117,14 +117,14 @@
typedef float type;
typedef Packet4f half;
typedef Packet8i integer_packet;
- enum {size=8, alignment=Aligned32};
+ enum {size=8, alignment=Aligned32, vectorizable=true};
};
template<> struct unpacket_traits<Packet4d> {
typedef double type;
typedef Packet2d half;
- enum {size=4, alignment=Aligned32};
+ enum {size=4, alignment=Aligned32, vectorizable=true};
};
-template<> struct unpacket_traits<Packet8i> { typedef int type; typedef Packet4i half; enum {size=8, alignment=Aligned32}; };
+template<> struct unpacket_traits<Packet8i> { typedef int type; typedef Packet4i half; enum {size=8, alignment=Aligned32, vectorizable=false}; };
template<> EIGEN_STRONG_INLINE Packet8f pset1<Packet8f>(const float& from) { return _mm256_set1_ps(from); }
template<> EIGEN_STRONG_INLINE Packet4d pset1<Packet4d>(const double& from) { return _mm256_set1_pd(from); }
diff --git a/Eigen/src/Core/arch/AVX512/Complex.h b/Eigen/src/Core/arch/AVX512/Complex.h
index 247f898..569ee01 100644
--- a/Eigen/src/Core/arch/AVX512/Complex.h
+++ b/Eigen/src/Core/arch/AVX512/Complex.h
@@ -50,7 +50,8 @@
typedef std::complex<float> type;
enum {
size = 8,
- alignment=unpacket_traits<Packet16f>::alignment
+ alignment=unpacket_traits<Packet16f>::alignment,
+ vectorizable=true
};
typedef Packet4cf half;
};
@@ -237,7 +238,8 @@
typedef std::complex<double> type;
enum {
size = 4,
- alignment = unpacket_traits<Packet8d>::alignment
+ alignment = unpacket_traits<Packet8d>::alignment,
+ vectorizable=true
};
typedef Packet2cd half;
};
diff --git a/Eigen/src/Core/arch/AVX512/PacketMath.h b/Eigen/src/Core/arch/AVX512/PacketMath.h
index 10284dd..9c31210 100644
--- a/Eigen/src/Core/arch/AVX512/PacketMath.h
+++ b/Eigen/src/Core/arch/AVX512/PacketMath.h
@@ -102,19 +102,19 @@
typedef float type;
typedef Packet8f half;
typedef Packet16i integer_packet;
- enum { size = 16, alignment=Aligned64 };
+ enum { size = 16, alignment=Aligned64, vectorizable=true };
};
template <>
struct unpacket_traits<Packet8d> {
typedef double type;
typedef Packet4d half;
- enum { size = 8, alignment=Aligned64 };
+ enum { size = 8, alignment=Aligned64, vectorizable=true };
};
template <>
struct unpacket_traits<Packet16i> {
typedef int type;
typedef Packet8i half;
- enum { size = 16, alignment=Aligned64 };
+ enum { size = 16, alignment=Aligned64, vectorizable=false };
};
template <>
diff --git a/Eigen/src/Core/arch/AltiVec/Complex.h b/Eigen/src/Core/arch/AltiVec/Complex.h
index 3e66573..5404a62 100644
--- a/Eigen/src/Core/arch/AltiVec/Complex.h
+++ b/Eigen/src/Core/arch/AltiVec/Complex.h
@@ -60,7 +60,7 @@
};
};
-template<> struct unpacket_traits<Packet2cf> { typedef std::complex<float> type; enum {size=2, alignment=Aligned16}; typedef Packet2cf half; };
+template<> struct unpacket_traits<Packet2cf> { typedef std::complex<float> type; enum {size=2, alignment=Aligned16, vectorizable=true}; typedef Packet2cf half; };
template<> EIGEN_STRONG_INLINE Packet2cf pset1<Packet2cf>(const std::complex<float>& from)
{
@@ -286,7 +286,7 @@
};
};
-template<> struct unpacket_traits<Packet1cd> { typedef std::complex<double> type; enum {size=1, alignment=Aligned16}; typedef Packet1cd half; };
+template<> struct unpacket_traits<Packet1cd> { typedef std::complex<double> type; enum {size=1, alignment=Aligned16, vectorizable=true}; typedef Packet1cd half; };
template<> EIGEN_STRONG_INLINE Packet1cd pload <Packet1cd>(const std::complex<double>* from) { return Packet1cd(pload<Packet2d>((const double*)from)); }
template<> EIGEN_STRONG_INLINE Packet1cd ploadu<Packet1cd>(const std::complex<double>* from) { return Packet1cd(ploadu<Packet2d>((const double*)from)); }
diff --git a/Eigen/src/Core/arch/AltiVec/PacketMath.h b/Eigen/src/Core/arch/AltiVec/PacketMath.h
index d0ee93f..2c06003 100755
--- a/Eigen/src/Core/arch/AltiVec/PacketMath.h
+++ b/Eigen/src/Core/arch/AltiVec/PacketMath.h
@@ -192,13 +192,13 @@
typedef float type;
typedef Packet4f half;
typedef Packet4i integer_packet;
- enum {size=4, alignment=Aligned16};
+ enum {size=4, alignment=Aligned16, vectorizable=true};
};
template<> struct unpacket_traits<Packet4i>
{
typedef int type;
typedef Packet4i half;
- enum {size=4, alignment=Aligned16};
+ enum {size=4, alignment=Aligned16, vectorizable=false};
};
inline std::ostream & operator <<(std::ostream & s, const Packet16uc & v)
@@ -916,7 +916,7 @@
};
};
-template<> struct unpacket_traits<Packet2d> { typedef double type; enum {size=2, alignment=Aligned16}; typedef Packet2d half; };
+template<> struct unpacket_traits<Packet2d> { typedef double type; enum {size=2, alignment=Aligned16, vectorizable=true}; typedef Packet2d half; };
inline std::ostream & operator <<(std::ostream & s, const Packet2l & v)
{
diff --git a/Eigen/src/Core/arch/GPU/PacketMath.h b/Eigen/src/Core/arch/GPU/PacketMath.h
index ddf37b9..eaba60e 100644
--- a/Eigen/src/Core/arch/GPU/PacketMath.h
+++ b/Eigen/src/Core/arch/GPU/PacketMath.h
@@ -90,8 +90,8 @@
};
-template<> struct unpacket_traits<float4> { typedef float type; enum {size=4, alignment=Aligned16}; typedef float4 half; };
-template<> struct unpacket_traits<double2> { typedef double type; enum {size=2, alignment=Aligned16}; typedef double2 half; };
+template<> struct unpacket_traits<float4> { typedef float type; enum {size=4, alignment=Aligned16, vectorizable=true}; typedef float4 half; };
+template<> struct unpacket_traits<double2> { typedef double type; enum {size=2, alignment=Aligned16, vectorizable=true}; typedef double2 half; };
template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE float4 pset1<float4>(const float& from) {
return make_float4(from, from, from, from);
diff --git a/Eigen/src/Core/arch/GPU/PacketMathHalf.h b/Eigen/src/Core/arch/GPU/PacketMathHalf.h
index f3d721d..cc5c484 100644
--- a/Eigen/src/Core/arch/GPU/PacketMathHalf.h
+++ b/Eigen/src/Core/arch/GPU/PacketMathHalf.h
@@ -41,7 +41,7 @@
};
};
-template<> struct unpacket_traits<half2> { typedef Eigen::half type; enum {size=2, alignment=Aligned16}; typedef half2 half; };
+template<> struct unpacket_traits<half2> { typedef Eigen::half type; enum {size=2, alignment=Aligned16, vectorizable=true}; typedef half2 half; };
template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 pset1<half2>(const Eigen::half& from) {
@@ -517,7 +517,7 @@
};
-template<> struct unpacket_traits<Packet16h> { typedef Eigen::half type; enum {size=16, alignment=Aligned32}; typedef Packet16h half; };
+template<> struct unpacket_traits<Packet16h> { typedef Eigen::half type; enum {size=16, alignment=Aligned32, vectorizable=true}; typedef Packet16h half; };
template<> EIGEN_STRONG_INLINE Packet16h pset1<Packet16h>(const Eigen::half& from) {
Packet16h result;
@@ -984,7 +984,7 @@
};
-template<> struct unpacket_traits<Packet8h> { typedef Eigen::half type; enum {size=8, alignment=Aligned16}; typedef Packet8h half; };
+template<> struct unpacket_traits<Packet8h> { typedef Eigen::half type; enum {size=8, alignment=Aligned16, vectorizable=true}; typedef Packet8h half; };
template<> EIGEN_STRONG_INLINE Packet8h pset1<Packet8h>(const Eigen::half& from) {
Packet8h result;
@@ -1329,7 +1329,7 @@
};
-template<> struct unpacket_traits<Packet4h> { typedef Eigen::half type; enum {size=4, alignment=Aligned16}; typedef Packet4h half; };
+template<> struct unpacket_traits<Packet4h> { typedef Eigen::half type; enum {size=4, alignment=Aligned16, vectorizable=true}; typedef Packet4h half; };
template<> EIGEN_STRONG_INLINE Packet4h pset1<Packet4h>(const Eigen::half& from) {
Packet4h result;
diff --git a/Eigen/src/Core/arch/MSA/Complex.h b/Eigen/src/Core/arch/MSA/Complex.h
index 9a45cf5..fa64d35 100644
--- a/Eigen/src/Core/arch/MSA/Complex.h
+++ b/Eigen/src/Core/arch/MSA/Complex.h
@@ -127,7 +127,7 @@
template <>
struct unpacket_traits<Packet2cf> {
typedef std::complex<float> type;
- enum { size = 2, alignment = Aligned16 };
+ enum { size = 2, alignment = Aligned16, vectorizable=true };
typedef Packet2cf half;
};
@@ -500,7 +500,7 @@
template <>
struct unpacket_traits<Packet1cd> {
typedef std::complex<double> type;
- enum { size = 1, alignment = Aligned16 };
+ enum { size = 1, alignment = Aligned16, vectorizable=true };
typedef Packet1cd half;
};
diff --git a/Eigen/src/Core/arch/MSA/PacketMath.h b/Eigen/src/Core/arch/MSA/PacketMath.h
index 094c874..a97156a 100644
--- a/Eigen/src/Core/arch/MSA/PacketMath.h
+++ b/Eigen/src/Core/arch/MSA/PacketMath.h
@@ -117,14 +117,14 @@
template <>
struct unpacket_traits<Packet4f> {
typedef float type;
- enum { size = 4, alignment = Aligned16 };
+ enum { size = 4, alignment = Aligned16, vectorizable=true };
typedef Packet4f half;
};
template <>
struct unpacket_traits<Packet4i> {
typedef int32_t type;
- enum { size = 4, alignment = Aligned16 };
+ enum { size = 4, alignment = Aligned16, vectorizable=true };
typedef Packet4i half;
};
@@ -925,7 +925,7 @@
template <>
struct unpacket_traits<Packet2d> {
typedef double type;
- enum { size = 2, alignment = Aligned16 };
+ enum { size = 2, alignment = Aligned16, vectorizable=true };
typedef Packet2d half;
};
diff --git a/Eigen/src/Core/arch/NEON/Complex.h b/Eigen/src/Core/arch/NEON/Complex.h
index 306a309..5e6de1f 100644
--- a/Eigen/src/Core/arch/NEON/Complex.h
+++ b/Eigen/src/Core/arch/NEON/Complex.h
@@ -62,7 +62,7 @@
};
};
-template<> struct unpacket_traits<Packet2cf> { typedef std::complex<float> type; enum {size=2, alignment=Aligned16}; typedef Packet2cf half; };
+template<> struct unpacket_traits<Packet2cf> { typedef std::complex<float> type; enum {size=2, alignment=Aligned16, vectorizable=true}; typedef Packet2cf half; };
template<> EIGEN_STRONG_INLINE Packet2cf pset1<Packet2cf>(const std::complex<float>& from)
{
@@ -328,7 +328,7 @@
};
};
-template<> struct unpacket_traits<Packet1cd> { typedef std::complex<double> type; enum {size=1, alignment=Aligned16}; typedef Packet1cd half; };
+template<> struct unpacket_traits<Packet1cd> { typedef std::complex<double> type; enum {size=1, alignment=Aligned16, vectorizable=true}; typedef Packet1cd half; };
template<> EIGEN_STRONG_INLINE Packet1cd pload<Packet1cd>(const std::complex<double>* from) { EIGEN_DEBUG_ALIGNED_LOAD return Packet1cd(pload<Packet2d>((const double*)from)); }
template<> EIGEN_STRONG_INLINE Packet1cd ploadu<Packet1cd>(const std::complex<double>* from) { EIGEN_DEBUG_UNALIGNED_LOAD return Packet1cd(ploadu<Packet2d>((const double*)from)); }
diff --git a/Eigen/src/Core/arch/NEON/PacketMath.h b/Eigen/src/Core/arch/NEON/PacketMath.h
index ed3cec8..ca4f2bf 100644
--- a/Eigen/src/Core/arch/NEON/PacketMath.h
+++ b/Eigen/src/Core/arch/NEON/PacketMath.h
@@ -145,13 +145,13 @@
typedef float type;
typedef Packet4f half;
typedef Packet4i integer_packet;
- enum {size=4, alignment=Aligned16};
+ enum {size=4, alignment=Aligned16, vectorizable=true};
};
template<> struct unpacket_traits<Packet4i>
{
typedef int32_t type;
typedef Packet4i half;
- enum {size=4, alignment=Aligned16};
+ enum {size=4, alignment=Aligned16, vectorizable=true};
};
template<> EIGEN_STRONG_INLINE Packet4f pset1<Packet4f>(const float& from) { return vdupq_n_f32(from); }
@@ -650,7 +650,7 @@
};
};
-template<> struct unpacket_traits<Packet2d> { typedef double type; enum {size=2, alignment=Aligned16}; typedef Packet2d half; };
+template<> struct unpacket_traits<Packet2d> { typedef double type; enum {size=2, alignment=Aligned16, vectorizable=true}; typedef Packet2d half; };
template<> EIGEN_STRONG_INLINE Packet2d pset1<Packet2d>(const double& from) { return vdupq_n_f64(from); }
diff --git a/Eigen/src/Core/arch/SSE/Complex.h b/Eigen/src/Core/arch/SSE/Complex.h
index 0f89603..911fe06 100644
--- a/Eigen/src/Core/arch/SSE/Complex.h
+++ b/Eigen/src/Core/arch/SSE/Complex.h
@@ -50,7 +50,7 @@
};
#endif
-template<> struct unpacket_traits<Packet2cf> { typedef std::complex<float> type; enum {size=2, alignment=Aligned16}; typedef Packet2cf half; };
+template<> struct unpacket_traits<Packet2cf> { typedef std::complex<float> type; enum {size=2, alignment=Aligned16, vectorizable=true}; typedef Packet2cf half; };
template<> EIGEN_STRONG_INLINE Packet2cf padd<Packet2cf>(const Packet2cf& a, const Packet2cf& b) { return Packet2cf(_mm_add_ps(a.v,b.v)); }
template<> EIGEN_STRONG_INLINE Packet2cf psub<Packet2cf>(const Packet2cf& a, const Packet2cf& b) { return Packet2cf(_mm_sub_ps(a.v,b.v)); }
@@ -280,7 +280,7 @@
};
#endif
-template<> struct unpacket_traits<Packet1cd> { typedef std::complex<double> type; enum {size=1, alignment=Aligned16}; typedef Packet1cd half; };
+template<> struct unpacket_traits<Packet1cd> { typedef std::complex<double> type; enum {size=1, alignment=Aligned16, vectorizable=true}; typedef Packet1cd half; };
template<> EIGEN_STRONG_INLINE Packet1cd padd<Packet1cd>(const Packet1cd& a, const Packet1cd& b) { return Packet1cd(_mm_add_pd(a.v,b.v)); }
template<> EIGEN_STRONG_INLINE Packet1cd psub<Packet1cd>(const Packet1cd& a, const Packet1cd& b) { return Packet1cd(_mm_sub_pd(a.v,b.v)); }
diff --git a/Eigen/src/Core/arch/SSE/PacketMath.h b/Eigen/src/Core/arch/SSE/PacketMath.h
index 3e7a75b..4c7dc5b 100755
--- a/Eigen/src/Core/arch/SSE/PacketMath.h
+++ b/Eigen/src/Core/arch/SSE/PacketMath.h
@@ -166,17 +166,17 @@
typedef float type;
typedef Packet4f half;
typedef Packet4i integer_packet;
- enum {size=4, alignment=Aligned16};
+ enum {size=4, alignment=Aligned16, vectorizable=true};
};
template<> struct unpacket_traits<Packet2d> {
typedef double type;
typedef Packet2d half;
- enum {size=2, alignment=Aligned16};
+ enum {size=2, alignment=Aligned16, vectorizable=true};
};
template<> struct unpacket_traits<Packet4i> {
typedef int type;
typedef Packet4i half;
- enum {size=4, alignment=Aligned16};
+ enum {size=4, alignment=Aligned16, vectorizable=false};
};
#ifndef EIGEN_VECTORIZE_AVX
diff --git a/Eigen/src/Core/arch/SYCL/InteropHeaders.h b/Eigen/src/Core/arch/SYCL/InteropHeaders.h
index c1da40d..294cb10 100644
--- a/Eigen/src/Core/arch/SYCL/InteropHeaders.h
+++ b/Eigen/src/Core/arch/SYCL/InteropHeaders.h
@@ -88,7 +88,7 @@
#define SYCL_UNPACKET_TRAITS(packet_type, unpacket_type, lengths)\
template<> struct unpacket_traits<packet_type> {\
typedef unpacket_type type;\
- enum {size=lengths, alignment=Aligned16};\
+ enum {size=lengths, alignment=Aligned16, vectorizable=true};\
typedef packet_type half;\
};
SYCL_UNPACKET_TRAITS(cl::sycl::cl_float4, float, 4)
diff --git a/Eigen/src/Core/arch/ZVector/Complex.h b/Eigen/src/Core/arch/ZVector/Complex.h
index 95aba42..167c3ee 100644
--- a/Eigen/src/Core/arch/ZVector/Complex.h
+++ b/Eigen/src/Core/arch/ZVector/Complex.h
@@ -91,8 +91,8 @@
};
};
-template<> struct unpacket_traits<Packet2cf> { typedef std::complex<float> type; enum {size=2, alignment=Aligned16}; typedef Packet2cf half; };
-template<> struct unpacket_traits<Packet1cd> { typedef std::complex<double> type; enum {size=1, alignment=Aligned16}; typedef Packet1cd half; };
+template<> struct unpacket_traits<Packet2cf> { typedef std::complex<float> type; enum {size=2, alignment=Aligned16, vectorizable=true}; typedef Packet2cf half; };
+template<> struct unpacket_traits<Packet1cd> { typedef std::complex<double> type; enum {size=1, alignment=Aligned16, vectorizable=true}; typedef Packet1cd half; };
/* Forward declaration */
EIGEN_STRONG_INLINE void ptranspose(PacketBlock<Packet2cf,2>& kernel);
diff --git a/Eigen/src/Core/arch/ZVector/PacketMath.h b/Eigen/src/Core/arch/ZVector/PacketMath.h
index 0b37f49..c8e90f1 100755
--- a/Eigen/src/Core/arch/ZVector/PacketMath.h
+++ b/Eigen/src/Core/arch/ZVector/PacketMath.h
@@ -239,9 +239,9 @@
};
};
-template<> struct unpacket_traits<Packet4i> { typedef int type; enum {size=4, alignment=Aligned16}; typedef Packet4i half; };
-template<> struct unpacket_traits<Packet4f> { typedef float type; enum {size=4, alignment=Aligned16}; typedef Packet4f half; };
-template<> struct unpacket_traits<Packet2d> { typedef double type; enum {size=2, alignment=Aligned16}; typedef Packet2d half; };
+template<> struct unpacket_traits<Packet4i> { typedef int type; enum {size=4, alignment=Aligned16, vectorizable=true}; typedef Packet4i half; };
+template<> struct unpacket_traits<Packet4f> { typedef float type; enum {size=4, alignment=Aligned16, vectorizable=true}; typedef Packet4f half; };
+template<> struct unpacket_traits<Packet2d> { typedef double type; enum {size=2, alignment=Aligned16, vectorizable=true}; typedef Packet2d half; };
/* Forward declaration */
EIGEN_DEVICE_FUNC inline void ptranspose(PacketBlock<Packet4f,4>& kernel);
diff --git a/Eigen/src/Core/util/XprHelper.h b/Eigen/src/Core/util/XprHelper.h
index 836ff47..91c2e42 100644
--- a/Eigen/src/Core/util/XprHelper.h
+++ b/Eigen/src/Core/util/XprHelper.h
@@ -184,7 +184,8 @@
enum
{
size = 1,
- alignment = 1
+ alignment = 1,
+ vectorizable = false
};
};