more avx predux_any
diff --git a/Eigen/src/Core/arch/AVX/PacketMath.h b/Eigen/src/Core/arch/AVX/PacketMath.h
index e280294..58fdb08 100644
--- a/Eigen/src/Core/arch/AVX/PacketMath.h
+++ b/Eigen/src/Core/arch/AVX/PacketMath.h
@@ -2025,6 +2025,15 @@
return _mm256_movemask_ps(_mm256_castsi256_ps(x)) != 0;
}
+template <>
+EIGEN_STRONG_INLINE bool predux_any(const Packet8h& x) {
+ return _mm_movemask_epi8(x) != 0;
+}
+template <>
+EIGEN_STRONG_INLINE bool predux_any(const Packet8bf& x) {
+ return _mm_movemask_epi8(x) != 0;
+}
+
EIGEN_DEVICE_FUNC inline void ptranspose(PacketBlock<Packet8f, 8>& kernel) {
__m256 T0 = _mm256_unpacklo_ps(kernel.packet[0], kernel.packet[1]);
__m256 T1 = _mm256_unpackhi_ps(kernel.packet[0], kernel.packet[1]);
diff --git a/Eigen/src/Core/arch/AVX512/PacketMath.h b/Eigen/src/Core/arch/AVX512/PacketMath.h
index 0a167c8..0681a0a 100644
--- a/Eigen/src/Core/arch/AVX512/PacketMath.h
+++ b/Eigen/src/Core/arch/AVX512/PacketMath.h
@@ -1640,16 +1640,23 @@
}
template <>
-EIGEN_STRONG_INLINE bool predux_any(const Packet16f& x) {
- Packet16i xi = _mm512_castps_si512(x);
- __mmask16 tmp = _mm512_test_epi32_mask(xi, xi);
- return !_mm512_kortestz(tmp, tmp);
+EIGEN_STRONG_INLINE bool predux_any(const Packet16f& a) {
+ return _mm512_reduce_or_epi32(_mm512_castps_si512(a)) != 0;
}
template <>
-EIGEN_STRONG_INLINE bool predux_any(const Packet16i& x) {
- __mmask16 tmp = _mm512_test_epi32_mask(x, x);
- return !_mm512_kortestz(tmp, tmp);
+EIGEN_STRONG_INLINE bool predux_any(const Packet16i& a) {
+ return _mm512_reduce_or_epi32(a) != 0;
+}
+
+template <>
+EIGEN_STRONG_INLINE bool predux_any(const Packet8d& a) {
+ return _mm512_reduce_or_epi64(_mm512_castpd_si512(a)) != 0;
+}
+
+template <>
+EIGEN_STRONG_INLINE bool predux_any(const Packet8l& a) {
+ return _mm512_reduce_or_epi64(a) != 0;
}
#define PACK_OUTPUT(OUTPUT, INPUT, INDEX, STRIDE) \