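Add more AVX/AVX512 predux_any specializations (Packet8h, Packet8bf, Packet8d, Packet8l) and reimplement the Packet16f/Packet16i versions with _mm512_reduce_or_epi32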
diff --git a/Eigen/src/Core/arch/AVX/PacketMath.h b/Eigen/src/Core/arch/AVX/PacketMath.h index e280294..58fdb08 100644 --- a/Eigen/src/Core/arch/AVX/PacketMath.h +++ b/Eigen/src/Core/arch/AVX/PacketMath.h
@@ -2025,6 +2025,15 @@ return _mm256_movemask_ps(_mm256_castsi256_ps(x)) != 0; } +template <> +EIGEN_STRONG_INLINE bool predux_any(const Packet8h& x) { + return _mm_movemask_epi8(x) != 0; +} +template <> +EIGEN_STRONG_INLINE bool predux_any(const Packet8bf& x) { + return _mm_movemask_epi8(x) != 0; +} + EIGEN_DEVICE_FUNC inline void ptranspose(PacketBlock<Packet8f, 8>& kernel) { __m256 T0 = _mm256_unpacklo_ps(kernel.packet[0], kernel.packet[1]); __m256 T1 = _mm256_unpackhi_ps(kernel.packet[0], kernel.packet[1]);
diff --git a/Eigen/src/Core/arch/AVX512/PacketMath.h b/Eigen/src/Core/arch/AVX512/PacketMath.h index 0a167c8..0681a0a 100644 --- a/Eigen/src/Core/arch/AVX512/PacketMath.h +++ b/Eigen/src/Core/arch/AVX512/PacketMath.h
@@ -1640,16 +1640,23 @@ } template <> -EIGEN_STRONG_INLINE bool predux_any(const Packet16f& x) { - Packet16i xi = _mm512_castps_si512(x); - __mmask16 tmp = _mm512_test_epi32_mask(xi, xi); - return !_mm512_kortestz(tmp, tmp); +EIGEN_STRONG_INLINE bool predux_any(const Packet16f& a) { + return _mm512_reduce_or_epi32(_mm512_castps_si512(a)) != 0; } template <> -EIGEN_STRONG_INLINE bool predux_any(const Packet16i& x) { - __mmask16 tmp = _mm512_test_epi32_mask(x, x); - return !_mm512_kortestz(tmp, tmp); +EIGEN_STRONG_INLINE bool predux_any(const Packet16i& a) { + return _mm512_reduce_or_epi32(a) != 0; +} + +template <> +EIGEN_STRONG_INLINE bool predux_any(const Packet8d& a) { + return _mm512_reduce_or_epi64(_mm512_castpd_si512(a)) != 0; +} + +template <> +EIGEN_STRONG_INLINE bool predux_any(const Packet8l& a) { + return _mm512_reduce_or_epi64(a) != 0; } #define PACK_OUTPUT(OUTPUT, INPUT, INDEX, STRIDE) \