Fix a bug for pcmp_lt_or_nan and Add sqrt support for SVE
diff --git a/Eigen/src/Core/arch/SVE/PacketMath.h b/Eigen/src/Core/arch/SVE/PacketMath.h
index 924f897..51bbfe0 100644
--- a/Eigen/src/Core/arch/SVE/PacketMath.h
+++ b/Eigen/src/Core/arch/SVE/PacketMath.h
@@ -358,7 +358,7 @@
HasCos = EIGEN_FAST_MATH,
HasLog = 1,
HasExp = 1,
- HasSqrt = 0,
+ HasSqrt = 1,
HasTanh = EIGEN_FAST_MATH,
HasErf = EIGEN_FAST_MATH
};
@@ -478,12 +478,12 @@
return svreinterpret_f32_u32(svdup_n_u32_z(svcmpeq_f32(svptrue_b32(), a, b), 0xffffffffu));
}
-// Do a predicate inverse (svnot_b_x) on the predicate resulted from the
+// Do a predicate inverse (svnot_b_z) on the predicate resulted from the
// greater/equal comparison (svcmpge_f32). Then fill a float vector with the
// active elements.
template <>
EIGEN_STRONG_INLINE PacketXf pcmp_lt_or_nan<PacketXf>(const PacketXf& a, const PacketXf& b) {
- return svreinterpret_f32_u32(svdup_n_u32_z(svnot_b_x(svptrue_b32(), svcmpge_f32(svptrue_b32(), a, b)), 0xffffffffu));
+ return svreinterpret_f32_u32(svdup_n_u32_z(svnot_b_z(svptrue_b32(), svcmpge_f32(svptrue_b32(), a, b)), 0xffffffffu));
}
template <>
@@ -660,6 +660,11 @@
return pldexp_generic(a, exponent);
}
+template <>
+EIGEN_STRONG_INLINE PacketXf psqrt<PacketXf>(const PacketXf& a) {
+ return svsqrt_f32_x(svptrue_b32(), a);
+}
+
} // namespace internal
} // namespace Eigen