| // This file is part of Eigen, a lightweight C++ template library |
| // for linear algebra. |
| // |
| // Copyright (C) 2009 Rohit Garg <rpg.314@gmail.com> |
| // Copyright (C) 2009 Benoit Jacob <jacob.benoit.1@gmail.com> |
| // |
| // This Source Code Form is subject to the terms of the Mozilla |
| // Public License v. 2.0. If a copy of the MPL was not distributed |
| // with this file, You can obtain one at http://mozilla.org/MPL/2.0/. |
| |
| #ifndef EIGEN_MOREVECTORIZATION_MATHFUNCTIONS_H |
| #define EIGEN_MOREVECTORIZATION_MATHFUNCTIONS_H |
| |
| // IWYU pragma: private |
| #include "./InternalHeaderCheck.h" |
| |
| namespace Eigen { |
| |
| namespace internal { |
| |
| /** \internal \returns the arcsin of \a a (coeff-wise) */ |
| template <typename Packet> |
| inline static Packet pasin(Packet a) { |
| return std::asin(a); |
| } |
| |
| #ifdef EIGEN_VECTORIZE_SSE |
| |
| template <> |
| EIGEN_DONT_INLINE Packet4f pasin(Packet4f x) { |
| EIGEN_DECLARE_CONST_Packet4f(half, 0.5); |
| EIGEN_DECLARE_CONST_Packet4f(minus_half, -0.5); |
| EIGEN_DECLARE_CONST_Packet4f(3half, 1.5); |
| |
| EIGEN_DECLARE_CONST_Packet4f_FROM_INT(sign_mask, 0x80000000); |
| |
| EIGEN_DECLARE_CONST_Packet4f(pi, 3.141592654); |
| EIGEN_DECLARE_CONST_Packet4f(pi_over_2, 3.141592654 * 0.5); |
| |
| EIGEN_DECLARE_CONST_Packet4f(asin1, 4.2163199048E-2); |
| EIGEN_DECLARE_CONST_Packet4f(asin2, 2.4181311049E-2); |
| EIGEN_DECLARE_CONST_Packet4f(asin3, 4.5470025998E-2); |
| EIGEN_DECLARE_CONST_Packet4f(asin4, 7.4953002686E-2); |
| EIGEN_DECLARE_CONST_Packet4f(asin5, 1.6666752422E-1); |
| |
| Packet4f a = pabs(x); // got the absolute value |
| |
| Packet4f sign_bit = _mm_and_ps(x, p4f_sign_mask); // extracted the sign bit |
| |
| Packet4f z1, z2; // will need them during computation |
| |
| // will compute the two branches for asin |
| // so first compare with half |
| |
| Packet4f branch_mask = _mm_cmpgt_ps(a, p4f_half); // this is to select which branch to take |
| // both will be taken, and finally results will be merged |
| // the branch for values >0.5 |
| |
| { |
| // the core series expansion |
| z1 = pmadd(p4f_minus_half, a, p4f_half); |
| Packet4f x1 = psqrt(z1); |
| Packet4f s1 = pmadd(p4f_asin1, z1, p4f_asin2); |
| Packet4f s2 = pmadd(s1, z1, p4f_asin3); |
| Packet4f s3 = pmadd(s2, z1, p4f_asin4); |
| Packet4f s4 = pmadd(s3, z1, p4f_asin5); |
| Packet4f temp = pmul(s4, z1); // not really a madd but a mul by z so that the next term can be a madd |
| z1 = pmadd(temp, x1, x1); |
| z1 = padd(z1, z1); |
| z1 = psub(p4f_pi_over_2, z1); |
| } |
| |
| { |
| // the core series expansion |
| Packet4f x2 = a; |
| z2 = pmul(x2, x2); |
| Packet4f s1 = pmadd(p4f_asin1, z2, p4f_asin2); |
| Packet4f s2 = pmadd(s1, z2, p4f_asin3); |
| Packet4f s3 = pmadd(s2, z2, p4f_asin4); |
| Packet4f s4 = pmadd(s3, z2, p4f_asin5); |
| Packet4f temp = pmul(s4, z2); // not really a madd but a mul by z so that the next term can be a madd |
| z2 = pmadd(temp, x2, x2); |
| } |
| |
| /* select the correct result from the two branch evaluations */ |
| z1 = _mm_and_ps(branch_mask, z1); |
| z2 = _mm_andnot_ps(branch_mask, z2); |
| Packet4f z = _mm_or_ps(z1, z2); |
| |
| /* update the sign */ |
| return _mm_xor_ps(z, sign_bit); |
| } |
| |
| #endif // EIGEN_VECTORIZE_SSE |
| |
| } // end namespace internal |
| |
| } // end namespace Eigen |
| |
| #endif // EIGEN_MOREVECTORIZATION_MATHFUNCTIONS_H |