|  | /* | 
|  | Copyright (c) 2011, Intel Corporation. All rights reserved. | 
|  | Copyright (C) 2015 Gael Guennebaud <gael.guennebaud@inria.fr> | 
|  |  | 
|  | Redistribution and use in source and binary forms, with or without modification, | 
|  | are permitted provided that the following conditions are met: | 
|  |  | 
|  | * Redistributions of source code must retain the above copyright notice, this | 
|  | list of conditions and the following disclaimer. | 
|  | * Redistributions in binary form must reproduce the above copyright notice, | 
|  | this list of conditions and the following disclaimer in the documentation | 
|  | and/or other materials provided with the distribution. | 
|  | * Neither the name of Intel Corporation nor the names of its contributors may | 
|  | be used to endorse or promote products derived from this software without | 
|  | specific prior written permission. | 
|  |  | 
|  | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND | 
|  | ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED | 
|  | WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE | 
|  | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR | 
|  | ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES | 
|  | (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; | 
|  | LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON | 
|  | ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT | 
|  | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS | 
|  | SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | 
|  |  | 
|  | ******************************************************************************** | 
|  | *   Content : Eigen bindings to Intel(R) MKL | 
|  | *   MKL VML support for coefficient-wise unary Eigen expressions like a=b.sin() | 
|  | ******************************************************************************** | 
|  | */ | 
|  |  | 
|  | #ifndef EIGEN_ASSIGN_VML_H | 
|  | #define EIGEN_ASSIGN_VML_H | 
|  |  | 
|  | // IWYU pragma: private | 
|  | #include "./InternalHeaderCheck.h" | 
|  |  | 
|  | namespace Eigen { | 
|  |  | 
|  | namespace internal { | 
|  |  | 
|  | template <typename Dst, typename Src> | 
|  | class vml_assign_traits { | 
|  | private: | 
|  | enum { | 
|  | DstHasDirectAccess = Dst::Flags & DirectAccessBit, | 
|  | SrcHasDirectAccess = Src::Flags & DirectAccessBit, | 
|  | StorageOrdersAgree = (int(Dst::IsRowMajor) == int(Src::IsRowMajor)), | 
|  | InnerSize = int(Dst::IsVectorAtCompileTime) ? int(Dst::SizeAtCompileTime) | 
|  | : int(Dst::Flags) & RowMajorBit ? int(Dst::ColsAtCompileTime) | 
|  | : int(Dst::RowsAtCompileTime), | 
|  | InnerMaxSize = int(Dst::IsVectorAtCompileTime) ? int(Dst::MaxSizeAtCompileTime) | 
|  | : int(Dst::Flags) & RowMajorBit ? int(Dst::MaxColsAtCompileTime) | 
|  | : int(Dst::MaxRowsAtCompileTime), | 
|  | MaxSizeAtCompileTime = Dst::SizeAtCompileTime, | 
|  |  | 
|  | MightEnableVml = StorageOrdersAgree && DstHasDirectAccess && SrcHasDirectAccess && | 
|  | Src::InnerStrideAtCompileTime == 1 && Dst::InnerStrideAtCompileTime == 1, | 
|  | MightLinearize = MightEnableVml && (int(Dst::Flags) & int(Src::Flags) & LinearAccessBit), | 
|  | VmlSize = MightLinearize ? MaxSizeAtCompileTime : InnerMaxSize, | 
|  | LargeEnough = VmlSize == Dynamic || VmlSize >= EIGEN_MKL_VML_THRESHOLD | 
|  | }; | 
|  |  | 
|  | public: | 
|  | enum { EnableVml = MightEnableVml && LargeEnough, Traversal = MightLinearize ? LinearTraversal : DefaultTraversal }; | 
|  | }; | 
|  |  | 
// Identity macro: expands to its argument.
#define EIGEN_PP_EXPAND(ARG) ARG

// EIGEN_VMLMODE_EXPAND_x<tag>: optional trailing argument appended to a VML call.
//  - tag `LA`: the MKL accuracy-mode flag; VML_LA when EIGEN_FAST_MATH is defined
//    to 1, VML_HA in every other case.
//  - tag `_` : nothing, the routine is called without a mode argument.
#if defined(EIGEN_FAST_MATH) && (EIGEN_FAST_MATH == 1)
#define EIGEN_VMLMODE_EXPAND_xLA , VML_LA
#else
#define EIGEN_VMLMODE_EXPAND_xLA , VML_HA
#endif
#define EIGEN_VMLMODE_EXPAND_x_

// EIGEN_VMLMODE_PREFIX(tag): routine-name prefix matching the tag,
// `vm` for `LA` and `v` for `_`.
#define EIGEN_VMLMODE_PREFIX_x_ v
#define EIGEN_VMLMODE_PREFIX_xLA vm
#define EIGEN_VMLMODE_PREFIX(VMLMODE) EIGEN_CAT(EIGEN_VMLMODE_PREFIX_x, VMLMODE)
|  |  | 
/**
 * Generates the Assignment specialization that evaluates
 * `dst = CwiseUnaryOp<scalar_<EIGENOP>_op<EIGENTYPE>, SrcXprNested>` by calling a
 * VML routine on the raw buffers instead of looping over coefficients.
 *
 * Macro arguments:
 *  - EIGENOP:   functor stem; the specialization matches scalar_<EIGENOP>_op.
 *  - VMLOP:     VML routine that is called.
 *  - EIGENTYPE: scalar type on the Eigen side.
 *  - VMLTYPE:   element type the buffers are pointer-cast to for VMLOP.
 *  - VMLMODE:   tag (`LA` or `_`) selecting the optional trailing mode argument
 *               through EIGEN_VMLMODE_EXPAND_x<VMLMODE>.
 *
 * The specialization only participates when
 * vml_assign_traits<DstXprType, SrcXprNested>::EnableVml is non-zero.
 */
#define EIGEN_MKL_VML_DECLARE_UNARY_CALL(EIGENOP, VMLOP, EIGENTYPE, VMLTYPE, VMLMODE) \
  template <typename DstXprType, typename SrcXprNested> \
  struct Assignment<DstXprType, CwiseUnaryOp<scalar_##EIGENOP##_op<EIGENTYPE>, SrcXprNested>, \
                    assign_op<EIGENTYPE, EIGENTYPE>, Dense2Dense, \
                    std::enable_if_t<vml_assign_traits<DstXprType, SrcXprNested>::EnableVml>> { \
    typedef CwiseUnaryOp<scalar_##EIGENOP##_op<EIGENTYPE>, SrcXprNested> SrcXprType; \
    static void run(DstXprType &dst, const SrcXprType &src, const assign_op<EIGENTYPE, EIGENTYPE> &func) { \
      resize_if_allowed(dst, src, func); \
      eigen_assert(dst.rows() == src.rows() && dst.cols() == src.cols()); \
      if (vml_assign_traits<DstXprType, SrcXprNested>::Traversal != LinearTraversal) { \
        /* No linear access: one VML call per inner vector (inner stride is 1 by the traits). */ \
        for (Index vec = 0, vecCount = dst.outerSize(); vec < vecCount; ++vec) { \
          const EIGENTYPE *srcVec = SrcXprType::IsRowMajor ? &(src.nestedExpression().coeffRef(vec, 0)) \
                                                           : &(src.nestedExpression().coeffRef(0, vec)); \
          EIGENTYPE *dstVec = DstXprType::IsRowMajor ? &(dst.coeffRef(vec, 0)) : &(dst.coeffRef(0, vec)); \
          VMLOP(dst.innerSize(), (const VMLTYPE *)srcVec, \
                (VMLTYPE *)dstVec EIGEN_PP_EXPAND(EIGEN_VMLMODE_EXPAND_x##VMLMODE)); \
        } \
      } else { \
        /* Linear access on both sides: a single VML call covers every coefficient. */ \
        VMLOP(dst.size(), (const VMLTYPE *)src.nestedExpression().data(), \
              (VMLTYPE *)dst.data() EIGEN_PP_EXPAND(EIGEN_VMLMODE_EXPAND_x##VMLMODE)); \
      } \
    } \
  };
|  |  | 
// Declares the VML-backed assignment of EIGENOP for float and double.
// The routine name is the mode prefix followed by `s`/`d` and VMLOP.
#define EIGEN_MKL_VML_DECLARE_UNARY_CALLS_REAL(EIGENOP, VMLOP, VMLMODE) \
  EIGEN_MKL_VML_DECLARE_UNARY_CALL(EIGENOP, EIGEN_CAT(EIGEN_VMLMODE_PREFIX(VMLMODE), s##VMLOP), \
                                   float, float, VMLMODE) \
  EIGEN_MKL_VML_DECLARE_UNARY_CALL(EIGENOP, EIGEN_CAT(EIGEN_VMLMODE_PREFIX(VMLMODE), d##VMLOP), \
                                   double, double, VMLMODE)

// Declares the VML-backed assignment of EIGENOP for scomplex and dcomplex,
// whose buffers are passed to VML as MKL_Complex8 and MKL_Complex16.
// The routine name is the mode prefix followed by `c`/`z` and VMLOP.
#define EIGEN_MKL_VML_DECLARE_UNARY_CALLS_CPLX(EIGENOP, VMLOP, VMLMODE) \
  EIGEN_MKL_VML_DECLARE_UNARY_CALL(EIGENOP, EIGEN_CAT(EIGEN_VMLMODE_PREFIX(VMLMODE), c##VMLOP), \
                                   scomplex, MKL_Complex8, VMLMODE) \
  EIGEN_MKL_VML_DECLARE_UNARY_CALL(EIGENOP, EIGEN_CAT(EIGEN_VMLMODE_PREFIX(VMLMODE), z##VMLOP), \
                                   dcomplex, MKL_Complex16, VMLMODE)

// Declares the VML-backed assignment of EIGENOP for all four scalar types.
#define EIGEN_MKL_VML_DECLARE_UNARY_CALLS(EIGENOP, VMLOP, VMLMODE) \
  EIGEN_MKL_VML_DECLARE_UNARY_CALLS_REAL(EIGENOP, VMLOP, VMLMODE) \
  EIGEN_MKL_VML_DECLARE_UNARY_CALLS_CPLX(EIGENOP, VMLOP, VMLMODE)
|  |  | 
|  | EIGEN_MKL_VML_DECLARE_UNARY_CALLS(sin, Sin, LA) | 
|  | EIGEN_MKL_VML_DECLARE_UNARY_CALLS(asin, Asin, LA) | 
|  | EIGEN_MKL_VML_DECLARE_UNARY_CALLS(sinh, Sinh, LA) | 
|  | EIGEN_MKL_VML_DECLARE_UNARY_CALLS(cos, Cos, LA) | 
|  | EIGEN_MKL_VML_DECLARE_UNARY_CALLS(acos, Acos, LA) | 
|  | EIGEN_MKL_VML_DECLARE_UNARY_CALLS(cosh, Cosh, LA) | 
|  | EIGEN_MKL_VML_DECLARE_UNARY_CALLS(tan, Tan, LA) | 
|  | EIGEN_MKL_VML_DECLARE_UNARY_CALLS(atan, Atan, LA) | 
|  | EIGEN_MKL_VML_DECLARE_UNARY_CALLS(tanh, Tanh, LA) | 
|  | // EIGEN_MKL_VML_DECLARE_UNARY_CALLS(abs,   Abs,    _) | 
|  | EIGEN_MKL_VML_DECLARE_UNARY_CALLS(exp, Exp, LA) | 
|  | EIGEN_MKL_VML_DECLARE_UNARY_CALLS(log, Ln, LA) | 
|  | EIGEN_MKL_VML_DECLARE_UNARY_CALLS(log10, Log10, LA) | 
|  | EIGEN_MKL_VML_DECLARE_UNARY_CALLS(sqrt, Sqrt, _) | 
|  |  | 
|  | EIGEN_MKL_VML_DECLARE_UNARY_CALLS_REAL(square, Sqr, _) | 
|  | EIGEN_MKL_VML_DECLARE_UNARY_CALLS_CPLX(arg, Arg, _) | 
|  | EIGEN_MKL_VML_DECLARE_UNARY_CALLS_REAL(round, Round, _) | 
|  | EIGEN_MKL_VML_DECLARE_UNARY_CALLS_REAL(floor, Floor, _) | 
|  | EIGEN_MKL_VML_DECLARE_UNARY_CALLS_REAL(ceil, Ceil, _) | 
|  | EIGEN_MKL_VML_DECLARE_UNARY_CALLS_REAL(cbrt, Cbrt, _) | 
|  |  | 
/**
 * Generates the Assignment specialization for a coefficient-wise binary
 * scalar_<EIGENOP>_op whose right-hand side is a constant nullary expression
 * (scalar_constant_op), i.e. every coefficient is combined with the same scalar.
 * The scalar is read from the constant functor and passed to VMLOP together
 * with the raw source and destination buffers.
 *
 * Macro arguments:
 *  - EIGENOP:   functor stem; the specialization matches scalar_<EIGENOP>_op.
 *  - VMLOP:     VML routine that is called (takes the scalar as third argument).
 *  - EIGENTYPE: scalar type on the Eigen side.
 *  - VMLTYPE:   element type the buffers and the scalar are reinterpreted as.
 *  - VMLMODE:   tag (`LA` or `_`) selecting the optional trailing mode argument
 *               through EIGEN_VMLMODE_EXPAND_x<VMLMODE>.
 *
 * The specialization only participates when
 * vml_assign_traits<DstXprType, SrcXprNested>::EnableVml is non-zero.
 */
#define EIGEN_MKL_VML_DECLARE_POW_CALL(EIGENOP, VMLOP, EIGENTYPE, VMLTYPE, VMLMODE) \
  template <typename DstXprType, typename SrcXprNested, typename Plain> \
  struct Assignment<DstXprType, \
                    CwiseBinaryOp<scalar_##EIGENOP##_op<EIGENTYPE, EIGENTYPE>, SrcXprNested, \
                                  const CwiseNullaryOp<internal::scalar_constant_op<EIGENTYPE>, Plain>>, \
                    assign_op<EIGENTYPE, EIGENTYPE>, Dense2Dense, \
                    std::enable_if_t<vml_assign_traits<DstXprType, SrcXprNested>::EnableVml>> { \
    typedef CwiseBinaryOp<scalar_##EIGENOP##_op<EIGENTYPE, EIGENTYPE>, SrcXprNested, \
                          const CwiseNullaryOp<internal::scalar_constant_op<EIGENTYPE>, Plain>> \
        SrcXprType; \
    static void run(DstXprType &dst, const SrcXprType &src, const assign_op<EIGENTYPE, EIGENTYPE> &func) { \
      resize_if_allowed(dst, src, func); \
      eigen_assert(dst.rows() == src.rows() && dst.cols() == src.cols()); \
      /* The constant stored in the right-hand functor, viewed as the VML scalar type. */ \
      VMLTYPE exponent = reinterpret_cast<const VMLTYPE &>(src.rhs().functor().m_other); \
      if (vml_assign_traits<DstXprType, SrcXprNested>::Traversal != LinearTraversal) { \
        /* No linear access: one VML call per inner vector (inner stride is 1 by the traits). */ \
        for (Index vec = 0, vecCount = dst.outerSize(); vec < vecCount; ++vec) { \
          const EIGENTYPE *srcVec = \
              SrcXprType::IsRowMajor ? &(src.lhs().coeffRef(vec, 0)) : &(src.lhs().coeffRef(0, vec)); \
          EIGENTYPE *dstVec = DstXprType::IsRowMajor ? &(dst.coeffRef(vec, 0)) : &(dst.coeffRef(0, vec)); \
          VMLOP(dst.innerSize(), (const VMLTYPE *)srcVec, exponent, \
                (VMLTYPE *)dstVec EIGEN_PP_EXPAND(EIGEN_VMLMODE_EXPAND_x##VMLMODE)); \
        } \
      } else { \
        /* Linear access on both sides: a single VML call covers every coefficient. */ \
        VMLOP(dst.size(), (const VMLTYPE *)src.lhs().data(), exponent, \
              (VMLTYPE *)dst.data() EIGEN_PP_EXPAND(EIGEN_VMLMODE_EXPAND_x##VMLMODE)); \
      } \
    } \
  };
|  |  | 
|  | EIGEN_MKL_VML_DECLARE_POW_CALL(pow, vmsPowx, float, float, LA) | 
|  | EIGEN_MKL_VML_DECLARE_POW_CALL(pow, vmdPowx, double, double, LA) | 
|  | EIGEN_MKL_VML_DECLARE_POW_CALL(pow, vmcPowx, scomplex, MKL_Complex8, LA) | 
|  | EIGEN_MKL_VML_DECLARE_POW_CALL(pow, vmzPowx, dcomplex, MKL_Complex16, LA) | 
|  |  | 
|  | }  // end namespace internal | 
|  |  | 
|  | }  // end namespace Eigen | 
|  |  | 
|  | #endif  // EIGEN_ASSIGN_VML_H |