Merged in jiayq/eigen (pull request PR-159)

Modifications to the tensor benchmarks to allow compilation in a standalone fashion.
diff --git a/Eigen/src/Core/CoreEvaluators.h b/Eigen/src/Core/CoreEvaluators.h index 8bd73b8..7776948 100644 --- a/Eigen/src/Core/CoreEvaluators.h +++ b/Eigen/src/Core/CoreEvaluators.h
@@ -994,7 +994,7 @@ CoeffReadCost = TraversalSize==Dynamic ? HugeCost : TraversalSize * evaluator<ArgType>::CoeffReadCost + int(CostOpType::value), - Flags = (traits<XprType>::Flags&RowMajorBit) | (evaluator<ArgType>::Flags&(HereditaryBits&(~RowMajorBit))), + Flags = (traits<XprType>::Flags&RowMajorBit) | (evaluator<ArgType>::Flags&(HereditaryBits&(~RowMajorBit))) | LinearAccessBit, Alignment = 0 // FIXME this will need to be improved once PartialReduxExpr is vectorized };
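Note: adding LinearAccessBit here lets the one-dimensional result of a partial reduction be indexed linearly (bug 1158; the same pattern is added as a regression test in test/vectorwiseop.cpp further down). A minimal sketch of the affected expression, inside a function with <Eigen/Dense> included:

    Eigen::MatrixXf m = Eigen::MatrixXf::Random(3, 4);
    // colwise().sum() is a 1x4 expression; linear access makes .x() / coeff(i)
    // address it like the row vector it is.
    float s0  = m.cwiseAbs().colwise().sum().x();   // sum of |column 0|
    float ref = m.col(0).cwiseAbs().sum();          // same value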
diff --git a/Eigen/src/Core/CwiseBinaryOp.h b/Eigen/src/Core/CwiseBinaryOp.h index f94629e..39820fd 100644 --- a/Eigen/src/Core/CwiseBinaryOp.h +++ b/Eigen/src/Core/CwiseBinaryOp.h
@@ -32,8 +32,8 @@ // we still want to handle the case when the result type is different. typedef typename result_of< BinaryOp( - typename Lhs::Scalar, - typename Rhs::Scalar + const typename Lhs::Scalar&, + const typename Rhs::Scalar& ) >::type Scalar; typedef typename cwise_promote_storage_type<typename traits<Lhs>::StorageKind,
diff --git a/Eigen/src/Core/CwiseUnaryOp.h b/Eigen/src/Core/CwiseUnaryOp.h index 5a809cf..22db783 100644 --- a/Eigen/src/Core/CwiseUnaryOp.h +++ b/Eigen/src/Core/CwiseUnaryOp.h
@@ -19,7 +19,7 @@ : traits<XprType> { typedef typename result_of< - UnaryOp(typename XprType::Scalar) + UnaryOp(const typename XprType::Scalar&) >::type Scalar; typedef typename XprType::Nested XprTypeNested; typedef typename remove_reference<XprTypeNested>::type _XprTypeNested;
diff --git a/Eigen/src/Core/CwiseUnaryView.h b/Eigen/src/Core/CwiseUnaryView.h index 5a7db2b..a9252ed 100644 --- a/Eigen/src/Core/CwiseUnaryView.h +++ b/Eigen/src/Core/CwiseUnaryView.h
@@ -18,7 +18,7 @@ : traits<MatrixType> { typedef typename result_of< - ViewOp(typename traits<MatrixType>::Scalar) + ViewOp(const typename traits<MatrixType>::Scalar&) >::type Scalar; typedef typename MatrixType::Nested MatrixTypeNested; typedef typename remove_all<MatrixTypeNested>::type _MatrixTypeNested;
diff --git a/Eigen/src/Core/GenericPacketMath.h b/Eigen/src/Core/GenericPacketMath.h index 8ad51ba..4c7d1d8 100644 --- a/Eigen/src/Core/GenericPacketMath.h +++ b/Eigen/src/Core/GenericPacketMath.h
@@ -75,6 +75,7 @@ HasCosh = 0, HasTanh = 0, HasLGamma = 0, + HasDiGamma = 0, HasErf = 0, HasErfc = 0, @@ -439,6 +440,10 @@ template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet plgamma(const Packet& a) { using numext::lgamma; return lgamma(a); } +/** \internal \returns the derivative of lgamma, psi(\a a) (coeff-wise) */ +template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS +Packet pdigamma(const Packet& a) { using numext::digamma; return digamma(a); } + /** \internal \returns the erf(\a a) (coeff-wise) */ template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet perf(const Packet& a) { using numext::erf; return erf(a); }
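Note: the generic pdigamma above simply forwards to numext::digamma, so on a "packet of one" it behaves like the scalar function; vectorized kernels are opt-in through the new HasDiGamma trait (0 by default, set to 1 only where a packet specialization exists, e.g. the CUDA float4/double2 kernels further down). A rough sketch, assuming C99 math support:

    double d = Eigen::internal::pdigamma(0.5);   // falls back to numext::digamma(0.5)
    // Query whether the scalar type's packet provides a vectorized digamma:
    const int vec = Eigen::internal::packet_traits<float>::HasDiGamma;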
diff --git a/Eigen/src/Core/GlobalFunctions.h b/Eigen/src/Core/GlobalFunctions.h index 62fec70..396da8e 100644 --- a/Eigen/src/Core/GlobalFunctions.h +++ b/Eigen/src/Core/GlobalFunctions.h
@@ -50,6 +50,7 @@ EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(cosh,scalar_cosh_op) EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(tanh,scalar_tanh_op) EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(lgamma,scalar_lgamma_op) + EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(digamma,scalar_digamma_op) EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(erf,scalar_erf_op) EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(erfc,scalar_erfc_op) EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(exp,scalar_exp_op)
diff --git a/Eigen/src/Core/MathFunctions.h b/Eigen/src/Core/MathFunctions.h index e47070a..e87b60f 100644 --- a/Eigen/src/Core/MathFunctions.h +++ b/Eigen/src/Core/MathFunctions.h
@@ -748,9 +748,9 @@ } //MSVC defines a _isnan builtin function, but for double only -EIGEN_DEVICE_FUNC inline bool isnan_impl(const long double& x) { return _isnan(x); } -EIGEN_DEVICE_FUNC inline bool isnan_impl(const double& x) { return _isnan(x); } -EIGEN_DEVICE_FUNC inline bool isnan_impl(const float& x) { return _isnan(x); } +EIGEN_DEVICE_FUNC inline bool isnan_impl(const long double& x) { return _isnan(x)!=0; } +EIGEN_DEVICE_FUNC inline bool isnan_impl(const double& x) { return _isnan(x)!=0; } +EIGEN_DEVICE_FUNC inline bool isnan_impl(const float& x) { return _isnan(x)!=0; } EIGEN_DEVICE_FUNC inline bool isinf_impl(const long double& x) { return isinf_msvc_helper(x); } EIGEN_DEVICE_FUNC inline bool isinf_impl(const double& x) { return isinf_msvc_helper(x); }
diff --git a/Eigen/src/Core/SpecialFunctions.h b/Eigen/src/Core/SpecialFunctions.h index d43cf23..9f89e18 100644 --- a/Eigen/src/Core/SpecialFunctions.h +++ b/Eigen/src/Core/SpecialFunctions.h
@@ -13,79 +13,349 @@ namespace Eigen { namespace internal { +// Parts of this code are based on the Cephes Math Library. +// +// Cephes Math Library Release 2.8: June, 2000 +// Copyright 1984, 1987, 1992, 2000 by Stephen L. Moshier +// +// Permission has been kindly provided by the original author +// to incorporate the Cephes software into the Eigen codebase: +// +// From: Stephen Moshier +// To: Eugene Brevdo +// Subject: Re: Permission to wrap several cephes functions in Eigen +// +// Hello Eugene, +// +// Thank you for writing. +// +// If your licensing is similar to BSD, the formal way that has been +// handled is simply to add a statement to the effect that you are incorporating +// the Cephes software by permission of the author. +// +// Good luck with your project, +// Steve + +namespace cephes { + +/* polevl (modified for Eigen) + * + * Evaluate polynomial + * + * + * + * SYNOPSIS: + * + * int N; + * Scalar x, y, coef[N+1]; + * + * y = polevl<decltype(x), N>( x, coef); + * + * + * + * DESCRIPTION: + * + * Evaluates polynomial of degree N: + * + * 2 N + * y = C + C x + C x +...+ C x + * 0 1 2 N + * + * Coefficients are stored in reverse order: + * + * coef[0] = C , ..., coef[N] = C . + * N 0 + * + * The function p1evl() assumes that coef[N] = 1.0 and is + * omitted from the array. Its calling arguments are + * otherwise the same as polevl(). + * + * + * The Eigen implementation is templatized. For best speed, store + * coef as a const array (constexpr), e.g. + * + * const double coef[] = {1.0, 2.0, 3.0, ...}; + * + */ +template <typename Scalar, int N> +struct polevl { + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + static Scalar run(const Scalar x, const Scalar coef[]) { + EIGEN_STATIC_ASSERT((N > 0), YOU_MADE_A_PROGRAMMING_MISTAKE); + + return polevl<Scalar, N - 1>::run(x, coef) * x + coef[N]; + } +}; + +template <typename Scalar> +struct polevl<Scalar, 0> { + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + static Scalar run(const Scalar, const Scalar coef[]) { + return coef[0]; + } +}; + +} // end namespace cephes + /**************************************************************************** * Implementation of lgamma * ****************************************************************************/ -template<typename Scalar> -struct lgamma_impl -{ +template <typename Scalar> +struct lgamma_impl { EIGEN_DEVICE_FUNC - static EIGEN_STRONG_INLINE Scalar run(const Scalar&) - { + static EIGEN_STRONG_INLINE Scalar run(const Scalar) { EIGEN_STATIC_ASSERT((internal::is_same<Scalar, Scalar>::value == false), THIS_TYPE_IS_NOT_SUPPORTED); return Scalar(0); } }; -template<typename Scalar> -struct lgamma_retval -{ +template <typename Scalar> +struct lgamma_retval { typedef Scalar type; }; #ifdef EIGEN_HAS_C99_MATH -template<> -struct lgamma_impl<float> -{ +template <> +struct lgamma_impl<float> { EIGEN_DEVICE_FUNC - static EIGEN_STRONG_INLINE double run(const float& x) { return ::lgammaf(x); } + static EIGEN_STRONG_INLINE float run(float x) { return ::lgammaf(x); } }; -template<> -struct lgamma_impl<double> -{ +template <> +struct lgamma_impl<double> { EIGEN_DEVICE_FUNC - static EIGEN_STRONG_INLINE double run(const double& x) { return ::lgamma(x); } + static EIGEN_STRONG_INLINE double run(double x) { return ::lgamma(x); } }; #endif /**************************************************************************** + * Implementation of digamma (psi) * + ****************************************************************************/ + +#ifdef EIGEN_HAS_C99_MATH + +/* + * + * Polynomial evaluation helper for the Psi 
(digamma) function. + * + * digamma_impl_maybe_poly::run(s) evaluates the asymptotic Psi expansion for + * input Scalar s, assuming s is above 10.0. + * + * If s is above a certain threshold for the given Scalar type, zero + * is returned. Otherwise the polynomial is evaluated with enough + * coefficients for results matching Scalar machine precision. + * + * + */ +template <typename Scalar> +struct digamma_impl_maybe_poly { + EIGEN_DEVICE_FUNC + static EIGEN_STRONG_INLINE Scalar run(const Scalar) { + EIGEN_STATIC_ASSERT((internal::is_same<Scalar, Scalar>::value == false), + THIS_TYPE_IS_NOT_SUPPORTED); + return Scalar(0); + } +}; + + +template <> +struct digamma_impl_maybe_poly<float> { + EIGEN_DEVICE_FUNC + static EIGEN_STRONG_INLINE float run(const float s) { + const float A[] = { + -4.16666666666666666667E-3, + 3.96825396825396825397E-3, + -8.33333333333333333333E-3, + 8.33333333333333333333E-2 + }; + + float z; + if (s < 1.0e8f) { + z = 1.0f / (s * s); + return z * cephes::polevl<float, 3>::run(z, A); + } else return 0.0f; + } +}; + +template <> +struct digamma_impl_maybe_poly<double> { + EIGEN_DEVICE_FUNC + static EIGEN_STRONG_INLINE double run(const double s) { + const double A[] = { + 8.33333333333333333333E-2, + -2.10927960927960927961E-2, + 7.57575757575757575758E-3, + -4.16666666666666666667E-3, + 3.96825396825396825397E-3, + -8.33333333333333333333E-3, + 8.33333333333333333333E-2 + }; + + double z; + if (s < 1.0e17) { + z = 1.0 / (s * s); + return z * cephes::polevl<double, 6>::run(z, A); + } + else return 0.0; + } +}; + +#endif // EIGEN_HAS_C99_MATH + +template <typename Scalar> +struct digamma_retval { + typedef Scalar type; +}; + +#ifdef EIGEN_HAS_C99_MATH +template <typename Scalar> +struct digamma_impl { + EIGEN_DEVICE_FUNC + static Scalar run(Scalar x) { + /* + * + * Psi (digamma) function (modified for Eigen) + * + * + * SYNOPSIS: + * + * double x, y, psi(); + * + * y = psi( x ); + * + * + * DESCRIPTION: + * + * d - + * psi(x) = -- ln | (x) + * dx + * + * is the logarithmic derivative of the gamma function. + * For integer x, + * n-1 + * - + * psi(n) = -EUL + > 1/k. + * - + * k=1 + * + * If x is negative, it is transformed to a positive argument by the + * reflection formula psi(1-x) = psi(x) + pi cot(pi x). + * For general positive x, the argument is made greater than 10 + * using the recurrence psi(x+1) = psi(x) + 1/x. + * Then the following asymptotic expansion is applied: + * + * inf. B + * - 2k + * psi(x) = log(x) - 1/2x - > ------- + * - 2k + * k=1 2k x + * + * where the B2k are Bernoulli numbers. 
+ * + * ACCURACY (float): + * Relative error (except absolute when |psi| < 1): + * arithmetic domain # trials peak rms + * IEEE 0,30 30000 1.3e-15 1.4e-16 + * IEEE -30,0 40000 1.5e-15 2.2e-16 + * + * ACCURACY (double): + * Absolute error, relative when |psi| > 1 : + * arithmetic domain # trials peak rms + * IEEE -33,0 30000 8.2e-7 1.2e-7 + * IEEE 0,33 100000 7.3e-7 7.7e-8 + * + * ERROR MESSAGES: + * message condition value returned + * psi singularity x integer <=0 INFINITY + */ + + Scalar p, q, nz, s, w, y; + bool negative; + + const Scalar maxnum = std::numeric_limits<Scalar>::infinity(); + const Scalar m_pi = 3.14159265358979323846; + + negative = 0; + nz = 0.0; + + const Scalar zero = 0.0; + const Scalar one = 1.0; + const Scalar half = 0.5; + + if (x <= zero) { + negative = one; + q = x; + p = ::floor(q); + if (p == q) { + return maxnum; + } + /* Remove the zeros of tan(m_pi x) + * by subtracting the nearest integer from x + */ + nz = q - p; + if (nz != half) { + if (nz > half) { + p += one; + nz = q - p; + } + nz = m_pi / ::tan(m_pi * nz); + } + else { + nz = zero; + } + x = one - x; + } + + /* use the recurrence psi(x+1) = psi(x) + 1/x. */ + s = x; + w = zero; + while (s < Scalar(10)) { + w += one / s; + s += one; + } + + y = digamma_impl_maybe_poly<Scalar>::run(s); + + y = ::log(s) - (half / s) - y - w; + + return (negative) ? y - nz : y; + } +}; + +#endif // EIGEN_HAS_C99_MATH + +/**************************************************************************** * Implementation of erf * ****************************************************************************/ -template<typename Scalar> -struct erf_impl -{ +template <typename Scalar> +struct erf_impl { EIGEN_DEVICE_FUNC - static EIGEN_STRONG_INLINE Scalar run(const Scalar&) - { + static EIGEN_STRONG_INLINE Scalar run(const Scalar) { EIGEN_STATIC_ASSERT((internal::is_same<Scalar, Scalar>::value == false), THIS_TYPE_IS_NOT_SUPPORTED); return Scalar(0); } }; -template<typename Scalar> -struct erf_retval -{ +template <typename Scalar> +struct erf_retval { typedef Scalar type; }; #ifdef EIGEN_HAS_C99_MATH -template<> -struct erf_impl<float> -{ +template <> +struct erf_impl<float> { EIGEN_DEVICE_FUNC - static EIGEN_STRONG_INLINE float run(const float& x) { return ::erff(x); } + static EIGEN_STRONG_INLINE float run(float x) { return ::erff(x); } }; -template<> -struct erf_impl<double> -{ +template <> +struct erf_impl<double> { EIGEN_DEVICE_FUNC - static EIGEN_STRONG_INLINE double run(const double& x) { return ::erf(x); } + static EIGEN_STRONG_INLINE double run(double x) { return ::erf(x); } }; #endif // EIGEN_HAS_C99_MATH @@ -93,35 +363,30 @@ * Implementation of erfc * ****************************************************************************/ -template<typename Scalar> -struct erfc_impl -{ +template <typename Scalar> +struct erfc_impl { EIGEN_DEVICE_FUNC - static EIGEN_STRONG_INLINE Scalar run(const Scalar&) - { + static EIGEN_STRONG_INLINE Scalar run(const Scalar) { EIGEN_STATIC_ASSERT((internal::is_same<Scalar, Scalar>::value == false), THIS_TYPE_IS_NOT_SUPPORTED); return Scalar(0); } }; -template<typename Scalar> -struct erfc_retval -{ +template <typename Scalar> +struct erfc_retval { typedef Scalar type; }; #ifdef EIGEN_HAS_C99_MATH -template<> -struct erfc_impl<float> -{ +template <> +struct erfc_impl<float> { EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE float run(const float x) { return ::erfcf(x); } }; -template<> -struct erfc_impl<double> -{ +template <> +struct erfc_impl<double> { EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE 
double run(const double x) { return ::erfc(x); } }; @@ -129,27 +394,29 @@ } // end namespace internal - namespace numext { -template<typename Scalar> -EIGEN_DEVICE_FUNC -inline EIGEN_MATHFUNC_RETVAL(lgamma, Scalar) lgamma(const Scalar& x) -{ +template <typename Scalar> +EIGEN_DEVICE_FUNC inline EIGEN_MATHFUNC_RETVAL(lgamma, Scalar) + lgamma(const Scalar& x) { return EIGEN_MATHFUNC_IMPL(lgamma, Scalar)::run(x); } -template<typename Scalar> -EIGEN_DEVICE_FUNC -inline EIGEN_MATHFUNC_RETVAL(erf, Scalar) erf(const Scalar& x) -{ +template <typename Scalar> +EIGEN_DEVICE_FUNC inline EIGEN_MATHFUNC_RETVAL(digamma, Scalar) + digamma(const Scalar& x) { + return EIGEN_MATHFUNC_IMPL(digamma, Scalar)::run(x); +} + +template <typename Scalar> +EIGEN_DEVICE_FUNC inline EIGEN_MATHFUNC_RETVAL(erf, Scalar) + erf(const Scalar& x) { return EIGEN_MATHFUNC_IMPL(erf, Scalar)::run(x); } -template<typename Scalar> -EIGEN_DEVICE_FUNC -inline EIGEN_MATHFUNC_RETVAL(erfc, Scalar) erfc(const Scalar& x) -{ +template <typename Scalar> +EIGEN_DEVICE_FUNC inline EIGEN_MATHFUNC_RETVAL(erfc, Scalar) + erfc(const Scalar& x) { return EIGEN_MATHFUNC_IMPL(erfc, Scalar)::run(x); }
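Note: for readers without the full file at hand, the following is a minimal standalone sketch (plain C++, illustrative names, double precision only) of the Cephes-derived algorithm that the new digamma_impl follows: reflect non-positive arguments, raise the argument above 10 with the recurrence psi(x+1) = psi(x) + 1/x, then evaluate the asymptotic series in 1/x^2 by Horner's rule, which is what cephes::polevl does.

    #include <cmath>
    #include <limits>

    // Illustrative sketch, not Eigen's code.
    static double digamma_sketch(double x) {
      const double pi = 3.14159265358979323846;
      // Asymptotic-series coefficients (highest degree first), as in the patch.
      const double A[] = { 8.33333333333333333333E-2, -2.10927960927960927961E-2,
                           7.57575757575757575758E-3, -4.16666666666666666667E-3,
                           3.96825396825396825397E-3, -8.33333333333333333333E-3,
                           8.33333333333333333333E-2 };
      bool negative = false;
      double reflection = 0.0;
      if (x <= 0.0) {                                   // psi(1-x) = psi(x) + pi*cot(pi*x)
        negative = true;
        const double p = std::floor(x);
        if (p == x)                                     // pole at 0, -1, -2, ...
          return std::numeric_limits<double>::infinity();
        double nz = x - p;
        if (nz != 0.5) {
          if (nz > 0.5) nz = x - (p + 1.0);
          reflection = pi / std::tan(pi * nz);
        }
        x = 1.0 - x;
      }
      double w = 0.0, s = x;
      while (s < 10.0) { w += 1.0 / s; s += 1.0; }      // recurrence psi(x+1) = psi(x) + 1/x
      double tail = 0.0;
      if (s < 1.0e17) {                                 // Horner evaluation in z = 1/s^2
        const double z = 1.0 / (s * s);
        double poly = A[0];
        for (int i = 1; i < 7; ++i) poly = poly * z + A[i];
        tail = z * poly;
      }
      const double y = std::log(s) - 0.5 / s - tail - w;
      return negative ? y - reflection : y;
    }

Consistent with the values checked in test/array.cpp below, digamma_sketch(1.0) evaluates to roughly -0.5772156649 (minus the Euler-Mascheroni constant), and digamma_sketch(-1.0) hits the pole check and returns infinity.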
diff --git a/Eigen/src/Core/VectorwiseOp.h b/Eigen/src/Core/VectorwiseOp.h index 95bcaa8..1938911 100755 --- a/Eigen/src/Core/VectorwiseOp.h +++ b/Eigen/src/Core/VectorwiseOp.h
@@ -124,7 +124,7 @@ template <typename BinaryOp, typename Scalar> struct member_redux { typedef typename result_of< - BinaryOp(Scalar,Scalar) + BinaryOp(const Scalar&,const Scalar&) >::type result_type; template<typename _Scalar, int Size> struct Cost { enum { value = (Size-1) * functor_traits<BinaryOp>::Cost }; };
diff --git a/Eigen/src/Core/arch/CUDA/MathFunctions.h b/Eigen/src/Core/arch/CUDA/MathFunctions.h index ecd5c44..a2c06a8 100644 --- a/Eigen/src/Core/arch/CUDA/MathFunctions.h +++ b/Eigen/src/Core/arch/CUDA/MathFunctions.h
@@ -79,6 +79,20 @@ } template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE +float4 pdigamma<float4>(const float4& a) +{ + using numext::digamma; + return make_float4(digamma(a.x), digamma(a.y), digamma(a.z), digamma(a.w)); +} + +template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE +double2 pdigamma<double2>(const double2& a) +{ + using numext::digamma; + return make_double2(digamma(a.x), digamma(a.y)); +} + +template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE float4 perf<float4>(const float4& a) { return make_float4(erf(a.x), erf(a.y), erf(a.z), erf(a.w));
diff --git a/Eigen/src/Core/arch/CUDA/PacketMath.h b/Eigen/src/Core/arch/CUDA/PacketMath.h index 9d57731..d3d9f91 100644 --- a/Eigen/src/Core/arch/CUDA/PacketMath.h +++ b/Eigen/src/Core/arch/CUDA/PacketMath.h
@@ -40,6 +40,7 @@ HasSqrt = 1, HasRsqrt = 1, HasLGamma = 1, + HasDiGamma = 1, HasErf = 1, HasErfc = 1, @@ -63,6 +64,7 @@ HasSqrt = 1, HasRsqrt = 1, HasLGamma = 1, + HasDiGamma = 1, HasErf = 1, HasErfc = 1,
diff --git a/Eigen/src/Core/functors/UnaryFunctors.h b/Eigen/src/Core/functors/UnaryFunctors.h index 01727f2..897ab04 100644 --- a/Eigen/src/Core/functors/UnaryFunctors.h +++ b/Eigen/src/Core/functors/UnaryFunctors.h
@@ -428,6 +428,28 @@ }; /** \internal + * \brief Template functor to compute psi, the derivative of lgamma of a scalar. + * \sa class CwiseUnaryOp, Cwise::digamma() + */ +template<typename Scalar> struct scalar_digamma_op { + EIGEN_EMPTY_STRUCT_CTOR(scalar_digamma_op) + EIGEN_DEVICE_FUNC inline const Scalar operator() (const Scalar& a) const { + using numext::digamma; return digamma(a); + } + typedef typename packet_traits<Scalar>::type Packet; + EIGEN_DEVICE_FUNC inline Packet packetOp(const Packet& a) const { return internal::pdigamma(a); } +}; +template<typename Scalar> +struct functor_traits<scalar_digamma_op<Scalar> > +{ + enum { + // Guesstimate + Cost = 10 * NumTraits<Scalar>::MulCost + 5 * NumTraits<Scalar>::AddCost, + PacketAccess = packet_traits<Scalar>::HasDiGamma + }; +}; + +/** \internal * \brief Template functor to compute the Gauss error function of a * scalar * \sa class CwiseUnaryOp, Cwise::erf()
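Note: the new functor can also be applied explicitly through unaryExpr, which is all the convenience wrappers added below (Array digamma(), Tensor digamma()) reduce to. A brief sketch, inside a function with <Eigen/Dense> included:

    Eigen::ArrayXd a = Eigen::ArrayXd::LinSpaced(5, 1.0, 5.0);
    Eigen::ArrayXd psi = a.unaryExpr(Eigen::internal::scalar_digamma_op<double>());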
diff --git a/Eigen/src/Core/util/DisableStupidWarnings.h b/Eigen/src/Core/util/DisableStupidWarnings.h index 7472329..91c61fc 100755 --- a/Eigen/src/Core/util/DisableStupidWarnings.h +++ b/Eigen/src/Core/util/DisableStupidWarnings.h
@@ -15,10 +15,11 @@ // 4522 - 'class' : multiple assignment operators specified // 4700 - uninitialized local variable 'xyz' used // 4717 - 'function' : recursive on all control paths, function will cause runtime stack overflow + // 4800 - 'type' : forcing value to bool 'true' or 'false' (performance warning) #ifndef EIGEN_PERMANENTLY_DISABLE_STUPID_WARNINGS #pragma warning( push ) #endif - #pragma warning( disable : 4100 4101 4127 4181 4211 4244 4273 4324 4503 4512 4522 4700 4717 ) + #pragma warning( disable : 4100 4101 4127 4181 4211 4244 4273 4324 4503 4512 4522 4700 4717 4800) #elif defined __INTEL_COMPILER // 2196 - routine is both "inline" and "noinline" ("noinline" assumed) // ICC 12 generates this warning even without any inline keyword, when defining class methods 'inline' i.e. inside of class body
diff --git a/Eigen/src/Core/util/Memory.h b/Eigen/src/Core/util/Memory.h index 823e077..415bc48 100644 --- a/Eigen/src/Core/util/Memory.h +++ b/Eigen/src/Core/util/Memory.h
@@ -526,9 +526,9 @@ template<int Alignment, typename Scalar, typename Index> EIGEN_DEVICE_FUNC inline Index first_aligned(const Scalar* array, Index size) { - static const Index ScalarSize = sizeof(Scalar); - static const Index AlignmentSize = Alignment / ScalarSize; - static const Index AlignmentMask = AlignmentSize-1; + const Index ScalarSize = sizeof(Scalar); + const Index AlignmentSize = Alignment / ScalarSize; + const Index AlignmentMask = AlignmentSize-1; if(AlignmentSize<=1) {
diff --git a/Eigen/src/Core/util/Meta.h b/Eigen/src/Core/util/Meta.h index e3e6d76..b01437d 100644 --- a/Eigen/src/Core/util/Meta.h +++ b/Eigen/src/Core/util/Meta.h
@@ -257,7 +257,7 @@ struct has_tr1_result {int a[3];}; template<typename Func, typename ArgType, int SizeOf=sizeof(has_none)> -struct unary_result_of_select {typedef ArgType type;}; +struct unary_result_of_select {typedef typename internal::remove_all<ArgType>::type type;}; template<typename Func, typename ArgType> struct unary_result_of_select<Func, ArgType, sizeof(has_std_result_type)> {typedef typename Func::result_type type;}; @@ -279,7 +279,7 @@ }; template<typename Func, typename ArgType0, typename ArgType1, int SizeOf=sizeof(has_none)> -struct binary_result_of_select {typedef ArgType0 type;}; +struct binary_result_of_select {typedef typename internal::remove_all<ArgType0>::type type;}; template<typename Func, typename ArgType0, typename ArgType1> struct binary_result_of_select<Func, ArgType0, ArgType1, sizeof(has_std_result_type)>
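Note on the motivation, as a rough illustration: the CwiseUnaryOp/CwiseBinaryOp traits above now feed const Scalar& into result_of, and a functor that declares neither a result_type nor a TR1/std result-of hook falls through to this default branch. Stripping references and cv-qualifiers keeps the deduced Scalar a plain value type. Illustrative functor, not from Eigen:

    struct my_negate {                                  // no result_type member
      float operator()(const float& a) const { return -a; }
    };
    // Fallback branch of Eigen's result_of for my_negate(const float&):
    //   before this patch: the deduced type carried the const reference,
    //   after:             remove_all strips it down to plain float,
    // so Scalar typedefs built on result_of stay value types.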
diff --git a/Eigen/src/plugins/ArrayCwiseUnaryOps.h b/Eigen/src/plugins/ArrayCwiseUnaryOps.h index 01432e2..2ce7414 100644 --- a/Eigen/src/plugins/ArrayCwiseUnaryOps.h +++ b/Eigen/src/plugins/ArrayCwiseUnaryOps.h
@@ -22,6 +22,7 @@ typedef CwiseUnaryOp<internal::scalar_sinh_op<Scalar>, const Derived> SinhReturnType; typedef CwiseUnaryOp<internal::scalar_cosh_op<Scalar>, const Derived> CoshReturnType; typedef CwiseUnaryOp<internal::scalar_lgamma_op<Scalar>, const Derived> LgammaReturnType; +typedef CwiseUnaryOp<internal::scalar_digamma_op<Scalar>, const Derived> DigammaReturnType; typedef CwiseUnaryOp<internal::scalar_erf_op<Scalar>, const Derived> ErfReturnType; typedef CwiseUnaryOp<internal::scalar_erfc_op<Scalar>, const Derived> ErfcReturnType; typedef CwiseUnaryOp<internal::scalar_pow_op<Scalar>, const Derived> PowReturnType; @@ -318,6 +319,16 @@ return LgammaReturnType(derived()); } +/** \returns an expression of the coefficient-wise digamma (psi, derivative of lgamma). + * + * \sa cos(), sin(), tan() + */ +inline const DigammaReturnType +digamma() const +{ + return DigammaReturnType(derived()); +} + /** \returns an expression of the coefficient-wise Gauss error * function of *this. *
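Note: a short usage sketch of the new member call, assuming a C99-math-capable toolchain (the reference values match those asserted against numpy in test/array.cpp below):

    #include <Eigen/Dense>
    #include <iostream>
    int main() {
      Eigen::ArrayXf a(4);
      a << 1.f, 1.5f, 4.f, 10000.5f;
      Eigen::ArrayXf psi  = a.digamma();          // ~ -0.5772, 0.0365, 1.2561, 9.2103
      Eigen::ArrayXf psi2 = Eigen::digamma(a);    // equivalent free-function form (GlobalFunctions.h)
      std::cout << psi.transpose() << "\n";
    }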
diff --git a/test/array.cpp b/test/array.cpp index 6adedfb..96aef31 100644 --- a/test/array.cpp +++ b/test/array.cpp
@@ -219,6 +219,7 @@ VERIFY_IS_APPROX(m1.tanh(), tanh(m1)); #ifdef EIGEN_HAS_C99_MATH VERIFY_IS_APPROX(m1.lgamma(), lgamma(m1)); + VERIFY_IS_APPROX(m1.digamma(), digamma(m1)); VERIFY_IS_APPROX(m1.erf(), erf(m1)); VERIFY_IS_APPROX(m1.erfc(), erfc(m1)); #endif // EIGEN_HAS_C99_MATH @@ -309,7 +310,22 @@ s1 += Scalar(tiny); m1 += ArrayType::Constant(rows,cols,Scalar(tiny)); VERIFY_IS_APPROX(s1/m1, s1 * m1.inverse()); - + + // check special functions (comparing against numpy implementation) +#ifdef EIGEN_HAS_C99_MATH + if (!NumTraits<Scalar>::IsComplex) { + VERIFY_IS_APPROX(numext::digamma(Scalar(1)), RealScalar(-0.5772156649015329)); + VERIFY_IS_APPROX(numext::digamma(Scalar(1.5)), RealScalar(0.03648997397857645)); + VERIFY_IS_APPROX(numext::digamma(Scalar(4)), RealScalar(1.2561176684318)); + VERIFY_IS_APPROX(numext::digamma(Scalar(-10.5)), RealScalar(2.398239129535781)); + VERIFY_IS_APPROX(numext::digamma(Scalar(10000.5)), RealScalar(9.210340372392849)); + VERIFY_IS_EQUAL(numext::digamma(Scalar(0)), + std::numeric_limits<RealScalar>::infinity()); + VERIFY_IS_EQUAL(numext::digamma(Scalar(-1)), + std::numeric_limits<RealScalar>::infinity()); + } +#endif // EIGEN_HAS_C99_MATH + // check inplace transpose m3 = m1; m3.transposeInPlace(); @@ -336,8 +352,6 @@ Array<RealScalar, -1, -1> m3(rows, cols); - Scalar s1 = internal::random<Scalar>(); - for (Index i = 0; i < m.rows(); ++i) for (Index j = 0; j < m.cols(); ++j) m2(i,j) = sqrt(m1(i,j)); @@ -410,6 +424,7 @@ VERIFY_IS_APPROX( m1.sign() * m1.abs(), m1); // scalar by array division + Scalar s1 = internal::random<Scalar>(); const RealScalar tiny = sqrt(std::numeric_limits<RealScalar>::epsilon()); s1 += Scalar(tiny); m1 += ArrayType::Constant(rows,cols,Scalar(tiny));
diff --git a/test/vectorwiseop.cpp b/test/vectorwiseop.cpp index 87476f9..3cc1987 100644 --- a/test/vectorwiseop.cpp +++ b/test/vectorwiseop.cpp
@@ -210,6 +210,9 @@ VERIFY_IS_APPROX(m1.cwiseAbs().colwise().maxCoeff(), m1.colwise().template lpNorm<Infinity>()); VERIFY_IS_APPROX(m1.cwiseAbs().rowwise().maxCoeff(), m1.rowwise().template lpNorm<Infinity>()); + // regression for bug 1158 + VERIFY_IS_APPROX(m1.cwiseAbs().colwise().sum().x(), m1.col(0).cwiseAbs().sum()); + // test normalized m2 = m1.colwise().normalized(); VERIFY_IS_APPROX(m2.col(c), m1.col(c).normalized());
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorBase.h b/unsupported/Eigen/CXX11/src/Tensor/TensorBase.h index 392acf3..cca716d 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorBase.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorBase.h
@@ -129,6 +129,12 @@ } EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE const TensorCwiseUnaryOp<internal::scalar_digamma_op<Scalar>, const Derived> + digamma() const { + return unaryExpr(internal::scalar_digamma_op<Scalar>()); + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const TensorCwiseUnaryOp<internal::scalar_erf_op<Scalar>, const Derived> erf() const { return unaryExpr(internal::scalar_erf_op<Scalar>());
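Note: the Tensor API mirrors the Array one; a minimal sketch, with unsupported/Eigen/CXX11/Tensor included:

    Eigen::Tensor<float, 2> t(2, 3);
    t.setConstant(2.0f);
    Eigen::Tensor<float, 2> psi = t.digamma();   // every entry ~0.42278, i.e. psi(2) = 1 - Euler-Mascheroni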
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorReduction.h b/unsupported/Eigen/CXX11/src/Tensor/TensorReduction.h index 09ee0c2..7a5dfbf 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorReduction.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorReduction.h
@@ -346,7 +346,7 @@ static const bool HasOptimizedImplementation = false; static void run(const Self&, Op&, const Device&, typename Self::CoeffReturnType*, typename Self::Index, typename Self::Index) { - assert(false && "Not implemented"); + eigen_assert(false && "Not implemented"); } }; @@ -356,7 +356,7 @@ static const bool HasOptimizedImplementation = false; static void run(const Self&, Op&, const Device&, typename Self::CoeffReturnType*, typename Self::Index, typename Self::Index) { - assert(false && "Not implemented"); + eigen_assert(false && "Not implemented"); } };
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorStorage.h b/unsupported/Eigen/CXX11/src/Tensor/TensorStorage.h index 98631fc..18a916e 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorStorage.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorStorage.h
@@ -105,7 +105,6 @@ EIGEN_DEVICE_FUNC void resize(Index size, const array<Index, NumIndices_>& nbDimensions) { - eigen_assert(size >= 1); const Index currentSz = internal::array_prod(m_dimensions); if(size != currentSz) {
diff --git a/unsupported/test/CMakeLists.txt b/unsupported/test/CMakeLists.txt index 5c383aa..d16c426 100644 --- a/unsupported/test/CMakeLists.txt +++ b/unsupported/test/CMakeLists.txt
@@ -147,25 +147,27 @@ ei_add_test(cxx11_tensor_sugar "-std=c++0x") ei_add_test(cxx11_tensor_fft "-std=c++0x") ei_add_test(cxx11_tensor_ifft "-std=c++0x") + ei_add_test(cxx11_tensor_empty "-std=c++0x") endif() # These tests needs nvcc -find_package(CUDA 7) +find_package(CUDA 7.0) if(CUDA_FOUND) set(CUDA_PROPAGATE_HOST_FLAGS OFF) if("${CMAKE_CXX_COMPILER_ID}" STREQUAL "Clang") set(CUDA_NVCC_FLAGS "-ccbin /usr/bin/clang" CACHE STRING "nvcc flags" FORCE) endif() + set(CUDA_NVCC_FLAGS "-std=c++11 -arch compute_30") cuda_include_directories("${CMAKE_CURRENT_BINARY_DIR}" "${CUDA_TOOLKIT_ROOT_DIR}/include") - set(EIGEN_ADD_TEST_FILENAME_EXTENSION "cu") + set(EIGEN_ADD_TEST_FILENAME_EXTENSION "cu") - ei_add_test(cxx11_tensor_device "-std=c++11") - ei_add_test(cxx11_tensor_cuda "-std=c++11") - ei_add_test(cxx11_tensor_contract_cuda "-std=c++11") - ei_add_test(cxx11_tensor_reduction_cuda "-std=c++11") - ei_add_test(cxx11_tensor_random_cuda "-std=c++11") - ei_add_test(cxx11_tensor_argmax_cuda "-std=c++11 -I/opt-cuda-7.0/include") + ei_add_test(cxx11_tensor_device) + ei_add_test(cxx11_tensor_cuda) + ei_add_test(cxx11_tensor_contract_cuda) + ei_add_test(cxx11_tensor_reduction_cuda) + ei_add_test(cxx11_tensor_random_cuda) + ei_add_test(cxx11_tensor_argmax_cuda) unset(EIGEN_ADD_TEST_FILENAME_EXTENSION) -endif(CUDA_FOUND) +endif()
diff --git a/unsupported/test/cxx11_tensor_cuda.cu b/unsupported/test/cxx11_tensor_cuda.cu index 49e1894..79f1c53 100644 --- a/unsupported/test/cxx11_tensor_cuda.cu +++ b/unsupported/test/cxx11_tensor_cuda.cu
@@ -131,8 +131,7 @@ cudaMemcpy(d_in1, in1.data(), in1_bytes, cudaMemcpyHostToDevice); - cudaStream_t stream; - assert(cudaStreamCreate(&stream) == cudaSuccess); + Eigen::CudaStreamDevice stream; Eigen::GpuDevice gpu_device(&stream); Eigen::TensorMap<Eigen::Tensor<float, 4> > gpu_in1(d_in1, 72,53,97,113); @@ -189,8 +188,7 @@ cudaMemcpy(d_t_left, t_left.data(), t_left_bytes, cudaMemcpyHostToDevice); cudaMemcpy(d_t_right, t_right.data(), t_right_bytes, cudaMemcpyHostToDevice); - cudaStream_t stream; - assert(cudaStreamCreate(&stream) == cudaSuccess); + Eigen::CudaStreamDevice stream; Eigen::GpuDevice gpu_device(&stream); Eigen::TensorMap<Eigen::Tensor<float, 4, DataLayout> > gpu_t_left(d_t_left, 6, 50, 3, 31); @@ -214,7 +212,7 @@ for (size_t i = 0; i < t_result.dimensions().TotalSize(); i++) { if (fabs(t_result.data()[i] - m_result.data()[i]) >= 1e-4) { - cout << "mismatch detected at index " << i << ": " << t_result.data()[i] << " vs " << m_result.data()[i] << endl; + std::cout << "mismatch detected at index " << i << ": " << t_result.data()[i] << " vs " << m_result.data()[i] << std::endl; assert(false); } } @@ -243,8 +241,7 @@ cudaMemcpy(d_input, input.data(), input_bytes, cudaMemcpyHostToDevice); cudaMemcpy(d_kernel, kernel.data(), kernel_bytes, cudaMemcpyHostToDevice); - cudaStream_t stream; - assert(cudaStreamCreate(&stream) == cudaSuccess); + Eigen::CudaStreamDevice stream; Eigen::GpuDevice gpu_device(&stream); Eigen::TensorMap<Eigen::Tensor<float, 4, DataLayout> > gpu_input(d_input, 74,37,11,137); @@ -293,8 +290,7 @@ cudaMemcpy(d_input, input.data(), input_bytes, cudaMemcpyHostToDevice); cudaMemcpy(d_kernel, kernel.data(), kernel_bytes, cudaMemcpyHostToDevice); - cudaStream_t stream; - assert(cudaStreamCreate(&stream) == cudaSuccess); + Eigen::CudaStreamDevice stream; Eigen::GpuDevice gpu_device(&stream); Eigen::TensorMap<Eigen::Tensor<float, 4, ColMajor> > gpu_input(d_input,74,9,11,7); @@ -343,8 +339,7 @@ cudaMemcpy(d_input, input.data(), input_bytes, cudaMemcpyHostToDevice); cudaMemcpy(d_kernel, kernel.data(), kernel_bytes, cudaMemcpyHostToDevice); - cudaStream_t stream; - assert(cudaStreamCreate(&stream) == cudaSuccess); + Eigen::CudaStreamDevice stream; Eigen::GpuDevice gpu_device(&stream); Eigen::TensorMap<Eigen::Tensor<float, 4, RowMajor> > gpu_input(d_input, 7,9,11,74); @@ -394,8 +389,7 @@ cudaMemcpy(d_input, input.data(), input_bytes, cudaMemcpyHostToDevice); cudaMemcpy(d_kernel, kernel.data(), kernel_bytes, cudaMemcpyHostToDevice); - cudaStream_t stream; - assert(cudaStreamCreate(&stream) == cudaSuccess); + Eigen::CudaStreamDevice stream; Eigen::GpuDevice gpu_device(&stream); Eigen::TensorMap<Eigen::Tensor<float, 4, DataLayout> > gpu_input(d_input,74,37,11,137); @@ -455,8 +449,7 @@ cudaMemcpy(d_input, input.data(), input_bytes, cudaMemcpyHostToDevice); cudaMemcpy(d_kernel, kernel.data(), kernel_bytes, cudaMemcpyHostToDevice); - cudaStream_t stream; - assert(cudaStreamCreate(&stream) == cudaSuccess); + Eigen::CudaStreamDevice stream; Eigen::GpuDevice gpu_device(&stream); Eigen::TensorMap<Eigen::Tensor<float, 5, DataLayout> > gpu_input(d_input,74,37,11,137,17); @@ -644,10 +637,6 @@ CALL_SUBTEST(test_cuda_erfc<float>(5.0f)); // CUDA erfc lacks precision for large inputs CALL_SUBTEST(test_cuda_erfc<float>(0.01f)); CALL_SUBTEST(test_cuda_erfc<float>(0.001f)); - CALL_SUBTEST(test_cuda_tanh<double>(1.0)); - CALL_SUBTEST(test_cuda_tanh<double>(100.0)); - CALL_SUBTEST(test_cuda_tanh<double>(0.01)); - CALL_SUBTEST(test_cuda_tanh<double>(0.001)); 
CALL_SUBTEST(test_cuda_lgamma<double>(1.0)); CALL_SUBTEST(test_cuda_lgamma<double>(100.0)); CALL_SUBTEST(test_cuda_lgamma<double>(0.01));
diff --git a/unsupported/test/cxx11_tensor_empty.cpp b/unsupported/test/cxx11_tensor_empty.cpp new file mode 100644 index 0000000..ca03a29 --- /dev/null +++ b/unsupported/test/cxx11_tensor_empty.cpp
@@ -0,0 +1,36 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2015 Benoit Steiner <benoit.steiner.goog@gmail.com> +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#include "main.h" + +#include <Eigen/CXX11/Tensor> + + +static void test_empty_tensor() +{ + Tensor<float, 2> source; + Tensor<float, 2> tgt1 = source; + Tensor<float, 2> tgt2; + tgt2 = source; +} + +static void test_empty_fixed_size_tensor() +{ + TensorFixedSize<float, Sizes<0>> source; + TensorFixedSize<float, Sizes<0>> tgt1 = source; + TensorFixedSize<float, Sizes<0>> tgt2; + tgt2 = source; +} + + +void test_cxx11_tensor_empty() +{ + CALL_SUBTEST(test_empty_tensor()); + CALL_SUBTEST(test_empty_fixed_size_tensor()); +}
diff --git a/unsupported/test/cxx11_tensor_reduction.cu b/unsupported/test/cxx11_tensor_reduction_cuda.cu similarity index 100% rename from unsupported/test/cxx11_tensor_reduction.cu rename to unsupported/test/cxx11_tensor_reduction_cuda.cu