Fine-tune dot() and sum(), and prepare for the sparse versions.
diff --git a/Eigen/src/Core/Dot.h b/Eigen/src/Core/Dot.h index bf28b9d..71e7030 100644 --- a/Eigen/src/Core/Dot.h +++ b/Eigen/src/Core/Dot.h
@@ -143,12 +143,13 @@ template<typename Derived1, typename Derived2, int Vectorization = ei_dot_traits<Derived1, Derived2>::Vectorization, - int Unrolling = ei_dot_traits<Derived1, Derived2>::Unrolling + int Unrolling = ei_dot_traits<Derived1, Derived2>::Unrolling, + int Storage = (ei_traits<Derived1>::Flags | ei_traits<Derived2>::Flags) & SparseBit > struct ei_dot_impl; template<typename Derived1, typename Derived2> -struct ei_dot_impl<Derived1, Derived2, NoVectorization, NoUnrolling> +struct ei_dot_impl<Derived1, Derived2, NoVectorization, NoUnrolling, IsDense> { typedef typename Derived1::Scalar Scalar; static Scalar run(const Derived1& v1, const Derived2& v2) @@ -163,12 +164,12 @@ }; template<typename Derived1, typename Derived2> -struct ei_dot_impl<Derived1, Derived2, NoVectorization, CompleteUnrolling> +struct ei_dot_impl<Derived1, Derived2, NoVectorization, CompleteUnrolling, IsDense> : public ei_dot_novec_unroller<Derived1, Derived2, 0, Derived1::SizeAtCompileTime> {}; template<typename Derived1, typename Derived2> -struct ei_dot_impl<Derived1, Derived2, LinearVectorization, NoUnrolling> +struct ei_dot_impl<Derived1, Derived2, LinearVectorization, NoUnrolling, IsDense> { typedef typename Derived1::Scalar Scalar; typedef typename ei_packet_traits<Scalar>::type PacketScalar; @@ -221,7 +222,7 @@ }; template<typename Derived1, typename Derived2> -struct ei_dot_impl<Derived1, Derived2, LinearVectorization, CompleteUnrolling> +struct ei_dot_impl<Derived1, Derived2, LinearVectorization, CompleteUnrolling, IsDense> { typedef typename Derived1::Scalar Scalar; typedef typename ei_packet_traits<Scalar>::type PacketScalar; @@ -258,20 +259,15 @@ typename ei_traits<Derived>::Scalar MatrixBase<Derived>::dot(const MatrixBase<OtherDerived>& other) const { - typedef typename Derived::Nested Nested; - typedef typename OtherDerived::Nested OtherNested; - typedef typename ei_unref<Nested>::type _Nested; - typedef typename ei_unref<OtherNested>::type _OtherNested; - - 
EIGEN_STATIC_ASSERT_VECTOR_ONLY(_Nested) - EIGEN_STATIC_ASSERT_VECTOR_ONLY(_OtherNested) - EIGEN_STATIC_ASSERT_SAME_VECTOR_SIZE(_Nested,_OtherNested) + EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived) + EIGEN_STATIC_ASSERT_VECTOR_ONLY(OtherDerived) + EIGEN_STATIC_ASSERT_SAME_VECTOR_SIZE(Derived,OtherDerived) EIGEN_STATIC_ASSERT((ei_is_same_type<Scalar, typename OtherDerived::Scalar>::ret), YOU_MIXED_DIFFERENT_NUMERIC_TYPES__YOU_NEED_TO_USE_THE_CAST_METHOD_OF_MATRIXBASE_TO_CAST_NUMERIC_TYPES_EXPLICITLY) ei_assert(size() == other.size()); - return ei_dot_impl<_Nested, _OtherNested>::run(derived(), other.derived()); + return ei_dot_impl<Derived, OtherDerived>::run(derived(), other.derived()); } /** \returns the squared norm of *this, i.e. the dot product of *this with itself. @@ -287,7 +283,7 @@ template<typename Derived> EIGEN_DEPRECATED inline typename NumTraits<typename ei_traits<Derived>::Scalar>::Real MatrixBase<Derived>::norm2() const { - return ei_real(dot(*this)); + return ei_real((*this).cwise().abs2().sum()); } /** \returns the squared norm of *this, i.e. the dot product of *this with itself. @@ -299,7 +295,7 @@ template<typename Derived> inline typename NumTraits<typename ei_traits<Derived>::Scalar>::Real MatrixBase<Derived>::squaredNorm() const { - return ei_real(dot(*this)); + return ei_real((*this).cwise().abs2().sum()); } /** \returns the \em l2 norm of *this, i.e. the square root of the dot product of *this with itself.
diff --git a/Eigen/src/Core/Sum.h b/Eigen/src/Core/Sum.h index 45ef622..33aa609 100644 --- a/Eigen/src/Core/Sum.h +++ b/Eigen/src/Core/Sum.h
@@ -42,7 +42,6 @@ enum { Vectorization = (int(Derived::Flags)&ActualPacketAccessBit) && (int(Derived::Flags)&LinearAccessBit) - && (int(Derived::SizeAtCompileTime)>2*PacketSize) ? LinearVectorization : NoVectorization }; @@ -155,12 +154,13 @@ template<typename Derived, int Vectorization = ei_sum_traits<Derived>::Vectorization, - int Unrolling = ei_sum_traits<Derived>::Unrolling + int Unrolling = ei_sum_traits<Derived>::Unrolling, + int Storage = ei_traits<Derived>::Flags & SparseBit > struct ei_sum_impl; template<typename Derived> -struct ei_sum_impl<Derived, NoVectorization, NoUnrolling> +struct ei_sum_impl<Derived, NoVectorization, NoUnrolling, IsDense> { typedef typename Derived::Scalar Scalar; static Scalar run(const Derived& mat) @@ -178,12 +178,12 @@ }; template<typename Derived> -struct ei_sum_impl<Derived, NoVectorization, CompleteUnrolling> +struct ei_sum_impl<Derived, NoVectorization, CompleteUnrolling, IsDense> : public ei_sum_novec_unroller<Derived, 0, Derived::SizeAtCompileTime> {}; template<typename Derived> -struct ei_sum_impl<Derived, LinearVectorization, NoUnrolling> +struct ei_sum_impl<Derived, LinearVectorization, NoUnrolling,IsDense> { typedef typename Derived::Scalar Scalar; typedef typename ei_packet_traits<Scalar>::type PacketScalar; @@ -228,7 +228,7 @@ }; template<typename Derived> -struct ei_sum_impl<Derived, LinearVectorization, CompleteUnrolling> +struct ei_sum_impl<Derived, LinearVectorization, CompleteUnrolling, IsDense> { typedef typename Derived::Scalar Scalar; static Scalar run(const Derived& mat)
diff --git a/Eigen/src/Core/util/Memory.h b/Eigen/src/Core/util/Memory.h index f6e2dcb..f31304d 100644 --- a/Eigen/src/Core/util/Memory.h +++ b/Eigen/src/Core/util/Memory.h
@@ -73,7 +73,7 @@ // and this type has a custom operator new, then we want to honor this operator new! // so when we use C functions to allocate memory, we must be careful to call manually the constructor using // the special placement-new syntax. - return new(void_result) T[size]; + return ::new(void_result) T[size]; } else return new T[size]; // here we really want a new, not a malloc. Justification: if the user uses Eigen on