Fine-tune dot() and sum(), and prepare them for the sparse versions...

diff --git a/Eigen/src/Core/Dot.h b/Eigen/src/Core/Dot.h
index bf28b9d..71e7030 100644
--- a/Eigen/src/Core/Dot.h
+++ b/Eigen/src/Core/Dot.h

@@ -143,12 +143,13 @@
 
 template<typename Derived1, typename Derived2,
          int Vectorization = ei_dot_traits<Derived1, Derived2>::Vectorization,
-         int Unrolling = ei_dot_traits<Derived1, Derived2>::Unrolling
+         int Unrolling = ei_dot_traits<Derived1, Derived2>::Unrolling,
+         int Storage = (ei_traits<Derived1>::Flags | ei_traits<Derived2>::Flags) & SparseBit
 >
 struct ei_dot_impl;
 
 template<typename Derived1, typename Derived2>
-struct ei_dot_impl<Derived1, Derived2, NoVectorization, NoUnrolling>
+struct ei_dot_impl<Derived1, Derived2, NoVectorization, NoUnrolling, IsDense>
 {
   typedef typename Derived1::Scalar Scalar;
   static Scalar run(const Derived1& v1, const Derived2& v2)
@@ -163,12 +164,12 @@
 };
 
 template<typename Derived1, typename Derived2>
-struct ei_dot_impl<Derived1, Derived2, NoVectorization, CompleteUnrolling>
+struct ei_dot_impl<Derived1, Derived2, NoVectorization, CompleteUnrolling, IsDense>
   : public ei_dot_novec_unroller<Derived1, Derived2, 0, Derived1::SizeAtCompileTime>
 {};
 
 template<typename Derived1, typename Derived2>
-struct ei_dot_impl<Derived1, Derived2, LinearVectorization, NoUnrolling>
+struct ei_dot_impl<Derived1, Derived2, LinearVectorization, NoUnrolling, IsDense>
 {
   typedef typename Derived1::Scalar Scalar;
   typedef typename ei_packet_traits<Scalar>::type PacketScalar;
@@ -221,7 +222,7 @@
 };
 
 template<typename Derived1, typename Derived2>
-struct ei_dot_impl<Derived1, Derived2, LinearVectorization, CompleteUnrolling>
+struct ei_dot_impl<Derived1, Derived2, LinearVectorization, CompleteUnrolling, IsDense>
 {
   typedef typename Derived1::Scalar Scalar;
   typedef typename ei_packet_traits<Scalar>::type PacketScalar;
@@ -258,20 +259,15 @@
 typename ei_traits<Derived>::Scalar
 MatrixBase<Derived>::dot(const MatrixBase<OtherDerived>& other) const
 {
-  typedef typename Derived::Nested Nested;
-  typedef typename OtherDerived::Nested OtherNested;
-  typedef typename ei_unref<Nested>::type _Nested;
-  typedef typename ei_unref<OtherNested>::type _OtherNested;
-
-  EIGEN_STATIC_ASSERT_VECTOR_ONLY(_Nested)
-  EIGEN_STATIC_ASSERT_VECTOR_ONLY(_OtherNested)
-  EIGEN_STATIC_ASSERT_SAME_VECTOR_SIZE(_Nested,_OtherNested)
+  EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived)
+  EIGEN_STATIC_ASSERT_VECTOR_ONLY(OtherDerived)
+  EIGEN_STATIC_ASSERT_SAME_VECTOR_SIZE(Derived,OtherDerived)
   EIGEN_STATIC_ASSERT((ei_is_same_type<Scalar, typename OtherDerived::Scalar>::ret),
     YOU_MIXED_DIFFERENT_NUMERIC_TYPES__YOU_NEED_TO_USE_THE_CAST_METHOD_OF_MATRIXBASE_TO_CAST_NUMERIC_TYPES_EXPLICITLY)
 
   ei_assert(size() == other.size());
 
-  return ei_dot_impl<_Nested, _OtherNested>::run(derived(), other.derived());
+  return ei_dot_impl<Derived, OtherDerived>::run(derived(), other.derived());
 }
 
 /** \returns the squared norm of *this, i.e. the dot product of *this with itself.
@@ -287,7 +283,7 @@
 template<typename Derived>
 EIGEN_DEPRECATED inline typename NumTraits<typename ei_traits<Derived>::Scalar>::Real MatrixBase<Derived>::norm2() const
 {
-  return ei_real(dot(*this));
+  return ei_real((*this).cwise().abs2().sum());
 }
 
 /** \returns the squared norm of *this, i.e. the dot product of *this with itself.
@@ -299,7 +295,7 @@
 template<typename Derived>
 inline typename NumTraits<typename ei_traits<Derived>::Scalar>::Real MatrixBase<Derived>::squaredNorm() const
 {
-  return ei_real(dot(*this));
+  return ei_real((*this).cwise().abs2().sum());
 }
 
 /** \returns the \em l2 norm of *this, i.e. the square root of the dot product of *this with itself.

diff --git a/Eigen/src/Core/Sum.h b/Eigen/src/Core/Sum.h
index 45ef622..33aa609 100644
--- a/Eigen/src/Core/Sum.h
+++ b/Eigen/src/Core/Sum.h

@@ -42,7 +42,6 @@
   enum {
     Vectorization = (int(Derived::Flags)&ActualPacketAccessBit)
                  && (int(Derived::Flags)&LinearAccessBit)
-                 && (int(Derived::SizeAtCompileTime)>2*PacketSize)
                   ? LinearVectorization
                   : NoVectorization
   };
@@ -155,12 +154,13 @@
 
 template<typename Derived,
          int Vectorization = ei_sum_traits<Derived>::Vectorization,
-         int Unrolling = ei_sum_traits<Derived>::Unrolling
+         int Unrolling = ei_sum_traits<Derived>::Unrolling,
+         int Storage = ei_traits<Derived>::Flags & SparseBit
 >
 struct ei_sum_impl;
 
 template<typename Derived>
-struct ei_sum_impl<Derived, NoVectorization, NoUnrolling>
+struct ei_sum_impl<Derived, NoVectorization, NoUnrolling, IsDense>
 {
   typedef typename Derived::Scalar Scalar;
   static Scalar run(const Derived& mat)
@@ -178,12 +178,12 @@
 };
 
 template<typename Derived>
-struct ei_sum_impl<Derived, NoVectorization, CompleteUnrolling>
+struct ei_sum_impl<Derived, NoVectorization, CompleteUnrolling, IsDense>
   : public ei_sum_novec_unroller<Derived, 0, Derived::SizeAtCompileTime>
 {};
 
 template<typename Derived>
-struct ei_sum_impl<Derived, LinearVectorization, NoUnrolling>
+struct ei_sum_impl<Derived, LinearVectorization, NoUnrolling,IsDense>
 {
   typedef typename Derived::Scalar Scalar;
   typedef typename ei_packet_traits<Scalar>::type PacketScalar;
@@ -228,7 +228,7 @@
 };
 
 template<typename Derived>
-struct ei_sum_impl<Derived, LinearVectorization, CompleteUnrolling>
+struct ei_sum_impl<Derived, LinearVectorization, CompleteUnrolling, IsDense>
 {
   typedef typename Derived::Scalar Scalar;
   static Scalar run(const Derived& mat)

diff --git a/Eigen/src/Core/util/Memory.h b/Eigen/src/Core/util/Memory.h
index f6e2dcb..f31304d 100644
--- a/Eigen/src/Core/util/Memory.h
+++ b/Eigen/src/Core/util/Memory.h

@@ -73,7 +73,7 @@
     // and this type has a custom operator new, then we want to honor this operator new!
     // so when we use C functions to allocate memory, we must be careful to call manually the constructor using
     // the special placement-new syntax.
-    return new(void_result) T[size];
+    return ::new(void_result) T[size];
   }
   else
     return new T[size]; // here we really want a new, not a malloc. Justification: if the user uses Eigen on