Implement an optimized ploadu for MSVC10: this also fixes bad code generation in gebp_kernel :)

commit: 9d2bf35a05b21d0203201a0b72b54022cae24670 [log] [tgz]
author: Gael Guennebaud <g.gael@free.fr> Sat Feb 12 16:40:09 2011 +0100
committer: Gael Guennebaud <g.gael@free.fr> Sat Feb 12 16:40:09 2011 +0100
tree: 4b837291fb8f965454e77b893de59be5727a32d5
parent: ec7409b16ea391f44965887e0cdb3865fc56c98e [diff]
diff --git a/Eigen/src/Core/arch/SSE/PacketMath.h b/Eigen/src/Core/arch/SSE/PacketMath.h
index fa499a8..0872a04 100644
--- a/Eigen/src/Core/arch/SSE/PacketMath.h
+++ b/Eigen/src/Core/arch/SSE/PacketMath.h

@@ -222,7 +222,20 @@
 template<> EIGEN_STRONG_INLINE Packet4i pload<Packet4i>(const int*     from) { EIGEN_DEBUG_ALIGNED_LOAD return _mm_load_si128(reinterpret_cast<const Packet4i*>(from)); }
 
 #if defined(_MSC_VER)
-  template<> EIGEN_STRONG_INLINE Packet4f ploadu<Packet4f>(const float*  from) { EIGEN_DEBUG_UNALIGNED_LOAD return _mm_loadu_ps(from); }
+  template<> EIGEN_STRONG_INLINE Packet4f ploadu<Packet4f>(const float*  from) {
+    EIGEN_DEBUG_UNALIGNED_LOAD
+    #if (_MSC_VER==1600)
+    // NOTE Some versions of MSVC10 generate bad code for _mm_loadu_ps (i.e., no unaligned load is emitted!),
+    // so load the two 64-bit halves separately, seeding with zeros instead of reading an uninitialized `res`.
+    // TODO On most architectures this version should also be faster than a single _mm_loadu_ps, so we could
+    // enable it for MSVC08 too, but first we have to make sure the latter does not generate bad code for it.
+    __m128 res = _mm_loadl_pi(_mm_setzero_ps(), (const __m64*)(from));
+    res = _mm_loadh_pi(res, (const __m64*)(from+2));
+    return res;
+    #else
+    return _mm_loadu_ps(from);
+    #endif
+  }
   template<> EIGEN_STRONG_INLINE Packet2d ploadu<Packet2d>(const double* from) { EIGEN_DEBUG_UNALIGNED_LOAD return _mm_loadu_pd(from); }
   template<> EIGEN_STRONG_INLINE Packet4i ploadu<Packet4i>(const int*    from) { EIGEN_DEBUG_UNALIGNED_LOAD return _mm_loadu_si128(reinterpret_cast<const Packet4i*>(from)); }
 #else
commit	9d2bf35a05b21d0203201a0b72b54022cae24670	[log] [tgz]
author	Gael Guennebaud <g.gael@free.fr>	Sat Feb 12 16:40:09 2011 +0100
committer	Gael Guennebaud <g.gael@free.fr>	Sat Feb 12 16:40:09 2011 +0100
tree	4b837291fb8f965454e77b893de59be5727a32d5
parent	ec7409b16ea391f44965887e0cdb3865fc56c98e [diff]