Various compilation fixes for MSVC 9:
 - define M_PI in Eigen/Geometry when the platform headers do not provide it
 - replace the GCC-specific __restrict__ keyword with a new EIGEN_RESTRICT macro
 - remove the GCC-only inline asm markers from CacheFriendlyProduct.h
 - cast the arguments of the Matrix(x, y) constructor to Scalar
 - move the const qualifier into ConjugateReturnType and use ei_cleantype for AdjointReturnType
 - hoist the ei_nested cost expressions into an enum
 - add an LU<>::KernelReturnType typedef to simplify the return type of kernel()
All tests compile, but some still fail at runtime in ei_aligned_free()
(even without vectorization).
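
For reference, the portability pattern behind the EIGEN_RESTRICT change is sketched
below. This is an illustrative sketch only, not part of the patch: the patch simply
defines EIGEN_RESTRICT as __restrict, which both GCC and MSVC accept, while the
per-compiler dispatch shown here is an assumption about how the macro could be
generalized.

    // Sketch (assumption, not part of the patch): dispatch the restrict keyword
    // per compiler instead of hard-coding one spelling.
    #if defined(__GNUC__)
    #  define EIGEN_RESTRICT __restrict__   // GCC/Clang spelling
    #elif defined(_MSC_VER)
    #  define EIGEN_RESTRICT __restrict     // MSVC spelling
    #else
    #  define EIGEN_RESTRICT                // unknown compiler: drop the aliasing hint
    #endif

    // Pointers declared with the macro then read as in the hunks below:
    //   const Scalar* EIGEN_RESTRICT lhs;
    //   Scalar*       EIGEN_RESTRICT res;
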
diff --git a/Eigen/Geometry b/Eigen/Geometry
index 7a3ca9e..d627231 100644
--- a/Eigen/Geometry
+++ b/Eigen/Geometry
@@ -3,6 +3,10 @@
 
 #include "Core"
 
+#ifndef M_PI
+#define M_PI 3.14159265358979323846
+#endif
+
 namespace Eigen {
 
 /** \defgroup Geometry Geometry module
diff --git a/Eigen/src/Core/CacheFriendlyProduct.h b/Eigen/src/Core/CacheFriendlyProduct.h
index 782fabc..649b39c 100644
--- a/Eigen/src/Core/CacheFriendlyProduct.h
+++ b/Eigen/src/Core/CacheFriendlyProduct.h
@@ -34,8 +34,8 @@
   bool _rhsRowMajor, const Scalar* _rhs, int _rhsStride,
   bool resRowMajor, Scalar* res, int resStride)
 {
-  const Scalar* __restrict__ lhs;
-  const Scalar* __restrict__ rhs;
+  const Scalar* EIGEN_RESTRICT lhs;
+  const Scalar* EIGEN_RESTRICT rhs;
   int lhsStride, rhsStride, rows, cols;
   bool lhsRowMajor;
 
@@ -88,11 +88,11 @@
   const int l2BlockSize = MaxL2BlockSize > size ? size : MaxL2BlockSize;
   const int l2BlockSizeAligned = (1 + std::max(l2BlockSize,l2BlockCols)/PacketSize)*PacketSize;
   const bool needRhsCopy = (PacketSize>1) && ((rhsStride%PacketSize!=0) || (size_t(rhs)%16!=0));
-  Scalar* __restrict__ block = 0;
+  Scalar* EIGEN_RESTRICT block = 0;
   const int allocBlockSize = sizeof(Scalar)*l2BlockRows*size;
   const bool allocBlockUsingAlloca = EIGEN_USE_ALLOCA && allocBlockSize<=16000000;
   block = (Scalar*)ei_alloca_or_malloc(allocBlockUsingAlloca, allocBlockSize);
-  Scalar* __restrict__ rhsCopy
+  Scalar* EIGEN_RESTRICT rhsCopy
     = (Scalar*)ei_alloca_or_malloc(true, sizeof(Scalar)*l2BlockSizeAligned*l2BlockSizeAligned);
 
   // loops on each L2 cache friendly blocks of the result
@@ -107,7 +107,6 @@
     int count = 0;
 
     // copy l2blocksize rows of m_lhs to blocks of ps x bw
-    asm("#eigen begin buildblocks");
     for(int l2k=0; l2k<size; l2k+=l2BlockSize)
     {
       const int l2blockSizeEnd = std::min(l2k+l2BlockSize, size);
@@ -154,7 +153,6 @@
         }
       }
     }
-    asm("#eigen end buildblocks");
 
     for(int l2j=0; l2j<cols; l2j+=l2BlockCols)
     {
@@ -177,11 +175,11 @@
         for(int l1i=l2i; l1i<l2blockRowEndBW; l1i+=MaxBlockRows)
         {
           int offsetblock = l2k * (l2blockRowEnd-l2i) + (l1i-l2i)*(l2blockSizeEnd-l2k) - l2k*MaxBlockRows;
-          const Scalar* __restrict__ localB = &block[offsetblock];
+          const Scalar* EIGEN_RESTRICT localB = &block[offsetblock];
           
           for(int l1j=l2j; l1j<l2blockColEnd; l1j+=1)
           {
-            const Scalar* __restrict__ rhsColumn;
+            const Scalar* EIGEN_RESTRICT rhsColumn;
             if (needRhsCopy)
               rhsColumn = &(rhsCopy[l2BlockSizeAligned*(l1j-l2j)-l2k]);
             else
@@ -194,7 +192,6 @@
 
             PacketType tmp;
 
-            asm("#eigen begincore");
             for(int k=l2k; k<l2blockSizeEnd; k+=PacketSize)
             {
               tmp = ei_ploadu(&rhsColumn[k]);
@@ -220,7 +217,7 @@
               }
             }
 
-            Scalar* __restrict__ localRes = &(res[l1i + l1j*resStride]);
+            Scalar* EIGEN_RESTRICT localRes = &(res[l1i + l1j*resStride]);
 
             if (PacketSize>1 && resIsAligned)
             {
@@ -250,7 +247,6 @@
                 localRes[7] += ei_predux(dst[7]);
               }
             }
-            asm("#eigen endcore");
           }
         }
         if (l2blockRemainingRows>0)
@@ -258,10 +254,9 @@
           int offsetblock = l2k * (l2blockRowEnd-l2i) + (l2blockRowEndBW-l2i)*(l2blockSizeEnd-l2k) - l2k*l2blockRemainingRows;
           const Scalar* localB = &block[offsetblock];
 
-          asm("#eigen begin dynkernel");
           for(int l1j=l2j; l1j<l2blockColEnd; l1j+=1)
           {
-            const Scalar* __restrict__ rhsColumn;
+            const Scalar* EIGEN_RESTRICT rhsColumn;
             if (needRhsCopy)
               rhsColumn = &(rhsCopy[l2BlockSizeAligned*(l1j-l2j)-l2k]);
             else
@@ -292,7 +287,7 @@
               }
             }
 
-            Scalar* __restrict__ localRes = &(res[l2blockRowEndBW + l1j*resStride]);
+            Scalar* EIGEN_RESTRICT localRes = &(res[l2blockRowEndBW + l1j*resStride]);
 
             // process the remaining rows once at a time
                                          localRes[0] += ei_predux(dst[0]);
@@ -307,7 +302,6 @@
               if (l2blockRemainingRows>=8) localRes[7] += ei_predux(dst[7]);
             }
 
-            asm("#eigen end dynkernel");
           }
         }
       }
@@ -373,7 +367,6 @@
           ei_padd(ei_pmul(ptmp0,ei_pload ## A0(&lhs0[j OFFSET])),ei_pmul(ptmp1,ei_pload ## A13(&lhs1[j OFFSET]))), \
           ei_padd(ei_pmul(ptmp2,ei_pload ## A2(&lhs2[j OFFSET])),ei_pmul(ptmp3,ei_pload ## A13(&lhs3[j OFFSET]))) )))
 
-  asm("#begin matrix_vector_product");
   typedef typename ei_packet_traits<Scalar>::type Packet;
   const int PacketSize = sizeof(Packet)/sizeof(Scalar);
 
@@ -541,7 +534,6 @@
     else
       break;
   } while(PacketSize>1);
-  asm("#end matrix_vector_product");
   #undef _EIGEN_ACCUMULATE_PACKETS
 }
 
@@ -563,7 +555,6 @@
     ptmp2 = ei_pmadd(b, ei_pload##A2 (&lhs2[j]), ptmp2); \
     ptmp3 = ei_pmadd(b, ei_pload##A13(&lhs3[j]), ptmp3); }
 
-  asm("#begin matrix_vector_product");
   typedef typename ei_packet_traits<Scalar>::type Packet;
   const int PacketSize = sizeof(Packet)/sizeof(Scalar);
 
@@ -752,7 +743,6 @@
     else
       break;
   } while(PacketSize>1);
-  asm("#end matrix_vector_product");
 
   #undef _EIGEN_ACCUMULATE_PACKETS
 }
diff --git a/Eigen/src/Core/CwiseUnaryOp.h b/Eigen/src/Core/CwiseUnaryOp.h
index 85a8872..a50a9c3 100644
--- a/Eigen/src/Core/CwiseUnaryOp.h
+++ b/Eigen/src/Core/CwiseUnaryOp.h
@@ -166,7 +166,7 @@
   *
   * \sa adjoint() */
 template<typename Derived>
-inline const typename MatrixBase<Derived>::ConjugateReturnType
+inline typename MatrixBase<Derived>::ConjugateReturnType
 MatrixBase<Derived>::conjugate() const
 {
   return ConjugateReturnType(derived());
diff --git a/Eigen/src/Core/MapBase.h b/Eigen/src/Core/MapBase.h
index 4c1a0cf..74b7d76 100644
--- a/Eigen/src/Core/MapBase.h
+++ b/Eigen/src/Core/MapBase.h
@@ -166,7 +166,7 @@
     { return derived() = forceAligned() / other; }
 
   protected:
-    const Scalar* __restrict__ m_data;
+    const Scalar* EIGEN_RESTRICT m_data;
     const ei_int_if_dynamic<RowsAtCompileTime> m_rows;
     const ei_int_if_dynamic<ColsAtCompileTime> m_cols;
 };
diff --git a/Eigen/src/Core/Matrix.h b/Eigen/src/Core/Matrix.h
index 268261b..7480683 100644
--- a/Eigen/src/Core/Matrix.h
+++ b/Eigen/src/Core/Matrix.h
@@ -286,8 +286,8 @@
       if((RowsAtCompileTime == 1 && ColsAtCompileTime == 2)
       || (RowsAtCompileTime == 2 && ColsAtCompileTime == 1))
       {
-        m_storage.data()[0] = x;
-        m_storage.data()[1] = y;
+        m_storage.data()[0] = Scalar(x);
+        m_storage.data()[1] = Scalar(y);
       }
       else
       {
diff --git a/Eigen/src/Core/MatrixBase.h b/Eigen/src/Core/MatrixBase.h
index aa8e898..61cfd58 100644
--- a/Eigen/src/Core/MatrixBase.h
+++ b/Eigen/src/Core/MatrixBase.h
@@ -185,13 +185,13 @@
     typedef CwiseUnaryOp<ei_scalar_quotient1_op<Scalar>, Derived> ScalarQuotient1ReturnType;
     /** \internal the return type of MatrixBase::conjugate() */
     typedef typename ei_meta_if<NumTraits<Scalar>::IsComplex,
-                        CwiseUnaryOp<ei_scalar_conjugate_op<Scalar>, Derived>,
-                        Derived&
+                        const CwiseUnaryOp<ei_scalar_conjugate_op<Scalar>, Derived>,
+                        const Derived&
                      >::ret ConjugateReturnType;
     /** \internal the return type of MatrixBase::real() */
     typedef CwiseUnaryOp<ei_scalar_real_op<Scalar>, Derived> RealReturnType;
     /** \internal the return type of MatrixBase::adjoint() */
-    typedef Transpose<NestByValue<typename ei_unref<ConjugateReturnType>::type> >
+    typedef Transpose<NestByValue<typename ei_cleantype<ConjugateReturnType>::type> >
             AdjointReturnType;
     /** \internal the return type of MatrixBase::eigenvalues() */
     typedef Matrix<typename NumTraits<typename ei_traits<Derived>::Scalar>::Real, ei_traits<Derived>::ColsAtCompileTime, 1> EigenvaluesReturnType;
@@ -489,7 +489,7 @@
     inline const NestByValue<Derived> nestByValue() const;
 
 
-    const ConjugateReturnType conjugate() const;
+    ConjugateReturnType conjugate() const;
     const RealReturnType real() const;
 
     template<typename CustomUnaryOp>
diff --git a/Eigen/src/Core/Product.h b/Eigen/src/Core/Product.h
index 2774b3f..6d965e6 100644
--- a/Eigen/src/Core/Product.h
+++ b/Eigen/src/Core/Product.h
@@ -545,7 +545,7 @@
     enum {
       EvalToRes = (ei_packet_traits<Scalar>::size==1)
                 ||((DestDerived::Flags&ActualPacketAccessBit) && (!(DestDerived::Flags & RowMajorBit))) };
-    Scalar* __restrict__ _res;
+    Scalar* EIGEN_RESTRICT _res;
     if (EvalToRes)
        _res = &res.coeffRef(0);
     else
@@ -590,7 +590,7 @@
     enum {
       EvalToRes = (ei_packet_traits<Scalar>::size==1)
                 ||((DestDerived::Flags & ActualPacketAccessBit) && (DestDerived::Flags & RowMajorBit)) };
-    Scalar* __restrict__ _res;
+    Scalar* EIGEN_RESTRICT _res;
     if (EvalToRes)
        _res = &res.coeffRef(0);
     else
@@ -622,7 +622,7 @@
   template<typename DestDerived>
   inline static void run(DestDerived& res, const ProductType& product)
   {
-    Scalar* __restrict__ _rhs;
+    Scalar* EIGEN_RESTRICT _rhs;
     if (UseRhsDirectly)
        _rhs = &product.rhs().const_cast_derived().coeffRef(0);
     else
@@ -650,7 +650,7 @@
   template<typename DestDerived>
   inline static void run(DestDerived& res, const ProductType& product)
   {
-    Scalar* __restrict__ _lhs;
+    Scalar* EIGEN_RESTRICT _lhs;
     if (UseLhsDirectly)
        _lhs = &product.lhs().const_cast_derived().coeffRef(0);
     else
diff --git a/Eigen/src/Core/util/Macros.h b/Eigen/src/Core/util/Macros.h
index 51b7a86..c50aef9 100644
--- a/Eigen/src/Core/util/Macros.h
+++ b/Eigen/src/Core/util/Macros.h
@@ -99,6 +99,8 @@
 #define EIGEN_ALIGN_128
 #endif
 
+#define EIGEN_RESTRICT __restrict
+
 #define EIGEN_INHERIT_ASSIGNMENT_OPERATOR(Derived, Op) \
 template<typename OtherDerived> \
 Derived& operator Op(const MatrixBase<OtherDerived>& other) \
diff --git a/Eigen/src/Core/util/Meta.h b/Eigen/src/Core/util/Meta.h
index 9d844d2..7e4e0fb 100644
--- a/Eigen/src/Core/util/Meta.h
+++ b/Eigen/src/Core/util/Meta.h
@@ -224,12 +224,16 @@
 
 template<typename T, int n=1, typename EvalType = typename ei_eval<T>::type> struct ei_nested
 {
+  enum {
+    CostEval   = (n+1) * int(NumTraits<typename ei_traits<T>::Scalar>::ReadCost),
+    CostNoEval = (n-1) * int(ei_traits<T>::CoeffReadCost)
+  };
   typedef typename ei_meta_if<
     ei_must_nest_by_value<T>::ret,
     T,
     typename ei_meta_if<
       (int(ei_traits<T>::Flags) & EvalBeforeNestingBit)
-      || ((n+1) * int(NumTraits<typename ei_traits<T>::Scalar>::ReadCost) <= (n-1) * int(T::CoeffReadCost)),
+      || ( int(CostEval) <= int(CostNoEval) ),
       EvalType,
       const T&
     >::ret
diff --git a/Eigen/src/LU/LU.h b/Eigen/src/LU/LU.h
index 1267ec3..af385df 100644
--- a/Eigen/src/LU/LU.h
+++ b/Eigen/src/LU/LU.h
@@ -71,6 +71,9 @@
              MatrixType::MaxRowsAtCompileTime)
     };
 
+    typedef Matrix<typename MatrixType::Scalar, MatrixType::ColsAtCompileTime, Dynamic,
+                   MatrixType::MaxColsAtCompileTime, MaxSmallDimAtCompileTime> KernelReturnType;
+
     /** Constructor.
       *
       * \param matrix the matrix of which to compute the LU decomposition.
@@ -165,9 +168,8 @@
       * Output: \verbinclude LU_kernel.out
       *
       * \sa computeKernel()
-      */    const Matrix<typename MatrixType::Scalar, MatrixType::ColsAtCompileTime, Dynamic,
-                 MatrixType::MaxColsAtCompileTime,
-                 LU<MatrixType>::MaxSmallDimAtCompileTime> kernel() const;
+      */
+    const KernelReturnType kernel() const;
 
     /** This method finds a solution x to the equation Ax=b, where A is the matrix of which
       * *this is the LU decomposition, if any exists.
@@ -408,9 +410,7 @@
 }
 
 template<typename MatrixType>
-const Matrix<typename MatrixType::Scalar, MatrixType::ColsAtCompileTime, Dynamic,
-                    MatrixType::MaxColsAtCompileTime,
-                    LU<MatrixType>::MaxSmallDimAtCompileTime>
+const typename LU<MatrixType>::KernelReturnType
 LU<MatrixType>::kernel() const
 {
   Matrix<typename MatrixType::Scalar, MatrixType::ColsAtCompileTime, Dynamic,
diff --git a/Eigen/src/QR/Tridiagonalization.h b/Eigen/src/QR/Tridiagonalization.h
index 765a871..c0e70d0 100755
--- a/Eigen/src/QR/Tridiagonalization.h
+++ b/Eigen/src/QR/Tridiagonalization.h
@@ -285,8 +285,8 @@
       hCoeffs.end(n-i-1) += (h * Scalar(-0.5) * matA.col(i).end(n-i-1).dot(hCoeffs.end(n-i-1)))
                             * matA.col(i).end(n-i-1);
       
-      const Scalar* __restrict__ pb = &matA.coeffRef(0,i);
-      const Scalar* __restrict__ pa = (&hCoeffs.coeffRef(0)) - 1;
+      const Scalar* EIGEN_RESTRICT pb = &matA.coeffRef(0,i);
+      const Scalar* EIGEN_RESTRICT pa = (&hCoeffs.coeffRef(0)) - 1;
       for (int j1=i+1; j1<n; ++j1)
       {
         int starti = i+1;