resurrected sparse triangular solver
diff --git a/Eigen/src/Core/Flagged.h b/Eigen/src/Core/Flagged.h
index a6abe61..387854b 100644
--- a/Eigen/src/Core/Flagged.h
+++ b/Eigen/src/Core/Flagged.h
@@ -62,6 +62,7 @@
     EIGEN_GENERIC_PUBLIC_INTERFACE(Flagged)
     typedef typename ei_meta_if<ei_must_nest_by_value<ExpressionType>::ret,
         ExpressionType, const ExpressionType&>::ret ExpressionTypeNested;
+    typedef typename ExpressionType::InnerIterator InnerIterator;
 
     inline Flagged(const ExpressionType& matrix) : m_matrix(matrix) {}
 
diff --git a/Eigen/src/Core/SolveTriangular.h b/Eigen/src/Core/SolveTriangular.h
index 0f7aa3b..aaebc59 100755
--- a/Eigen/src/Core/SolveTriangular.h
+++ b/Eigen/src/Core/SolveTriangular.h
@@ -35,7 +35,7 @@
                      ? Upper
                      : -1,
   int StorageOrder = ei_is_part<Lhs>::value ? -1  // this is to solve ambiguous specializations
-                   : int(Lhs::Flags) & RowMajorBit ? RowMajor : ColMajor
+                   : int(Lhs::Flags) & (RowMajorBit|SparseBit)
   >
 struct ei_solve_triangular_selector;
 
@@ -51,7 +51,7 @@
 
 // forward substitution, row-major
 template<typename Lhs, typename Rhs, int UpLo>
-struct ei_solve_triangular_selector<Lhs,Rhs,UpLo,RowMajor>
+struct ei_solve_triangular_selector<Lhs,Rhs,UpLo,RowMajor|IsDense>
 {
   typedef typename Rhs::Scalar Scalar;
   static void run(const Lhs& lhs, Rhs& other)
@@ -138,7 +138,7 @@
 //  - inv(Upper,         ColMajor) * Column vector
 //  - inv(Upper,UnitDiag,ColMajor) * Column vector
 template<typename Lhs, typename Rhs, int UpLo>
-struct ei_solve_triangular_selector<Lhs,Rhs,UpLo,ColMajor>
+struct ei_solve_triangular_selector<Lhs,Rhs,UpLo,ColMajor|IsDense>
 {
   typedef typename Rhs::Scalar Scalar;
   typedef typename ei_packet_traits<Scalar>::type Packet;
diff --git a/Eigen/src/Core/util/Constants.h b/Eigen/src/Core/util/Constants.h
index d08af60..e852fff 100644
--- a/Eigen/src/Core/util/Constants.h
+++ b/Eigen/src/Core/util/Constants.h
@@ -200,19 +200,15 @@
 };
 
 enum {
-  Dense   = 0,
-  Sparse  = SparseBit
-};
-
-enum {
   ColMajor = 0,
   RowMajor = RowMajorBit
 };
 
 enum {
-  NoDirectAccess = 0,
+  IsDense         = 0,
+  NoDirectAccess  = 0,
   HasDirectAccess = DirectAccessBit,
-  IsSparse = SparseBit
+  IsSparse        = SparseBit
 };
 
 const int FullyCoherentAccessPattern  = 0x1;
diff --git a/Eigen/src/Core/util/XprHelper.h b/Eigen/src/Core/util/XprHelper.h
index 2251001..00f1a39 100644
--- a/Eigen/src/Core/util/XprHelper.h
+++ b/Eigen/src/Core/util/XprHelper.h
@@ -110,7 +110,7 @@
 
 template<typename T, int Sparseness = ei_traits<T>::Flags&SparseBit> class ei_eval;
 
-template<typename T> struct ei_eval<T,Dense>
+template<typename T> struct ei_eval<T,IsDense>
 {
   typedef Matrix<typename ei_traits<T>::Scalar,
                 ei_traits<T>::RowsAtCompileTime,
diff --git a/Eigen/src/Geometry/Quaternion.h b/Eigen/src/Geometry/Quaternion.h
index a2cc785..970280d 100644
--- a/Eigen/src/Geometry/Quaternion.h
+++ b/Eigen/src/Geometry/Quaternion.h
@@ -375,7 +375,7 @@
   Scalar theta = std::acos(ei_abs(d));
   Scalar sinTheta = ei_sin(theta);
 
-  Scalar scale0 = ei_sin( ( 1 - t ) * theta) / sinTheta;
+  Scalar scale0 = ei_sin( ( Scalar(1) - t ) * theta) / sinTheta;
   Scalar scale1 = ei_sin( ( t * theta) ) / sinTheta;
   if (d<0)
     scale1 = -scale1;
diff --git a/Eigen/src/Sparse/SparseMatrix.h b/Eigen/src/Sparse/SparseMatrix.h
index d627184..b4c4fe5 100644
--- a/Eigen/src/Sparse/SparseMatrix.h
+++ b/Eigen/src/Sparse/SparseMatrix.h
@@ -261,6 +261,12 @@
       : m_matrix(mat), m_id(mat.m_outerIndex[outer]), m_start(m_id), m_end(mat.m_outerIndex[outer+1])
     {}
 
+    template<unsigned int Added, unsigned int Removed>
+    InnerIterator(const Flagged<SparseMatrix,Added,Removed>& mat, int outer)
+      : m_matrix(mat._expression()), m_id(m_matrix.m_outerIndex[outer]),
+        m_start(m_id), m_end(m_matrix.m_outerIndex[outer+1])
+    {}
+
     InnerIterator& operator++() { m_id++; return *this; }
 
     Scalar value() const { return m_matrix.m_data.value(m_id); }
diff --git a/Eigen/src/Sparse/SparseMatrixBase.h b/Eigen/src/Sparse/SparseMatrixBase.h
index 83dcefe..1dcf83c 100644
--- a/Eigen/src/Sparse/SparseMatrixBase.h
+++ b/Eigen/src/Sparse/SparseMatrixBase.h
@@ -160,9 +160,6 @@
       return s;
     }
 
-    template<typename OtherDerived>
-    OtherDerived solveTriangular(const MatrixBase<OtherDerived>& other) const;
-
   protected:
 
     bool m_isRValue;
diff --git a/Eigen/src/Sparse/SparseUtil.h b/Eigen/src/Sparse/SparseUtil.h
index bcf87ad..1a860b3 100644
--- a/Eigen/src/Sparse/SparseUtil.h
+++ b/Eigen/src/Sparse/SparseUtil.h
@@ -48,7 +48,7 @@
   };
 };
 
-template<typename T> class ei_eval<T,Sparse>
+template<typename T> class ei_eval<T,IsSparse>
 {
     typedef typename ei_traits<T>::Scalar _Scalar;
     enum {
diff --git a/Eigen/src/Sparse/TriangularSolver.h b/Eigen/src/Sparse/TriangularSolver.h
index 45679fe..5fb8396 100644
--- a/Eigen/src/Sparse/TriangularSolver.h
+++ b/Eigen/src/Sparse/TriangularSolver.h
@@ -25,42 +25,42 @@
 #ifndef EIGEN_SPARSETRIANGULARSOLVER_H
 #define EIGEN_SPARSETRIANGULARSOLVER_H
 
-template<typename Lhs, typename Rhs,
-  int TriangularPart = (int(Lhs::Flags) & LowerTriangularBit)
-                     ? Lower
-                     : (int(Lhs::Flags) & UpperTriangularBit)
-                     ? Upper
-                     : -1,
-  int StorageOrder = int(Lhs::Flags) & RowMajorBit ? RowMajor : ColMajor
-  >
-struct ei_sparse_trisolve_selector;
+// template<typename Lhs, typename Rhs,
+//   int TriangularPart = (int(Lhs::Flags) & LowerTriangularBit)
+//                      ? Lower
+//                      : (int(Lhs::Flags) & UpperTriangularBit)
+//                      ? Upper
+//                      : -1,
+//   int StorageOrder = int(Lhs::Flags) & RowMajorBit ? RowMajor : ColMajor
+//   >
+// struct ei_sparse_trisolve_selector;
 
 // forward substitution, row-major
 template<typename Lhs, typename Rhs>
-struct ei_sparse_trisolve_selector<Lhs,Rhs,Lower,RowMajor>
+struct ei_solve_triangular_selector<Lhs,Rhs,Lower,RowMajor|IsSparse>
 {
   typedef typename Rhs::Scalar Scalar;
-  static void run(const Lhs& lhs, const Rhs& rhs, Rhs& res)
+  static void run(const Lhs& lhs, Rhs& other)
   {
-    for(int col=0 ; col<rhs.cols() ; ++col)
+    for(int col=0 ; col<other.cols() ; ++col)
     {
       for(int i=0; i<lhs.rows(); ++i)
       {
-        Scalar tmp = rhs.coeff(i,col);
+        Scalar tmp = other.coeff(i,col);
         Scalar lastVal = 0;
         int lastIndex = 0;
         for(typename Lhs::InnerIterator it(lhs, i); it; ++it)
         {
           lastVal = it.value();
           lastIndex = it.index();
-          tmp -= lastVal * res.coeff(lastIndex,col);
+          tmp -= lastVal * other.coeff(lastIndex,col);
         }
         if (Lhs::Flags & UnitDiagBit)
-          res.coeffRef(i,col) = tmp;
+          other.coeffRef(i,col) = tmp;
         else
         {
           ei_assert(lastIndex==i);
-          res.coeffRef(i,col) = tmp/lastVal;
+          other.coeffRef(i,col) = tmp/lastVal;
         }
       }
     }
@@ -69,29 +69,29 @@
 
 // backward substitution, row-major
 template<typename Lhs, typename Rhs>
-struct ei_sparse_trisolve_selector<Lhs,Rhs,Upper,RowMajor>
+struct ei_solve_triangular_selector<Lhs,Rhs,Upper,RowMajor|IsSparse>
 {
   typedef typename Rhs::Scalar Scalar;
-  static void run(const Lhs& lhs, const Rhs& rhs, Rhs& res)
+  static void run(const Lhs& lhs, Rhs& other)
   {
-    for(int col=0 ; col<rhs.cols() ; ++col)
+    for(int col=0 ; col<other.cols() ; ++col)
     {
       for(int i=lhs.rows()-1 ; i>=0 ; --i)
       {
-        Scalar tmp = rhs.coeff(i,col);
+        Scalar tmp = other.coeff(i,col);
         typename Lhs::InnerIterator it(lhs, i);
         for(++it; it; ++it)
         {
-          tmp -= it.value() * res.coeff(it.index(),col);
+          tmp -= it.value() * other.coeff(it.index(),col);
         }
 
         if (Lhs::Flags & UnitDiagBit)
-          res.coeffRef(i,col) = tmp;
+          other.coeffRef(i,col) = tmp;
         else
         {
           typename Lhs::InnerIterator it(lhs, i);
           ei_assert(it.index() == i);
-          res.coeffRef(i,col) = tmp/it.value();
+          other.coeffRef(i,col) = tmp/it.value();
         }
       }
     }
@@ -100,26 +100,25 @@
 
 // forward substitution, col-major
 template<typename Lhs, typename Rhs>
-struct ei_sparse_trisolve_selector<Lhs,Rhs,Lower,ColMajor>
+struct ei_solve_triangular_selector<Lhs,Rhs,Lower,ColMajor|IsSparse>
 {
   typedef typename Rhs::Scalar Scalar;
-  static void run(const Lhs& lhs, const Rhs& rhs, Rhs& res)
+  static void run(const Lhs& lhs, Rhs& other)
   {
-    // NOTE we could avoid this copy using an in-place API
-    res = rhs;
-    for(int col=0 ; col<rhs.cols() ; ++col)
+    for(int col=0 ; col<other.cols() ; ++col)
     {
       for(int i=0; i<lhs.cols(); ++i)
       {
         typename Lhs::InnerIterator it(lhs, i);
         if(!(Lhs::Flags & UnitDiagBit))
         {
+          std::cerr << it.value() << " ; " << it.index() << " == " << i << "\n";
           ei_assert(it.index()==i);
-          res.coeffRef(i,col) /= it.value();
+          other.coeffRef(i,col) /= it.value();
         }
-        Scalar tmp = res.coeffRef(i,col);
+        Scalar tmp = other.coeffRef(i,col);
         for(++it; it; ++it)
-          res.coeffRef(it.index(), col) -= tmp * it.value();
+          other.coeffRef(it.index(), col) -= tmp * it.value();
       }
     }
   }
@@ -127,14 +126,12 @@
 
 // backward substitution, col-major
 template<typename Lhs, typename Rhs>
-struct ei_sparse_trisolve_selector<Lhs,Rhs,Upper,ColMajor>
+struct ei_solve_triangular_selector<Lhs,Rhs,Upper,ColMajor|IsSparse>
 {
   typedef typename Rhs::Scalar Scalar;
-  static void run(const Lhs& lhs, const Rhs& rhs, Rhs& res)
+  static void run(const Lhs& lhs, Rhs& other)
   {
-    // NOTE we could avoid this copy using an in-place API
-    res = rhs;
-    for(int col=0 ; col<rhs.cols() ; ++col)
+    for(int col=0 ; col<other.cols() ; ++col)
     {
       for(int i=lhs.cols()-1; i>=0; --i)
       {
@@ -142,28 +139,15 @@
         {
           // FIXME lhs.coeff(i,i) might not be always efficient while it must simply be the
           // last element of the column !
-          res.coeffRef(i,col) /= lhs.coeff(i,i);
+          other.coeffRef(i,col) /= lhs.coeff(i,i);
         }
-        Scalar tmp = res.coeffRef(i,col);
+        Scalar tmp = other.coeffRef(i,col);
         typename Lhs::InnerIterator it(lhs, i);
         for(; it && it.index()<i; ++it)
-          res.coeffRef(it.index(), col) -= tmp * it.value();
+          other.coeffRef(it.index(), col) -= tmp * it.value();
       }
     }
   }
 };
 
-template<typename Derived>
-template<typename OtherDerived>
-OtherDerived SparseMatrixBase<Derived>::solveTriangular(const MatrixBase<OtherDerived>& other) const
-{
-  ei_assert(derived().cols() == other.rows());
-  ei_assert(!(Flags & ZeroDiagBit));
-  ei_assert(Flags & (UpperTriangularBit|LowerTriangularBit));
-
-  OtherDerived res(other.rows(), other.cols());
-  ei_sparse_trisolve_selector<Derived, OtherDerived>::run(derived(), other.derived(), res);
-  return res;
-}
-
 #endif // EIGEN_SPARSETRIANGULARSOLVER_H
diff --git a/test/sparse.cpp b/test/sparse.cpp
index 7decb41..3095297 100644
--- a/test/sparse.cpp
+++ b/test/sparse.cpp
@@ -25,36 +25,63 @@
 #include "main.h"
 #include <Eigen/Sparse>
 
-template<typename Scalar> void sparse(int rows, int cols)
+enum {
+  ForceNonZeroDiag = 1,
+  MakeLowerTriangular = 2,
+  MakeUpperTriangular = 4
+};
+
+template<typename Scalar> void
+initSparse(double density,
+           Matrix<Scalar,Dynamic,Dynamic>& refMat,
+           SparseMatrix<Scalar>& sparseMat,
+           int flags = 0,
+           std::vector<Vector2i>* zeroCoords = 0,
+           std::vector<Vector2i>* nonzeroCoords = 0)
 {
-  double density = std::max(8./(rows*cols), 0.01);
-  typedef Matrix<Scalar,Dynamic,Dynamic> DenseMatrix;
-  Scalar eps = 1e-6;
-
-  SparseMatrix<Scalar> m(rows, cols);
-  DenseMatrix refMat = DenseMatrix::Zero(rows, cols);
-
-  std::vector<Vector2i> zeroCoords;
-  std::vector<Vector2i> nonzeroCoords;
-  m.startFill(rows*cols*density);
-  for(int j=0; j<cols; j++)
+  sparseMat.startFill(refMat.rows()*refMat.cols()*density);
+  for(int j=0; j<refMat.cols(); j++)
   {
-    for(int i=0; i<rows; i++)
+    for(int i=0; i<refMat.rows(); i++)
     {
       Scalar v = (ei_random<Scalar>(0,1) < density) ? ei_random<Scalar>() : 0;
+      if ((flags&ForceNonZeroDiag) && (i==j))
+        while (ei_abs(v)<1e-2)
+          v = ei_random<Scalar>();
+      if ((flags & MakeLowerTriangular) && j>i)
+        v = 0;
+      else if ((flags & MakeUpperTriangular) && j<i)
+        v = 0;
       if (v!=0)
       {
-        m.fill(i,j) = v;
-        nonzeroCoords.push_back(Vector2i(i,j));
+        sparseMat.fill(i,j) = v;
+        if (nonzeroCoords)
+          nonzeroCoords->push_back(Vector2i(i,j));
       }
-      else
+      else if (zeroCoords)
       {
-        zeroCoords.push_back(Vector2i(i,j));
+        zeroCoords->push_back(Vector2i(i,j));
       }
       refMat(i,j) = v;
     }
   }
-  m.endFill();
+  sparseMat.endFill();
+}
+
+template<typename Scalar> void sparse(int rows, int cols)
+{
+  double density = std::max(8./(rows*cols), 0.01);
+  typedef Matrix<Scalar,Dynamic,Dynamic> DenseMatrix;
+  typedef Matrix<Scalar,Dynamic,1> DenseVector;
+  Scalar eps = 1e-6;
+
+  SparseMatrix<Scalar> m(rows, cols);
+  DenseMatrix refMat = DenseMatrix::Zero(rows, cols);
+  DenseVector vec1 = DenseVector::Random(rows);
+
+  std::vector<Vector2i> zeroCoords;
+  std::vector<Vector2i> nonzeroCoords;
+  initSparse<Scalar>(density, refMat, m, 0, &zeroCoords, &nonzeroCoords);
 
   VERIFY(zeroCoords.size()>0 && "re-run the test");
   VERIFY(nonzeroCoords.size()>0 && "re-run the test");
@@ -145,10 +172,30 @@
   }
   VERIFY_IS_APPROX(m, refMat);
 
+  // test triangular solver
+  {
+    DenseVector vec2 = vec1, vec3 = vec1;
+    SparseMatrix<Scalar> m2(rows, cols);
+    DenseMatrix refMat2 = DenseMatrix::Zero(rows, cols);
+
+    // lower
+    initSparse<Scalar>(density, refMat2, m2, ForceNonZeroDiag|MakeLowerTriangular, &zeroCoords, &nonzeroCoords);
+    VERIFY_IS_APPROX(refMat2.template marked<Lower>().solveTriangular(vec2),
+                     m2.template marked<Lower>().solveTriangular(vec3));
+
+    // upper
+    initSparse<Scalar>(density, refMat2, m2, ForceNonZeroDiag|MakeUpperTriangular, &zeroCoords, &nonzeroCoords);
+    VERIFY_IS_APPROX(refMat2.template marked<Upper>().solveTriangular(vec2),
+                     m2.template marked<Upper>().solveTriangular(vec3));
+     
+    // TODO test row major
+  }
+
 }
 
 void test_sparse()
 {
   sparse<double>(8, 8);
   sparse<double>(16, 16);
+  sparse<double>(33, 33);
 }