| // This file is part of Eigen, a lightweight C++ template library | 
 | // for linear algebra. | 
 | // | 
 | // Copyright (C) 2020 Chris Schoutrop <c.e.m.schoutrop@tue.nl> | 
 | // Copyright (C) 2020 Mischa Senders <m.j.senders@student.tue.nl> | 
 | // Copyright (C) 2020 Lex Kuijpers <l.kuijpers@student.tue.nl> | 
 | // Copyright (C) 2020 Jens Wehner <j.wehner@esciencecenter.nl> | 
 | // Copyright (C) 2020 Jan van Dijk <j.v.dijk@tue.nl> | 
 | // Copyright (C) 2020 Adithya Vijaykumar | 
 | // | 
 | // This Source Code Form is subject to the terms of the Mozilla | 
 | // Public License v. 2.0. If a copy of the MPL was not distributed | 
 | // with this file, You can obtain one at http://mozilla.org/MPL/2.0/. | 
 | /* | 
 |  | 
The IDR(S)Stab(L) method is a combination of IDR(S) and BiCGStab(L)

This implementation of IDRSTABL is based on
1. Aihara, K., Abe, K., & Ishiwata, E. (2014). A variant of IDRstab with
   reliable update strategies for solving sparse linear systems. Journal of
   Computational and Applied Mathematics, 259, 244-258.
   doi:10.1016/j.cam.2013.08.028
2. Aihara, K., Abe, K., & Ishiwata, E. (2015). Preconditioned IDRSTABL
   Algorithms for Solving Nonsymmetric Linear Systems. International
   Journal of Applied Mathematics, 45(3).
3. Saad, Y. (2003). Iterative Methods for Sparse Linear Systems:
   Second Edition. Philadelphia, PA: SIAM.
4. Sonneveld, P., & Van Gijzen, M. B. (2009). IDR(s): A Family of Simple and
   Fast Algorithms for Solving Large Nonsymmetric Systems of Linear
   Equations. SIAM Journal on Scientific Computing, 31(2), 1035-1062.
   doi:10.1137/070685804
5. Sonneveld, P. (2012). On the convergence behavior of IDR(s) and related
   methods. SIAM Journal on Scientific Computing, 34(5), A2576-A2598.

Right-preconditioning based on Ref. 3 is implemented here.
 | */ | 
 |  | 
 | #ifndef EIGEN_IDRSTABL_H | 
 | #define EIGEN_IDRSTABL_H | 
 |  | 
 | namespace Eigen { | 
 |  | 
 | namespace internal { | 
 |  | 
/** \internal Core right-preconditioned IDR(S)Stab(L) kernel.
 *
 * Solves A*x = b where the preconditioner M is applied from the right:
 * internally the method iterates on the preconditioned unknown, and the
 * result is mapped back through precond.solve() before returning.
 *
 * \param mat       the (square) system matrix A.
 * \param rhs       the right-hand side b.
 * \param x         on input the initial guess, on output the solution.
 * \param precond   the preconditioner, applied through precond.solve().
 * \param iters     on input the maximum number of outer iterations, on output
 *                  the number of outer iterations performed.
 * \param tol_error on input the requested relative tolerance, on output the
 *                  achieved relative residual norm |b - A*x| / |b|.
 * \param L         number of minimal-residual (polynomial) steps per cycle.
 * \param S         dimension of the shadow space.
 * \return always true; convergence is judged by the caller from tol_error.
 */
template <typename MatrixType, typename Rhs, typename Dest, typename Preconditioner>
bool idrstabl(const MatrixType &mat, const Rhs &rhs, Dest &x, const Preconditioner &precond, Index &iters,
              typename Dest::RealScalar &tol_error, Index L, Index S) {
  /*
    Setup and type definitions.
  */
  using numext::abs;
  using numext::sqrt;
  typedef typename Dest::Scalar Scalar;
  typedef typename Dest::RealScalar RealScalar;
  typedef Matrix<Scalar, Dynamic, 1> VectorType;
  typedef Matrix<Scalar, Dynamic, Dynamic, ColMajor> DenseMatrixType;

  const Index N = x.rows();

  Index k = 0;  // Iteration counter
  const Index maxIters = iters;

  const RealScalar rhs_norm = rhs.stableNorm();
  // Absolute tolerance derived from the requested relative tolerance.
  const RealScalar tol = tol_error * rhs_norm;

  if (rhs_norm == 0) {
    /*
      If b==0, then the exact solution is x=0.
      rhs_norm is needed for other calculations anyways, this exit is a freebie.
    */
    x.setZero();
    tol_error = 0.0;
    return true;
  }
  // Construct decomposition objects beforehand.
  FullPivLU<DenseMatrixType> lu_solver;

  if (S >= N || L >= N) {
    /*
      The matrix is very small, or the choice of L and S is very poor
      in that case solving directly will be best.
    */
    lu_solver.compute(DenseMatrixType(mat));
    x = lu_solver.solve(rhs);
    tol_error = (rhs - mat * x).stableNorm() / rhs_norm;
    return true;
  }

  // Define maximum sizes to prevent any reallocation later on.
  // u and r hold the "extended" vectors [v; A v; ...; A^L v] column-wise.
  DenseMatrixType u(N, L + 1);
  DenseMatrixType r(N, L + 1);

  // V stores the S candidate basis vectors, each stacked over L+1 blocks of N.
  DenseMatrixType V(N * (L + 1), S);

  VectorType alpha(S);
  VectorType gamma(L);
  VectorType update(N);

  /*
    Main IDRSTABL algorithm
  */
  // Set up the initial residual
  VectorType x0 = x;
  r.col(0) = rhs - mat * x;
  x.setZero();  // The final solution will be x0+x

  tol_error = r.col(0).stableNorm();

  // FOM = Full orthogonalisation method
  DenseMatrixType h_FOM = DenseMatrixType::Zero(S, S - 1);

  // Construct an initial U matrix of size N x S
  DenseMatrixType U(N * (L + 1), S);
  for (Index col_index = 0; col_index < S; ++col_index) {
    // Arnoldi-like process to generate a set of orthogonal vectors spanning
    // {u,A*u,A*A*u,...,A^(S-1)*u}. This construction can be combined with the
    // Full Orthogonalization Method (FOM) from Ref.3 to provide a possible
    // early exit with no additional MV.
    if (col_index != 0) {
      /*
      Modified Gram-Schmidt strategy:
      orthogonalize A*u_{k-1} against the previously accepted columns of U,
      recording the projection coefficients in the Hessenberg matrix h_FOM.
      */
      VectorType w = mat * precond.solve(u.col(0));
      for (Index i = 0; i < col_index; ++i) {
        auto v = U.col(i).head(N);
        h_FOM(i, col_index - 1) = v.dot(w);
        w -= h_FOM(i, col_index - 1) * v;
      }
      u.col(0) = w;
      h_FOM(col_index, col_index - 1) = u.col(0).stableNorm();

      if (abs(h_FOM(col_index, col_index - 1)) != RealScalar(0)) {
        /*
        This only happens if u is NOT exactly zero. In case it is exactly zero
        it would imply that that this u has no component in the direction of the
        current residual.

        By then setting u to zero it will not contribute any further (as it
        should). Whereas attempting to normalize results in division by zero.

        Such cases occur if:
        1. The basis of dimension <S is sufficient to exactly solve the linear
        system. I.e. the current residual is in span{r,Ar,...A^{m-1}r}, where
        (m-1)<=S.
        2. Two vectors vectors generated from r, Ar,... are (numerically)
        parallel.

        In case 1, the exact solution to the system can be obtained from the
        "Full Orthogonalization Method" (Algorithm 6.4 in the book of Saad),
        without any additional MV.

        Contrary to what one would suspect, the comparison with ==0.0 for
        floating-point types is intended here. Any arbritary non-zero u is fine
        to continue, however if u contains either NaN or Inf the algorithm will
        break down.
        */
        u.col(0) /= h_FOM(col_index, col_index - 1);
      }
    } else {
      // First column: normalized initial residual.
      u.col(0) = r.col(0);
      u.col(0).normalize();
    }

    U.col(col_index).head(N) = u.col(0);
  }

  if (S > 1) {
    // Check for early FOM exit.
    // Solve the small (S-1)x(S-1) Hessenberg system H y = beta*e1
    // (Algorithm 6.4 in Saad) to obtain the FOM iterate for free.
    Scalar beta = r.col(0).stableNorm();
    VectorType e1 = VectorType::Zero(S - 1);
    e1(0) = beta;
    lu_solver.compute(h_FOM.topLeftCorner(S - 1, S - 1));
    VectorType y = lu_solver.solve(e1);
    VectorType x2 = x + U.topLeftCorner(N, S - 1) * y;

    // Using proposition 6.7 in Saad, one MV can be saved to calculate the
    // residual
    RealScalar FOM_residual = (h_FOM(S - 1, S - 2) * y(S - 2) * U.col(S - 1).head(N)).stableNorm();

    if (FOM_residual < tol) {
      // Exit, the FOM algorithm was already accurate enough
      iters = k;
      // Convert back to the unpreconditioned solution
      x = precond.solve(x2);
      // x contains the updates to x0, add those back to obtain the solution
      x += x0;
      tol_error = FOM_residual / rhs_norm;
      return true;
    }
  }

  /*
    Select an initial (N x S) matrix R0.
    1. Generate random R0, orthonormalize the result.
    2. This results in R0, however to save memory and compute we only need the
    adjoint of R0. This is given by the matrix R_T.\ Additionally, the matrix
    (mat.adjoint()*R_tilde).adjoint()=R_tilde.adjoint()*mat by the
    anti-distributivity property of the adjoint. This results in AR_T, which is
    constant if R_T does not have to be regenerated and can be precomputed.
    Based on reference 4, this has zero probability in exact arithmetic.
  */

  // Original IDRSTABL and Kensuke choose S random vectors:
  const HouseholderQR<DenseMatrixType> qr(DenseMatrixType::Random(N, S));
  DenseMatrixType R_T = (qr.householderQ() * DenseMatrixType::Identity(N, S)).adjoint();
  DenseMatrixType AR_T = DenseMatrixType(R_T * mat);

  // Pre-allocate sigma.
  DenseMatrixType sigma(S, S);

  bool reset_while = false;  // Should the while loop be reset for some reason?

  while (k < maxIters) {
    for (Index j = 1; j <= L; ++j) {
      /*
        The IDR Step
      */
      // Construction of the sigma-matrix, and the decomposition of sigma.
      // sigma = R_T * A * M^{-1} * U_{j-1}, built column by column so the
      // preconditioner solve is applied per vector.
      for (Index i = 0; i < S; ++i) {
        sigma.col(i).noalias() = AR_T * precond.solve(U.block(N * (j - 1), i, N, 1));
      }

      lu_solver.compute(sigma);
      // Obtain the update coefficients alpha
      if (j == 1) {
        // alpha=inverse(sigma)*(R_T*r_0);
        alpha.noalias() = lu_solver.solve(R_T * r.col(0));
      } else {
        // alpha=inverse(sigma)*(AR_T*r_{j-2})
        alpha.noalias() = lu_solver.solve(AR_T * precond.solve(r.col(j - 2)));
      }

      // Obtain new solution and residual from this update
      update.noalias() = U.topRows(N) * alpha;
      r.col(0) -= mat * precond.solve(update);
      x += update;

      for (Index i = 1; i <= j - 2; ++i) {
        // This only affects the case L>2
        r.col(i) -= U.block(N * (i + 1), 0, N, S) * alpha;
      }
      if (j > 1) {
        // r=[r;A*r_{j-2}]
        r.col(j - 1).noalias() = mat * precond.solve(r.col(j - 2));
      }
      tol_error = r.col(0).stableNorm();

      if (tol_error < tol) {
        // If at this point the algorithm has converged, exit.
        reset_while = true;
        break;
      }

      bool break_normalization = false;
      for (Index q = 1; q <= S; ++q) {
        if (q == 1) {
          // u = r;
          u.leftCols(j + 1) = r.leftCols(j + 1);
        } else {
          // u=[u_1;u_2;...;u_j]
          // Shift: reuse the previously computed higher-order blocks.
          u.leftCols(j) = u.middleCols(1, j);
        }

        // Obtain the update coefficients beta implicitly
        // beta=lu_sigma.solve(AR_T * u.block(N * (j - 1), 0, N, 1)
        u.reshaped().head(u.rows() * j) -= U.topRows(N * j) * lu_solver.solve(AR_T * precond.solve(u.col(j - 1)));

        // u=[u;Au_{j-1}]
        u.col(j).noalias() = mat * precond.solve(u.col(j - 1));

        // Orthonormalize u_j to the columns of V_j(:,1:q-1)
        if (q > 1) {
          /*
          Modified Gram-Schmidt-like procedure to make u orthogonal to the
          columns of V from Ref. 1.

          The vector mu from Ref. 1 is obtained implicitly:
          mu=V.block(N * j, 0, N, q - 1).adjoint() * u.block(N * j, 0, N, 1).
          */
          for (Index i = 0; i <= q - 2; ++i) {
            auto v = V.col(i).segment(N * j, N);
            Scalar h = v.squaredNorm();
            h = v.dot(u.col(j)) / h;
            // The same coefficient is subtracted from all L+1 stacked blocks.
            u.reshaped().head(u.rows() * (j + 1)) -= h * V.block(0, i, N * (j + 1), 1);
          }
        }
        // Normalize u and assign to a column of V
        Scalar normalization_constant = u.col(j).stableNorm();
        //  If u is exactly zero, this will lead to a NaN. Small, non-zero u is
        //  fine.
        if (normalization_constant == RealScalar(0.0)) {
          break_normalization = true;
          break;
        } else {
          u.leftCols(j + 1) /= normalization_constant;
        }

        V.block(0, q - 1, N * (j + 1), 1).noalias() = u.reshaped().head(u.rows() * (j + 1));
      }

      if (break_normalization == false) {
        U = V;
      }
    }
    if (reset_while) {
      break;
    }

    // r=[r;mat*r_{L-1}]
    r.col(L).noalias() = mat * precond.solve(r.col(L - 1));

    /*
            The polynomial step: choose gamma to minimize ||r_0 - [r_1..r_L] gamma||
            via a least-squares (QR) solve.
    */
    ColPivHouseholderQR<DenseMatrixType> qr_solver(r.rightCols(L));
    gamma.noalias() = qr_solver.solve(r.col(0));

    // Update solution and residual using the "minimized residual coefficients"
    update.noalias() = r.leftCols(L) * gamma;
    x += update;
    r.col(0) -= mat * precond.solve(update);

    // Update iteration info
    ++k;
    tol_error = r.col(0).stableNorm();

    if (tol_error < tol) {
      // Slightly early exit by moving the criterion before the update of U,
      // after the main while loop the result of that calculation would not be
      // needed.
      break;
    }

    /*
    U=U0-sum(gamma_j*U_j)
    Consider the first iteration. Then U only contains U0, so at the start of
    the while-loop U should be U0. Therefore only the first N rows of U have to
    be updated.
    */
    for (Index i = 1; i <= L; ++i) {
      U.topRows(N) -= U.block(N * i, 0, N, S) * gamma(i - 1);
    }
  }

  /*
          Exit after the while loop terminated (converged or out of iterations).
  */
  iters = k;
  // Convert back to the unpreconditioned solution
  x = precond.solve(x);
  // x contains the updates to x0, add those back to obtain the solution
  x += x0;
  tol_error = tol_error / rhs_norm;
  return true;
}
 |  | 
 | }  // namespace internal | 
 |  | 
// Forward declaration (with the default preconditioner) so that
// internal::traits can be specialized before the class definition below.
template <typename MatrixType_, typename Preconditioner_ = DiagonalPreconditioner<typename MatrixType_::Scalar>>
class IDRSTABL;
 |  | 
 | namespace internal { | 
 |  | 
 | template <typename MatrixType_, typename Preconditioner_> | 
 | struct traits<IDRSTABL<MatrixType_, Preconditioner_>> { | 
 |   typedef MatrixType_ MatrixType; | 
 |   typedef Preconditioner_ Preconditioner; | 
 | }; | 
 |  | 
 | }  // namespace internal | 
 |  | 
 | /** \ingroup IterativeLinearSolvers_Module | 
 |  * \brief The IDR(s)STAB(l) is a combination of IDR(s) and BiCGSTAB(l). It is a | 
 |  * short-recurrences Krylov method for sparse square problems. It can outperform | 
 |  * both IDR(s) and BiCGSTAB(l). IDR(s)STAB(l) generally closely follows the | 
 |  * optimal GMRES convergence in terms of the number of Matrix-Vector products. | 
 |  * However, without the increasing cost per iteration of GMRES. IDR(s)STAB(l) is | 
 |  * suitable for both indefinite systems and systems with complex eigenvalues. | 
 |  * | 
 |  * This class allows solving for A.x = b sparse linear problems. The vectors x | 
 |  * and b can be either dense or sparse. | 
 |  * | 
 * \tparam MatrixType_ the type of the matrix A, which can be a dense or a
 * sparse matrix.
 * \tparam Preconditioner_ the type of the preconditioner.
 * Default is DiagonalPreconditioner
 |  * | 
 |  * \implsparsesolverconcept | 
 |  * | 
 |  * The maximum number of iterations and tolerance value can be controlled via | 
 |  * the setMaxIterations() and setTolerance() methods. The defaults are the size | 
 |  * of the problem for the maximum number of iterations and | 
 |  * NumTraits<Scalar>::epsilon() for the tolerance. | 
 |  * | 
 |  * The tolerance is the maximum relative residual error: |Ax-b|/|b| for which | 
 |  * the linear system is considered solved. | 
 |  * | 
 |  * \b Performance: When using sparse matrices, best performance is achieved for | 
 |  * a row-major sparse matrix format. Moreover, in this case multi-threading can | 
 |  * be exploited if the user code is compiled with OpenMP enabled. See \ref | 
 |  * TopicMultiThreading for details. | 
 |  * | 
 |  * By default the iterations start with x=0 as an initial guess of the solution. | 
 |  * One can control the start using the solveWithGuess() method. | 
 |  * | 
 |  * IDR(s)STAB(l) can also be used in a matrix-free context, see the following | 
 |  * \link MatrixfreeSolverExample example \endlink. | 
 |  * | 
 |  * \sa class SimplicialCholesky, DiagonalPreconditioner, IdentityPreconditioner | 
 |  */ | 
 |  | 
 | template <typename MatrixType_, typename Preconditioner_> | 
 | class IDRSTABL : public IterativeSolverBase<IDRSTABL<MatrixType_, Preconditioner_>> { | 
 |   typedef IterativeSolverBase<IDRSTABL> Base; | 
 |   using Base::m_error; | 
 |   using Base::m_info; | 
 |   using Base::m_isInitialized; | 
 |   using Base::m_iterations; | 
 |   using Base::matrix; | 
 |   Index m_L; | 
 |   Index m_S; | 
 |  | 
 |  public: | 
 |   typedef MatrixType_ MatrixType; | 
 |   typedef typename MatrixType::Scalar Scalar; | 
 |   typedef typename MatrixType::RealScalar RealScalar; | 
 |   typedef Preconditioner_ Preconditioner; | 
 |  | 
 |  public: | 
 |   /** Default constructor. */ | 
 |   IDRSTABL() : m_L(2), m_S(4) {} | 
 |  | 
 |   /**   Initialize the solver with matrix \a A for further \c Ax=b solving. | 
 |  | 
 |   This constructor is a shortcut for the default constructor followed | 
 |   by a call to compute(). | 
 |  | 
 |   \warning this class stores a reference to the matrix A as well as some | 
 |   precomputed values that depend on it. Therefore, if \a A is changed | 
 |   this class becomes invalid. Call compute() to update it with the new | 
 |   matrix A, or modify a copy of A. | 
 |           */ | 
 |   template <typename MatrixDerived> | 
 |   explicit IDRSTABL(const EigenBase<MatrixDerived> &A) : Base(A.derived()), m_L(2), m_S(4) {} | 
 |  | 
 |   /** \internal */ | 
 |   /**     Loops over the number of columns of b and does the following: | 
 |                                   1. sets the tolerance and maxIterations | 
 |                                   2. Calls the function that has the core solver | 
 |      routine | 
 |   */ | 
 |   template <typename Rhs, typename Dest> | 
 |   void _solve_vector_with_guess_impl(const Rhs &b, Dest &x) const { | 
 |     m_iterations = Base::maxIterations(); | 
 |     m_error = Base::m_tolerance; | 
 |     bool ret = internal::idrstabl(matrix(), b, x, Base::m_preconditioner, m_iterations, m_error, m_L, m_S); | 
 |  | 
 |     m_info = (!ret) ? NumericalIssue : m_error <= 10 * Base::m_tolerance ? Success : NoConvergence; | 
 |   } | 
 |  | 
 |   /** Sets the parameter L, indicating the amount of minimize residual steps are | 
 |    * used. */ | 
 |   void setL(Index L) { | 
 |     eigen_assert(L >= 1 && "L needs to be positive"); | 
 |     m_L = L; | 
 |   } | 
 |   /** Sets the parameter S, indicating the dimension of the shadow residual | 
 |    * space.. */ | 
 |   void setS(Index S) { | 
 |     eigen_assert(S >= 1 && "S needs to be positive"); | 
 |     m_S = S; | 
 |   } | 
 | }; | 
 |  | 
 | }  // namespace Eigen | 
 |  | 
 | #endif /* EIGEN_IDRSTABL_H */ |