Merged in advanpix/eigen-mp-devs (pull request PR-32) Fixes for SparseMatrix to support non-POD scalar types
diff --git a/Eigen/src/Core/Block.h b/Eigen/src/Core/Block.h index 358b318..3efdcfe 100644 --- a/Eigen/src/Core/Block.h +++ b/Eigen/src/Core/Block.h
@@ -21,6 +21,9 @@ * \param XprType the type of the expression in which we are taking a block * \param BlockRows the number of rows of the block we are taking at compile time (optional) * \param BlockCols the number of columns of the block we are taking at compile time (optional) + * \param InnerPanel is true, if the block maps to a set of rows of a row major matrix or + * to set of columns of a column major matrix (optional). The parameter allows to determine + * at compile time whether aligned access is possible on the block expression. * * This class represents an expression of either a fixed-size or dynamic-size block. It is the return * type of DenseBase::block(Index,Index,Index,Index) and DenseBase::block<int,int>(Index,Index) and
diff --git a/Eigen/src/Geometry/Transform.h b/Eigen/src/Geometry/Transform.h index 887e718..419f901 100644 --- a/Eigen/src/Geometry/Transform.h +++ b/Eigen/src/Geometry/Transform.h
@@ -208,9 +208,9 @@ /** type of a vector */ typedef Matrix<Scalar,Dim,1> VectorType; /** type of a read/write reference to the translation part of the rotation */ - typedef Block<MatrixType,Dim,1,int(Mode)==(AffineCompact)> TranslationPart; + typedef Block<MatrixType,Dim,1,!(internal::traits<MatrixType>::Flags & RowMajorBit)> TranslationPart; /** type of a read reference to the translation part of the rotation */ - typedef const Block<ConstMatrixType,Dim,1,int(Mode)==(AffineCompact)> ConstTranslationPart; + typedef const Block<ConstMatrixType,Dim,1,!(internal::traits<MatrixType>::Flags & RowMajorBit)> ConstTranslationPart; /** corresponding translation type */ typedef Translation<Scalar,Dim> TranslationType;
diff --git a/Eigen/src/SVD/UpperBidiagonalization.h b/Eigen/src/SVD/UpperBidiagonalization.h index 587de37..6cac3af 100644 --- a/Eigen/src/SVD/UpperBidiagonalization.h +++ b/Eigen/src/SVD/UpperBidiagonalization.h
@@ -31,7 +31,7 @@ typedef typename MatrixType::Index Index; typedef Matrix<Scalar, 1, ColsAtCompileTime> RowVectorType; typedef Matrix<Scalar, RowsAtCompileTime, 1> ColVectorType; - typedef BandMatrix<RealScalar, ColsAtCompileTime, ColsAtCompileTime, 1, 0> BidiagonalType; + typedef BandMatrix<RealScalar, ColsAtCompileTime, ColsAtCompileTime, 1, 0, RowMajor> BidiagonalType; typedef Matrix<Scalar, ColsAtCompileTime, 1> DiagVectorType; typedef Matrix<Scalar, ColsAtCompileTimeMinusOne, 1> SuperDiagVectorType; typedef HouseholderSequence< @@ -61,6 +61,7 @@ } UpperBidiagonalization& compute(const MatrixType& matrix); + UpperBidiagonalization& computeUnblocked(const MatrixType& matrix); const MatrixType& householder() const { return m_householder; } const BidiagonalType& bidiagonal() const { return m_bidiagonal; } @@ -85,45 +86,294 @@ bool m_isInitialized; }; -template<typename _MatrixType> -UpperBidiagonalization<_MatrixType>& UpperBidiagonalization<_MatrixType>::compute(const _MatrixType& matrix) +// Standard upper bidiagonalization without fancy optimizations +// This version should be faster for small matrix size +template<typename MatrixType> +void upperbidiagonalization_inplace_unblocked(MatrixType& mat, + typename MatrixType::RealScalar *diagonal, + typename MatrixType::RealScalar *upper_diagonal, + typename MatrixType::Scalar* tempData = 0) { - Index rows = matrix.rows(); - Index cols = matrix.cols(); - - eigen_assert(rows >= cols && "UpperBidiagonalization is only for matrices satisfying rows>=cols."); - - m_householder = matrix; + typedef typename MatrixType::Index Index; + typedef typename MatrixType::Scalar Scalar; - ColVectorType temp(rows); + Index rows = mat.rows(); + Index cols = mat.cols(); + Index size = (std::min)(rows, cols); + + typedef Matrix<Scalar,Dynamic,1,ColMajor,MatrixType::MaxRowsAtCompileTime,1> TempType; + TempType tempVector; + if(tempData==0) + { + tempVector.resize(rows); + tempData = tempVector.data(); + } for (Index k = 0; /* breaks at k==cols-1 below */ ; ++k) { Index remainingRows = rows - k; Index remainingCols = cols - k - 1; - // construct left householder transform in-place in m_householder - m_householder.col(k).tail(remainingRows) - .makeHouseholderInPlace(m_householder.coeffRef(k,k), - m_bidiagonal.template diagonal<0>().coeffRef(k)); - // apply householder transform to remaining part of m_householder on the left - m_householder.bottomRightCorner(remainingRows, remainingCols) - .applyHouseholderOnTheLeft(m_householder.col(k).tail(remainingRows-1), - m_householder.coeff(k,k), - temp.data()); + // construct left householder transform in-place in A + mat.col(k).tail(remainingRows) + .makeHouseholderInPlace(mat.coeffRef(k,k), diagonal[k]); + // apply householder transform to remaining part of A on the left + mat.bottomRightCorner(remainingRows, remainingCols) + .applyHouseholderOnTheLeft(mat.col(k).tail(remainingRows-1), mat.coeff(k,k), tempData); if(k == cols-1) break; - - // construct right householder transform in-place in m_householder - m_householder.row(k).tail(remainingCols) - .makeHouseholderInPlace(m_householder.coeffRef(k,k+1), - m_bidiagonal.template diagonal<1>().coeffRef(k)); - // apply householder transform to remaining part of m_householder on the left - m_householder.bottomRightCorner(remainingRows-1, remainingCols) - .applyHouseholderOnTheRight(m_householder.row(k).tail(remainingCols-1).transpose(), - m_householder.coeff(k,k+1), - temp.data()); + + // construct right householder transform in-place in mat + mat.row(k).tail(remainingCols) + .makeHouseholderInPlace(mat.coeffRef(k,k+1), upper_diagonal[k]); + // apply householder transform to remaining part of mat on the left + mat.bottomRightCorner(remainingRows-1, remainingCols) + .applyHouseholderOnTheRight(mat.row(k).tail(remainingCols-1).transpose(), mat.coeff(k,k+1), tempData); } +} + +/** \internal + * Helper routine for the block reduction to upper bidiagonal form. + * + * Let's partition the matrix A: + * + * | A00 A01 | + * A = | | + * | A10 A11 | + * + * This function reduces to bidiagonal form the left \c rows x \a blockSize vertical panel [A00/A10] + * and the \a blockSize x \c cols horizontal panel [A00 A01] of the matrix \a A. The bottom-right block A11 + * is updated using matrix-matrix products: + * A22 -= V * Y^T - X * U^T + * where V and U contains the left and right Householder vectors. U and V are stored in A10, and A01 + * respectively, and the update matrices X and Y are computed during the reduction. + * + */ +template<typename MatrixType> +void upperbidiagonalization_blocked_helper(MatrixType& A, + typename MatrixType::RealScalar *diagonal, + typename MatrixType::RealScalar *upper_diagonal, + typename MatrixType::Index bs, + Ref<Matrix<typename MatrixType::Scalar, Dynamic, Dynamic> > X, + Ref<Matrix<typename MatrixType::Scalar, Dynamic, Dynamic> > Y) +{ + typedef typename MatrixType::Index Index; + typedef typename MatrixType::Scalar Scalar; + typedef Ref<Matrix<Scalar, Dynamic, 1> > SubColumnType; + typedef Ref<Matrix<Scalar, 1, Dynamic>, 0, InnerStride<> > SubRowType; + typedef Ref<Matrix<Scalar, Dynamic, Dynamic> > SubMatType; + + Index brows = A.rows(); + Index bcols = A.cols(); + + Scalar tau_u, tau_u_prev(0), tau_v; + + for(Index k = 0; k < bs; ++k) + { + Index remainingRows = brows - k; + Index remainingSizeInBlock = bs - k - 1; + Index remainingCols = bcols - k - 1; + + SubMatType X_k1( X.block(k,0, remainingRows,k) ); + SubMatType V_k1( A.block(k,0, remainingRows,k) ); + + // 1 - update the k-th column of A + SubColumnType v_k = A.col(k).tail(remainingRows); + v_k -= V_k1 * Y.row(k).head(k).adjoint(); + if(k) v_k -= X_k1 * A.col(k).head(k); + + // 2 - construct left Householder transform in-place + v_k.makeHouseholderInPlace(tau_v, diagonal[k]); + + if(k+1<bcols) + { + SubMatType Y_k ( Y.block(k+1,0, remainingCols, k+1) ); + SubMatType U_k1 ( A.block(0,k+1, k,remainingCols) ); + + // this eases the application of Householder transforAions + // A(k,k) will store tau_v later + A(k,k) = Scalar(1); + + // 3 - Compute y_k^T = tau_v * ( A^T*v_k - Y_k-1*V_k-1^T*v_k - U_k-1*X_k-1^T*v_k ) + { + SubColumnType y_k( Y.col(k).tail(remainingCols) ); + + // let's use the begining of column k of Y as a temporary vector + SubColumnType tmp( Y.col(k).head(k) ); + y_k.noalias() = A.block(k,k+1, remainingRows,remainingCols).adjoint() * v_k; // bottleneck + tmp.noalias() = V_k1.adjoint() * v_k; + y_k.noalias() -= Y_k.leftCols(k) * tmp; + tmp.noalias() = X_k1.adjoint() * v_k; + y_k.noalias() -= U_k1.adjoint() * tmp; + y_k *= numext::conj(tau_v); + } + + // 4 - update k-th row of A (it will become u_k) + SubRowType u_k( A.row(k).tail(remainingCols) ); + u_k = u_k.conjugate(); + { + u_k -= Y_k * A.row(k).head(k+1).adjoint(); + if(k) u_k -= U_k1.adjoint() * X.row(k).head(k).adjoint(); + } + + // 5 - construct right Householder transform in-placecols + u_k.makeHouseholderInPlace(tau_u, upper_diagonal[k]); + + // this eases the application of Householder transforAions + // A(k,k+1) will store tau_u later + A(k,k+1) = Scalar(1); + + // 6 - Compute x_k = tau_u * ( A*u_k - X_k-1*U_k-1^T*u_k - V_k*Y_k^T*u_k ) + { + SubColumnType x_k ( X.col(k).tail(remainingRows-1) ); + + // let's use the begining of column k of X as a temporary vectors + // note that tmp0 and tmp1 overlaps + SubColumnType tmp0 ( X.col(k).head(k) ), + tmp1 ( X.col(k).head(k+1) ); + + x_k.noalias() = A.block(k+1,k+1, remainingRows-1,remainingCols) * u_k.transpose(); // bottleneck + tmp0.noalias() = U_k1 * u_k.transpose(); + x_k.noalias() -= X_k1.bottomRows(remainingRows-1) * tmp0; + tmp1.noalias() = Y_k.adjoint() * u_k.transpose(); + x_k.noalias() -= A.block(k+1,0, remainingRows-1,k+1) * tmp1; + x_k *= numext::conj(tau_u); + tau_u = numext::conj(tau_u); + u_k = u_k.conjugate(); + } + + if(k>0) A.coeffRef(k-1,k) = tau_u_prev; + tau_u_prev = tau_u; + } + else + A.coeffRef(k-1,k) = tau_u_prev; + + A.coeffRef(k,k) = tau_v; + } + + if(bs<bcols) + A.coeffRef(bs-1,bs) = tau_u_prev; + + // update A22 + if(bcols>bs && brows>bs) + { + SubMatType A11( A.bottomRightCorner(brows-bs,bcols-bs) ); + SubMatType A10( A.block(bs,0, brows-bs,bs) ); + SubMatType A01( A.block(0,bs, bs,bcols-bs) ); + Scalar tmp = A01(bs-1,0); + A01(bs-1,0) = 1; + A11.noalias() -= A10 * Y.topLeftCorner(bcols,bs).bottomRows(bcols-bs).adjoint(); + A11.noalias() -= X.topLeftCorner(brows,bs).bottomRows(brows-bs) * A01; + A01(bs-1,0) = tmp; + } +} + +/** \internal + * + * Implementation of a block-bidiagonal reduction. + * It is based on the following paper: + * The Design of a Parallel Dense Linear Algebra Software Library: Reduction to Hessenberg, Tridiagonal, and Bidiagonal Form. + * by Jaeyoung Choi, Jack J. Dongarra, David W. Walker. (1995) + * section 3.3 + */ +template<typename MatrixType, typename BidiagType> +void upperbidiagonalization_inplace_blocked(MatrixType& A, BidiagType& bidiagonal, + typename MatrixType::Index maxBlockSize=32, + typename MatrixType::Scalar* /*tempData*/ = 0) +{ + typedef typename MatrixType::Index Index; + typedef typename MatrixType::Scalar Scalar; + typedef Block<MatrixType,Dynamic,Dynamic> BlockType; + + Index rows = A.rows(); + Index cols = A.cols(); + Index size = (std::min)(rows, cols); + + Matrix<Scalar,MatrixType::RowsAtCompileTime,Dynamic,ColMajor,MatrixType::MaxRowsAtCompileTime> X(rows,maxBlockSize); + Matrix<Scalar,MatrixType::ColsAtCompileTime,Dynamic,ColMajor,MatrixType::MaxColsAtCompileTime> Y(cols,maxBlockSize); + Index blockSize = (std::min)(maxBlockSize,size); + + Index k = 0; + for(k = 0; k < size; k += blockSize) + { + Index bs = (std::min)(size-k,blockSize); // actual size of the block + Index brows = rows - k; // rows of the block + Index bcols = cols - k; // columns of the block + + // partition the matrix A: + // + // | A00 A01 A02 | + // | | + // A = | A10 A11 A12 | + // | | + // | A20 A21 A22 | + // + // where A11 is a bs x bs diagonal block, + // and let: + // | A11 A12 | + // B = | | + // | A21 A22 | + + BlockType B = A.block(k,k,brows,bcols); + + // This stage performs the bidiagonalization of A11, A21, A12, and updating of A22. + // Finally, the algorithm continue on the updated A22. + // + // However, if B is too small, or A22 empty, then let's use an unblocked strategy + if(k+bs==cols || bcols<48) // somewhat arbitrary threshold + { + upperbidiagonalization_inplace_unblocked(B, + &(bidiagonal.template diagonal<0>().coeffRef(k)), + &(bidiagonal.template diagonal<1>().coeffRef(k)), + X.data() + ); + break; // We're done + } + else + { + upperbidiagonalization_blocked_helper<BlockType>( B, + &(bidiagonal.template diagonal<0>().coeffRef(k)), + &(bidiagonal.template diagonal<1>().coeffRef(k)), + bs, + X.topLeftCorner(brows,bs), + Y.topLeftCorner(bcols,bs) + ); + } + } +} + +template<typename _MatrixType> +UpperBidiagonalization<_MatrixType>& UpperBidiagonalization<_MatrixType>::computeUnblocked(const _MatrixType& matrix) +{ + Index rows = matrix.rows(); + Index cols = matrix.cols(); + + eigen_assert(rows >= cols && "UpperBidiagonalization is only for Arices satisfying rows>=cols."); + + m_householder = matrix; + + ColVectorType temp(rows); + + upperbidiagonalization_inplace_unblocked(m_householder, + &(m_bidiagonal.template diagonal<0>().coeffRef(0)), + &(m_bidiagonal.template diagonal<1>().coeffRef(0)), + temp.data()); + + m_isInitialized = true; + return *this; +} + +template<typename _MatrixType> +UpperBidiagonalization<_MatrixType>& UpperBidiagonalization<_MatrixType>::compute(const _MatrixType& matrix) +{ + Index rows = matrix.rows(); + Index cols = matrix.cols(); + + eigen_assert(rows >= cols && "UpperBidiagonalization is only for Arices satisfying rows>=cols."); + + m_householder = matrix; + upperbidiagonalization_inplace_blocked(m_householder, m_bidiagonal); + m_isInitialized = true; return *this; }
diff --git a/Eigen/src/SparseCore/SparseBlock.h b/Eigen/src/SparseCore/SparseBlock.h index efd4428..a0ba764 100644 --- a/Eigen/src/SparseCore/SparseBlock.h +++ b/Eigen/src/SparseCore/SparseBlock.h
@@ -394,6 +394,8 @@ protected: friend class InnerIterator; friend class ReverseInnerIterator; + + EIGEN_INHERIT_ASSIGNMENT_OPERATORS(BlockImpl) typename XprType::Nested m_matrix; const internal::variable_if_dynamic<Index, XprType::RowsAtCompileTime == 1 ? 0 : Dynamic> m_startRow;
diff --git a/unsupported/Eigen/src/MatrixFunctions/MatrixLogarithm.h b/unsupported/Eigen/src/MatrixFunctions/MatrixLogarithm.h index 6248eee..d46ccc1 100644 --- a/unsupported/Eigen/src/MatrixFunctions/MatrixLogarithm.h +++ b/unsupported/Eigen/src/MatrixFunctions/MatrixLogarithm.h
@@ -66,7 +66,7 @@ } /* \brief Get suitable degree for Pade approximation. (specialized for RealScalar = float) */ -int matrix_log_get_pade_degree(float normTminusI) +inline int matrix_log_get_pade_degree(float normTminusI) { const float maxNormForPade[] = { 2.5111573934555054e-1 /* degree = 3 */ , 4.0535837411880493e-1, 5.3149729967117310e-1 }; @@ -80,7 +80,7 @@ } /* \brief Get suitable degree for Pade approximation. (specialized for RealScalar = double) */ -int matrix_log_get_pade_degree(double normTminusI) +inline int matrix_log_get_pade_degree(double normTminusI) { const double maxNormForPade[] = { 1.6206284795015624e-2 /* degree = 3 */ , 5.3873532631381171e-2, 1.1352802267628681e-1, 1.8662860613541288e-1, 2.642960831111435e-1 }; @@ -94,7 +94,7 @@ } /* \brief Get suitable degree for Pade approximation. (specialized for RealScalar = long double) */ -int matrix_log_get_pade_degree(long double normTminusI) +inline int matrix_log_get_pade_degree(long double normTminusI) { #if LDBL_MANT_DIG == 53 // double precision const long double maxNormForPade[] = { 1.6206284795015624e-2L /* degree = 3 */ , 5.3873532631381171e-2L,
diff --git a/unsupported/Eigen/src/SVD/BDCSVD.h b/unsupported/Eigen/src/SVD/BDCSVD.h index d5f0a3f..80006fd 100644 --- a/unsupported/Eigen/src/SVD/BDCSVD.h +++ b/unsupported/Eigen/src/SVD/BDCSVD.h
@@ -70,6 +70,7 @@ typedef Matrix<Scalar, Dynamic, Dynamic> MatrixX; typedef Matrix<RealScalar, Dynamic, Dynamic> MatrixXr; typedef Matrix<RealScalar, Dynamic, 1> VectorType; + typedef Array<RealScalar, Dynamic, 1> ArrayXr; /** \brief Default Constructor. * @@ -78,7 +79,7 @@ */ BDCSVD() : SVDBase<_MatrixType>::SVDBase(), - algoswap(ALGOSWAP) + algoswap(ALGOSWAP), m_numIters(0) {} @@ -90,7 +91,7 @@ */ BDCSVD(Index rows, Index cols, unsigned int computationOptions = 0) : SVDBase<_MatrixType>::SVDBase(), - algoswap(ALGOSWAP) + algoswap(ALGOSWAP), m_numIters(0) { allocate(rows, cols, computationOptions); } @@ -107,7 +108,7 @@ */ BDCSVD(const MatrixType& matrix, unsigned int computationOptions = 0) : SVDBase<_MatrixType>::SVDBase(), - algoswap(ALGOSWAP) + algoswap(ALGOSWAP), m_numIters(0) { compute(matrix, computationOptions); } @@ -197,9 +198,14 @@ private: void allocate(Index rows, Index cols, unsigned int computationOptions); - void divide (Index firstCol, Index lastCol, Index firstRowW, - Index firstColW, Index shift); + void divide(Index firstCol, Index lastCol, Index firstRowW, Index firstColW, Index shift); void computeSVDofM(Index firstCol, Index n, MatrixXr& U, VectorType& singVals, MatrixXr& V); + void computeSingVals(const ArrayXr& col0, const ArrayXr& diag, VectorType& singVals, + ArrayXr& shifts, ArrayXr& mus); + void perturbCol0(const ArrayXr& col0, const ArrayXr& diag, const VectorType& singVals, + const ArrayXr& shifts, const ArrayXr& mus, ArrayXr& zhat); + void computeSingVecs(const ArrayXr& zhat, const ArrayXr& diag, const VectorType& singVals, + const ArrayXr& shifts, const ArrayXr& mus, MatrixXr& U, MatrixXr& V); void deflation43(Index firstCol, Index shift, Index i, Index size); void deflation44(Index firstColu , Index firstColm, Index firstRowW, Index firstColW, Index i, Index j, Index size); void deflation(Index firstCol, Index lastCol, Index k, Index firstRowW, Index firstColW, Index shift); @@ -212,7 +218,9 @@ Index nRec; int algoswap; bool isTranspose, compU, compV; - + +public: + int m_numIters; }; //end class BDCSVD @@ -484,12 +492,9 @@ template <typename MatrixType> void BDCSVD<MatrixType>::computeSVDofM(Index firstCol, Index n, MatrixXr& U, VectorType& singVals, MatrixXr& V) { - using std::abs; - Block<MatrixXr> block = m_computed.block(firstCol, firstCol, n, n); - // TODO Get rid of these copies (?) - Array<RealScalar, Dynamic, 1> col0 = m_computed.block(firstCol, firstCol, n, 1); - Array<RealScalar, Dynamic, 1> diag = m_computed.block(firstCol, firstCol, n, n).diagonal(); + ArrayXr col0 = m_computed.block(firstCol, firstCol, n, 1); + ArrayXr diag = m_computed.block(firstCol, firstCol, n, n).diagonal(); diag(0) = 0; // compute singular values and vectors (in decreasing order) @@ -499,7 +504,25 @@ if (col0.hasNaN() || diag.hasNaN()) return; - Array<RealScalar, Dynamic, 1> shifts(n), mus(n); + ArrayXr shifts(n), mus(n), zhat(n); + computeSingVals(col0, diag, singVals, shifts, mus); + perturbCol0(col0, diag, singVals, shifts, mus, zhat); + computeSingVecs(zhat, diag, singVals, shifts, mus, U, V); + + // Reverse order so that singular values in increased order + singVals.reverseInPlace(); + U.leftCols(n) = U.leftCols(n).rowwise().reverse().eval(); + if (compV) V = V.rowwise().reverse().eval(); +} + +template <typename MatrixType> +void BDCSVD<MatrixType>::computeSingVals(const ArrayXr& col0, const ArrayXr& diag, + VectorType& singVals, ArrayXr& shifts, ArrayXr& mus) +{ + using std::abs; + using std::swap; + + Index n = col0.size(); for (Index k = 0; k < n; ++k) { if (col0(k) == 0) { // entry is deflated, so singular value is on diagonal @@ -509,7 +532,7 @@ continue; } - // otherwise, use bisection to find singular value + // otherwise, use secular equation to find singular value RealScalar left = diag(k); RealScalar right = (k != n-1) ? diag(k+1) : (diag(n-1) + col0.matrix().norm()); @@ -518,49 +541,98 @@ RealScalar fMid = 1 + (col0.square() / ((diag + mid) * (diag - mid))).sum(); RealScalar shift; - if (k == 0 || fMid > 0) shift = left; + if (k == n-1 || fMid > 0) shift = left; else shift = right; - - // measure everything relative to shifted - Array<RealScalar, Dynamic, 1> diagShifted = diag - shift; - RealScalar leftShifted, rightShifted; + + // measure everything relative to shift + ArrayXr diagShifted = diag - shift; + + // initial guess + RealScalar muPrev, muCur; if (shift == left) { - leftShifted = 1e-30; - if (k == 0) rightShifted = right - left; - else rightShifted = (right - left) * 0.6; // theoretically we can take 0.5, but let's be safe + muPrev = (right - left) * 0.1; + if (k == n-1) muCur = right - left; + else muCur = (right - left) * 0.5; } else { - leftShifted = -(right - left) * 0.6; - rightShifted = -1e-30; + muPrev = -(right - left) * 0.1; + muCur = -(right - left) * 0.5; } - RealScalar fLeft = 1 + (col0.square() / ((diagShifted - leftShifted) * (diag + shift + leftShifted))).sum(); - RealScalar fRight = 1 + (col0.square() / ((diagShifted - rightShifted) * (diag + shift + rightShifted))).sum(); - assert(fLeft * fRight < 0); - - while (rightShifted - leftShifted > 2 * NumTraits<RealScalar>::epsilon() * (std::max)(abs(leftShifted), abs(rightShifted))) { - RealScalar midShifted = (leftShifted + rightShifted) / 2; - RealScalar fMid = 1 + (col0.square() / ((diagShifted - midShifted) * (diag + shift + midShifted))).sum(); - if (fLeft * fMid < 0) { - rightShifted = midShifted; - fRight = fMid; + RealScalar fPrev = 1 + (col0.square() / ((diagShifted - muPrev) * (diag + shift + muPrev))).sum(); + RealScalar fCur = 1 + (col0.square() / ((diagShifted - muCur) * (diag + shift + muCur))).sum(); + if (abs(fPrev) < abs(fCur)) { + swap(fPrev, fCur); + swap(muPrev, muCur); + } + + // rational interpolation: fit a function of the form a / mu + b through the two previous + // iterates and use its zero to compute the next iterate + bool useBisection = false; + while (abs(muCur - muPrev) > 8 * NumTraits<RealScalar>::epsilon() * (std::max)(abs(muCur), abs(muPrev)) && fCur != fPrev && !useBisection) { + ++m_numIters; + + RealScalar a = (fCur - fPrev) / (1/muCur - 1/muPrev); + RealScalar b = fCur - a / muCur; + + muPrev = muCur; + fPrev = fCur; + muCur = -a / b; + fCur = 1 + (col0.square() / ((diagShifted - muCur) * (diag + shift + muCur))).sum(); + + if (shift == left && (muCur < 0 || muCur > right - left)) useBisection = true; + if (shift == right && (muCur < -(right - left) || muCur > 0)) useBisection = true; + } + + // fall back on bisection method if rational interpolation did not work + if (useBisection) { + RealScalar leftShifted, rightShifted; + if (shift == left) { + leftShifted = 1e-30; + if (k == 0) rightShifted = right - left; + else rightShifted = (right - left) * 0.6; // theoretically we can take 0.5, but let's be safe } else { - leftShifted = midShifted; - fLeft = fMid; + leftShifted = -(right - left) * 0.6; + rightShifted = -1e-30; } + + RealScalar fLeft = 1 + (col0.square() / ((diagShifted - leftShifted) * (diag + shift + leftShifted))).sum(); + RealScalar fRight = 1 + (col0.square() / ((diagShifted - rightShifted) * (diag + shift + rightShifted))).sum(); + assert(fLeft * fRight < 0); + + while (rightShifted - leftShifted > 2 * NumTraits<RealScalar>::epsilon() * (std::max)(abs(leftShifted), abs(rightShifted))) { + RealScalar midShifted = (leftShifted + rightShifted) / 2; + RealScalar fMid = 1 + (col0.square() / ((diagShifted - midShifted) * (diag + shift + midShifted))).sum(); + if (fLeft * fMid < 0) { + rightShifted = midShifted; + fRight = fMid; + } else { + leftShifted = midShifted; + fLeft = fMid; + } + } + + muCur = (leftShifted + rightShifted) / 2; } - singVals[k] = shift + (leftShifted + rightShifted) / 2; + singVals[k] = shift + muCur; shifts[k] = shift; - mus[k] = (leftShifted + rightShifted) / 2; + mus[k] = muCur; // perturb singular value slightly if it equals diagonal entry to avoid division by zero later // (deflation is supposed to avoid this from happening) if (singVals[k] == left) singVals[k] *= 1 + NumTraits<RealScalar>::epsilon(); if (singVals[k] == right) singVals[k] *= 1 - NumTraits<RealScalar>::epsilon(); } +} - // zhat is perturbation of col0 for which singular vectors can be computed stably (see Section 3.1) - Array<RealScalar, Dynamic, 1> zhat(n); + +// zhat is perturbation of col0 for which singular vectors can be computed stably (see Section 3.1) +template <typename MatrixType> +void BDCSVD<MatrixType>::perturbCol0 + (const ArrayXr& col0, const ArrayXr& diag, const VectorType& singVals, + const ArrayXr& shifts, const ArrayXr& mus, ArrayXr& zhat) +{ + Index n = col0.size(); for (Index k = 0; k < n; ++k) { if (col0(k) == 0) zhat(k) = 0; @@ -583,12 +655,15 @@ else zhat(k) = -tmp; } } +} - MatrixXr Mhat = MatrixXr::Zero(n,n); - Mhat.diagonal() = diag; - Mhat.col(0) = zhat; - - // compute singular vectors +// compute singular vectors +template <typename MatrixType> +void BDCSVD<MatrixType>::computeSingVecs + (const ArrayXr& zhat, const ArrayXr& diag, const VectorType& singVals, + const ArrayXr& shifts, const ArrayXr& mus, MatrixXr& U, MatrixXr& V) +{ + Index n = zhat.size(); for (Index k = 0; k < n; ++k) { if (zhat(k) == 0) { U.col(k) = VectorType::Unit(n+1, k); @@ -606,11 +681,6 @@ } } U.col(n) = VectorType::Unit(n+1, n); - - // Reverse order so that singular values in increased order - singVals.reverseInPlace(); - U.leftCols(n) = U.leftCols(n).rowwise().reverse().eval(); - if (compV) V = V.rowwise().reverse().eval(); } @@ -692,8 +762,11 @@ template <typename MatrixType> void BDCSVD<MatrixType>::deflation(Index firstCol, Index lastCol, Index k, Index firstRowW, Index firstColW, Index shift){ //condition 4.1 - RealScalar EPS = NumTraits<RealScalar>::epsilon() * 10 * (std::max<RealScalar>(m_computed(firstCol + shift + 1, firstCol + shift + 1), m_computed(firstCol + k, firstCol + k))); + using std::sqrt; const Index length = lastCol + 1 - firstCol; + RealScalar norm1 = m_computed.block(firstCol+shift, firstCol+shift, length, 1).squaredNorm(); + RealScalar norm2 = m_computed.block(firstCol+shift, firstCol+shift, length, length).diagonal().squaredNorm(); + RealScalar EPS = 10 * NumTraits<RealScalar>::epsilon() * sqrt(norm1 + norm2); if (m_computed(firstCol + shift, firstCol + shift) < EPS){ m_computed(firstCol + shift, firstCol + shift) = EPS; }