BDCSVD: eliminate temporaries flagged by long-standing FIXMEs

Merge request: libeigen/eigen!2450

Co-authored-by: Rasmus Munk Larsen <rmlarsen@gmail.com>
diff --git a/Eigen/src/SVD/BDCSVD.h b/Eigen/src/SVD/BDCSVD.h
index 35c0a92..2a8499a 100644
--- a/Eigen/src/SVD/BDCSVD.h
+++ b/Eigen/src/SVD/BDCSVD.h
@@ -256,6 +256,9 @@
   internal::UpperBidiagonalization<MatrixX> bid;
   MatrixX copyWorkspace;
   MatrixX reducedTriangle;
+  // Reused workspace for HouseholderSequence::applyThisOnTheLeft in copyUV().
+  // Without this, each apply allocates a fresh row vector.
+  Matrix<Scalar, 1, Dynamic, RowMajor> m_householderWorkspace;
 
   using Base::m_computationOptions;
   using Base::m_computeThinU;
@@ -363,9 +366,14 @@
   //**** step 2 - Divide & Conquer
   m_impl.naiveU().setZero();
   m_impl.naiveV().setZero();
-  // FIXME: this line involves a temporary matrix.
-  m_impl.computed().topRows(diagSize()) = bid.bidiagonal().toDenseMatrix().transpose();
-  m_impl.computed().template bottomRows<1>().setZero();
+  // The transposed bidiagonal has only the main diagonal and one sub-diagonal;
+  // fill those directly instead of materializing a dense temporary.
+  // Note: BandMatrix::diagonal<N>() const has a latent type bug (returns
+  // Block<CoefficientsType, ...> instead of Block<const CoefficientsType, ...>),
+  // so use the index-based overload which is correctly const-qualified.
+  m_impl.computed().setZero();
+  m_impl.computed().topRows(diagSize()).diagonal() = bid.bidiagonal().diagonal();
+  m_impl.computed().topRows(diagSize()).template diagonal<-1>() = bid.bidiagonal().diagonal(1);
   m_impl.divide(0, diagSize() - 1, 0, 0, 0);
   m_info = m_impl.info();
   m_numIters = m_impl.numIters();
@@ -410,28 +418,33 @@
 EIGEN_DONT_INLINE void BDCSVD<MatrixType, Options>::copyUV(const HouseholderU& householderU,
                                                            const HouseholderV& householderV,
                                                            const NaiveU& naiveU,
                                                            const NaiveV& naiveV) {
-  // Note exchange of U and V: m_matrixU is set from m_naiveV and vice versa
+  // Note exchange of U and V: m_matrixU is set from m_naiveV and vice versa.
+  // Cast the diagSize x diagSize block (rather than the full naive matrix) to avoid materializing
+  // a full-size temporary when Scalar != RealScalar; reuse m_householderWorkspace across the two
+  // applyThisOnTheLeft calls so each does not allocate a fresh row vector.
   if (computeU()) {
     Index Ucols = m_computeThinU ? diagSize() : rows();
     m_matrixU = MatrixX::Identity(rows(), Ucols);
     m_matrixU.topLeftCorner(diagSize(), diagSize()) =
-        naiveV.template cast<Scalar>().topLeftCorner(diagSize(), diagSize());
-    // FIXME: the following conditionals involve temporary buffers.
-    if (m_useQrDecomp)
-      m_matrixU.topLeftCorner(householderU.cols(), diagSize()).applyOnTheLeft(householderU);
-    else
-      m_matrixU.applyOnTheLeft(householderU);
+        naiveV.topLeftCorner(diagSize(), diagSize()).template cast<Scalar>();
+    if (m_useQrDecomp) {
+      auto sub = m_matrixU.topLeftCorner(householderU.cols(), diagSize());
+      householderU.applyThisOnTheLeft(sub, m_householderWorkspace);
+    } else {
+      householderU.applyThisOnTheLeft(m_matrixU, m_householderWorkspace);
+    }
   }
   if (computeV()) {
     Index Vcols = m_computeThinV ? diagSize() : cols();
     m_matrixV = MatrixX::Identity(cols(), Vcols);
     m_matrixV.topLeftCorner(diagSize(), diagSize()) =
-        naiveU.template cast<Scalar>().topLeftCorner(diagSize(), diagSize());
-    // FIXME: the following conditionals involve temporary buffers.
-    if (m_useQrDecomp)
-      m_matrixV.topLeftCorner(householderV.cols(), diagSize()).applyOnTheLeft(householderV);
-    else
-      m_matrixV.applyOnTheLeft(householderV);
+        naiveU.topLeftCorner(diagSize(), diagSize()).template cast<Scalar>();
+    if (m_useQrDecomp) {
+      auto sub = m_matrixV.topLeftCorner(householderV.cols(), diagSize());
+      householderV.applyThisOnTheLeft(sub, m_householderWorkspace);
+    } else {
+      householderV.applyThisOnTheLeft(m_matrixV, m_householderWorkspace);
+    }
   }
 }
diff --git a/Eigen/src/SVD/BDCSVDImpl.h b/Eigen/src/SVD/BDCSVDImpl.h
index 7ba5bbf..9b818fe 100644
--- a/Eigen/src/SVD/BDCSVDImpl.h
+++ b/Eigen/src/SVD/BDCSVDImpl.h
@@ -86,6 +86,10 @@
   MatrixXr m_computed;
   ArrayXr m_workspace;
   ArrayXi m_workspaceI;
+  // Reused base-case JacobiSVDs (one per option set) so that recursive divide()
+  // calls don't reallocate JacobiSVD's internal U/V/sigma buffers each time.
+  JacobiSVD<MatrixXr, ComputeFullU> m_baseSvdU;
+  JacobiSVD<MatrixXr, ComputeFullU | ComputeFullV> m_baseSvdUV;
   int m_algoswap;
   bool m_compU, m_compV;
   int m_numIters;
@@ -201,13 +205,10 @@
   // We use the other algorithm which is more efficient for small
   // matrices.
   if (n < m_algoswap) {
-    // FIXME: this block involves temporaries.
     if (m_compV) {
-      JacobiSVD<MatrixXr, ComputeFullU | ComputeFullV> baseSvd;
-      computeBaseCase(baseSvd, n, firstCol, firstRowW, firstColW, shift);
+      computeBaseCase(m_baseSvdUV, n, firstCol, firstRowW, firstColW, shift);
     } else {
-      JacobiSVD<MatrixXr, ComputeFullU> baseSvd;
-      computeBaseCase(baseSvd, n, firstCol, firstRowW, firstColW, shift);
+      computeBaseCase(m_baseSvdU, n, firstCol, firstRowW, firstColW, shift);
     }
     return;
   }