blob: 78f72d59babac13b71bd44b563a4fbc898cf42e0 [file] [edit]
// This file is part of Eigen, a lightweight C++ template library
// for linear algebra.
//
// This Source Code Form is subject to the terms of the Mozilla
// Public License v. 2.0. If a copy of the MPL was not distributed
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
// SPDX-FileCopyrightText: The Eigen Authors
// SPDX-License-Identifier: MPL-2.0
#ifndef EIGEN_RANDCOLPIVOTINGHOUSEHOLDERQR_H
#define EIGEN_RANDCOLPIVOTINGHOUSEHOLDERQR_H
#include <random>
// IWYU pragma: private
#include "./InternalHeaderCheck.h"
namespace Eigen {
namespace internal {
template <typename MatrixType_, typename PermutationIndex_>
struct traits<RandColPivHouseholderQR<MatrixType_, PermutationIndex_>> : traits<MatrixType_> {
typedef MatrixXpr XprKind;
typedef SolverStorage StorageKind;
typedef PermutationIndex_ PermutationIndex;
enum { Flags = 0 };
};
// Fill `mat` with iid samples drawn from a real standard normal distribution.
template <typename Derived, typename Engine>
EIGEN_STRONG_INLINE std::enable_if_t<!NumTraits<typename Derived::Scalar>::IsComplex> fill_gaussian(
MatrixBase<Derived>& mat, Engine& engine) {
typedef typename Derived::Scalar Scalar;
std::normal_distribution<Scalar> dist(Scalar(0), Scalar(1));
for (Index j = 0; j < mat.cols(); ++j)
for (Index i = 0; i < mat.rows(); ++i) mat.coeffRef(i, j) = dist(engine);
}
// Fill `mat` with iid samples drawn from a complex standard normal
// distribution (real and imaginary parts sampled independently).
template <typename Derived, typename Engine>
EIGEN_STRONG_INLINE std::enable_if_t<NumTraits<typename Derived::Scalar>::IsComplex> fill_gaussian(
MatrixBase<Derived>& mat, Engine& engine) {
typedef typename Derived::Scalar Scalar;
typedef typename NumTraits<Scalar>::Real RealScalar;
std::normal_distribution<RealScalar> dist(RealScalar(0), RealScalar(1));
for (Index j = 0; j < mat.cols(); ++j)
for (Index i = 0; i < mat.rows(); ++i) mat.coeffRef(i, j) = Scalar(dist(engine), dist(engine));
}
// Stable LAWN-176 column-norm downdate after a Householder reflector has
// been applied. `pivot_entry` is the entry of the just-pivoted row above
// column `j`; `tail_norm_fn` recomputes the trailing-column norm from
// scratch when the running estimate has lost too much precision (see
// http://www.netlib.org/lapack/lawnspdf/lawn176.pdf).
template <typename RealScalar, typename Scalar, typename RecomputeFn>
EIGEN_STRONG_INLINE void lawn176_norm_downdate(RealScalar& norm_updated, RealScalar& norm_direct, Scalar pivot_entry,
RealScalar downdate_threshold, RecomputeFn&& tail_norm_fn) {
using std::abs;
if (numext::is_exactly_zero(norm_updated)) return;
RealScalar t = abs(pivot_entry) / norm_updated;
t = (RealScalar(1) + t) * (RealScalar(1) - t);
if (t < RealScalar(0)) t = RealScalar(0);
RealScalar t2 = t * numext::abs2<RealScalar>(norm_updated / norm_direct);
if (t2 <= downdate_threshold) {
norm_direct = tail_norm_fn();
norm_updated = norm_direct;
} else {
norm_updated *= numext::sqrt(t);
}
}
} // end namespace internal
/** \ingroup QR_Module
*
* \class RandColPivHouseholderQR
*
* \brief Randomized blocked Householder rank-revealing QR with column pivoting
*
* \tparam MatrixType_ the type of the matrix being decomposed.
* \tparam PermutationIndex_ the type of the permutation indices.
*
* Computes \f$ \mathbf{A} \mathbf{P} = \mathbf{Q} \mathbf{R} \f$ using the
* BQRRP framework introduced by Melnichenko, Murray, Killian, Demmel,
* Mahoney, Luszczek, and Gates, *Anatomy of High-Performance Column-Pivoted
* QR Decomposition*, arXiv:2507.00976 (2025). BQRRP is itself a refinement
* of the earlier randomized blocked QRCP schemes of Duersch and Gu (RQRCP,
* SIAM J. Sci. Comput. 39(4):C263–C291, 2017, arXiv:1509.06820) and
* Martinsson, Quintana-Ortí, Heavner, and van de Geijn (HQRRP, SIAM J. Sci.
* Comput. 39(2):C96–C115, 2017, arXiv:1512.02671).
*
* The classical Businger–Golub pivot scan is replaced by selecting blocks
* of \c b pivots from a small Gaussian sketch \f$ \mathbf{Y} = \mathbf{G}
* \mathbf{A} \f$ with \f$ \mathbf{G} \in \mathbb{R}^{b \times m} \f$
* having i.i.d.\ standard normal entries. Following the BQRRP paper, pivot
* decisions on the sketch are produced by a partial-pivoted LU on the
* transposed sketch (cheap and robust), the panel itself is factored with
* unpivoted blocked Householder QR, the trailing block is updated through
* the compact-WY apply, and the sketch is downdated by the closed-form
* Duersch–Gu update. After each block step the asymptotic flop count
* matches that of an unpivoted blocked Householder QR
* (\f$ 2mn^2 - \tfrac{2}{3}n^3 \f$ for \f$ m \ge n \f$); almost all
* computation is BLAS-3, which lifts the BLAS-2 ceiling that limits
* ColPivHouseholderQR.
*
* The pivot-quality tradeoff is empirically minor: on the matrices in the
* cited papers, the rank-revealing behavior is comparable to classical
* column pivoting (LAPACK \c geqp3).
*
* The block size \c b can be configured via setBlockSize(); a value of
* \c 0 (the default) leaves the algorithm free to pick a size that scales
* with the input. The seed for the internal RNG can be fixed with
* setSeed() for reproducible output.
*
* \note The \c setOversampling() / \c oversampling() accessors are
* retained as documented no-ops for source compatibility with earlier
* HQRRP-style versions of this class. The BQRRP framework with
* partial-pivoted-LU pivot selection makes oversampling unnecessary
* (see Section 2.1 of arXiv:2507.00976).
*
* This class supports the \link InplaceDecomposition inplace decomposition
* \endlink mechanism. The same public API as ColPivHouseholderQR is
* provided.
*
* \sa ColPivHouseholderQR, MatrixBase::randColPivHouseholderQr()
*/
template <typename MatrixType_, typename PermutationIndex_>
class RandColPivHouseholderQR : public SolverBase<RandColPivHouseholderQR<MatrixType_, PermutationIndex_>>,
public RankRevealingBase<RandColPivHouseholderQR<MatrixType_, PermutationIndex_>> {
public:
typedef MatrixType_ MatrixType;
typedef SolverBase<RandColPivHouseholderQR> Base;
typedef RankRevealingBase<RandColPivHouseholderQR> RankRevealingBase_;
friend class SolverBase<RandColPivHouseholderQR>;
friend class RankRevealingBase<RandColPivHouseholderQR>;
using RankRevealingBase_::dimensionOfKernel;
using RankRevealingBase_::isInjective;
using RankRevealingBase_::isInvertible;
using RankRevealingBase_::isSurjective;
using RankRevealingBase_::maxPivot;
using RankRevealingBase_::nonzeroPivots;
using RankRevealingBase_::rank;
using RankRevealingBase_::setThreshold;
using RankRevealingBase_::threshold;
typedef PermutationIndex_ PermutationIndex;
EIGEN_GENERIC_PUBLIC_INTERFACE(RandColPivHouseholderQR)
enum {
MaxRowsAtCompileTime = MatrixType::MaxRowsAtCompileTime,
MaxColsAtCompileTime = MatrixType::MaxColsAtCompileTime
};
typedef typename internal::plain_diag_type<MatrixType>::type HCoeffsType;
typedef PermutationMatrix<ColsAtCompileTime, MaxColsAtCompileTime, PermutationIndex> PermutationType;
typedef typename internal::plain_row_type<MatrixType, PermutationIndex>::type IntRowVectorType;
typedef typename internal::plain_row_type<MatrixType>::type RowVectorType;
typedef typename internal::plain_row_type<MatrixType, RealScalar>::type RealRowVectorType;
typedef HouseholderSequence<MatrixType, internal::remove_all_t<typename HCoeffsType::ConjugateReturnType>>
HouseholderSequenceType;
typedef typename MatrixType::PlainObject PlainObject;
private:
// Default `m_blockSize == 0` means: let computeInPlace pick a size
// tuned to the input dimensions. The BQRRP paper recommends b ~= n/32
// for large square inputs and "as large as feasible" for small ones.
static constexpr Index kAutoBlockSize = 0;
// Heuristic block size for `m_blockSize == kAutoBlockSize`. Returns a
// value in roughly [48, 1024] that scales with `size`.
static Index computeAutoBlockSize(Index size) {
if (size < Index(192)) return Index(48);
return numext::mini(numext::maxi(Index(48), size / Index(32)), Index(1024));
}
void init(Index rows, Index cols) {
Index diag = numext::mini(rows, cols);
m_hCoeffs.resize(diag);
m_colsPermutation.resize(cols);
m_temp.resize(cols);
m_isInitialized = false;
}
public:
/** \brief Default constructor. */
RandColPivHouseholderQR() = default;
/** \brief Constructor with memory preallocation. */
RandColPivHouseholderQR(Index rows, Index cols) : m_qr(rows, cols) { init(rows, cols); }
/** \brief Constructs and computes a QR factorization from \a matrix. */
template <typename InputType>
explicit RandColPivHouseholderQR(const EigenBase<InputType>& matrix) : m_qr(matrix.rows(), matrix.cols()) {
init(matrix.rows(), matrix.cols());
compute(matrix.derived());
}
/** \brief Inplace constructor: takes a Ref and decomposes in place. */
template <typename InputType>
explicit RandColPivHouseholderQR(EigenBase<InputType>& matrix) : m_qr(matrix.derived()) {
init(matrix.rows(), matrix.cols());
computeInPlace();
}
#ifdef EIGEN_PARSED_BY_DOXYGEN
template <typename Rhs>
inline Solve<RandColPivHouseholderQR, Rhs> solve(const MatrixBase<Rhs>& b) const;
#endif
HouseholderSequenceType householderQ() const;
HouseholderSequenceType matrixQ() const { return householderQ(); }
const MatrixType& matrixQR() const {
eigen_assert(m_isInitialized && "RandColPivHouseholderQR is not initialized.");
return m_qr;
}
const MatrixType& matrixR() const {
eigen_assert(m_isInitialized && "RandColPivHouseholderQR is not initialized.");
return m_qr;
}
template <typename InputType>
RandColPivHouseholderQR& compute(const EigenBase<InputType>& matrix);
const PermutationType& colsPermutation() const {
eigen_assert(m_isInitialized && "RandColPivHouseholderQR is not initialized.");
return m_colsPermutation;
}
typename MatrixType::Scalar determinant() const;
typename MatrixType::RealScalar absDeterminant() const;
typename MatrixType::RealScalar logAbsDeterminant() const;
typename MatrixType::Scalar signDeterminant() const;
RealScalar pivotCoeff(Index i) const {
using std::abs;
return abs(m_qr.coeff(i, i));
}
inline Inverse<RandColPivHouseholderQR> inverse() const {
eigen_assert(m_isInitialized && "RandColPivHouseholderQR is not initialized.");
return Inverse<RandColPivHouseholderQR>(*this);
}
inline Index rows() const { return m_qr.rows(); }
inline Index cols() const { return m_qr.cols(); }
const HCoeffsType& hCoeffs() const { return m_hCoeffs; }
ComputationInfo info() const {
eigen_assert(m_isInitialized && "Decomposition is not initialized.");
return Success;
}
/** \brief Sets the panel block size \c b.
*
* Larger \c b increases the proportion of work performed in level-3
* BLAS but raises the per-iteration overhead. Pass \c b = 0 (the
* default) to let the algorithm pick a size that scales with the
* input dimensions; this matches the BQRRP paper's recommendation of
* roughly \c n/32 for large square inputs.
*
* For small matrices (where \c min(m,n) < 2b) this class transparently
* falls back to the unblocked column-pivoted scalar loop.
*/
RandColPivHouseholderQR& setBlockSize(Index b) {
eigen_assert(b >= 0 && "Block size must be non-negative.");
m_blockSize = b;
return *this;
}
/** \brief Sets the oversampling parameter \c p.
*
* \deprecated With the BQRRP-style LU-on-transposed-sketch pivot
* selection used by this class, oversampling provides no benefit (see
* Section 2.1 of arXiv:2507.00976: the first \c b pivots are
* independent of the oversampling factor). Retained for source
* compatibility; the value is ignored.
*/
RandColPivHouseholderQR& setOversampling(Index /*p*/) { return *this; }
/** \brief Fixes the seed of the internal RNG for reproducible factorization.
*
* If never called, each call to compute() draws a fresh seed from
* \c std::random_device.
*/
RandColPivHouseholderQR& setSeed(uint64_t seed) {
m_seed = seed;
m_seedSet = true;
return *this;
}
/** \brief Returns the user-set panel block size, or \c 0 if the
* algorithm should pick automatically. The actual block size used
* during compute() is not exposed.
*/
Index blockSize() const { return m_blockSize; }
/** \deprecated Returns 0; oversampling is no longer used. \sa setOversampling() */
Index oversampling() const { return Index(0); }
#ifndef EIGEN_PARSED_BY_DOXYGEN
template <typename RhsType, typename DstType>
void _solve_impl(const RhsType& rhs, DstType& dst) const;
template <bool Conjugate, typename RhsType, typename DstType>
void _solve_impl_transposed(const RhsType& rhs, DstType& dst) const;
#endif
protected:
EIGEN_STATIC_ASSERT_NON_INTEGER(Scalar)
void computeInPlace();
// Unblocked column-pivoted Householder QR on rows [row0, m) and columns
// [col0, col0 + ncols), using full-column swaps. Updates m_hCoeffs in
// [col0, col0 + ncols), m_colsPermutation, and the maxpivot tracker.
// Returns the number of column transpositions applied. Used both as
// the small-matrix fallback path and as the rank-deficient tail of the
// blocked path.
Index unblocked_pivoted_qr(Index row0, Index col0, Index ncols, RealScalar threshold_helper);
// Scan the diagonal of m_qr and set m_nonzero_pivots / m_maxpivot using
// the LAWN-176 threshold (same scale as ColPivHouseholderQR).
void finalize_rank(RealScalar threshold_helper);
MatrixType m_qr;
HCoeffsType m_hCoeffs;
PermutationType m_colsPermutation;
RowVectorType m_temp;
Index m_blockSize = kAutoBlockSize;
uint64_t m_seed = 0;
bool m_seedSet = false;
bool m_isInitialized = false;
Index m_det_p = 1;
};
template <typename MatrixType, typename PermutationIndex>
typename MatrixType::Scalar RandColPivHouseholderQR<MatrixType, PermutationIndex>::determinant() const {
eigen_assert(m_isInitialized && "RandColPivHouseholderQR is not initialized.");
eigen_assert(m_qr.rows() == m_qr.cols() && "You can't take the determinant of a non-square matrix!");
Scalar detQ;
internal::householder_determinant<HCoeffsType, Scalar, NumTraits<Scalar>::IsComplex>::run(m_hCoeffs, detQ);
return isInjective() ? (detQ * Scalar(m_det_p)) * m_qr.diagonal().prod() : Scalar(0);
}
template <typename MatrixType, typename PermutationIndex>
typename MatrixType::RealScalar RandColPivHouseholderQR<MatrixType, PermutationIndex>::absDeterminant() const {
using std::abs;
eigen_assert(m_isInitialized && "RandColPivHouseholderQR is not initialized.");
eigen_assert(m_qr.rows() == m_qr.cols() && "You can't take the determinant of a non-square matrix!");
return isInjective() ? abs(m_qr.diagonal().prod()) : RealScalar(0);
}
template <typename MatrixType, typename PermutationIndex>
typename MatrixType::RealScalar RandColPivHouseholderQR<MatrixType, PermutationIndex>::logAbsDeterminant() const {
eigen_assert(m_isInitialized && "RandColPivHouseholderQR is not initialized.");
eigen_assert(m_qr.rows() == m_qr.cols() && "You can't take the determinant of a non-square matrix!");
return isInjective() ? m_qr.diagonal().cwiseAbs().array().log().sum() : -NumTraits<RealScalar>::infinity();
}
template <typename MatrixType, typename PermutationIndex>
typename MatrixType::Scalar RandColPivHouseholderQR<MatrixType, PermutationIndex>::signDeterminant() const {
eigen_assert(m_isInitialized && "RandColPivHouseholderQR is not initialized.");
eigen_assert(m_qr.rows() == m_qr.cols() && "You can't take the determinant of a non-square matrix!");
Scalar detQ;
internal::householder_determinant<HCoeffsType, Scalar, NumTraits<Scalar>::IsComplex>::run(m_hCoeffs, detQ);
return isInjective() ? (detQ * Scalar(m_det_p)) * m_qr.diagonal().array().sign().prod() : Scalar(0);
}
template <typename MatrixType, typename PermutationIndex>
template <typename InputType>
RandColPivHouseholderQR<MatrixType, PermutationIndex>& RandColPivHouseholderQR<MatrixType, PermutationIndex>::compute(
const EigenBase<InputType>& matrix) {
m_qr = matrix.derived();
computeInPlace();
return *this;
}
template <typename MatrixType, typename PermutationIndex>
Index RandColPivHouseholderQR<MatrixType, PermutationIndex>::unblocked_pivoted_qr(Index row0, Index col0, Index ncols,
RealScalar threshold_helper) {
using std::abs;
const Index rows = m_qr.rows();
const Index col_end = col0 + ncols;
const Index sub_rows = rows - row0;
Index num_transpositions = 0;
Matrix<RealScalar, 1, Dynamic> norms_direct = m_qr.block(row0, col0, sub_rows, ncols).colwise().norm();
Matrix<RealScalar, 1, Dynamic> norms_updated = norms_direct;
const RealScalar downdate_threshold = numext::sqrt(NumTraits<RealScalar>::epsilon());
const Index size = (std::min)(sub_rows, ncols);
for (Index k = 0; k < size; ++k) {
Index biggest;
RealScalar biggest_sq = numext::abs2(norms_updated.tail(ncols - k).maxCoeff(&biggest));
biggest += k;
if (this->m_nonzero_pivots == m_qr.diagonalSize() && biggest_sq < threshold_helper * RealScalar(rows - row0 - k))
this->m_nonzero_pivots = col0 + k;
if (k != biggest) {
m_qr.col(col0 + k).swap(m_qr.col(col0 + biggest));
std::swap(norms_updated.coeffRef(k), norms_updated.coeffRef(biggest));
std::swap(norms_direct.coeffRef(k), norms_direct.coeffRef(biggest));
m_colsPermutation.applyTranspositionOnTheRight(col0 + k, col0 + biggest);
++num_transpositions;
}
RealScalar beta;
m_qr.col(col0 + k).tail(sub_rows - k).makeHouseholderInPlace(m_hCoeffs.coeffRef(col0 + k), beta);
m_qr.coeffRef(row0 + k, col0 + k) = beta;
if (abs(beta) > this->m_maxpivot) this->m_maxpivot = abs(beta);
// Apply the reflector only to the remaining panel columns. Columns at
// indices >= col_end are owned by the outer caller and updated via a
// BLAS-3 trailing update; in the tail/fallback path col_end equals
// m_qr.cols(), so this naturally degrades to a full apply.
const Index trail_cols = col_end - col0 - k - 1;
if (trail_cols > 0) {
m_qr.block(row0 + k, col0 + k + 1, sub_rows - k, trail_cols)
.applyHouseholderOnTheLeft(m_qr.col(col0 + k).tail(sub_rows - k - 1), m_hCoeffs.coeff(col0 + k),
&m_temp.coeffRef(col0 + k + 1));
}
for (Index j = k + 1; j < ncols; ++j) {
internal::lawn176_norm_downdate(norms_updated.coeffRef(j), norms_direct.coeffRef(j),
m_qr.coeff(row0 + k, col0 + j), downdate_threshold,
[&] { return m_qr.col(col0 + j).tail(sub_rows - k - 1).norm(); });
}
}
return num_transpositions;
}
template <typename MatrixType, typename PermutationIndex>
void RandColPivHouseholderQR<MatrixType, PermutationIndex>::finalize_rank(RealScalar threshold_helper) {
using std::abs;
const Index rows = m_qr.rows();
const Index size = (std::min)(rows, m_qr.cols());
// m_maxpivot is already up-to-date: the blocked path tracks it per
// panel, and unblocked_pivoted_qr tracks it per step.
// If neither the blocked path nor the unblocked tail tightened the
// rank cap, fall back to the LAWN-176 first-below-threshold scan.
// This handles the full-rank-blocked-path case where every panel
// was full rank and the only "small" diagonal entries are in the
// unblocked tail.
if (this->m_nonzero_pivots == size) {
for (Index i = 0; i < size; ++i) {
RealScalar a = abs(m_qr.coeff(i, i));
if (numext::abs2(a) < threshold_helper * RealScalar(rows - i)) {
this->m_nonzero_pivots = i;
break;
}
}
}
}
template <typename MatrixType, typename PermutationIndex>
void RandColPivHouseholderQR<MatrixType, PermutationIndex>::computeInPlace() {
eigen_assert(m_qr.cols() <= NumTraits<PermutationIndex>::highest());
const Index rows = m_qr.rows();
const Index cols = m_qr.cols();
const Index size = (std::min)(rows, cols);
m_hCoeffs.resize(size);
m_temp.resize(cols);
m_colsPermutation.resize(cols);
m_colsPermutation.setIdentity();
this->m_nonzero_pivots = size;
this->m_maxpivot = RealScalar(0);
Index num_transpositions = 0;
// Global rank-detection scale, derived from the original column norms
// exactly as ColPivHouseholderQR does (LAWN-176). All panel calls share
// this threshold so rank revelation is consistent across blocks.
RealScalar max_initial_norm = RealScalar(0);
for (Index j = 0; j < cols; ++j) max_initial_norm = numext::maxi(max_initial_norm, m_qr.col(j).norm());
const RealScalar threshold_helper =
cols == 0 ? RealScalar(0)
: numext::abs2<RealScalar>(max_initial_norm * NumTraits<RealScalar>::epsilon()) / RealScalar(rows);
const Index requested_b = (m_blockSize == kAutoBlockSize) ? computeAutoBlockSize(size) : m_blockSize;
const Index b = (std::min)(requested_b, size);
// For very small problems the sketch overhead dominates; use the
// unblocked pivoted loop for the whole matrix instead. We also need at
// least two full blocks to make blocking worthwhile, and `rows > b`
// so the trailing update has a non-empty bottom portion.
const bool can_block = (b > 0) && (size >= 2 * b) && (rows > b);
if (!can_block) {
num_transpositions += unblocked_pivoted_qr(0, 0, cols, threshold_helper);
m_det_p = (num_transpositions % 2) ? -1 : 1;
finalize_rank(threshold_helper);
m_isInitialized = true;
return;
}
typedef Matrix<Scalar, Dynamic, Dynamic, ColMajor> WorkMatrix;
typedef Matrix<Scalar, Dynamic, 1> WorkVector;
// Dynamic-sized Ref types so that a fixed-size MatrixType (e.g.
// Matrix<float, 8, 10>) and its run-time-sized blocks can both be
// wrapped without tripping Ref's compile-time-size check.
typedef Ref<WorkMatrix, 0, OuterStride<>> WorkMatrixRef;
typedef Ref<WorkVector> HCoeffsRef;
typedef Transpositions<Dynamic, Dynamic, PermutationIndex> IpivType;
// Hoisted workspaces — allocated once per compute() call. Worst-case
// sizes are computed from the first iteration (n_remain = cols,
// trail_cols = cols - b); subsequent iterations use shrinking
// sub-blocks via leftCols/topRows.
WorkMatrix Y(b, cols);
WorkMatrix YT(cols, b);
WorkMatrix sketch_R(b, b);
WorkMatrix tmp(b, cols - b);
IpivType ipiv(b);
WorkVector y_hcoeffs(b);
WorkVector y_temp(cols);
// Initialize the sketch Y = G * A. We don't keep G around; the
// Duersch-Gu update (step 24) maintains Y deterministically from
// here on.
{
WorkMatrix G(b, rows);
uint64_t seed = m_seed;
if (!m_seedSet) {
std::random_device rd;
seed = (uint64_t(rd()) << 32) | uint64_t(rd());
}
std::mt19937_64 engine(seed);
internal::fill_gaussian(G, engine);
Y.noalias() = G * m_qr;
}
Index k = 0;
bool blocked_terminated_early = false;
while (k + b <= size && cols - k > b) {
const Index n_remain = cols - k; // columns from k to end
const Index trail_cols = n_remain - b; // columns to the right of the panel
const Index sub_rows = rows - k; // rows from k to end
// === Step 7 (Algorithm 2): "crazy man's QRCP" on the sketch.
// Form Y_T = Y(:, k:)^T and run partial-pivoted LU on it. The IPIV
// vector tells us which b columns of Y(:, k:) (and thus which
// columns of m_qr.middleCols(k, n_remain)) to bring to the front.
auto Y_curr = Y.middleCols(k, n_remain);
auto Y_T = YT.topRows(n_remain);
Y_T.noalias() = Y_curr.transpose();
typename IpivType::StorageIndex nb_lu_transp = 0;
internal::partial_lu_inplace(Y_T, ipiv, nb_lu_transp);
// === Steps 9-11: apply IPIV to columns of m_qr and Y starting at
// absolute column k (LAPACK-style serial swaps; equivalent to
// applying Algorithm 4 of the BQRRP paper to the converted pivot
// vector).
for (Index i = 0; i < b; ++i) {
Index dst = static_cast<Index>(ipiv.coeff(i));
if (dst != i) {
m_qr.col(k + i).swap(m_qr.col(k + dst));
Y.col(k + i).swap(Y.col(k + dst));
m_colsPermutation.applyTranspositionOnTheRight(k + i, k + dst);
++num_transpositions;
}
}
// Materialize R_sk in the upper triangle of Y(:, k:) via unpivoted
// blocked Householder QR. We discard the resulting Householder
// vectors and tau; only R_sk feeds the step-24 sketch update.
// Wrap in `Ref` so householder_qr_inplace_blocked sees a flat
// MatrixQR — its internal `Block<MatrixQR, ...>` typedef does not
// compose with Block-of-Block expressions.
{
WorkMatrixRef Y_curr_ref(Y_curr);
Ref<WorkVector> y_hc_ref(y_hcoeffs);
internal::householder_qr_inplace_blocked<WorkMatrixRef, Ref<WorkVector>>::run(
Y_curr_ref, y_hc_ref, /*maxBlockSize=*/(std::min)(b, Index(48)), y_temp.data());
}
// Save R_sk_11 = upper-triangular portion of Y(:, k:k+b) before the
// panel QR overwrites the corresponding columns of m_qr (via the
// trailing update); we need it for step 24.
sketch_R = Y.block(0, k, b, b);
// === Step 12: tall unpivoted Householder QR on the panel.
auto panel = m_qr.block(k, k, sub_rows, b);
auto hCoeffsSegment = m_hCoeffs.segment(k, b);
{
WorkMatrixRef panel_ref(panel);
HCoeffsRef hc_ref(hCoeffsSegment);
internal::householder_qr_inplace_blocked<WorkMatrixRef, HCoeffsRef>::run(
panel_ref, hc_ref, /*maxBlockSize=*/(std::min)(b, Index(48)), m_temp.data());
}
this->m_maxpivot = (std::max)(this->m_maxpivot, m_qr.diagonal().segment(k, b).cwiseAbs().maxCoeff());
// Detect rank deficiency by scanning the panel's diagonal against
// a relative threshold (max pivot seen so far times the default
// RankRevealingBase tolerance). The LAWN-176 threshold used by
// ColPivHouseholderQR assumes a strictly monotonic diagonal —
// BQRRP's unpivoted panel QR breaks that assumption for matrices
// with closely-spaced singular values (e.g. partial isometries),
// so we use the same relative tolerance that RankRevealingBase::
// rank() applies to the final diagonal.
Index panel_rank = b;
{
using std::abs;
const RealScalar relative_cutoff = this->m_maxpivot * NumTraits<RealScalar>::epsilon() * RealScalar(4 * size);
for (Index i = 0; i < b; ++i) {
RealScalar a = abs(m_qr.coeff(k + i, k + i));
if (a <= relative_cutoff) {
panel_rank = i;
break;
}
}
}
// === Step 17: apply Q^H from the panel to the trailing block.
// The loop guard `cols - k > b` guarantees `trail_cols > 0`. We
// pass the un-conjugated coeffs because apply_block_householder_on_the_left
// conjugates internally when forward=false (matching how HouseholderQR
// drives this same routine).
auto trailing = m_qr.block(k, k + b, sub_rows, trail_cols);
internal::apply_block_householder_on_the_left(trailing, panel, hCoeffsSegment,
/*forward=*/false);
if (panel_rank < b) {
// Pin the rank cap and break out; the unblocked tail will
// process the remaining columns. The tail's m_nonzero_pivots
// write is gated on it still being unset (== diagonalSize),
// so a tighter cap from here survives.
if (this->m_nonzero_pivots == size) {
this->m_nonzero_pivots = k + panel_rank;
}
k += b;
blocked_terminated_early = true;
break;
}
// === Step 24: Duersch-Gu sketch update.
// Y(:, k+b:) := R_sk_12 - R_sk_11 * R_11^{-1} * R_12
// R_sk_12 currently sits in Y.middleCols(k+b, trail_cols) (the QR of
// the sketch above placed it there). R_sk_11 we saved into sketch_R.
// R_11 and R_12 are the corresponding blocks of m_qr after the
// panel QR + trailing update.
{
auto tmp_block = tmp.leftCols(trail_cols);
tmp_block = m_qr.block(k, k + b, b, trail_cols);
m_qr.block(k, k, b, b).template triangularView<Upper>().solveInPlace(tmp_block);
Y.middleCols(k + b, trail_cols).noalias() -= sketch_R.template triangularView<Upper>() * tmp_block;
}
k += b;
}
// Tail loop: any remaining columns (last partial block, or the rank-
// deficient remainder after early termination) handled by the
// unblocked pivoted QR. This routine also updates m_nonzero_pivots
// along its diagonal scan, which finalize_rank below cross-checks.
if (k < cols && (k < size || blocked_terminated_early)) {
num_transpositions += unblocked_pivoted_qr(k, k, cols - k, threshold_helper);
}
m_det_p = (num_transpositions % 2) ? -1 : 1;
// Final rank determination: scan the entire diagonal with the LAWN-176
// threshold. This refines whatever the unblocked tail set, and covers
// the full-rank blocked path where m_nonzero_pivots was never touched.
finalize_rank(threshold_helper);
m_isInitialized = true;
}
#ifndef EIGEN_PARSED_BY_DOXYGEN
template <typename MatrixType_, typename PermutationIndex_>
template <typename RhsType, typename DstType>
void RandColPivHouseholderQR<MatrixType_, PermutationIndex_>::_solve_impl(const RhsType& rhs, DstType& dst) const {
const Index nonzero_pivots = nonzeroPivots();
if (nonzero_pivots == 0) {
dst.setZero();
return;
}
typename RhsType::PlainObject c(rhs);
c.applyOnTheLeft(householderQ().setLength(nonzero_pivots).adjoint());
m_qr.topLeftCorner(nonzero_pivots, nonzero_pivots)
.template triangularView<Upper>()
.solveInPlace(c.topRows(nonzero_pivots));
for (Index i = 0; i < nonzero_pivots; ++i) dst.row(m_colsPermutation.indices().coeff(i)) = c.row(i);
for (Index i = nonzero_pivots; i < cols(); ++i) dst.row(m_colsPermutation.indices().coeff(i)).setZero();
}
template <typename MatrixType_, typename PermutationIndex_>
template <bool Conjugate, typename RhsType, typename DstType>
void RandColPivHouseholderQR<MatrixType_, PermutationIndex_>::_solve_impl_transposed(const RhsType& rhs,
DstType& dst) const {
const Index nonzero_pivots = nonzeroPivots();
if (nonzero_pivots == 0) {
dst.setZero();
return;
}
typename RhsType::PlainObject c(m_colsPermutation.transpose() * rhs);
m_qr.topLeftCorner(nonzero_pivots, nonzero_pivots)
.template triangularView<Upper>()
.transpose()
.template conjugateIf<Conjugate>()
.solveInPlace(c.topRows(nonzero_pivots));
dst.topRows(nonzero_pivots) = c.topRows(nonzero_pivots);
dst.bottomRows(rows() - nonzero_pivots).setZero();
dst.applyOnTheLeft(householderQ().setLength(nonzero_pivots).template conjugateIf<!Conjugate>());
}
#endif
namespace internal {
template <typename DstXprType, typename MatrixType, typename PermutationIndex>
struct Assignment<DstXprType, Inverse<RandColPivHouseholderQR<MatrixType, PermutationIndex>>,
internal::assign_op<typename DstXprType::Scalar,
typename RandColPivHouseholderQR<MatrixType, PermutationIndex>::Scalar>,
Dense2Dense> {
typedef RandColPivHouseholderQR<MatrixType, PermutationIndex> QrType;
typedef Inverse<QrType> SrcXprType;
static void run(DstXprType& dst, const SrcXprType& src,
const internal::assign_op<typename DstXprType::Scalar, typename QrType::Scalar>&) {
dst = src.nestedExpression().solve(MatrixType::Identity(src.rows(), src.cols()));
}
};
} // end namespace internal
template <typename MatrixType, typename PermutationIndex>
typename RandColPivHouseholderQR<MatrixType, PermutationIndex>::HouseholderSequenceType
RandColPivHouseholderQR<MatrixType, PermutationIndex>::householderQ() const {
eigen_assert(m_isInitialized && "RandColPivHouseholderQR is not initialized.");
return HouseholderSequenceType(m_qr, m_hCoeffs.conjugate());
}
/** \return the randomized column-pivoted Householder QR decomposition of \c *this.
*
* \sa class RandColPivHouseholderQR
*/
template <typename Derived>
template <typename PermutationIndexType>
RandColPivHouseholderQR<typename MatrixBase<Derived>::PlainObject, PermutationIndexType>
MatrixBase<Derived>::randColPivHouseholderQr() const {
return RandColPivHouseholderQR<PlainObject, PermutationIndexType>(eval());
}
} // end namespace Eigen
#endif // EIGEN_RANDCOLPIVOTINGHOUSEHOLDERQR_H