| // This file is part of Eigen, a lightweight C++ template library |
| // for linear algebra. |
| // |
| // Copyright (C) 2008-2015 Gael Guennebaud <gael.guennebaud@inria.fr> |
| // |
| // This Source Code Form is subject to the terms of the Mozilla |
| // Public License v. 2.0. If a copy of the MPL was not distributed |
| // with this file, You can obtain one at http://mozilla.org/MPL/2.0/. |
| |
| #ifndef EIGEN_CONSERVATIVESPARSESPARSEPRODUCT_H |
| #define EIGEN_CONSERVATIVESPARSESPARSEPRODUCT_H |
| |
| // IWYU pragma: private |
| #include "./InternalHeaderCheck.h" |
| |
| namespace Eigen { |
| |
| namespace internal { |
| |
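// Compute res = lhs * rhs column by column using a dense accumulator of size
// `rows` (the mask/values/indices triplet below). The product is "conservative":
// every structurally non-zero coefficient is computed and kept, including
// explicit zeros arising from numerical cancellation (pruning is handled
// elsewhere). The kernel is written as if all matrices were column-major; the
// selectors at the bottom of this file fake the storage order so that the same
// kernel also serves the row-major cases.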
| template <typename Lhs, typename Rhs, typename ResultType> |
| static void conservative_sparse_sparse_product_impl(const Lhs& lhs, const Rhs& rhs, ResultType& res, |
| bool sortedInsertion = false) { |
| typedef typename remove_all_t<Lhs>::Scalar LhsScalar; |
| typedef typename remove_all_t<Rhs>::Scalar RhsScalar; |
| typedef typename remove_all_t<ResultType>::Scalar ResScalar; |
| |
| // make sure to call innerSize/outerSize since we fake the storage order. |
| Index rows = lhs.innerSize(); |
| Index cols = rhs.outerSize(); |
| eigen_assert(lhs.outerSize() == rhs.innerSize()); |
| |
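  // Dense accumulator for the current result column:
  //  - mask[i]    tells whether row i already holds a partial sum,
  //  - values[i]  stores that partial sum,
  //  - indices[0..nnz) lists the distinct rows touched so far, so that only
  //    those entries have to be flushed into res and reset afterwards.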
| ei_declare_aligned_stack_constructed_variable(bool, mask, rows, 0); |
| ei_declare_aligned_stack_constructed_variable(ResScalar, values, rows, 0); |
| ei_declare_aligned_stack_constructed_variable(Index, indices, rows, 0); |
| |
| std::memset(mask, 0, sizeof(bool) * rows); |
| |
| evaluator<Lhs> lhsEval(lhs); |
| evaluator<Rhs> rhsEval(rhs); |
| |
  // Estimate the number of non-zero entries:
  // given a rhs column containing Y non-zeros, we assume that the corresponding Y columns
  // of the lhs differ from one another by about one non-zero on average, so the number of
  // non-zeros of the product of the lhs with that rhs column is X+Y, where X is the average
  // number of non-zeros per column of the lhs.
  // Therefore, we have nnz(lhs*rhs) = nnz(lhs) + nnz(rhs).
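  // For instance, if the lhs averages X=4 non-zeros per column and the current
  // rhs column holds Y=3 non-zeros, the union of the 3 selected lhs columns is
  // assumed to span about 4+3=7 distinct rows.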
| Index estimated_nnz_prod = lhsEval.nonZerosEstimate() + rhsEval.nonZerosEstimate(); |
| |
| res.setZero(); |
| res.reserve(Index(estimated_nnz_prod)); |
| // we compute each column of the result, one after the other |
| for (Index j = 0; j < cols; ++j) { |
| res.startVec(j); |
| Index nnz = 0; |
| for (typename evaluator<Rhs>::InnerIterator rhsIt(rhsEval, j); rhsIt; ++rhsIt) { |
| RhsScalar y = rhsIt.value(); |
| Index k = rhsIt.index(); |
| for (typename evaluator<Lhs>::InnerIterator lhsIt(lhsEval, k); lhsIt; ++lhsIt) { |
| Index i = lhsIt.index(); |
| LhsScalar x = lhsIt.value(); |
| if (!mask[i]) { |
| mask[i] = true; |
| values[i] = x * y; |
| indices[nnz] = i; |
| ++nnz; |
| } else |
| values[i] += x * y; |
| } |
| } |
| if (!sortedInsertion) { |
| // unordered insertion |
| for (Index k = 0; k < nnz; ++k) { |
| Index i = indices[k]; |
| res.insertBackByOuterInnerUnordered(j, i) = values[i]; |
| mask[i] = false; |
| } |
| } else { |
      // alternative ordered insertion code:
      // Cost model: a quick sort costs about nnz * log2(nnz) * 1.39 operations,
      // whereas the dense path below costs about `rows` operations.
      const Index t200 = rows / 11;        // 11 == log2(200) * 1.39, rounded up
      const Index t = (rows * 100) / 139;  // 100/139 == 1/1.39
| |
| // FIXME reserve nnz non zeros |
| // FIXME implement faster sorting algorithms for very small nnz |
      // if the result is sparse enough => use a quick sort,
      // otherwise => loop through the entire vector.
      // To avoid performing an expensive log2 when the result
      // is clearly very sparse, we use a linear bound up to nnz==200.
| if ((nnz < 200 && nnz < t200) || nnz * numext::log2(int(nnz)) < t) { |
| if (nnz > 1) std::sort(indices, indices + nnz); |
| for (Index k = 0; k < nnz; ++k) { |
| Index i = indices[k]; |
| res.insertBackByOuterInner(j, i) = values[i]; |
| mask[i] = false; |
| } |
| } else { |
| // dense path |
| for (Index i = 0; i < rows; ++i) { |
| if (mask[i]) { |
| mask[i] = false; |
| res.insertBackByOuterInner(j, i) = values[i]; |
| } |
| } |
| } |
| } |
| } |
| res.finalize(); |
| } |
| |
| } // end namespace internal |
| |
| namespace internal { |
| |
// Helper alias to rebuild a sparse matrix type with the given storage order
| template <class Source, int Order> |
| using WithStorageOrder = SparseMatrix<typename Source::Scalar, Order, typename Source::StorageIndex>; |
| |
| template <typename Lhs, typename Rhs, typename ResultType, |
| int LhsStorageOrder = (traits<Lhs>::Flags & RowMajorBit) ? RowMajor : ColMajor, |
| int RhsStorageOrder = (traits<Rhs>::Flags & RowMajorBit) ? RowMajor : ColMajor, |
| int ResStorageOrder = (traits<ResultType>::Flags & RowMajorBit) ? RowMajor : ColMajor> |
| struct conservative_sparse_sparse_product_selector; |
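
// Each specialization below reduces to the column-major kernel above, either by
// copying an operand into the required storage order, or by swapping the two
// operands: when every matrix involved is row-major, evaluating rhs*lhs with the
// column-major kernel actually computes (lhs*rhs)^T = rhs^T * lhs^T read through
// transposed (i.e. row-major) views, which is lhs*rhs in row-major storage.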
| |
| template <typename Lhs, typename Rhs, typename ResultType> |
| struct conservative_sparse_sparse_product_selector<Lhs, Rhs, ResultType, ColMajor, ColMajor, ColMajor> { |
| typedef remove_all_t<Lhs> LhsCleaned; |
| typedef typename LhsCleaned::Scalar Scalar; |
| |
| static void run(const Lhs& lhs, const Rhs& rhs, ResultType& res) { |
| using RowMajorMatrix = WithStorageOrder<ResultType, RowMajor>; |
| using ColMajorMatrixAux = WithStorageOrder<ResultType, ColMajor>; |
| |
    // If the result is tall and thin (in the extreme case a column vector),
    // then it is faster to sort the coefficients in place instead of transposing twice.
    // FIXME: the following heuristic is probably not very good.
| if (lhs.rows() > rhs.cols()) { |
| using ColMajorMatrix = typename sparse_eval<ColMajorMatrixAux, ResultType::RowsAtCompileTime, |
| ResultType::ColsAtCompileTime, ColMajorMatrixAux::Flags>::type; |
| ColMajorMatrix resCol(lhs.rows(), rhs.cols()); |
| // perform sorted insertion |
| internal::conservative_sparse_sparse_product_impl<Lhs, Rhs, ColMajorMatrix>(lhs, rhs, resCol, true); |
| res = resCol.markAsRValue(); |
| } else { |
| ColMajorMatrixAux resCol(lhs.rows(), rhs.cols()); |
      // resort to a transpose round-trip to get the entries sorted
| internal::conservative_sparse_sparse_product_impl<Lhs, Rhs, ColMajorMatrixAux>(lhs, rhs, resCol, false); |
| RowMajorMatrix resRow(resCol); |
| res = resRow.markAsRValue(); |
| } |
| } |
| }; |
| |
| template <typename Lhs, typename Rhs, typename ResultType> |
| struct conservative_sparse_sparse_product_selector<Lhs, Rhs, ResultType, RowMajor, ColMajor, ColMajor> { |
| static void run(const Lhs& lhs, const Rhs& rhs, ResultType& res) { |
| using RowMajorRhs = WithStorageOrder<Rhs, RowMajor>; |
| using RowMajorRes = WithStorageOrder<ResultType, RowMajor>; |
| RowMajorRhs rhsRow = rhs; |
| RowMajorRes resRow(lhs.rows(), rhs.cols()); |
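    // note the swapped operands: with rhsRow, lhs and resRow all row-major,
    // this evaluates lhs*rhs directly into the row-major temporary.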
| internal::conservative_sparse_sparse_product_impl<RowMajorRhs, Lhs, RowMajorRes>(rhsRow, lhs, resRow); |
| res = resRow; |
| } |
| }; |
| |
| template <typename Lhs, typename Rhs, typename ResultType> |
| struct conservative_sparse_sparse_product_selector<Lhs, Rhs, ResultType, ColMajor, RowMajor, ColMajor> { |
| static void run(const Lhs& lhs, const Rhs& rhs, ResultType& res) { |
| using RowMajorLhs = WithStorageOrder<Lhs, RowMajor>; |
| using RowMajorRes = WithStorageOrder<ResultType, RowMajor>; |
| RowMajorLhs lhsRow = lhs; |
| RowMajorRes resRow(lhs.rows(), rhs.cols()); |
| internal::conservative_sparse_sparse_product_impl<Rhs, RowMajorLhs, RowMajorRes>(rhs, lhsRow, resRow); |
| res = resRow; |
| } |
| }; |
| |
| template <typename Lhs, typename Rhs, typename ResultType> |
| struct conservative_sparse_sparse_product_selector<Lhs, Rhs, ResultType, RowMajor, RowMajor, ColMajor> { |
| static void run(const Lhs& lhs, const Rhs& rhs, ResultType& res) { |
| using RowMajorRes = WithStorageOrder<ResultType, RowMajor>; |
| RowMajorRes resRow(lhs.rows(), rhs.cols()); |
| internal::conservative_sparse_sparse_product_impl<Rhs, Lhs, RowMajorRes>(rhs, lhs, resRow); |
| res = resRow; |
| } |
| }; |
| |
| template <typename Lhs, typename Rhs, typename ResultType> |
| struct conservative_sparse_sparse_product_selector<Lhs, Rhs, ResultType, ColMajor, ColMajor, RowMajor> { |
| typedef typename traits<remove_all_t<Lhs>>::Scalar Scalar; |
| |
| static void run(const Lhs& lhs, const Rhs& rhs, ResultType& res) { |
| using ColMajorRes = WithStorageOrder<ResultType, ColMajor>; |
| ColMajorRes resCol(lhs.rows(), rhs.cols()); |
| internal::conservative_sparse_sparse_product_impl<Lhs, Rhs, ColMajorRes>(lhs, rhs, resCol); |
| res = resCol; |
| } |
| }; |
| |
| template <typename Lhs, typename Rhs, typename ResultType> |
| struct conservative_sparse_sparse_product_selector<Lhs, Rhs, ResultType, RowMajor, ColMajor, RowMajor> { |
| static void run(const Lhs& lhs, const Rhs& rhs, ResultType& res) { |
| using ColMajorLhs = WithStorageOrder<Lhs, ColMajor>; |
| using ColMajorRes = WithStorageOrder<ResultType, ColMajor>; |
| ColMajorLhs lhsCol = lhs; |
| ColMajorRes resCol(lhs.rows(), rhs.cols()); |
| internal::conservative_sparse_sparse_product_impl<ColMajorLhs, Rhs, ColMajorRes>(lhsCol, rhs, resCol); |
| res = resCol; |
| } |
| }; |
| |
| template <typename Lhs, typename Rhs, typename ResultType> |
| struct conservative_sparse_sparse_product_selector<Lhs, Rhs, ResultType, ColMajor, RowMajor, RowMajor> { |
| static void run(const Lhs& lhs, const Rhs& rhs, ResultType& res) { |
| using ColMajorRhs = WithStorageOrder<Rhs, ColMajor>; |
| using ColMajorRes = WithStorageOrder<ResultType, ColMajor>; |
| ColMajorRhs rhsCol = rhs; |
| ColMajorRes resCol(lhs.rows(), rhs.cols()); |
| internal::conservative_sparse_sparse_product_impl<Lhs, ColMajorRhs, ColMajorRes>(lhs, rhsCol, resCol); |
| res = resCol; |
| } |
| }; |
| |
| template <typename Lhs, typename Rhs, typename ResultType> |
| struct conservative_sparse_sparse_product_selector<Lhs, Rhs, ResultType, RowMajor, RowMajor, RowMajor> { |
| static void run(const Lhs& lhs, const Rhs& rhs, ResultType& res) { |
| using ColMajorRes = WithStorageOrder<ResultType, ColMajor>; |
| using RowMajorRes = WithStorageOrder<ResultType, RowMajor>; |
| RowMajorRes resRow(lhs.rows(), rhs.cols()); |
| internal::conservative_sparse_sparse_product_impl<Rhs, Lhs, RowMajorRes>(rhs, lhs, resRow); |
    // sort the non-zeros by converting to a column-major temporary:
| ColMajorRes resCol(resRow); |
| res = resCol; |
| } |
| }; |
| |
| } // end namespace internal |
| |
| namespace internal { |
| |
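// Accumulate res += lhs * rhs into a dense result; no mask is needed since
// coefficients can be accumulated in place via coeffRef. Note that res is only
// added to, so it is expected to be zero-initialized by the caller.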
| template <typename Lhs, typename Rhs, typename ResultType> |
| static void sparse_sparse_to_dense_product_impl(const Lhs& lhs, const Rhs& rhs, ResultType& res) { |
| typedef typename remove_all_t<Lhs>::Scalar LhsScalar; |
| typedef typename remove_all_t<Rhs>::Scalar RhsScalar; |
| Index cols = rhs.outerSize(); |
| eigen_assert(lhs.outerSize() == rhs.innerSize()); |
| |
| evaluator<Lhs> lhsEval(lhs); |
| evaluator<Rhs> rhsEval(rhs); |
| |
| for (Index j = 0; j < cols; ++j) { |
| for (typename evaluator<Rhs>::InnerIterator rhsIt(rhsEval, j); rhsIt; ++rhsIt) { |
| RhsScalar y = rhsIt.value(); |
| Index k = rhsIt.index(); |
| for (typename evaluator<Lhs>::InnerIterator lhsIt(lhsEval, k); lhsIt; ++lhsIt) { |
| Index i = lhsIt.index(); |
| LhsScalar x = lhsIt.value(); |
| res.coeffRef(i, j) += x * y; |
| } |
| } |
| } |
| } |
| |
| } // end namespace internal |
| |
| namespace internal { |
| |
| template <typename Lhs, typename Rhs, typename ResultType, |
| int LhsStorageOrder = (traits<Lhs>::Flags & RowMajorBit) ? RowMajor : ColMajor, |
| int RhsStorageOrder = (traits<Rhs>::Flags & RowMajorBit) ? RowMajor : ColMajor> |
| struct sparse_sparse_to_dense_product_selector; |
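
// For a dense result only the operand orders matter: the mixed cases copy one
// operand to column-major, and the all-row-major case writes through a
// transposed view of the result instead of copying it.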
| |
| template <typename Lhs, typename Rhs, typename ResultType> |
| struct sparse_sparse_to_dense_product_selector<Lhs, Rhs, ResultType, ColMajor, ColMajor> { |
| static void run(const Lhs& lhs, const Rhs& rhs, ResultType& res) { |
| internal::sparse_sparse_to_dense_product_impl<Lhs, Rhs, ResultType>(lhs, rhs, res); |
| } |
| }; |
| |
| template <typename Lhs, typename Rhs, typename ResultType> |
| struct sparse_sparse_to_dense_product_selector<Lhs, Rhs, ResultType, RowMajor, ColMajor> { |
| static void run(const Lhs& lhs, const Rhs& rhs, ResultType& res) { |
| using ColMajorLhs = WithStorageOrder<Lhs, ColMajor>; |
| ColMajorLhs lhsCol(lhs); |
| internal::sparse_sparse_to_dense_product_impl<ColMajorLhs, Rhs, ResultType>(lhsCol, rhs, res); |
| } |
| }; |
| |
| template <typename Lhs, typename Rhs, typename ResultType> |
| struct sparse_sparse_to_dense_product_selector<Lhs, Rhs, ResultType, ColMajor, RowMajor> { |
| static void run(const Lhs& lhs, const Rhs& rhs, ResultType& res) { |
| using ColMajorRhs = WithStorageOrder<Rhs, ColMajor>; |
| ColMajorRhs rhsCol(rhs); |
| internal::sparse_sparse_to_dense_product_impl<Lhs, ColMajorRhs, ResultType>(lhs, rhsCol, res); |
| } |
| }; |
| |
| template <typename Lhs, typename Rhs, typename ResultType> |
| struct sparse_sparse_to_dense_product_selector<Lhs, Rhs, ResultType, RowMajor, RowMajor> { |
| static void run(const Lhs& lhs, const Rhs& rhs, ResultType& res) { |
| Transpose<ResultType> trRes(res); |
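    // the transposed view maps each (i,j) write to res(j,i), so evaluating
    // rhs*lhs through it accumulates lhs*rhs directly into res.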
| internal::sparse_sparse_to_dense_product_impl<Rhs, Lhs, Transpose<ResultType>>(rhs, lhs, trRes); |
| } |
| }; |
| |
| } // end namespace internal |
| |
| } // end namespace Eigen |
| |
| #endif // EIGEN_CONSERVATIVESPARSESPARSEPRODUCT_H |