// blob: b7ae44fda940c96248036ed2b09fcbfcee1047d0 [file] [log] [blame]
// This file is part of Eigen, a lightweight C++ template library
// for linear algebra.
//
// Copyright (C) 2006-2008 Benoit Jacob <jacob.benoit.1@gmail.com>
// Copyright (C) 2008-2011 Gael Guennebaud <gael.guennebaud@inria.fr>
//
// This Source Code Form is subject to the terms of the Mozilla
// Public License v. 2.0. If a copy of the MPL was not distributed
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
#ifndef EIGEN_GENERAL_PRODUCT_H
#define EIGEN_GENERAL_PRODUCT_H
// IWYU pragma: private
#include "./InternalHeaderCheck.h"
namespace Eigen {
// Size categories produced by internal::product_size_category for a product dimension.
// A dimension known to be exactly 1 is represented by the literal value 1, which is why
// Large and Small use values different from 1.
enum { Large = 2, Small = 3 };
// Threshold used to fall back from the generic (heavy) matrix-matrix product
// implementation to the lightweight coefficient-based one.
// See generic_product_impl<Lhs,Rhs,DenseShape,DenseShape,GemmProduct>
// in products/GeneralMatrixMatrix.h for more details.
// The value below is only a default: it is used when the macro has not been defined beforehand.
// TODO This threshold should also be used in the compile-time selector below.
#ifndef EIGEN_GEMM_TO_COEFFBASED_THRESHOLD
// This default value has been obtained on a Haswell architecture.
#define EIGEN_GEMM_TO_COEFFBASED_THRESHOLD 20
#endif
namespace internal {
// Maps the size categories (1, Small or Large) of the rows, columns and depth of a product
// to a product kind, exposed as the nested enumerator `ret`.
// Only (partial) specializations are defined; see further down in this file.
template <int Rows, int Cols, int Depth>
struct product_type_selector;
/** \internal
 * Classifies a compile-time dimension as 1, Small or Large (exposed as \c value).
 *
 * \tparam Size     the dimension at compile time (may be Dynamic)
 * \tparam MaxSize  the compile-time upper bound of that dimension (may be Dynamic)
 *
 * Outside of a GPU compile phase, a dimension is Large when it has no compile-time bound
 * (MaxSize == Dynamic), when Size reaches EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD, or when Size is
 * Dynamic and MaxSize reaches that threshold. During a GPU compile phase nothing is classified as Large.
 * A dimension that is not Large is 1 when Size == 1 and Small otherwise.
 */
template <int Size, int MaxSize>
struct product_size_category {
  enum {
#ifdef EIGEN_GPU_COMPILE_PHASE
    is_large = 0,
#else
    is_large = (MaxSize == Dynamic) || (Size >= EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD) ||
               ((Size == Dynamic) && (MaxSize >= EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD)),
#endif
    value = is_large ? Large : (Size == 1 ? 1 : Small)
  };
};
template <typename Lhs, typename Rhs>
struct product_type {
typedef remove_all_t<Lhs> Lhs_;
typedef remove_all_t<Rhs> Rhs_;
enum {
MaxRows = traits<Lhs_>::MaxRowsAtCompileTime,
Rows = traits<Lhs_>::RowsAtCompileTime,
MaxCols = traits<Rhs_>::MaxColsAtCompileTime,
Cols = traits<Rhs_>::ColsAtCompileTime,
MaxDepth = min_size_prefer_fixed(traits<Lhs_>::MaxColsAtCompileTime, traits<Rhs_>::MaxRowsAtCompileTime),
Depth = min_size_prefer_fixed(traits<Lhs_>::ColsAtCompileTime, traits<Rhs_>::RowsAtCompileTime)
};
// the splitting into different lines of code here, introducing the _select enums and the typedef below,
// is to work around an internal compiler error with gcc 4.1 and 4.2.
private:
enum {
rows_select = product_size_category<Rows, MaxRows>::value,
cols_select = product_size_category<Cols, MaxCols>::value,
depth_select = product_size_category<Depth, MaxDepth>::value
};
typedef product_type_selector<rows_select, cols_select, depth_select> selector;
public:
enum { value = selector::ret, ret = selector::ret };
#ifdef EIGEN_DEBUG_PRODUCT
static void debug() {
EIGEN_DEBUG_VAR(Rows);
EIGEN_DEBUG_VAR(Cols);
EIGEN_DEBUG_VAR(Depth);
EIGEN_DEBUG_VAR(rows_select);
EIGEN_DEBUG_VAR(cols_select);
EIGEN_DEBUG_VAR(depth_select);
EIGEN_DEBUG_VAR(value);
}
#endif
};
/* The following specializations select the kind of product at compile time
* based on the three dimensions of the product.
* This is a compile time mapping from {1,Small,Large}^3 -> {product types}.
* The template arguments are, in order, the size categories of the rows, the columns and the depth. */
// FIXME I'm not sure the current mapping is the ideal one.
// Depth == 1, generic case. Given the more specialized entries below, this one is only
// selected when both M and N are Large.
template <int M, int N>
struct product_type_selector<M, N, 1> {
enum { ret = OuterProduct };
};
// Depth == 1 and a single column.
template <int M>
struct product_type_selector<M, 1, 1> {
enum { ret = LazyCoeffBasedProductMode };
};
// Depth == 1 and a single row.
template <int N>
struct product_type_selector<1, N, 1> {
enum { ret = LazyCoeffBasedProductMode };
};
// 1x1 result: inner product, whatever the depth.
template <int Depth>
struct product_type_selector<1, 1, Depth> {
enum { ret = InnerProduct };
};
// Full specialization needed because <1,1,1> matches several of the partial specializations above,
// none of which is more specialized than all the others.
template <>
struct product_type_selector<1, 1, 1> {
enum { ret = InnerProduct };
};
template <>
struct product_type_selector<Small, 1, Small> {
enum { ret = CoeffBasedProductMode };
};
template <>
struct product_type_selector<1, Small, Small> {
enum { ret = CoeffBasedProductMode };
};
template <>
struct product_type_selector<Small, Small, Small> {
enum { ret = CoeffBasedProductMode };
};
// Depth == 1 with at least one Small result dimension: lazy coefficient-based product
// instead of the generic OuterProduct.
template <>
struct product_type_selector<Small, Small, 1> {
enum { ret = LazyCoeffBasedProductMode };
};
template <>
struct product_type_selector<Small, Large, 1> {
enum { ret = LazyCoeffBasedProductMode };
};
template <>
struct product_type_selector<Large, Small, 1> {
enum { ret = LazyCoeffBasedProductMode };
};
// Row-vector times matrix: GemvProduct only when both the columns and the depth are Large.
template <>
struct product_type_selector<1, Large, Small> {
enum { ret = CoeffBasedProductMode };
};
template <>
struct product_type_selector<1, Large, Large> {
enum { ret = GemvProduct };
};
template <>
struct product_type_selector<1, Small, Large> {
enum { ret = CoeffBasedProductMode };
};
// Matrix times column-vector: GemvProduct only when both the rows and the depth are Large.
template <>
struct product_type_selector<Large, 1, Small> {
enum { ret = CoeffBasedProductMode };
};
template <>
struct product_type_selector<Large, 1, Large> {
enum { ret = GemvProduct };
};
template <>
struct product_type_selector<Small, 1, Large> {
enum { ret = CoeffBasedProductMode };
};
// Matrix times matrix with a Large depth: always GemmProduct.
template <>
struct product_type_selector<Small, Small, Large> {
enum { ret = GemmProduct };
};
template <>
struct product_type_selector<Large, Small, Large> {
enum { ret = GemmProduct };
};
template <>
struct product_type_selector<Small, Large, Large> {
enum { ret = GemmProduct };
};
template <>
struct product_type_selector<Large, Large, Large> {
enum { ret = GemmProduct };
};
// Matrix times matrix with a Small depth: GemmProduct only when both rows and columns are Large.
template <>
struct product_type_selector<Large, Small, Small> {
enum { ret = CoeffBasedProductMode };
};
template <>
struct product_type_selector<Small, Large, Small> {
enum { ret = CoeffBasedProductMode };
};
template <>
struct product_type_selector<Large, Large, Small> {
enum { ret = GemmProduct };
};
} // end namespace internal
/***********************************************************************
* Implementation of Inner Vector Vector Product
***********************************************************************/
// FIXME : maybe the "inner product" could return a Scalar
// instead of a 1x1 matrix ??
// Pro: more natural for the user
// Cons: this could be a problem if in a meta unrolled algorithm a matrix-matrix
// product ends up to a row-vector times col-vector product... To tackle this use
// case, we could have a specialization for Block<MatrixType,1,1> with: operator=(Scalar x);
/***********************************************************************
* Implementation of Outer Vector Vector Product
***********************************************************************/
/***********************************************************************
* Implementation of General Matrix Vector Product
***********************************************************************/
/* According to the shape/flags of the matrix we have to distinguish 3 different cases:
* 1 - the matrix is col-major, BLAS compatible and M is large => call fast BLAS-like colmajor routine
* 2 - the matrix is row-major, BLAS compatible and N is large => call fast BLAS-like rowmajor routine
* 3 - all other cases are handled using a simple loop along the outer-storage direction.
* Therefore we need a lower level meta selector.
* Furthermore, if the matrix is the rhs, then the product has to be transposed.
*/
namespace internal {
// Lower level meta selector for the dense matrix-vector product; only specializations are defined.
//   Side           : OnTheLeft or OnTheRight. The OnTheLeft specialization transposes the whole product
//                    and forwards to the OnTheRight one.
//   StorageOrder   : ColMajor or RowMajor.
//   BlasCompatible : true for the specializations calling general_matrix_vector_product,
//                    false for the ones implemented with a plain loop.
template <int Side, int StorageOrder, bool BlasCompatible>
struct gemv_dense_selector;
} // end namespace internal
namespace internal {
template <typename Scalar, int Size, int MaxSize, bool Cond>
struct gemv_static_vector_if;
template <typename Scalar, int Size, int MaxSize>
struct gemv_static_vector_if<Scalar, Size, MaxSize, false> {
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Scalar* data() {
eigen_internal_assert(false && "should never be called");
return 0;
}
};
template <typename Scalar, int Size>
struct gemv_static_vector_if<Scalar, Size, Dynamic, true> {
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Scalar* data() { return 0; }
};
// Cond == true with a compile-time bound: holds static storage sized from
// min_size_prefer_fixed(Size, MaxSize) and returns a pointer into it.
template <typename Scalar, int Size, int MaxSize>
struct gemv_static_vector_if<Scalar, Size, MaxSize, true> {
#if EIGEN_MAX_STATIC_ALIGN_BYTES == 0
  // Some architectures cannot align on the stack,
  // => enforce alignment manually: the array is over-allocated by EIGEN_MAX_ALIGN_BYTES elements and
  // data() rounds its address down to a multiple of EIGEN_MAX_ALIGN_BYTES, then advances by
  // EIGEN_MAX_ALIGN_BYTES (the mask assumes EIGEN_MAX_ALIGN_BYTES is a power of two).
  internal::plain_array<Scalar, internal::min_size_prefer_fixed(Size, MaxSize) + EIGEN_MAX_ALIGN_BYTES, 0> m_data;
  EIGEN_STRONG_INLINE Scalar* data() {
    const std::uintptr_t rawAddress = std::uintptr_t(m_data.array);
    const std::size_t alignMask = ~(std::size_t(EIGEN_MAX_ALIGN_BYTES - 1));
    return reinterpret_cast<Scalar*>((rawAddress & alignMask) + EIGEN_MAX_ALIGN_BYTES);
  }
#else
  // Static alignment is available: let plain_array align the storage (AlignedMax).
  internal::plain_array<Scalar, internal::min_size_prefer_fixed(Size, MaxSize), 0, AlignedMax> m_data;
  EIGEN_STRONG_INLINE Scalar* data() { return m_data.array; }
#endif
};
// The vector is on the left => transposition
template <int StorageOrder, bool BlasCompatible>
struct gemv_dense_selector<OnTheLeft, StorageOrder, BlasCompatible> {
template <typename Lhs, typename Rhs, typename Dest>
static void run(const Lhs& lhs, const Rhs& rhs, Dest& dest, const typename Dest::Scalar& alpha) {
Transpose<Dest> destT(dest);
enum { OtherStorageOrder = StorageOrder == RowMajor ? ColMajor : RowMajor };
gemv_dense_selector<OnTheRight, OtherStorageOrder, BlasCompatible>::run(rhs.transpose(), lhs.transpose(), destT,
alpha);
}
};
// gemv selector for a vector on the right, a column-major lhs and BLAS-compatible operands.
// Forwards to general_matrix_vector_product, writing either directly into dest or into a
// unit-stride scratch buffer that is merged back into dest afterwards.
// NOTE(review): the kernel presumably accumulates alpha * lhs * rhs into its result buffer (the
// scratch buffer is seeded with dest, or zeroed and then added to dest) -- confirm against the kernel.
template <>
struct gemv_dense_selector<OnTheRight, ColMajor, true> {
template <typename Lhs, typename Rhs, typename Dest>
static inline void run(const Lhs& lhs, const Rhs& rhs, Dest& dest, const typename Dest::Scalar& alpha) {
typedef typename Lhs::Scalar LhsScalar;
typedef typename Rhs::Scalar RhsScalar;
typedef typename Dest::Scalar ResScalar;
typedef internal::blas_traits<Lhs> LhsBlasTraits;
typedef typename LhsBlasTraits::DirectLinearAccessType ActualLhsType;
typedef internal::blas_traits<Rhs> RhsBlasTraits;
typedef typename RhsBlasTraits::DirectLinearAccessType ActualRhsType;
// View of a raw ResScalar buffer as a dynamic-size column vector (used for the scratch buffer).
typedef Map<Matrix<ResScalar, Dynamic, 1>, plain_enum_min(AlignedMax, internal::packet_traits<ResScalar>::size)>
MappedDest;
// Operands as extracted by blas_traits, and the scalar factor combining alpha with lhs and rhs.
ActualLhsType actualLhs = LhsBlasTraits::extract(lhs);
ActualRhsType actualRhs = RhsBlasTraits::extract(rhs);
ResScalar actualAlpha = combine_scalar_factors(alpha, lhs, rhs);
// make sure Dest is a compile-time vector type (bug 1166)
typedef std::conditional_t<Dest::IsVectorAtCompileTime, Dest, typename Dest::ColXpr> ActualDest;
enum {
// FIXME find a way to allow an inner stride on the result if packet_traits<Scalar>::size==1
// on, the other hand it is good for the cache to pack the vector anyways...
EvalToDestAtCompileTime = (ActualDest::InnerStrideAtCompileTime == 1),
ComplexByReal = (NumTraits<LhsScalar>::IsComplex) && (!NumTraits<RhsScalar>::IsComplex),
MightCannotUseDest = ((!EvalToDestAtCompileTime) || ComplexByReal) && (ActualDest::MaxSizeAtCompileTime != 0)
};
typedef const_blas_data_mapper<LhsScalar, Index, ColMajor> LhsMapper;
typedef const_blas_data_mapper<RhsScalar, Index, RowMajor> RhsMapper;
// The scalar factor in the type handed to the kernel.
RhsScalar compatibleAlpha = get_factor<ResScalar, RhsScalar>::run(actualAlpha);
if (!MightCannotUseDest) {
// shortcut if we are sure to be able to use dest directly,
// this helps the compiler generate cleaner and more optimized code for most common cases
general_matrix_vector_product<Index, LhsScalar, LhsMapper, ColMajor, LhsBlasTraits::NeedToConjugate, RhsScalar,
RhsMapper, RhsBlasTraits::NeedToConjugate>::run(actualLhs.rows(), actualLhs.cols(),
LhsMapper(actualLhs.data(),
actualLhs.outerStride()),
RhsMapper(actualRhs.data(),
actualRhs.innerStride()),
dest.data(), 1, compatibleAlpha);
} else {
gemv_static_vector_if<ResScalar, ActualDest::SizeAtCompileTime, ActualDest::MaxSizeAtCompileTime,
MightCannotUseDest>
static_dest;
// For a complex lhs with a real rhs, alpha can only be passed to the kernel if it has no imaginary part.
const bool alphaIsCompatible = (!ComplexByReal) || (numext::is_exactly_zero(numext::imag(actualAlpha)));
const bool evalToDest = EvalToDestAtCompileTime && alphaIsCompatible;
// actualDestPtr is dest's own storage when evalToDest holds, otherwise a scratch buffer.
// NOTE(review): when static_dest.data() is null the macro presumably allocates the buffer itself -- confirm.
ei_declare_aligned_stack_constructed_variable(ResScalar, actualDestPtr, dest.size(),
evalToDest ? dest.data() : static_dest.data());
if (!evalToDest) {
#ifdef EIGEN_DENSE_STORAGE_CTOR_PLUGIN
Index size = dest.size();
EIGEN_DENSE_STORAGE_CTOR_PLUGIN
#endif
// Seed the scratch buffer: zero it and call the kernel with a unit factor when alpha has to be
// applied afterwards, otherwise start from the current content of dest.
if (!alphaIsCompatible) {
MappedDest(actualDestPtr, dest.size()).setZero();
compatibleAlpha = RhsScalar(1);
} else
MappedDest(actualDestPtr, dest.size()) = dest;
}
general_matrix_vector_product<Index, LhsScalar, LhsMapper, ColMajor, LhsBlasTraits::NeedToConjugate, RhsScalar,
RhsMapper, RhsBlasTraits::NeedToConjugate>::run(actualLhs.rows(), actualLhs.cols(),
LhsMapper(actualLhs.data(),
actualLhs.outerStride()),
RhsMapper(actualRhs.data(),
actualRhs.innerStride()),
actualDestPtr, 1, compatibleAlpha);
// Merge the scratch buffer back into dest: scale by alpha and add, or plain copy.
if (!evalToDest) {
if (!alphaIsCompatible)
dest.matrix() += actualAlpha * MappedDest(actualDestPtr, dest.size());
else
dest = MappedDest(actualDestPtr, dest.size());
}
}
}
};
// gemv selector for a vector on the right, a row-major lhs and BLAS-compatible operands.
// Forwards to general_matrix_vector_product; the rhs is first copied into a unit-stride
// buffer unless it can be used in place.
template <>
struct gemv_dense_selector<OnTheRight, RowMajor, true> {
template <typename Lhs, typename Rhs, typename Dest>
static void run(const Lhs& lhs, const Rhs& rhs, Dest& dest, const typename Dest::Scalar& alpha) {
typedef typename Lhs::Scalar LhsScalar;
typedef typename Rhs::Scalar RhsScalar;
typedef typename Dest::Scalar ResScalar;
typedef internal::blas_traits<Lhs> LhsBlasTraits;
typedef typename LhsBlasTraits::DirectLinearAccessType ActualLhsType;
typedef internal::blas_traits<Rhs> RhsBlasTraits;
typedef typename RhsBlasTraits::DirectLinearAccessType ActualRhsType;
typedef internal::remove_all_t<ActualRhsType> ActualRhsTypeCleaned;
// Operands as extracted by blas_traits, and the scalar factor combining alpha with lhs and rhs.
std::add_const_t<ActualLhsType> actualLhs = LhsBlasTraits::extract(lhs);
std::add_const_t<ActualRhsType> actualRhs = RhsBlasTraits::extract(rhs);
ResScalar actualAlpha = combine_scalar_factors(alpha, lhs, rhs);
enum {
// FIXME find a way to allow an inner stride on the result if packet_traits<Scalar>::size==1
// on, the other hand it is good for the cache to pack the vector anyways...
// The rhs is used in place when it has a unit inner stride at compile time, or when it is known to be empty.
DirectlyUseRhs =
ActualRhsTypeCleaned::InnerStrideAtCompileTime == 1 || ActualRhsTypeCleaned::MaxSizeAtCompileTime == 0
};
gemv_static_vector_if<RhsScalar, ActualRhsTypeCleaned::SizeAtCompileTime,
ActualRhsTypeCleaned::MaxSizeAtCompileTime, !DirectlyUseRhs>
static_rhs;
// actualRhsPtr is the rhs' own storage when DirectlyUseRhs holds, otherwise a scratch buffer.
// NOTE(review): when static_rhs.data() is null the macro presumably allocates the buffer itself -- confirm.
ei_declare_aligned_stack_constructed_variable(
RhsScalar, actualRhsPtr, actualRhs.size(),
DirectlyUseRhs ? const_cast<RhsScalar*>(actualRhs.data()) : static_rhs.data());
if (!DirectlyUseRhs) {
#ifdef EIGEN_DENSE_STORAGE_CTOR_PLUGIN
Index size = actualRhs.size();
EIGEN_DENSE_STORAGE_CTOR_PLUGIN
#endif
// Pack the rhs into the scratch buffer.
Map<typename ActualRhsTypeCleaned::PlainObject>(actualRhsPtr, actualRhs.size()) = actualRhs;
}
typedef const_blas_data_mapper<LhsScalar, Index, RowMajor> LhsMapper;
typedef const_blas_data_mapper<RhsScalar, Index, ColMajor> RhsMapper;
general_matrix_vector_product<Index, LhsScalar, LhsMapper, RowMajor, LhsBlasTraits::NeedToConjugate, RhsScalar,
RhsMapper, RhsBlasTraits::NeedToConjugate>::
run(actualLhs.rows(), actualLhs.cols(), LhsMapper(actualLhs.data(), actualLhs.outerStride()),
RhsMapper(actualRhsPtr, 1), dest.data(),
dest.col(0).innerStride(), // NOTE if dest is not a vector at compile-time, then dest.innerStride() might
// be wrong. (bug 1166)
actualAlpha);
}
};
template <>
struct gemv_dense_selector<OnTheRight, ColMajor, false> {
template <typename Lhs, typename Rhs, typename Dest>
static void run(const Lhs& lhs, const Rhs& rhs, Dest& dest, const typename Dest::Scalar& alpha) {
EIGEN_STATIC_ASSERT((!nested_eval<Lhs, 1>::Evaluate),
EIGEN_INTERNAL_COMPILATION_ERROR_OR_YOU_MADE_A_PROGRAMMING_MISTAKE);
// TODO if rhs is large enough it might be beneficial to make sure that dest is sequentially stored in memory,
// otherwise use a temp
typename nested_eval<Rhs, 1>::type actual_rhs(rhs);
const Index size = rhs.rows();
for (Index k = 0; k < size; ++k) dest += (alpha * actual_rhs.coeff(k)) * lhs.col(k);
}
};
template <>
struct gemv_dense_selector<OnTheRight, RowMajor, false> {
template <typename Lhs, typename Rhs, typename Dest>
static void run(const Lhs& lhs, const Rhs& rhs, Dest& dest, const typename Dest::Scalar& alpha) {
EIGEN_STATIC_ASSERT((!nested_eval<Lhs, 1>::Evaluate),
EIGEN_INTERNAL_COMPILATION_ERROR_OR_YOU_MADE_A_PROGRAMMING_MISTAKE);
typename nested_eval<Rhs, Lhs::RowsAtCompileTime>::type actual_rhs(rhs);
const Index rows = dest.rows();
for (Index i = 0; i < rows; ++i)
dest.coeffRef(i) += alpha * (lhs.row(i).cwiseProduct(actual_rhs.transpose())).sum();
}
};
} // end namespace internal
/***************************************************************************
* Implementation of matrix base methods
***************************************************************************/
/** \returns the matrix product of \c *this and \a other.
 *
 * When the number of columns of \c *this and the number of rows of \a other are both known at
 * compile time, they must be equal; otherwise a static assertion fails.
 *
 * \note This is the matrix product, not the coefficient-wise one: for the latter use cwiseProduct(),
 * and for a dot product use dot().
 *
 * \sa lazyProduct(), operator*=(const MatrixBase&), Cwise::operator*()
 */
template <typename Derived>
template <typename OtherDerived>
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Product<Derived, OtherDerived> MatrixBase<Derived>::operator*(
    const MatrixBase<OtherDerived>& other) const {
  // A note regarding the function declaration: In MSVC, this function will sometimes
  // not be inlined since DenseStorage is an unwindable object for dynamic
  // matrices and product types are holding a member to store the result.
  // Thus it does not help tagging this function with EIGEN_STRONG_INLINE.
  enum {
    SameSizes = EIGEN_PREDICATE_SAME_MATRIX_SIZE(Derived, OtherDerived),
    AreVectors = Derived::IsVectorAtCompileTime && OtherDerived::IsVectorAtCompileTime,
    // Valid unless both inner dimensions are fixed and differ.
    ProductIsValid = int(Derived::ColsAtCompileTime) == int(OtherDerived::RowsAtCompileTime) ||
                     Derived::ColsAtCompileTime == Dynamic || OtherDerived::RowsAtCompileTime == Dynamic
  };
  // The first two assertions only refine the error message for operands of identical sizes,
  // where the user most likely wanted something else:
  //  * for a dot product use: v1.dot(v2)
  //  * for a coeff-wise product use: v1.cwiseProduct(v2)
  EIGEN_STATIC_ASSERT(
      ProductIsValid || !SameSizes || !AreVectors,
      INVALID_VECTOR_VECTOR_PRODUCT__IF_YOU_WANTED_A_DOT_OR_COEFF_WISE_PRODUCT_YOU_MUST_USE_THE_EXPLICIT_FUNCTIONS)
  EIGEN_STATIC_ASSERT(ProductIsValid || !SameSizes || AreVectors,
                      INVALID_MATRIX_PRODUCT__IF_YOU_WANTED_A_COEFF_WISE_PRODUCT_YOU_MUST_USE_THE_EXPLICIT_FUNCTION)
  EIGEN_STATIC_ASSERT(ProductIsValid || SameSizes, INVALID_MATRIX_PRODUCT)
#ifdef EIGEN_DEBUG_PRODUCT
  internal::product_type<Derived, OtherDerived>::debug();
#endif
  return Product<Derived, OtherDerived>(derived(), other.derived());
}
/** \returns an expression of the matrix product of \c *this and \a other without implicit evaluation.
 *
 * The result behaves like any other expression: each coefficient of the product is computed
 * when it is requested. This might be useful in some extremely rare cases where only a small,
 * non-coherent fraction of the coefficients of the result is needed.
 *
 * \warning This version of the matrix product can be much much slower. Use it only if you know
 * what you are doing and have measured a true speed improvement.
 *
 * \sa operator*(const MatrixBase&)
 */
template <typename Derived>
template <typename OtherDerived>
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Product<Derived, OtherDerived, LazyProduct>
MatrixBase<Derived>::lazyProduct(const MatrixBase<OtherDerived>& other) const {
  enum {
    SameSizes = EIGEN_PREDICATE_SAME_MATRIX_SIZE(Derived, OtherDerived),
    AreVectors = Derived::IsVectorAtCompileTime && OtherDerived::IsVectorAtCompileTime,
    // Valid unless both inner dimensions are fixed and differ.
    ProductIsValid = int(Derived::ColsAtCompileTime) == int(OtherDerived::RowsAtCompileTime) ||
                     Derived::ColsAtCompileTime == Dynamic || OtherDerived::RowsAtCompileTime == Dynamic
  };
  // The first two assertions only refine the error message for operands of identical sizes,
  // where the user most likely wanted something else:
  //  * for a dot product use: v1.dot(v2)
  //  * for a coeff-wise product use: v1.cwiseProduct(v2)
  EIGEN_STATIC_ASSERT(
      ProductIsValid || !SameSizes || !AreVectors,
      INVALID_VECTOR_VECTOR_PRODUCT__IF_YOU_WANTED_A_DOT_OR_COEFF_WISE_PRODUCT_YOU_MUST_USE_THE_EXPLICIT_FUNCTIONS)
  EIGEN_STATIC_ASSERT(ProductIsValid || !SameSizes || AreVectors,
                      INVALID_MATRIX_PRODUCT__IF_YOU_WANTED_A_COEFF_WISE_PRODUCT_YOU_MUST_USE_THE_EXPLICIT_FUNCTION)
  EIGEN_STATIC_ASSERT(ProductIsValid || SameSizes, INVALID_MATRIX_PRODUCT)
  return Product<Derived, OtherDerived, LazyProduct>(derived(), other.derived());
}
} // end namespace Eigen
#endif // EIGEN_GENERAL_PRODUCT_H