Merged in facaiy/eigen/ENH/exp_support_complex_for_gpu (pull request PR-359) ENH: exp supports complex type for cuda
diff --git a/Eigen/src/Core/CoreEvaluators.h b/Eigen/src/Core/CoreEvaluators.h index dfdb63e..8419798 100644 --- a/Eigen/src/Core/CoreEvaluators.h +++ b/Eigen/src/Core/CoreEvaluators.h
@@ -136,7 +136,9 @@ public: EIGEN_DEVICE_FUNC plainobjectbase_evaluator_data(const Scalar* ptr, Index outerStride) : data(ptr) { - EIGEN_ONLY_USED_FOR_DEBUG(outerStride); +#ifndef EIGEN_INTERNAL_DEBUGGING + EIGEN_UNUSED_VARIABLE(outerStride); +#endif eigen_internal_assert(outerStride==OuterStride); } EIGEN_DEVICE_FUNC Index outerStride() const { return OuterStride; } @@ -1077,7 +1079,8 @@ EIGEN_DEVICE_FUNC explicit unary_evaluator(const XprType& block) : m_argImpl(block.nestedExpression()), m_startRow(block.startRow()), - m_startCol(block.startCol()) + m_startCol(block.startCol()), + m_linear_offset(InnerPanel?(XprType::IsRowMajor ? block.startRow()*block.cols() : block.startCol()*block.rows()):0) { } typedef typename XprType::Scalar Scalar; @@ -1096,7 +1099,10 @@ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const { - return coeff(RowsAtCompileTime == 1 ? 0 : index, RowsAtCompileTime == 1 ? index : 0); + if (InnerPanel) + return m_argImpl.coeff(m_linear_offset.value() + index); + else + return coeff(RowsAtCompileTime == 1 ? 0 : index, RowsAtCompileTime == 1 ? index : 0); } EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE @@ -1108,7 +1114,10 @@ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar& coeffRef(Index index) { - return coeffRef(RowsAtCompileTime == 1 ? 0 : index, RowsAtCompileTime == 1 ? index : 0); + if (InnerPanel) + return m_argImpl.coeffRef(m_linear_offset.value() + index); + else + return coeffRef(RowsAtCompileTime == 1 ? 0 : index, RowsAtCompileTime == 1 ? index : 0); } template<int LoadMode, typename PacketType> @@ -1122,8 +1131,11 @@ EIGEN_STRONG_INLINE PacketType packet(Index index) const { - return packet<LoadMode,PacketType>(RowsAtCompileTime == 1 ? 0 : index, - RowsAtCompileTime == 1 ? index : 0); + if (InnerPanel) + return m_argImpl.template packet<LoadMode,PacketType>(m_linear_offset.value() + index); + else + return packet<LoadMode,PacketType>(RowsAtCompileTime == 1 ? 0 : index, + RowsAtCompileTime == 1 ? index : 0); } template<int StoreMode, typename PacketType> @@ -1137,15 +1149,19 @@ EIGEN_STRONG_INLINE void writePacket(Index index, const PacketType& x) { - return writePacket<StoreMode,PacketType>(RowsAtCompileTime == 1 ? 0 : index, - RowsAtCompileTime == 1 ? index : 0, - x); + if (InnerPanel) + return m_argImpl.template writePacket<StoreMode,PacketType>(m_linear_offset.value() + index, x); + else + return writePacket<StoreMode,PacketType>(RowsAtCompileTime == 1 ? 0 : index, + RowsAtCompileTime == 1 ? index : 0, + x); } protected: evaluator<ArgType> m_argImpl; const variable_if_dynamic<Index, (ArgType::RowsAtCompileTime == 1 && BlockRows==1) ? 0 : Dynamic> m_startRow; const variable_if_dynamic<Index, (ArgType::ColsAtCompileTime == 1 && BlockCols==1) ? 0 : Dynamic> m_startCol; + const variable_if_dynamic<Index, InnerPanel ? Dynamic : 0> m_linear_offset; }; // TODO: This evaluator does not actually use the child evaluator;
diff --git a/Eigen/src/Core/Ref.h b/Eigen/src/Core/Ref.h index abb1e51..ac9502b 100644 --- a/Eigen/src/Core/Ref.h +++ b/Eigen/src/Core/Ref.h
@@ -95,6 +95,8 @@ template<typename Expression> EIGEN_DEVICE_FUNC void construct(Expression& expr) { + EIGEN_STATIC_ASSERT_SAME_MATRIX_SIZE(PlainObjectType,Expression); + if(PlainObjectType::RowsAtCompileTime==1) { eigen_assert(expr.rows()==1 || expr.cols()==1);
diff --git a/Eigen/src/Core/SelfAdjointView.h b/Eigen/src/Core/SelfAdjointView.h index 7e71fe3..be21972 100644 --- a/Eigen/src/Core/SelfAdjointView.h +++ b/Eigen/src/Core/SelfAdjointView.h
@@ -71,7 +71,9 @@ EIGEN_DEVICE_FUNC explicit inline SelfAdjointView(MatrixType& matrix) : m_matrix(matrix) - {} + { + EIGEN_STATIC_ASSERT(UpLo==Lower || UpLo==Upper,SELFADJOINTVIEW_ACCEPTS_UPPER_AND_LOWER_MODE_ONLY); + } EIGEN_DEVICE_FUNC inline Index rows() const { return m_matrix.rows(); }
diff --git a/Eigen/src/Core/arch/CUDA/Half.h b/Eigen/src/Core/arch/CUDA/Half.h index bfda39d..2bb3d0d 100644 --- a/Eigen/src/Core/arch/CUDA/Half.h +++ b/Eigen/src/Core/arch/CUDA/Half.h
@@ -145,6 +145,60 @@ } }; +} // end namespace Eigen + +namespace std { +template<> +struct numeric_limits<Eigen::half> { + static const bool is_specialized = true; + static const bool is_signed = true; + static const bool is_integer = false; + static const bool is_exact = false; + static const bool has_infinity = true; + static const bool has_quiet_NaN = true; + static const bool has_signaling_NaN = true; + static const float_denorm_style has_denorm = denorm_present; + static const bool has_denorm_loss = false; + static const std::float_round_style round_style = std::round_to_nearest; + static const bool is_iec559 = false; + static const bool is_bounded = false; + static const bool is_modulo = false; + static const int digits = 11; + static const int digits10 = 3; // according to http://half.sourceforge.net/structstd_1_1numeric__limits_3_01half__float_1_1half_01_4.html + static const int max_digits10 = 5; // according to http://half.sourceforge.net/structstd_1_1numeric__limits_3_01half__float_1_1half_01_4.html + static const int radix = 2; + static const int min_exponent = -13; + static const int min_exponent10 = -4; + static const int max_exponent = 16; + static const int max_exponent10 = 4; + static const bool traps = true; + static const bool tinyness_before = false; + + static Eigen::half (min)() { return Eigen::half_impl::raw_uint16_to_half(0x400); } + static Eigen::half lowest() { return Eigen::half_impl::raw_uint16_to_half(0xfbff); } + static Eigen::half (max)() { return Eigen::half_impl::raw_uint16_to_half(0x7bff); } + static Eigen::half epsilon() { return Eigen::half_impl::raw_uint16_to_half(0x0800); } + static Eigen::half round_error() { return Eigen::half(0.5); } + static Eigen::half infinity() { return Eigen::half_impl::raw_uint16_to_half(0x7c00); } + static Eigen::half quiet_NaN() { return Eigen::half_impl::raw_uint16_to_half(0x7e00); } + static Eigen::half signaling_NaN() { return Eigen::half_impl::raw_uint16_to_half(0x7e00); } + static Eigen::half denorm_min() { return Eigen::half_impl::raw_uint16_to_half(0x1); } +}; + +// If std::numeric_limits<T> is specialized, should also specialize +// std::numeric_limits<const T>, std::numeric_limits<volatile T>, and +// std::numeric_limits<const volatile T> +// https://stackoverflow.com/a/16519653/ +template<> +struct numeric_limits<const Eigen::half> : numeric_limits<Eigen::half> {}; +template<> +struct numeric_limits<volatile Eigen::half> : numeric_limits<Eigen::half> {}; +template<> +struct numeric_limits<const volatile Eigen::half> : numeric_limits<Eigen::half> {}; +} // end namespace std + +namespace Eigen { + namespace half_impl { #if defined(EIGEN_HAS_CUDA_FP16) && defined(EIGEN_CUDA_ARCH) && EIGEN_CUDA_ARCH >= 530 @@ -501,49 +555,6 @@ } // end namespace internal -} // end namespace Eigen - -namespace std { -template<> -struct numeric_limits<Eigen::half> { - static const bool is_specialized = true; - static const bool is_signed = true; - static const bool is_integer = false; - static const bool is_exact = false; - static const bool has_infinity = true; - static const bool has_quiet_NaN = true; - static const bool has_signaling_NaN = true; - static const float_denorm_style has_denorm = denorm_present; - static const bool has_denorm_loss = false; - static const std::float_round_style round_style = std::round_to_nearest; - static const bool is_iec559 = false; - static const bool is_bounded = false; - static const bool is_modulo = false; - static const int digits = 11; - static const int digits10 = 3; // according to http://half.sourceforge.net/structstd_1_1numeric__limits_3_01half__float_1_1half_01_4.html - static const int max_digits10 = 5; // according to http://half.sourceforge.net/structstd_1_1numeric__limits_3_01half__float_1_1half_01_4.html - static const int radix = 2; - static const int min_exponent = -13; - static const int min_exponent10 = -4; - static const int max_exponent = 16; - static const int max_exponent10 = 4; - static const bool traps = true; - static const bool tinyness_before = false; - - static Eigen::half (min)() { return Eigen::half_impl::raw_uint16_to_half(0x400); } - static Eigen::half lowest() { return Eigen::half_impl::raw_uint16_to_half(0xfbff); } - static Eigen::half (max)() { return Eigen::half_impl::raw_uint16_to_half(0x7bff); } - static Eigen::half epsilon() { return Eigen::half_impl::raw_uint16_to_half(0x0800); } - static Eigen::half round_error() { return Eigen::half(0.5); } - static Eigen::half infinity() { return Eigen::half_impl::raw_uint16_to_half(0x7c00); } - static Eigen::half quiet_NaN() { return Eigen::half_impl::raw_uint16_to_half(0x7e00); } - static Eigen::half signaling_NaN() { return Eigen::half_impl::raw_uint16_to_half(0x7e00); } - static Eigen::half denorm_min() { return Eigen::half_impl::raw_uint16_to_half(0x1); } -}; -} - -namespace Eigen { - template<> struct NumTraits<Eigen::half> : GenericNumTraits<Eigen::half> {
diff --git a/Eigen/src/Core/products/TriangularMatrixMatrix.h b/Eigen/src/Core/products/TriangularMatrixMatrix.h index 539b6c0..f784507 100644 --- a/Eigen/src/Core/products/TriangularMatrixMatrix.h +++ b/Eigen/src/Core/products/TriangularMatrixMatrix.h
@@ -400,7 +400,9 @@ { template<typename Dest> static void run(Dest& dst, const Lhs &a_lhs, const Rhs &a_rhs, const typename Dest::Scalar& alpha) { - typedef typename Dest::Scalar Scalar; + typedef typename Lhs::Scalar LhsScalar; + typedef typename Rhs::Scalar RhsScalar; + typedef typename Dest::Scalar Scalar; typedef internal::blas_traits<Lhs> LhsBlasTraits; typedef typename LhsBlasTraits::DirectLinearAccessType ActualLhsType; @@ -412,8 +414,9 @@ typename internal::add_const_on_value_type<ActualLhsType>::type lhs = LhsBlasTraits::extract(a_lhs); typename internal::add_const_on_value_type<ActualRhsType>::type rhs = RhsBlasTraits::extract(a_rhs); - Scalar actualAlpha = alpha * LhsBlasTraits::extractScalarFactor(a_lhs) - * RhsBlasTraits::extractScalarFactor(a_rhs); + LhsScalar lhs_alpha = LhsBlasTraits::extractScalarFactor(a_lhs); + RhsScalar rhs_alpha = RhsBlasTraits::extractScalarFactor(a_rhs); + Scalar actualAlpha = alpha * lhs_alpha * rhs_alpha; typedef internal::gemm_blocking_space<(Dest::Flags&RowMajorBit) ? RowMajor : ColMajor,Scalar,Scalar, Lhs::MaxRowsAtCompileTime, Rhs::MaxColsAtCompileTime, Lhs::MaxColsAtCompileTime,4> BlockingType; @@ -438,6 +441,21 @@ &dst.coeffRef(0,0), dst.outerStride(), // result info actualAlpha, blocking ); + + // Apply correction if the diagonal is unit and a scalar factor was nested: + if ((Mode&UnitDiag)==UnitDiag) + { + if (LhsIsTriangular && lhs_alpha!=LhsScalar(1)) + { + Index diagSize = (std::min)(lhs.rows(),lhs.cols()); + dst.topRows(diagSize) -= ((lhs_alpha-LhsScalar(1))*a_rhs).topRows(diagSize); + } + else if ((!LhsIsTriangular) && rhs_alpha!=RhsScalar(1)) + { + Index diagSize = (std::min)(rhs.rows(),rhs.cols()); + dst.leftCols(diagSize) -= (rhs_alpha-RhsScalar(1))*a_lhs.leftCols(diagSize); + } + } } };
diff --git a/Eigen/src/Core/products/TriangularMatrixVector.h b/Eigen/src/Core/products/TriangularMatrixVector.h index 4b292e7..76bfa15 100644 --- a/Eigen/src/Core/products/TriangularMatrixVector.h +++ b/Eigen/src/Core/products/TriangularMatrixVector.h
@@ -221,8 +221,9 @@ typename internal::add_const_on_value_type<ActualLhsType>::type actualLhs = LhsBlasTraits::extract(lhs); typename internal::add_const_on_value_type<ActualRhsType>::type actualRhs = RhsBlasTraits::extract(rhs); - ResScalar actualAlpha = alpha * LhsBlasTraits::extractScalarFactor(lhs) - * RhsBlasTraits::extractScalarFactor(rhs); + LhsScalar lhs_alpha = LhsBlasTraits::extractScalarFactor(lhs); + RhsScalar rhs_alpha = RhsBlasTraits::extractScalarFactor(rhs); + ResScalar actualAlpha = alpha * lhs_alpha * rhs_alpha; enum { // FIXME find a way to allow an inner stride on the result if packet_traits<Scalar>::size==1 @@ -274,6 +275,12 @@ else dest = MappedDest(actualDestPtr, dest.size()); } + + if ( ((Mode&UnitDiag)==UnitDiag) && (lhs_alpha!=LhsScalar(1)) ) + { + Index diagSize = (std::min)(lhs.rows(),lhs.cols()); + dest.head(diagSize) -= (lhs_alpha-LhsScalar(1))*rhs.head(diagSize); + } } }; @@ -295,8 +302,9 @@ typename add_const<ActualLhsType>::type actualLhs = LhsBlasTraits::extract(lhs); typename add_const<ActualRhsType>::type actualRhs = RhsBlasTraits::extract(rhs); - ResScalar actualAlpha = alpha * LhsBlasTraits::extractScalarFactor(lhs) - * RhsBlasTraits::extractScalarFactor(rhs); + LhsScalar lhs_alpha = LhsBlasTraits::extractScalarFactor(lhs); + RhsScalar rhs_alpha = RhsBlasTraits::extractScalarFactor(rhs); + ResScalar actualAlpha = alpha * lhs_alpha * rhs_alpha; enum { DirectlyUseRhs = ActualRhsTypeCleaned::InnerStrideAtCompileTime==1 @@ -326,6 +334,12 @@ actualRhsPtr,1, dest.data(),dest.innerStride(), actualAlpha); + + if ( ((Mode&UnitDiag)==UnitDiag) && (lhs_alpha!=LhsScalar(1)) ) + { + Index diagSize = (std::min)(lhs.rows(),lhs.cols()); + dest.head(diagSize) -= (lhs_alpha-LhsScalar(1))*rhs.head(diagSize); + } } };
diff --git a/Eigen/src/Core/util/StaticAssert.h b/Eigen/src/Core/util/StaticAssert.h index cb16789..500e477 100644 --- a/Eigen/src/Core/util/StaticAssert.h +++ b/Eigen/src/Core/util/StaticAssert.h
@@ -24,6 +24,7 @@ * */ +#ifndef EIGEN_STATIC_ASSERT #ifndef EIGEN_NO_STATIC_ASSERT #if EIGEN_MAX_CPP_VER>=11 && (__has_feature(cxx_static_assert) || (defined(__cplusplus) && __cplusplus >= 201103L) || (EIGEN_COMP_MSVC >= 1600)) @@ -101,7 +102,8 @@ THIS_TYPE_IS_NOT_SUPPORTED=1, STORAGE_KIND_MUST_MATCH=1, STORAGE_INDEX_MUST_MATCH=1, - CHOLMOD_SUPPORTS_DOUBLE_PRECISION_ONLY=1 + CHOLMOD_SUPPORTS_DOUBLE_PRECISION_ONLY=1, + SELFADJOINTVIEW_ACCEPTS_UPPER_AND_LOWER_MODE_ONLY=1 }; }; @@ -131,7 +133,7 @@ #define EIGEN_STATIC_ASSERT(CONDITION,MSG) eigen_assert((CONDITION) && #MSG); #endif // EIGEN_NO_STATIC_ASSERT - +#endif // EIGEN_STATIC_ASSERT // static assertion failing if the type \a TYPE is not a vector type #define EIGEN_STATIC_ASSERT_VECTOR_ONLY(TYPE) \
diff --git a/Eigen/src/Geometry/Scaling.h b/Eigen/src/Geometry/Scaling.h index f58ca03..8d9acf2 100755 --- a/Eigen/src/Geometry/Scaling.h +++ b/Eigen/src/Geometry/Scaling.h
@@ -29,6 +29,22 @@ * * \sa Scaling(), class DiagonalMatrix, MatrixBase::asDiagonal(), class Translation, class Transform */ + +namespace internal +{ + // This helper helps nvcc+MSVC to properly parse this file. + // See bug 1412. + template <typename Scalar, int Dim, int Mode> + struct uniformscaling_times_affine_returntype + { + enum + { + NewMode = int(Mode) == int(Isometry) ? Affine : Mode + }; + typedef Transform <Scalar, Dim, NewMode> type; + }; +} + template<typename _Scalar> class UniformScaling { @@ -60,9 +76,11 @@ /** Concatenates a uniform scaling and an affine transformation */ template<int Dim, int Mode, int Options> - inline Transform<Scalar,Dim,(int(Mode)==int(Isometry)?Affine:Mode)> operator* (const Transform<Scalar,Dim, Mode, Options>& t) const + inline typename + internal::uniformscaling_times_affine_returntype<Scalar,Dim,Mode>::type + operator* (const Transform<Scalar, Dim, Mode, Options>& t) const { - Transform<Scalar,Dim,(int(Mode)==int(Isometry)?Affine:Mode)> res = t; + typename internal::uniformscaling_times_affine_returntype<Scalar,Dim,Mode>::type res = t; res.prescale(factor()); return res; } @@ -70,7 +88,7 @@ /** Concatenates a uniform scaling and a linear transformation matrix */ // TODO returns an expression template<typename Derived> - inline typename internal::plain_matrix_type<Derived>::type operator* (const MatrixBase<Derived>& other) const + inline typename Eigen::internal::plain_matrix_type<Derived>::type operator* (const MatrixBase<Derived>& other) const { return other * m_factor; } template<typename Derived,int Dim>
diff --git a/Eigen/src/OrderingMethods/Ordering.h b/Eigen/src/OrderingMethods/Ordering.h index 7ea9b14..34dbef4 100644 --- a/Eigen/src/OrderingMethods/Ordering.h +++ b/Eigen/src/OrderingMethods/Ordering.h
@@ -31,7 +31,7 @@ for (int i = 0; i < C.rows(); i++) { for (typename MatrixType::InnerIterator it(C, i); it; ++it) - it.valueRef() = 0.0; + it.valueRef() = typename MatrixType::Scalar(0); } symmat = C + A; }
diff --git a/Eigen/src/SVD/BDCSVD.h b/Eigen/src/SVD/BDCSVD.h index 0abd4c1..06865a3 100644 --- a/Eigen/src/SVD/BDCSVD.h +++ b/Eigen/src/SVD/BDCSVD.h
@@ -217,7 +217,7 @@ // Method to allocate and initialize matrix and attributes template<typename MatrixType> -void BDCSVD<MatrixType>::allocate(Index rows, Index cols, unsigned int computationOptions) +void BDCSVD<MatrixType>::allocate(Eigen::Index rows, Eigen::Index cols, unsigned int computationOptions) { m_isTranspose = (cols > rows); @@ -393,7 +393,7 @@ //@param shift : Each time one takes the left submatrix, one must add 1 to the shift. Why? Because! We actually want the last column of the U submatrix // to become the first column (*coeff) and to shift all the other columns to the right. There are more details on the reference paper. template<typename MatrixType> -void BDCSVD<MatrixType>::divide (Index firstCol, Index lastCol, Index firstRowW, Index firstColW, Index shift) +void BDCSVD<MatrixType>::divide (Eigen::Index firstCol, Eigen::Index lastCol, Eigen::Index firstRowW, Eigen::Index firstColW, Eigen::Index shift) { // requires rows = cols + 1; using std::pow; @@ -573,7 +573,7 @@ // handling of round-off errors, be consistent in ordering // For instance, to solve the secular equation using FMM, see http://www.stat.uchicago.edu/~lekheng/courses/302/classics/greengard-rokhlin.pdf template <typename MatrixType> -void BDCSVD<MatrixType>::computeSVDofM(Index firstCol, Index n, MatrixXr& U, VectorType& singVals, MatrixXr& V) +void BDCSVD<MatrixType>::computeSVDofM(Eigen::Index firstCol, Eigen::Index n, MatrixXr& U, VectorType& singVals, MatrixXr& V) { const RealScalar considerZero = (std::numeric_limits<RealScalar>::min)(); using std::abs; @@ -1059,7 +1059,7 @@ // i >= 1, di almost null and zi non null. // We use a rotation to zero out zi applied to the left of M template <typename MatrixType> -void BDCSVD<MatrixType>::deflation43(Index firstCol, Index shift, Index i, Index size) +void BDCSVD<MatrixType>::deflation43(Eigen::Index firstCol, Eigen::Index shift, Eigen::Index i, Eigen::Index size) { using std::abs; using std::sqrt; @@ -1088,7 +1088,7 @@ // We apply two rotations to have zj = 0; // TODO deflation44 is still broken and not properly tested template <typename MatrixType> -void BDCSVD<MatrixType>::deflation44(Index firstColu , Index firstColm, Index firstRowW, Index firstColW, Index i, Index j, Index size) +void BDCSVD<MatrixType>::deflation44(Eigen::Index firstColu , Eigen::Index firstColm, Eigen::Index firstRowW, Eigen::Index firstColW, Eigen::Index i, Eigen::Index j, Eigen::Index size) { using std::abs; using std::sqrt; @@ -1128,7 +1128,7 @@ // acts on block from (firstCol+shift, firstCol+shift) to (lastCol+shift, lastCol+shift) [inclusive] template <typename MatrixType> -void BDCSVD<MatrixType>::deflation(Index firstCol, Index lastCol, Index k, Index firstRowW, Index firstColW, Index shift) +void BDCSVD<MatrixType>::deflation(Eigen::Index firstCol, Eigen::Index lastCol, Eigen::Index k, Eigen::Index firstRowW, Eigen::Index firstColW, Eigen::Index shift) { using std::sqrt; using std::abs;
diff --git a/Eigen/src/SVD/JacobiSVD.h b/Eigen/src/SVD/JacobiSVD.h index 43488b1..1c7c803 100644 --- a/Eigen/src/SVD/JacobiSVD.h +++ b/Eigen/src/SVD/JacobiSVD.h
@@ -610,7 +610,7 @@ }; template<typename MatrixType, int QRPreconditioner> -void JacobiSVD<MatrixType, QRPreconditioner>::allocate(Index rows, Index cols, unsigned int computationOptions) +void JacobiSVD<MatrixType, QRPreconditioner>::allocate(Eigen::Index rows, Eigen::Index cols, unsigned int computationOptions) { eigen_assert(rows >= 0 && cols >= 0);
diff --git a/Eigen/src/SVD/JacobiSVD_LAPACKE.h b/Eigen/src/SVD/JacobiSVD_LAPACKE.h index 5027215..ff0516f 100644 --- a/Eigen/src/SVD/JacobiSVD_LAPACKE.h +++ b/Eigen/src/SVD/JacobiSVD_LAPACKE.h
@@ -61,9 +61,10 @@ u = (LAPACKE_TYPE*)m_matrixU.data(); \ } else { ldu=1; u=&dummy; }\ MatrixType localV; \ - ldvt = (m_computeFullV) ? internal::convert_index<lapack_int>(m_cols) : (m_computeThinV) ? internal::convert_index<lapack_int>(m_diagSize) : 1; \ + lapack_int vt_rows = (m_computeFullV) ? internal::convert_index<lapack_int>(m_cols) : (m_computeThinV) ? internal::convert_index<lapack_int>(m_diagSize) : 1; \ if (computeV()) { \ - localV.resize(ldvt, m_cols); \ + localV.resize(vt_rows, m_cols); \ + ldvt = internal::convert_index<lapack_int>(localV.outerStride()); \ vt = (LAPACKE_TYPE*)localV.data(); \ } else { ldvt=1; vt=&dummy; }\ Matrix<LAPACKE_RTYPE, Dynamic, Dynamic> superb; superb.resize(m_diagSize, 1); \
diff --git a/Eigen/src/SparseCholesky/SimplicialCholesky_impl.h b/Eigen/src/SparseCholesky/SimplicialCholesky_impl.h index 31e0699..84a1bf2 100644 --- a/Eigen/src/SparseCholesky/SimplicialCholesky_impl.h +++ b/Eigen/src/SparseCholesky/SimplicialCholesky_impl.h
@@ -122,7 +122,7 @@ for(StorageIndex k = 0; k < size; ++k) { // compute nonzero pattern of kth row of L, in topological order - y[k] = 0.0; // Y(0:k) is now all zero + y[k] = Scalar(0); // Y(0:k) is now all zero StorageIndex top = size; // stack for pattern is empty tags[k] = k; // mark node k as visited m_nonZerosPerCol[k] = 0; // count of nonzeros in column k of L @@ -146,12 +146,12 @@ /* compute numerical values kth row of L (a sparse triangular solve) */ RealScalar d = numext::real(y[k]) * m_shiftScale + m_shiftOffset; // get D(k,k), apply the shift function, and clear Y(k) - y[k] = 0.0; + y[k] = Scalar(0); for(; top < size; ++top) { Index i = pattern[top]; /* pattern[top:n-1] is pattern of L(:,k) */ Scalar yi = y[i]; /* get and clear Y(i) */ - y[i] = 0.0; + y[i] = Scalar(0); /* the nonzero entry L(k,i) */ Scalar l_ki;
diff --git a/test/block.cpp b/test/block.cpp index d610598..8c4dd87 100644 --- a/test/block.cpp +++ b/test/block.cpp
@@ -44,7 +44,7 @@ typedef typename MatrixType::RealScalar RealScalar; typedef Matrix<Scalar, MatrixType::RowsAtCompileTime, 1> VectorType; typedef Matrix<Scalar, 1, MatrixType::ColsAtCompileTime> RowVectorType; - typedef Matrix<Scalar, Dynamic, Dynamic> DynamicMatrixType; + typedef Matrix<Scalar, Dynamic, Dynamic, MatrixType::IsRowMajor?RowMajor:ColMajor> DynamicMatrixType; typedef Matrix<Scalar, Dynamic, 1> DynamicVectorType; Index rows = m.rows(); @@ -142,7 +142,7 @@ VERIFY(numext::real(ones.col(c1).dot(ones.col(c2))) == RealScalar(rows)); VERIFY(numext::real(ones.row(r1).dot(ones.row(r2))) == RealScalar(cols)); - // chekc that linear acccessors works on blocks + // check that linear acccessors works on blocks m1 = m1_copy; if((MatrixType::Flags&RowMajorBit)==0) VERIFY_IS_EQUAL(m1.leftCols(c1).coeff(r1+c1*rows), m1(r1,c1)); @@ -166,6 +166,13 @@ VERIFY_IS_APPROX( ((m1+m2).block(r1,c1,r2-r1+1,c2-c1+1).transpose().col(0)) , ((m1+m2).row(r1).segment(c1,c2-c1+1)).transpose() ); VERIFY_IS_APPROX( ((m1+m2).transpose().block(c1,r1,c2-c1+1,r2-r1+1).col(0)) , ((m1+m2).row(r1).segment(c1,c2-c1+1)).transpose() ); + VERIFY_IS_APPROX( (m1*1).topRows(r1), m1.topRows(r1) ); + VERIFY_IS_APPROX( (m1*1).leftCols(c1), m1.leftCols(c1) ); + VERIFY_IS_APPROX( (m1*1).transpose().topRows(c1), m1.transpose().topRows(c1) ); + VERIFY_IS_APPROX( (m1*1).transpose().leftCols(r1), m1.transpose().leftCols(r1) ); + VERIFY_IS_APPROX( (m1*1).transpose().middleRows(c1,c2-c1+1), m1.transpose().middleRows(c1,c2-c1+1) ); + VERIFY_IS_APPROX( (m1*1).transpose().middleCols(r1,r2-r1+1), m1.transpose().middleCols(r1,r2-r1+1) ); + // evaluation into plain matrices from expressions with direct access (stress MapBase) DynamicMatrixType dm; DynamicVectorType dv;
diff --git a/test/boostmultiprec.cpp b/test/boostmultiprec.cpp index e06e9bd..bafbd3b 100644 --- a/test/boostmultiprec.cpp +++ b/test/boostmultiprec.cpp
@@ -55,6 +55,10 @@ #include "bdcsvd.cpp" #endif +#ifdef EIGEN_TEST_PART_11 +#include "simplicial_cholesky.cpp" +#endif + #include <Eigen/Dense> #undef min @@ -197,5 +201,7 @@ CALL_SUBTEST_9(( jacobisvd(Mat(internal::random<int>(EIGEN_TEST_MAX_SIZE/4, EIGEN_TEST_MAX_SIZE), internal::random<int>(EIGEN_TEST_MAX_SIZE/4, EIGEN_TEST_MAX_SIZE/2))) )); CALL_SUBTEST_10(( bdcsvd(Mat(internal::random<int>(EIGEN_TEST_MAX_SIZE/4, EIGEN_TEST_MAX_SIZE), internal::random<int>(EIGEN_TEST_MAX_SIZE/4, EIGEN_TEST_MAX_SIZE/2))) )); + + CALL_SUBTEST_11(( test_simplicial_cholesky_T<Real,int>() )); }
diff --git a/test/main.h b/test/main.h index 429c44f..6079cbd 100644 --- a/test/main.h +++ b/test/main.h
@@ -175,6 +175,12 @@ eigen_assert_exception(void) {} ~eigen_assert_exception() { Eigen::no_more_assert = false; } }; + + struct eigen_static_assert_exception + { + eigen_static_assert_exception(void) {} + ~eigen_static_assert_exception() { Eigen::no_more_assert = false; } + }; } // If EIGEN_DEBUG_ASSERTS is defined and if no assertion is triggered while // one should have been, then the list of excecuted assertions is printed out. @@ -238,6 +244,7 @@ else \ EIGEN_THROW_X(Eigen::eigen_assert_exception()); \ } + #ifdef EIGEN_EXCEPTIONS #define VERIFY_RAISES_ASSERT(a) { \ Eigen::no_more_assert = false; \ @@ -249,13 +256,39 @@ catch (Eigen::eigen_assert_exception&) { VERIFY(true); } \ Eigen::report_on_cerr_on_assert_failure = true; \ } - #endif //EIGEN_EXCEPTIONS + #endif // EIGEN_EXCEPTIONS #endif // EIGEN_DEBUG_ASSERTS + #if defined(TEST_CHECK_STATIC_ASSERTIONS) && defined(EIGEN_EXCEPTIONS) + #define EIGEN_STATIC_ASSERT(a,MSG) \ + if( (!Eigen::internal::copy_bool(a)) && (!no_more_assert) )\ + { \ + Eigen::no_more_assert = true; \ + if(report_on_cerr_on_assert_failure) \ + eigen_plain_assert(a && #MSG); \ + else \ + EIGEN_THROW_X(Eigen::eigen_static_assert_exception()); \ + } + #define VERIFY_RAISES_STATIC_ASSERT(a) { \ + Eigen::no_more_assert = false; \ + Eigen::report_on_cerr_on_assert_failure = false; \ + try { \ + a; \ + VERIFY(Eigen::should_raise_an_assert && # a); \ + } \ + catch (Eigen::eigen_static_assert_exception&) { VERIFY(true); } \ + Eigen::report_on_cerr_on_assert_failure = true; \ + } + #endif // TEST_CHECK_STATIC_ASSERTIONS + #ifndef VERIFY_RAISES_ASSERT #define VERIFY_RAISES_ASSERT(a) \ std::cout << "Can't VERIFY_RAISES_ASSERT( " #a " ) with exceptions disabled\n"; #endif +#ifndef VERIFY_RAISES_STATIC_ASSERT + #define VERIFY_RAISES_STATIC_ASSERT(a) \ + std::cout << "Can't VERIFY_RAISES_STATIC_ASSERT( " #a " ) with exceptions disabled\n"; +#endif #if !defined(__CUDACC__) #define EIGEN_USE_CUSTOM_ASSERT @@ -264,10 +297,10 @@ #else // EIGEN_NO_ASSERTION_CHECKING #define VERIFY_RAISES_ASSERT(a) {} + #define VERIFY_RAISES_STATIC_ASSERT(a) {} #endif // EIGEN_NO_ASSERTION_CHECKING - #define EIGEN_INTERNAL_DEBUGGING #include <Eigen/QR> // required for createRandomPIMatrixOfRank
diff --git a/test/product_trmm.cpp b/test/product_trmm.cpp index 12e5544..e08d9f39 100644 --- a/test/product_trmm.cpp +++ b/test/product_trmm.cpp
@@ -29,7 +29,7 @@ typedef Matrix<Scalar,Dynamic,OtherCols,OtherCols==1?ColMajor:ResOrder> ResXS; typedef Matrix<Scalar,OtherCols,Dynamic,OtherCols==1?RowMajor:ResOrder> ResSX; - TriMatrix mat(rows,cols), tri(rows,cols), triTr(cols,rows); + TriMatrix mat(rows,cols), tri(rows,cols), triTr(cols,rows), s1tri(rows,cols), s1triTr(cols,rows); OnTheRight ge_right(cols,otherCols); OnTheLeft ge_left(otherCols,rows); @@ -42,6 +42,8 @@ mat.setRandom(); tri = mat.template triangularView<Mode>(); triTr = mat.transpose().template triangularView<Mode>(); + s1tri = (s1*mat).template triangularView<Mode>(); + s1triTr = (s1*mat).transpose().template triangularView<Mode>(); ge_right.setRandom(); ge_left.setRandom(); @@ -51,19 +53,29 @@ VERIFY_IS_APPROX( ge_xs.noalias() = mat.template triangularView<Mode>() * ge_right, tri * ge_right); VERIFY_IS_APPROX( ge_sx.noalias() = ge_left * mat.template triangularView<Mode>(), ge_left * tri); - VERIFY_IS_APPROX( ge_xs.noalias() = (s1*mat.adjoint()).template triangularView<Mode>() * (s2*ge_left.transpose()), s1*triTr.conjugate() * (s2*ge_left.transpose())); - VERIFY_IS_APPROX( ge_sx.noalias() = ge_right.transpose() * mat.adjoint().template triangularView<Mode>(), ge_right.transpose() * triTr.conjugate()); + if((Mode&UnitDiag)==0) + VERIFY_IS_APPROX( ge_xs.noalias() = (s1*mat.adjoint()).template triangularView<Mode>() * (s2*ge_left.transpose()), s1*triTr.conjugate() * (s2*ge_left.transpose())); - VERIFY_IS_APPROX( ge_xs.noalias() = (s1*mat.adjoint()).template triangularView<Mode>() * (s2*ge_left.adjoint()), s1*triTr.conjugate() * (s2*ge_left.adjoint())); - VERIFY_IS_APPROX( ge_sx.noalias() = ge_right.adjoint() * mat.adjoint().template triangularView<Mode>(), ge_right.adjoint() * triTr.conjugate()); + VERIFY_IS_APPROX( ge_xs.noalias() = (s1*mat.transpose()).template triangularView<Mode>() * (s2*ge_left.transpose()), s1triTr * (s2*ge_left.transpose())); + VERIFY_IS_APPROX( ge_sx.noalias() = (s2*ge_left) * (s1*mat).template triangularView<Mode>(), (s2*ge_left)*s1tri); + VERIFY_IS_APPROX( ge_sx.noalias() = ge_right.transpose() * mat.adjoint().template triangularView<Mode>(), ge_right.transpose() * triTr.conjugate()); + VERIFY_IS_APPROX( ge_sx.noalias() = ge_right.adjoint() * mat.adjoint().template triangularView<Mode>(), ge_right.adjoint() * triTr.conjugate()); + ge_xs_save = ge_xs; - VERIFY_IS_APPROX( (ge_xs_save + s1*triTr.conjugate() * (s2*ge_left.adjoint())).eval(), ge_xs.noalias() += (s1*mat.adjoint()).template triangularView<Mode>() * (s2*ge_left.adjoint()) ); + if((Mode&UnitDiag)==0) + VERIFY_IS_APPROX( (ge_xs_save + s1*triTr.conjugate() * (s2*ge_left.adjoint())).eval(), ge_xs.noalias() += (s1*mat.adjoint()).template triangularView<Mode>() * (s2*ge_left.adjoint()) ); + ge_xs_save = ge_xs; + VERIFY_IS_APPROX( (ge_xs_save + s1triTr * (s2*ge_left.adjoint())).eval(), ge_xs.noalias() += (s1*mat.transpose()).template triangularView<Mode>() * (s2*ge_left.adjoint()) ); ge_sx.setRandom(); ge_sx_save = ge_sx; - VERIFY_IS_APPROX( ge_sx_save - (ge_right.adjoint() * (-s1 * triTr).conjugate()).eval(), ge_sx.noalias() -= (ge_right.adjoint() * (-s1 * mat).adjoint().template triangularView<Mode>()).eval()); + if((Mode&UnitDiag)==0) + VERIFY_IS_APPROX( ge_sx_save - (ge_right.adjoint() * (-s1 * triTr).conjugate()).eval(), ge_sx.noalias() -= (ge_right.adjoint() * (-s1 * mat).adjoint().template triangularView<Mode>()).eval()); - VERIFY_IS_APPROX( ge_xs = (s1*mat).adjoint().template triangularView<Mode>() * ge_left.adjoint(), numext::conj(s1) * triTr.conjugate() * ge_left.adjoint()); + if((Mode&UnitDiag)==0) + VERIFY_IS_APPROX( ge_xs = (s1*mat).adjoint().template triangularView<Mode>() * ge_left.adjoint(), numext::conj(s1) * triTr.conjugate() * ge_left.adjoint()); + VERIFY_IS_APPROX( ge_xs = (s1*mat).transpose().template triangularView<Mode>() * ge_left.adjoint(), s1triTr * ge_left.adjoint()); + // TODO check with sub-matrix expressions ? }
diff --git a/test/ref.cpp b/test/ref.cpp index 769db04..9dd2c04 100644 --- a/test/ref.cpp +++ b/test/ref.cpp
@@ -13,7 +13,7 @@ #endif #define TEST_ENABLE_TEMPORARY_TRACKING - +#define TEST_CHECK_STATIC_ASSERTIONS #include "main.h" // test Ref.h @@ -255,6 +255,17 @@ test_ref_ambiguous(A, B); } +void test_ref_fixed_size_assert() +{ + Vector4f v4; + VectorXf vx(10); + VERIFY_RAISES_STATIC_ASSERT( Ref<Vector3f> y = v4; (void)y; ); + VERIFY_RAISES_STATIC_ASSERT( Ref<Vector3f> y = vx.head<4>(); (void)y; ); + VERIFY_RAISES_STATIC_ASSERT( Ref<const Vector3f> y = v4; (void)y; ); + VERIFY_RAISES_STATIC_ASSERT( Ref<const Vector3f> y = vx.head<4>(); (void)y; ); + VERIFY_RAISES_STATIC_ASSERT( Ref<const Vector3f> y = 2*v4; (void)y; ); +} + void test_ref() { for(int i = 0; i < g_repeat; i++) { @@ -277,4 +288,5 @@ } CALL_SUBTEST_7( test_ref_overloads() ); + CALL_SUBTEST_7( test_ref_fixed_size_assert() ); }
diff --git a/test/selfadjoint.cpp b/test/selfadjoint.cpp index 92401e5..aaa4888 100644 --- a/test/selfadjoint.cpp +++ b/test/selfadjoint.cpp
@@ -7,6 +7,7 @@ // Public License v. 2.0. If a copy of the MPL was not distributed // with this file, You can obtain one at http://mozilla.org/MPL/2.0/. +#define TEST_CHECK_STATIC_ASSERTIONS #include "main.h" // This file tests the basic selfadjointView API, @@ -45,6 +46,9 @@ m4 = m2; m4 -= m1.template selfadjointView<Lower>(); VERIFY_IS_APPROX(m4, m2-m3); + + VERIFY_RAISES_STATIC_ASSERT(m2.template selfadjointView<StrictlyUpper>()); + VERIFY_RAISES_STATIC_ASSERT(m2.template selfadjointView<UnitLower>()); } void bug_159()
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorFFT.h b/unsupported/Eigen/CXX11/src/Tensor/TensorFFT.h index 10e0a8a..f81da31 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorFFT.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorFFT.h
@@ -231,20 +231,32 @@ // t_n = exp(sqrt(-1) * pi * n^2 / line_len) // for n = 0, 1,..., line_len-1. // For n > 2 we use the recurrence t_n = t_{n-1}^2 / t_{n-2} * t_1^2 - pos_j_base_powered[0] = ComplexScalar(1, 0); - if (line_len > 1) { - const RealScalar pi_over_len(EIGEN_PI / line_len); - const ComplexScalar pos_j_base = ComplexScalar( - std::cos(pi_over_len), std::sin(pi_over_len)); - pos_j_base_powered[1] = pos_j_base; - if (line_len > 2) { - const ComplexScalar pos_j_base_sq = pos_j_base * pos_j_base; - for (int j = 2; j < line_len + 1; ++j) { - pos_j_base_powered[j] = pos_j_base_powered[j - 1] * - pos_j_base_powered[j - 1] / - pos_j_base_powered[j - 2] * pos_j_base_sq; - } - } + + // The recurrence is correct in exact arithmetic, but causes + // numerical issues for large transforms, especially in + // single-precision floating point. + // + // pos_j_base_powered[0] = ComplexScalar(1, 0); + // if (line_len > 1) { + // const ComplexScalar pos_j_base = ComplexScalar( + // numext::cos(M_PI / line_len), numext::sin(M_PI / line_len)); + // pos_j_base_powered[1] = pos_j_base; + // if (line_len > 2) { + // const ComplexScalar pos_j_base_sq = pos_j_base * pos_j_base; + // for (int i = 2; i < line_len + 1; ++i) { + // pos_j_base_powered[i] = pos_j_base_powered[i - 1] * + // pos_j_base_powered[i - 1] / + // pos_j_base_powered[i - 2] * + // pos_j_base_sq; + // } + // } + // } + // TODO(rmlarsen): Find a way to use Eigen's vectorized sin + // and cosine functions here. + for (int j = 0; j < line_len + 1; ++j) { + double arg = ((EIGEN_PI * j) * j) / line_len; + std::complex<double> tmp(numext::cos(arg), numext::sin(arg)); + pos_j_base_powered[j] = static_cast<ComplexScalar>(tmp); } }
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorImagePatch.h b/unsupported/Eigen/CXX11/src/Tensor/TensorImagePatch.h index 3c6a2e0..91d4ead 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorImagePatch.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorImagePatch.h
@@ -265,6 +265,10 @@ // Calculate the padding m_rowPaddingTop = ((m_outputRows - 1) * m_row_strides + m_patch_rows_eff - m_input_rows_eff) / 2; m_colPaddingLeft = ((m_outputCols - 1) * m_col_strides + m_patch_cols_eff - m_input_cols_eff) / 2; + // The padding size calculation for PADDING_SAME has been updated to + // be consistent with how TensorFlow extracts its paddings. + m_rowPaddingTop = numext::maxi<Index>(0, m_rowPaddingTop); + m_colPaddingLeft = numext::maxi<Index>(0, m_colPaddingLeft); break; default: eigen_assert(false && "unexpected padding");
diff --git a/unsupported/Eigen/src/SpecialFunctions/SpecialFunctionsImpl.h b/unsupported/Eigen/src/SpecialFunctions/SpecialFunctionsImpl.h index 5d1b8fc..7deca3e 100644 --- a/unsupported/Eigen/src/SpecialFunctions/SpecialFunctionsImpl.h +++ b/unsupported/Eigen/src/SpecialFunctions/SpecialFunctionsImpl.h
@@ -535,6 +535,10 @@ return nan; } + if (numext::isnan(a) || numext::isnan(x)) { // propagate nans + return nan; + } + if ((x < one) || (x < a)) { /* The checks above ensure that we meet the preconditions for * igamma_impl::Impl(), so call it, rather than igamma_impl::Run(). @@ -592,7 +596,7 @@ qkm1 = z * x; ans = pkm1 / qkm1; - while (true) { + for (int i = 0; i < 2000; i++) { c += one; y += one; z += two; @@ -724,6 +728,10 @@ return nan; } + if (numext::isnan(a) || numext::isnan(x)) { // propagate nans + return nan; + } + if ((x > one) && (x > a)) { /* The checks above ensure that we meet the preconditions for * igammac_impl::Impl(), so call it, rather than igammac_impl::Run(). @@ -770,7 +778,7 @@ c = one; ans = one; - while (true) { + for (int i = 0; i < 2000; i++) { r += one; c *= x/r; ans += c;
diff --git a/unsupported/test/cxx11_tensor_fft.cpp b/unsupported/test/cxx11_tensor_fft.cpp index 2f14ebc..a553694 100644 --- a/unsupported/test/cxx11_tensor_fft.cpp +++ b/unsupported/test/cxx11_tensor_fft.cpp
@@ -224,6 +224,32 @@ } } +template <typename RealScalar> +static void test_fft_non_power_of_2_round_trip(int exponent) { + int n = (1 << exponent) + 1; + + Eigen::DSizes<long, 1> dimensions; + dimensions[0] = n; + const DSizes<long, 1> arr = dimensions; + Tensor<RealScalar, 1, ColMajor, long> input; + + input.resize(arr); + input.setRandom(); + + array<int, 1> fft; + fft[0] = 0; + + Tensor<std::complex<RealScalar>, 1, ColMajor> forward = + input.template fft<BothParts, FFT_FORWARD>(fft); + + Tensor<RealScalar, 1, ColMajor, long> output = + forward.template fft<RealPart, FFT_REVERSE>(fft); + + for (int i = 0; i < n; ++i) { + VERIFY_IS_APPROX(input[i], output[i]); + } +} + void test_cxx11_tensor_fft() { test_fft_complex_input_golden(); test_fft_real_input_golden(); @@ -270,4 +296,6 @@ test_fft_real_input_energy<RowMajor, double, true, Eigen::BothParts, FFT_FORWARD, 4>(); test_fft_real_input_energy<RowMajor, float, false, Eigen::BothParts, FFT_FORWARD, 4>(); test_fft_real_input_energy<RowMajor, double, false, Eigen::BothParts, FFT_FORWARD, 4>(); + + test_fft_non_power_of_2_round_trip<float>(7); }
diff --git a/unsupported/test/cxx11_tensor_image_patch.cpp b/unsupported/test/cxx11_tensor_image_patch.cpp index 475c596..105d32f 100644 --- a/unsupported/test/cxx11_tensor_image_patch.cpp +++ b/unsupported/test/cxx11_tensor_image_patch.cpp
@@ -405,6 +405,57 @@ } } +// Verifies that SAME padding, when computed as negative values, will be clipped +// to zero. +void test_patch_padding_same_negative_padding_clip_to_zero() { + int input_depth = 1; + int input_rows = 15; + int input_cols = 1; + int input_batches = 1; + int ksize = 1; // Corresponds to the Rows and Cols for + // tensor.extract_image_patches<>. + int row_stride = 5; + int col_stride = 1; + // ColMajor + Tensor<float, 4> tensor(input_depth, input_rows, input_cols, input_batches); + // Initializes tensor with incrementing numbers. + for (int i = 0; i < tensor.size(); ++i) { + tensor.data()[i] = i + 1; + } + Tensor<float, 5> result = tensor.extract_image_patches( + ksize, ksize, row_stride, col_stride, 1, 1, PADDING_SAME); + // row padding will be computed as -2 originally and then be clipped to 0. + VERIFY_IS_EQUAL(result.coeff(0), 1.0f); + VERIFY_IS_EQUAL(result.coeff(1), 6.0f); + VERIFY_IS_EQUAL(result.coeff(2), 11.0f); + + VERIFY_IS_EQUAL(result.dimension(0), input_depth); // depth + VERIFY_IS_EQUAL(result.dimension(1), ksize); // kernel rows + VERIFY_IS_EQUAL(result.dimension(2), ksize); // kernel cols + VERIFY_IS_EQUAL(result.dimension(3), 3); // number of patches + VERIFY_IS_EQUAL(result.dimension(4), input_batches); // number of batches + + // RowMajor + Tensor<float, 4, RowMajor> tensor_row_major = tensor.swap_layout(); + VERIFY_IS_EQUAL(tensor.dimension(0), tensor_row_major.dimension(3)); + VERIFY_IS_EQUAL(tensor.dimension(1), tensor_row_major.dimension(2)); + VERIFY_IS_EQUAL(tensor.dimension(2), tensor_row_major.dimension(1)); + VERIFY_IS_EQUAL(tensor.dimension(3), tensor_row_major.dimension(0)); + + Tensor<float, 5, RowMajor> result_row_major = + tensor_row_major.extract_image_patches(ksize, ksize, row_stride, + col_stride, 1, 1, PADDING_SAME); + VERIFY_IS_EQUAL(result_row_major.coeff(0), 1.0f); + VERIFY_IS_EQUAL(result_row_major.coeff(1), 6.0f); + VERIFY_IS_EQUAL(result_row_major.coeff(2), 11.0f); + + VERIFY_IS_EQUAL(result.dimension(0), result_row_major.dimension(4)); + VERIFY_IS_EQUAL(result.dimension(1), result_row_major.dimension(3)); + VERIFY_IS_EQUAL(result.dimension(2), result_row_major.dimension(2)); + VERIFY_IS_EQUAL(result.dimension(3), result_row_major.dimension(1)); + VERIFY_IS_EQUAL(result.dimension(4), result_row_major.dimension(0)); +} + void test_patch_no_extra_dim() { Tensor<float, 3> tensor(2,3,5); @@ -754,4 +805,5 @@ CALL_SUBTEST_4(test_patch_padding_valid_same_value()); CALL_SUBTEST_5(test_patch_padding_same()); CALL_SUBTEST_6(test_imagenet_patches()); + CALL_SUBTEST_7(test_patch_padding_same_negative_padding_clip_to_zero()); }