| // This file is part of Eigen, a lightweight C++ template library |
| // for linear algebra. |
| // |
| // Copyright (C) 2014 Benoit Steiner <benoit.steiner.goog@gmail.com> |
| // |
| // This Source Code Form is subject to the terms of the Mozilla |
| // Public License v. 2.0. If a copy of the MPL was not distributed |
| // with this file, You can obtain one at http://mozilla.org/MPL/2.0/. |
| |
| #ifndef EIGEN_CXX11_TENSOR_TENSOR_INTDIV_H |
| #define EIGEN_CXX11_TENSOR_TENSOR_INTDIV_H |
| |
| |
| namespace Eigen { |
| |
/** \internal
  *
  * \class TensorIntDivisor
  * \ingroup CXX11_Tensor_Module
  *
  * \brief Fast integer division by a constant.
  *
  * See the paper by Granlund and Montgomery, "Division by Invariant Integers
  * using Multiplication", for an explanation of the algorithm
  * (available at http://dx.doi.org/10.1145/773473.178249).
  *
  * \sa Tensor
  */
| |
| namespace internal { |
| |
| template <typename T> |
| struct TensorIntDivisor { |
| public: |
| EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorIntDivisor() { |
| multiplier = 0; |
| shift1 = 0; |
| shift2 = 0; |
| } |
| |
| // Must have 1 <= divider <= 2^31-1 |
| EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorIntDivisor(const T divider) { |
| const int N = 32; |
| eigen_assert(divider > 0); |
| eigen_assert(divider <= (1<<(N-1)) - 1); |
| |
| // fast ln2 |
| #ifndef __CUDA_ARCH__ |
| const int leading_zeros = __builtin_clz(divider); |
| #else |
| const int leading_zeros = __clz(divider); |
| #endif |
| const int log_div = N - (leading_zeros+1); |
| |
| multiplier = (static_cast<uint64_t>(1) << (N+log_div)) / divider - (static_cast<uint64_t>(1) << N) + 1; |
| shift1 = log_div > 1 ? 1 : log_div; |
| shift2 = log_div > 1 ? log_div-1 : 0; |
| } |
| |
| // Must have 0 <= numerator <= 2^32-1 |
| EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T divide(const T numerator) const { |
| const int N = 32; |
| eigen_assert(numerator >= 0); |
| eigen_assert(numerator <= (1ull<<N) - 1); |
| |
| uint32_t t1 = (multiplier * numerator) >> 32; |
| uint32_t t = (static_cast<uint32_t>(numerator) - t1) >> shift1; |
| return (t1 + t) >> shift2; |
| } |
| |
| private: |
| uint64_t multiplier; |
| int32_t shift1; |
| int32_t shift2; |
| }; |
| |
| |
| template <typename T> |
| static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T operator / (const T& numerator, const TensorIntDivisor<T>& divisor) { |
| return divisor.divide(numerator); |
| } |
| |
| |
| } // end namespace internal |
| } // end namespace Eigen |
| |
| #endif // EIGEN_CXX11_TENSOR_TENSOR_INTDIV_H |