| // This file is part of Eigen, a lightweight C++ template library |
| // for linear algebra. |
| // |
| // Copyright (C) 2018 Andy Davis <andydavis@google.com> |
| // Copyright (C) 2018 Eugene Zhulenev <ezhulenev@google.com> |
| // |
| // This Source Code Form is subject to the terms of the Mozilla |
| // Public License v. 2.0. If a copy of the MPL was not distributed |
| // with this file, You can obtain one at http://mozilla.org/MPL/2.0/. |
| |
| #ifndef EIGEN_CXX11_TENSOR_TENSOR_BLOCK_H |
| #define EIGEN_CXX11_TENSOR_TENSOR_BLOCK_H |
| |
| namespace Eigen { |
| namespace internal { |
| |
| namespace { |
| |
| // Helper template to choose between ColMajor and RowMajor values. |
| template <int Layout> |
| struct cond; |
| |
| template <> |
| struct cond<ColMajor> { |
| template <typename T> |
| EIGEN_STRONG_INLINE const T& operator()(const T& col, |
| const T& /*row*/) const { |
| return col; |
| } |
| }; |
| |
| template <> |
| struct cond<RowMajor> { |
| template <typename T> |
| EIGEN_STRONG_INLINE const T& operator()(const T& /*col*/, |
| const T& row) const { |
| return row; |
| } |
| }; |
| |
| } // namespace |
| |
| /** |
| * \enum TensorBlockShapeType |
| * \ingroup CXX11_Tensor_Module |
| * |
| * \brief Tensor block shape type. |
| * |
 * The tensor block shape type defines the preferred shape of the blocks
 * extracted from the larger tensor.
| * |
| * Example: |
| * |
 * Suppose we want to extract blocks of 100 elements each from a 100x100
 * tensor:
| * - tensor: 100x100 |
| * - target_block_size: 100 |
| * |
| * TensorBlockShapeType: |
| * - kUniformAllDims: 100 blocks of size 10x10 |
 *  - kSkewedInnerDims: 100 blocks of size 100x1 (or 1x100, depending on
 *    column- or row-major layout)
| */ |
| enum TensorBlockShapeType { |
| kUniformAllDims, |
| kSkewedInnerDims |
| }; |
| |
| struct TensorOpResourceRequirements { |
| TensorBlockShapeType block_shape; |
| Index block_total_size; |
| // TODO(andydavis) Add 'target_num_threads' to support communication of |
| // thread-resource requirements. This will allow ops deep in the |
  // expression tree (like reductions) to communicate resource
  // requirements based on local state (like the total number of reductions
| // to be computed). |
| TensorOpResourceRequirements(TensorBlockShapeType shape, |
| const Index size) |
| : block_shape(shape), block_total_size(size) {} |
| }; |
| |
| // Tries to merge multiple resource requirements. |
| EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void MergeResourceRequirements( |
| const std::vector<TensorOpResourceRequirements>& resources, |
| TensorBlockShapeType* block_shape, Index* block_total_size) { |
| if (resources.empty()) { |
| return; |
| } |
  // TODO(andydavis) Implement different policies (e.g. revert to a default
| // policy if block shapes/sizes conflict). |
| *block_shape = resources[0].block_shape; |
| *block_total_size = resources[0].block_total_size; |
  for (std::vector<TensorOpResourceRequirements>::size_type i = 1;
       i < resources.size(); ++i) {
| if (resources[i].block_shape == kSkewedInnerDims && |
| *block_shape != kSkewedInnerDims) { |
| *block_shape = kSkewedInnerDims; |
| } |
| *block_total_size = |
| numext::maxi(*block_total_size, resources[i].block_total_size); |
| } |
| } |
| |
| /** |
| * \class TensorBlock |
| * \ingroup CXX11_Tensor_Module |
| * |
| * \brief Tensor block class. |
| * |
 * This class represents a tensor block specified by the index of its first
 * coefficient and the size of the block in each dimension.
| */ |
| template <typename Scalar, typename StorageIndex, int NumDims, int Layout> |
| class TensorBlock { |
| public: |
| typedef DSizes<StorageIndex, NumDims> Dimensions; |
| |
| TensorBlock(const StorageIndex first_coeff_index, const Dimensions& block_sizes, |
| const Dimensions& block_strides, const Dimensions& tensor_strides, |
| Scalar* data) |
| : m_first_coeff_index(first_coeff_index), |
| m_block_sizes(block_sizes), |
| m_block_strides(block_strides), |
| m_tensor_strides(tensor_strides), |
| m_data(data) {} |
| |
| StorageIndex first_coeff_index() const { return m_first_coeff_index; } |
| |
| const Dimensions& block_sizes() const { return m_block_sizes; } |
| |
| const Dimensions& block_strides() const { return m_block_strides; } |
| |
| const Dimensions& tensor_strides() const { return m_tensor_strides; } |
| |
| Scalar* data() { return m_data; } |
| |
| const Scalar* data() const { return m_data; } |
| |
| private: |
| StorageIndex m_first_coeff_index; |
| Dimensions m_block_sizes; |
| Dimensions m_block_strides; |
| Dimensions m_tensor_strides; |
| Scalar* m_data; // Not owned. |
| }; |
| |
| /** |
| * \class TensorBlockMapper |
| * \ingroup CXX11_Tensor_Module |
| * |
| * \brief Tensor block mapper class. |
| * |
 * This class is responsible for iterating over the blocks of a tensor: it
 * computes the block dimensions and maps a linear block index to the
 * corresponding TensorBlock.
| */ |
| template <typename Scalar, typename StorageIndex, int NumDims, int Layout> |
| class TensorBlockMapper { |
| public: |
| typedef TensorBlock<Scalar, StorageIndex, NumDims, Layout> Block; |
| typedef DSizes<StorageIndex, NumDims> Dimensions; |
| |
| TensorBlockMapper() {} |
| TensorBlockMapper(const Dimensions& dims, |
| const TensorBlockShapeType block_shape, |
| Index min_target_size) |
| : m_dimensions(dims), |
        m_block_dim_sizes(BlockDimensions(
            dims, block_shape, convert_index<StorageIndex>(min_target_size))) {
| // Calculate block counts by dimension and total block count. |
| DSizes<StorageIndex, NumDims> block_count; |
| for (Index i = 0; i < block_count.rank(); ++i) { |
| block_count[i] = divup(m_dimensions[i], m_block_dim_sizes[i]); |
| } |
| m_total_block_count = array_prod(block_count); |
| |
| // Calculate block strides (used for enumerating blocks). |
| if (NumDims > 0) { |
| if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) { |
| m_block_strides[0] = 1; |
| m_tensor_strides[0] = 1; |
| for (int i = 1; i < NumDims; ++i) { |
| m_block_strides[i] = m_block_strides[i - 1] * block_count[i - 1]; |
| m_tensor_strides[i] = m_tensor_strides[i - 1] * m_dimensions[i - 1]; |
| } |
| } else { |
| m_block_strides[NumDims - 1] = 1; |
| m_tensor_strides[NumDims - 1] = 1; |
| for (int i = NumDims - 2; i >= 0; --i) { |
| m_block_strides[i] = m_block_strides[i + 1] * block_count[i + 1]; |
| m_tensor_strides[i] = m_tensor_strides[i + 1] * m_dimensions[i + 1]; |
| } |
| } |
| } |
| } |
| |
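  // Maps a linear block index to the corresponding block: decomposes the index
  // into per-dimension block coordinates, clamps the block sizes at the tensor
  // boundaries, and computes the strides of the clamped block and the index of
  // its first coefficient within the tensor.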
| EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Block |
| GetBlockForIndex(StorageIndex block_index, Scalar* data) const { |
| StorageIndex first_coeff_index = 0; |
| DSizes<StorageIndex, NumDims> coords; |
| DSizes<StorageIndex, NumDims> sizes; |
| DSizes<StorageIndex, NumDims> strides; |
| if (NumDims > 0) { |
| if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) { |
| for (int i = NumDims - 1; i > 0; --i) { |
| const StorageIndex idx = block_index / m_block_strides[i]; |
| coords[i] = idx * m_block_dim_sizes[i]; |
| sizes[i] = |
| numext::mini((m_dimensions[i] - coords[i]), m_block_dim_sizes[i]); |
| block_index -= idx * m_block_strides[i]; |
| first_coeff_index += coords[i] * m_tensor_strides[i]; |
| } |
| coords[0] = block_index * m_block_dim_sizes[0]; |
| sizes[0] = |
| numext::mini((m_dimensions[0] - coords[0]), m_block_dim_sizes[0]); |
| first_coeff_index += coords[0] * m_tensor_strides[0]; |
| |
| strides[0] = 1; |
| for (int i = 1; i < NumDims; ++i) { |
| strides[i] = strides[i - 1] * sizes[i - 1]; |
| } |
| } else { |
| for (int i = 0; i < NumDims - 1; ++i) { |
| const StorageIndex idx = block_index / m_block_strides[i]; |
| coords[i] = idx * m_block_dim_sizes[i]; |
| sizes[i] = |
| numext::mini((m_dimensions[i] - coords[i]), m_block_dim_sizes[i]); |
| block_index -= idx * m_block_strides[i]; |
| first_coeff_index += coords[i] * m_tensor_strides[i]; |
| } |
| coords[NumDims - 1] = block_index * m_block_dim_sizes[NumDims - 1]; |
| sizes[NumDims - 1] = |
| numext::mini((m_dimensions[NumDims - 1] - coords[NumDims - 1]), |
| m_block_dim_sizes[NumDims - 1]); |
| first_coeff_index += |
| coords[NumDims - 1] * m_tensor_strides[NumDims - 1]; |
| |
| strides[NumDims - 1] = 1; |
| for (int i = NumDims - 2; i >= 0; --i) { |
| strides[i] = strides[i + 1] * sizes[i + 1]; |
| } |
| } |
| } |
| |
| return Block(first_coeff_index, sizes, strides, m_tensor_strides, data); |
| } |
| |
| EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE StorageIndex total_block_count() const { |
| return m_total_block_count; |
| } |
| |
| EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE StorageIndex |
| block_dims_total_size() const { |
| return m_block_dim_sizes.TotalSize(); |
| } |
| |
| EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& |
| block_dim_sizes() const { |
| return m_block_dim_sizes; |
| } |
| |
| private: |
| static Dimensions BlockDimensions(const Dimensions& tensor_dims, |
| const TensorBlockShapeType block_shape, |
| StorageIndex min_target_size) { |
| min_target_size = numext::maxi<StorageIndex>(1, min_target_size); |
| |
    // If the tensor fully fits into the target size, treat it as a single
    // block.
| Dimensions block_dim_sizes = tensor_dims; |
| |
| if (tensor_dims.TotalSize() == 0) { |
      // Corner case: one of the dimensions is zero. The logic below is too
      // complex to handle this case in a general way, so just use a unit block
      // size. Note: we must not yield blocks with zero dimensions (a recipe
      // for overflows/underflows, divisions by zero and NaNs later).
| for (int i = 0; i < NumDims; ++i) { |
| block_dim_sizes[i] = 1; |
| } |
| } else if (block_dim_sizes.TotalSize() > min_target_size) { |
| if (block_shape == kUniformAllDims) { |
        // The tensor does not fit within the 'min_target_size' budget:
        // calculate block dimension sizes based on a "square" per-dimension
        // size target.
| const StorageIndex dim_size_target = convert_index<StorageIndex>( |
| std::pow(static_cast<float>(min_target_size), |
| 1.0f / static_cast<float>(block_dim_sizes.rank()))); |
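        // For example, with min_target_size = 100 and rank() == 2 the
        // per-dimension target is 100^(1/2) = 10, i.e. a 10x10 block.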
| for (Index i = 0; i < block_dim_sizes.rank(); ++i) { |
          // TODO(andydavis) Adjust the innermost 'block_dim_size' to make it
| // a multiple of the packet size. Note that reducing |
| // 'block_dim_size' in this manner can increase the number of |
| // blocks, and so will amplify any per-block overhead. |
| block_dim_sizes[i] = numext::mini(dim_size_target, tensor_dims[i]); |
| } |
        // Add any unallocated coefficients to the inner dimension(s).
| StorageIndex total_size = block_dim_sizes.TotalSize(); |
| for (int i = 0; i < NumDims; ++i) { |
| const int dim = cond<Layout>()(i, NumDims - i - 1); |
| if (block_dim_sizes[dim] < tensor_dims[dim]) { |
| const StorageIndex total_size_other_dims = |
| total_size / block_dim_sizes[dim]; |
| const StorageIndex alloc_avail = |
| divup<StorageIndex>(min_target_size, total_size_other_dims); |
| if (alloc_avail == block_dim_sizes[dim]) { |
| // Insufficient excess coefficients to allocate. |
| break; |
| } |
| block_dim_sizes[dim] = numext::mini(tensor_dims[dim], alloc_avail); |
| total_size = total_size_other_dims * block_dim_sizes[dim]; |
| } |
| } |
| } else if (block_shape == kSkewedInnerDims) { |
| StorageIndex coeff_to_allocate = min_target_size; |
| for (int i = 0; i < NumDims; ++i) { |
| const int dim = cond<Layout>()(i, NumDims - i - 1); |
| block_dim_sizes[dim] = |
| numext::mini(coeff_to_allocate, tensor_dims[dim]); |
| coeff_to_allocate = divup( |
| coeff_to_allocate, |
| numext::maxi(static_cast<StorageIndex>(1), block_dim_sizes[dim])); |
| } |
| eigen_assert(coeff_to_allocate == 1); |
| } else { |
        eigen_assert(false);  // unknown block shape type
| } |
| } |
| |
| eigen_assert( |
| block_dim_sizes.TotalSize() >= |
| numext::mini<Index>(min_target_size, tensor_dims.TotalSize())); |
| |
| return block_dim_sizes; |
| } |
| |
| Dimensions m_dimensions; |
| Dimensions m_block_dim_sizes; |
| Dimensions m_block_strides; |
| Dimensions m_tensor_strides; |
| StorageIndex m_total_block_count; |
| }; |
| |
| } // namespace internal |
| |
| } // namespace Eigen |
| |
| #endif // EIGEN_CXX11_TENSOR_TENSOR_BLOCK_H |