// unsupported/test/tensor_block_eval.cpp - mirror - Git at Google

 // This file is part of Eigen, a lightweight C++ template library
 // for linear algebra.
 //
 // This Source Code Form is subject to the terms of the Mozilla
 // Public License v. 2.0. If a copy of the MPL was not distributed
 // with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
 // SPDX-FileCopyrightText: The Eigen Authors
 // SPDX-License-Identifier: MPL-2.0

 // clang-format off
 #include "main.h"
 #include <Eigen/Tensor>
 // clang-format on

 using Eigen::internal::TensorBlockDescriptor;
 using Eigen::internal::TensorExecutor;

 // Fills `tensor` with random data for the element-wise product test. Integral
 // element types other than bool are drawn through setRandomDataInRange with the
 // bounds T(-1000) and T(1000) (presumably so that products stay representable);
 // every other element type uses Tensor::setRandom().
 template <typename T, int NumDims, int Layout>
 static void setRandomForBinaryProduct(Tensor<T, NumDims, Layout>& tensor) {
   constexpr bool kBoundedInteger = std::is_integral<T>::value && !std::is_same<T, bool>::value;
   EIGEN_IF_CONSTEXPR(kBoundedInteger) { setRandomDataInRange(tensor, T(-1000), T(1000)); }
   else {
     tensor.setRandom();
   }
 }

 // -------------------------------------------------------------------------- //
 // Utility functions to generate random tensors, blocks, and evaluate them.

 // Returns a NumDims-long size vector in which every extent is drawn
 // independently with internal::random<Index>(min, max).
 template <int NumDims>
 static DSizes<Index, NumDims> RandomDims(Index min, Index max) {
   DSizes<Index, NumDims> extents;
   for (int axis = 0; axis != NumDims; ++axis) {
     extents[axis] = internal::random<Index>(min, max);
   }
   return extents;
 }

 // Bundles a block descriptor with the per-dimension offsets and extents of the
 // same block. The offsets and extents make it possible to construct the
 // TensorSlicingOp that corresponds to the TensorBlockDescriptor.
 template <int NumDims>
 struct TensorBlockParams {
   // Coordinates of the first block coefficient along each dimension.
   DSizes<Index, NumDims> offsets;
   // Block extent along each dimension.
   DSizes<Index, NumDims> sizes;
   // Descriptor of the block that is handed to the tensor evaluator.
   TensorBlockDescriptor<NumDims, Index> desc;
 };

 // Picks a random block inside a tensor of shape `dims`. Candidate offsets and
 // sizes are both drawn from [min, max] and then clamped so that
 // offset + size never exceeds the tensor extent. The descriptor offset is the
 // linear index of the first block coefficient for the given Layout.
 template <int Layout, int NumDims>
 static TensorBlockParams<NumDims> RandomBlock(DSizes<Index, NumDims> dims, Index min, Index max) {
   // Offsets are drawn before sizes.
   DSizes<Index, NumDims> block_offsets(RandomDims<NumDims>(min, max));
   DSizes<Index, NumDims> block_sizes(RandomDims<NumDims>(min, max));

   DSizes<Index, NumDims> tensor_strides = Eigen::internal::strides<Layout>(dims);
   Index linear_offset = 0;
   for (int axis = 0; axis < NumDims; ++axis) {
     // Clamp the block to the tensor bounds along this axis.
     block_offsets[axis] = numext::mini(dims[axis] - 1, block_offsets[axis]);
     block_sizes[axis] = numext::mini(block_sizes[axis], dims[axis] - block_offsets[axis]);

     // Accumulate the linear index of the first block coefficient.
     linear_offset += tensor_strides[axis] * block_offsets[axis];
   }

   return {block_offsets, block_sizes, TensorBlockDescriptor<NumDims, Index>(linear_offset, block_sizes)};
 }

 // Generates a block whose sizes are skewed towards the inner dimensions. This
 // kind of block is required for evaluating broadcast expressions. A
 // TensorBlockMapper with a random target size partitions `dims`; one of its
 // blocks is picked at random and its linear offset is converted back into
 // per-dimension offsets.
 template <int Layout, int NumDims>
 static TensorBlockParams<NumDims> SkewedInnerBlock(DSizes<Index, NumDims> dims) {
   using BlockMapper = internal::TensorBlockMapper<NumDims, Layout, Index>;

   const size_t target_size = internal::random<size_t>(1, dims.TotalSize());
   BlockMapper mapper(dims, {internal::TensorBlockShapeType::kSkewedInnerDims, target_size, {0, 0, 0}});

   const Index num_blocks = mapper.blockCount();
   const Index chosen = internal::random<Index>(0, num_blocks - 1);
   auto block = mapper.blockDescriptor(chosen);
   DSizes<Index, NumDims> sizes = block.dimensions();

   auto strides = internal::strides<Layout>(dims);
   DSizes<Index, NumDims> offsets;

   // Peel coordinates off the linear offset, outermost dimension first; what is
   // left at the end is the coordinate along the innermost dimension.
   Index remainder = block.offset();
   if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
     for (int axis = NumDims - 1; axis > 0; --axis) {
       offsets[axis] = remainder / strides[axis];
       remainder -= offsets[axis] * strides[axis];
     }
     if (NumDims > 0) offsets[0] = remainder;
   } else {
     for (int axis = 0; axis < NumDims - 1; ++axis) {
       offsets[axis] = remainder / strides[axis];
       remainder -= offsets[axis] * strides[axis];
     }
     if (NumDims > 0) offsets[NumDims - 1] = remainder;
   }

   return {offsets, sizes, block};
 }

 // Returns block parameters covering the whole tensor: all offsets are zero,
 // the sizes equal `dims`, and the descriptor starts at linear offset 0.
 template <int NumDims>
 static TensorBlockParams<NumDims> FixedSizeBlock(DSizes<Index, NumDims> dims) {
   DSizes<Index, NumDims> origin;
   for (int axis = 0; axis != NumDims; ++axis) {
     origin[axis] = 0;
   }

   return {origin, dims, TensorBlockDescriptor<NumDims, Index>(0, dims)};
 }

 // Builds the index list (n, 1): a runtime first entry and a compile-time
 // second entry equal to 1.
 inline Eigen::IndexList<Index, Eigen::type2index<1>> NByOne(Index n) {
   using Shape = Eigen::IndexList<Index, Eigen::type2index<1>>;
   Shape shape;
   shape.set(0, n);
   return shape;
 }
 // Builds the index list (1, m): a compile-time first entry equal to 1 and a
 // runtime second entry.
 inline Eigen::IndexList<Eigen::type2index<1>, Index> OneByM(Index m) {
   using Shape = Eigen::IndexList<Eigen::type2index<1>, Index>;
   Shape shape;
   shape.set(1, m);
   return shape;
 }

 // -------------------------------------------------------------------------- //
 // Verify that block expression evaluation produces the same result as a
 // TensorSliceOp (reading a tensor block is the same as taking a tensor slice).

 // Test harness: evaluates one block of `expr` through the block evaluation API
 // and checks that it is coefficient-wise equal to the corresponding slice of
 // `expr` evaluated with a plain (non-vectorized, non-tiled) executor.
 //
 //   T, NumDims, Layout - scalar type, rank and layout of the expression result.
 //   expr               - tensor expression under test (taken by value).
 //   gen_block          - callable returning the TensorBlockParams<NumDims> of
 //                        the block to evaluate.
 //
 // Several choices (destination buffer shape, whether a destination buffer is
 // offered at all, the root_of_expr flag) are randomized on every call.
 template <typename T, int NumDims, int Layout, typename Expression, typename GenBlockParams>
 static void VerifyBlockEvaluator(Expression expr, GenBlockParams gen_block) {
   using Device = DefaultDevice;
   auto d = Device();

   // Scratch memory allocator for block evaluation.
   typedef internal::TensorBlockScratchAllocator<Device> TensorBlockScratch;
   TensorBlockScratch scratch(d);

   // TensorEvaluator is needed to produce tensor blocks of the expression.
   auto eval = TensorEvaluator<const decltype(expr), Device>(expr, d);
   eval.evalSubExprsIfNeeded(nullptr);

   // Choose random offsets, sizes and a TensorBlockDescriptor.
   TensorBlockParams<NumDims> block_params = gen_block();

   // Evaluate TensorBlock expression into a tensor.
   Tensor<T, NumDims, Layout> block(block_params.desc.dimensions());

   // Dimensions for the potential destination buffer: either exactly the block
   // dimensions, or the block dimensions enlarged by 0..5 along every axis so
   // that the destination strides differ from the block strides.
   DSizes<Index, NumDims> dst_dims;
   if (internal::random<bool>()) {
     dst_dims = block_params.desc.dimensions();
   } else {
     for (int i = 0; i < NumDims; ++i) {
       Index extent = internal::random<Index>(0, 5);
       dst_dims[i] = block_params.desc.dimension(i) + extent;
     }
   }

   // Maybe use this tensor as a block desc destination.
   Tensor<T, NumDims, Layout> dst(dst_dims);
   dst.setZero();
   if (internal::random<bool>()) {
     block_params.desc.template AddDestinationBuffer<Layout>(dst.data(), internal::strides<Layout>(dst.dimensions()));
   }

   // Randomly tell the evaluator whether this block is the root of the
   // expression tree.
   const bool root_of_expr = internal::random<bool>();
   auto tensor_block = eval.block(block_params.desc, scratch, root_of_expr);

   if (tensor_block.kind() == internal::TensorBlockKind::kMaterializedInOutput) {
     // The evaluator wrote directly into the destination buffer: copy the data
     // from there (slicing away the extra extents if dst is larger).
     if (dimensions_match(dst.dimensions(), block.dimensions())) {
       block = dst;
     } else {
       DSizes<Index, NumDims> offsets;
       for (int i = 0; i < NumDims; ++i) offsets[i] = 0;
       block = dst.slice(offsets, block.dimensions());
     }

   } else {
     // Assign to block from expression.
     auto b_expr = tensor_block.expr();

     // We explicitly disable vectorization and tiling, to run a simple coefficient
     // wise assignment loop, because it's very simple and should be correct.
     using BlockAssign = TensorAssignOp<decltype(block), const decltype(b_expr)>;
     using BlockExecutor = TensorExecutor<const BlockAssign, Device, false, internal::TiledEvaluation::Off>;
     BlockExecutor::run(BlockAssign(block, b_expr), d);
   }

   // Cleanup temporary buffers owned by a tensor block.
   tensor_block.cleanup();

   // Compute a Tensor slice corresponding to a Tensor block.
   Tensor<T, NumDims, Layout> slice(block_params.desc.dimensions());
   auto s_expr = expr.slice(block_params.offsets, block_params.sizes);

   // Explicitly use coefficient assignment to evaluate slice expression.
   using SliceAssign = TensorAssignOp<decltype(slice), const decltype(s_expr)>;
   using SliceExecutor = TensorExecutor<const SliceAssign, Device, false, internal::TiledEvaluation::Off>;
   SliceExecutor::run(SliceAssign(slice, s_expr), d);

   // Tensor block and tensor slice must be the same.
   for (Index i = 0; i < block.dimensions().TotalSize(); ++i) {
     VERIFY_IS_EQUAL(block.coeff(i), slice.coeff(i));
   }

   // Release evaluator-owned temporaries (e.g. the materialized buffer that
   // TensorScan / TensorFFT allocate during evalSubExprsIfNeeded).
   eval.cleanup();
 }

 // -------------------------------------------------------------------------- //

 template <typename T, int NumDims, int Layout>
 static void test_eval_tensor_block() {
   DSizes<Index, NumDims> dims = RandomDims<NumDims>(10, 20);
   Tensor<T, NumDims, Layout> input(dims);
   input.setRandom();

   // Identity tensor expression transformation.
   VerifyBlockEvaluator<T, NumDims, Layout>(input, [&dims]() { return RandomBlock<Layout>(dims, 1, 10); });
 }

 template <typename T, int NumDims, int Layout>
 static void test_eval_tensor_unary_expr_block() {
   DSizes<Index, NumDims> dims = RandomDims<NumDims>(10, 20);
   Tensor<T, NumDims, Layout> input(dims);
   input.setRandom();

   VerifyBlockEvaluator<T, NumDims, Layout>(input.abs(), [&dims]() { return RandomBlock<Layout>(dims, 1, 10); });
 }

 template <typename T, int NumDims, int Layout>
 static void test_eval_tensor_binary_expr_block() {
   DSizes<Index, NumDims> dims = RandomDims<NumDims>(10, 20);
   Tensor<T, NumDims, Layout> lhs(dims), rhs(dims);
   setRandomForBinaryProduct(lhs);
   setRandomForBinaryProduct(rhs);

   VerifyBlockEvaluator<T, NumDims, Layout>(lhs * rhs, [&dims]() { return RandomBlock<Layout>(dims, 1, 10); });
 }

 template <typename T, int NumDims, int Layout>
 static void test_eval_tensor_binary_with_unary_expr_block() {
   DSizes<Index, NumDims> dims = RandomDims<NumDims>(10, 20);
   Tensor<T, NumDims, Layout> lhs(dims), rhs(dims);
   lhs.setRandom();
   rhs.setRandom();

   VerifyBlockEvaluator<T, NumDims, Layout>((lhs.abs() + rhs.abs()).sqrt(),
                                            [&dims]() { return RandomBlock<Layout>(dims, 1, 10); });
 }

 template <typename T, int NumDims, int Layout>
 static void test_eval_tensor_broadcast() {
   DSizes<Index, NumDims> dims = RandomDims<NumDims>(1, 10);
   Tensor<T, NumDims, Layout> input(dims);
   input.setRandom();

   DSizes<Index, NumDims> bcast = RandomDims<NumDims>(1, 5);

   DSizes<Index, NumDims> bcasted_dims;
   for (int i = 0; i < NumDims; ++i) bcasted_dims[i] = dims[i] * bcast[i];

   VerifyBlockEvaluator<T, NumDims, Layout>(input.broadcast(bcast),
                                            [&bcasted_dims]() { return SkewedInnerBlock<Layout>(bcasted_dims); });

   VerifyBlockEvaluator<T, NumDims, Layout>(input.broadcast(bcast),
                                            [&bcasted_dims]() { return RandomBlock<Layout>(bcasted_dims, 5, 10); });

   VerifyBlockEvaluator<T, NumDims, Layout>(input.broadcast(bcast),
                                            [&bcasted_dims]() { return FixedSizeBlock(bcasted_dims); });

   // Check that desc.destination() memory is not shared between two broadcast
   // materializations.
   VerifyBlockEvaluator<T, NumDims, Layout>(input.broadcast(bcast) * input.abs().broadcast(bcast),
                                            [&bcasted_dims]() { return SkewedInnerBlock<Layout>(bcasted_dims); });
 }

 template <typename T, int NumDims, int Layout>
 static void test_eval_tensor_scan() {
   // Scan eagerly materializes m_output during evalSubExprsIfNeeded; this
   // exercises the block() wrapper around that buffer.
   DSizes<Index, NumDims> dims = RandomDims<NumDims>(4, 12);
   Tensor<T, NumDims, Layout> input(dims);
   input.setRandom();

   const Index axis = NumDims == 1 ? 0 : NumDims / 2;

   VerifyBlockEvaluator<T, NumDims, Layout>(input.cumsum(axis), [&dims]() { return RandomBlock<Layout>(dims, 1, 5); });

   VerifyBlockEvaluator<T, NumDims, Layout>(input.cumsum(axis), [&dims]() { return FixedSizeBlock(dims); });
 }

 template <typename T, int NumDims, int Layout>
 static void test_eval_tensor_fft() {
   // FFT eagerly materializes m_data during evalSubExprsIfNeeded; this
   // exercises the block() wrapper around that buffer. Use RealPart so
   // the output type matches the input type and the harness's slice
   // comparison stays straightforward.
   DSizes<Index, NumDims> dims = RandomDims<NumDims>(4, 12);
   Tensor<T, NumDims, Layout> input(dims);
   input.setRandom();

   Eigen::array<int, 1> fft_dims = {0};

   VerifyBlockEvaluator<T, NumDims, Layout>(input.template fft<RealPart, FFT_FORWARD>(fft_dims),
                                            [&dims]() { return RandomBlock<Layout>(dims, 1, 5); });

   VerifyBlockEvaluator<T, NumDims, Layout>(input.template fft<RealPart, FFT_FORWARD>(fft_dims),
                                            [&dims]() { return FixedSizeBlock(dims); });
 }

 template <typename T, int NumDims, int Layout>
 static void test_eval_tensor_layout_swap() {
   // The swap_layout expression has the opposite layout of its operand. Build
   // the input with the opposite layout and assert the block evaluator on the
   // resulting expression matches the slice-based reference.
   constexpr int InputLayout = (Layout == ColMajor) ? RowMajor : ColMajor;
   DSizes<Index, NumDims> input_dims = RandomDims<NumDims>(10, 20);
   Tensor<T, NumDims, InputLayout> input(input_dims);
   input.setRandom();

   DSizes<Index, NumDims> swapped_dims;
   for (int i = 0; i < NumDims; ++i) {
     swapped_dims[i] = input_dims[NumDims - 1 - i];
   }

   VerifyBlockEvaluator<T, NumDims, Layout>(input.swap_layout(),
                                            [&swapped_dims]() { return RandomBlock<Layout>(swapped_dims, 1, 10); });

   VerifyBlockEvaluator<T, NumDims, Layout>(input.swap_layout(),
                                            [&swapped_dims]() { return FixedSizeBlock(swapped_dims); });
 }

 // Regression for the original failure mode this MR fixes: TensorPaddingOp's
 // BlockAccess gates on m_impl.RawAccess (true for Scan/FFT/LayoutSwap/Contraction)
 // and its getResourceRequirements() unconditionally calls the operand's. Before
 // the operand-side block plumbing was added, instantiating the executor's
 // Tiling=On path through these compositions failed to compile. Driving block()
 // over the composed expressions exercises both sides of that compile path.
 template <typename T, int NumDims, int Layout>
 static void test_eval_composed_block_ops() {
   DSizes<Index, NumDims> dims = RandomDims<NumDims>(4, 8);
   array<std::pair<Index, Index>, NumDims> paddings;
   DSizes<Index, NumDims> padded_dims;
   for (int i = 0; i < NumDims; ++i) {
     paddings[i] = std::make_pair(1, 2);
     padded_dims[i] = dims[i] + 3;
   }

   const Index axis = NumDims == 1 ? 0 : NumDims / 2;

   Tensor<T, NumDims, Layout> input(dims);
   input.setRandom();

   // cumsum(...).pad(...) — TensorScan + TensorPadding.
   VerifyBlockEvaluator<T, NumDims, Layout>(input.cumsum(axis).pad(paddings),
                                            [&padded_dims]() { return RandomBlock<Layout>(padded_dims, 1, 5); });

   // swap_layout().pad(...) — operand built with the opposite layout so the
   // composed expression evaluates in the test's Layout.
   constexpr int InputLayout = (Layout == ColMajor) ? RowMajor : ColMajor;
   Tensor<T, NumDims, InputLayout> swap_input(dims);
   swap_input.setRandom();
   DSizes<Index, NumDims> swap_padded_dims;
   for (int i = 0; i < NumDims; ++i) {
     swap_padded_dims[i] = dims[NumDims - 1 - i] + 3;
   }
   VerifyBlockEvaluator<T, NumDims, Layout>(swap_input.swap_layout().pad(paddings), [&swap_padded_dims]() {
     return RandomBlock<Layout>(swap_padded_dims, 1, 5);
   });
 }

 // 2D-specific regression: contract(...).pad(...) hit the same composition bug
 // because TensorContraction has RawAccess=true but lacked getResourceRequirements().
 template <typename T, int Layout>
 static void test_eval_contract_pad_composition() {
   Tensor<T, 2, Layout> A(8, 6);
   Tensor<T, 2, Layout> B(6, 4);
   A.setRandom();
   B.setRandom();

   Eigen::array<IndexPair<Index>, 1> contract_dims = {IndexPair<Index>(1, 0)};
   array<std::pair<Index, Index>, 2> paddings = {std::pair<Index, Index>{1, 1}, std::pair<Index, Index>{2, 2}};
   DSizes<Index, 2> padded_dims(8 + 2, 4 + 4);

   VerifyBlockEvaluator<T, 2, Layout>(A.contract(B, contract_dims).pad(paddings),
                                      [&padded_dims]() { return RandomBlock<Layout>(padded_dims, 1, 5); });
 }

 // Block evaluation of a reshape expression. The target shape is a random
 // permutation of the input dimensions, so the total size is unchanged. Checked
 // with a random block and a block skewed towards the inner dimensions.
 template <typename T, int NumDims, int Layout>
 static void test_eval_tensor_reshape() {
   DSizes<Index, NumDims> dims = RandomDims<NumDims>(1, 10);

   // Permute all NumDims extents. The end pointer must be one past the last
   // element; ending at &shuffled[NumDims - 1] would leave the last dimension
   // out of the shuffle (and turn the 2-D case into a no-op).
   DSizes<Index, NumDims> shuffled = dims;
   std::shuffle(&shuffled[0], &shuffled[0] + NumDims, std::mt19937(g_seed));

   Tensor<T, NumDims, Layout> input(dims);
   input.setRandom();

   VerifyBlockEvaluator<T, NumDims, Layout>(input.reshape(shuffled),
                                            [&shuffled]() { return RandomBlock<Layout>(shuffled, 1, 10); });

   VerifyBlockEvaluator<T, NumDims, Layout>(input.reshape(shuffled),
                                            [&shuffled]() { return SkewedInnerBlock<Layout>(shuffled); });
 }

 template <typename T, int NumDims, int Layout>
 static void test_eval_tensor_cast() {
   DSizes<Index, NumDims> dims = RandomDims<NumDims>(10, 20);
   Tensor<T, NumDims, Layout> input(dims);
   input.setRandom();

   VerifyBlockEvaluator<T, NumDims, Layout>(input.template cast<int>().template cast<T>(),
                                            [&dims]() { return RandomBlock<Layout>(dims, 1, 10); });
 }

 template <typename T, int NumDims, int Layout>
 static void test_eval_tensor_select() {
   DSizes<Index, NumDims> dims = RandomDims<NumDims>(10, 20);
   Tensor<T, NumDims, Layout> lhs(dims);
   Tensor<T, NumDims, Layout> rhs(dims);
   Tensor<bool, NumDims, Layout> cond(dims);
   lhs.setRandom();
   rhs.setRandom();
   cond.setRandom();

   VerifyBlockEvaluator<T, NumDims, Layout>(cond.select(lhs, rhs),
                                            [&dims]() { return RandomBlock<Layout>(dims, 1, 20); });
 }

 template <typename T, int NumDims, int Layout>
 static void test_eval_tensor_padding() {
   const int inner_dim = Layout == static_cast<int>(ColMajor) ? 0 : NumDims - 1;

   DSizes<Index, NumDims> dims = RandomDims<NumDims>(10, 20);
   Tensor<T, NumDims, Layout> input(dims);
   input.setRandom();

   DSizes<Index, NumDims> pad_before = RandomDims<NumDims>(0, 4);
   DSizes<Index, NumDims> pad_after = RandomDims<NumDims>(0, 4);
   array<std::pair<Index, Index>, NumDims> paddings;
   for (int i = 0; i < NumDims; ++i) {
     paddings[i] = std::make_pair(pad_before[i], pad_after[i]);
   }

   // Test squeezing reads from inner dim.
   if (internal::random<bool>()) {
     pad_before[inner_dim] = 0;
     pad_after[inner_dim] = 0;
     paddings[inner_dim] = std::make_pair(0, 0);
   }

   DSizes<Index, NumDims> padded_dims;
   for (int i = 0; i < NumDims; ++i) {
     padded_dims[i] = dims[i] + pad_before[i] + pad_after[i];
   }

   VerifyBlockEvaluator<T, NumDims, Layout>(input.pad(paddings),
                                            [&padded_dims]() { return FixedSizeBlock(padded_dims); });

   VerifyBlockEvaluator<T, NumDims, Layout>(input.pad(paddings),
                                            [&padded_dims]() { return RandomBlock<Layout>(padded_dims, 1, 10); });

   VerifyBlockEvaluator<T, NumDims, Layout>(input.pad(paddings),
                                            [&padded_dims]() { return SkewedInnerBlock<Layout>(padded_dims); });
 }

 template <typename T, int NumDims, int Layout>
 static void test_eval_tensor_chipping() {
   DSizes<Index, NumDims> dims = RandomDims<NumDims>(10, 20);
   Tensor<T, NumDims, Layout> input(dims);
   input.setRandom();

   Index chip_dim = internal::random<int>(0, NumDims - 1);
   Index chip_offset = internal::random<Index>(0, dims[chip_dim] - 2);

   DSizes<Index, NumDims - 1> chipped_dims;
   for (Index i = 0; i < chip_dim; ++i) {
     chipped_dims[i] = dims[i];
   }
   for (Index i = chip_dim + 1; i < NumDims; ++i) {
     chipped_dims[i - 1] = dims[i];
   }

   // Block buffer forwarding.
   VerifyBlockEvaluator<T, NumDims - 1, Layout>(input.chip(chip_offset, chip_dim),
                                                [&chipped_dims]() { return FixedSizeBlock(chipped_dims); });

   VerifyBlockEvaluator<T, NumDims - 1, Layout>(input.chip(chip_offset, chip_dim),
                                                [&chipped_dims]() { return RandomBlock<Layout>(chipped_dims, 1, 10); });

   // Block expression assignment.
   VerifyBlockEvaluator<T, NumDims - 1, Layout>(input.abs().chip(chip_offset, chip_dim),
                                                [&chipped_dims]() { return FixedSizeBlock(chipped_dims); });

   VerifyBlockEvaluator<T, NumDims - 1, Layout>(input.abs().chip(chip_offset, chip_dim),
                                                [&chipped_dims]() { return RandomBlock<Layout>(chipped_dims, 1, 10); });
 }

 // Generator functor for the generate() test: the value at `coords` is the sum
 // over all dimensions of (dimension index + 1) * coordinate, with each term
 // cast to T.
 template <typename T, int NumDims>
 struct SimpleTensorGenerator {
   T operator()(const array<Index, NumDims>& coords) const {
     T sum = static_cast<T>(0);
     for (int axis = 0; axis != NumDims; ++axis) {
       const T term = static_cast<T>((axis + 1) * coords[axis]);
       sum += term;
     }
     return sum;
   }
 };

 // Boolean specialization to avoid -Wint-in-bool-context warnings on GCC. The
 // flag is updated with `^=` against every coordinate in turn.
 template <int NumDims>
 struct SimpleTensorGenerator<bool, NumDims> {
   bool operator()(const array<Index, NumDims>& coords) const {
     bool flag = false;
     for (int axis = 0; axis != NumDims; ++axis) {
       flag ^= coords[axis];
     }
     return flag;
   }
 };

 template <typename T, int NumDims, int Layout>
 static void test_eval_tensor_generator() {
   DSizes<Index, NumDims> dims = RandomDims<NumDims>(10, 20);
   Tensor<T, NumDims, Layout> input(dims);
   input.setRandom();

   auto generator = SimpleTensorGenerator<T, NumDims>();

   VerifyBlockEvaluator<T, NumDims, Layout>(input.generate(generator), [&dims]() { return FixedSizeBlock(dims); });

   VerifyBlockEvaluator<T, NumDims, Layout>(input.generate(generator),
                                            [&dims]() { return RandomBlock<Layout>(dims, 1, 10); });
 }

 template <typename T, int NumDims, int Layout>
 static void test_eval_tensor_reverse() {
   DSizes<Index, NumDims> dims = RandomDims<NumDims>(10, 20);
   Tensor<T, NumDims, Layout> input(dims);
   input.setRandom();

   // Randomly reverse dimensions.
   Eigen::DSizes<bool, NumDims> reverse;
   for (int i = 0; i < NumDims; ++i) reverse[i] = internal::random<bool>();

   VerifyBlockEvaluator<T, NumDims, Layout>(input.reverse(reverse), [&dims]() { return FixedSizeBlock(dims); });

   VerifyBlockEvaluator<T, NumDims, Layout>(input.reverse(reverse),
                                            [&dims]() { return RandomBlock<Layout>(dims, 1, 10); });
 }

 template <typename T, int NumDims, int Layout>
 static void test_eval_tensor_slice() {
   DSizes<Index, NumDims> dims = RandomDims<NumDims>(10, 20);
   Tensor<T, NumDims, Layout> input(dims);
   input.setRandom();

   // Pick a random slice of an input tensor.
   DSizes<Index, NumDims> slice_start = RandomDims<NumDims>(5, 10);
   DSizes<Index, NumDims> slice_size = RandomDims<NumDims>(5, 10);

   // Make sure that slice start + size do not overflow tensor dims.
   for (int i = 0; i < NumDims; ++i) {
     slice_start[i] = numext::mini(dims[i] - 1, slice_start[i]);
     slice_size[i] = numext::mini(slice_size[i], dims[i] - slice_start[i]);
   }

   VerifyBlockEvaluator<T, NumDims, Layout>(input.slice(slice_start, slice_size),
                                            [&slice_size]() { return FixedSizeBlock(slice_size); });

   VerifyBlockEvaluator<T, NumDims, Layout>(input.slice(slice_start, slice_size),
                                            [&slice_size]() { return RandomBlock<Layout>(slice_size, 1, 10); });
 }

 // Exercise the block evaluator for bool slices as a sub-expression of a
 // block-aware parent op at sizes spanning Packet16b (16-lane bool packet)
 // boundaries. Before the BlockAccess fix for bool slicing, the parent op's
 // BlockAccess would be forced to false and this composition would never
 // dispatch through the block path.
 template <int NumDims, int Layout>
 static void test_eval_tensor_slice_bool_composite() {
   // Extents just below, at and just above multiples of 16.
   const Index boundary_sizes[] = {15, 16, 17, 31, 32, 33, 47, 48, 49};

   for (Index sz : boundary_sizes) {
     // Slice skewed off the boundary so the block layout straddles packet
     // boundaries of the underlying tensor.
     const Index start = (sz >= 2) ? 1 : 0;

     DSizes<Index, NumDims> dims;
     DSizes<Index, NumDims> slice_start;
     DSizes<Index, NumDims> slice_size;
     for (int axis = 0; axis < NumDims; ++axis) {
       dims[axis] = sz;
       slice_start[axis] = start;
       slice_size[axis] = sz - start;
     }

     Tensor<bool, NumDims, Layout> lhs(dims);
     Tensor<bool, NumDims, Layout> rhs(dims);
     lhs.setRandom();
     rhs.setRandom();

     auto expr = lhs.slice(slice_start, slice_size) && rhs.slice(slice_start, slice_size);

     auto whole_slice = [&slice_size]() { return FixedSizeBlock(slice_size); };
     auto random_block = [&slice_size, sz]() { return RandomBlock<Layout>(slice_size, 1, sz); };

     VerifyBlockEvaluator<bool, NumDims, Layout>(expr, whole_slice);
     VerifyBlockEvaluator<bool, NumDims, Layout>(expr, random_block);
   }
 }

 template <typename T, int NumDims, int Layout>
 static void test_eval_tensor_shuffle() {
   DSizes<Index, NumDims> dims = RandomDims<NumDims>(5, 15);
   Tensor<T, NumDims, Layout> input(dims);
   input.setRandom();

   DSizes<Index, NumDims> shuffle;
   for (int i = 0; i < NumDims; ++i) shuffle[i] = i;

   do {
     DSizes<Index, NumDims> shuffled_dims;
     for (int i = 0; i < NumDims; ++i) shuffled_dims[i] = dims[shuffle[i]];

     VerifyBlockEvaluator<T, NumDims, Layout>(input.shuffle(shuffle),
                                              [&shuffled_dims]() { return FixedSizeBlock(shuffled_dims); });

     VerifyBlockEvaluator<T, NumDims, Layout>(input.shuffle(shuffle),
                                              [&shuffled_dims]() { return RandomBlock<Layout>(shuffled_dims, 1, 5); });

     break;

   } while (std::next_permutation(&shuffle[0], &shuffle[0] + NumDims));
 }

 template <typename T, int Layout>
 static void test_eval_tensor_reshape_with_bcast() {
   Index dim = internal::random<Index>(1, 100);

   Tensor<T, 2, Layout> lhs(1, dim);
   Tensor<T, 2, Layout> rhs(dim, 1);
   lhs.setRandom();
   rhs.setRandom();

   auto reshapeLhs = NByOne(dim);
   auto reshapeRhs = OneByM(dim);

   auto bcastLhs = OneByM(dim);
   auto bcastRhs = NByOne(dim);

   DSizes<Index, 2> dims(dim, dim);

   VerifyBlockEvaluator<T, 2, Layout>(
       lhs.reshape(reshapeLhs).broadcast(bcastLhs) * rhs.reshape(reshapeRhs).broadcast(bcastRhs),
       [dims]() { return SkewedInnerBlock<Layout, 2>(dims); });
 }

 template <typename T, int Layout>
 static void test_eval_tensor_forced_eval() {
   Index dim = internal::random<Index>(1, 100);

   Tensor<T, 2, Layout> lhs(dim, 1);
   Tensor<T, 2, Layout> rhs(1, dim);
   lhs.setRandom();
   rhs.setRandom();

   auto bcastLhs = OneByM(dim);
   auto bcastRhs = NByOne(dim);

   DSizes<Index, 2> dims(dim, dim);

   VerifyBlockEvaluator<T, 2, Layout>((lhs.broadcast(bcastLhs) * rhs.broadcast(bcastRhs)).eval().reshape(dims),
                                      [dims]() { return SkewedInnerBlock<Layout, 2>(dims); });

   VerifyBlockEvaluator<T, 2, Layout>((lhs.broadcast(bcastLhs) * rhs.broadcast(bcastRhs)).eval().reshape(dims),
                                      [dims]() { return RandomBlock<Layout, 2>(dims, 1, 50); });
 }

 template <typename T, int Layout>
 static void test_eval_tensor_chipping_of_bcast() {
   if (Layout != static_cast<int>(RowMajor)) return;

   Index dim0 = internal::random<Index>(1, 10);
   Index dim1 = internal::random<Index>(1, 10);
   Index dim2 = internal::random<Index>(1, 10);

   Tensor<T, 3, Layout> input(1, dim1, dim2);
   input.setRandom();

   Eigen::array<Index, 3> bcast = {{dim0, 1, 1}};
   DSizes<Index, 2> chipped_dims(dim0, dim2);

   VerifyBlockEvaluator<T, 2, Layout>(input.broadcast(bcast).chip(0, 1),
                                      [chipped_dims]() { return FixedSizeBlock(chipped_dims); });

   VerifyBlockEvaluator<T, 2, Layout>(input.broadcast(bcast).chip(0, 1),
                                      [chipped_dims]() { return SkewedInnerBlock<Layout, 2>(chipped_dims); });

   VerifyBlockEvaluator<T, 2, Layout>(input.broadcast(bcast).chip(0, 1),
                                      [chipped_dims]() { return RandomBlock<Layout, 2>(chipped_dims, 1, 5); });
 }

 // -------------------------------------------------------------------------- //
 // Verify that assigning a block to a Tensor expression produces the same result
 // as an assignment to a TensorSliceOp (writing a block is identical to
 // assigning one tensor to a slice of another tensor).

 // Test harness: writes a random block into `expr` through the evaluator's
 // writeBlock() and checks that the backing `tensor` ends up identical to the
 // result of assigning the same data to the corresponding slice of `expr`.
 //
 //   T, NumDims, Layout - scalar type, rank and layout of the backing tensor.
 //   NumExprDims        - rank of `expr` (differs from NumDims e.g. for chips).
 //   tensor             - storage that `expr` writes into; it is zeroed before
 //                        each of the two assignments.
 //   expr               - writable expression under test (taken by value).
 //   gen_block          - callable returning the TensorBlockParams<NumExprDims>
 //                        of the block to write.
 template <typename T, int NumDims, int Layout, int NumExprDims = NumDims, typename Expression, typename GenBlockParams>
 static void VerifyBlockAssignment(Tensor<T, NumDims, Layout>& tensor, Expression expr, GenBlockParams gen_block) {
   using Device = DefaultDevice;
   auto d = Device();

   // We use tensor evaluator as a target for block and slice assignments.
   auto eval = TensorEvaluator<decltype(expr), Device>(expr, d);

   // Generate a random block, or choose a block that fits in full expression.
   TensorBlockParams<NumExprDims> block_params = gen_block();

   // Generate random data of the selected block size.
   Tensor<T, NumExprDims, Layout> block(block_params.desc.dimensions());
   block.setRandom();

   // ************************************************************************ //
   // (1) Assignment from a block.

   // Construct a materialized block (a kView over the data of `block`) from the
   // randomly generated block tensor.
   internal::TensorMaterializedBlock<T, NumExprDims, Layout> blk(internal::TensorBlockKind::kView, block.data(),
                                                                 block.dimensions());

   // Reset all underlying tensor values to zero.
   tensor.setZero();

   // Use evaluator to write block into a tensor.
   eval.writeBlock(block_params.desc, blk);

   // Make a copy of the result after assignment.
   Tensor<T, NumDims, Layout> block_assigned = tensor;

   // ************************************************************************ //
   // (2) Assignment to a slice

   // Reset all underlying tensor values to zero.
   tensor.setZero();

   // Assign block to a slice of original expression
   auto s_expr = expr.slice(block_params.offsets, block_params.sizes);

   // Explicitly use coefficient assignment to evaluate slice expression.
   using SliceAssign = TensorAssignOp<decltype(s_expr), const decltype(block)>;
   using SliceExecutor = TensorExecutor<const SliceAssign, Device, false, internal::TiledEvaluation::Off>;
   SliceExecutor::run(SliceAssign(s_expr, block), d);

   // Make a copy of the result after assignment.
   Tensor<T, NumDims, Layout> slice_assigned = tensor;

   // Both assignment paths must leave the whole backing tensor in the same
   // state, including the coefficients outside the block.
   for (Index i = 0; i < tensor.dimensions().TotalSize(); ++i) {
     VERIFY_IS_EQUAL(block_assigned.coeff(i), slice_assigned.coeff(i));
   }
 }

 // -------------------------------------------------------------------------- //

 template <typename T, int NumDims, int Layout>
 static void test_assign_to_tensor() {
   DSizes<Index, NumDims> dims = RandomDims<NumDims>(10, 20);
   Tensor<T, NumDims, Layout> tensor(dims);

   TensorMap<Tensor<T, NumDims, Layout>> map(tensor.data(), dims);

   VerifyBlockAssignment<T, NumDims, Layout>(tensor, map, [&dims]() { return RandomBlock<Layout>(dims, 10, 20); });
   VerifyBlockAssignment<T, NumDims, Layout>(tensor, map, [&dims]() { return FixedSizeBlock(dims); });
 }

 // Block assignment through a reshape of a TensorMap. The target shape is a
 // random permutation of the tensor dimensions, so the total size is unchanged.
 // Checked with random, skewed and whole-tensor blocks.
 template <typename T, int NumDims, int Layout>
 static void test_assign_to_tensor_reshape() {
   DSizes<Index, NumDims> dims = RandomDims<NumDims>(10, 20);
   Tensor<T, NumDims, Layout> tensor(dims);

   TensorMap<Tensor<T, NumDims, Layout>> map(tensor.data(), dims);

   // Permute all NumDims extents. The end pointer must be one past the last
   // element; ending at &shuffled[NumDims - 1] would leave the last dimension
   // out of the shuffle (and turn the 2-D case into a no-op).
   DSizes<Index, NumDims> shuffled = dims;
   std::shuffle(&shuffled[0], &shuffled[0] + NumDims, std::mt19937(g_seed));

   VerifyBlockAssignment<T, NumDims, Layout>(tensor, map.reshape(shuffled),
                                             [&shuffled]() { return RandomBlock<Layout>(shuffled, 1, 10); });

   VerifyBlockAssignment<T, NumDims, Layout>(tensor, map.reshape(shuffled),
                                             [&shuffled]() { return SkewedInnerBlock<Layout>(shuffled); });

   VerifyBlockAssignment<T, NumDims, Layout>(tensor, map.reshape(shuffled),
                                             [&shuffled]() { return FixedSizeBlock(shuffled); });
 }

 template <typename T, int NumDims, int Layout>
 static void test_assign_to_tensor_chipping() {
   DSizes<Index, NumDims> dims = RandomDims<NumDims>(10, 20);
   Tensor<T, NumDims, Layout> tensor(dims);

   Index chip_dim = internal::random<int>(0, NumDims - 1);
   Index chip_offset = internal::random<Index>(0, dims[chip_dim] - 2);

   DSizes<Index, NumDims - 1> chipped_dims;
   for (Index i = 0; i < chip_dim; ++i) {
     chipped_dims[i] = dims[i];
   }
   for (Index i = chip_dim + 1; i < NumDims; ++i) {
     chipped_dims[i - 1] = dims[i];
   }

   TensorMap<Tensor<T, NumDims, Layout>> map(tensor.data(), dims);

   VerifyBlockAssignment<T, NumDims, Layout, NumDims - 1>(
       tensor, map.chip(chip_offset, chip_dim), [&chipped_dims]() { return RandomBlock<Layout>(chipped_dims, 1, 10); });

   VerifyBlockAssignment<T, NumDims, Layout, NumDims - 1>(
       tensor, map.chip(chip_offset, chip_dim), [&chipped_dims]() { return SkewedInnerBlock<Layout>(chipped_dims); });

   VerifyBlockAssignment<T, NumDims, Layout, NumDims - 1>(tensor, map.chip(chip_offset, chip_dim),
                                                          [&chipped_dims]() { return FixedSizeBlock(chipped_dims); });
 }

 template <typename T, int NumDims, int Layout>
 static void test_assign_to_tensor_slice() {
   DSizes<Index, NumDims> dims = RandomDims<NumDims>(10, 20);
   Tensor<T, NumDims, Layout> tensor(dims);

   // Pick a random slice of tensor.
   DSizes<Index, NumDims> slice_start = RandomDims<NumDims>(5, 10);
   DSizes<Index, NumDims> slice_size = RandomDims<NumDims>(5, 10);

   // Make sure that slice start + size do not overflow tensor dims.
   for (int i = 0; i < NumDims; ++i) {
     slice_start[i] = numext::mini(dims[i] - 1, slice_start[i]);
     slice_size[i] = numext::mini(slice_size[i], dims[i] - slice_start[i]);
   }

   TensorMap<Tensor<T, NumDims, Layout>> map(tensor.data(), dims);

   VerifyBlockAssignment<T, NumDims, Layout>(tensor, map.slice(slice_start, slice_size),
                                             [&slice_size]() { return RandomBlock<Layout>(slice_size, 1, 10); });

   VerifyBlockAssignment<T, NumDims, Layout>(tensor, map.slice(slice_start, slice_size),
                                             [&slice_size]() { return SkewedInnerBlock<Layout>(slice_size); });

   VerifyBlockAssignment<T, NumDims, Layout>(tensor, map.slice(slice_start, slice_size),
                                             [&slice_size]() { return FixedSizeBlock(slice_size); });
 }

 template <typename T, int NumDims, int Layout>
 static void test_assign_to_tensor_shuffle() {
   DSizes<Index, NumDims> dims = RandomDims<NumDims>(5, 15);
   Tensor<T, NumDims, Layout> tensor(dims);

   DSizes<Index, NumDims> shuffle;
   for (int i = 0; i < NumDims; ++i) shuffle[i] = i;

   TensorMap<Tensor<T, NumDims, Layout>> map(tensor.data(), dims);

   do {
     DSizes<Index, NumDims> shuffled_dims;
     for (int i = 0; i < NumDims; ++i) shuffled_dims[i] = dims[shuffle[i]];

     VerifyBlockAssignment<T, NumDims, Layout>(tensor, map.shuffle(shuffle),
                                               [&shuffled_dims]() { return FixedSizeBlock(shuffled_dims); });

     VerifyBlockAssignment<T, NumDims, Layout>(tensor, map.shuffle(shuffle),
                                               [&shuffled_dims]() { return RandomBlock<Layout>(shuffled_dims, 1, 5); });

   } while (std::next_permutation(&shuffle[0], &shuffle[0] + NumDims));
 }

 // -------------------------------------------------------------------------- //

 // Token-pastes PART onto the CALL_SUBTEST_ prefix, yielding the name of the
 // per-part subtest macro (presumably provided by main.h), so that the part
 // index can be passed around as an ordinary macro argument.
 #define CALL_SUBTEST_PART(PART) CALL_SUBTEST_##PART

 // Runs NAME<T, NumDims, Layout>() in subtest part PART for every combination
 // of T in {float, int, bool}, NumDims in 1..5 and Layout in
 // {RowMajor, ColMajor}: 30 distinct instantiations in total.
 #define CALL_SUBTESTS_DIMS_LAYOUTS_TYPES(PART, NAME)     \
   CALL_SUBTEST_PART(PART)((NAME<float, 1, RowMajor>())); \
   CALL_SUBTEST_PART(PART)((NAME<float, 2, RowMajor>())); \
   CALL_SUBTEST_PART(PART)((NAME<float, 3, RowMajor>())); \
   CALL_SUBTEST_PART(PART)((NAME<float, 4, RowMajor>())); \
   CALL_SUBTEST_PART(PART)((NAME<float, 5, RowMajor>())); \
   CALL_SUBTEST_PART(PART)((NAME<float, 1, ColMajor>())); \
   CALL_SUBTEST_PART(PART)((NAME<float, 2, ColMajor>())); \
   CALL_SUBTEST_PART(PART)((NAME<float, 3, ColMajor>())); \
   CALL_SUBTEST_PART(PART)((NAME<float, 4, ColMajor>())); \
   CALL_SUBTEST_PART(PART)((NAME<float, 5, ColMajor>())); \
   CALL_SUBTEST_PART(PART)((NAME<int, 1, RowMajor>()));   \
   CALL_SUBTEST_PART(PART)((NAME<int, 2, RowMajor>()));   \
   CALL_SUBTEST_PART(PART)((NAME<int, 3, RowMajor>()));   \
   CALL_SUBTEST_PART(PART)((NAME<int, 4, RowMajor>()));   \
   CALL_SUBTEST_PART(PART)((NAME<int, 5, RowMajor>()));   \
   CALL_SUBTEST_PART(PART)((NAME<int, 1, ColMajor>()));   \
   CALL_SUBTEST_PART(PART)((NAME<int, 2, ColMajor>()));   \
   CALL_SUBTEST_PART(PART)((NAME<int, 3, ColMajor>()));   \
   CALL_SUBTEST_PART(PART)((NAME<int, 4, ColMajor>()));   \
   CALL_SUBTEST_PART(PART)((NAME<int, 5, ColMajor>()));   \
   CALL_SUBTEST_PART(PART)((NAME<bool, 1, RowMajor>()));  \
   CALL_SUBTEST_PART(PART)((NAME<bool, 2, RowMajor>()));  \
   CALL_SUBTEST_PART(PART)((NAME<bool, 3, RowMajor>()));  \
   CALL_SUBTEST_PART(PART)((NAME<bool, 4, RowMajor>()));  \
   CALL_SUBTEST_PART(PART)((NAME<bool, 5, RowMajor>()));  \
   CALL_SUBTEST_PART(PART)((NAME<bool, 1, ColMajor>()));  \
   CALL_SUBTEST_PART(PART)((NAME<bool, 2, ColMajor>()));  \
   CALL_SUBTEST_PART(PART)((NAME<bool, 3, ColMajor>()));  \
   CALL_SUBTEST_PART(PART)((NAME<bool, 4, ColMajor>()));  \
   CALL_SUBTEST_PART(PART)((NAME<bool, 5, ColMajor>()))

 // Runs NAME<float, NumDims, Layout>() in subtest part PART for every
 // combination of NumDims in 1..5 and Layout in {RowMajor, ColMajor}:
 // 10 distinct instantiations in total.
 #define CALL_SUBTESTS_DIMS_LAYOUTS(PART, NAME)           \
   CALL_SUBTEST_PART(PART)((NAME<float, 1, RowMajor>())); \
   CALL_SUBTEST_PART(PART)((NAME<float, 2, RowMajor>())); \
   CALL_SUBTEST_PART(PART)((NAME<float, 3, RowMajor>())); \
   CALL_SUBTEST_PART(PART)((NAME<float, 4, RowMajor>())); \
   CALL_SUBTEST_PART(PART)((NAME<float, 5, RowMajor>())); \
   CALL_SUBTEST_PART(PART)((NAME<float, 1, ColMajor>())); \
   CALL_SUBTEST_PART(PART)((NAME<float, 2, ColMajor>())); \
   CALL_SUBTEST_PART(PART)((NAME<float, 3, ColMajor>())); \
   CALL_SUBTEST_PART(PART)((NAME<float, 4, ColMajor>())); \
   CALL_SUBTEST_PART(PART)((NAME<float, 5, ColMajor>()))

 // Runs NAME<T, Layout>() in subtest part PART for T in {float, bool} and
 // Layout in {RowMajor, ColMajor}. For tests that take no NumDims parameter.
 #define CALL_SUBTESTS_LAYOUTS_TYPES(PART, NAME)       \
   CALL_SUBTEST_PART(PART)((NAME<float, RowMajor>())); \
   CALL_SUBTEST_PART(PART)((NAME<float, ColMajor>())); \
   CALL_SUBTEST_PART(PART)((NAME<bool, RowMajor>()));  \
   CALL_SUBTEST_PART(PART)((NAME<bool, ColMajor>()))

 // Test entry point. Registers every subtest in one of eight numbered parts:
 // parts 1-6 run the test_eval_* subtests and parts 7-8 run the test_assign_*
 // subtests.
 EIGEN_DECLARE_TEST(tensor_block_eval) {
   // clang-format off
   // Part 1.
   CALL_SUBTESTS_DIMS_LAYOUTS_TYPES(1, test_eval_tensor_block);
   CALL_SUBTESTS_DIMS_LAYOUTS_TYPES(1, test_eval_tensor_binary_expr_block);
   CALL_SUBTESTS_DIMS_LAYOUTS(1, test_eval_tensor_unary_expr_block);
   // Part 2. The scan, fft and composed-block-ops subtests are listed
   // explicitly: float only, 2 to 4 dimensions.
   CALL_SUBTESTS_DIMS_LAYOUTS(2, test_eval_tensor_binary_with_unary_expr_block);
   CALL_SUBTESTS_DIMS_LAYOUTS_TYPES(2, test_eval_tensor_broadcast);
   CALL_SUBTESTS_DIMS_LAYOUTS_TYPES(2, test_eval_tensor_reshape);
   CALL_SUBTESTS_DIMS_LAYOUTS_TYPES(2, test_eval_tensor_layout_swap);
   CALL_SUBTEST_PART(2)((test_eval_tensor_scan<float, 2, RowMajor>()));
   CALL_SUBTEST_PART(2)((test_eval_tensor_scan<float, 3, RowMajor>()));
   CALL_SUBTEST_PART(2)((test_eval_tensor_scan<float, 4, RowMajor>()));
   CALL_SUBTEST_PART(2)((test_eval_tensor_scan<float, 2, ColMajor>()));
   CALL_SUBTEST_PART(2)((test_eval_tensor_scan<float, 3, ColMajor>()));
   CALL_SUBTEST_PART(2)((test_eval_tensor_scan<float, 4, ColMajor>()));
   CALL_SUBTEST_PART(2)((test_eval_tensor_fft<float, 2, RowMajor>()));
   CALL_SUBTEST_PART(2)((test_eval_tensor_fft<float, 3, RowMajor>()));
   CALL_SUBTEST_PART(2)((test_eval_tensor_fft<float, 4, RowMajor>()));
   CALL_SUBTEST_PART(2)((test_eval_tensor_fft<float, 2, ColMajor>()));
   CALL_SUBTEST_PART(2)((test_eval_tensor_fft<float, 3, ColMajor>()));
   CALL_SUBTEST_PART(2)((test_eval_tensor_fft<float, 4, ColMajor>()));
   CALL_SUBTEST_PART(2)((test_eval_composed_block_ops<float, 2, RowMajor>()));
   CALL_SUBTEST_PART(2)((test_eval_composed_block_ops<float, 3, RowMajor>()));
   CALL_SUBTEST_PART(2)((test_eval_composed_block_ops<float, 4, RowMajor>()));
   CALL_SUBTEST_PART(2)((test_eval_composed_block_ops<float, 2, ColMajor>()));
   CALL_SUBTEST_PART(2)((test_eval_composed_block_ops<float, 3, ColMajor>()));
   CALL_SUBTEST_PART(2)((test_eval_composed_block_ops<float, 4, ColMajor>()));
   CALL_SUBTEST_PART(2)((test_eval_contract_pad_composition<float, RowMajor>()));
   CALL_SUBTEST_PART(2)((test_eval_contract_pad_composition<float, ColMajor>()));
   // Part 3.
   CALL_SUBTESTS_DIMS_LAYOUTS_TYPES(3, test_eval_tensor_cast);
   CALL_SUBTESTS_DIMS_LAYOUTS_TYPES(3, test_eval_tensor_select);
   CALL_SUBTESTS_DIMS_LAYOUTS_TYPES(3, test_eval_tensor_padding);
   // Part 4.
   CALL_SUBTESTS_DIMS_LAYOUTS_TYPES(4, test_eval_tensor_chipping);
   CALL_SUBTESTS_DIMS_LAYOUTS_TYPES(4, test_eval_tensor_generator);
   CALL_SUBTESTS_DIMS_LAYOUTS_TYPES(4, test_eval_tensor_reverse);
   // Part 5. The bool-composite slice subtest takes no scalar type parameter
   // and is listed explicitly for 1 to 3 dimensions.
   CALL_SUBTESTS_DIMS_LAYOUTS_TYPES(5, test_eval_tensor_slice);
   CALL_SUBTEST_PART(5)((test_eval_tensor_slice_bool_composite<1, RowMajor>()));
   CALL_SUBTEST_PART(5)((test_eval_tensor_slice_bool_composite<2, RowMajor>()));
   CALL_SUBTEST_PART(5)((test_eval_tensor_slice_bool_composite<3, RowMajor>()));
   CALL_SUBTEST_PART(5)((test_eval_tensor_slice_bool_composite<1, ColMajor>()));
   CALL_SUBTEST_PART(5)((test_eval_tensor_slice_bool_composite<2, ColMajor>()));
   CALL_SUBTEST_PART(5)((test_eval_tensor_slice_bool_composite<3, ColMajor>()));
   CALL_SUBTESTS_DIMS_LAYOUTS_TYPES(5, test_eval_tensor_shuffle);

   // Part 6: subtests parameterized on scalar type and layout only.
   CALL_SUBTESTS_LAYOUTS_TYPES(6, test_eval_tensor_reshape_with_bcast);
   CALL_SUBTESTS_LAYOUTS_TYPES(6, test_eval_tensor_forced_eval);
   CALL_SUBTESTS_LAYOUTS_TYPES(6, test_eval_tensor_chipping_of_bcast);

   // Parts 7 and 8: block assignment subtests.
   CALL_SUBTESTS_DIMS_LAYOUTS_TYPES(7, test_assign_to_tensor);
   CALL_SUBTESTS_DIMS_LAYOUTS_TYPES(7, test_assign_to_tensor_reshape);
   CALL_SUBTESTS_DIMS_LAYOUTS_TYPES(7, test_assign_to_tensor_chipping);
   CALL_SUBTESTS_DIMS_LAYOUTS_TYPES(8, test_assign_to_tensor_slice);
   CALL_SUBTESTS_DIMS_LAYOUTS_TYPES(8, test_assign_to_tensor_shuffle);

   // Force CMake to split this test.
   // NOTE(review): the marker below is presumably matched textually by the
   // build scripts, since the part numbers above are hidden behind macros;
   // keep its spelling intact and in sync with the parts used above.
   // EIGEN_SUFFIXES;1;2;3;4;5;6;7;8

   // clang-format on
 }