// This file is part of Eigen, a lightweight C++ template library
// for linear algebra.
//
// Copyright (C) 2016
// Mehdi Goli    Codeplay Software Ltd.
// Ralph Potter  Codeplay Software Ltd.
// Luke Iwanski  Codeplay Software Ltd.
// Contact: <eigen@codeplay.com>
//
// This Source Code Form is subject to the terms of the Mozilla
// Public License v. 2.0. If a copy of the MPL was not distributed
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.

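// This test exercises the element-wise builtin functions of the Eigen Tensor
// module on a SYCL device: each operation is evaluated through a TensorMap
// bound to device memory, the result is copied back to the host, and every
// coefficient is compared against a host-side std:: reference, for both
// RowMajor and ColMajor layouts.
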
#define EIGEN_TEST_NO_LONGDOUBLE
#define EIGEN_TEST_NO_COMPLEX
#define EIGEN_TEST_FUNC cxx11_tensor_builtins_sycl
#define EIGEN_DEFAULT_DENSE_INDEX_TYPE int64_t
#define EIGEN_USE_SYCL

#include "main.h"
#include <unsupported/Eigen/CXX11/Tensor>

using Eigen::array;
using Eigen::SyclDevice;
using Eigen::Tensor;
using Eigen::TensorMap;

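// Host-side reference implementations for the Eigen-specific unary functions
// (rsqrt, square, cube, inverse). They are injected into namespace std so
// that the std::FUNC call in the test macros below resolves for every FUNC
// that is tested.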
namespace std {
template <typename T> T rsqrt(T x) { return 1 / std::sqrt(x); }
template <typename T> T square(T x) { return x * x; }
template <typename T> T cube(T x) { return x * x * x; }
template <typename T> T inverse(T x) { return 1 / x; }
}

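// Evaluates in.FUNC() on the device and checks every coefficient against the
// host reference. The first block writes into a separate output tensor
// (out OPERATOR in.FUNC()); the second block applies the functor in place
// (out OPERATOR out.FUNC()) to also cover the aliasing case.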
#define TEST_UNARY_BUILTINS_FOR_SCALAR(FUNC, SCALAR, OPERATOR, Layout) \
  { \
    /* out OPERATOR in.FUNC() */ \
    Tensor<SCALAR, 3, Layout, int64_t> in(tensorRange); \
    Tensor<SCALAR, 3, Layout, int64_t> out(tensorRange); \
    in = in.random() + static_cast<SCALAR>(0.01); \
    out = out.random() + static_cast<SCALAR>(0.01); \
    Tensor<SCALAR, 3, Layout, int64_t> reference(out); \
    SCALAR *gpu_data = static_cast<SCALAR *>( \
        sycl_device.allocate(in.size() * sizeof(SCALAR))); \
    SCALAR *gpu_data_out = static_cast<SCALAR *>( \
        sycl_device.allocate(out.size() * sizeof(SCALAR))); \
    TensorMap<Tensor<SCALAR, 3, Layout, int64_t>> gpu(gpu_data, tensorRange); \
    TensorMap<Tensor<SCALAR, 3, Layout, int64_t>> gpu_out(gpu_data_out, tensorRange); \
    sycl_device.memcpyHostToDevice(gpu_data, in.data(), \
        (in.size()) * sizeof(SCALAR)); \
    sycl_device.memcpyHostToDevice(gpu_data_out, out.data(), \
        (out.size()) * sizeof(SCALAR)); \
    gpu_out.device(sycl_device) OPERATOR gpu.FUNC(); \
    sycl_device.memcpyDeviceToHost(out.data(), gpu_data_out, \
        (out.size()) * sizeof(SCALAR)); \
    for (int64_t i = 0; i < out.size(); ++i) { \
      SCALAR ver = reference(i); \
      ver OPERATOR std::FUNC(in(i)); \
      VERIFY_IS_APPROX(out(i), ver); \
    } \
    sycl_device.deallocate(gpu_data); \
    sycl_device.deallocate(gpu_data_out); \
  } \
  { \
    /* out OPERATOR out.FUNC() */ \
    Tensor<SCALAR, 3, Layout, int64_t> out(tensorRange); \
    out = out.random() + static_cast<SCALAR>(0.01); \
    Tensor<SCALAR, 3, Layout, int64_t> reference(out); \
    SCALAR *gpu_data_out = static_cast<SCALAR *>( \
        sycl_device.allocate(out.size() * sizeof(SCALAR))); \
    TensorMap<Tensor<SCALAR, 3, Layout, int64_t>> gpu_out(gpu_data_out, tensorRange); \
    sycl_device.memcpyHostToDevice(gpu_data_out, out.data(), \
        (out.size()) * sizeof(SCALAR)); \
    gpu_out.device(sycl_device) OPERATOR gpu_out.FUNC(); \
    sycl_device.memcpyDeviceToHost(out.data(), gpu_data_out, \
        (out.size()) * sizeof(SCALAR)); \
    for (int64_t i = 0; i < out.size(); ++i) { \
      SCALAR ver = reference(i); \
      ver OPERATOR std::FUNC(reference(i)); \
      VERIFY_IS_APPROX(out(i), ver); \
    } \
    sycl_device.deallocate(gpu_data_out); \
  }

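// Instantiates the unary test above for every builtin that maps a real scalar
// to a real scalar (abs, sqrt, rsqrt, square, cube, inverse, tanh, exp,
// expm1, log, ceil, floor, round, log1p).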
#define TEST_UNARY_BUILTINS_OPERATOR(SCALAR, OPERATOR, Layout) \
  TEST_UNARY_BUILTINS_FOR_SCALAR(abs, SCALAR, OPERATOR, Layout) \
  TEST_UNARY_BUILTINS_FOR_SCALAR(sqrt, SCALAR, OPERATOR, Layout) \
  TEST_UNARY_BUILTINS_FOR_SCALAR(rsqrt, SCALAR, OPERATOR, Layout) \
  TEST_UNARY_BUILTINS_FOR_SCALAR(square, SCALAR, OPERATOR, Layout) \
  TEST_UNARY_BUILTINS_FOR_SCALAR(cube, SCALAR, OPERATOR, Layout) \
  TEST_UNARY_BUILTINS_FOR_SCALAR(inverse, SCALAR, OPERATOR, Layout) \
  TEST_UNARY_BUILTINS_FOR_SCALAR(tanh, SCALAR, OPERATOR, Layout) \
  TEST_UNARY_BUILTINS_FOR_SCALAR(exp, SCALAR, OPERATOR, Layout) \
  TEST_UNARY_BUILTINS_FOR_SCALAR(expm1, SCALAR, OPERATOR, Layout) \
  TEST_UNARY_BUILTINS_FOR_SCALAR(log, SCALAR, OPERATOR, Layout) \
  TEST_UNARY_BUILTINS_FOR_SCALAR(ceil, SCALAR, OPERATOR, Layout) \
  TEST_UNARY_BUILTINS_FOR_SCALAR(floor, SCALAR, OPERATOR, Layout) \
  TEST_UNARY_BUILTINS_FOR_SCALAR(round, SCALAR, OPERATOR, Layout) \
  TEST_UNARY_BUILTINS_FOR_SCALAR(log1p, SCALAR, OPERATOR, Layout)

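// Evaluates a predicate builtin (isnan/isfinite/isinf) on the device into a
// bool tensor and compares each coefficient with the corresponding std::
// classification of the input.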
#define TEST_IS_THAT_RETURNS_BOOL(SCALAR, FUNC, Layout) \
  { \
    /* out = in.FUNC() */ \
    Tensor<SCALAR, 3, Layout, int64_t> in(tensorRange); \
    Tensor<bool, 3, Layout, int64_t> out(tensorRange); \
    in = in.random() + static_cast<SCALAR>(0.01); \
    SCALAR *gpu_data = static_cast<SCALAR *>( \
        sycl_device.allocate(in.size() * sizeof(SCALAR))); \
    bool *gpu_data_out = \
        static_cast<bool *>(sycl_device.allocate(out.size() * sizeof(bool))); \
    TensorMap<Tensor<SCALAR, 3, Layout, int64_t>> gpu(gpu_data, tensorRange); \
    TensorMap<Tensor<bool, 3, Layout, int64_t>> gpu_out(gpu_data_out, tensorRange); \
    sycl_device.memcpyHostToDevice(gpu_data, in.data(), \
        (in.size()) * sizeof(SCALAR)); \
    gpu_out.device(sycl_device) = gpu.FUNC(); \
    sycl_device.memcpyDeviceToHost(out.data(), gpu_data_out, \
        (out.size()) * sizeof(bool)); \
    for (int64_t i = 0; i < out.size(); ++i) { \
      VERIFY_IS_EQUAL(out(i), std::FUNC(in(i))); \
    } \
    sycl_device.deallocate(gpu_data); \
    sycl_device.deallocate(gpu_data_out); \
  }

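// Runs the whole unary suite for one scalar type and layout: every builtin is
// checked with both the accumulate form (OPERATOR is +=) and the plain
// assignment form (OPERATOR is =), followed by the bool-returning predicates.
// For example, TEST_UNARY_BUILTINS(float, RowMajor) ends up generating checks
// of the shape
//   gpu_out.device(sycl_device) += gpu.sqrt();
//   gpu_out.device(sycl_device) = gpu.sqrt();
//   gpu_out.device(sycl_device) = gpu.isnan();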
#define TEST_UNARY_BUILTINS(SCALAR, Layout) \
  TEST_UNARY_BUILTINS_OPERATOR(SCALAR, +=, Layout) \
  TEST_UNARY_BUILTINS_OPERATOR(SCALAR, =, Layout) \
  TEST_IS_THAT_RETURNS_BOOL(SCALAR, isnan, Layout) \
  TEST_IS_THAT_RETURNS_BOOL(SCALAR, isfinite, Layout) \
  TEST_IS_THAT_RETURNS_BOOL(SCALAR, isinf, Layout)

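// Runs all unary builtin tests on a 10x10x10 float tensor in both layouts.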
static void test_builtin_unary_sycl(const Eigen::SyclDevice &sycl_device) {
  int64_t sizeDim1 = 10;
  int64_t sizeDim2 = 10;
  int64_t sizeDim3 = 10;
  array<int64_t, 3> tensorRange = {{sizeDim1, sizeDim2, sizeDim3}};

  TEST_UNARY_BUILTINS(float, RowMajor)
  TEST_UNARY_BUILTINS(float, ColMajor)
}

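// Host-side reference implementations of the coefficient-wise max/min used by
// the binary tests below, again injected into std so that std::FUNC resolves.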
namespace std {
template <typename T> T cwiseMax(T x, T y) { return std::max(x, y); }
template <typename T> T cwiseMin(T x, T y) { return std::min(x, y); }
}

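// Evaluates out = in_1.FUNC(in_2) on the device and compares every
// coefficient against the host reference std::FUNC(in_1(i), in_2(i)).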
#define TEST_BINARY_BUILTINS_FUNC(SCALAR, FUNC, Layout) \
  { \
    /* out = in_1.FUNC(in_2) */ \
    Tensor<SCALAR, 3, Layout, int64_t> in_1(tensorRange); \
    Tensor<SCALAR, 3, Layout, int64_t> in_2(tensorRange); \
    Tensor<SCALAR, 3, Layout, int64_t> out(tensorRange); \
    in_1 = in_1.random() + static_cast<SCALAR>(0.01); \
    in_2 = in_2.random() + static_cast<SCALAR>(0.01); \
    Tensor<SCALAR, 3, Layout, int64_t> reference(out); \
    SCALAR *gpu_data_1 = static_cast<SCALAR *>( \
        sycl_device.allocate(in_1.size() * sizeof(SCALAR))); \
    SCALAR *gpu_data_2 = static_cast<SCALAR *>( \
        sycl_device.allocate(in_2.size() * sizeof(SCALAR))); \
    SCALAR *gpu_data_out = static_cast<SCALAR *>( \
        sycl_device.allocate(out.size() * sizeof(SCALAR))); \
    TensorMap<Tensor<SCALAR, 3, Layout, int64_t>> gpu_1(gpu_data_1, tensorRange); \
    TensorMap<Tensor<SCALAR, 3, Layout, int64_t>> gpu_2(gpu_data_2, tensorRange); \
    TensorMap<Tensor<SCALAR, 3, Layout, int64_t>> gpu_out(gpu_data_out, tensorRange); \
    sycl_device.memcpyHostToDevice(gpu_data_1, in_1.data(), \
        (in_1.size()) * sizeof(SCALAR)); \
    sycl_device.memcpyHostToDevice(gpu_data_2, in_2.data(), \
        (in_2.size()) * sizeof(SCALAR)); \
    gpu_out.device(sycl_device) = gpu_1.FUNC(gpu_2); \
    sycl_device.memcpyDeviceToHost(out.data(), gpu_data_out, \
        (out.size()) * sizeof(SCALAR)); \
    for (int64_t i = 0; i < out.size(); ++i) { \
      SCALAR ver = reference(i); \
      ver = std::FUNC(in_1(i), in_2(i)); \
      VERIFY_IS_APPROX(out(i), ver); \
    } \
    sycl_device.deallocate(gpu_data_1); \
    sycl_device.deallocate(gpu_data_2); \
    sycl_device.deallocate(gpu_data_out); \
  }

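// Evaluates out = in_1 OPERATOR in_2 on the device for an arithmetic operator
// and compares every coefficient against the same expression on the host.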
#define TEST_BINARY_BUILTINS_OPERATORS(SCALAR, OPERATOR, Layout) \
  { \
    /* out = in_1 OPERATOR in_2 */ \
    Tensor<SCALAR, 3, Layout, int64_t> in_1(tensorRange); \
    Tensor<SCALAR, 3, Layout, int64_t> in_2(tensorRange); \
    Tensor<SCALAR, 3, Layout, int64_t> out(tensorRange); \
    in_1 = in_1.random() + static_cast<SCALAR>(0.01); \
    in_2 = in_2.random() + static_cast<SCALAR>(0.01); \
    Tensor<SCALAR, 3, Layout, int64_t> reference(out); \
    SCALAR *gpu_data_1 = static_cast<SCALAR *>( \
        sycl_device.allocate(in_1.size() * sizeof(SCALAR))); \
    SCALAR *gpu_data_2 = static_cast<SCALAR *>( \
        sycl_device.allocate(in_2.size() * sizeof(SCALAR))); \
    SCALAR *gpu_data_out = static_cast<SCALAR *>( \
        sycl_device.allocate(out.size() * sizeof(SCALAR))); \
    TensorMap<Tensor<SCALAR, 3, Layout, int64_t>> gpu_1(gpu_data_1, tensorRange); \
    TensorMap<Tensor<SCALAR, 3, Layout, int64_t>> gpu_2(gpu_data_2, tensorRange); \
    TensorMap<Tensor<SCALAR, 3, Layout, int64_t>> gpu_out(gpu_data_out, tensorRange); \
    sycl_device.memcpyHostToDevice(gpu_data_1, in_1.data(), \
        (in_1.size()) * sizeof(SCALAR)); \
    sycl_device.memcpyHostToDevice(gpu_data_2, in_2.data(), \
        (in_2.size()) * sizeof(SCALAR)); \
    gpu_out.device(sycl_device) = gpu_1 OPERATOR gpu_2; \
    sycl_device.memcpyDeviceToHost(out.data(), gpu_data_out, \
        (out.size()) * sizeof(SCALAR)); \
    for (int64_t i = 0; i < out.size(); ++i) { \
      VERIFY_IS_APPROX(out(i), in_1(i) OPERATOR in_2(i)); \
    } \
    sycl_device.deallocate(gpu_data_1); \
    sycl_device.deallocate(gpu_data_2); \
    sycl_device.deallocate(gpu_data_out); \
  }

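// Same as above, but with a scalar literal on the right-hand side
// (out = in_1 OPERATOR 2), which exercises the tensor-scalar operator
// overloads on the device.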
#define TEST_BINARY_BUILTINS_OPERATORS_THAT_TAKES_SCALAR(SCALAR, OPERATOR, Layout) \
  { \
    /* out = in_1 OPERATOR 2 */ \
    Tensor<SCALAR, 3, Layout, int64_t> in_1(tensorRange); \
    Tensor<SCALAR, 3, Layout, int64_t> out(tensorRange); \
    in_1 = in_1.random() + static_cast<SCALAR>(0.01); \
    Tensor<SCALAR, 3, Layout, int64_t> reference(out); \
    SCALAR *gpu_data_1 = static_cast<SCALAR *>( \
        sycl_device.allocate(in_1.size() * sizeof(SCALAR))); \
    SCALAR *gpu_data_out = static_cast<SCALAR *>( \
        sycl_device.allocate(out.size() * sizeof(SCALAR))); \
    TensorMap<Tensor<SCALAR, 3, Layout, int64_t>> gpu_1(gpu_data_1, tensorRange); \
    TensorMap<Tensor<SCALAR, 3, Layout, int64_t>> gpu_out(gpu_data_out, tensorRange); \
    sycl_device.memcpyHostToDevice(gpu_data_1, in_1.data(), \
        (in_1.size()) * sizeof(SCALAR)); \
    gpu_out.device(sycl_device) = gpu_1 OPERATOR 2; \
    sycl_device.memcpyDeviceToHost(out.data(), gpu_data_out, \
        (out.size()) * sizeof(SCALAR)); \
    for (int64_t i = 0; i < out.size(); ++i) { \
      VERIFY_IS_APPROX(out(i), in_1(i) OPERATOR 2); \
    } \
    sycl_device.deallocate(gpu_data_1); \
    sycl_device.deallocate(gpu_data_out); \
  }

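// Instantiates the binary tests: cwiseMax/cwiseMin plus the four arithmetic
// operators, for one scalar type and layout.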
#define TEST_BINARY_BUILTINS(SCALAR, Layout) \
  TEST_BINARY_BUILTINS_FUNC(SCALAR, cwiseMax, Layout) \
  TEST_BINARY_BUILTINS_FUNC(SCALAR, cwiseMin, Layout) \
  TEST_BINARY_BUILTINS_OPERATORS(SCALAR, +, Layout) \
  TEST_BINARY_BUILTINS_OPERATORS(SCALAR, -, Layout) \
  TEST_BINARY_BUILTINS_OPERATORS(SCALAR, *, Layout) \
  TEST_BINARY_BUILTINS_OPERATORS(SCALAR, /, Layout)

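// Runs all binary builtin tests on a 10x10x10 tensor in both layouts; float
// covers the full set, while int is used for the modulo-by-scalar check.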
static void test_builtin_binary_sycl(const Eigen::SyclDevice &sycl_device) {
  int64_t sizeDim1 = 10;
  int64_t sizeDim2 = 10;
  int64_t sizeDim3 = 10;
  array<int64_t, 3> tensorRange = {{sizeDim1, sizeDim2, sizeDim3}};
  TEST_BINARY_BUILTINS(float, RowMajor)
  TEST_BINARY_BUILTINS_OPERATORS_THAT_TAKES_SCALAR(int, %, RowMajor)
  TEST_BINARY_BUILTINS(float, ColMajor)
  TEST_BINARY_BUILTINS_OPERATORS_THAT_TAKES_SCALAR(int, %, ColMajor)
}

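// Test entry point: runs the unary and binary suites once for every SYCL
// device reported by Eigen::get_sycl_supported_devices().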
void test_cxx11_tensor_builtins_sycl() {
  for (const auto& device : Eigen::get_sycl_supported_devices()) {
    QueueInterface queueInterface(device);
    Eigen::SyclDevice sycl_device(&queueInterface);
    CALL_SUBTEST(test_builtin_unary_sycl(sycl_device));
    CALL_SUBTEST(test_builtin_binary_sycl(sycl_device));
  }
}