[SYCL clean up the code] : removing extra #pragma unroll in SYCL which was causing issues in embedded systems

commit: a725a3233c98185eb3e5db6186aea3a906b8411f [log] [tgz]
author: mehdi-goli <mehdi.goli@codeplay.com> Tue Oct 27 16:31:33 2020 +0000
committer: David Tellenbach <david.tellenbach@me.com> Wed Oct 28 08:34:49 2020 +0000
tree: 6baf020ab6f144bae295aee54f0e2cb63f395d7e
parent: b9ff791fed08a6b9d877b460377c13f4e3b71d70 [diff]
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorReductionSycl.h b/unsupported/Eigen/CXX11/src/Tensor/TensorReductionSycl.h
index 387c3ed..474eba0 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorReductionSycl.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorReductionSycl.h

@@ -100,7 +100,6 @@
     CoeffReturnType accumulator = *aInPtr;
 
     scratchptr[localid] = op.finalize(accumulator);
-#pragma unroll 8
     for (Index offset = itemID.get_local_range(0) / 2; offset > 0; offset /= 2) {
       itemID.barrier(cl::sycl::access::fence_space::local_space);
       if (localid < offset) {
@@ -154,7 +153,6 @@
     Index start = Evaluator::PacketSize * globalid;
     // vectorizable parts
     PacketReturnType packetAccumulator = op.template initializePacket<PacketReturnType>();
-#pragma unroll(8 / Evaluator::PacketSize)
     for (Index i = start; i < VectorizedRange; i += step) {
       op.template reducePacket<PacketReturnType>(evaluator.impl().template packet<Unaligned>(i), &packetAccumulator);
     }
@@ -293,7 +291,6 @@
     const Index per_thread_local_stride = PannelParameters::LocalThreadSizeR * reduce_elements_num_groups;
     const Index per_thread_global_stride =
         rt == reduction_dim::outer_most ? num_coeffs_to_preserve * per_thread_local_stride : per_thread_local_stride;
-#pragma unroll 8
     for (Index i = globalRId; i < num_coeffs_to_reduce; i += per_thread_local_stride) {
       op.reduce(evaluator.impl().coeff(global_offset), &accumulator);
       localOffset += per_thread_local_stride;
@@ -391,7 +388,6 @@
 
     OutScalar accumulator = op.initialize();
 // num_coeffs_to_reduce is not bigger that 256
-#pragma unroll 8
     for (Index i = 0; i < num_coeffs_to_reduce; i++) {
       op.reduce(*in_ptr, &accumulator);
       in_ptr += num_coeffs_to_preserve;
commit	a725a3233c98185eb3e5db6186aea3a906b8411f	[log] [tgz]
author	mehdi-goli <mehdi.goli@codeplay.com>	Tue Oct 27 16:31:33 2020 +0000
committer	David Tellenbach <david.tellenbach@me.com>	Wed Oct 28 08:34:49 2020 +0000
tree	6baf020ab6f144bae295aee54f0e2cb63f395d7e
parent	b9ff791fed08a6b9d877b460377c13f4e3b71d70 [diff]