Add EIGEN_STRONG_INLINE back; use size() instead of dimensions().TotalSize() on Tensor.
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceSycl.h b/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceSycl.h
index 2be1a5a..844cec1 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceSycl.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceSycl.h
@@ -44,14 +44,14 @@
   // destructor
   ~SyclDevice() { deallocate_all(); }
 
-  template <typename T> void deallocate(T *p) const {
+  template <typename T> EIGEN_STRONG_INLINE void deallocate(T *p) const {
     auto it = buffer_map.find(p);
     if (it != buffer_map.end()) {
       buffer_map.erase(it);
       internal::aligned_free(p);
     }
   }
-  void deallocate_all() const {
+  EIGEN_STRONG_INLINE void deallocate_all() const {
     std::map<const void *, std::shared_ptr<void>>::iterator it=buffer_map.begin();
     while (it!=buffer_map.end()) {
       auto p=it->first;
@@ -88,23 +88,23 @@
   }
 
   /// allocating memory on the cpu
-  void *allocate(size_t) const {
+  EIGEN_STRONG_INLINE void *allocate(size_t) const {
     return internal::aligned_malloc(8);
   }
 
   // some runtime conditions that can be applied here
-  bool isDeviceSuitable() const { return true; }
+  EIGEN_STRONG_INLINE bool isDeviceSuitable() const { return true; }
 
-  void memcpy(void *dst, const void *src, size_t n) const {
+  EIGEN_STRONG_INLINE void memcpy(void *dst, const void *src, size_t n) const {
     ::memcpy(dst, src, n);
   }
 
-  template<typename T> void memcpyHostToDevice(T *dst, const T *src, size_t n) const {
+  template<typename T> EIGEN_STRONG_INLINE void memcpyHostToDevice(T *dst, const T *src, size_t n) const {
     auto host_acc= (static_cast<cl::sycl::buffer<T, 1>*>(add_sycl_buffer(dst, n).first->second.get()))-> template get_access<cl::sycl::access::mode::discard_write, cl::sycl::access::target::host_buffer>();
     memcpy(host_acc.get_pointer(), src, n);
   }
 
- inline void parallel_for_setup(size_t n, size_t &tileSize, size_t &rng, size_t &GRange)  const {
+  EIGEN_STRONG_INLINE void parallel_for_setup(size_t n, size_t &tileSize, size_t &rng, size_t &GRange)  const {
       tileSize =m_queue.get_device(). template get_info<cl::sycl::info::device::max_work_group_size>()/2;
       rng = n;
       if (rng==0) rng=1;
@@ -116,7 +116,7 @@
       }
     }
 
-  template<typename T> void memcpyDeviceToHost(T *dst, const T *src, size_t n) const {
+  template<typename T> EIGEN_STRONG_INLINE void memcpyDeviceToHost(T *dst, const T *src, size_t n) const {
     auto it = buffer_map.find(src);
     if (it != buffer_map.end()) {
     size_t rng, GRange, tileSize;
@@ -141,7 +141,7 @@
     }
   }
 
-  template<typename T>  void memset(T *buff, int c, size_t n) const {
+  template<typename T>  EIGEN_STRONG_INLINE void memset(T *buff, int c, size_t n) const {
 
       size_t rng, GRange, tileSize;
       parallel_for_setup(n/sizeof(T), tileSize, rng, GRange);
@@ -158,7 +158,7 @@
       });
       m_queue.throw_asynchronous();
   }
-  int majorDeviceVersion() const {
+  EIGEN_STRONG_INLINE int majorDeviceVersion() const {
   return 1;
   }
 };
diff --git a/unsupported/test/cxx11_tensor_device_sycl.cpp b/unsupported/test/cxx11_tensor_device_sycl.cpp
index 820bc88..584fa80 100644
--- a/unsupported/test/cxx11_tensor_device_sycl.cpp
+++ b/unsupported/test/cxx11_tensor_device_sycl.cpp
@@ -29,11 +29,11 @@
   array<int, 1> tensorRange = {{sizeDim1}};
   Tensor<int, 1> in(tensorRange);
   Tensor<int, 1> in1(tensorRange);
-  memset(in1.data(), 1,in1.dimensions().TotalSize()*sizeof(int));
-  int * gpu_in_data  = static_cast<int*>(sycl_device.allocate(in.dimensions().TotalSize()*sizeof(int)));
-  sycl_device.memset(gpu_in_data, 1,in.dimensions().TotalSize()*sizeof(int) );
-  sycl_device.memcpyDeviceToHost(in.data(), gpu_in_data, in.dimensions().TotalSize()*sizeof(int) );
-  for (int i=0; i<in.dimensions().TotalSize(); i++)
+  memset(in1.data(), 1,in1.size()*sizeof(int));
+  int * gpu_in_data  = static_cast<int*>(sycl_device.allocate(in.size()*sizeof(int)));
+  sycl_device.memset(gpu_in_data, 1,in.size()*sizeof(int) );
+  sycl_device.memcpyDeviceToHost(in.data(), gpu_in_data, in.size()*sizeof(int) );
+  for (int i=0; i<in.size(); i++)
     VERIFY_IS_APPROX(in(i), in1(i));
   sycl_device.deallocate(gpu_in_data);
 }