Apply clang-format
diff --git a/Eigen/src/Core/arch/GPU/Complex.h b/Eigen/src/Core/arch/GPU/Complex.h
index 8a7869c..fa46aec 100644
--- a/Eigen/src/Core/arch/GPU/Complex.h
+++ b/Eigen/src/Core/arch/GPU/Complex.h
@@ -31,7 +31,7 @@
 //    to the first inclusion of <complex>.
 
 #if defined(EIGEN_GPUCC) && defined(EIGEN_GPU_COMPILE_PHASE)
-    
+
 // ICC already specializes std::complex<float> and std::complex<double>
 // operators, preventing us from making them device functions here.
 // This will lead to silent runtime errors if the operators are used on device.
@@ -62,33 +62,30 @@
 // Specialized std::complex overloads.
 namespace complex_operator_detail {
 
-template<typename T>
-EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
-std::complex<T> complex_multiply(const std::complex<T>& a, const std::complex<T>& b) {
+template <typename T>
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE std::complex<T> complex_multiply(const std::complex<T>& a,
+                                                                       const std::complex<T>& b) {
   const T a_real = numext::real(a);
   const T a_imag = numext::imag(a);
   const T b_real = numext::real(b);
   const T b_imag = numext::imag(b);
-  return std::complex<T>(
-      a_real * b_real - a_imag * b_imag,
-      a_imag * b_real + a_real * b_imag);
+  return std::complex<T>(a_real * b_real - a_imag * b_imag, a_imag * b_real + a_real * b_imag);
 }
 
-template<typename T>
-EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
-std::complex<T> complex_divide_fast(const std::complex<T>& a, const std::complex<T>& b) {
+template <typename T>
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE std::complex<T> complex_divide_fast(const std::complex<T>& a,
+                                                                          const std::complex<T>& b) {
   const T a_real = numext::real(a);
   const T a_imag = numext::imag(a);
   const T b_real = numext::real(b);
   const T b_imag = numext::imag(b);
   const T norm = (b_real * b_real + b_imag * b_imag);
-  return std::complex<T>((a_real * b_real + a_imag * b_imag) / norm,
-                          (a_imag * b_real - a_real * b_imag) / norm);
+  return std::complex<T>((a_real * b_real + a_imag * b_imag) / norm, (a_imag * b_real - a_real * b_imag) / norm);
 }
 
-template<typename T>
-EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
-std::complex<T> complex_divide_stable(const std::complex<T>& a, const std::complex<T>& b) {
+template <typename T>
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE std::complex<T> complex_divide_stable(const std::complex<T>& a,
+                                                                            const std::complex<T>& b) {
   const T a_real = numext::real(a);
   const T a_imag = numext::imag(a);
   const T b_real = numext::real(b);
@@ -99,13 +96,13 @@
   const T rscale = scale_imag ? T(1) : b_real / b_imag;
   const T iscale = scale_imag ? b_imag / b_real : T(1);
   const T denominator = b_real * rscale + b_imag * iscale;
-  return std::complex<T>((a_real * rscale + a_imag * iscale) / denominator, 
+  return std::complex<T>((a_real * rscale + a_imag * iscale) / denominator,
                          (a_imag * rscale - a_real * iscale) / denominator);
 }
 
-template<typename T>
-EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
-std::complex<T> complex_divide(const std::complex<T>& a, const std::complex<T>& b) {
+template <typename T>
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE std::complex<T> complex_divide(const std::complex<T>& a,
+                                                                     const std::complex<T>& b) {
 #if EIGEN_FAST_MATH
   return complex_divide_fast(a, b);
 #else
@@ -118,131 +115,107 @@
 //       since they are already specialized for float/double/long double within
 //       the standard <complex> header. We also do not specialize the stream
 //       operators.
-#define EIGEN_CREATE_STD_COMPLEX_OPERATOR_SPECIALIZATIONS(T)                                    \
-                                                                                                \
-EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE                                                           \
-std::complex<T> operator+(const std::complex<T>& a) { return a; }                               \
-                                                                                                \
-EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE                                                           \
-std::complex<T> operator-(const std::complex<T>& a) {                                           \
-  return std::complex<T>(-numext::real(a), -numext::imag(a));                                   \
-}                                                                                               \
-                                                                                                \
-EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE                                                           \
-std::complex<T> operator+(const std::complex<T>& a, const std::complex<T>& b) {                 \
-  return std::complex<T>(numext::real(a) + numext::real(b), numext::imag(a) + numext::imag(b)); \
-}                                                                                               \
-                                                                                                \
-EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE                                                           \
-std::complex<T> operator+(const std::complex<T>& a, const T& b) {                               \
-  return std::complex<T>(numext::real(a) + b, numext::imag(a));                                 \
-}                                                                                               \
-                                                                                                \
-EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE                                                           \
-std::complex<T> operator+(const T& a, const std::complex<T>& b) {                               \
-  return std::complex<T>(a + numext::real(b), numext::imag(b));                                 \
-}                                                                                               \
-                                                                                                \
-EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE                                                           \
-std::complex<T> operator-(const std::complex<T>& a, const std::complex<T>& b) {                 \
-  return std::complex<T>(numext::real(a) - numext::real(b), numext::imag(a) - numext::imag(b)); \
-}                                                                                               \
-                                                                                                \
-EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE                                                           \
-std::complex<T> operator-(const std::complex<T>& a, const T& b) {                               \
-  return std::complex<T>(numext::real(a) - b, numext::imag(a));                                 \
-}                                                                                               \
-                                                                                                \
-EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE                                                           \
-std::complex<T> operator-(const T& a, const std::complex<T>& b) {                               \
-  return std::complex<T>(a - numext::real(b), -numext::imag(b));                                \
-}                                                                                               \
-                                                                                                \
-EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE                                                           \
-std::complex<T> operator*(const std::complex<T>& a, const std::complex<T>& b) {                 \
-  return complex_multiply(a, b);                                                                \
-}                                                                                               \
-                                                                                                \
-EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE                                                           \
-std::complex<T> operator*(const std::complex<T>& a, const T& b) {                               \
-  return std::complex<T>(numext::real(a) * b, numext::imag(a) * b);                             \
-}                                                                                               \
-                                                                                                \
-EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE                                                           \
-std::complex<T> operator*(const T& a, const std::complex<T>& b) {                               \
-  return std::complex<T>(a * numext::real(b), a * numext::imag(b));                             \
-}                                                                                               \
-                                                                                                \
-EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE                                                           \
-std::complex<T> operator/(const std::complex<T>& a, const std::complex<T>& b) {                 \
-  return complex_divide(a, b);                                                                  \
-}                                                                                               \
-                                                                                                \
-EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE                                                           \
-std::complex<T> operator/(const std::complex<T>& a, const T& b) {                               \
-  return std::complex<T>(numext::real(a) / b, numext::imag(a) / b);                             \
-}                                                                                               \
-                                                                                                \
-EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE                                                           \
-std::complex<T> operator/(const T& a, const std::complex<T>& b) {                               \
-  return complex_divide(std::complex<T>(a, 0), b);                                              \
-}                                                                                               \
-                                                                                                \
-EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE                                                           \
-std::complex<T>& operator+=(std::complex<T>& a, const std::complex<T>& b) {                     \
-  numext::real_ref(a) += numext::real(b);                                                       \
-  numext::imag_ref(a) += numext::imag(b);                                                       \
-  return a;                                                                                     \
-}                                                                                               \
-                                                                                                \
-EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE                                                           \
-std::complex<T>& operator-=(std::complex<T>& a, const std::complex<T>& b) {                     \
-  numext::real_ref(a) -= numext::real(b);                                                       \
-  numext::imag_ref(a) -= numext::imag(b);                                                       \
-  return a;                                                                                     \
-}                                                                                               \
-                                                                                                \
-EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE                                                           \
-std::complex<T>& operator*=(std::complex<T>& a, const std::complex<T>& b) {                     \
-  a = complex_multiply(a, b);                                                                   \
-  return a;                                                                                     \
-}                                                                                               \
-                                                                                                \
-EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE                                                           \
-std::complex<T>& operator/=(std::complex<T>& a, const std::complex<T>& b) {                     \
-  a = complex_divide(a, b);                                                                     \
-  return  a;                                                                                    \
-}                                                                                               \
-                                                                                                \
-EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE                                                           \
-bool operator==(const std::complex<T>& a, const std::complex<T>& b) {                           \
-  return numext::real(a) == numext::real(b) && numext::imag(a) == numext::imag(b);              \
-}                                                                                               \
-                                                                                                \
-EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE                                                           \
-bool operator==(const std::complex<T>& a, const T& b) {                                         \
-  return numext::real(a) == b && numext::imag(a) == 0;                                          \
-}                                                                                               \
-                                                                                                \
-EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE                                                           \
-bool operator==(const T& a, const std::complex<T>& b) {                                         \
-  return a  == numext::real(b) && 0 == numext::imag(b);                                         \
-}                                                                                               \
-                                                                                                \
-EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE                                                           \
-bool operator!=(const std::complex<T>& a, const std::complex<T>& b) {                           \
-  return !(a == b);                                                                             \
-}                                                                                               \
-                                                                                                \
-EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE                                                           \
-bool operator!=(const std::complex<T>& a, const T& b) {                                         \
-  return !(a == b);                                                                             \
-}                                                                                               \
-                                                                                                \
-EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE                                                           \
-bool operator!=(const T& a, const std::complex<T>& b) {                                         \
-  return !(a == b);                                                                             \
-}
+#define EIGEN_CREATE_STD_COMPLEX_OPERATOR_SPECIALIZATIONS(T)                                                        \
+                                                                                                                    \
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE std::complex<T> operator+(const std::complex<T>& a) { return a; }           \
+                                                                                                                    \
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE std::complex<T> operator-(const std::complex<T>& a) {                       \
+    return std::complex<T>(-numext::real(a), -numext::imag(a));                                                     \
+  }                                                                                                                 \
+                                                                                                                    \
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE std::complex<T> operator+(const std::complex<T>& a,                         \
+                                                                  const std::complex<T>& b) {                       \
+    return std::complex<T>(numext::real(a) + numext::real(b), numext::imag(a) + numext::imag(b));                   \
+  }                                                                                                                 \
+                                                                                                                    \
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE std::complex<T> operator+(const std::complex<T>& a, const T& b) {           \
+    return std::complex<T>(numext::real(a) + b, numext::imag(a));                                                   \
+  }                                                                                                                 \
+                                                                                                                    \
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE std::complex<T> operator+(const T& a, const std::complex<T>& b) {           \
+    return std::complex<T>(a + numext::real(b), numext::imag(b));                                                   \
+  }                                                                                                                 \
+                                                                                                                    \
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE std::complex<T> operator-(const std::complex<T>& a,                         \
+                                                                  const std::complex<T>& b) {                       \
+    return std::complex<T>(numext::real(a) - numext::real(b), numext::imag(a) - numext::imag(b));                   \
+  }                                                                                                                 \
+                                                                                                                    \
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE std::complex<T> operator-(const std::complex<T>& a, const T& b) {           \
+    return std::complex<T>(numext::real(a) - b, numext::imag(a));                                                   \
+  }                                                                                                                 \
+                                                                                                                    \
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE std::complex<T> operator-(const T& a, const std::complex<T>& b) {           \
+    return std::complex<T>(a - numext::real(b), -numext::imag(b));                                                  \
+  }                                                                                                                 \
+                                                                                                                    \
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE std::complex<T> operator*(const std::complex<T>& a,                         \
+                                                                  const std::complex<T>& b) {                       \
+    return complex_multiply(a, b);                                                                                  \
+  }                                                                                                                 \
+                                                                                                                    \
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE std::complex<T> operator*(const std::complex<T>& a, const T& b) {           \
+    return std::complex<T>(numext::real(a) * b, numext::imag(a) * b);                                               \
+  }                                                                                                                 \
+                                                                                                                    \
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE std::complex<T> operator*(const T& a, const std::complex<T>& b) {           \
+    return std::complex<T>(a * numext::real(b), a * numext::imag(b));                                               \
+  }                                                                                                                 \
+                                                                                                                    \
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE std::complex<T> operator/(const std::complex<T>& a,                         \
+                                                                  const std::complex<T>& b) {                       \
+    return complex_divide(a, b);                                                                                    \
+  }                                                                                                                 \
+                                                                                                                    \
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE std::complex<T> operator/(const std::complex<T>& a, const T& b) {           \
+    return std::complex<T>(numext::real(a) / b, numext::imag(a) / b);                                               \
+  }                                                                                                                 \
+                                                                                                                    \
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE std::complex<T> operator/(const T& a, const std::complex<T>& b) {           \
+    return complex_divide(std::complex<T>(a, 0), b);                                                                \
+  }                                                                                                                 \
+                                                                                                                    \
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE std::complex<T>& operator+=(std::complex<T>& a, const std::complex<T>& b) { \
+    numext::real_ref(a) += numext::real(b);                                                                         \
+    numext::imag_ref(a) += numext::imag(b);                                                                         \
+    return a;                                                                                                       \
+  }                                                                                                                 \
+                                                                                                                    \
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE std::complex<T>& operator-=(std::complex<T>& a, const std::complex<T>& b) { \
+    numext::real_ref(a) -= numext::real(b);                                                                         \
+    numext::imag_ref(a) -= numext::imag(b);                                                                         \
+    return a;                                                                                                       \
+  }                                                                                                                 \
+                                                                                                                    \
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE std::complex<T>& operator*=(std::complex<T>& a, const std::complex<T>& b) { \
+    a = complex_multiply(a, b);                                                                                     \
+    return a;                                                                                                       \
+  }                                                                                                                 \
+                                                                                                                    \
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE std::complex<T>& operator/=(std::complex<T>& a, const std::complex<T>& b) { \
+    a = complex_divide(a, b);                                                                                       \
+    return a;                                                                                                       \
+  }                                                                                                                 \
+                                                                                                                    \
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool operator==(const std::complex<T>& a, const std::complex<T>& b) {       \
+    return numext::real(a) == numext::real(b) && numext::imag(a) == numext::imag(b);                                \
+  }                                                                                                                 \
+                                                                                                                    \
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool operator==(const std::complex<T>& a, const T& b) {                     \
+    return numext::real(a) == b && numext::imag(a) == 0;                                                            \
+  }                                                                                                                 \
+                                                                                                                    \
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool operator==(const T& a, const std::complex<T>& b) {                     \
+    return a == numext::real(b) && 0 == numext::imag(b);                                                            \
+  }                                                                                                                 \
+                                                                                                                    \
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool operator!=(const std::complex<T>& a, const std::complex<T>& b) {       \
+    return !(a == b);                                                                                               \
+  }                                                                                                                 \
+                                                                                                                    \
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool operator!=(const std::complex<T>& a, const T& b) { return !(a == b); } \
+                                                                                                                    \
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool operator!=(const T& a, const std::complex<T>& b) { return !(a == b); }
 
 // Do not specialize for long double, since that reduces to double on device.
 EIGEN_CREATE_STD_COMPLEX_OPERATOR_SPECIALIZATIONS(float)
@@ -250,7 +223,6 @@
 
 #undef EIGEN_CREATE_STD_COMPLEX_OPERATOR_SPECIALIZATIONS
 
-  
 }  // namespace complex_operator_detail
 
 EIGEN_USING_STD_COMPLEX_OPERATORS
diff --git a/Eigen/src/Core/arch/GPU/MathFunctions.h b/Eigen/src/Core/arch/GPU/MathFunctions.h
index f8191db..606215f 100644
--- a/Eigen/src/Core/arch/GPU/MathFunctions.h
+++ b/Eigen/src/Core/arch/GPU/MathFunctions.h
@@ -21,86 +21,73 @@
 // introduce conflicts between these packet_traits definitions and the ones
 // we'll use on the host side (SSE, AVX, ...)
 #if defined(EIGEN_GPUCC) && defined(EIGEN_USE_GPU)
-template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
-float4 plog<float4>(const float4& a)
-{
+template <>
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE float4 plog<float4>(const float4& a) {
   return make_float4(logf(a.x), logf(a.y), logf(a.z), logf(a.w));
 }
 
-template<>  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
-double2 plog<double2>(const double2& a)
-{
+template <>
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE double2 plog<double2>(const double2& a) {
   using ::log;
   return make_double2(log(a.x), log(a.y));
 }
 
-template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
-float4 plog1p<float4>(const float4& a)
-{
+template <>
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE float4 plog1p<float4>(const float4& a) {
   return make_float4(log1pf(a.x), log1pf(a.y), log1pf(a.z), log1pf(a.w));
 }
 
-template<>  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
-double2 plog1p<double2>(const double2& a)
-{
+template <>
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE double2 plog1p<double2>(const double2& a) {
   return make_double2(log1p(a.x), log1p(a.y));
 }
 
-template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
-float4 pexp<float4>(const float4& a)
-{
+template <>
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE float4 pexp<float4>(const float4& a) {
   return make_float4(expf(a.x), expf(a.y), expf(a.z), expf(a.w));
 }
 
-template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
-double2 pexp<double2>(const double2& a)
-{
+template <>
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE double2 pexp<double2>(const double2& a) {
   using ::exp;
   return make_double2(exp(a.x), exp(a.y));
 }
 
-template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
-float4 pexpm1<float4>(const float4& a)
-{
+template <>
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE float4 pexpm1<float4>(const float4& a) {
   return make_float4(expm1f(a.x), expm1f(a.y), expm1f(a.z), expm1f(a.w));
 }
 
-template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
-double2 pexpm1<double2>(const double2& a)
-{
+template <>
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE double2 pexpm1<double2>(const double2& a) {
   return make_double2(expm1(a.x), expm1(a.y));
 }
 
-template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
-float4 psqrt<float4>(const float4& a)
-{
+template <>
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE float4 psqrt<float4>(const float4& a) {
   return make_float4(sqrtf(a.x), sqrtf(a.y), sqrtf(a.z), sqrtf(a.w));
 }
 
-template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
-double2 psqrt<double2>(const double2& a)
-{
+template <>
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE double2 psqrt<double2>(const double2& a) {
   using ::sqrt;
   return make_double2(sqrt(a.x), sqrt(a.y));
 }
 
-template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
-float4 prsqrt<float4>(const float4& a)
-{
+template <>
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE float4 prsqrt<float4>(const float4& a) {
   return make_float4(rsqrtf(a.x), rsqrtf(a.y), rsqrtf(a.z), rsqrtf(a.w));
 }
 
-template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
-double2 prsqrt<double2>(const double2& a)
-{
+template <>
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE double2 prsqrt<double2>(const double2& a) {
   return make_double2(rsqrt(a.x), rsqrt(a.y));
 }
 
-
 #endif
 
-} // end namespace internal
+}  // end namespace internal
 
-} // end namespace Eigen
+}  // end namespace Eigen
 
-#endif // EIGEN_MATH_FUNCTIONS_GPU_H
+#endif  // EIGEN_MATH_FUNCTIONS_GPU_H
diff --git a/Eigen/src/Core/arch/GPU/PacketMath.h b/Eigen/src/Core/arch/GPU/PacketMath.h
index 5c959ed..7900b0e 100644
--- a/Eigen/src/Core/arch/GPU/PacketMath.h
+++ b/Eigen/src/Core/arch/GPU/PacketMath.h
@@ -36,23 +36,29 @@
 // we'll use on the host side (SSE, AVX, ...)
 #if defined(EIGEN_GPUCC) && defined(EIGEN_USE_GPU)
 
-template<> struct is_arithmetic<float4>  { enum { value = true }; };
-template<> struct is_arithmetic<double2> { enum { value = true }; };
+template <>
+struct is_arithmetic<float4> {
+  enum { value = true };
+};
+template <>
+struct is_arithmetic<double2> {
+  enum { value = true };
+};
 
-template<> struct packet_traits<float> : default_packet_traits
-{
+template <>
+struct packet_traits<float> : default_packet_traits {
   typedef float4 type;
   typedef float4 half;
   enum {
     Vectorizable = 1,
     AlignedOnScalar = 1,
-    size=4,
+    size = 4,
 
-    HasDiv  = 1,
-    HasSin  = 0,
-    HasCos  = 0,
-    HasLog  = 1,
-    HasExp  = 1,
+    HasDiv = 1,
+    HasSin = 0,
+    HasCos = 0,
+    HasLog = 1,
+    HasExp = 1,
     HasSqrt = 1,
     HasRsqrt = 1,
     HasLGamma = 1,
@@ -74,18 +80,18 @@
   };
 };
 
-template<> struct packet_traits<double> : default_packet_traits
-{
+template <>
+struct packet_traits<double> : default_packet_traits {
   typedef double2 type;
   typedef double2 half;
   enum {
     Vectorizable = 1,
     AlignedOnScalar = 1,
-    size=2,
+    size = 2,
 
-    HasDiv  = 1,
-    HasLog  = 1,
-    HasExp  = 1,
+    HasDiv = 1,
+    HasLog = 1,
+    HasExp = 1,
     HasSqrt = 1,
     HasRsqrt = 1,
     HasLGamma = 1,
@@ -107,14 +113,37 @@
   };
 };
 
+template <>
+struct unpacket_traits<float4> {
+  typedef float type;
+  enum {
+    size = 4,
+    alignment = Aligned16,
+    vectorizable = true,
+    masked_load_available = false,
+    masked_store_available = false
+  };
+  typedef float4 half;
+};
+template <>
+struct unpacket_traits<double2> {
+  typedef double type;
+  enum {
+    size = 2,
+    alignment = Aligned16,
+    vectorizable = true,
+    masked_load_available = false,
+    masked_store_available = false
+  };
+  typedef double2 half;
+};
 
-template<> struct unpacket_traits<float4>  { typedef float  type; enum {size=4, alignment=Aligned16, vectorizable=true, masked_load_available=false, masked_store_available=false}; typedef float4 half; };
-template<> struct unpacket_traits<double2> { typedef double type; enum {size=2, alignment=Aligned16, vectorizable=true, masked_load_available=false, masked_store_available=false}; typedef double2 half; };
-
-template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE float4 pset1<float4>(const float&  from) {
+template <>
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE float4 pset1<float4>(const float& from) {
   return make_float4(from, from, from, from);
 }
-template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE double2 pset1<double2>(const double& from) {
+template <>
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE double2 pset1<double2>(const double& from) {
   return make_double2(from, from);
 }
 
@@ -123,259 +152,254 @@
 // of the functions, while the latter can only deal with one of them.
 #if defined(EIGEN_CUDA_ARCH) || defined(EIGEN_HIPCC) || (defined(EIGEN_CUDACC) && EIGEN_COMP_CLANG && !EIGEN_COMP_NVCC)
 
-EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE float bitwise_and(const float& a,
-                                                        const float& b) {
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE float bitwise_and(const float& a, const float& b) {
   return __int_as_float(__float_as_int(a) & __float_as_int(b));
 }
-EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE double bitwise_and(const double& a,
-                                                         const double& b) {
-  return __longlong_as_double(__double_as_longlong(a) &
-                              __double_as_longlong(b));
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE double bitwise_and(const double& a, const double& b) {
+  return __longlong_as_double(__double_as_longlong(a) & __double_as_longlong(b));
 }
 
-EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE float bitwise_or(const float& a,
-                                                       const float& b) {
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE float bitwise_or(const float& a, const float& b) {
   return __int_as_float(__float_as_int(a) | __float_as_int(b));
 }
-EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE double bitwise_or(const double& a,
-                                                        const double& b) {
-  return __longlong_as_double(__double_as_longlong(a) |
-                              __double_as_longlong(b));
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE double bitwise_or(const double& a, const double& b) {
+  return __longlong_as_double(__double_as_longlong(a) | __double_as_longlong(b));
 }
 
-EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE float bitwise_xor(const float& a,
-                                                        const float& b) {
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE float bitwise_xor(const float& a, const float& b) {
   return __int_as_float(__float_as_int(a) ^ __float_as_int(b));
 }
-EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE double bitwise_xor(const double& a,
-                                                         const double& b) {
-  return __longlong_as_double(__double_as_longlong(a) ^
-                              __double_as_longlong(b));
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE double bitwise_xor(const double& a, const double& b) {
+  return __longlong_as_double(__double_as_longlong(a) ^ __double_as_longlong(b));
 }
 
-EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE float bitwise_andnot(const float& a,
-                                                           const float& b) {
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE float bitwise_andnot(const float& a, const float& b) {
   return __int_as_float(__float_as_int(a) & ~__float_as_int(b));
 }
-EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE double bitwise_andnot(const double& a,
-                                                            const double& b) {
-  return __longlong_as_double(__double_as_longlong(a) &
-                              ~__double_as_longlong(b));
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE double bitwise_andnot(const double& a, const double& b) {
+  return __longlong_as_double(__double_as_longlong(a) & ~__double_as_longlong(b));
 }
-EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE float eq_mask(const float& a,
-                                                    const float& b) {
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE float eq_mask(const float& a, const float& b) {
   return __int_as_float(a == b ? 0xffffffffu : 0u);
 }
-EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE double eq_mask(const double& a,
-                                                     const double& b) {
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE double eq_mask(const double& a, const double& b) {
   return __longlong_as_double(a == b ? 0xffffffffffffffffull : 0ull);
 }
 
-EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE float lt_mask(const float& a,
-                                                    const float& b) {
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE float lt_mask(const float& a, const float& b) {
   return __int_as_float(a < b ? 0xffffffffu : 0u);
 }
 
-EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE double lt_mask(const double& a,
-                                                     const double& b) {
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE double lt_mask(const double& a, const double& b) {
   return __longlong_as_double(a < b ? 0xffffffffffffffffull : 0ull);
 }
 
-EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE float le_mask(const float& a,
-                                                    const float& b) {
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE float le_mask(const float& a, const float& b) {
   return __int_as_float(a <= b ? 0xffffffffu : 0u);
 }
 
-EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE double le_mask(const double& a,
-                                                     const double& b) {
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE double le_mask(const double& a, const double& b) {
   return __longlong_as_double(a <= b ? 0xffffffffffffffffull : 0ull);
 }
 
 template <>
-EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE float4 pand<float4>(const float4& a,
-                                                          const float4& b) {
-  return make_float4(bitwise_and(a.x, b.x), bitwise_and(a.y, b.y),
-                     bitwise_and(a.z, b.z), bitwise_and(a.w, b.w));
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE float4 pand<float4>(const float4& a, const float4& b) {
+  return make_float4(bitwise_and(a.x, b.x), bitwise_and(a.y, b.y), bitwise_and(a.z, b.z), bitwise_and(a.w, b.w));
 }
 template <>
-EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE double2 pand<double2>(const double2& a,
-                                                            const double2& b) {
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE double2 pand<double2>(const double2& a, const double2& b) {
   return make_double2(bitwise_and(a.x, b.x), bitwise_and(a.y, b.y));
 }
 
 template <>
-EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE float4 por<float4>(const float4& a,
-                                                         const float4& b) {
-  return make_float4(bitwise_or(a.x, b.x), bitwise_or(a.y, b.y),
-                     bitwise_or(a.z, b.z), bitwise_or(a.w, b.w));
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE float4 por<float4>(const float4& a, const float4& b) {
+  return make_float4(bitwise_or(a.x, b.x), bitwise_or(a.y, b.y), bitwise_or(a.z, b.z), bitwise_or(a.w, b.w));
 }
 template <>
-EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE double2 por<double2>(const double2& a,
-                                                           const double2& b) {
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE double2 por<double2>(const double2& a, const double2& b) {
   return make_double2(bitwise_or(a.x, b.x), bitwise_or(a.y, b.y));
 }
 
 template <>
-EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE float4 pxor<float4>(const float4& a,
-                                                          const float4& b) {
-  return make_float4(bitwise_xor(a.x, b.x), bitwise_xor(a.y, b.y),
-                     bitwise_xor(a.z, b.z), bitwise_xor(a.w, b.w));
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE float4 pxor<float4>(const float4& a, const float4& b) {
+  return make_float4(bitwise_xor(a.x, b.x), bitwise_xor(a.y, b.y), bitwise_xor(a.z, b.z), bitwise_xor(a.w, b.w));
 }
 template <>
-EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE double2 pxor<double2>(const double2& a,
-                                                            const double2& b) {
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE double2 pxor<double2>(const double2& a, const double2& b) {
   return make_double2(bitwise_xor(a.x, b.x), bitwise_xor(a.y, b.y));
 }
 
 template <>
-EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE float4 pandnot<float4>(const float4& a,
-                                                             const float4& b) {
-  return make_float4(bitwise_andnot(a.x, b.x), bitwise_andnot(a.y, b.y),
-                     bitwise_andnot(a.z, b.z), bitwise_andnot(a.w, b.w));
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE float4 pandnot<float4>(const float4& a, const float4& b) {
+  return make_float4(bitwise_andnot(a.x, b.x), bitwise_andnot(a.y, b.y), bitwise_andnot(a.z, b.z),
+                     bitwise_andnot(a.w, b.w));
 }
 template <>
-EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE double2
-pandnot<double2>(const double2& a, const double2& b) {
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE double2 pandnot<double2>(const double2& a, const double2& b) {
   return make_double2(bitwise_andnot(a.x, b.x), bitwise_andnot(a.y, b.y));
 }
 
 template <>
-EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE float4 pcmp_eq<float4>(const float4& a,
-                                                             const float4& b) {
-  return make_float4(eq_mask(a.x, b.x), eq_mask(a.y, b.y), eq_mask(a.z, b.z),
-                     eq_mask(a.w, b.w));
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE float4 pcmp_eq<float4>(const float4& a, const float4& b) {
+  return make_float4(eq_mask(a.x, b.x), eq_mask(a.y, b.y), eq_mask(a.z, b.z), eq_mask(a.w, b.w));
 }
 template <>
-EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE float4 pcmp_lt<float4>(const float4& a,
-                                                             const float4& b) {
-  return make_float4(lt_mask(a.x, b.x), lt_mask(a.y, b.y), lt_mask(a.z, b.z),
-                     lt_mask(a.w, b.w));
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE float4 pcmp_lt<float4>(const float4& a, const float4& b) {
+  return make_float4(lt_mask(a.x, b.x), lt_mask(a.y, b.y), lt_mask(a.z, b.z), lt_mask(a.w, b.w));
 }
 template <>
-EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE float4 pcmp_le<float4>(const float4& a,
-                                                             const float4& b) {
-  return make_float4(le_mask(a.x, b.x), le_mask(a.y, b.y), le_mask(a.z, b.z),
-                     le_mask(a.w, b.w));
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE float4 pcmp_le<float4>(const float4& a, const float4& b) {
+  return make_float4(le_mask(a.x, b.x), le_mask(a.y, b.y), le_mask(a.z, b.z), le_mask(a.w, b.w));
 }
 template <>
-EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE double2
-pcmp_eq<double2>(const double2& a, const double2& b) {
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE double2 pcmp_eq<double2>(const double2& a, const double2& b) {
   return make_double2(eq_mask(a.x, b.x), eq_mask(a.y, b.y));
 }
 template <>
-EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE double2
-pcmp_lt<double2>(const double2& a, const double2& b) {
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE double2 pcmp_lt<double2>(const double2& a, const double2& b) {
   return make_double2(lt_mask(a.x, b.x), lt_mask(a.y, b.y));
 }
 template <>
-EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE double2
-pcmp_le<double2>(const double2& a, const double2& b) {
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE double2 pcmp_le<double2>(const double2& a, const double2& b) {
   return make_double2(le_mask(a.x, b.x), le_mask(a.y, b.y));
 }
-#endif // defined(EIGEN_CUDA_ARCH) || defined(EIGEN_HIPCC) || (defined(EIGEN_CUDACC) && EIGEN_COMP_CLANG && !EIGEN_COMP_NVCC)
+#endif  // defined(EIGEN_CUDA_ARCH) || defined(EIGEN_HIPCC) || (defined(EIGEN_CUDACC) && EIGEN_COMP_CLANG &&
+        // !EIGEN_COMP_NVCC)
 
-template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE float4 plset<float4>(const float& a) {
-  return make_float4(a, a+1, a+2, a+3);
+template <>
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE float4 plset<float4>(const float& a) {
+  return make_float4(a, a + 1, a + 2, a + 3);
 }
-template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE double2 plset<double2>(const double& a) {
-  return make_double2(a, a+1);
+template <>
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE double2 plset<double2>(const double& a) {
+  return make_double2(a, a + 1);
 }
 
-template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE float4 padd<float4>(const float4& a, const float4& b) {
-  return make_float4(a.x+b.x, a.y+b.y, a.z+b.z, a.w+b.w);
+template <>
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE float4 padd<float4>(const float4& a, const float4& b) {
+  return make_float4(a.x + b.x, a.y + b.y, a.z + b.z, a.w + b.w);
 }
-template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE double2 padd<double2>(const double2& a, const double2& b) {
-  return make_double2(a.x+b.x, a.y+b.y);
+template <>
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE double2 padd<double2>(const double2& a, const double2& b) {
+  return make_double2(a.x + b.x, a.y + b.y);
 }
 
-template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE float4 psub<float4>(const float4& a, const float4& b) {
-  return make_float4(a.x-b.x, a.y-b.y, a.z-b.z, a.w-b.w);
+template <>
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE float4 psub<float4>(const float4& a, const float4& b) {
+  return make_float4(a.x - b.x, a.y - b.y, a.z - b.z, a.w - b.w);
 }
-template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE double2 psub<double2>(const double2& a, const double2& b) {
-  return make_double2(a.x-b.x, a.y-b.y);
+template <>
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE double2 psub<double2>(const double2& a, const double2& b) {
+  return make_double2(a.x - b.x, a.y - b.y);
 }
 
-template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE float4 pnegate(const float4& a) {
+template <>
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE float4 pnegate(const float4& a) {
   return make_float4(-a.x, -a.y, -a.z, -a.w);
 }
-template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE double2 pnegate(const double2& a) {
+template <>
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE double2 pnegate(const double2& a) {
   return make_double2(-a.x, -a.y);
 }
 
-template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE float4 pconj(const float4& a) { return a; }
-template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE double2 pconj(const double2& a) { return a; }
-
-template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE float4 pmul<float4>(const float4& a, const float4& b) {
-  return make_float4(a.x*b.x, a.y*b.y, a.z*b.z, a.w*b.w);
+template <>
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE float4 pconj(const float4& a) {
+  return a;
 }
-template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE double2 pmul<double2>(const double2& a, const double2& b) {
-  return make_double2(a.x*b.x, a.y*b.y);
+template <>
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE double2 pconj(const double2& a) {
+  return a;
 }
 
-template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE float4 pdiv<float4>(const float4& a, const float4& b) {
-  return make_float4(a.x/b.x, a.y/b.y, a.z/b.z, a.w/b.w);
+template <>
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE float4 pmul<float4>(const float4& a, const float4& b) {
+  return make_float4(a.x * b.x, a.y * b.y, a.z * b.z, a.w * b.w);
 }
-template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE double2 pdiv<double2>(const double2& a, const double2& b) {
-  return make_double2(a.x/b.x, a.y/b.y);
+template <>
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE double2 pmul<double2>(const double2& a, const double2& b) {
+  return make_double2(a.x * b.x, a.y * b.y);
 }
 
-template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE float4 pmin<float4>(const float4& a, const float4& b) {
+template <>
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE float4 pdiv<float4>(const float4& a, const float4& b) {
+  return make_float4(a.x / b.x, a.y / b.y, a.z / b.z, a.w / b.w);
+}
+template <>
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE double2 pdiv<double2>(const double2& a, const double2& b) {
+  return make_double2(a.x / b.x, a.y / b.y);
+}
+
+template <>
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE float4 pmin<float4>(const float4& a, const float4& b) {
   return make_float4(fminf(a.x, b.x), fminf(a.y, b.y), fminf(a.z, b.z), fminf(a.w, b.w));
 }
-template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE double2 pmin<double2>(const double2& a, const double2& b) {
+template <>
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE double2 pmin<double2>(const double2& a, const double2& b) {
   return make_double2(fmin(a.x, b.x), fmin(a.y, b.y));
 }
 
-template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE float4 pmax<float4>(const float4& a, const float4& b) {
+template <>
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE float4 pmax<float4>(const float4& a, const float4& b) {
   return make_float4(fmaxf(a.x, b.x), fmaxf(a.y, b.y), fmaxf(a.z, b.z), fmaxf(a.w, b.w));
 }
-template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE double2 pmax<double2>(const double2& a, const double2& b) {
+template <>
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE double2 pmax<double2>(const double2& a, const double2& b) {
   return make_double2(fmax(a.x, b.x), fmax(a.y, b.y));
 }
 
-template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE float4 pload<float4>(const float* from) {
+template <>
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE float4 pload<float4>(const float* from) {
   return *reinterpret_cast<const float4*>(from);
 }
 
-template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE double2 pload<double2>(const double* from) {
+template <>
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE double2 pload<double2>(const double* from) {
   return *reinterpret_cast<const double2*>(from);
 }
 
-template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE float4 ploadu<float4>(const float* from) {
+template <>
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE float4 ploadu<float4>(const float* from) {
   return make_float4(from[0], from[1], from[2], from[3]);
 }
-template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE double2 ploadu<double2>(const double* from) {
+template <>
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE double2 ploadu<double2>(const double* from) {
   return make_double2(from[0], from[1]);
 }
 
-template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE float4 ploaddup<float4>(const float*   from) {
+template <>
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE float4 ploaddup<float4>(const float* from) {
   return make_float4(from[0], from[0], from[1], from[1]);
 }
-template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE double2 ploaddup<double2>(const double*  from) {
+template <>
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE double2 ploaddup<double2>(const double* from) {
   return make_double2(from[0], from[0]);
 }
 
-template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void pstore<float>(float*   to, const float4& from) {
+template <>
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void pstore<float>(float* to, const float4& from) {
   *reinterpret_cast<float4*>(to) = from;
 }
 
-template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void pstore<double>(double* to, const double2& from) {
+template <>
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void pstore<double>(double* to, const double2& from) {
   *reinterpret_cast<double2*>(to) = from;
 }
 
-template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void pstoreu<float>(float*  to, const float4& from) {
+template <>
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void pstoreu<float>(float* to, const float4& from) {
   to[0] = from.x;
   to[1] = from.y;
   to[2] = from.z;
   to[3] = from.w;
 }
 
-template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void pstoreu<double>(double* to, const double2& from) {
+template <>
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void pstoreu<double>(double* to, const double2& from) {
   to[0] = from.x;
   to[1] = from.y;
 }
 
-template<>
+template <>
 EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE float4 ploadt_ro<float4, Aligned>(const float* from) {
 #if defined(EIGEN_GPU_HAS_LDG)
   return __ldg(reinterpret_cast<const float4*>(from));
@@ -383,7 +407,7 @@
   return make_float4(from[0], from[1], from[2], from[3]);
 #endif
 }
-template<>
+template <>
 EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE double2 ploadt_ro<double2, Aligned>(const double* from) {
 #if defined(EIGEN_GPU_HAS_LDG)
   return __ldg(reinterpret_cast<const double2*>(from));
@@ -392,93 +416,110 @@
 #endif
 }
 
-template<>
+template <>
 EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE float4 ploadt_ro<float4, Unaligned>(const float* from) {
 #if defined(EIGEN_GPU_HAS_LDG)
-  return make_float4(__ldg(from+0), __ldg(from+1), __ldg(from+2), __ldg(from+3));
+  return make_float4(__ldg(from + 0), __ldg(from + 1), __ldg(from + 2), __ldg(from + 3));
 #else
   return make_float4(from[0], from[1], from[2], from[3]);
 #endif
 }
-template<>
+template <>
 EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE double2 ploadt_ro<double2, Unaligned>(const double* from) {
 #if defined(EIGEN_GPU_HAS_LDG)
-  return make_double2(__ldg(from+0), __ldg(from+1));
+  return make_double2(__ldg(from + 0), __ldg(from + 1));
 #else
   return make_double2(from[0], from[1]);
 #endif
 }
 
-template<> EIGEN_DEVICE_FUNC inline float4 pgather<float, float4>(const float* from, Index stride) {
-  return make_float4(from[0*stride], from[1*stride], from[2*stride], from[3*stride]);
+template <>
+EIGEN_DEVICE_FUNC inline float4 pgather<float, float4>(const float* from, Index stride) {
+  return make_float4(from[0 * stride], from[1 * stride], from[2 * stride], from[3 * stride]);
 }
 
-template<> EIGEN_DEVICE_FUNC inline double2 pgather<double, double2>(const double* from, Index stride) {
-  return make_double2(from[0*stride], from[1*stride]);
+template <>
+EIGEN_DEVICE_FUNC inline double2 pgather<double, double2>(const double* from, Index stride) {
+  return make_double2(from[0 * stride], from[1 * stride]);
 }
 
-template<> EIGEN_DEVICE_FUNC inline void pscatter<float, float4>(float* to, const float4& from, Index stride) {
-  to[stride*0] = from.x;
-  to[stride*1] = from.y;
-  to[stride*2] = from.z;
-  to[stride*3] = from.w;
+template <>
+EIGEN_DEVICE_FUNC inline void pscatter<float, float4>(float* to, const float4& from, Index stride) {
+  to[stride * 0] = from.x;
+  to[stride * 1] = from.y;
+  to[stride * 2] = from.z;
+  to[stride * 3] = from.w;
 }
-template<> EIGEN_DEVICE_FUNC inline void pscatter<double, double2>(double* to, const double2& from, Index stride) {
-  to[stride*0] = from.x;
-  to[stride*1] = from.y;
+template <>
+EIGEN_DEVICE_FUNC inline void pscatter<double, double2>(double* to, const double2& from, Index stride) {
+  to[stride * 0] = from.x;
+  to[stride * 1] = from.y;
 }
 
-template<> EIGEN_DEVICE_FUNC inline float  pfirst<float4>(const float4& a) {
+template <>
+EIGEN_DEVICE_FUNC inline float pfirst<float4>(const float4& a) {
   return a.x;
 }
-template<> EIGEN_DEVICE_FUNC inline double pfirst<double2>(const double2& a) {
+template <>
+EIGEN_DEVICE_FUNC inline double pfirst<double2>(const double2& a) {
   return a.x;
 }
 
-template<> EIGEN_DEVICE_FUNC inline float  predux<float4>(const float4& a) {
+template <>
+EIGEN_DEVICE_FUNC inline float predux<float4>(const float4& a) {
   return a.x + a.y + a.z + a.w;
 }
-template<> EIGEN_DEVICE_FUNC inline double predux<double2>(const double2& a) {
+template <>
+EIGEN_DEVICE_FUNC inline double predux<double2>(const double2& a) {
   return a.x + a.y;
 }
 
-template<> EIGEN_DEVICE_FUNC inline float  predux_max<float4>(const float4& a) {
+template <>
+EIGEN_DEVICE_FUNC inline float predux_max<float4>(const float4& a) {
   return fmaxf(fmaxf(a.x, a.y), fmaxf(a.z, a.w));
 }
-template<> EIGEN_DEVICE_FUNC inline double predux_max<double2>(const double2& a) {
+template <>
+EIGEN_DEVICE_FUNC inline double predux_max<double2>(const double2& a) {
   return fmax(a.x, a.y);
 }
 
-template<> EIGEN_DEVICE_FUNC inline float  predux_min<float4>(const float4& a) {
+template <>
+EIGEN_DEVICE_FUNC inline float predux_min<float4>(const float4& a) {
   return fminf(fminf(a.x, a.y), fminf(a.z, a.w));
 }
-template<> EIGEN_DEVICE_FUNC inline double predux_min<double2>(const double2& a) {
+template <>
+EIGEN_DEVICE_FUNC inline double predux_min<double2>(const double2& a) {
   return fmin(a.x, a.y);
 }
 
-template<> EIGEN_DEVICE_FUNC inline float  predux_mul<float4>(const float4& a) {
+template <>
+EIGEN_DEVICE_FUNC inline float predux_mul<float4>(const float4& a) {
   return a.x * a.y * a.z * a.w;
 }
-template<> EIGEN_DEVICE_FUNC inline double predux_mul<double2>(const double2& a) {
+template <>
+EIGEN_DEVICE_FUNC inline double predux_mul<double2>(const double2& a) {
   return a.x * a.y;
 }
 
-template<> EIGEN_DEVICE_FUNC inline float4  pabs<float4>(const float4& a) {
+template <>
+EIGEN_DEVICE_FUNC inline float4 pabs<float4>(const float4& a) {
   return make_float4(fabsf(a.x), fabsf(a.y), fabsf(a.z), fabsf(a.w));
 }
-template<> EIGEN_DEVICE_FUNC inline double2 pabs<double2>(const double2& a) {
+template <>
+EIGEN_DEVICE_FUNC inline double2 pabs<double2>(const double2& a) {
   return make_double2(fabs(a.x), fabs(a.y));
 }
 
-template<> EIGEN_DEVICE_FUNC inline float4  pfloor<float4>(const float4& a) {
+template <>
+EIGEN_DEVICE_FUNC inline float4 pfloor<float4>(const float4& a) {
   return make_float4(floorf(a.x), floorf(a.y), floorf(a.z), floorf(a.w));
 }
-template<> EIGEN_DEVICE_FUNC inline double2 pfloor<double2>(const double2& a) {
+template <>
+EIGEN_DEVICE_FUNC inline double2 pfloor<double2>(const double2& a) {
   return make_double2(floor(a.x), floor(a.y));
 }
 
-EIGEN_DEVICE_FUNC inline void
-ptranspose(PacketBlock<float4,4>& kernel) {
+EIGEN_DEVICE_FUNC inline void ptranspose(PacketBlock<float4, 4>& kernel) {
   float tmp = kernel.packet[0].y;
   kernel.packet[0].y = kernel.packet[1].x;
   kernel.packet[1].x = tmp;
@@ -504,14 +545,13 @@
   kernel.packet[3].z = tmp;
 }
 
-EIGEN_DEVICE_FUNC inline void
-ptranspose(PacketBlock<double2,2>& kernel) {
+EIGEN_DEVICE_FUNC inline void ptranspose(PacketBlock<double2, 2>& kernel) {
   double tmp = kernel.packet[0].y;
   kernel.packet[0].y = kernel.packet[1].x;
   kernel.packet[1].x = tmp;
 }
 
-#endif // defined(EIGEN_GPUCC) && defined(EIGEN_USE_GPU)
+#endif  // defined(EIGEN_GPUCC) && defined(EIGEN_USE_GPU)
 
 // Half-packet functions are not available on the host for CUDA 9.0-9.2, only
 // on device. There is no benefit to using them on the host anyways, since they are
@@ -519,41 +559,68 @@
 #if (defined(EIGEN_HAS_CUDA_FP16) || defined(EIGEN_HAS_HIP_FP16)) && defined(EIGEN_GPU_COMPILE_PHASE)
 
 typedef ulonglong2 Packet4h2;
-template<> struct unpacket_traits<Packet4h2> { typedef Eigen::half type; enum {size=8, alignment=Aligned16, vectorizable=true, masked_load_available=false, masked_store_available=false}; typedef Packet4h2 half; };
-template<> struct is_arithmetic<Packet4h2> { enum { value = true }; };
+template <>
+struct unpacket_traits<Packet4h2> {
+  typedef Eigen::half type;
+  enum {
+    size = 8,
+    alignment = Aligned16,
+    vectorizable = true,
+    masked_load_available = false,
+    masked_store_available = false
+  };
+  typedef Packet4h2 half;
+};
+template <>
+struct is_arithmetic<Packet4h2> {
+  enum { value = true };
+};
 
-template<> struct unpacket_traits<half2> { typedef Eigen::half type; enum {size=2, alignment=Aligned16, vectorizable=true, masked_load_available=false, masked_store_available=false}; typedef half2 half; };
-template<> struct is_arithmetic<half2> { enum { value = true }; };
+template <>
+struct unpacket_traits<half2> {
+  typedef Eigen::half type;
+  enum {
+    size = 2,
+    alignment = Aligned16,
+    vectorizable = true,
+    masked_load_available = false,
+    masked_store_available = false
+  };
+  typedef half2 half;
+};
+template <>
+struct is_arithmetic<half2> {
+  enum { value = true };
+};
 
-template<> struct packet_traits<Eigen::half> : default_packet_traits
-{
+template <>
+struct packet_traits<Eigen::half> : default_packet_traits {
   typedef Packet4h2 type;
   typedef Packet4h2 half;
   enum {
     Vectorizable = 1,
     AlignedOnScalar = 1,
-    size=8,
-    HasAdd    = 1,
-    HasSub    = 1,
-    HasMul    = 1,
-    HasDiv    = 1,
-    HasSqrt   = 1,
-    HasRsqrt  = 1,
-    HasExp    = 1,
-    HasExpm1  = 1,
-    HasLog    = 1,
-    HasLog1p  = 1
+    size = 8,
+    HasAdd = 1,
+    HasSub = 1,
+    HasMul = 1,
+    HasDiv = 1,
+    HasSqrt = 1,
+    HasRsqrt = 1,
+    HasExp = 1,
+    HasExpm1 = 1,
+    HasLog = 1,
+    HasLog1p = 1
   };
 };
 
-template<>
+template <>
 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 pset1<half2>(const Eigen::half& from) {
   return __half2half2(from);
 }
 
 template <>
-EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet4h2
-pset1<Packet4h2>(const Eigen::half& from) {
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet4h2 pset1<Packet4h2>(const Eigen::half& from) {
   Packet4h2 r;
   half2* p_alias = reinterpret_cast<half2*>(&r);
   p_alias[0] = pset1<half2>(from);
@@ -569,59 +636,48 @@
   return *reinterpret_cast<const half2*>(from);
 }
 
-EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 ploadu(const Eigen::half* from) {
-  return __halves2half2(from[0], from[1]);
-}
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 ploadu(const Eigen::half* from) { return __halves2half2(from[0], from[1]); }
 
-EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 ploaddup(const Eigen::half*  from) {
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 ploaddup(const Eigen::half* from) {
   return __halves2half2(from[0], from[0]);
 }
 
-EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void pstore(Eigen::half* to,
-                                                  const half2& from) {
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void pstore(Eigen::half* to, const half2& from) {
   *reinterpret_cast<half2*>(to) = from;
 }
 
-EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void pstoreu(Eigen::half* to,
-                                                   const half2& from) {
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void pstoreu(Eigen::half* to, const half2& from) {
   to[0] = __low2half(from);
   to[1] = __high2half(from);
 }
 
-
-EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE half2 ploadt_ro_aligned(
-    const Eigen::half* from) {
+EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE half2 ploadt_ro_aligned(const Eigen::half* from) {
 #if defined(EIGEN_GPU_HAS_LDG)
   // Input is guaranteed to be properly aligned.
   return __ldg(reinterpret_cast<const half2*>(from));
 #else
-  return __halves2half2(*(from+0), *(from+1));
+  return __halves2half2(*(from + 0), *(from + 1));
 #endif
 }
 
-EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE half2 ploadt_ro_unaligned(
-    const Eigen::half* from) {
+EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE half2 ploadt_ro_unaligned(const Eigen::half* from) {
 #if defined(EIGEN_GPU_HAS_LDG)
-  return __halves2half2(__ldg(from+0), __ldg(from+1));
+  return __halves2half2(__ldg(from + 0), __ldg(from + 1));
 #else
-  return __halves2half2(*(from+0), *(from+1));
+  return __halves2half2(*(from + 0), *(from + 1));
 #endif
 }
 
-EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 pgather(const Eigen::half* from,
-                                                    Index stride) {
-  return __halves2half2(from[0*stride], from[1*stride]);
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 pgather(const Eigen::half* from, Index stride) {
+  return __halves2half2(from[0 * stride], from[1 * stride]);
 }
 
-EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void pscatter(
-    Eigen::half* to, const half2& from, Index stride) {
-  to[stride*0] = __low2half(from);
-  to[stride*1] = __high2half(from);
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void pscatter(Eigen::half* to, const half2& from, Index stride) {
+  to[stride * 0] = __low2half(from);
+  to[stride * 1] = __high2half(from);
 }
 
-EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Eigen::half pfirst(const half2& a) {
-  return __low2half(a);
-}
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Eigen::half pfirst(const half2& a) { return __low2half(a); }
 
 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 pabs(const half2& a) {
   half a1 = __low2half(a);
@@ -641,8 +697,7 @@
   return pset1<half2>(false_half);
 }
 
-EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void
-ptranspose(PacketBlock<half2,2>& kernel) {
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void ptranspose(PacketBlock<half2, 2>& kernel) {
   __half a1 = __low2half(kernel.packet[0]);
   __half a2 = __high2half(kernel.packet[0]);
   __half b1 = __low2half(kernel.packet[1]);
@@ -660,9 +715,7 @@
 #endif
 }
 
-EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 pselect(const half2& mask,
-                                                    const half2& a,
-                                                    const half2& b) {
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 pselect(const half2& mask, const half2& a, const half2& b) {
   half mask_low = __low2half(mask);
   half mask_high = __high2half(mask);
   half result_low = mask_low == half(0) ? __low2half(b) : __low2half(a);
@@ -670,8 +723,7 @@
   return __halves2half2(result_low, result_high);
 }
 
-EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 pcmp_eq(const half2& a,
-                                                    const half2& b) {
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 pcmp_eq(const half2& a, const half2& b) {
   half true_half = half_impl::raw_uint16_to_half(0xffffu);
   half false_half = half_impl::raw_uint16_to_half(0x0000u);
   half a1 = __low2half(a);
@@ -683,8 +735,7 @@
   return __halves2half2(eq1, eq2);
 }
 
-EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 pcmp_lt(const half2& a,
-                                                    const half2& b) {
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 pcmp_lt(const half2& a, const half2& b) {
   half true_half = half_impl::raw_uint16_to_half(0xffffu);
   half false_half = half_impl::raw_uint16_to_half(0x0000u);
   half a1 = __low2half(a);
@@ -696,8 +747,7 @@
   return __halves2half2(eq1, eq2);
 }
 
-EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 pcmp_le(const half2& a,
-                                                    const half2& b) {
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 pcmp_le(const half2& a, const half2& b) {
   half true_half = half_impl::raw_uint16_to_half(0xffffu);
   half false_half = half_impl::raw_uint16_to_half(0x0000u);
   half a1 = __low2half(a);
@@ -709,8 +759,7 @@
   return __halves2half2(eq1, eq2);
 }
 
-EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 pand(const half2& a,
-                                                 const half2& b) {
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 pand(const half2& a, const half2& b) {
   half a1 = __low2half(a);
   half a2 = __high2half(a);
   half b1 = __low2half(b);
@@ -720,8 +769,7 @@
   return __halves2half2(result1, result2);
 }
 
-EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 por(const half2& a,
-                                                const half2& b) {
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 por(const half2& a, const half2& b) {
   half a1 = __low2half(a);
   half a2 = __high2half(a);
   half b1 = __low2half(b);
@@ -731,8 +779,7 @@
   return __halves2half2(result1, result2);
 }
 
-EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 pxor(const half2& a,
-                                                 const half2& b) {
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 pxor(const half2& a, const half2& b) {
   half a1 = __low2half(a);
   half a2 = __high2half(a);
   half b1 = __low2half(b);
@@ -742,8 +789,7 @@
   return __halves2half2(result1, result2);
 }
 
-EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 pandnot(const half2& a,
-                                                    const half2& b) {
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 pandnot(const half2& a, const half2& b) {
   half a1 = __low2half(a);
   half a2 = __high2half(a);
   half b1 = __low2half(b);
@@ -753,8 +799,7 @@
   return __halves2half2(result1, result2);
 }
 
-EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 padd(const half2& a,
-                                                 const half2& b) {
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 padd(const half2& a, const half2& b) {
 #if defined(EIGEN_GPU_HAS_FP16_ARITHMETIC)
   return __hadd2(a, b);
 #else
@@ -768,8 +813,7 @@
 #endif
 }
 
-EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 psub(const half2& a,
-                                                 const half2& b) {
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 psub(const half2& a, const half2& b) {
 #if defined(EIGEN_GPU_HAS_FP16_ARITHMETIC)
   return __hsub2(a, b);
 #else
@@ -795,8 +839,7 @@
 
 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 pconj(const half2& a) { return a; }
 
-EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 pmul(const half2& a,
-                                                 const half2& b) {
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 pmul(const half2& a, const half2& b) {
 #if defined(EIGEN_GPU_HAS_FP16_ARITHMETIC)
   return __hmul2(a, b);
 #else
@@ -810,11 +853,9 @@
 #endif
 }
 
-EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 pmadd(const half2& a,
-                                                  const half2& b,
-                                                  const half2& c) {
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 pmadd(const half2& a, const half2& b, const half2& c) {
 #if defined(EIGEN_GPU_HAS_FP16_ARITHMETIC)
-   return __hfma2(a, b, c);
+  return __hfma2(a, b, c);
 #else
   float a1 = __low2float(a);
   float a2 = __high2float(a);
@@ -828,8 +869,7 @@
 #endif
 }
 
-EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 pdiv(const half2& a,
-                                                 const half2& b) {
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 pdiv(const half2& a, const half2& b) {
 #if defined(EIGEN_GPU_HAS_FP16_ARITHMETIC)
   return __h2div(a, b);
 #else
@@ -843,8 +883,7 @@
 #endif
 }
 
-EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 pmin(const half2& a,
-                                                 const half2& b) {
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 pmin(const half2& a, const half2& b) {
   float a1 = __low2float(a);
   float a2 = __high2float(a);
   float b1 = __low2float(b);
@@ -854,8 +893,7 @@
   return __halves2half2(r1, r2);
 }
 
-EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 pmax(const half2& a,
-                                                 const half2& b) {
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 pmax(const half2& a, const half2& b) {
   float a1 = __low2float(a);
   float a2 = __high2float(a);
   float b1 = __low2float(b);
@@ -925,28 +963,15 @@
   return __floats2half2_rn(r1, r2);
 }
 
-#if (EIGEN_CUDA_SDK_VER >= 80000 && defined(EIGEN_CUDA_HAS_FP16_ARITHMETIC)) || \
-  defined(EIGEN_HIP_DEVICE_COMPILE)
+#if (EIGEN_CUDA_SDK_VER >= 80000 && defined(EIGEN_CUDA_HAS_FP16_ARITHMETIC)) || defined(EIGEN_HIP_DEVICE_COMPILE)
 
-EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
-half2 plog(const half2& a) {
-  return h2log(a);
-}
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 plog(const half2& a) { return h2log(a); }
 
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
-half2 pexp(const half2& a) {
-  return h2exp(a);
-}
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 pexp(const half2& a) { return h2exp(a); }
 
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
-half2 psqrt(const half2& a) {
-  return h2sqrt(a);
-}
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 psqrt(const half2& a) { return h2sqrt(a); }
 
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
-half2 prsqrt(const half2& a) {
-  return h2rsqrt(a);
-}
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 prsqrt(const half2& a) { return h2rsqrt(a); }
 
 #else
 
@@ -982,18 +1007,16 @@
   return __floats2half2_rn(r1, r2);
 }
 #endif
-} // namespace
+}  // namespace
 
 template <>
-EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet4h2
-pload<Packet4h2>(const Eigen::half* from) {
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet4h2 pload<Packet4h2>(const Eigen::half* from) {
   return *reinterpret_cast<const Packet4h2*>(from);
 }
 
 // unaligned load;
 template <>
-EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet4h2
-ploadu<Packet4h2>(const Eigen::half* from) {
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet4h2 ploadu<Packet4h2>(const Eigen::half* from) {
   Packet4h2 r;
   half2* p_alias = reinterpret_cast<half2*>(&r);
   p_alias[0] = ploadu(from + 0);
@@ -1004,8 +1027,7 @@
 }
 
 template <>
-EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet4h2
-ploaddup<Packet4h2>(const Eigen::half* from) {
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet4h2 ploaddup<Packet4h2>(const Eigen::half* from) {
   Packet4h2 r;
   half2* p_alias = reinterpret_cast<half2*>(&r);
   p_alias[0] = ploaddup(from + 0);
@@ -1016,24 +1038,21 @@
 }
 
 template <>
-EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void pstore<Eigen::half>(
-    Eigen::half* to, const Packet4h2& from) {
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void pstore<Eigen::half>(Eigen::half* to, const Packet4h2& from) {
   *reinterpret_cast<Packet4h2*>(to) = from;
 }
 
 template <>
-EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void pstoreu<Eigen::half>(
-    Eigen::half* to, const Packet4h2& from) {
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void pstoreu<Eigen::half>(Eigen::half* to, const Packet4h2& from) {
   const half2* from_alias = reinterpret_cast<const half2*>(&from);
-  pstoreu(to + 0,from_alias[0]);
-  pstoreu(to + 2,from_alias[1]);
-  pstoreu(to + 4,from_alias[2]);
-  pstoreu(to + 6,from_alias[3]);
+  pstoreu(to + 0, from_alias[0]);
+  pstoreu(to + 2, from_alias[1]);
+  pstoreu(to + 4, from_alias[2]);
+  pstoreu(to + 6, from_alias[3]);
 }
 
 template <>
-EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Packet4h2
-ploadt_ro<Packet4h2, Aligned>(const Eigen::half* from) {
+EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Packet4h2 ploadt_ro<Packet4h2, Aligned>(const Eigen::half* from) {
 #if defined(EIGEN_GPU_HAS_LDG)
   Packet4h2 r;
   r = __ldg(reinterpret_cast<const Packet4h2*>(from));
@@ -1050,8 +1069,7 @@
 }
 
 template <>
-EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Packet4h2
-ploadt_ro<Packet4h2, Unaligned>(const Eigen::half* from) {
+EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Packet4h2 ploadt_ro<Packet4h2, Unaligned>(const Eigen::half* from) {
   Packet4h2 r;
   half2* r_alias = reinterpret_cast<half2*>(&r);
   r_alias[0] = ploadt_ro_unaligned(from + 0);
@@ -1062,8 +1080,7 @@
 }
 
 template <>
-EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet4h2
-pgather<Eigen::half, Packet4h2>(const Eigen::half* from, Index stride) {
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet4h2 pgather<Eigen::half, Packet4h2>(const Eigen::half* from, Index stride) {
   Packet4h2 r;
   half2* p_alias = reinterpret_cast<half2*>(&r);
   p_alias[0] = __halves2half2(from[0 * stride], from[1 * stride]);
@@ -1074,8 +1091,8 @@
 }
 
 template <>
-EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void pscatter<Eigen::half, Packet4h2>(
-    Eigen::half* to, const Packet4h2& from, Index stride) {
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void pscatter<Eigen::half, Packet4h2>(Eigen::half* to, const Packet4h2& from,
+                                                                            Index stride) {
   const half2* from_alias = reinterpret_cast<const half2*>(&from);
   pscatter(to + stride * 0, from_alias[0], stride);
   pscatter(to + stride * 2, from_alias[1], stride);
@@ -1084,14 +1101,12 @@
 }
 
 template <>
-EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Eigen::half pfirst<Packet4h2>(
-    const Packet4h2& a) {
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Eigen::half pfirst<Packet4h2>(const Packet4h2& a) {
   return pfirst(*(reinterpret_cast<const half2*>(&a)));
 }
 
 template <>
-EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet4h2 pabs<Packet4h2>(
-    const Packet4h2& a) {
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet4h2 pabs<Packet4h2>(const Packet4h2& a) {
   Packet4h2 r;
   half2* p_alias = reinterpret_cast<half2*>(&r);
   const half2* a_alias = reinterpret_cast<const half2*>(&a);
@@ -1103,8 +1118,7 @@
 }
 
 template <>
-EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet4h2 ptrue<Packet4h2>(
-    const Packet4h2& /*a*/) {
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet4h2 ptrue<Packet4h2>(const Packet4h2& /*a*/) {
   half true_half = half_impl::raw_uint16_to_half(0xffffu);
   return pset1<Packet4h2>(true_half);
 }
@@ -1115,9 +1129,9 @@
   return pset1<Packet4h2>(false_half);
 }
 
-EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void ptranspose_double(
-    double* d_row0, double* d_row1, double* d_row2, double* d_row3,
-    double* d_row4, double* d_row5, double* d_row6, double* d_row7) {
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void ptranspose_double(double* d_row0, double* d_row1, double* d_row2,
+                                                             double* d_row3, double* d_row4, double* d_row5,
+                                                             double* d_row6, double* d_row7) {
   double d_tmp;
   d_tmp = d_row0[1];
   d_row0[1] = d_row4[0];
@@ -1136,8 +1150,8 @@
   d_row7[0] = d_tmp;
 }
 
-EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void ptranspose_half2(
-    half2* f_row0, half2* f_row1, half2* f_row2, half2* f_row3) {
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void ptranspose_half2(half2* f_row0, half2* f_row1, half2* f_row2,
+                                                            half2* f_row3) {
   half2 f_tmp;
   f_tmp = f_row0[1];
   f_row0[1] = f_row2[0];
@@ -1148,8 +1162,7 @@
   f_row3[0] = f_tmp;
 }
 
-EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void
-ptranspose_half(half2& f0, half2& f1) {
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void ptranspose_half(half2& f0, half2& f1) {
   __half a1 = __low2half(f0);
   __half a2 = __high2half(f0);
   __half b1 = __low2half(f1);
@@ -1158,8 +1171,7 @@
   f1 = __halves2half2(a2, b2);
 }
 
-EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void
-ptranspose(PacketBlock<Packet4h2,8>& kernel) {
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void ptranspose(PacketBlock<Packet4h2, 8>& kernel) {
   double* d_row0 = reinterpret_cast<double*>(&kernel.packet[0]);
   double* d_row1 = reinterpret_cast<double*>(&kernel.packet[1]);
   double* d_row2 = reinterpret_cast<double*>(&kernel.packet[2]);
@@ -1168,9 +1180,7 @@
   double* d_row5 = reinterpret_cast<double*>(&kernel.packet[5]);
   double* d_row6 = reinterpret_cast<double*>(&kernel.packet[6]);
   double* d_row7 = reinterpret_cast<double*>(&kernel.packet[7]);
-  ptranspose_double(d_row0, d_row1, d_row2, d_row3,
-                    d_row4, d_row5, d_row6, d_row7);
-
+  ptranspose_double(d_row0, d_row1, d_row2, d_row3, d_row4, d_row5, d_row6, d_row7);
 
   half2* f_row0 = reinterpret_cast<half2*>(d_row0);
   half2* f_row1 = reinterpret_cast<half2*>(d_row1);
@@ -1211,23 +1221,18 @@
   ptranspose_half(f_row0[1], f_row1[1]);
   ptranspose_half(f_row2[0], f_row3[0]);
   ptranspose_half(f_row2[1], f_row3[1]);
-
 }
 
 template <>
-EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet4h2
-plset<Packet4h2>(const Eigen::half& a) {
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet4h2 plset<Packet4h2>(const Eigen::half& a) {
 #if defined(EIGEN_HIP_DEVICE_COMPILE)
 
   Packet4h2 r;
   half2* p_alias = reinterpret_cast<half2*>(&r);
   p_alias[0] = __halves2half2(a, __hadd(a, __float2half(1.0f)));
-  p_alias[1] = __halves2half2(__hadd(a, __float2half(2.0f)),
-                              __hadd(a, __float2half(3.0f)));
-  p_alias[2] = __halves2half2(__hadd(a, __float2half(4.0f)),
-                              __hadd(a, __float2half(5.0f)));
-  p_alias[3] = __halves2half2(__hadd(a, __float2half(6.0f)),
-                              __hadd(a, __float2half(7.0f)));
+  p_alias[1] = __halves2half2(__hadd(a, __float2half(2.0f)), __hadd(a, __float2half(3.0f)));
+  p_alias[2] = __halves2half2(__hadd(a, __float2half(4.0f)), __hadd(a, __float2half(5.0f)));
+  p_alias[3] = __halves2half2(__hadd(a, __float2half(6.0f)), __hadd(a, __float2half(7.0f)));
   return r;
 #elif defined(EIGEN_CUDA_HAS_FP16_ARITHMETIC)
   Packet4h2 r;
@@ -1235,8 +1240,8 @@
 
   half2 b = pset1<half2>(a);
   half2 c;
-  half2 half_offset0 = __halves2half2(__float2half(0.0f),__float2half(2.0f));
-  half2 half_offset1 = __halves2half2(__float2half(4.0f),__float2half(6.0f));
+  half2 half_offset0 = __halves2half2(__float2half(0.0f), __float2half(2.0f));
+  half2 half_offset1 = __halves2half2(__float2half(4.0f), __float2half(6.0f));
 
   c = __hadd2(b, half_offset0);
   r_alias[0] = plset(__low2half(c));
@@ -1261,9 +1266,8 @@
 }
 
 template <>
-EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet4h2
-pselect<Packet4h2>(const Packet4h2& mask, const Packet4h2& a,
-                   const Packet4h2& b) {
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet4h2 pselect<Packet4h2>(const Packet4h2& mask, const Packet4h2& a,
+                                                                   const Packet4h2& b) {
   Packet4h2 r;
   half2* r_alias = reinterpret_cast<half2*>(&r);
   const half2* mask_alias = reinterpret_cast<const half2*>(&mask);
@@ -1277,8 +1281,7 @@
 }
 
 template <>
-EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet4h2
-pcmp_eq<Packet4h2>(const Packet4h2& a, const Packet4h2& b) {
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet4h2 pcmp_eq<Packet4h2>(const Packet4h2& a, const Packet4h2& b) {
   Packet4h2 r;
   half2* r_alias = reinterpret_cast<half2*>(&r);
   const half2* a_alias = reinterpret_cast<const half2*>(&a);
@@ -1291,8 +1294,7 @@
 }
 
 template <>
-EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet4h2
-pcmp_lt<Packet4h2>(const Packet4h2& a, const Packet4h2& b) {
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet4h2 pcmp_lt<Packet4h2>(const Packet4h2& a, const Packet4h2& b) {
   Packet4h2 r;
   half2* r_alias = reinterpret_cast<half2*>(&r);
   const half2* a_alias = reinterpret_cast<const half2*>(&a);
@@ -1305,8 +1307,7 @@
 }
 
 template <>
-EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet4h2
-pcmp_le<Packet4h2>(const Packet4h2& a, const Packet4h2& b) {
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet4h2 pcmp_le<Packet4h2>(const Packet4h2& a, const Packet4h2& b) {
   Packet4h2 r;
   half2* r_alias = reinterpret_cast<half2*>(&r);
   const half2* a_alias = reinterpret_cast<const half2*>(&a);
@@ -1319,8 +1320,7 @@
 }
 
 template <>
-EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet4h2 pand<Packet4h2>(
-    const Packet4h2& a, const Packet4h2& b) {
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet4h2 pand<Packet4h2>(const Packet4h2& a, const Packet4h2& b) {
   Packet4h2 r;
   half2* r_alias = reinterpret_cast<half2*>(&r);
   const half2* a_alias = reinterpret_cast<const half2*>(&a);
@@ -1333,8 +1333,7 @@
 }
 
 template <>
-EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet4h2 por<Packet4h2>(
-    const Packet4h2& a, const Packet4h2& b) {
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet4h2 por<Packet4h2>(const Packet4h2& a, const Packet4h2& b) {
   Packet4h2 r;
   half2* r_alias = reinterpret_cast<half2*>(&r);
   const half2* a_alias = reinterpret_cast<const half2*>(&a);
@@ -1347,8 +1346,7 @@
 }
 
 template <>
-EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet4h2 pxor<Packet4h2>(
-    const Packet4h2& a, const Packet4h2& b) {
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet4h2 pxor<Packet4h2>(const Packet4h2& a, const Packet4h2& b) {
   Packet4h2 r;
   half2* r_alias = reinterpret_cast<half2*>(&r);
   const half2* a_alias = reinterpret_cast<const half2*>(&a);
@@ -1361,8 +1359,7 @@
 }
 
 template <>
-EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet4h2
-pandnot<Packet4h2>(const Packet4h2& a, const Packet4h2& b) {
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet4h2 pandnot<Packet4h2>(const Packet4h2& a, const Packet4h2& b) {
   Packet4h2 r;
   half2* r_alias = reinterpret_cast<half2*>(&r);
   const half2* a_alias = reinterpret_cast<const half2*>(&a);
@@ -1375,8 +1372,7 @@
 }
 
 template <>
-EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet4h2 padd<Packet4h2>(
-    const Packet4h2& a, const Packet4h2& b) {
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet4h2 padd<Packet4h2>(const Packet4h2& a, const Packet4h2& b) {
   Packet4h2 r;
   half2* r_alias = reinterpret_cast<half2*>(&r);
   const half2* a_alias = reinterpret_cast<const half2*>(&a);
@@ -1389,8 +1385,7 @@
 }
 
 template <>
-EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet4h2 psub<Packet4h2>(
-    const Packet4h2& a, const Packet4h2& b) {
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet4h2 psub<Packet4h2>(const Packet4h2& a, const Packet4h2& b) {
   Packet4h2 r;
   half2* r_alias = reinterpret_cast<half2*>(&r);
   const half2* a_alias = reinterpret_cast<const half2*>(&a);
@@ -1420,8 +1415,7 @@
 }
 
 template <>
-EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet4h2 pmul<Packet4h2>(
-    const Packet4h2& a, const Packet4h2& b) {
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet4h2 pmul<Packet4h2>(const Packet4h2& a, const Packet4h2& b) {
   Packet4h2 r;
   half2* r_alias = reinterpret_cast<half2*>(&r);
   const half2* a_alias = reinterpret_cast<const half2*>(&a);
@@ -1434,8 +1428,8 @@
 }
 
 template <>
-EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet4h2 pmadd<Packet4h2>(
-    const Packet4h2& a, const Packet4h2& b, const Packet4h2& c) {
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet4h2 pmadd<Packet4h2>(const Packet4h2& a, const Packet4h2& b,
+                                                                 const Packet4h2& c) {
   Packet4h2 r;
   half2* r_alias = reinterpret_cast<half2*>(&r);
   const half2* a_alias = reinterpret_cast<const half2*>(&a);
@@ -1449,8 +1443,7 @@
 }
 
 template <>
-EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet4h2 pdiv<Packet4h2>(
-    const Packet4h2& a, const Packet4h2& b) {
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet4h2 pdiv<Packet4h2>(const Packet4h2& a, const Packet4h2& b) {
   Packet4h2 r;
   half2* r_alias = reinterpret_cast<half2*>(&r);
   const half2* a_alias = reinterpret_cast<const half2*>(&a);
@@ -1463,8 +1456,7 @@
 }
 
 template <>
-EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet4h2 pmin<Packet4h2>(
-    const Packet4h2& a, const Packet4h2& b) {
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet4h2 pmin<Packet4h2>(const Packet4h2& a, const Packet4h2& b) {
   Packet4h2 r;
   half2* r_alias = reinterpret_cast<half2*>(&r);
   const half2* a_alias = reinterpret_cast<const half2*>(&a);
@@ -1477,8 +1469,7 @@
 }
 
 template <>
-EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet4h2 pmax<Packet4h2>(
-    const Packet4h2& a, const Packet4h2& b) {
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet4h2 pmax<Packet4h2>(const Packet4h2& a, const Packet4h2& b) {
   Packet4h2 r;
   half2* r_alias = reinterpret_cast<half2*>(&r);
   const half2* a_alias = reinterpret_cast<const half2*>(&a);
@@ -1491,64 +1482,53 @@
 }
 
 template <>
-EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Eigen::half predux<Packet4h2>(
-    const Packet4h2& a) {
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Eigen::half predux<Packet4h2>(const Packet4h2& a) {
   const half2* a_alias = reinterpret_cast<const half2*>(&a);
 
-  return predux(a_alias[0]) + predux(a_alias[1]) +
-         predux(a_alias[2]) + predux(a_alias[3]);
+  return predux(a_alias[0]) + predux(a_alias[1]) + predux(a_alias[2]) + predux(a_alias[3]);
 }
 
 template <>
-EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Eigen::half predux_max<Packet4h2>(
-    const Packet4h2& a) {
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Eigen::half predux_max<Packet4h2>(const Packet4h2& a) {
   const half2* a_alias = reinterpret_cast<const half2*>(&a);
-  half2 m0 = __halves2half2(predux_max(a_alias[0]),
-                            predux_max(a_alias[1]));
-  half2 m1 = __halves2half2(predux_max(a_alias[2]),
-                            predux_max(a_alias[3]));
-  __half first  = predux_max(m0);
+  half2 m0 = __halves2half2(predux_max(a_alias[0]), predux_max(a_alias[1]));
+  half2 m1 = __halves2half2(predux_max(a_alias[2]), predux_max(a_alias[3]));
+  __half first = predux_max(m0);
   __half second = predux_max(m1);
 #if defined(EIGEN_CUDA_HAS_FP16_ARITHMETIC)
   return (__hgt(first, second) ? first : second);
 #else
-  float ffirst  = __half2float(first);
+  float ffirst = __half2float(first);
   float fsecond = __half2float(second);
-  return (ffirst > fsecond)? first: second;
+  return (ffirst > fsecond) ? first : second;
 #endif
 }
 
 template <>
-EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Eigen::half predux_min<Packet4h2>(
-    const Packet4h2& a) {
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Eigen::half predux_min<Packet4h2>(const Packet4h2& a) {
   const half2* a_alias = reinterpret_cast<const half2*>(&a);
-  half2 m0 = __halves2half2(predux_min(a_alias[0]),
-                            predux_min(a_alias[1]));
-  half2 m1 = __halves2half2(predux_min(a_alias[2]),
-                            predux_min(a_alias[3]));
-  __half first  = predux_min(m0);
+  half2 m0 = __halves2half2(predux_min(a_alias[0]), predux_min(a_alias[1]));
+  half2 m1 = __halves2half2(predux_min(a_alias[2]), predux_min(a_alias[3]));
+  __half first = predux_min(m0);
   __half second = predux_min(m1);
 #if defined(EIGEN_CUDA_HAS_FP16_ARITHMETIC)
   return (__hlt(first, second) ? first : second);
 #else
-  float ffirst  = __half2float(first);
+  float ffirst = __half2float(first);
   float fsecond = __half2float(second);
-  return (ffirst < fsecond)? first: second;
+  return (ffirst < fsecond) ? first : second;
 #endif
 }
 
 // likely overflow/underflow
 template <>
-EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Eigen::half predux_mul<Packet4h2>(
-    const Packet4h2& a) {
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Eigen::half predux_mul<Packet4h2>(const Packet4h2& a) {
   const half2* a_alias = reinterpret_cast<const half2*>(&a);
-  return predux_mul(pmul(pmul(a_alias[0], a_alias[1]),
-                                       pmul(a_alias[2], a_alias[3])));
+  return predux_mul(pmul(pmul(a_alias[0], a_alias[1]), pmul(a_alias[2], a_alias[3])));
 }
 
 template <>
-EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet4h2
-plog1p<Packet4h2>(const Packet4h2& a) {
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet4h2 plog1p<Packet4h2>(const Packet4h2& a) {
   Packet4h2 r;
   half2* r_alias = reinterpret_cast<half2*>(&r);
   const half2* a_alias = reinterpret_cast<const half2*>(&a);
@@ -1560,8 +1540,7 @@
 }
 
 template <>
-EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet4h2
-pexpm1<Packet4h2>(const Packet4h2& a) {
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet4h2 pexpm1<Packet4h2>(const Packet4h2& a) {
   Packet4h2 r;
   half2* r_alias = reinterpret_cast<half2*>(&r);
   const half2* a_alias = reinterpret_cast<const half2*>(&a);
@@ -1609,8 +1588,7 @@
 }
 
 template <>
-EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet4h2
-prsqrt<Packet4h2>(const Packet4h2& a) {
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet4h2 prsqrt<Packet4h2>(const Packet4h2& a) {
   Packet4h2 r;
   half2* r_alias = reinterpret_cast<half2*>(&r);
   const half2* a_alias = reinterpret_cast<const half2*>(&a);
@@ -1623,9 +1601,8 @@
 
 // The following specialized padd, pmul, pdiv, pmin, pmax, pset1 are needed for
 // the implementation of GPU half reduction.
-template<>
-EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 padd<half2>(const half2& a,
-                                                        const half2& b) {
+template <>
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 padd<half2>(const half2& a, const half2& b) {
 #if defined(EIGEN_GPU_HAS_FP16_ARITHMETIC)
   return __hadd2(a, b);
 #else
@@ -1639,9 +1616,8 @@
 #endif
 }
 
-template<>
-EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 pmul<half2>(const half2& a,
-                                                        const half2& b) {
+template <>
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 pmul<half2>(const half2& a, const half2& b) {
 #if defined(EIGEN_GPU_HAS_FP16_ARITHMETIC)
   return __hmul2(a, b);
 #else
@@ -1655,9 +1631,8 @@
 #endif
 }
 
-template<>
-EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 pdiv<half2>(const half2& a,
-                                                        const half2& b) {
+template <>
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 pdiv<half2>(const half2& a, const half2& b) {
 #if defined(EIGEN_GPU_HAS_FP16_ARITHMETIC)
   return __h2div(a, b);
 #else
@@ -1671,9 +1646,8 @@
 #endif
 }
 
-template<>
-EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 pmin<half2>(const half2& a,
-                                                        const half2& b) {
+template <>
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 pmin<half2>(const half2& a, const half2& b) {
   float a1 = __low2float(a);
   float a2 = __high2float(a);
   float b1 = __low2float(b);
@@ -1683,9 +1657,8 @@
   return __halves2half2(r1, r2);
 }
 
-template<>
-EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 pmax<half2>(const half2& a,
-                                                        const half2& b) {
+template <>
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 pmax<half2>(const half2& a, const half2& b) {
   float a1 = __low2float(a);
   float a2 = __high2float(a);
   float b1 = __low2float(b);
@@ -1695,15 +1668,14 @@
   return __halves2half2(r1, r2);
 }
 
-#endif // (defined(EIGEN_HAS_CUDA_FP16) || defined(EIGEN_HAS_HIP_FP16)) && defined(EIGEN_GPU_COMPILE_PHASE)
+#endif  // (defined(EIGEN_HAS_CUDA_FP16) || defined(EIGEN_HAS_HIP_FP16)) && defined(EIGEN_GPU_COMPILE_PHASE)
 
 #undef EIGEN_GPU_HAS_LDG
 #undef EIGEN_CUDA_HAS_FP16_ARITHMETIC
 #undef EIGEN_GPU_HAS_FP16_ARITHMETIC
 
-} // end namespace internal
+}  // end namespace internal
 
-} // end namespace Eigen
+}  // end namespace Eigen
 
-
-#endif // EIGEN_PACKET_MATH_GPU_H
+#endif  // EIGEN_PACKET_MATH_GPU_H
diff --git a/Eigen/src/Core/arch/GPU/Tuple.h b/Eigen/src/Core/arch/GPU/Tuple.h
index e223ca1..6bea9ac 100644
--- a/Eigen/src/Core/arch/GPU/Tuple.h
+++ b/Eigen/src/Core/arch/GPU/Tuple.h
@@ -20,196 +20,173 @@
 namespace tuple_impl {
 
 // Internal tuple implementation.
-template<size_t N, typename... Types>
+template <size_t N, typename... Types>
 class TupleImpl;
 
 // Generic recursive tuple.
-template<size_t N, typename T1, typename... Ts>
+template <size_t N, typename T1, typename... Ts>
 class TupleImpl<N, T1, Ts...> {
  public:
   // Tuple may contain Eigen types.
   EIGEN_MAKE_ALIGNED_OPERATOR_NEW
-  
+
   // Default constructor, enable if all types are default-constructible.
-  template<typename U1 = T1, typename EnableIf = std::enable_if_t<
-      std::is_default_constructible<U1>::value
-      && reduce_all<std::is_default_constructible<Ts>::value...>::value
-    >>
-  EIGEN_CONSTEXPR EIGEN_DEVICE_FUNC
-  TupleImpl() : head_{}, tail_{} {}
- 
+  template <typename U1 = T1,
+            typename EnableIf = std::enable_if_t<std::is_default_constructible<U1>::value &&
+                                                 reduce_all<std::is_default_constructible<Ts>::value...>::value>>
+  EIGEN_CONSTEXPR EIGEN_DEVICE_FUNC TupleImpl() : head_{}, tail_{} {}
+
   // Element constructor.
-  template<typename U1, typename... Us, 
-           // Only enable if...
-           typename EnableIf = std::enable_if_t<
-              // the number of input arguments match, and ...
-              sizeof...(Us) == sizeof...(Ts) && (
-                // this does not look like a copy/move constructor.
-                N > 1 || std::is_convertible<U1, T1>::value)
-           >>
-  EIGEN_CONSTEXPR EIGEN_DEVICE_FUNC
-  TupleImpl(U1&& arg1, Us&&... args) 
-    : head_(std::forward<U1>(arg1)), tail_(std::forward<Us>(args)...) {}
- 
-  // The first stored value. 
-  EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
-  T1& head() {
-    return head_;
-  }
-  
-  EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
-  const T1& head() const {
-    return head_;
-  }
-  
+  template <typename U1, typename... Us,
+            // Only enable if...
+            typename EnableIf = std::enable_if_t<
+                // the number of input arguments match, and ...
+                sizeof...(Us) == sizeof...(Ts) && (
+                                                      // this does not look like a copy/move constructor.
+                                                      N > 1 || std::is_convertible<U1, T1>::value)>>
+  EIGEN_CONSTEXPR EIGEN_DEVICE_FUNC TupleImpl(U1&& arg1, Us&&... args)
+      : head_(std::forward<U1>(arg1)), tail_(std::forward<Us>(args)...) {}
+
+  // The first stored value.
+  EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE T1& head() { return head_; }
+
+  EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE const T1& head() const { return head_; }
+
   // The tail values.
-  EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
-  TupleImpl<N-1, Ts...>& tail() {
-    return tail_;
-  }
-  
-  EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
-  const TupleImpl<N-1, Ts...>& tail() const {
-    return tail_;
-  }
-  
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
-  void swap(TupleImpl& other) {
+  EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE TupleImpl<N - 1, Ts...>& tail() { return tail_; }
+
+  EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE const TupleImpl<N - 1, Ts...>& tail() const { return tail_; }
+
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void swap(TupleImpl& other) {
     using numext::swap;
     swap(head_, other.head_);
     swap(tail_, other.tail_);
   }
-  
-  template<typename... UTypes>
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
-  TupleImpl& operator=(const TupleImpl<N, UTypes...>& other) {
+
+  template <typename... UTypes>
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TupleImpl& operator=(const TupleImpl<N, UTypes...>& other) {
     head_ = other.head_;
     tail_ = other.tail_;
     return *this;
   }
-  
-  template<typename... UTypes>
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
-  TupleImpl& operator=(TupleImpl<N, UTypes...>&& other) {
+
+  template <typename... UTypes>
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TupleImpl& operator=(TupleImpl<N, UTypes...>&& other) {
     head_ = std::move(other.head_);
     tail_ = std::move(other.tail_);
     return *this;
   }
-  
+
  private:
   // Allow related tuples to reference head_/tail_.
-  template<size_t M, typename... UTypes>
+  template <size_t M, typename... UTypes>
   friend class TupleImpl;
- 
+
   T1 head_;
-  TupleImpl<N-1, Ts...> tail_;
+  TupleImpl<N - 1, Ts...> tail_;
 };
 
 // Empty tuple specialization.
-template<>
+template <>
 class TupleImpl<size_t(0)> {};
 
-template<typename TupleType>
+template <typename TupleType>
 struct is_tuple : std::false_type {};
 
-template<typename... Types>
-struct is_tuple< TupleImpl<sizeof...(Types), Types...> > : std::true_type {};
+template <typename... Types>
+struct is_tuple<TupleImpl<sizeof...(Types), Types...>> : std::true_type {};
 
 // Gets an element from a tuple.
-template<size_t Idx, typename T1, typename... Ts>
+template <size_t Idx, typename T1, typename... Ts>
 struct tuple_get_impl {
   using TupleType = TupleImpl<sizeof...(Ts) + 1, T1, Ts...>;
   using ReturnType = typename tuple_get_impl<Idx - 1, Ts...>::ReturnType;
-  
-  static EIGEN_CONSTEXPR EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
-  ReturnType& run(TupleType& tuple) {
-    return tuple_get_impl<Idx-1, Ts...>::run(tuple.tail());
+
+  static EIGEN_CONSTEXPR EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE ReturnType& run(TupleType& tuple) {
+    return tuple_get_impl<Idx - 1, Ts...>::run(tuple.tail());
   }
 
-  static EIGEN_CONSTEXPR EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
-  const ReturnType& run(const TupleType& tuple) {
-    return tuple_get_impl<Idx-1, Ts...>::run(tuple.tail());
+  static EIGEN_CONSTEXPR EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE const ReturnType& run(const TupleType& tuple) {
+    return tuple_get_impl<Idx - 1, Ts...>::run(tuple.tail());
   }
 };
 
 // Base case, getting the head element.
-template<typename T1, typename... Ts>
+template <typename T1, typename... Ts>
 struct tuple_get_impl<0, T1, Ts...> {
   using TupleType = TupleImpl<sizeof...(Ts) + 1, T1, Ts...>;
   using ReturnType = T1;
 
-  static EIGEN_CONSTEXPR EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
-  T1& run(TupleType& tuple) {
-    return tuple.head();
-  }
+  static EIGEN_CONSTEXPR EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE T1& run(TupleType& tuple) { return tuple.head(); }
 
-  static EIGEN_CONSTEXPR EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
-  const T1& run(const TupleType& tuple) {
+  static EIGEN_CONSTEXPR EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE const T1& run(const TupleType& tuple) {
     return tuple.head();
   }
 };
 
 // Concatenates N Tuples.
-template<size_t NTuples, typename... Tuples>
+template <size_t NTuples, typename... Tuples>
 struct tuple_cat_impl;
 
-template<size_t NTuples, size_t N1, typename... Args1, size_t N2, typename... Args2, typename... Tuples>
+template <size_t NTuples, size_t N1, typename... Args1, size_t N2, typename... Args2, typename... Tuples>
 struct tuple_cat_impl<NTuples, TupleImpl<N1, Args1...>, TupleImpl<N2, Args2...>, Tuples...> {
   using TupleType1 = TupleImpl<N1, Args1...>;
   using TupleType2 = TupleImpl<N2, Args2...>;
   using MergedTupleType = TupleImpl<N1 + N2, Args1..., Args2...>;
-  
-  using ReturnType = typename tuple_cat_impl<NTuples-1, MergedTupleType, Tuples...>::ReturnType;
-  
+
+  using ReturnType = typename tuple_cat_impl<NTuples - 1, MergedTupleType, Tuples...>::ReturnType;
+
   // Uses the index sequences to extract and merge elements from tuple1 and tuple2,
   // then recursively calls again.
-  template<typename Tuple1, size_t... I1s, typename Tuple2, size_t... I2s, typename... MoreTuples>
-  static EIGEN_CONSTEXPR EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
-  ReturnType run(Tuple1&& tuple1, std::index_sequence<I1s...>,
-                 Tuple2&& tuple2, std::index_sequence<I2s...>,
-                 MoreTuples&&... tuples) {
-    return tuple_cat_impl<NTuples-1, MergedTupleType, Tuples...>::run(
+  template <typename Tuple1, size_t... I1s, typename Tuple2, size_t... I2s, typename... MoreTuples>
+  static EIGEN_CONSTEXPR EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE ReturnType run(Tuple1&& tuple1,
+                                                                              std::index_sequence<I1s...>,
+                                                                              Tuple2&& tuple2,
+                                                                              std::index_sequence<I2s...>,
+                                                                              MoreTuples&&... tuples) {
+    return tuple_cat_impl<NTuples - 1, MergedTupleType, Tuples...>::run(
         MergedTupleType(tuple_get_impl<I1s, Args1...>::run(std::forward<Tuple1>(tuple1))...,
                         tuple_get_impl<I2s, Args2...>::run(std::forward<Tuple2>(tuple2))...),
         std::forward<MoreTuples>(tuples)...);
   }
-  
+
   // Concatenates the first two tuples.
-  template<typename Tuple1, typename Tuple2, typename... MoreTuples>
-  static EIGEN_CONSTEXPR EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
-  ReturnType run(Tuple1&& tuple1, Tuple2&& tuple2, MoreTuples&&... tuples) {
-    return run(std::forward<Tuple1>(tuple1), std::make_index_sequence<N1>{},
-               std::forward<Tuple2>(tuple2), std::make_index_sequence<N2>{},
-               std::forward<MoreTuples>(tuples)...);
+  template <typename Tuple1, typename Tuple2, typename... MoreTuples>
+  static EIGEN_CONSTEXPR EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE ReturnType run(Tuple1&& tuple1, Tuple2&& tuple2,
+                                                                              MoreTuples&&... tuples) {
+    return run(std::forward<Tuple1>(tuple1), std::make_index_sequence<N1>{}, std::forward<Tuple2>(tuple2),
+               std::make_index_sequence<N2>{}, std::forward<MoreTuples>(tuples)...);
   }
 };
 
 // Base case with a single tuple.
-template<size_t N, typename... Args>
-struct tuple_cat_impl<1, TupleImpl<N, Args...> > { 
+template <size_t N, typename... Args>
+struct tuple_cat_impl<1, TupleImpl<N, Args...>> {
   using ReturnType = TupleImpl<N, Args...>;
-  
-  template<typename Tuple1>
-  static EIGEN_CONSTEXPR EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
-  ReturnType run(Tuple1&& tuple1) {
+
+  template <typename Tuple1>
+  static EIGEN_CONSTEXPR EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE ReturnType run(Tuple1&& tuple1) {
     return tuple1;
   }
 };
 
 // Special case of no tuples.
-template<>
-struct tuple_cat_impl<0> { 
+template <>
+struct tuple_cat_impl<0> {
   using ReturnType = TupleImpl<0>;
-  static EIGEN_CONSTEXPR EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
-  ReturnType run() {return ReturnType{}; }
+  static EIGEN_CONSTEXPR EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE ReturnType run() { return ReturnType{}; }
 };
 
 // For use in make_tuple, unwraps a reference_wrapper.
 template <typename T>
-struct unwrap_reference_wrapper { using type = T; };
- 
+struct unwrap_reference_wrapper {
+  using type = T;
+};
+
 template <typename T>
-struct unwrap_reference_wrapper<std::reference_wrapper<T> > { using type = T&; };
+struct unwrap_reference_wrapper<std::reference_wrapper<T>> {
+  using type = T&;
+};
 
 // For use in make_tuple, decays a type and unwraps a reference_wrapper.
 template <typename T>
@@ -220,11 +197,11 @@
 /**
  * Utility for determining a tuple's size.
  */
-template<typename Tuple>
+template <typename Tuple>
 struct tuple_size;
 
-template<typename... Types >
-struct tuple_size< TupleImpl<sizeof...(Types), Types...> > : std::integral_constant<size_t, sizeof...(Types)> {};
+template <typename... Types>
+struct tuple_size<TupleImpl<sizeof...(Types), Types...>> : std::integral_constant<size_t, sizeof...(Types)> {};
 
 /**
  * Gets an element of a tuple.
@@ -233,17 +210,15 @@
  * \param tuple the tuple.
  * \return a reference to the desired element.
  */
-template<size_t Idx, typename... Types>
-EIGEN_CONSTEXPR EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
-const typename tuple_get_impl<Idx, Types...>::ReturnType&
-get(const TupleImpl<sizeof...(Types), Types...>& tuple) {
+template <size_t Idx, typename... Types>
+EIGEN_CONSTEXPR EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const typename tuple_get_impl<Idx, Types...>::ReturnType& get(
+    const TupleImpl<sizeof...(Types), Types...>& tuple) {
   return tuple_get_impl<Idx, Types...>::run(tuple);
 }
 
-template<size_t Idx, typename... Types>
-EIGEN_CONSTEXPR EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
-typename tuple_get_impl<Idx, Types...>::ReturnType&
-get(TupleImpl<sizeof...(Types), Types...>& tuple) {
+template <size_t Idx, typename... Types>
+EIGEN_CONSTEXPR EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE typename tuple_get_impl<Idx, Types...>::ReturnType& get(
+    TupleImpl<sizeof...(Types), Types...>& tuple) {
   return tuple_get_impl<Idx, Types...>::run(tuple);
 }
 
@@ -252,31 +227,27 @@
  * \param tuples ... list of tuples.
  * \return concatenated tuple.
  */
-template<typename... Tuples,
-          typename EnableIf = std::enable_if_t<
-            internal::reduce_all<
-              is_tuple<typename std::decay<Tuples>::type>::value...>::value>>
+template <typename... Tuples, typename EnableIf = std::enable_if_t<
+                                  internal::reduce_all<is_tuple<typename std::decay<Tuples>::type>::value...>::value>>
 EIGEN_CONSTEXPR EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
-typename tuple_cat_impl<sizeof...(Tuples), typename std::decay<Tuples>::type...>::ReturnType
-tuple_cat(Tuples&&... tuples) {
+    typename tuple_cat_impl<sizeof...(Tuples), typename std::decay<Tuples>::type...>::ReturnType
+    tuple_cat(Tuples&&... tuples) {
   return tuple_cat_impl<sizeof...(Tuples), typename std::decay<Tuples>::type...>::run(std::forward<Tuples>(tuples)...);
 }
 
 /**
  * Tie arguments together into a tuple.
  */
-template <typename... Args, typename ReturnType = TupleImpl<sizeof...(Args), Args&...> >
-EIGEN_CONSTEXPR EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
-ReturnType tie(Args&... args) EIGEN_NOEXCEPT {
-    return ReturnType{args...};
+template <typename... Args, typename ReturnType = TupleImpl<sizeof...(Args), Args&...>>
+EIGEN_CONSTEXPR EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE ReturnType tie(Args&... args) EIGEN_NOEXCEPT {
+  return ReturnType{args...};
 }
 
 /**
  * Create a tuple of l-values with the supplied arguments.
  */
-template <typename... Args, typename ReturnType = TupleImpl<sizeof...(Args), typename unwrap_decay<Args>::type...> >
-EIGEN_CONSTEXPR EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
-ReturnType make_tuple(Args&&... args) {
+template <typename... Args, typename ReturnType = TupleImpl<sizeof...(Args), typename unwrap_decay<Args>::type...>>
+EIGEN_CONSTEXPR EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE ReturnType make_tuple(Args&&... args) {
   return ReturnType{std::forward<Args>(args)...};
 }
 
@@ -284,15 +255,15 @@
  * Forward a set of arguments as a tuple.
  */
 template <typename... Args>
-EIGEN_CONSTEXPR EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
-TupleImpl<sizeof...(Args), Args...> forward_as_tuple(Args&&... args) {
+EIGEN_CONSTEXPR EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TupleImpl<sizeof...(Args), Args...> forward_as_tuple(
+    Args&&... args) {
   return TupleImpl<sizeof...(Args), Args...>(std::forward<Args>(args)...);
 }
 
 /**
  * Alternative to std::tuple that can be used on device.
  */
-template<typename... Types>
+template <typename... Types>
 using tuple = TupleImpl<sizeof...(Types), Types...>;
 
 }  // namespace tuple_impl
diff --git a/Eigen/src/Core/arch/GPU/TypeCasting.h b/Eigen/src/Core/arch/GPU/TypeCasting.h
index aa89cd2..ae43f8e 100644
--- a/Eigen/src/Core/arch/GPU/TypeCasting.h
+++ b/Eigen/src/Core/arch/GPU/TypeCasting.h
@@ -22,61 +22,56 @@
 
 template <>
 struct type_casting_traits<Eigen::half, float> {
-  enum {
-    VectorizedCast = 1,
-    SrcCoeffRatio = 1,
-    TgtCoeffRatio = 2
-  };
+  enum { VectorizedCast = 1, SrcCoeffRatio = 1, TgtCoeffRatio = 2 };
 };
 
-template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE float4 pcast<half2, float4>(const half2& a, const half2& b) {
+template <>
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE float4 pcast<half2, float4>(const half2& a, const half2& b) {
   float2 r1 = __half22float2(a);
   float2 r2 = __half22float2(b);
   return make_float4(r1.x, r1.y, r2.x, r2.y);
 }
 
-
-template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet4h2 pcast<float4, Packet4h2>(const float4& a, const float4& b) {
+template <>
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet4h2 pcast<float4, Packet4h2>(const float4& a, const float4& b) {
   Packet4h2 r;
-  half2* r_alias=reinterpret_cast<half2*>(&r);
-  r_alias[0]=__floats2half2_rn(a.x,a.y);
-  r_alias[1]=__floats2half2_rn(a.z,a.w);
-  r_alias[2]=__floats2half2_rn(b.x,b.y);
-  r_alias[3]=__floats2half2_rn(b.z,b.w);
+  half2* r_alias = reinterpret_cast<half2*>(&r);
+  r_alias[0] = __floats2half2_rn(a.x, a.y);
+  r_alias[1] = __floats2half2_rn(a.z, a.w);
+  r_alias[2] = __floats2half2_rn(b.x, b.y);
+  r_alias[3] = __floats2half2_rn(b.z, b.w);
   return r;
 }
 
 template <>
 struct type_casting_traits<float, Eigen::half> {
-  enum {
-    VectorizedCast = 1,
-    SrcCoeffRatio = 2,
-    TgtCoeffRatio = 1
-  };
+  enum { VectorizedCast = 1, SrcCoeffRatio = 2, TgtCoeffRatio = 1 };
 };
 
-template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE float4 pcast<Packet4h2, float4>(const Packet4h2& a) {
+template <>
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE float4 pcast<Packet4h2, float4>(const Packet4h2& a) {
   // Simply discard the second half of the input
   float4 r;
-  const half2* a_alias=reinterpret_cast<const half2*>(&a);
+  const half2* a_alias = reinterpret_cast<const half2*>(&a);
   float2 r1 = __half22float2(a_alias[0]);
   float2 r2 = __half22float2(a_alias[1]);
-  r.x=static_cast<float>(r1.x);
-  r.y=static_cast<float>(r1.y);
-  r.z=static_cast<float>(r2.x);
-  r.w=static_cast<float>(r2.y);
+  r.x = static_cast<float>(r1.x);
+  r.y = static_cast<float>(r1.y);
+  r.z = static_cast<float>(r2.x);
+  r.w = static_cast<float>(r2.y);
   return r;
 }
 
-template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 pcast<float4, half2>(const float4& a) {
+template <>
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 pcast<float4, half2>(const float4& a) {
   // Simply discard the second half of the input
   return __floats2half2_rn(a.x, a.y);
 }
 
 #endif
 
-} // end namespace internal
+}  // end namespace internal
 
-} // end namespace Eigen
+}  // end namespace Eigen
 
-#endif // EIGEN_TYPE_CASTING_GPU_H
+#endif  // EIGEN_TYPE_CASTING_GPU_H