Add internal ctz/clz implementation.
diff --git a/Eigen/src/Core/MathFunctions.h b/Eigen/src/Core/MathFunctions.h
index 95f9b97..087d5db 100644
--- a/Eigen/src/Core/MathFunctions.h
+++ b/Eigen/src/Core/MathFunctions.h
@@ -628,6 +628,149 @@
   // no value, error at compile time
 };
 
+template <typename BitsType, typename EnableIf = void>
+struct count_bits_impl {  // Portable fallback: counts zero bits by successive halving of the shift distance.
+  static_assert(std::is_integral<BitsType>::value && std::is_unsigned<BitsType>::value,
+                "BitsType must be an unsigned integer");
+
+  static EIGEN_DEVICE_FUNC inline int clz(BitsType bits) {  // Leading zeros; the full bit width when bits == 0.
+    int n = CHAR_BIT * sizeof(BitsType);  // Start at the full width and subtract as set bits are located.
+    int shift = n / 2;
+    while (bits > 0 && shift > 0) {
+      BitsType y = bits >> shift;
+      if (y > 0) {  // A set bit lies above position `shift`: drop the low bits and keep searching there.
+        n -= shift;
+        bits = y;
+      }
+      shift /= 2;
+    }
+    if (shift == 0) {  // Only reached with shift == 0 when bits was non-zero: account for the set bit itself.
+      --n;
+    }
+    return n;
+  }
+
+  static EIGEN_DEVICE_FUNC inline int ctz(BitsType bits) {  // Trailing zeros; the full bit width when bits == 0.
+    int n = CHAR_BIT * sizeof(BitsType);  // Same scheme as clz, but shifting towards the high end.
+    int shift = n / 2;
+    while (bits > 0 && shift > 0) {
+      BitsType y = bits << shift;  // Bits shifted past the top are discarded by the conversion to BitsType.
+      if (y > 0) {  // A set bit survives the left shift: keep the shifted value and reduce the count.
+        n -= shift;
+        bits = y;
+      }
+      shift /= 2;
+    }
+    if (shift == 0) {  // Only reached with shift == 0 when bits was non-zero: account for the set bit itself.
+      --n;
+    }
+    return n;
+  }
+};
+
+// Count leading zeros of `bits`; yields the bit width of BitsType when bits == 0.
+template <typename BitsType>
+EIGEN_DEVICE_FUNC inline int clz(BitsType bits) {
+  return count_bits_impl<BitsType>::clz(bits);  // Forwards to whichever count_bits_impl matches BitsType.
+}
+
+// Count trailing zeros of `bits`; yields the bit width of BitsType when bits == 0.
+template <typename BitsType>
+EIGEN_DEVICE_FUNC inline int ctz(BitsType bits) {
+  return count_bits_impl<BitsType>::ctz(bits);  // Forwards to whichever count_bits_impl matches BitsType.
+}
+
+#if EIGEN_COMP_GNUC || EIGEN_COMP_CLANG
+
+template <typename BitsType>
+struct count_bits_impl<BitsType, std::enable_if_t<sizeof(BitsType) <= sizeof(unsigned int)>> {  // GCC/Clang: types no wider than unsigned int.
+  static constexpr int kNumBits = static_cast<int>(sizeof(BitsType) * CHAR_BIT);
+  static_assert(std::is_integral<BitsType>::value, "BitsType must be a built-in integer");  // NOTE(review): signed types pass; a negative value narrower than unsigned int sign-extends in the casts below - confirm callers only pass unsigned types.
+  static EIGEN_DEVICE_FUNC inline int clz(BitsType bits) {
+    static constexpr int kLeadingBitsOffset = (sizeof(unsigned int) - sizeof(BitsType)) * CHAR_BIT;  // High bits gained by widening to unsigned int.
+    return bits == 0 ? kNumBits : __builtin_clz(static_cast<unsigned int>(bits)) - kLeadingBitsOffset;  // Zero is answered here rather than passed to the builtin.
+  }
+
+  static EIGEN_DEVICE_FUNC inline int ctz(BitsType bits) {
+    return bits == 0 ? kNumBits : __builtin_ctz(static_cast<unsigned int>(bits));  // Widening adds only high bits, so no offset is needed.
+  }
+};
+
+template <typename BitsType>
+struct count_bits_impl<
+    BitsType, std::enable_if_t<sizeof(unsigned int) < sizeof(BitsType) && sizeof(BitsType) <= sizeof(unsigned long)>> {  // GCC/Clang: wider than unsigned int, up to unsigned long.
+  static constexpr int kNumBits = static_cast<int>(sizeof(BitsType) * CHAR_BIT);
+  static_assert(std::is_integral<BitsType>::value, "BitsType must be a built-in integer");
+  static EIGEN_DEVICE_FUNC inline int clz(BitsType bits) {
+    static constexpr int kLeadingBitsOffset = (sizeof(unsigned long) - sizeof(BitsType)) * CHAR_BIT;  // High bits gained by widening to unsigned long.
+    return bits == 0 ? kNumBits : __builtin_clzl(static_cast<unsigned long>(bits)) - kLeadingBitsOffset;  // Zero is answered here rather than passed to the builtin.
+  }
+
+  static EIGEN_DEVICE_FUNC inline int ctz(BitsType bits) {
+    return bits == 0 ? kNumBits : __builtin_ctzl(static_cast<unsigned long>(bits));  // Widening adds only high bits, so no offset is needed.
+  }
+};
+
+template <typename BitsType>
+struct count_bits_impl<BitsType, std::enable_if_t<sizeof(unsigned long) < sizeof(BitsType) &&
+                                                  sizeof(BitsType) <= sizeof(unsigned long long)>> {  // GCC/Clang: wider than unsigned long, up to unsigned long long.
+  static constexpr int kNumBits = static_cast<int>(sizeof(BitsType) * CHAR_BIT);
+  static_assert(std::is_integral<BitsType>::value, "BitsType must be a built-in integer");
+  static EIGEN_DEVICE_FUNC inline int clz(BitsType bits) {
+    static constexpr int kLeadingBitsOffset = (sizeof(unsigned long long) - sizeof(BitsType)) * CHAR_BIT;  // High bits gained by widening to unsigned long long.
+    return bits == 0 ? kNumBits : __builtin_clzll(static_cast<unsigned long long>(bits)) - kLeadingBitsOffset;  // Zero is answered here rather than passed to the builtin.
+  }
+
+  static EIGEN_DEVICE_FUNC inline int ctz(BitsType bits) {
+    return bits == 0 ? kNumBits : __builtin_ctzll(static_cast<unsigned long long>(bits));  // Widening adds only high bits, so no offset is needed.
+  }
+};
+
+#elif EIGEN_COMP_MSVC
+
+template <typename BitsType>
+struct count_bits_impl<BitsType, std::enable_if_t<sizeof(BitsType) <= sizeof(unsigned long)>> {  // MSVC: types no wider than unsigned long.
+  static constexpr int kNumBits = static_cast<int>(sizeof(BitsType) * CHAR_BIT);
+  static_assert(std::is_integral<BitsType>::value, "BitsType must be a built-in integer");
+  static EIGEN_DEVICE_FUNC inline int clz(BitsType bits) {  // Leading zeros; kNumBits when bits == 0.
+    // _BitScanReverse reports the index (from the LSB) of the highest set bit, so convert it to a leading-zero count.
+    unsigned long out;
+    _BitScanReverse(&out, static_cast<unsigned long>(bits));
+    return bits == 0 ? kNumBits : (kNumBits - 1) - static_cast<int>(out);  // `out` is only read when a set bit exists.
+  }
+
+  static EIGEN_DEVICE_FUNC inline int ctz(BitsType bits) {  // Trailing zeros; kNumBits when bits == 0.
+    unsigned long out;
+    _BitScanForward(&out, static_cast<unsigned long>(bits));
+    return bits == 0 ? kNumBits : static_cast<int>(out);  // Index of the lowest set bit equals the trailing-zero count.
+  }
+};
+
+#ifdef _WIN64
+
+template <typename BitsType>
+struct count_bits_impl<
+    BitsType, std::enable_if_t<sizeof(unsigned long) < sizeof(BitsType) && sizeof(BitsType) <= sizeof(__int64)>> {  // MSVC x64: wider than unsigned long, up to __int64.
+  static constexpr int kNumBits = static_cast<int>(sizeof(BitsType) * CHAR_BIT);
+  static_assert(std::is_integral<BitsType>::value, "BitsType must be a built-in integer");
+  static EIGEN_DEVICE_FUNC inline int clz(BitsType bits) {  // Leading zeros; kNumBits when bits == 0.
+    // _BitScanReverse64 reports the index (from the LSB) of the highest set bit, so convert it to a leading-zero count.
+    unsigned long out;
+    _BitScanReverse64(&out, static_cast<unsigned __int64>(bits));
+    return bits == 0 ? kNumBits : (kNumBits - 1) - static_cast<int>(out);  // `out` is only read when a set bit exists.
+  }
+
+  static EIGEN_DEVICE_FUNC inline int ctz(BitsType bits) {  // Trailing zeros; kNumBits when bits == 0.
+    unsigned long out;
+    _BitScanForward64(&out, static_cast<unsigned __int64>(bits));
+    return bits == 0 ? kNumBits : static_cast<int>(out);  // Index of the lowest set bit equals the trailing-zero count.
+  }
+};
+
+#endif  // _WIN64
+
+#endif  // EIGEN_COMP_GNUC || EIGEN_COMP_CLANG
+
 template <typename Scalar>
 struct random_default_impl<Scalar, false, true> {
   static inline Scalar run(const Scalar& x, const Scalar& y) {
diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt
index fbbc98a..4c7c3a4 100644
--- a/test/CMakeLists.txt
+++ b/test/CMakeLists.txt
@@ -48,7 +48,7 @@
   set(SPARSE_LIBS ${SPARSE_LIBS} ${CHOLMOD_LIBRARIES} ${EIGEN_BLAS_LIBRARIES} ${EIGEN_LAPACK_LIBRARIES})
   set(CHOLMOD_ALL_LIBS  ${CHOLMOD_LIBRARIES} ${EIGEN_BLAS_LIBRARIES} ${EIGEN_LAPACK_LIBRARIES})
   ei_add_property(EIGEN_TESTED_BACKENDS "CHOLMOD, ")
-  
+
   ei_add_test(cholmod_support "" "${CHOLMOD_ALL_LIBS}")
 else()
   ei_add_property(EIGEN_MISSING_BACKENDS "CHOLMOD, ")
@@ -61,7 +61,7 @@
   set(SPARSE_LIBS ${SPARSE_LIBS} ${UMFPACK_LIBRARIES} ${EIGEN_BLAS_LIBRARIES})
   set(UMFPACK_ALL_LIBS ${UMFPACK_LIBRARIES} ${EIGEN_BLAS_LIBRARIES})
   ei_add_property(EIGEN_TESTED_BACKENDS "UMFPACK, ")
-  
+
   ei_add_test(umfpack_support "" "${UMFPACK_ALL_LIBS}")
 else()
   ei_add_property(EIGEN_MISSING_BACKENDS "UMFPACK, ")
@@ -74,7 +74,7 @@
   set(SPARSE_LIBS ${SPARSE_LIBS} ${KLU_LIBRARIES} ${EIGEN_BLAS_LIBRARIES})
   set(KLU_ALL_LIBS ${KLU_LIBRARIES} ${EIGEN_BLAS_LIBRARIES})
   ei_add_property(EIGEN_TESTED_BACKENDS "KLU, ")
-  
+
   ei_add_test(klu_support "" "${KLU_ALL_LIBS}")
 else()
   ei_add_property(EIGEN_MISSING_BACKENDS "KLU, ")
@@ -87,7 +87,7 @@
   set(SPARSE_LIBS ${SPARSE_LIBS} ${SUPERLU_LIBRARIES} ${EIGEN_BLAS_LIBRARIES})
   set(SUPERLU_ALL_LIBS ${SUPERLU_LIBRARIES} ${EIGEN_BLAS_LIBRARIES})
   ei_add_property(EIGEN_TESTED_BACKENDS  "SuperLU, ")
-  
+
   ei_add_test(superlu_support "" "${SUPERLU_ALL_LIBS}")
 else()
   ei_add_property(EIGEN_MISSING_BACKENDS  "SuperLU, ")
@@ -171,6 +171,7 @@
 set_property(GLOBAL PROPERTY EIGEN_CURRENT_SUBPROJECT "Official")
 add_custom_target(BuildOfficial)
 
+ei_add_test(clz)
 ei_add_test(rand)
 ei_add_test(meta)
 ei_add_test(maxsizevector)
@@ -406,7 +407,7 @@
   string(REPLACE "-pedantic" "" CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}")
   string(REPLACE "-Wundef" "" CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}")
   string(REPLACE "-Wnon-virtual-dtor" "" CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}")
-  string(REPLACE "-fno-check-new" "" CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}")  
+  string(REPLACE "-fno-check-new" "" CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}")
 
   if(EIGEN_TEST_CUDA_CLANG)
     string(APPEND CMAKE_CXX_FLAGS " --cuda-path=${CUDA_TOOLKIT_ROOT_DIR}")
@@ -433,12 +434,12 @@
     set(CUDA_NVCC_FLAGS  "--expt-relaxed-constexpr -Xcudafe \"--display_error_number\" ${NVCC_ARCH_FLAGS} ${CUDA_NVCC_FLAGS} ${EIGEN_CUDA_CXX_FLAGS}")
     cuda_include_directories("${CMAKE_CURRENT_BINARY_DIR}" "${CUDA_TOOLKIT_ROOT_DIR}/include")
   endif()
-  
+
   set(EIGEN_ADD_TEST_FILENAME_EXTENSION  "cu")
-  
+
   ei_add_test(gpu_example)
   ei_add_test(gpu_basic)
-  
+
   unset(EIGEN_ADD_TEST_FILENAME_EXTENSION)
 
 endif()
@@ -477,7 +478,7 @@
       message(FATAL_ERROR "HIP_PLATFORM = nvcc is not supported within Eigen")
     else ()
       message(FATAL_ERROR "Unknown HIP_PLATFORM = ${HIP_PLATFORM}")
-    endif() 
+    endif()
   endif()
 endif()
 
diff --git a/test/clz.cpp b/test/clz.cpp
new file mode 100644
index 0000000..b56d328
--- /dev/null
+++ b/test/clz.cpp
@@ -0,0 +1,74 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2023 The Eigen Authors
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+#include "main.h"
+
+template <typename T>
+int ref_clz(T val) {  // Reference leading-zero count: shift left until the top bit is set.
+  constexpr int kNumBits = sizeof(T) * CHAR_BIT;
+  T kMsbMask = T(1) << (kNumBits - 1);  // Selects only the most significant bit of T.
+  int z = 0;
+  for (; z < kNumBits && ((val & kMsbMask) == 0); ++z) {  // Runs to kNumBits when val is zero.
+    val <<= 1;
+  }
+  return z;
+}
+
+template <typename T>
+int ref_ctz(T val) {  // Reference trailing-zero count: shift right until the bottom bit is set.
+  constexpr int kNumBits = sizeof(T) * CHAR_BIT;
+  T kLsbMask = T(1);  // Selects only the least significant bit.
+  int z = 0;
+  for (; z < kNumBits && ((val & kLsbMask) == 0); ++z) {  // Runs to kNumBits when val is zero.
+    val >>= 1;
+  }
+  return z;
+}
+
+template <typename T>
+void test_clz_ctz() {  // Sweeps evenly spaced values of T and checks clz/ctz against the reference loops.
+  T step = sizeof(T) <= 2 ? 1 : (Eigen::NumTraits<T>::highest() / (T(1) << 16));  // Stride 1 for 1-2 byte types, highest() / 2^16 otherwise.
+  T iters = Eigen::NumTraits<T>::highest() / step;
+  for (T i = 0; i < iters; ++i) {  // NOTE(review): with step == 1 the largest value tested is highest() - 1.
+    T val = i * step;
+    int expected_clz = ref_clz(val);
+    int actual_clz = Eigen::internal::clz(val);
+    VERIFY(expected_clz == actual_clz);
+
+    int expected_ctz = ref_ctz(val);
+    int actual_ctz = Eigen::internal::ctz(val);
+    VERIFY(expected_ctz == actual_ctz);
+  }
+}
+
+template <typename T>
+void test_clz_ctz_random() {  // Checks clz/ctz against the reference loops on values from Eigen::internal::random<T>().
+  for (int i = 0; i < 1024 * 1024; ++i) {  // 1024 * 1024 samples per call.
+    T val = Eigen::internal::random<T>();
+    int expected_clz = ref_clz(val);
+    int actual_clz = Eigen::internal::clz(val);
+    VERIFY(expected_clz == actual_clz);
+
+    int expected_ctz = ref_ctz(val);
+    int actual_ctz = Eigen::internal::ctz(val);
+    VERIFY(expected_ctz == actual_ctz);
+  }
+}
+
+EIGEN_DECLARE_TEST(clz) {  // Test entry point: one sweep per unsigned width, then repeated random checks.
+  CALL_SUBTEST_1(test_clz_ctz<uint8_t>());
+  CALL_SUBTEST_2(test_clz_ctz<uint16_t>());
+  CALL_SUBTEST_3(test_clz_ctz<uint32_t>());
+  CALL_SUBTEST_4(test_clz_ctz<uint64_t>());
+
+  for (int i = 0; i < g_repeat; i++) {  // Random checks cover only the 32- and 64-bit types.
+    test_clz_ctz_random<uint32_t>();
+    test_clz_ctz_random<uint64_t>();
+  }
+}