Add internal ctz/clz implementation.
diff --git a/Eigen/src/Core/MathFunctions.h b/Eigen/src/Core/MathFunctions.h
index 95f9b97..087d5db 100644
--- a/Eigen/src/Core/MathFunctions.h
+++ b/Eigen/src/Core/MathFunctions.h
@@ -628,6 +628,149 @@
// no value, error at compile time
};
+// Portable fallback, selected when no compiler-specific specialization of
+// count_bits_impl applies to BitsType. Both routines locate the outermost set
+// bit by repeatedly halving the shift distance; a zero input yields the full
+// bit width of BitsType.
+template <typename BitsType, typename EnableIf = void>
+struct count_bits_impl {
+  static_assert(std::is_integral<BitsType>::value && std::is_unsigned<BitsType>::value,
+                "BitsType must be an unsigned integer");
+
+  // Number of zero bits above the most significant set bit of `bits`.
+  static EIGEN_DEVICE_FUNC inline int clz(BitsType bits) {
+    int zeros = CHAR_BIT * sizeof(BitsType);
+    if (bits == 0) {
+      return zeros;
+    }
+    for (int half = zeros / 2; half > 0; half /= 2) {
+      const BitsType upper = bits >> half;
+      if (upper != 0) {
+        // Something is set in the upper part: drop the low `half` bits.
+        zeros -= half;
+        bits = upper;
+      }
+    }
+    // The remaining set bit itself is not a zero.
+    return zeros - 1;
+  }
+
+  // Number of zero bits below the least significant set bit of `bits`.
+  static EIGEN_DEVICE_FUNC inline int ctz(BitsType bits) {
+    int zeros = CHAR_BIT * sizeof(BitsType);
+    if (bits == 0) {
+      return zeros;
+    }
+    for (int half = zeros / 2; half > 0; half /= 2) {
+      // Storing into BitsType drops any bits shifted past the top.
+      const BitsType lower = bits << half;
+      if (lower != 0) {
+        zeros -= half;
+        bits = lower;
+      }
+    }
+    // The remaining set bit itself is not a zero.
+    return zeros - 1;
+  }
+};
+
+// Returns the number of leading (most significant) zero bits of `bits` by
+// forwarding to count_bits_impl<BitsType>::clz.
+template <typename BitsType>
+EIGEN_DEVICE_FUNC inline int clz(BitsType bits) {
+  using Impl = count_bits_impl<BitsType>;
+  return Impl::clz(bits);
+}
+
+// Returns the number of trailing (least significant) zero bits of `bits` by
+// forwarding to count_bits_impl<BitsType>::ctz.
+template <typename BitsType>
+EIGEN_DEVICE_FUNC inline int ctz(BitsType bits) {
+  using Impl = count_bits_impl<BitsType>;
+  return Impl::ctz(bits);
+}
+
+#if EIGEN_COMP_GNUC || EIGEN_COMP_CLANG
+
+// GCC/Clang specialization for integers no wider than `unsigned int`, backed
+// by __builtin_clz / __builtin_ctz. The builtins are undefined for a zero
+// argument, so zero is answered directly with the bit width of BitsType.
+template <typename BitsType>
+struct count_bits_impl<BitsType, std::enable_if_t<sizeof(BitsType) <= sizeof(unsigned int)>> {
+  static constexpr int kNumBits = static_cast<int>(sizeof(BitsType) * CHAR_BIT);
+  static_assert(std::is_integral<BitsType>::value, "BitsType must be a built-in integer");
+
+  // Leading zero count within the kNumBits-wide representation of `bits`.
+  static EIGEN_DEVICE_FUNC inline int clz(BitsType bits) {
+    // High-order bits that `unsigned int` has on top of BitsType.
+    static constexpr int kLeadingBitsOffset = (sizeof(unsigned int) - sizeof(BitsType)) * CHAR_BIT;
+    // Keeps only the low kNumBits bits. The static_assert above admits signed
+    // types, and widening a negative narrow value sign-extends it; without the
+    // mask the builtin would return 0 and the result would go negative.
+    static constexpr unsigned int kValueMask = ~0u >> kLeadingBitsOffset;
+    if (bits == 0) {
+      return kNumBits;
+    }
+    return __builtin_clz(static_cast<unsigned int>(bits) & kValueMask) - kLeadingBitsOffset;
+  }
+
+  // Trailing zero count. Widening never changes the low-order bits, so no
+  // masking is needed here.
+  static EIGEN_DEVICE_FUNC inline int ctz(BitsType bits) {
+    return bits == 0 ? kNumBits : __builtin_ctz(static_cast<unsigned int>(bits));
+  }
+};
+
+// GCC/Clang specialization for integers wider than `unsigned int` but no wider
+// than `unsigned long`, implemented with the `l`-suffixed builtins. A zero
+// input is answered with kNumBits without calling the builtin.
+template <typename BitsType>
+struct count_bits_impl<
+    BitsType, std::enable_if_t<sizeof(unsigned int) < sizeof(BitsType) && sizeof(BitsType) <= sizeof(unsigned long)>> {
+  static constexpr int kNumBits = static_cast<int>(sizeof(BitsType) * CHAR_BIT);
+  static_assert(std::is_integral<BitsType>::value, "BitsType must be a built-in integer");
+
+  // Leading zero count of `bits`.
+  static EIGEN_DEVICE_FUNC inline int clz(BitsType bits) {
+    // High-order bits that `unsigned long` has on top of BitsType.
+    static constexpr int kPadBits = (sizeof(unsigned long) - sizeof(BitsType)) * CHAR_BIT;
+    if (bits == 0) {
+      return kNumBits;
+    }
+    const unsigned long widened = static_cast<unsigned long>(bits);
+    return __builtin_clzl(widened) - kPadBits;
+  }
+
+  // Trailing zero count of `bits`.
+  static EIGEN_DEVICE_FUNC inline int ctz(BitsType bits) {
+    if (bits == 0) {
+      return kNumBits;
+    }
+    const unsigned long widened = static_cast<unsigned long>(bits);
+    return __builtin_ctzl(widened);
+  }
+};
+
+// GCC/Clang specialization for integers wider than `unsigned long` but no
+// wider than `unsigned long long`, implemented with the `ll`-suffixed builtins.
+// A zero input is answered with kNumBits without calling the builtin.
+template <typename BitsType>
+struct count_bits_impl<BitsType, std::enable_if_t<sizeof(unsigned long) < sizeof(BitsType) &&
+                                                  sizeof(BitsType) <= sizeof(unsigned long long)>> {
+  static constexpr int kNumBits = static_cast<int>(sizeof(BitsType) * CHAR_BIT);
+  static_assert(std::is_integral<BitsType>::value, "BitsType must be a built-in integer");
+
+  // Leading zero count of `bits`.
+  static EIGEN_DEVICE_FUNC inline int clz(BitsType bits) {
+    // High-order bits that `unsigned long long` has on top of BitsType.
+    static constexpr int kPadBits = (sizeof(unsigned long long) - sizeof(BitsType)) * CHAR_BIT;
+    if (bits == 0) {
+      return kNumBits;
+    }
+    const unsigned long long widened = static_cast<unsigned long long>(bits);
+    return __builtin_clzll(widened) - kPadBits;
+  }
+
+  // Trailing zero count of `bits`.
+  static EIGEN_DEVICE_FUNC inline int ctz(BitsType bits) {
+    if (bits == 0) {
+      return kNumBits;
+    }
+    const unsigned long long widened = static_cast<unsigned long long>(bits);
+    return __builtin_ctzll(widened);
+  }
+};
+
+#elif EIGEN_COMP_MSVC
+
+// MSVC specialization for integers no wider than `unsigned long`, built on the
+// _BitScanReverse / _BitScanForward intrinsics. The intrinsics report the
+// *index* (0 = least significant bit) of the set bit they find and do not
+// define the output for a zero mask, so zero is handled before calling them.
+template <typename BitsType>
+struct count_bits_impl<BitsType, std::enable_if_t<sizeof(BitsType) <= sizeof(unsigned long)>> {
+  static constexpr int kNumBits = static_cast<int>(sizeof(BitsType) * CHAR_BIT);
+  static_assert(std::is_integral<BitsType>::value, "BitsType must be a built-in integer");
+
+  // Leading zero count within the kNumBits-wide representation of `bits`.
+  static EIGEN_DEVICE_FUNC inline int clz(BitsType bits) {
+    // Keeps only the low kNumBits bits, so that a negative value of a narrow
+    // signed type is not scanned with its sign-extension bits included.
+    static constexpr unsigned long kValueMask = ~0ul >> ((sizeof(unsigned long) - sizeof(BitsType)) * CHAR_BIT);
+    if (bits == 0) {
+      return kNumBits;
+    }
+    unsigned long msb_index = 0;
+    _BitScanReverse(&msb_index, static_cast<unsigned long>(bits) & kValueMask);
+    // Turn the index of the highest set bit into the number of zeros above it.
+    return (kNumBits - 1) - static_cast<int>(msb_index);
+  }
+
+  // Trailing zero count: the index of the lowest set bit is already the answer.
+  static EIGEN_DEVICE_FUNC inline int ctz(BitsType bits) {
+    if (bits == 0) {
+      return kNumBits;
+    }
+    unsigned long lsb_index = 0;
+    _BitScanForward(&lsb_index, static_cast<unsigned long>(bits));
+    return static_cast<int>(lsb_index);
+  }
+};
+
+#ifdef _WIN64
+
+// MSVC (64-bit targets) specialization for integers wider than `unsigned long`
+// but no wider than `__int64`, built on _BitScanReverse64 / _BitScanForward64.
+// The intrinsics report the *index* (0 = least significant bit) of the set bit
+// they find and do not define the output for a zero mask, so zero is handled
+// before calling them.
+template <typename BitsType>
+struct count_bits_impl<
+    BitsType, std::enable_if_t<sizeof(unsigned long) < sizeof(BitsType) && sizeof(BitsType) <= sizeof(__int64)>> {
+  static constexpr int kNumBits = static_cast<int>(sizeof(BitsType) * CHAR_BIT);
+  static_assert(std::is_integral<BitsType>::value, "BitsType must be a built-in integer");
+
+  // Leading zero count of `bits`.
+  static EIGEN_DEVICE_FUNC inline int clz(BitsType bits) {
+    if (bits == 0) {
+      return kNumBits;
+    }
+    unsigned long msb_index = 0;
+    _BitScanReverse64(&msb_index, static_cast<unsigned __int64>(bits));
+    // Turn the index of the highest set bit into the number of zeros above it.
+    return (kNumBits - 1) - static_cast<int>(msb_index);
+  }
+
+  // Trailing zero count: the index of the lowest set bit is already the answer.
+  static EIGEN_DEVICE_FUNC inline int ctz(BitsType bits) {
+    if (bits == 0) {
+      return kNumBits;
+    }
+    unsigned long lsb_index = 0;
+    _BitScanForward64(&lsb_index, static_cast<unsigned __int64>(bits));
+    return static_cast<int>(lsb_index);
+  }
+};
+
+#endif // _WIN64
+
+#endif // EIGEN_COMP_GNUC || EIGEN_COMP_CLANG
+
template <typename Scalar>
struct random_default_impl<Scalar, false, true> {
static inline Scalar run(const Scalar& x, const Scalar& y) {
diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt
index fbbc98a..4c7c3a4 100644
--- a/test/CMakeLists.txt
+++ b/test/CMakeLists.txt
@@ -48,7 +48,7 @@
set(SPARSE_LIBS ${SPARSE_LIBS} ${CHOLMOD_LIBRARIES} ${EIGEN_BLAS_LIBRARIES} ${EIGEN_LAPACK_LIBRARIES})
set(CHOLMOD_ALL_LIBS ${CHOLMOD_LIBRARIES} ${EIGEN_BLAS_LIBRARIES} ${EIGEN_LAPACK_LIBRARIES})
ei_add_property(EIGEN_TESTED_BACKENDS "CHOLMOD, ")
-
+
ei_add_test(cholmod_support "" "${CHOLMOD_ALL_LIBS}")
else()
ei_add_property(EIGEN_MISSING_BACKENDS "CHOLMOD, ")
@@ -61,7 +61,7 @@
set(SPARSE_LIBS ${SPARSE_LIBS} ${UMFPACK_LIBRARIES} ${EIGEN_BLAS_LIBRARIES})
set(UMFPACK_ALL_LIBS ${UMFPACK_LIBRARIES} ${EIGEN_BLAS_LIBRARIES})
ei_add_property(EIGEN_TESTED_BACKENDS "UMFPACK, ")
-
+
ei_add_test(umfpack_support "" "${UMFPACK_ALL_LIBS}")
else()
ei_add_property(EIGEN_MISSING_BACKENDS "UMFPACK, ")
@@ -74,7 +74,7 @@
set(SPARSE_LIBS ${SPARSE_LIBS} ${KLU_LIBRARIES} ${EIGEN_BLAS_LIBRARIES})
set(KLU_ALL_LIBS ${KLU_LIBRARIES} ${EIGEN_BLAS_LIBRARIES})
ei_add_property(EIGEN_TESTED_BACKENDS "KLU, ")
-
+
ei_add_test(klu_support "" "${KLU_ALL_LIBS}")
else()
ei_add_property(EIGEN_MISSING_BACKENDS "KLU, ")
@@ -87,7 +87,7 @@
set(SPARSE_LIBS ${SPARSE_LIBS} ${SUPERLU_LIBRARIES} ${EIGEN_BLAS_LIBRARIES})
set(SUPERLU_ALL_LIBS ${SUPERLU_LIBRARIES} ${EIGEN_BLAS_LIBRARIES})
ei_add_property(EIGEN_TESTED_BACKENDS "SuperLU, ")
-
+
ei_add_test(superlu_support "" "${SUPERLU_ALL_LIBS}")
else()
ei_add_property(EIGEN_MISSING_BACKENDS "SuperLU, ")
@@ -171,6 +171,7 @@
set_property(GLOBAL PROPERTY EIGEN_CURRENT_SUBPROJECT "Official")
add_custom_target(BuildOfficial)
+ei_add_test(clz)
ei_add_test(rand)
ei_add_test(meta)
ei_add_test(maxsizevector)
@@ -406,7 +407,7 @@
string(REPLACE "-pedantic" "" CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}")
string(REPLACE "-Wundef" "" CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}")
string(REPLACE "-Wnon-virtual-dtor" "" CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}")
- string(REPLACE "-fno-check-new" "" CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}")
+ string(REPLACE "-fno-check-new" "" CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}")
if(EIGEN_TEST_CUDA_CLANG)
string(APPEND CMAKE_CXX_FLAGS " --cuda-path=${CUDA_TOOLKIT_ROOT_DIR}")
@@ -433,12 +434,12 @@
set(CUDA_NVCC_FLAGS "--expt-relaxed-constexpr -Xcudafe \"--display_error_number\" ${NVCC_ARCH_FLAGS} ${CUDA_NVCC_FLAGS} ${EIGEN_CUDA_CXX_FLAGS}")
cuda_include_directories("${CMAKE_CURRENT_BINARY_DIR}" "${CUDA_TOOLKIT_ROOT_DIR}/include")
endif()
-
+
set(EIGEN_ADD_TEST_FILENAME_EXTENSION "cu")
-
+
ei_add_test(gpu_example)
ei_add_test(gpu_basic)
-
+
unset(EIGEN_ADD_TEST_FILENAME_EXTENSION)
endif()
@@ -477,7 +478,7 @@
message(FATAL_ERROR "HIP_PLATFORM = nvcc is not supported within Eigen")
else ()
message(FATAL_ERROR "Unknown HIP_PLATFORM = ${HIP_PLATFORM}")
- endif()
+ endif()
endif()
endif()
diff --git a/test/clz.cpp b/test/clz.cpp
new file mode 100644
index 0000000..b56d328
--- /dev/null
+++ b/test/clz.cpp
@@ -0,0 +1,74 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2023 The Eigen Authors
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+#include "main.h"
+
+// Reference count-leading-zeros: shifts `val` left one bit at a time until its
+// top bit is set, returning the number of shifts (sizeof(T) * CHAR_BIT for 0).
+template <typename T>
+int ref_clz(T val) {
+  constexpr int kWidth = sizeof(T) * CHAR_BIT;
+  const T top_bit = T(1) << (kWidth - 1);
+  int count = 0;
+  while (count < kWidth && (val & top_bit) == 0) {
+    val <<= 1;
+    ++count;
+  }
+  return count;
+}
+
+// Reference count-trailing-zeros: shifts `val` right one bit at a time until
+// its lowest bit is set, returning the number of shifts (sizeof(T) * CHAR_BIT
+// for 0).
+template <typename T>
+int ref_ctz(T val) {
+  constexpr int kWidth = sizeof(T) * CHAR_BIT;
+  const T low_bit = T(1);
+  int count = 0;
+  while (count < kWidth && (val & low_bit) == 0) {
+    val >>= 1;
+    ++count;
+  }
+  return count;
+}
+
+// Checks Eigen::internal::clz and Eigen::internal::ctz for a single value
+// against the reference implementations.
+template <typename T>
+void verify_clz_ctz_value(T val) {
+  int expected_clz = ref_clz(val);
+  int actual_clz = Eigen::internal::clz(val);
+  VERIFY(expected_clz == actual_clz);
+
+  int expected_ctz = ref_ctz(val);
+  int actual_ctz = Eigen::internal::ctz(val);
+  VERIFY(expected_ctz == actual_ctz);
+}
+
+// Sweeps multiples of `step` from 0 upwards and then checks
+// NumTraits<T>::highest() itself. For 1- and 2-byte types the step is 1; wider
+// types use highest() / 2^16 so the sweep stays at roughly 2^16 values.
+template <typename T>
+void test_clz_ctz() {
+  const T highest = Eigen::NumTraits<T>::highest();
+  const T step = sizeof(T) <= 2 ? 1 : (highest / (T(1) << 16));
+  const T iters = highest / step;
+  for (T i = 0; i < iters; ++i) {
+    verify_clz_ctz_value<T>(i * step);
+  }
+  // The loop bound is exclusive, so highest() is never produced above. An
+  // inclusive bound would wrap `i` for the narrow types and never terminate,
+  // hence the explicit check of the boundary value.
+  verify_clz_ctz_value<T>(highest);
+}
+
+// Compares Eigen::internal::clz / ctz with the reference implementations on
+// 1024 * 1024 values obtained from Eigen::internal::random<T>().
+template <typename T>
+void test_clz_ctz_random() {
+  constexpr int kSamples = 1024 * 1024;
+  for (int sample = 0; sample < kSamples; ++sample) {
+    const T val = Eigen::internal::random<T>();
+
+    const int expected_clz = ref_clz(val);
+    const int actual_clz = Eigen::internal::clz(val);
+    VERIFY(expected_clz == actual_clz);
+
+    const int expected_ctz = ref_ctz(val);
+    const int actual_ctz = Eigen::internal::ctz(val);
+    VERIFY(expected_ctz == actual_ctz);
+  }
+}
+
+// Test entry point: one deterministic sweep per unsigned width, followed by
+// g_repeat rounds of randomized checks on the 32- and 64-bit types.
+EIGEN_DECLARE_TEST(clz) {
+  // Deterministic sweeps.
+  CALL_SUBTEST_1(test_clz_ctz<uint8_t>());
+  CALL_SUBTEST_2(test_clz_ctz<uint16_t>());
+  CALL_SUBTEST_3(test_clz_ctz<uint32_t>());
+  CALL_SUBTEST_4(test_clz_ctz<uint64_t>());
+
+  // Randomized checks; these are not wrapped in a CALL_SUBTEST_* macro.
+  for (int round = 0; round < g_repeat; ++round) {
+    test_clz_ctz_random<uint32_t>();
+    test_clz_ctz_random<uint64_t>();
+  }
+}