Add internal ctz/clz implementation.
diff --git a/Eigen/src/Core/MathFunctions.h b/Eigen/src/Core/MathFunctions.h index 95f9b97..087d5db 100644 --- a/Eigen/src/Core/MathFunctions.h +++ b/Eigen/src/Core/MathFunctions.h
@@ -628,6 +628,178 @@
   // no value, error at compile time
 };
 
+// Portable fallback for counting leading / trailing zero bits of an unsigned
+// integer, implemented as a binary search over shift widths. The partial
+// specializations further down replace it, through EnableIf, on compilers
+// that provide a bit-scan intrinsic.
+template <typename BitsType, typename EnableIf = void>
+struct count_bits_impl {
+  static_assert(std::is_integral<BitsType>::value && std::is_unsigned<BitsType>::value,
+                "BitsType must be an unsigned integer");
+
+  // Returns the number of zero bits above the highest set bit, or the full bit
+  // width of BitsType when bits == 0.
+  static EIGEN_DEVICE_FUNC inline int clz(BitsType bits) {
+    int n = static_cast<int>(CHAR_BIT * sizeof(BitsType));
+    int shift = n / 2;
+    // Whenever the part above `shift` is non-zero, continue the search there
+    // and rule out `shift` candidate positions.
+    while (bits > 0 && shift > 0) {
+      BitsType y = static_cast<BitsType>(bits >> shift);
+      if (y > 0) {
+        n -= shift;
+        bits = y;
+      }
+      shift /= 2;
+    }
+    // shift only reaches 0 for non-zero input; discount the set bit itself.
+    if (shift == 0) {
+      --n;
+    }
+    return n;
+  }
+
+  // Returns the number of zero bits below the lowest set bit, or the full bit
+  // width of BitsType when bits == 0.
+  static EIGEN_DEVICE_FUNC inline int ctz(BitsType bits) {
+    int n = static_cast<int>(CHAR_BIT * sizeof(BitsType));
+    int shift = n / 2;
+    // Mirror image of clz: the left shift, truncated to BitsType, drops the
+    // upper bits instead of the lower ones.
+    while (bits > 0 && shift > 0) {
+      BitsType y = static_cast<BitsType>(bits << shift);
+      if (y > 0) {
+        n -= shift;
+        bits = y;
+      }
+      shift /= 2;
+    }
+    if (shift == 0) {
+      --n;
+    }
+    return n;
+  }
+};
+
+// Count leading zeros. Returns the bit width of BitsType when bits == 0.
+template <typename BitsType>
+EIGEN_DEVICE_FUNC inline int clz(BitsType bits) {
+  return count_bits_impl<BitsType>::clz(bits);
+}
+
+// Count trailing zeros. Returns the bit width of BitsType when bits == 0.
+template <typename BitsType>
+EIGEN_DEVICE_FUNC inline int ctz(BitsType bits) {
+  return count_bits_impl<BitsType>::ctz(bits);
+}
+
+#if EIGEN_COMP_GNUC || EIGEN_COMP_CLANG
+
+// Types no wider than unsigned int: __builtin_clz / __builtin_ctz. The builtins
+// are only called with non-zero arguments.
+template <typename BitsType>
+struct count_bits_impl<BitsType, std::enable_if_t<sizeof(BitsType) <= sizeof(unsigned int)>> {
+  static constexpr int kNumBits = static_cast<int>(sizeof(BitsType) * CHAR_BIT);
+  static_assert(std::is_integral<BitsType>::value, "BitsType must be a built-in integer");
+  static EIGEN_DEVICE_FUNC inline int clz(BitsType bits) {
+    static constexpr int kLeadingBitsOffset = (sizeof(unsigned int) - sizeof(BitsType)) * CHAR_BIT;
+    // Mask off the padding so that sign extension of a negative signed BitsType
+    // cannot put set bits above the low kNumBits bits.
+    const unsigned int widened = static_cast<unsigned int>(bits) & (~0u >> kLeadingBitsOffset);
+    return widened == 0 ? kNumBits : __builtin_clz(widened) - kLeadingBitsOffset;
+  }
+
+  static EIGEN_DEVICE_FUNC inline int ctz(BitsType bits) {
+    return bits == 0 ? kNumBits : __builtin_ctz(static_cast<unsigned int>(bits));
+  }
+};
+
+// Types wider than unsigned int but no wider than unsigned long.
+template <typename BitsType>
+struct count_bits_impl<
+    BitsType, std::enable_if_t<sizeof(unsigned int) < sizeof(BitsType) && sizeof(BitsType) <= sizeof(unsigned long)>> {
+  static constexpr int kNumBits = static_cast<int>(sizeof(BitsType) * CHAR_BIT);
+  static_assert(std::is_integral<BitsType>::value, "BitsType must be a built-in integer");
+  static EIGEN_DEVICE_FUNC inline int clz(BitsType bits) {
+    static constexpr int kLeadingBitsOffset = (sizeof(unsigned long) - sizeof(BitsType)) * CHAR_BIT;
+    const unsigned long widened = static_cast<unsigned long>(bits) & (~0ul >> kLeadingBitsOffset);
+    return widened == 0 ? kNumBits : __builtin_clzl(widened) - kLeadingBitsOffset;
+  }
+
+  static EIGEN_DEVICE_FUNC inline int ctz(BitsType bits) {
+    return bits == 0 ? kNumBits : __builtin_ctzl(static_cast<unsigned long>(bits));
+  }
+};
+
+// Types wider than unsigned long but no wider than unsigned long long.
+template <typename BitsType>
+struct count_bits_impl<BitsType, std::enable_if_t<sizeof(unsigned long) < sizeof(BitsType) &&
+                                                  sizeof(BitsType) <= sizeof(unsigned long long)>> {
+  static constexpr int kNumBits = static_cast<int>(sizeof(BitsType) * CHAR_BIT);
+  static_assert(std::is_integral<BitsType>::value, "BitsType must be a built-in integer");
+  static EIGEN_DEVICE_FUNC inline int clz(BitsType bits) {
+    static constexpr int kLeadingBitsOffset = (sizeof(unsigned long long) - sizeof(BitsType)) * CHAR_BIT;
+    const unsigned long long widened = static_cast<unsigned long long>(bits) & (~0ull >> kLeadingBitsOffset);
+    return widened == 0 ? kNumBits : __builtin_clzll(widened) - kLeadingBitsOffset;
+  }
+
+  static EIGEN_DEVICE_FUNC inline int ctz(BitsType bits) {
+    return bits == 0 ? kNumBits : __builtin_ctzll(static_cast<unsigned long long>(bits));
+  }
+};
+
+#elif EIGEN_COMP_MSVC
+
+// Types no wider than unsigned long: _BitScanReverse / _BitScanForward.
+template <typename BitsType>
+struct count_bits_impl<BitsType, std::enable_if_t<sizeof(BitsType) <= sizeof(unsigned long)>> {
+  static constexpr int kNumBits = static_cast<int>(sizeof(BitsType) * CHAR_BIT);
+  static_assert(std::is_integral<BitsType>::value, "BitsType must be a built-in integer");
+  static EIGEN_DEVICE_FUNC inline int clz(BitsType bits) {
+    static constexpr int kLeadingBitsOffset = static_cast<int>((sizeof(unsigned long) - sizeof(BitsType)) * CHAR_BIT);
+    const unsigned long widened = static_cast<unsigned long>(bits) & (~0ul >> kLeadingBitsOffset);
+    unsigned long out;
+    _BitScanReverse(&out, widened);
+    // _BitScanReverse reports the position of the highest set bit counted from
+    // bit 0, so the leading-zero count is its distance from the top bit.
+    return widened == 0 ? kNumBits : (kNumBits - 1) - static_cast<int>(out);
+  }
+
+  static EIGEN_DEVICE_FUNC inline int ctz(BitsType bits) {
+    unsigned long out;
+    _BitScanForward(&out, static_cast<unsigned long>(bits));
+    return bits == 0 ? kNumBits : static_cast<int>(out);
+  }
+};
+
+#ifdef _WIN64
+
+// Types wider than unsigned long: _BitScanReverse64 / _BitScanForward64.
+template <typename BitsType>
+struct count_bits_impl<
+    BitsType, std::enable_if_t<sizeof(unsigned long) < sizeof(BitsType) && sizeof(BitsType) <= sizeof(__int64)>> {
+  static constexpr int kNumBits = static_cast<int>(sizeof(BitsType) * CHAR_BIT);
+  static_assert(std::is_integral<BitsType>::value, "BitsType must be a built-in integer");
+  static EIGEN_DEVICE_FUNC inline int clz(BitsType bits) {
+    static constexpr int kLeadingBitsOffset = static_cast<int>((sizeof(__int64) - sizeof(BitsType)) * CHAR_BIT);
+    const unsigned __int64 widened = static_cast<unsigned __int64>(bits) & (~0ull >> kLeadingBitsOffset);
+    unsigned long out;
+    _BitScanReverse64(&out, widened);
+    // As above: convert the index of the highest set bit into a leading-zero count.
+    return widened == 0 ? kNumBits : (kNumBits - 1) - static_cast<int>(out);
+  }
+
+  static EIGEN_DEVICE_FUNC inline int ctz(BitsType bits) {
+    unsigned long out;
+    _BitScanForward64(&out, static_cast<unsigned __int64>(bits));
+    return bits == 0 ? kNumBits : static_cast<int>(out);
+  }
+};
+
+#endif  // _WIN64
+
+#endif  // EIGEN_COMP_GNUC || EIGEN_COMP_CLANG
+
 template <typename Scalar>
 struct random_default_impl<Scalar, false, true> {
   static inline Scalar run(const Scalar& x, const Scalar& y) {
diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index fbbc98a..4c7c3a4 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt
@@ -48,7 +48,7 @@ set(SPARSE_LIBS ${SPARSE_LIBS} ${CHOLMOD_LIBRARIES} ${EIGEN_BLAS_LIBRARIES} ${EIGEN_LAPACK_LIBRARIES}) set(CHOLMOD_ALL_LIBS ${CHOLMOD_LIBRARIES} ${EIGEN_BLAS_LIBRARIES} ${EIGEN_LAPACK_LIBRARIES}) ei_add_property(EIGEN_TESTED_BACKENDS "CHOLMOD, ") - + ei_add_test(cholmod_support "" "${CHOLMOD_ALL_LIBS}") else() ei_add_property(EIGEN_MISSING_BACKENDS "CHOLMOD, ") @@ -61,7 +61,7 @@ set(SPARSE_LIBS ${SPARSE_LIBS} ${UMFPACK_LIBRARIES} ${EIGEN_BLAS_LIBRARIES}) set(UMFPACK_ALL_LIBS ${UMFPACK_LIBRARIES} ${EIGEN_BLAS_LIBRARIES}) ei_add_property(EIGEN_TESTED_BACKENDS "UMFPACK, ") - + ei_add_test(umfpack_support "" "${UMFPACK_ALL_LIBS}") else() ei_add_property(EIGEN_MISSING_BACKENDS "UMFPACK, ") @@ -74,7 +74,7 @@ set(SPARSE_LIBS ${SPARSE_LIBS} ${KLU_LIBRARIES} ${EIGEN_BLAS_LIBRARIES}) set(KLU_ALL_LIBS ${KLU_LIBRARIES} ${EIGEN_BLAS_LIBRARIES}) ei_add_property(EIGEN_TESTED_BACKENDS "KLU, ") - + ei_add_test(klu_support "" "${KLU_ALL_LIBS}") else() ei_add_property(EIGEN_MISSING_BACKENDS "KLU, ") @@ -87,7 +87,7 @@ set(SPARSE_LIBS ${SPARSE_LIBS} ${SUPERLU_LIBRARIES} ${EIGEN_BLAS_LIBRARIES}) set(SUPERLU_ALL_LIBS ${SUPERLU_LIBRARIES} ${EIGEN_BLAS_LIBRARIES}) ei_add_property(EIGEN_TESTED_BACKENDS "SuperLU, ") - + ei_add_test(superlu_support "" "${SUPERLU_ALL_LIBS}") else() ei_add_property(EIGEN_MISSING_BACKENDS "SuperLU, ") @@ -171,6 +171,7 @@ set_property(GLOBAL PROPERTY EIGEN_CURRENT_SUBPROJECT "Official") add_custom_target(BuildOfficial) +ei_add_test(clz) ei_add_test(rand) ei_add_test(meta) ei_add_test(maxsizevector) @@ -406,7 +407,7 @@ string(REPLACE "-pedantic" "" CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}") string(REPLACE "-Wundef" "" CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}") string(REPLACE "-Wnon-virtual-dtor" "" CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}") - string(REPLACE "-fno-check-new" "" CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}") + string(REPLACE "-fno-check-new" "" CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}") if(EIGEN_TEST_CUDA_CLANG) string(APPEND CMAKE_CXX_FLAGS " 
--cuda-path=${CUDA_TOOLKIT_ROOT_DIR}") @@ -433,12 +434,12 @@ set(CUDA_NVCC_FLAGS "--expt-relaxed-constexpr -Xcudafe \"--display_error_number\" ${NVCC_ARCH_FLAGS} ${CUDA_NVCC_FLAGS} ${EIGEN_CUDA_CXX_FLAGS}") cuda_include_directories("${CMAKE_CURRENT_BINARY_DIR}" "${CUDA_TOOLKIT_ROOT_DIR}/include") endif() - + set(EIGEN_ADD_TEST_FILENAME_EXTENSION "cu") - + ei_add_test(gpu_example) ei_add_test(gpu_basic) - + unset(EIGEN_ADD_TEST_FILENAME_EXTENSION) endif() @@ -477,7 +478,7 @@ message(FATAL_ERROR "HIP_PLATFORM = nvcc is not supported within Eigen") else () message(FATAL_ERROR "Unknown HIP_PLATFORM = ${HIP_PLATFORM}") - endif() + endif() endif() endif()
diff --git a/test/clz.cpp b/test/clz.cpp new file mode 100644 index 0000000..b56d328 --- /dev/null +++ b/test/clz.cpp
@@ -0,0 +1,103 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2023 The Eigen Authors
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+#include "main.h"
+
+// Reference implementation: counts the left shifts needed to bring a set bit
+// into the top position, stopping at the bit width of T.
+template <typename T>
+int ref_clz(T val) {
+  constexpr int kNumBits = sizeof(T) * CHAR_BIT;
+  T kMsbMask = T(1) << (kNumBits - 1);
+  int z = 0;
+  for (; z < kNumBits && ((val & kMsbMask) == 0); ++z) {
+    val <<= 1;
+  }
+  return z;
+}
+
+// Reference implementation: counts the right shifts needed to bring a set bit
+// into the bottom position, stopping at the bit width of T.
+template <typename T>
+int ref_ctz(T val) {
+  constexpr int kNumBits = sizeof(T) * CHAR_BIT;
+  T kLsbMask = T(1);
+  int z = 0;
+  for (; z < kNumBits && ((val & kLsbMask) == 0); ++z) {
+    val >>= 1;
+  }
+  return z;
+}
+
+// Inputs with a closed-form answer: zero, every single-bit value and the
+// all-ones value. The sweep in test_clz_ctz stops short of the all-ones value,
+// and its stride for the wider types can skip the single-bit values.
+template <typename T>
+void test_clz_ctz_edge_cases() {
+  constexpr int kNumBits = static_cast<int>(sizeof(T) * CHAR_BIT);
+  VERIFY(Eigen::internal::clz(T(0)) == kNumBits);
+  VERIFY(Eigen::internal::ctz(T(0)) == kNumBits);
+  for (int bit = 0; bit < kNumBits; ++bit) {
+    const T val = static_cast<T>(T(1) << bit);
+    VERIFY(Eigen::internal::clz(val) == kNumBits - 1 - bit);
+    VERIFY(Eigen::internal::ctz(val) == bit);
+  }
+  const T all_ones = static_cast<T>(~T(0));
+  VERIFY(Eigen::internal::clz(all_ones) == 0);
+  VERIFY(Eigen::internal::ctz(all_ones) == 0);
+}
+
+// Compares clz/ctz against the reference over a sweep of multiples of `step`
+// (step is 1 for types of at most two bytes).
+template <typename T>
+void test_clz_ctz() {
+  T step = sizeof(T) <= 2 ? 1 : (Eigen::NumTraits<T>::highest() / (T(1) << 16));
+  T iters = Eigen::NumTraits<T>::highest() / step;
+  for (T i = 0; i < iters; ++i) {
+    T val = i * step;
+    int expected_clz = ref_clz(val);
+    int actual_clz = Eigen::internal::clz(val);
+    VERIFY(expected_clz == actual_clz);
+
+    int expected_ctz = ref_ctz(val);
+    int actual_ctz = Eigen::internal::ctz(val);
+    VERIFY(expected_ctz == actual_ctz);
+  }
+}
+
+// Compares clz/ctz against the reference on 1024 * 1024 random values.
+template <typename T>
+void test_clz_ctz_random() {
+  for (int i = 0; i < 1024 * 1024; ++i) {
+    T val = Eigen::internal::random<T>();
+    int expected_clz = ref_clz(val);
+    int actual_clz = Eigen::internal::clz(val);
+    VERIFY(expected_clz == actual_clz);
+
+    int expected_ctz = ref_ctz(val);
+    int actual_ctz = Eigen::internal::ctz(val);
+    VERIFY(expected_ctz == actual_ctz);
+  }
+}
+
+EIGEN_DECLARE_TEST(clz) {
+  CALL_SUBTEST_1(test_clz_ctz_edge_cases<uint8_t>());
+  CALL_SUBTEST_1(test_clz_ctz<uint8_t>());
+  CALL_SUBTEST_2(test_clz_ctz_edge_cases<uint16_t>());
+  CALL_SUBTEST_2(test_clz_ctz<uint16_t>());
+  CALL_SUBTEST_3(test_clz_ctz_edge_cases<uint32_t>());
+  CALL_SUBTEST_3(test_clz_ctz<uint32_t>());
+  CALL_SUBTEST_4(test_clz_ctz_edge_cases<uint64_t>());
+  CALL_SUBTEST_4(test_clz_ctz<uint64_t>());
+
+  for (int i = 0; i < g_repeat; i++) {
+    test_clz_ctz_random<uint32_t>();
+    test_clz_ctz_random<uint64_t>();
+  }
+}