// This file is part of Eigen, a lightweight C++ template library
// for linear algebra.
//
// Copyright (C) 2016 Benoit Steiner <benoit.steiner.goog@gmail.com>
//
// This Source Code Form is subject to the terms of the Mozilla
// Public License v. 2.0. If a copy of the MPL was not distributed
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.

#ifndef EIGEN_CXX11_THREADPOOL_THREAD_LOCAL_H
#define EIGEN_CXX11_THREADPOOL_THREAD_LOCAL_H

#ifdef EIGEN_AVOID_THREAD_LOCAL

#ifdef EIGEN_THREAD_LOCAL
#undef EIGEN_THREAD_LOCAL
#endif

#else

#if EIGEN_MAX_CPP_VER >= 11 &&                         \
    ((EIGEN_COMP_GNUC && EIGEN_GNUC_AT_LEAST(4, 8)) || \
     __has_feature(cxx_thread_local)                || \
     (EIGEN_COMP_MSVC >= 1900) )
#define EIGEN_THREAD_LOCAL static thread_local
#endif
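
// When defined, EIGEN_THREAD_LOCAL can be used as a storage specifier, e.g.
// (a sketch):
//
//   EIGEN_THREAD_LOCAL int counter;  // i.e. `static thread_local int counter;`
//
// Portable code is expected to check `#ifdef EIGEN_THREAD_LOCAL` and fall
// back to a non-TLS mechanism, such as the Eigen::ThreadLocal container
// defined below, when the macro is left undefined.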

// Disable TLS for Apple and Android builds with older toolchains.
#if defined(__APPLE__)
// Included for TARGET_OS_IPHONE, __IPHONE_OS_VERSION_MIN_REQUIRED,
// __IPHONE_8_0.
#include <Availability.h>
#include <TargetConditionals.h>
#endif
// Checks whether C++11's `thread_local` storage duration specifier is
// supported.
#if defined(__apple_build_version__) &&     \
    ((__apple_build_version__ < 8000042) || \
     (TARGET_OS_IPHONE && __IPHONE_OS_VERSION_MIN_REQUIRED < __IPHONE_9_0))
// Notes: Xcode's clang did not support `thread_local` until version
// 8, and even then not for all iOS < 9.0.
#undef EIGEN_THREAD_LOCAL

#elif defined(__ANDROID__) && EIGEN_COMP_CLANG
// There are platforms for which TLS should not be used even though the
// compiler makes it seem like it's supported (Android NDK < r12b for example).
// This is primarily because of linker problems and toolchain misconfiguration:
// TLS isn't supported until NDK r12b per
// https://developer.android.com/ndk/downloads/revision_history.html
// Since NDK r16, `__NDK_MAJOR__` and `__NDK_MINOR__` are defined in
// <android/ndk-version.h>. For NDK < r16, users should define these macros,
// e.g. `-D__NDK_MAJOR__=11 -D__NDK_MINOR__=0` for NDK r11.
#if __has_include(<android/ndk-version.h>)
#include <android/ndk-version.h>
#endif  // __has_include(<android/ndk-version.h>)
#if defined(__ANDROID__) && defined(__clang__) && defined(__NDK_MAJOR__) && \
    defined(__NDK_MINOR__) &&                                               \
    ((__NDK_MAJOR__ < 12) || ((__NDK_MAJOR__ == 12) && (__NDK_MINOR__ < 1)))
#undef EIGEN_THREAD_LOCAL
#endif
#endif  // defined(__ANDROID__) && defined(__clang__)

#endif  // EIGEN_AVOID_THREAD_LOCAL

#include "./InternalHeaderCheck.h"

namespace Eigen {

namespace internal {
template <typename T>
struct ThreadLocalNoOpInitialize {
  void operator()(T&) const {}
};

template <typename T>
struct ThreadLocalNoOpRelease {
  void operator()(T&) const {}
};

}  // namespace internal
// Thread local container for elements of type T that does not use thread
// local storage. As long as the number of unique threads accessing this
// storage is smaller than `capacity_`, it is lock-free and wait-free.
// Otherwise it will use a mutex for synchronization.
//
// Type `T` has to be default constructible, and by default each thread will
// get a default constructed value. It is possible to specify a custom
// `initialize` callable that will be called lazily from each thread accessing
// this object, and that will be passed a default initialized object of type
// `T`. It is also possible to pass a custom `release` callable that will be
// invoked before calling ~T().
//
// Example:
//
//   struct Counter {
//     int value = 0;
//   };
//
//   Eigen::ThreadLocal<Counter> counter(10);
//
//   // Each thread will have access to its own counter object.
//   Counter& cnt = counter.local();
//   cnt.value++;
//
// WARNING: Eigen::ThreadLocal uses the OS-specific value returned by
// std::this_thread::get_id() to identify threads. This value is not guaranteed
// to be unique except for the life of the thread. A newly created thread may
// get an OS-specific ID equal to that of an already destroyed thread.
//
// Somewhat similar to TBB thread local storage, with similar restrictions:
// https://www.threadingbuildingblocks.org/docs/help/reference/thread_local_storage/enumerable_thread_specific_cls.html
//
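// A sketch of custom `initialize` and `release` callables for the Counter
// type above (the lambda bodies here are illustrative, not part of the
// library):
//
//   auto initialize = [](Counter& c) { c.value = 42; };
//   auto release = [](Counter& c) { std::printf("final: %d\n", c.value); };
//
//   Eigen::ThreadLocal<Counter, decltype(initialize), decltype(release)>
//       counter(10, initialize, release);
//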
template <typename T,
          typename Initialize = internal::ThreadLocalNoOpInitialize<T>,
          typename Release = internal::ThreadLocalNoOpRelease<T>>
class ThreadLocal {
  // We preallocate default constructed elements in MaxSizeVector.
  static_assert(std::is_default_constructible<T>::value,
                "ThreadLocal data type must be default constructible");

 public:
  explicit ThreadLocal(int capacity)
      : ThreadLocal(capacity, internal::ThreadLocalNoOpInitialize<T>(),
                    internal::ThreadLocalNoOpRelease<T>()) {}

  ThreadLocal(int capacity, Initialize initialize)
      : ThreadLocal(capacity, std::move(initialize),
                    internal::ThreadLocalNoOpRelease<T>()) {}

  ThreadLocal(int capacity, Initialize initialize, Release release)
      : initialize_(std::move(initialize)),
        release_(std::move(release)),
        capacity_(capacity),
        data_(capacity_),
        ptr_(capacity_),
        filled_records_(0) {
    eigen_assert(capacity_ >= 0);
    data_.resize(capacity_);
    for (int i = 0; i < capacity_; ++i) {
      ptr_.emplace_back(nullptr);
    }
  }

  T& local() {
    std::thread::id this_thread = std::this_thread::get_id();
    if (capacity_ == 0) return SpilledLocal(this_thread);

    std::size_t h = std::hash<std::thread::id>()(this_thread);
    const int start_idx = h % capacity_;

    // NOTE: From the definition of `std::this_thread::get_id()` it is
    // guaranteed that we can never have concurrent insertions with the same
    // key into our hash-map-like data structure. If we didn't find an element
    // during the initial traversal, it's guaranteed that no one else could
    // have inserted it while we are in this function. This allows us to
    // massively simplify our lock-free insert-only hash map.

    // Check if we already have an element for `this_thread`.
    int idx = start_idx;
    while (ptr_[idx].load() != nullptr) {
      ThreadIdAndValue& record = *(ptr_[idx].load());
      if (record.thread_id == this_thread) return record.value;

      idx += 1;
      if (idx >= capacity_) idx -= capacity_;
      if (idx == start_idx) break;
    }

    // If we are here, it means that we either found an insertion point in the
    // lookup table at `idx`, or we did a full traversal and the table is full.

    // If the lock-free storage is full, fall back on the mutex.
    if (filled_records_.load() >= capacity_) return SpilledLocal(this_thread);

    // We double-check that we still have space to insert an element into the
    // lock-free storage. If the old value of `filled_records_` is greater than
    // or equal to the capacity, it means that some other thread added an
    // element while we were traversing the lookup table.
    int insertion_index =
        filled_records_.fetch_add(1, std::memory_order_relaxed);
    if (insertion_index >= capacity_) return SpilledLocal(this_thread);

    // At this point it's guaranteed that we can access
    // `data_[insertion_index]` without a data race.
    data_[insertion_index].thread_id = this_thread;
    initialize_(data_[insertion_index].value);

    // That's the pointer we'll put into the lookup table.
    ThreadIdAndValue* inserted = &data_[insertion_index];

    // We'll use a nullptr ThreadIdAndValue pointer in the compare-and-swap
    // loop below.
    ThreadIdAndValue* empty = nullptr;

    // Now we have to find an insertion point in the lookup table. We start
    // from the `idx` that was identified as an insertion point above; it's
    // guaranteed that we will find an empty record somewhere in the lookup
    // table (because we created a record in `data_`).
    const int insertion_idx = idx;

    do {
      // Always start the search from the original insertion candidate.
      idx = insertion_idx;
      while (ptr_[idx].load() != nullptr) {
        idx += 1;
        if (idx >= capacity_) idx -= capacity_;
        // If we did a full loop, it means that we don't have any free entries
        // in the lookup table, and that something is terribly wrong.
        eigen_assert(idx != insertion_idx);
      }
      // An atomic CAS of the pointer guarantees that any other thread that
      // follows this pointer will see all the mutations in `data_`.
    } while (!ptr_[idx].compare_exchange_weak(empty, inserted));

    return inserted->value;
  }

  // WARNING: It is not thread-safe to call this concurrently with `local()`.
  void ForEach(std::function<void(std::thread::id, T&)> f) {
    // Reading directly from `data_` is unsafe, because only a CAS to the
    // record in `ptr_` makes all changes visible to other threads.
    for (auto& ptr : ptr_) {
      ThreadIdAndValue* record = ptr.load();
      if (record == nullptr) continue;
      f(record->thread_id, record->value);
    }

    // We did not spill into the map-based storage.
    if (filled_records_.load(std::memory_order_relaxed) < capacity_) return;

    // Adds a happens-before edge from the last call to SpilledLocal().
    std::unique_lock<std::mutex> lock(mu_);
    for (auto& kv : per_thread_map_) {
      f(kv.first, kv.second);
    }
  }
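
  // For example, to aggregate all per-thread values once the worker threads
  // are done (a sketch; `counter` is the Eigen::ThreadLocal<Counter> from the
  // class comment above):
  //
  //   int total = 0;
  //   counter.ForEach(
  //       [&total](std::thread::id, Counter& c) { total += c.value; });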

  // WARNING: It is not thread-safe to call this concurrently with `local()`.
  ~ThreadLocal() {
    // Reading directly from `data_` is unsafe, because only a CAS to the
    // record in `ptr_` makes all changes visible to other threads.
    for (auto& ptr : ptr_) {
      ThreadIdAndValue* record = ptr.load();
      if (record == nullptr) continue;
      release_(record->value);
    }

    // We did not spill into the map-based storage.
    if (filled_records_.load(std::memory_order_relaxed) < capacity_) return;

    // Adds a happens-before edge from the last call to SpilledLocal().
    std::unique_lock<std::mutex> lock(mu_);
    for (auto& kv : per_thread_map_) {
      release_(kv.second);
    }
  }

 private:
  struct ThreadIdAndValue {
    std::thread::id thread_id;
    T value;
  };

  // Use an unordered map guarded by a mutex when the lock-free storage is
  // full.
  T& SpilledLocal(std::thread::id this_thread) {
    std::unique_lock<std::mutex> lock(mu_);

    auto it = per_thread_map_.find(this_thread);
    if (it == per_thread_map_.end()) {
      auto result = per_thread_map_.emplace(this_thread, T());
      eigen_assert(result.second);
      initialize_((*result.first).second);
      return (*result.first).second;
    } else {
      return it->second;
    }
  }

  Initialize initialize_;
  Release release_;
  const int capacity_;

  // Storage that backs the lock-free lookup table `ptr_`. Records are stored
  // in this storage contiguously, starting from index 0.
  MaxSizeVector<ThreadIdAndValue> data_;

  // Atomic pointers to the data stored in `data_`. Used as a lookup table for
  // a linear probing hash map (https://en.wikipedia.org/wiki/Linear_probing).
  MaxSizeVector<std::atomic<ThreadIdAndValue*>> ptr_;

  // Number of records stored in `data_`.
  std::atomic<int> filled_records_;

  // We fall back on a per-thread map if the lock-free storage is full. In
  // practice this should never happen if `capacity_` is a reasonable estimate
  // of the number of threads running in the system.
  std::mutex mu_;  // Protects per_thread_map_.
  std::unordered_map<std::thread::id, T> per_thread_map_;
};

}  // namespace Eigen

#endif  // EIGEN_CXX11_THREADPOOL_THREAD_LOCAL_H