Call std::this_thread::yield() inside the two spin loops of the threaded matmul implementation.
diff --git a/Eigen/src/Core/products/GeneralMatrixMatrix.h b/Eigen/src/Core/products/GeneralMatrixMatrix.h
index e9d0cae..ebfac01 100644
--- a/Eigen/src/Core/products/GeneralMatrixMatrix.h
+++ b/Eigen/src/Core/products/GeneralMatrixMatrix.h
@@ -97,6 +97,7 @@
// Then, we set info->task_info[tid].users to the number of threads to mark that all other threads are going to
// use it.
while (info->task_info[tid].users != 0) {
+ std::this_thread::yield();
}
info->task_info[tid].users = threads;
@@ -115,6 +116,7 @@
// However, no need to wait for the B' part which has been updated by the current thread!
if (shift > 0) {
while (info->task_info[i].sync != k) {
+ std::this_thread::yield();
}
}