Add a std::this_thread::yield() call to the two spin loops of the threaded matmul implementation.
diff --git a/Eigen/src/Core/products/GeneralMatrixMatrix.h b/Eigen/src/Core/products/GeneralMatrixMatrix.h
index e9d0cae..ebfac01 100644
--- a/Eigen/src/Core/products/GeneralMatrixMatrix.h
+++ b/Eigen/src/Core/products/GeneralMatrixMatrix.h
@@ -97,6 +97,7 @@
         // Then, we set info->task_info[tid].users to the number of threads to mark that all other threads are going to
         // use it.
         while (info->task_info[tid].users != 0) {
+          std::this_thread::yield();
         }
         info->task_info[tid].users = threads;
 
@@ -115,6 +116,7 @@
           // However, no need to wait for the B' part which has been updated by the current thread!
           if (shift > 0) {
             while (info->task_info[i].sync != k) {
+              std::this_thread::yield();
             }
           }