| #define EIGEN_USE_THREADS |
| |
| #include <string> |
| |
| #include "tensor_benchmarks.h" |
| |
| #define CREATE_THREAD_POOL(threads) \ |
| Eigen::ThreadPool pool(threads); \ |
| Eigen::ThreadPoolDevice device(&pool, threads); |
| |
| // Contractions for number of threads ranging from 1 to 32 |
| // Dimensions are Rows, Cols, Depth |
| #define BM_ContractionCPU(D1, D2, D3) \ |
| static void BM_##Contraction##_##D1##x##D2##x##D3(int iters, int Threads) { \ |
| StopBenchmarkTiming(); \ |
| CREATE_THREAD_POOL(Threads); \ |
| BenchmarkSuite<Eigen::ThreadPoolDevice, float> suite(device, D1, D2, D3); \ |
| suite.contraction(iters); \ |
| } \ |
| BENCHMARK_RANGE(BM_##Contraction##_##D1##x##D2##x##D3, 1, 32); |
| |
| // Vector Matrix and Matrix Vector products |
| BM_ContractionCPU(1, 2000, 500); |
| BM_ContractionCPU(2000, 1, 500); |
| |
| // Various skinny matrices |
| BM_ContractionCPU(250, 3, 512); |
| BM_ContractionCPU(1500, 3, 512); |
| |
| BM_ContractionCPU(512, 800, 4); |
| BM_ContractionCPU(512, 80, 800); |
| BM_ContractionCPU(512, 80, 13522); |
| BM_ContractionCPU(1, 80, 13522); |
| |
| BM_ContractionCPU(3200, 512, 4); |
| BM_ContractionCPU(3200, 512, 80); |
| BM_ContractionCPU(3200, 80, 512); |