| // g++ -O3 -DNDEBUG -I.. -L /usr/lib64/atlas/ benchBlasGemm.cpp -o benchBlasGemm -lrt -lcblas |
| // possible options: |
| // -DEIGEN_DONT_VECTORIZE |
| // -msse2 |
| |
| // #define EIGEN_DEFAULT_TO_ROW_MAJOR |
| #define _FLOAT |
| |
| #include <iostream> |
| |
| #include <Eigen/Core> |
| #include "BenchTimer.h" |
| |
| // include the BLAS headers |
| extern "C" { |
| #include <cblas.h> |
| } |
| #include <string> |
| |
| #ifdef _FLOAT |
| typedef float Scalar; |
| #define CBLAS_GEMM cblas_sgemm |
| #else |
| typedef double Scalar; |
| #define CBLAS_GEMM cblas_dgemm |
| #endif |
| |
| typedef Eigen::Matrix<Scalar, Eigen::Dynamic, Eigen::Dynamic> MyMatrix; |
| void bench_eigengemm(MyMatrix& mc, const MyMatrix& ma, const MyMatrix& mb, int nbloops); |
| void check_product(int M, int N, int K); |
| void check_product(void); |
| |
| int main(int argc, char* argv[]) { |
| // disable SSE exceptions |
| #ifdef __GNUC__ |
| { |
| int aux; |
| asm("stmxcsr %[aux] \n\t" |
| "orl $32832, %[aux] \n\t" |
| "ldmxcsr %[aux] \n\t" |
| : |
| : [aux] "m"(aux)); |
| } |
| #endif |
| |
| int nbtries = 1, nbloops = 1, M, N, K; |
| |
| if (argc == 2) { |
| if (std::string(argv[1]) == "check") |
| check_product(); |
| else |
| M = N = K = atoi(argv[1]); |
| } else if ((argc == 3) && (std::string(argv[1]) == "auto")) { |
| M = N = K = atoi(argv[2]); |
| nbloops = 1000000000 / (M * M * M); |
| if (nbloops < 1) nbloops = 1; |
| nbtries = 6; |
| } else if (argc == 4) { |
| M = N = K = atoi(argv[1]); |
| nbloops = atoi(argv[2]); |
| nbtries = atoi(argv[3]); |
| } else if (argc == 6) { |
| M = atoi(argv[1]); |
| N = atoi(argv[2]); |
| K = atoi(argv[3]); |
| nbloops = atoi(argv[4]); |
| nbtries = atoi(argv[5]); |
| } else { |
| std::cout << "Usage: " << argv[0] << " size \n"; |
| std::cout << "Usage: " << argv[0] << " auto size\n"; |
| std::cout << "Usage: " << argv[0] << " size nbloops nbtries\n"; |
| std::cout << "Usage: " << argv[0] << " M N K nbloops nbtries\n"; |
| std::cout << "Usage: " << argv[0] << " check\n"; |
| std::cout << "Options:\n"; |
| std::cout << " size unique size of the 2 matrices (integer)\n"; |
| std::cout << " auto automatically set the number of repetitions and tries\n"; |
| std::cout << " nbloops number of times the GEMM routines is executed\n"; |
| std::cout << " nbtries number of times the loop is benched (return the best try)\n"; |
| std::cout << " M N K sizes of the matrices: MxN = MxK * KxN (integers)\n"; |
| std::cout << " check check eigen product using cblas as a reference\n"; |
| exit(1); |
| } |
| |
| double nbmad = double(M) * double(N) * double(K) * double(nbloops); |
| |
| if (!(std::string(argv[1]) == "auto")) std::cout << M << " x " << N << " x " << K << "\n"; |
| |
| Scalar alpha, beta; |
| MyMatrix ma(M, K), mb(K, N), mc(M, N); |
| ma = MyMatrix::Random(M, K); |
| mb = MyMatrix::Random(K, N); |
| mc = MyMatrix::Random(M, N); |
| |
| Eigen::BenchTimer timer; |
| |
| // we simply compute c += a*b, so: |
| alpha = 1; |
| beta = 1; |
| |
| // bench cblas |
| // ROWS_A, COLS_B, COLS_A, 1.0, A, COLS_A, B, COLS_B, 0.0, C, COLS_B); |
| if (!(std::string(argv[1]) == "auto")) { |
| timer.reset(); |
| for (uint k = 0; k < nbtries; ++k) { |
| timer.start(); |
| for (uint j = 0; j < nbloops; ++j) |
| #ifdef EIGEN_DEFAULT_TO_ROW_MAJOR |
| CBLAS_GEMM(CblasRowMajor, CblasNoTrans, CblasNoTrans, M, N, K, alpha, ma.data(), K, mb.data(), N, beta, |
| mc.data(), N); |
| #else |
| CBLAS_GEMM(CblasColMajor, CblasNoTrans, CblasNoTrans, M, N, K, alpha, ma.data(), M, mb.data(), K, beta, |
| mc.data(), M); |
| #endif |
| timer.stop(); |
| } |
| if (!(std::string(argv[1]) == "auto")) |
| std::cout << "cblas: " << timer.value() << " (" << 1e-3 * floor(1e-6 * nbmad / timer.value()) << " GFlops/s)\n"; |
| else |
| std::cout << M << " : " << timer.value() << " ; " << 1e-3 * floor(1e-6 * nbmad / timer.value()) << "\n"; |
| } |
| |
| // clear |
| ma = MyMatrix::Random(M, K); |
| mb = MyMatrix::Random(K, N); |
| mc = MyMatrix::Random(M, N); |
| |
| // eigen |
| // if (!(std::string(argv[1])=="auto")) |
| { |
| timer.reset(); |
| for (uint k = 0; k < nbtries; ++k) { |
| timer.start(); |
| bench_eigengemm(mc, ma, mb, nbloops); |
| timer.stop(); |
| } |
| if (!(std::string(argv[1]) == "auto")) |
| std::cout << "eigen : " << timer.value() << " (" << 1e-3 * floor(1e-6 * nbmad / timer.value()) << " GFlops/s)\n"; |
| else |
| std::cout << M << " : " << timer.value() << " ; " << 1e-3 * floor(1e-6 * nbmad / timer.value()) << "\n"; |
| } |
| |
| std::cout << "l1: " << Eigen::l1CacheSize() << std::endl; |
| std::cout << "l2: " << Eigen::l2CacheSize() << std::endl; |
| |
| return 0; |
| } |
| |
| using namespace Eigen; |
| |
| void bench_eigengemm(MyMatrix& mc, const MyMatrix& ma, const MyMatrix& mb, int nbloops) { |
| for (uint j = 0; j < nbloops; ++j) mc.noalias() += ma * mb; |
| } |
| |
| #define MYVERIFY(A, M) \ |
| if (!(A)) { \ |
| std::cout << "FAIL: " << M << "\n"; \ |
| } |
| void check_product(int M, int N, int K) { |
| MyMatrix ma(M, K), mb(K, N), mc(M, N), maT(K, M), mbT(N, K), meigen(M, N), mref(M, N); |
| ma = MyMatrix::Random(M, K); |
| mb = MyMatrix::Random(K, N); |
| maT = ma.transpose(); |
| mbT = mb.transpose(); |
| mc = MyMatrix::Random(M, N); |
| |
| MyMatrix::Scalar eps = 1e-4; |
| |
| meigen = mref = mc; |
| CBLAS_GEMM(CblasColMajor, CblasNoTrans, CblasNoTrans, M, N, K, 1, ma.data(), M, mb.data(), K, 1, mref.data(), M); |
| meigen += ma * mb; |
| MYVERIFY(meigen.isApprox(mref, eps), ". * ."); |
| |
| meigen = mref = mc; |
| CBLAS_GEMM(CblasColMajor, CblasTrans, CblasNoTrans, M, N, K, 1, maT.data(), K, mb.data(), K, 1, mref.data(), M); |
| meigen += maT.transpose() * mb; |
| MYVERIFY(meigen.isApprox(mref, eps), "T * ."); |
| |
| meigen = mref = mc; |
| CBLAS_GEMM(CblasColMajor, CblasTrans, CblasTrans, M, N, K, 1, maT.data(), K, mbT.data(), N, 1, mref.data(), M); |
| meigen += (maT.transpose()) * (mbT.transpose()); |
| MYVERIFY(meigen.isApprox(mref, eps), "T * T"); |
| |
| meigen = mref = mc; |
| CBLAS_GEMM(CblasColMajor, CblasNoTrans, CblasTrans, M, N, K, 1, ma.data(), M, mbT.data(), N, 1, mref.data(), M); |
| meigen += ma * mbT.transpose(); |
| MYVERIFY(meigen.isApprox(mref, eps), ". * T"); |
| } |
| |
| void check_product(void) { |
| int M, N, K; |
| for (uint i = 0; i < 1000; ++i) { |
| M = internal::random<int>(1, 64); |
| N = internal::random<int>(1, 768); |
| K = internal::random<int>(1, 768); |
| M = (0 + M) * 1; |
| std::cout << M << " x " << N << " x " << K << "\n"; |
| check_product(M, N, K); |
| } |
| } |