| |
| // g++-4.2 -O3 -DNDEBUG -I.. benchBlasGemm.cpp /usr/lib/libcblas.so.3 - o benchBlasGemm |
| // possible options: |
| // -DEIGEN_DONT_VECTORIZE |
| // -msse2 |
| |
| // #define EIGEN_DEFAULT_TO_ROW_MAJOR |
| #define _FLOAT |
| |
| #include <iostream> |
| |
| #include <Eigen/Core> |
| #include "BenchTimer.h" |
| |
| // include the BLAS headers |
| #include <cblas.h> |
| #include <string> |
| |
| #ifdef _FLOAT |
| typedef float Scalar; |
| #define CBLAS_GEMM cblas_sgemm |
| #else |
| typedef double Scalar; |
| #define CBLAS_GEMM cblas_dgemm |
| #endif |
| |
| |
| typedef Eigen::Matrix<Scalar,Eigen::Dynamic,Eigen::Dynamic> MyMatrix; |
| void bench_eigengemm(MyMatrix& mc, const MyMatrix& ma, const MyMatrix& mb, int nbloops); |
| void bench_eigengemm_normal(MyMatrix& mc, const MyMatrix& ma, const MyMatrix& mb, int nbloops); |
| void check_product(int M, int N, int K); |
| void check_product(void); |
| |
| int main(int argc, char *argv[]) |
| { |
| // disable SSE exceptions |
| #ifdef __GNUC__ |
| { |
| int aux; |
| asm( |
| "stmxcsr %[aux] \n\t" |
| "orl $32832, %[aux] \n\t" |
| "ldmxcsr %[aux] \n\t" |
| : : [aux] "m" (aux)); |
| } |
| #endif |
| |
| int nbtries=1, nbloops=1, M, N, K; |
| |
| if (argc==2) |
| { |
| if (std::string(argv[1])=="check") |
| check_product(); |
| else |
| M = N = K = atoi(argv[1]); |
| } |
| else if ((argc==3) && (std::string(argv[1])=="auto")) |
| { |
| M = N = K = atoi(argv[2]); |
| nbloops = 1000000000/(M*M*M); |
| if (nbloops<1) |
| nbloops = 1; |
| nbtries = 6; |
| } |
| else if (argc==4) |
| { |
| M = N = K = atoi(argv[1]); |
| nbloops = atoi(argv[2]); |
| nbtries = atoi(argv[3]); |
| } |
| else if (argc==6) |
| { |
| M = atoi(argv[1]); |
| N = atoi(argv[2]); |
| K = atoi(argv[3]); |
| nbloops = atoi(argv[4]); |
| nbtries = atoi(argv[5]); |
| } |
| else |
| { |
| std::cout << "Usage: " << argv[0] << " size \n"; |
| std::cout << "Usage: " << argv[0] << " auto size\n"; |
| std::cout << "Usage: " << argv[0] << " size nbloops nbtries\n"; |
| std::cout << "Usage: " << argv[0] << " M N K nbloops nbtries\n"; |
| std::cout << "Usage: " << argv[0] << " check\n"; |
| std::cout << "Options:\n"; |
| std::cout << " size unique size of the 2 matrices (integer)\n"; |
| std::cout << " auto automatically set the number of repetitions and tries\n"; |
| std::cout << " nbloops number of times the GEMM routines is executed\n"; |
| std::cout << " nbtries number of times the loop is benched (return the best try)\n"; |
| std::cout << " M N K sizes of the matrices: MxN = MxK * KxN (integers)\n"; |
| std::cout << " check check eigen product using cblas as a reference\n"; |
| exit(1); |
| } |
| |
| double nbmad = double(M) * double(N) * double(K) * double(nbloops); |
| |
| if (!(std::string(argv[1])=="auto")) |
| std::cout << M << " x " << N << " x " << K << "\n"; |
| |
| Scalar alpha, beta; |
| MyMatrix ma(M,K), mb(K,N), mc(M,N); |
| ma = MyMatrix::Random(M,K); |
| mb = MyMatrix::Random(K,N); |
| mc = MyMatrix::Random(M,N); |
| |
| Eigen::BenchTimer timer; |
| |
| // we simply compute c += a*b, so: |
| alpha = 1; |
| beta = 1; |
| |
| // bench cblas |
| // ROWS_A, COLS_B, COLS_A, 1.0, A, COLS_A, B, COLS_B, 0.0, C, COLS_B); |
| if (!(std::string(argv[1])=="auto")) |
| { |
| timer.reset(); |
| for (uint k=0 ; k<nbtries ; ++k) |
| { |
| timer.start(); |
| for (uint j=0 ; j<nbloops ; ++j) |
| #ifdef EIGEN_DEFAULT_TO_ROW_MAJOR |
| CBLAS_GEMM(CblasRowMajor, CblasNoTrans, CblasNoTrans, M, N, K, alpha, ma.data(), K, mb.data(), N, beta, mc.data(), N); |
| #else |
| CBLAS_GEMM(CblasColMajor, CblasNoTrans, CblasNoTrans, M, N, K, alpha, ma.data(), M, mb.data(), K, beta, mc.data(), M); |
| #endif |
| timer.stop(); |
| } |
| if (!(std::string(argv[1])=="auto")) |
| std::cout << "cblas: " << timer.value() << " (" << 1e-3*floor(1e-6*nbmad/timer.value()) << " GFlops/s)\n"; |
| else |
| std::cout << M << " : " << timer.value() << " ; " << 1e-3*floor(1e-6*nbmad/timer.value()) << "\n"; |
| } |
| |
| // clear |
| ma = MyMatrix::Random(M,K); |
| mb = MyMatrix::Random(K,N); |
| mc = MyMatrix::Random(M,N); |
| |
| // eigen |
| // if (!(std::string(argv[1])=="auto")) |
| { |
| timer.reset(); |
| for (uint k=0 ; k<nbtries ; ++k) |
| { |
| timer.start(); |
| bench_eigengemm(mc, ma, mb, nbloops); |
| timer.stop(); |
| } |
| if (!(std::string(argv[1])=="auto")) |
| std::cout << "eigen : " << timer.value() << " (" << 1e-3*floor(1e-6*nbmad/timer.value()) << " GFlops/s)\n"; |
| else |
| std::cout << M << " : " << timer.value() << " ; " << 1e-3*floor(1e-6*nbmad/timer.value()) << "\n"; |
| } |
| |
| // clear |
| ma = MyMatrix::Random(M,K); |
| mb = MyMatrix::Random(K,N); |
| mc = MyMatrix::Random(M,N); |
| |
| // eigen normal |
| if (!(std::string(argv[1])=="auto")) |
| { |
| timer.reset(); |
| for (uint k=0 ; k<nbtries ; ++k) |
| { |
| timer.start(); |
| bench_eigengemm_normal(mc, ma, mb, nbloops); |
| timer.stop(); |
| } |
| std::cout << "eigen : " << timer.value() << " (" << 1e-3*floor(1e-6*nbmad/timer.value()) << " GFlops/s)\n"; |
| } |
| |
| return 0; |
| } |
| |
| using namespace Eigen; |
| |
| void bench_eigengemm(MyMatrix& mc, const MyMatrix& ma, const MyMatrix& mb, int nbloops) |
| { |
| for (uint j=0 ; j<nbloops ; ++j) |
| mc.noalias() += ma * mb; |
| } |
| |
| void bench_eigengemm_normal(MyMatrix& mc, const MyMatrix& ma, const MyMatrix& mb, int nbloops) |
| { |
| for (uint j=0 ; j<nbloops ; ++j) |
| mc.noalias() += GeneralProduct<MyMatrix,MyMatrix,UnrolledProduct>(ma,mb); |
| } |
| |
| #define MYVERIFY(A,M) if (!(A)) { \ |
| std::cout << "FAIL: " << M << "\n"; \ |
| } |
| void check_product(int M, int N, int K) |
| { |
| MyMatrix ma(M,K), mb(K,N), mc(M,N), maT(K,M), mbT(N,K), meigen(M,N), mref(M,N); |
| ma = MyMatrix::Random(M,K); |
| mb = MyMatrix::Random(K,N); |
| maT = ma.transpose(); |
| mbT = mb.transpose(); |
| mc = MyMatrix::Random(M,N); |
| |
| MyMatrix::Scalar eps = 1e-4; |
| |
| meigen = mref = mc; |
| CBLAS_GEMM(CblasColMajor, CblasNoTrans, CblasNoTrans, M, N, K, 1, ma.data(), M, mb.data(), K, 1, mref.data(), M); |
| meigen += ma * mb; |
| MYVERIFY(meigen.isApprox(mref, eps),". * ."); |
| |
| meigen = mref = mc; |
| CBLAS_GEMM(CblasColMajor, CblasTrans, CblasNoTrans, M, N, K, 1, maT.data(), K, mb.data(), K, 1, mref.data(), M); |
| meigen += maT.transpose() * mb; |
| MYVERIFY(meigen.isApprox(mref, eps),"T * ."); |
| |
| meigen = mref = mc; |
| CBLAS_GEMM(CblasColMajor, CblasTrans, CblasTrans, M, N, K, 1, maT.data(), K, mbT.data(), N, 1, mref.data(), M); |
| meigen += (maT.transpose()) * (mbT.transpose()); |
| MYVERIFY(meigen.isApprox(mref, eps),"T * T"); |
| |
| meigen = mref = mc; |
| CBLAS_GEMM(CblasColMajor, CblasNoTrans, CblasTrans, M, N, K, 1, ma.data(), M, mbT.data(), N, 1, mref.data(), M); |
| meigen += ma * mbT.transpose(); |
| MYVERIFY(meigen.isApprox(mref, eps),". * T"); |
| } |
| |
| void check_product(void) |
| { |
| int M, N, K; |
| for (uint i=0; i<1000; ++i) |
| { |
| M = ei_random<int>(1,64); |
| N = ei_random<int>(1,768); |
| K = ei_random<int>(1,768); |
| M = (0 + M) * 1; |
| std::cout << M << " x " << N << " x " << K << "\n"; |
| check_product(M, N, K); |
| } |
| } |
| |