|  | // g++ -O3 -DNDEBUG -I.. -L /usr/lib64/atlas/ benchBlasGemm.cpp -o benchBlasGemm -lrt -lcblas | 
|  | // possible options: | 
|  | //    -DEIGEN_DONT_VECTORIZE | 
|  | //    -msse2 | 
|  |  | 
|  | // #define EIGEN_DEFAULT_TO_ROW_MAJOR | 
|  | #define _FLOAT | 
|  |  | 
|  | #include <iostream> | 
|  |  | 
|  | #include <Eigen/Core> | 
|  | #include "BenchTimer.h" | 
|  |  | 
|  | // include the BLAS headers | 
|  | extern "C" { | 
|  | #include <cblas.h> | 
|  | } | 
|  | #include <string> | 
|  |  | 
|  | #ifdef _FLOAT | 
|  | typedef float Scalar; | 
|  | #define CBLAS_GEMM cblas_sgemm | 
|  | #else | 
|  | typedef double Scalar; | 
|  | #define CBLAS_GEMM cblas_dgemm | 
|  | #endif | 
|  |  | 
|  |  | 
|  | typedef Eigen::Matrix<Scalar,Eigen::Dynamic,Eigen::Dynamic> MyMatrix; | 
|  | void bench_eigengemm(MyMatrix& mc, const MyMatrix& ma, const MyMatrix& mb, int nbloops); | 
|  | void check_product(int M, int N, int K); | 
|  | void check_product(void); | 
|  |  | 
|  | int main(int argc, char *argv[]) | 
|  | { | 
|  | // disable SSE exceptions | 
|  | #ifdef __GNUC__ | 
|  | { | 
|  | int aux; | 
|  | asm( | 
|  | "stmxcsr   %[aux]           \n\t" | 
|  | "orl       $32832, %[aux]   \n\t" | 
|  | "ldmxcsr   %[aux]           \n\t" | 
|  | : : [aux] "m" (aux)); | 
|  | } | 
|  | #endif | 
|  |  | 
|  | int nbtries=1, nbloops=1, M, N, K; | 
|  |  | 
|  | if (argc==2) | 
|  | { | 
|  | if (std::string(argv[1])=="check") | 
|  | check_product(); | 
|  | else | 
|  | M = N = K = atoi(argv[1]); | 
|  | } | 
|  | else if ((argc==3) && (std::string(argv[1])=="auto")) | 
|  | { | 
|  | M = N = K = atoi(argv[2]); | 
|  | nbloops = 1000000000/(M*M*M); | 
|  | if (nbloops<1) | 
|  | nbloops = 1; | 
|  | nbtries = 6; | 
|  | } | 
|  | else if (argc==4) | 
|  | { | 
|  | M = N = K = atoi(argv[1]); | 
|  | nbloops = atoi(argv[2]); | 
|  | nbtries = atoi(argv[3]); | 
|  | } | 
|  | else if (argc==6) | 
|  | { | 
|  | M = atoi(argv[1]); | 
|  | N = atoi(argv[2]); | 
|  | K = atoi(argv[3]); | 
|  | nbloops = atoi(argv[4]); | 
|  | nbtries = atoi(argv[5]); | 
|  | } | 
|  | else | 
|  | { | 
|  | std::cout << "Usage: " << argv[0] << " size  \n"; | 
|  | std::cout << "Usage: " << argv[0] << " auto size\n"; | 
|  | std::cout << "Usage: " << argv[0] << " size nbloops nbtries\n"; | 
|  | std::cout << "Usage: " << argv[0] << " M N K nbloops nbtries\n"; | 
|  | std::cout << "Usage: " << argv[0] << " check\n"; | 
|  | std::cout << "Options:\n"; | 
|  | std::cout << "    size       unique size of the 2 matrices (integer)\n"; | 
|  | std::cout << "    auto       automatically set the number of repetitions and tries\n"; | 
|  | std::cout << "    nbloops    number of times the GEMM routines is executed\n"; | 
|  | std::cout << "    nbtries    number of times the loop is benched (return the best try)\n"; | 
|  | std::cout << "    M N K      sizes of the matrices: MxN  =  MxK * KxN (integers)\n"; | 
|  | std::cout << "    check      check eigen product using cblas as a reference\n"; | 
|  | exit(1); | 
|  | } | 
|  |  | 
|  | double nbmad = double(M) * double(N) * double(K) * double(nbloops); | 
|  |  | 
|  | if (!(std::string(argv[1])=="auto")) | 
|  | std::cout << M << " x " << N << " x " << K << "\n"; | 
|  |  | 
|  | Scalar alpha, beta; | 
|  | MyMatrix ma(M,K), mb(K,N), mc(M,N); | 
|  | ma = MyMatrix::Random(M,K); | 
|  | mb = MyMatrix::Random(K,N); | 
|  | mc = MyMatrix::Random(M,N); | 
|  |  | 
|  | Eigen::BenchTimer timer; | 
|  |  | 
|  | // we simply compute c += a*b, so: | 
|  | alpha = 1; | 
|  | beta = 1; | 
|  |  | 
|  | // bench cblas | 
|  | // ROWS_A, COLS_B, COLS_A, 1.0,  A, COLS_A, B, COLS_B, 0.0, C, COLS_B); | 
|  | if (!(std::string(argv[1])=="auto")) | 
|  | { | 
|  | timer.reset(); | 
|  | for (uint k=0 ; k<nbtries ; ++k) | 
|  | { | 
|  | timer.start(); | 
|  | for (uint j=0 ; j<nbloops ; ++j) | 
|  | #ifdef EIGEN_DEFAULT_TO_ROW_MAJOR | 
|  | CBLAS_GEMM(CblasRowMajor, CblasNoTrans, CblasNoTrans, M, N, K, alpha, ma.data(), K, mb.data(), N, beta, mc.data(), N); | 
|  | #else | 
|  | CBLAS_GEMM(CblasColMajor, CblasNoTrans, CblasNoTrans, M, N, K, alpha, ma.data(), M, mb.data(), K, beta, mc.data(), M); | 
|  | #endif | 
|  | timer.stop(); | 
|  | } | 
|  | if (!(std::string(argv[1])=="auto")) | 
|  | std::cout << "cblas: " << timer.value() << " (" << 1e-3*floor(1e-6*nbmad/timer.value()) << " GFlops/s)\n"; | 
|  | else | 
|  | std::cout << M << " : " << timer.value() << " ; " << 1e-3*floor(1e-6*nbmad/timer.value()) << "\n"; | 
|  | } | 
|  |  | 
|  | // clear | 
|  | ma = MyMatrix::Random(M,K); | 
|  | mb = MyMatrix::Random(K,N); | 
|  | mc = MyMatrix::Random(M,N); | 
|  |  | 
|  | // eigen | 
|  | //   if (!(std::string(argv[1])=="auto")) | 
|  | { | 
|  | timer.reset(); | 
|  | for (uint k=0 ; k<nbtries ; ++k) | 
|  | { | 
|  | timer.start(); | 
|  | bench_eigengemm(mc, ma, mb, nbloops); | 
|  | timer.stop(); | 
|  | } | 
|  | if (!(std::string(argv[1])=="auto")) | 
|  | std::cout << "eigen : " << timer.value() << " (" << 1e-3*floor(1e-6*nbmad/timer.value()) << " GFlops/s)\n"; | 
|  | else | 
|  | std::cout << M << " : " << timer.value() << " ; " << 1e-3*floor(1e-6*nbmad/timer.value()) << "\n"; | 
|  | } | 
|  |  | 
|  | std::cout << "l1: " << Eigen::l1CacheSize() << std::endl; | 
|  | std::cout << "l2: " << Eigen::l2CacheSize() << std::endl; | 
|  |  | 
|  |  | 
|  | return 0; | 
|  | } | 
|  |  | 
|  | using namespace Eigen; | 
|  |  | 
|  | void bench_eigengemm(MyMatrix& mc, const MyMatrix& ma, const MyMatrix& mb, int nbloops) | 
|  | { | 
|  | for (uint j=0 ; j<nbloops ; ++j) | 
|  | mc.noalias() += ma * mb; | 
|  | } | 
|  |  | 
|  | #define MYVERIFY(A,M) if (!(A)) { \ | 
|  | std::cout << "FAIL: " << M << "\n"; \ | 
|  | } | 
|  | void check_product(int M, int N, int K) | 
|  | { | 
|  | MyMatrix ma(M,K), mb(K,N), mc(M,N), maT(K,M), mbT(N,K), meigen(M,N), mref(M,N); | 
|  | ma = MyMatrix::Random(M,K); | 
|  | mb = MyMatrix::Random(K,N); | 
|  | maT = ma.transpose(); | 
|  | mbT = mb.transpose(); | 
|  | mc = MyMatrix::Random(M,N); | 
|  |  | 
|  | MyMatrix::Scalar eps = 1e-4; | 
|  |  | 
|  | meigen = mref = mc; | 
|  | CBLAS_GEMM(CblasColMajor, CblasNoTrans, CblasNoTrans, M, N, K, 1, ma.data(), M, mb.data(), K, 1, mref.data(), M); | 
|  | meigen += ma * mb; | 
|  | MYVERIFY(meigen.isApprox(mref, eps),". * ."); | 
|  |  | 
|  | meigen = mref = mc; | 
|  | CBLAS_GEMM(CblasColMajor, CblasTrans, CblasNoTrans, M, N, K, 1, maT.data(), K, mb.data(), K, 1, mref.data(), M); | 
|  | meigen += maT.transpose() * mb; | 
|  | MYVERIFY(meigen.isApprox(mref, eps),"T * ."); | 
|  |  | 
|  | meigen = mref = mc; | 
|  | CBLAS_GEMM(CblasColMajor, CblasTrans, CblasTrans, M, N, K, 1, maT.data(), K, mbT.data(), N, 1, mref.data(), M); | 
|  | meigen += (maT.transpose()) * (mbT.transpose()); | 
|  | MYVERIFY(meigen.isApprox(mref, eps),"T * T"); | 
|  |  | 
|  | meigen = mref = mc; | 
|  | CBLAS_GEMM(CblasColMajor, CblasNoTrans, CblasTrans, M, N, K, 1, ma.data(), M, mbT.data(), N, 1, mref.data(), M); | 
|  | meigen += ma * mbT.transpose(); | 
|  | MYVERIFY(meigen.isApprox(mref, eps),". * T"); | 
|  | } | 
|  |  | 
|  | void check_product(void) | 
|  | { | 
|  | int M, N, K; | 
|  | for (uint i=0; i<1000; ++i) | 
|  | { | 
|  | M = internal::random<int>(1,64); | 
|  | N = internal::random<int>(1,768); | 
|  | K = internal::random<int>(1,768); | 
|  | M = (0 + M) * 1; | 
|  | std::cout << M << " x " << N << " x " << K << "\n"; | 
|  | check_product(M, N, K); | 
|  | } | 
|  | } | 
|  |  |