blob: a57966e8cc2caeb3ecea42e75b442e0c4445f6fc [file] [log] [blame]
// g++ -O3 -DNDEBUG -I.. -L /usr/lib64/atlas/ benchBlasGemm.cpp -o benchBlasGemm -lrt -lcblas
// possible options:
// -DEIGEN_DONT_VECTORIZE
// -msse2
// #define EIGEN_DEFAULT_TO_ROW_MAJOR
#define _FLOAT
#include <iostream>
#include <Eigen/Core>
#include "BenchTimer.h"
// include the BLAS headers
extern "C" {
#include <cblas.h>
}
#include <string>
#ifdef _FLOAT
typedef float Scalar;
#define CBLAS_GEMM cblas_sgemm
#else
typedef double Scalar;
#define CBLAS_GEMM cblas_dgemm
#endif
typedef Eigen::Matrix<Scalar, Eigen::Dynamic, Eigen::Dynamic> MyMatrix;
void bench_eigengemm(MyMatrix& mc, const MyMatrix& ma, const MyMatrix& mb, int nbloops);
void check_product(int M, int N, int K);
void check_product(void);
int main(int argc, char* argv[]) {
// disable SSE exceptions
#ifdef __GNUC__
{
int aux;
asm("stmxcsr %[aux] \n\t"
"orl $32832, %[aux] \n\t"
"ldmxcsr %[aux] \n\t"
:
: [aux] "m"(aux));
}
#endif
int nbtries = 1, nbloops = 1, M, N, K;
if (argc == 2) {
if (std::string(argv[1]) == "check")
check_product();
else
M = N = K = atoi(argv[1]);
} else if ((argc == 3) && (std::string(argv[1]) == "auto")) {
M = N = K = atoi(argv[2]);
nbloops = 1000000000 / (M * M * M);
if (nbloops < 1) nbloops = 1;
nbtries = 6;
} else if (argc == 4) {
M = N = K = atoi(argv[1]);
nbloops = atoi(argv[2]);
nbtries = atoi(argv[3]);
} else if (argc == 6) {
M = atoi(argv[1]);
N = atoi(argv[2]);
K = atoi(argv[3]);
nbloops = atoi(argv[4]);
nbtries = atoi(argv[5]);
} else {
std::cout << "Usage: " << argv[0] << " size \n";
std::cout << "Usage: " << argv[0] << " auto size\n";
std::cout << "Usage: " << argv[0] << " size nbloops nbtries\n";
std::cout << "Usage: " << argv[0] << " M N K nbloops nbtries\n";
std::cout << "Usage: " << argv[0] << " check\n";
std::cout << "Options:\n";
std::cout << " size unique size of the 2 matrices (integer)\n";
std::cout << " auto automatically set the number of repetitions and tries\n";
std::cout << " nbloops number of times the GEMM routines is executed\n";
std::cout << " nbtries number of times the loop is benched (return the best try)\n";
std::cout << " M N K sizes of the matrices: MxN = MxK * KxN (integers)\n";
std::cout << " check check eigen product using cblas as a reference\n";
exit(1);
}
double nbmad = double(M) * double(N) * double(K) * double(nbloops);
if (!(std::string(argv[1]) == "auto")) std::cout << M << " x " << N << " x " << K << "\n";
Scalar alpha, beta;
MyMatrix ma(M, K), mb(K, N), mc(M, N);
ma = MyMatrix::Random(M, K);
mb = MyMatrix::Random(K, N);
mc = MyMatrix::Random(M, N);
Eigen::BenchTimer timer;
// we simply compute c += a*b, so:
alpha = 1;
beta = 1;
// bench cblas
// ROWS_A, COLS_B, COLS_A, 1.0, A, COLS_A, B, COLS_B, 0.0, C, COLS_B);
if (!(std::string(argv[1]) == "auto")) {
timer.reset();
for (uint k = 0; k < nbtries; ++k) {
timer.start();
for (uint j = 0; j < nbloops; ++j)
#ifdef EIGEN_DEFAULT_TO_ROW_MAJOR
CBLAS_GEMM(CblasRowMajor, CblasNoTrans, CblasNoTrans, M, N, K, alpha, ma.data(), K, mb.data(), N, beta,
mc.data(), N);
#else
CBLAS_GEMM(CblasColMajor, CblasNoTrans, CblasNoTrans, M, N, K, alpha, ma.data(), M, mb.data(), K, beta,
mc.data(), M);
#endif
timer.stop();
}
if (!(std::string(argv[1]) == "auto"))
std::cout << "cblas: " << timer.value() << " (" << 1e-3 * floor(1e-6 * nbmad / timer.value()) << " GFlops/s)\n";
else
std::cout << M << " : " << timer.value() << " ; " << 1e-3 * floor(1e-6 * nbmad / timer.value()) << "\n";
}
// clear
ma = MyMatrix::Random(M, K);
mb = MyMatrix::Random(K, N);
mc = MyMatrix::Random(M, N);
// eigen
// if (!(std::string(argv[1])=="auto"))
{
timer.reset();
for (uint k = 0; k < nbtries; ++k) {
timer.start();
bench_eigengemm(mc, ma, mb, nbloops);
timer.stop();
}
if (!(std::string(argv[1]) == "auto"))
std::cout << "eigen : " << timer.value() << " (" << 1e-3 * floor(1e-6 * nbmad / timer.value()) << " GFlops/s)\n";
else
std::cout << M << " : " << timer.value() << " ; " << 1e-3 * floor(1e-6 * nbmad / timer.value()) << "\n";
}
std::cout << "l1: " << Eigen::l1CacheSize() << std::endl;
std::cout << "l2: " << Eigen::l2CacheSize() << std::endl;
return 0;
}
using namespace Eigen;
void bench_eigengemm(MyMatrix& mc, const MyMatrix& ma, const MyMatrix& mb, int nbloops) {
for (uint j = 0; j < nbloops; ++j) mc.noalias() += ma * mb;
}
#define MYVERIFY(A, M) \
if (!(A)) { \
std::cout << "FAIL: " << M << "\n"; \
}
void check_product(int M, int N, int K) {
MyMatrix ma(M, K), mb(K, N), mc(M, N), maT(K, M), mbT(N, K), meigen(M, N), mref(M, N);
ma = MyMatrix::Random(M, K);
mb = MyMatrix::Random(K, N);
maT = ma.transpose();
mbT = mb.transpose();
mc = MyMatrix::Random(M, N);
MyMatrix::Scalar eps = 1e-4;
meigen = mref = mc;
CBLAS_GEMM(CblasColMajor, CblasNoTrans, CblasNoTrans, M, N, K, 1, ma.data(), M, mb.data(), K, 1, mref.data(), M);
meigen += ma * mb;
MYVERIFY(meigen.isApprox(mref, eps), ". * .");
meigen = mref = mc;
CBLAS_GEMM(CblasColMajor, CblasTrans, CblasNoTrans, M, N, K, 1, maT.data(), K, mb.data(), K, 1, mref.data(), M);
meigen += maT.transpose() * mb;
MYVERIFY(meigen.isApprox(mref, eps), "T * .");
meigen = mref = mc;
CBLAS_GEMM(CblasColMajor, CblasTrans, CblasTrans, M, N, K, 1, maT.data(), K, mbT.data(), N, 1, mref.data(), M);
meigen += (maT.transpose()) * (mbT.transpose());
MYVERIFY(meigen.isApprox(mref, eps), "T * T");
meigen = mref = mc;
CBLAS_GEMM(CblasColMajor, CblasNoTrans, CblasTrans, M, N, K, 1, ma.data(), M, mbT.data(), N, 1, mref.data(), M);
meigen += ma * mbT.transpose();
MYVERIFY(meigen.isApprox(mref, eps), ". * T");
}
void check_product(void) {
int M, N, K;
for (uint i = 0; i < 1000; ++i) {
M = internal::random<int>(1, 64);
N = internal::random<int>(1, 768);
K = internal::random<int>(1, 768);
M = (0 + M) * 1;
std::cout << M << " x " << N << " x " << K << "\n";
check_product(M, N, K);
}
}