blob: 8cb1f7ef77863606b6343755403cbf60490d29fc [file]
#include <string>
#include <benchmark/benchmark.h>
#include <Eigen/SVD>
using namespace Eigen;
// Benchmark the UpperBidiagonalization reduction in isolation,
// covering both the unblocked and blocked code paths, and
// sweeping the block size parameter for tuning.
// Dense matrix of `Scalar` whose row and column counts are both chosen at
// run time; used as the operand type by the benchmarks in this file.
template <class Scalar>
using Mat = Matrix<Scalar, Dynamic, Dynamic>;
// ---------- Blocked (default) path ----------
// Times UpperBidiagonalization::compute() on a random matrix.
//
// Benchmark arguments:
//   state.range(0)  number of rows
//   state.range(1)  number of columns
//
// The input matrix and the decomposition object are created once, outside the
// timed loop, and the same object is reused for every iteration. One
// "item" is reported per iteration.
template <typename Scalar>
static void BM_UpperBidiag_Blocked(benchmark::State& state) {
  const Index rows = state.range(0);
  const Index cols = state.range(1);
  const Mat<Scalar> A = Mat<Scalar>::Random(rows, cols);
  internal::UpperBidiagonalization<Mat<Scalar>> ubd(rows, cols);
  for (auto _ : state) {
    ubd.compute(A);
    // Observe the band storage in place. Going through toDenseMatrix() would
    // build a temporary dense matrix inside the timed region on every
    // iteration, which skews the numbers (most visibly for small sizes).
    benchmark::DoNotOptimize(ubd.bidiagonal().coeffs().data());
    // Force the writes made by compute() to be treated as observable.
    benchmark::ClobberMemory();
  }
  state.SetItemsProcessed(state.iterations());
}
// ---------- Unblocked path ----------
// Times UpperBidiagonalization::computeUnblocked() on a random matrix.
//
// Benchmark arguments:
//   state.range(0)  number of rows
//   state.range(1)  number of columns
//
// The input matrix and the decomposition object are created once, outside the
// timed loop, and the same object is reused for every iteration. One
// "item" is reported per iteration.
template <typename Scalar>
static void BM_UpperBidiag_Unblocked(benchmark::State& state) {
  const Index rows = state.range(0);
  const Index cols = state.range(1);
  const Mat<Scalar> A = Mat<Scalar>::Random(rows, cols);
  internal::UpperBidiagonalization<Mat<Scalar>> ubd(rows, cols);
  for (auto _ : state) {
    ubd.computeUnblocked(A);
    // Observe the band storage in place. Going through toDenseMatrix() would
    // build a temporary dense matrix inside the timed region on every
    // iteration, which skews the numbers (most visibly for small sizes).
    benchmark::DoNotOptimize(ubd.bidiagonal().coeffs().data());
    // Force the writes made by computeUnblocked() to be treated as observable.
    benchmark::ClobberMemory();
  }
  state.SetItemsProcessed(state.iterations());
}
// ---------- Blocked with explicit block size ----------
// Times internal::upperbidiagonalization_inplace_blocked() with an explicit
// block size, for tuning that parameter.
//
// Benchmark arguments:
//   state.range(0)  number of rows
//   state.range(1)  number of columns
//   state.range(2)  block size, forwarded as the routine's third argument
//
// The work matrix and the band matrix receiving the result are allocated once,
// outside the timed loop. The run is labelled "bs=<blockSize>" and reports one
// "item" per iteration.
template <typename Scalar>
static void BM_UpperBidiag_BlockSize(benchmark::State& state) {
  const Index rows = state.range(0);
  const Index cols = state.range(1);
  const Index blockSize = state.range(2);
  const Mat<Scalar> A = Mat<Scalar>::Random(rows, cols);
  using RealScalar = typename NumTraits<Scalar>::Real;
  using BidiagType = internal::BandMatrix<RealScalar, Dynamic, Dynamic, 1, 0, RowMajor>;
  Mat<Scalar> householder(rows, cols);
  BidiagType bidiagonal(cols, cols);
  for (auto _ : state) {
    // The routine is handed `householder` as its (non-const) work matrix, so
    // restore the input before each run. This copy is part of the timed region.
    householder = A;
    internal::upperbidiagonalization_inplace_blocked(householder, bidiagonal, blockSize);
    // Observe the band storage in place. Going through toDenseMatrix() would
    // build a temporary dense matrix inside the timed region on every
    // iteration, which skews the numbers.
    benchmark::DoNotOptimize(bidiagonal.coeffs().data());
    // Force the writes made by the routine to be treated as observable.
    benchmark::ClobberMemory();
  }
  state.SetItemsProcessed(state.iterations());
  state.SetLabel("bs=" + std::to_string(blockSize));
}
// ---------- Size configurations ----------
// clang-format off
// Small {rows, cols} shapes: four square cases followed by three tall ones.
// Per the original note these are expected to stay on the unblocked path
// (the switch-over threshold lives in Eigen, not in this file).
#define SMALL_SIZES \
  ->Args({4, 4}) \
  ->Args({8, 8}) \
  ->Args({16, 16}) \
  ->Args({32, 32}) \
  ->Args({16, 4}) \
  ->Args({32, 8}) \
  ->Args({32, 16})
// Medium {rows, cols} shapes: four square cases followed by three tall ones.
// Per the original note these are meant to exercise the blocked path.
#define MEDIUM_SIZES \
  ->Args({64, 64}) \
  ->Args({128, 128}) \
  ->Args({256, 256}) \
  ->Args({512, 512}) \
  ->Args({256, 64}) \
  ->Args({512, 64}) \
  ->Args({512, 128})
// Large {rows, cols} shapes: three square cases followed by three tall ones.
// Per the original note the run time here is dominated by the blocked GEMM
// updates.
#define LARGE_SIZES \
  ->Args({1024, 1024}) \
  ->Args({2048, 2048}) \
  ->Args({4096, 4096}) \
  ->Args({2048, 256}) \
  ->Args({4096, 256}) \
  ->Args({4096, 1024})
// Square block-size sweep. Each entry is {rows, cols, blockSize}: sides
// 128, 256, ..., 4096, each paired with block sizes 16, 32, 48, 64 and 96.
#define BLOCKSIZE_SWEEP \
  /* 128 x 128 */ \
  ->Args({128, 128, 16}) \
  ->Args({128, 128, 32}) \
  ->Args({128, 128, 48}) \
  ->Args({128, 128, 64}) \
  ->Args({128, 128, 96}) \
  /* 256 x 256 */ \
  ->Args({256, 256, 16}) \
  ->Args({256, 256, 32}) \
  ->Args({256, 256, 48}) \
  ->Args({256, 256, 64}) \
  ->Args({256, 256, 96}) \
  /* 512 x 512 */ \
  ->Args({512, 512, 16}) \
  ->Args({512, 512, 32}) \
  ->Args({512, 512, 48}) \
  ->Args({512, 512, 64}) \
  ->Args({512, 512, 96}) \
  /* 1024 x 1024 */ \
  ->Args({1024, 1024, 16}) \
  ->Args({1024, 1024, 32}) \
  ->Args({1024, 1024, 48}) \
  ->Args({1024, 1024, 64}) \
  ->Args({1024, 1024, 96}) \
  /* 2048 x 2048 */ \
  ->Args({2048, 2048, 16}) \
  ->Args({2048, 2048, 32}) \
  ->Args({2048, 2048, 48}) \
  ->Args({2048, 2048, 64}) \
  ->Args({2048, 2048, 96}) \
  /* 4096 x 4096 */ \
  ->Args({4096, 4096, 16}) \
  ->Args({4096, 4096, 32}) \
  ->Args({4096, 4096, 48}) \
  ->Args({4096, 4096, 64}) \
  ->Args({4096, 4096, 96})
// Tall-skinny block-size sweep. Each entry is {rows, cols, blockSize}: three
// tall shapes, each paired with block sizes 16, 32, 48 and 64.
#define BLOCKSIZE_SWEEP_TALL \
  /* 1024 x 128 */ \
  ->Args({1024, 128, 16}) \
  ->Args({1024, 128, 32}) \
  ->Args({1024, 128, 48}) \
  ->Args({1024, 128, 64}) \
  /* 4096 x 256 */ \
  ->Args({4096, 256, 16}) \
  ->Args({4096, 256, 32}) \
  ->Args({4096, 256, 48}) \
  ->Args({4096, 256, 64}) \
  /* 4096 x 1024 */ \
  ->Args({4096, 1024, 16}) \
  ->Args({4096, 1024, 32}) \
  ->Args({4096, 1024, 48}) \
  ->Args({4096, 1024, 64})
// ---------- Register benchmarks ----------
// Each registration below instantiates one benchmark template for a scalar
// type, appends one of the size-list macros, and then sets the reported name
// with Name(). Registrations are kept in this order on purpose; do not
// reorder them casually.
//
// Blocked — float (small, medium and large size lists)
BENCHMARK(BM_UpperBidiag_Blocked<float>) SMALL_SIZES ->Name("Bidiag_Blocked_float_Small");
BENCHMARK(BM_UpperBidiag_Blocked<float>) MEDIUM_SIZES ->Name("Bidiag_Blocked_float_Medium");
BENCHMARK(BM_UpperBidiag_Blocked<float>) LARGE_SIZES ->Name("Bidiag_Blocked_float_Large");
// Blocked — double (small, medium and large size lists)
BENCHMARK(BM_UpperBidiag_Blocked<double>) SMALL_SIZES ->Name("Bidiag_Blocked_double_Small");
BENCHMARK(BM_UpperBidiag_Blocked<double>) MEDIUM_SIZES ->Name("Bidiag_Blocked_double_Medium");
BENCHMARK(BM_UpperBidiag_Blocked<double>) LARGE_SIZES ->Name("Bidiag_Blocked_double_Large");
// Unblocked — float (small and medium only; no large size list is registered)
BENCHMARK(BM_UpperBidiag_Unblocked<float>) SMALL_SIZES ->Name("Bidiag_Unblocked_float_Small");
BENCHMARK(BM_UpperBidiag_Unblocked<float>) MEDIUM_SIZES ->Name("Bidiag_Unblocked_float_Medium");
// Unblocked — double (small and medium only; no large size list is registered)
BENCHMARK(BM_UpperBidiag_Unblocked<double>) SMALL_SIZES ->Name("Bidiag_Unblocked_double_Small");
BENCHMARK(BM_UpperBidiag_Unblocked<double>) MEDIUM_SIZES ->Name("Bidiag_Unblocked_double_Medium");
// Block size sweep — double (square shapes, then tall-skinny shapes)
BENCHMARK(BM_UpperBidiag_BlockSize<double>) BLOCKSIZE_SWEEP ->Name("Bidiag_BS_double_Square");
BENCHMARK(BM_UpperBidiag_BlockSize<double>) BLOCKSIZE_SWEEP_TALL ->Name("Bidiag_BS_double_Tall");
// Block size sweep — float (square shapes, then tall-skinny shapes)
BENCHMARK(BM_UpperBidiag_BlockSize<float>) BLOCKSIZE_SWEEP ->Name("Bidiag_BS_float_Square");
BENCHMARK(BM_UpperBidiag_BlockSize<float>) BLOCKSIZE_SWEEP_TALL ->Name("Bidiag_BS_float_Tall");
// The size-list macros are only needed for the registrations above; undefine
// them so they are not visible past this point.
#undef SMALL_SIZES
#undef MEDIUM_SIZES
#undef LARGE_SIZES
#undef BLOCKSIZE_SWEEP
#undef BLOCKSIZE_SWEEP_TALL
// clang-format on