CI: fix bench builds (drop Apply/internal::Benchmark*, add previously missing BesselI0e/I1e/K0e/K1e double registrations) + relax eigensolver_selfadjoint tolerance libeigen/eigen!2507 Co-authored-by: Rasmus Munk Larsen <rmlarsen@gmail.com>
diff --git a/benchmarks/Householder/bench_householder.cpp b/benchmarks/Householder/bench_householder.cpp index 61d39ed..047b27b 100644 --- a/benchmarks/Householder/bench_householder.cpp +++ b/benchmarks/Householder/bench_householder.cpp
@@ -263,108 +263,89 @@ } // ============================================================================= -// Size configurations +// Size configurations: chained ->Arg / ->Args macros applied at registration. // ============================================================================= -static void VectorSizes(::benchmark::Benchmark* b) { - for (int n : {8, 16, 32, 64, 128, 256, 512, 1024, 4096}) b->Arg(n); -} +// clang-format off +#define VECTOR_SIZES \ + ->Arg(8)->Arg(16)->Arg(32)->Arg(64)->Arg(128)->Arg(256)->Arg(512)->Arg(1024)->Arg(4096) -static void SquareSizes(::benchmark::Benchmark* b) { - for (int n : {32, 48, 64, 80, 96, 112, 128, 160, 192, 256, 384, 512, 768, 1024}) b->Args({n, n}); -} +#define SQUARE_SIZES \ + ->Args({32, 32})->Args({48, 48})->Args({64, 64})->Args({80, 80})->Args({96, 96}) \ + ->Args({112, 112})->Args({128, 128})->Args({160, 160})->Args({192, 192})->Args({256, 256}) \ + ->Args({384, 384})->Args({512, 512})->Args({768, 768})->Args({1024, 1024}) // Fine-grained sizes around the blocking threshold to find the crossover point. -static void SquareSizesFine(::benchmark::Benchmark* b) { - for (int n : {32, 40, 48, 56, 64, 72, 80, 88, 96, 112, 128, 160, 192, 256}) b->Args({n, n}); -} +#define SQUARE_SIZES_FINE \ + ->Args({32, 32})->Args({40, 40})->Args({48, 48})->Args({56, 56})->Args({64, 64}) \ + ->Args({72, 72})->Args({80, 80})->Args({88, 88})->Args({96, 96})->Args({112, 112}) \ + ->Args({128, 128})->Args({160, 160})->Args({192, 192})->Args({256, 256}) // Rectangular: many rows, fewer columns (m_length = cols, dst is rows x rows). -static void RectApplyRight(::benchmark::Benchmark* b) { - // Square - for (int n : {48, 64, 96, 128, 256, 512, 1024}) b->Args({n, n}); - // Wide dst * narrow Q: dst is (rows x rows), Q is (cols x cols), so rows > cols. 
- b->Args({256, 64}); - b->Args({256, 128}); - b->Args({512, 64}); - b->Args({512, 128}); - b->Args({1024, 64}); - b->Args({1024, 128}); - b->Args({1024, 256}); -} +// Wide dst * narrow Q: dst is (rows x rows), Q is (cols x cols), so rows > cols. +#define RECT_APPLY_RIGHT_SIZES \ + ->Args({48, 48})->Args({64, 64})->Args({96, 96})->Args({128, 128}) \ + ->Args({256, 256})->Args({512, 512})->Args({1024, 1024}) \ + ->Args({256, 64})->Args({256, 128}) \ + ->Args({512, 64})->Args({512, 128}) \ + ->Args({1024, 64})->Args({1024, 128})->Args({1024, 256}) -static void RectSizes(::benchmark::Benchmark* b) { - // Square - for (int n : {32, 64, 128, 256, 512, 1024}) b->Args({n, n}); - // Tall-thin - b->Args({1000, 32}); - b->Args({1000, 100}); - b->Args({10000, 32}); - b->Args({10000, 100}); -} +// Square plus tall-thin shapes. +#define RECT_SIZES \ + ->Args({32, 32})->Args({64, 64})->Args({128, 128}) \ + ->Args({256, 256})->Args({512, 512})->Args({1024, 1024}) \ + ->Args({1000, 32})->Args({1000, 100})->Args({10000, 32})->Args({10000, 100}) -static void BlockSizes(::benchmark::Benchmark* b) { - for (int n : {64, 128, 256, 512, 1024}) { - b->Args({n, n}); - b->Args({n, 32}); - } -} +#define BLOCK_SIZES \ + ->Args({64, 64})->Args({64, 32}) \ + ->Args({128, 128})->Args({128, 32}) \ + ->Args({256, 256})->Args({256, 32}) \ + ->Args({512, 512})->Args({512, 32}) \ + ->Args({1024, 1024})->Args({1024, 32}) +// clang-format on // ============================================================================= // Register benchmarks: float // ============================================================================= -BENCHMARK(BM_MakeHouseholderInPlace<float>)->Apply(VectorSizes)->Name("MakeHouseholderInPlace_float"); -BENCHMARK(BM_MakeHouseholder<float>)->Apply(VectorSizes)->Name("MakeHouseholder_float"); -BENCHMARK(BM_ApplyHouseholderOnTheLeft<float>)->Apply(RectSizes)->Name("ApplyHouseholderOnTheLeft_float"); 
-BENCHMARK(BM_ApplyHouseholderOnTheRight<float>)->Apply(RectSizes)->Name("ApplyHouseholderOnTheRight_float"); -BENCHMARK(BM_HouseholderSequence_EvalTo<float>)->Apply(SquareSizesFine)->Name("HouseholderSequence_EvalTo_float"); -BENCHMARK(BM_HouseholderSequence_ApplyLeft<float>)->Apply(RectSizes)->Name("HouseholderSequence_ApplyLeft_float"); +BENCHMARK(BM_MakeHouseholderInPlace<float>) VECTOR_SIZES->Name("MakeHouseholderInPlace_float"); +BENCHMARK(BM_MakeHouseholder<float>) VECTOR_SIZES->Name("MakeHouseholder_float"); +BENCHMARK(BM_ApplyHouseholderOnTheLeft<float>) RECT_SIZES->Name("ApplyHouseholderOnTheLeft_float"); +BENCHMARK(BM_ApplyHouseholderOnTheRight<float>) RECT_SIZES->Name("ApplyHouseholderOnTheRight_float"); +BENCHMARK(BM_HouseholderSequence_EvalTo<float>) SQUARE_SIZES_FINE->Name("HouseholderSequence_EvalTo_float"); +BENCHMARK(BM_HouseholderSequence_ApplyLeft<float>) RECT_SIZES->Name("HouseholderSequence_ApplyLeft_float"); BENCHMARK(BM_HouseholderSequence_ApplyRight<float>) - ->Apply(RectApplyRight) - ->Name("HouseholderSequence_ApplyRight_float"); +RECT_APPLY_RIGHT_SIZES->Name("HouseholderSequence_ApplyRight_float"); BENCHMARK(BM_HouseholderSequence_AdjointApplyLeft<float>) - ->Apply(RectSizes) - ->Name("HouseholderSequence_AdjointApplyLeft_float"); -BENCHMARK(BM_BlockHouseholder_TriangularFactor<float>) - ->Apply(VectorSizes) - ->Name("BlockHouseholder_TriangularFactor_float"); -BENCHMARK(BM_BlockHouseholder_ApplyLeft<float>)->Apply(BlockSizes)->Name("BlockHouseholder_ApplyLeft_float"); +RECT_SIZES->Name("HouseholderSequence_AdjointApplyLeft_float"); +BENCHMARK(BM_BlockHouseholder_TriangularFactor<float>) VECTOR_SIZES->Name("BlockHouseholder_TriangularFactor_float"); +BENCHMARK(BM_BlockHouseholder_ApplyLeft<float>) BLOCK_SIZES->Name("BlockHouseholder_ApplyLeft_float"); // ============================================================================= // Register benchmarks: double // 
============================================================================= -BENCHMARK(BM_MakeHouseholderInPlace<double>)->Apply(VectorSizes)->Name("MakeHouseholderInPlace_double"); -BENCHMARK(BM_MakeHouseholder<double>)->Apply(VectorSizes)->Name("MakeHouseholder_double"); -BENCHMARK(BM_ApplyHouseholderOnTheLeft<double>)->Apply(RectSizes)->Name("ApplyHouseholderOnTheLeft_double"); -BENCHMARK(BM_ApplyHouseholderOnTheRight<double>)->Apply(RectSizes)->Name("ApplyHouseholderOnTheRight_double"); -BENCHMARK(BM_HouseholderSequence_EvalTo<double>)->Apply(SquareSizesFine)->Name("HouseholderSequence_EvalTo_double"); -BENCHMARK(BM_HouseholderSequence_ApplyLeft<double>)->Apply(RectSizes)->Name("HouseholderSequence_ApplyLeft_double"); +BENCHMARK(BM_MakeHouseholderInPlace<double>) VECTOR_SIZES->Name("MakeHouseholderInPlace_double"); +BENCHMARK(BM_MakeHouseholder<double>) VECTOR_SIZES->Name("MakeHouseholder_double"); +BENCHMARK(BM_ApplyHouseholderOnTheLeft<double>) RECT_SIZES->Name("ApplyHouseholderOnTheLeft_double"); +BENCHMARK(BM_ApplyHouseholderOnTheRight<double>) RECT_SIZES->Name("ApplyHouseholderOnTheRight_double"); +BENCHMARK(BM_HouseholderSequence_EvalTo<double>) SQUARE_SIZES_FINE->Name("HouseholderSequence_EvalTo_double"); +BENCHMARK(BM_HouseholderSequence_ApplyLeft<double>) RECT_SIZES->Name("HouseholderSequence_ApplyLeft_double"); BENCHMARK(BM_HouseholderSequence_ApplyRight<double>) - ->Apply(RectApplyRight) - ->Name("HouseholderSequence_ApplyRight_double"); +RECT_APPLY_RIGHT_SIZES->Name("HouseholderSequence_ApplyRight_double"); BENCHMARK(BM_HouseholderSequence_AdjointApplyLeft<double>) - ->Apply(RectSizes) - ->Name("HouseholderSequence_AdjointApplyLeft_double"); -BENCHMARK(BM_BlockHouseholder_TriangularFactor<double>) - ->Apply(VectorSizes) - ->Name("BlockHouseholder_TriangularFactor_double"); -BENCHMARK(BM_BlockHouseholder_ApplyLeft<double>)->Apply(BlockSizes)->Name("BlockHouseholder_ApplyLeft_double"); 
+RECT_SIZES->Name("HouseholderSequence_AdjointApplyLeft_double"); +BENCHMARK(BM_BlockHouseholder_TriangularFactor<double>) VECTOR_SIZES->Name("BlockHouseholder_TriangularFactor_double"); +BENCHMARK(BM_BlockHouseholder_ApplyLeft<double>) BLOCK_SIZES->Name("BlockHouseholder_ApplyLeft_double"); // ============================================================================= // Register benchmarks: std::complex<double> // ============================================================================= -BENCHMARK(BM_MakeHouseholderInPlace<std::complex<double>>) - ->Apply(VectorSizes) - ->Name("MakeHouseholderInPlace_complexdouble"); +BENCHMARK(BM_MakeHouseholderInPlace<std::complex<double>>) VECTOR_SIZES->Name("MakeHouseholderInPlace_complexdouble"); BENCHMARK(BM_ApplyHouseholderOnTheLeft<std::complex<double>>) - ->Apply(RectSizes) - ->Name("ApplyHouseholderOnTheLeft_complexdouble"); +RECT_SIZES->Name("ApplyHouseholderOnTheLeft_complexdouble"); BENCHMARK(BM_HouseholderSequence_EvalTo<std::complex<double>>) - ->Apply(SquareSizes) - ->Name("HouseholderSequence_EvalTo_complexdouble"); +SQUARE_SIZES->Name("HouseholderSequence_EvalTo_complexdouble"); BENCHMARK(BM_HouseholderSequence_ApplyLeft<std::complex<double>>) - ->Apply(SquareSizes) - ->Name("HouseholderSequence_ApplyLeft_complexdouble"); +SQUARE_SIZES->Name("HouseholderSequence_ApplyLeft_complexdouble");
diff --git a/test/eigensolver_selfadjoint.cpp b/test/eigensolver_selfadjoint.cpp index 9f50fcb..f2d852f 100644 --- a/test/eigensolver_selfadjoint.cpp +++ b/test/eigensolver_selfadjoint.cpp
@@ -41,7 +41,7 @@ scaledA /= scaling; MatrixType residual = scaledA * eiSymm.eigenvectors() - eiSymm.eigenvectors() * (eiSymm.eigenvalues() / scaling).asDiagonal(); - RealScalar tol = RealScalar(4) * RealScalar(numext::maxi(Index(1), n)) * NumTraits<RealScalar>::epsilon(); + RealScalar tol = RealScalar(8) * RealScalar(numext::maxi(Index(1), n)) * NumTraits<RealScalar>::epsilon(); for (Index i = 0; i < n; ++i) { VERIFY(residual.col(i).norm() <= tol); } @@ -50,7 +50,7 @@ // Eigenvectors must be unitary. Use a tolerance proportional to n*epsilon, // which is the expected rounding error for Householder-based orthogonal transformations. - RealScalar unitary_tol = RealScalar(4) * RealScalar(numext::maxi(Index(1), n)) * NumTraits<RealScalar>::epsilon(); + RealScalar unitary_tol = RealScalar(8) * RealScalar(numext::maxi(Index(1), n)) * NumTraits<RealScalar>::epsilon(); // But don't go below the test_precision floor (matters for float). unitary_tol = numext::maxi(unitary_tol, test_precision<RealScalar>()); VERIFY(eiSymm.eigenvectors().isUnitary(unitary_tol));
diff --git a/unsupported/benchmarks/IterativeSolvers/bench_iterative_solvers.cpp b/unsupported/benchmarks/IterativeSolvers/bench_iterative_solvers.cpp index 5105d40..8027a6d 100644 --- a/unsupported/benchmarks/IterativeSolvers/bench_iterative_solvers.cpp +++ b/unsupported/benchmarks/IterativeSolvers/bench_iterative_solvers.cpp
@@ -192,18 +192,13 @@ state.counters["iterations"] = solver.iterations(); } -static void SolverSizes(::benchmark::Benchmark* b) { - for (int n : {1000, 10000, 100000}) { - for (int bw : {5, 20}) { - b->Args({n, bw}); - } - } -} +// {n, bandwidth} as a Cartesian product. +#define SOLVER_SIZES ->ArgsProduct({{1000, 10000, 100000}, {5, 20}}) -BENCHMARK(BM_GMRES)->Apply(SolverSizes); -BENCHMARK(BM_DGMRES)->Apply(SolverSizes); -BENCHMARK(BM_MINRES)->Apply(SolverSizes); -BENCHMARK(BM_IDRS)->Apply(SolverSizes); -BENCHMARK(BM_BiCGSTABL)->Apply(SolverSizes); -BENCHMARK(BM_CG_Reference)->Apply(SolverSizes); -BENCHMARK(BM_BiCGSTAB_Reference)->Apply(SolverSizes); +BENCHMARK(BM_GMRES) SOLVER_SIZES; +BENCHMARK(BM_DGMRES) SOLVER_SIZES; +BENCHMARK(BM_MINRES) SOLVER_SIZES; +BENCHMARK(BM_IDRS) SOLVER_SIZES; +BENCHMARK(BM_BiCGSTABL) SOLVER_SIZES; +BENCHMARK(BM_CG_Reference) SOLVER_SIZES; +BENCHMARK(BM_BiCGSTAB_Reference) SOLVER_SIZES;
diff --git a/unsupported/benchmarks/KroneckerProduct/bench_kronecker.cpp b/unsupported/benchmarks/KroneckerProduct/bench_kronecker.cpp index 882887e..2ad7fed 100644 --- a/unsupported/benchmarks/KroneckerProduct/bench_kronecker.cpp +++ b/unsupported/benchmarks/KroneckerProduct/bench_kronecker.cpp
@@ -63,21 +63,8 @@ state.counters["output_size"] = na * nb; } -static void KroneckerSizes(::benchmark::Benchmark* b) { - for (int na : {4, 8, 16}) { - for (int nb : {4, 8, 16}) { - b->Args({na, nb}); - } - } -} +#define KRONECKER_SIZES ->ArgsProduct({{4, 8, 16}, {4, 8, 16}}) +#define KRONECKER_SPARSE_SIZES ->ArgsProduct({{16, 32, 64, 128}, {16, 32, 64, 128}}) -static void KroneckerSparseSizes(::benchmark::Benchmark* b) { - for (int na : {16, 32, 64, 128}) { - for (int nb : {16, 32, 64, 128}) { - b->Args({na, nb}); - } - } -} - -BENCHMARK(BM_KroneckerDense)->Apply(KroneckerSizes); -BENCHMARK(BM_KroneckerSparse)->Apply(KroneckerSparseSizes); +BENCHMARK(BM_KroneckerDense) KRONECKER_SIZES; +BENCHMARK(BM_KroneckerSparse) KRONECKER_SPARSE_SIZES;
diff --git a/unsupported/benchmarks/MatrixFunctions/bench_matrix_power.cpp b/unsupported/benchmarks/MatrixFunctions/bench_matrix_power.cpp index a1ebea0..5666937 100644 --- a/unsupported/benchmarks/MatrixFunctions/bench_matrix_power.cpp +++ b/unsupported/benchmarks/MatrixFunctions/bench_matrix_power.cpp
@@ -85,15 +85,11 @@ } } -static void MatPowerSizes(::benchmark::Benchmark* b) { - for (int n : {4, 8, 16, 32, 64}) { - b->Arg(n); - } -} +#define MAT_POWER_SIZES ->Arg(4)->Arg(8)->Arg(16)->Arg(32)->Arg(64) -BENCHMARK(BM_MatrixSqrt)->Apply(MatPowerSizes); -BENCHMARK(BM_MatrixPow)->Apply(MatPowerSizes); -BENCHMARK(BM_MatrixCos)->Apply(MatPowerSizes); -BENCHMARK(BM_MatrixSin)->Apply(MatPowerSizes); -BENCHMARK(BM_MatrixCosh)->Apply(MatPowerSizes); -BENCHMARK(BM_MatrixSinh)->Apply(MatPowerSizes); +BENCHMARK(BM_MatrixSqrt) MAT_POWER_SIZES; +BENCHMARK(BM_MatrixPow) MAT_POWER_SIZES; +BENCHMARK(BM_MatrixCos) MAT_POWER_SIZES; +BENCHMARK(BM_MatrixSin) MAT_POWER_SIZES; +BENCHMARK(BM_MatrixCosh) MAT_POWER_SIZES; +BENCHMARK(BM_MatrixSinh) MAT_POWER_SIZES;
diff --git a/unsupported/benchmarks/SpecialFunctions/bench_special_functions.cpp b/unsupported/benchmarks/SpecialFunctions/bench_special_functions.cpp index dc6505d..859e41f 100644 --- a/unsupported/benchmarks/SpecialFunctions/bench_special_functions.cpp +++ b/unsupported/benchmarks/SpecialFunctions/bench_special_functions.cpp
@@ -84,44 +84,46 @@ state.SetBytesProcessed(state.iterations() * n * sizeof(Scalar) * 4); } -static void SpecialSizes(::benchmark::Benchmark* b) { - for (int n : {256, 4096, 65536, 1048576}) b->Arg(n); -} +#define SPECIAL_SIZES ->Arg(256)->Arg(4096)->Arg(65536)->Arg(1048576) // --- Register float --- -BENCHMARK(BM_Lgamma<float>)->Apply(SpecialSizes)->Name("Lgamma_float"); -BENCHMARK(BM_Digamma<float>)->Apply(SpecialSizes)->Name("Digamma_float"); -BENCHMARK(BM_BesselI0<float>)->Apply(SpecialSizes)->Name("BesselI0_float"); -BENCHMARK(BM_BesselI1<float>)->Apply(SpecialSizes)->Name("BesselI1_float"); -BENCHMARK(BM_BesselI0e<float>)->Apply(SpecialSizes)->Name("BesselI0e_float"); -BENCHMARK(BM_BesselI1e<float>)->Apply(SpecialSizes)->Name("BesselI1e_float"); -BENCHMARK(BM_BesselJ0<float>)->Apply(SpecialSizes)->Name("BesselJ0_float"); -BENCHMARK(BM_BesselJ1<float>)->Apply(SpecialSizes)->Name("BesselJ1_float"); -BENCHMARK(BM_BesselY0<float>)->Apply(SpecialSizes)->Name("BesselY0_float"); -BENCHMARK(BM_BesselY1<float>)->Apply(SpecialSizes)->Name("BesselY1_float"); -BENCHMARK(BM_BesselK0<float>)->Apply(SpecialSizes)->Name("BesselK0_float"); -BENCHMARK(BM_BesselK1<float>)->Apply(SpecialSizes)->Name("BesselK1_float"); -BENCHMARK(BM_BesselK0e<float>)->Apply(SpecialSizes)->Name("BesselK0e_float"); -BENCHMARK(BM_BesselK1e<float>)->Apply(SpecialSizes)->Name("BesselK1e_float"); -BENCHMARK(BM_Igamma<float>)->Apply(SpecialSizes)->Name("Igamma_float"); -BENCHMARK(BM_Igammac<float>)->Apply(SpecialSizes)->Name("Igammac_float"); -BENCHMARK(BM_Betainc<float>)->Apply(SpecialSizes)->Name("Betainc_float"); -BENCHMARK(BM_Zeta<float>)->Apply(SpecialSizes)->Name("Zeta_float"); -BENCHMARK(BM_Polygamma<float>)->Apply(SpecialSizes)->Name("Polygamma_float"); +BENCHMARK(BM_Lgamma<float>) SPECIAL_SIZES->Name("Lgamma_float"); +BENCHMARK(BM_Digamma<float>) SPECIAL_SIZES->Name("Digamma_float"); +BENCHMARK(BM_BesselI0<float>) SPECIAL_SIZES->Name("BesselI0_float"); +BENCHMARK(BM_BesselI1<float>) 
SPECIAL_SIZES->Name("BesselI1_float"); +BENCHMARK(BM_BesselI0e<float>) SPECIAL_SIZES->Name("BesselI0e_float"); +BENCHMARK(BM_BesselI1e<float>) SPECIAL_SIZES->Name("BesselI1e_float"); +BENCHMARK(BM_BesselJ0<float>) SPECIAL_SIZES->Name("BesselJ0_float"); +BENCHMARK(BM_BesselJ1<float>) SPECIAL_SIZES->Name("BesselJ1_float"); +BENCHMARK(BM_BesselY0<float>) SPECIAL_SIZES->Name("BesselY0_float"); +BENCHMARK(BM_BesselY1<float>) SPECIAL_SIZES->Name("BesselY1_float"); +BENCHMARK(BM_BesselK0<float>) SPECIAL_SIZES->Name("BesselK0_float"); +BENCHMARK(BM_BesselK1<float>) SPECIAL_SIZES->Name("BesselK1_float"); +BENCHMARK(BM_BesselK0e<float>) SPECIAL_SIZES->Name("BesselK0e_float"); +BENCHMARK(BM_BesselK1e<float>) SPECIAL_SIZES->Name("BesselK1e_float"); +BENCHMARK(BM_Igamma<float>) SPECIAL_SIZES->Name("Igamma_float"); +BENCHMARK(BM_Igammac<float>) SPECIAL_SIZES->Name("Igammac_float"); +BENCHMARK(BM_Betainc<float>) SPECIAL_SIZES->Name("Betainc_float"); +BENCHMARK(BM_Zeta<float>) SPECIAL_SIZES->Name("Zeta_float"); +BENCHMARK(BM_Polygamma<float>) SPECIAL_SIZES->Name("Polygamma_float"); // --- Register double --- -BENCHMARK(BM_Lgamma<double>)->Apply(SpecialSizes)->Name("Lgamma_double"); -BENCHMARK(BM_Digamma<double>)->Apply(SpecialSizes)->Name("Digamma_double"); -BENCHMARK(BM_BesselI0<double>)->Apply(SpecialSizes)->Name("BesselI0_double"); -BENCHMARK(BM_BesselI1<double>)->Apply(SpecialSizes)->Name("BesselI1_double"); -BENCHMARK(BM_BesselJ0<double>)->Apply(SpecialSizes)->Name("BesselJ0_double"); -BENCHMARK(BM_BesselJ1<double>)->Apply(SpecialSizes)->Name("BesselJ1_double"); -BENCHMARK(BM_BesselY0<double>)->Apply(SpecialSizes)->Name("BesselY0_double"); -BENCHMARK(BM_BesselY1<double>)->Apply(SpecialSizes)->Name("BesselY1_double"); -BENCHMARK(BM_BesselK0<double>)->Apply(SpecialSizes)->Name("BesselK0_double"); -BENCHMARK(BM_BesselK1<double>)->Apply(SpecialSizes)->Name("BesselK1_double"); -BENCHMARK(BM_Igamma<double>)->Apply(SpecialSizes)->Name("Igamma_double"); 
-BENCHMARK(BM_Igammac<double>)->Apply(SpecialSizes)->Name("Igammac_double"); -BENCHMARK(BM_Betainc<double>)->Apply(SpecialSizes)->Name("Betainc_double"); -BENCHMARK(BM_Zeta<double>)->Apply(SpecialSizes)->Name("Zeta_double"); -BENCHMARK(BM_Polygamma<double>)->Apply(SpecialSizes)->Name("Polygamma_double"); +BENCHMARK(BM_Lgamma<double>) SPECIAL_SIZES->Name("Lgamma_double"); +BENCHMARK(BM_Digamma<double>) SPECIAL_SIZES->Name("Digamma_double"); +BENCHMARK(BM_BesselI0<double>) SPECIAL_SIZES->Name("BesselI0_double"); +BENCHMARK(BM_BesselI1<double>) SPECIAL_SIZES->Name("BesselI1_double"); +BENCHMARK(BM_BesselI0e<double>) SPECIAL_SIZES->Name("BesselI0e_double"); +BENCHMARK(BM_BesselI1e<double>) SPECIAL_SIZES->Name("BesselI1e_double"); +BENCHMARK(BM_BesselJ0<double>) SPECIAL_SIZES->Name("BesselJ0_double"); +BENCHMARK(BM_BesselJ1<double>) SPECIAL_SIZES->Name("BesselJ1_double"); +BENCHMARK(BM_BesselY0<double>) SPECIAL_SIZES->Name("BesselY0_double"); +BENCHMARK(BM_BesselY1<double>) SPECIAL_SIZES->Name("BesselY1_double"); +BENCHMARK(BM_BesselK0<double>) SPECIAL_SIZES->Name("BesselK0_double"); +BENCHMARK(BM_BesselK1<double>) SPECIAL_SIZES->Name("BesselK1_double"); +BENCHMARK(BM_BesselK0e<double>) SPECIAL_SIZES->Name("BesselK0e_double"); +BENCHMARK(BM_BesselK1e<double>) SPECIAL_SIZES->Name("BesselK1e_double"); +BENCHMARK(BM_Igamma<double>) SPECIAL_SIZES->Name("Igamma_double"); +BENCHMARK(BM_Igammac<double>) SPECIAL_SIZES->Name("Igammac_double"); +BENCHMARK(BM_Betainc<double>) SPECIAL_SIZES->Name("Betainc_double"); +BENCHMARK(BM_Zeta<double>) SPECIAL_SIZES->Name("Zeta_double"); +BENCHMARK(BM_Polygamma<double>) SPECIAL_SIZES->Name("Polygamma_double");
diff --git a/unsupported/benchmarks/Splines/bench_splines.cpp b/unsupported/benchmarks/Splines/bench_splines.cpp index e422a8f..852f1c0 100644 --- a/unsupported/benchmarks/Splines/bench_splines.cpp +++ b/unsupported/benchmarks/Splines/bench_splines.cpp
@@ -77,22 +77,18 @@ state.counters["Evals/s"] = benchmark::Counter(neval, benchmark::Counter::kIsIterationInvariantRate); } -static void SplineSizes(::benchmark::Benchmark* b) { - for (int n : {10, 50, 200, 1000}) { - b->Arg(n); - } -} +#define SPLINE_SIZES ->Arg(10)->Arg(50)->Arg(200)->Arg(1000) // 2D cubic splines -BENCHMARK(BM_SplineFit<2, 3>)->Apply(SplineSizes)->Name("SplineFit_2D_Cubic"); -BENCHMARK(BM_SplineEval<2, 3>)->Apply(SplineSizes)->Name("SplineEval_2D_Cubic"); -BENCHMARK(BM_SplineDerivatives<2, 3>)->Apply(SplineSizes)->Name("SplineDerivatives_2D_Cubic"); +BENCHMARK(BM_SplineFit<2, 3>) SPLINE_SIZES->Name("SplineFit_2D_Cubic"); +BENCHMARK(BM_SplineEval<2, 3>) SPLINE_SIZES->Name("SplineEval_2D_Cubic"); +BENCHMARK(BM_SplineDerivatives<2, 3>) SPLINE_SIZES->Name("SplineDerivatives_2D_Cubic"); // 3D cubic splines -BENCHMARK(BM_SplineFit<3, 3>)->Apply(SplineSizes)->Name("SplineFit_3D_Cubic"); -BENCHMARK(BM_SplineEval<3, 3>)->Apply(SplineSizes)->Name("SplineEval_3D_Cubic"); -BENCHMARK(BM_SplineDerivatives<3, 3>)->Apply(SplineSizes)->Name("SplineDerivatives_3D_Cubic"); +BENCHMARK(BM_SplineFit<3, 3>) SPLINE_SIZES->Name("SplineFit_3D_Cubic"); +BENCHMARK(BM_SplineEval<3, 3>) SPLINE_SIZES->Name("SplineEval_3D_Cubic"); +BENCHMARK(BM_SplineDerivatives<3, 3>) SPLINE_SIZES->Name("SplineDerivatives_3D_Cubic"); // 2D quintic splines -BENCHMARK(BM_SplineFit<2, 5>)->Apply(SplineSizes)->Name("SplineFit_2D_Quintic"); -BENCHMARK(BM_SplineEval<2, 5>)->Apply(SplineSizes)->Name("SplineEval_2D_Quintic"); +BENCHMARK(BM_SplineFit<2, 5>) SPLINE_SIZES->Name("SplineFit_2D_Quintic"); +BENCHMARK(BM_SplineEval<2, 5>) SPLINE_SIZES->Name("SplineEval_2D_Quintic");
diff --git a/unsupported/benchmarks/Tensor/bench_broadcasting.cpp b/unsupported/benchmarks/Tensor/bench_broadcasting.cpp index 754fe96..6900535 100644 --- a/unsupported/benchmarks/Tensor/bench_broadcasting.cpp +++ b/unsupported/benchmarks/Tensor/bench_broadcasting.cpp
@@ -140,35 +140,22 @@ state.counters["threads"] = threads; } -static void BroadcastSizes(::benchmark::Benchmark* b) { - for (int m : {64, 256, 1024}) { - for (int n : {64, 256, 1024}) { - b->Args({m, n}); - } - } -} +// {m, n} and {batch, c, h}: pure Cartesian products. +#define BROADCAST_SIZES ->ArgsProduct({{64, 256, 1024}, {64, 256, 1024}}) +#define BROADCAST_RANK4_SIZES ->ArgsProduct({{1, 8}, {64, 256}, {16, 32}}) -static void Rank4Sizes(::benchmark::Benchmark* b) { - for (int batch : {1, 8}) { - for (int c : {64, 256}) { - for (int h : {16, 32}) { - b->Args({batch, c, h}); - } - } - } -} +// {size, size, threads}: explicit because size is repeated. +// clang-format off +#define BROADCAST_THREADPOOL_SIZES \ + ->Args({256, 256, 1})->Args({256, 256, 2})->Args({256, 256, 4}) \ + ->Args({256, 256, 8})->Args({256, 256, 12})->Args({256, 256, 16}) \ + ->Args({1024, 1024, 1})->Args({1024, 1024, 2})->Args({1024, 1024, 4}) \ + ->Args({1024, 1024, 8})->Args({1024, 1024, 12})->Args({1024, 1024, 16}) +// clang-format on -static void BroadcastThreadPoolSizes(::benchmark::Benchmark* b) { - for (int size : {256, 1024}) { - for (int threads : {1, 2, 4, 8, 12, 16}) { - b->Args({size, size, threads}); - } - } -} - -BENCHMARK(BM_BroadcastRow)->Apply(BroadcastSizes); -BENCHMARK(BM_BroadcastCol)->Apply(BroadcastSizes); -BENCHMARK(BM_BroadcastAdd)->Apply(BroadcastSizes); -BENCHMARK(BM_BroadcastRank4)->Apply(Rank4Sizes); -BENCHMARK(BM_BroadcastRow_ThreadPool)->Apply(BroadcastThreadPoolSizes)->UseRealTime(); -BENCHMARK(BM_BroadcastAdd_ThreadPool)->Apply(BroadcastThreadPoolSizes)->UseRealTime(); +BENCHMARK(BM_BroadcastRow) BROADCAST_SIZES; +BENCHMARK(BM_BroadcastCol) BROADCAST_SIZES; +BENCHMARK(BM_BroadcastAdd) BROADCAST_SIZES; +BENCHMARK(BM_BroadcastRank4) BROADCAST_RANK4_SIZES; +BENCHMARK(BM_BroadcastRow_ThreadPool) BROADCAST_THREADPOOL_SIZES->UseRealTime(); +BENCHMARK(BM_BroadcastAdd_ThreadPool) BROADCAST_THREADPOOL_SIZES->UseRealTime();
diff --git a/unsupported/benchmarks/Tensor/bench_chained_expressions.cpp b/unsupported/benchmarks/Tensor/bench_chained_expressions.cpp index 0069195..b6a9a34 100644 --- a/unsupported/benchmarks/Tensor/bench_chained_expressions.cpp +++ b/unsupported/benchmarks/Tensor/bench_chained_expressions.cpp
@@ -142,16 +142,18 @@ state.counters["threads"] = threads; } -static void ChainedSizes(::benchmark::Benchmark* b) { - for (int size : {256, 1024, 4096}) { - for (int threads : {1, 2, 4, 8, 12, 16}) { - b->Args({size, size, threads}); - } - } -} +// clang-format off +#define CHAINED_SIZES \ + ->Args({256, 256, 1})->Args({256, 256, 2})->Args({256, 256, 4}) \ + ->Args({256, 256, 8})->Args({256, 256, 12})->Args({256, 256, 16}) \ + ->Args({1024, 1024, 1})->Args({1024, 1024, 2})->Args({1024, 1024, 4}) \ + ->Args({1024, 1024, 8})->Args({1024, 1024, 12})->Args({1024, 1024, 16}) \ + ->Args({4096, 4096, 1})->Args({4096, 4096, 2})->Args({4096, 4096, 4}) \ + ->Args({4096, 4096, 8})->Args({4096, 4096, 12})->Args({4096, 4096, 16}) +// clang-format on -BENCHMARK(BM_Copy_ThreadPool)->Apply(ChainedSizes)->UseRealTime(); -BENCHMARK(BM_BiasReLU_ThreadPool)->Apply(ChainedSizes)->UseRealTime(); -BENCHMARK(BM_Polynomial_ThreadPool)->Apply(ChainedSizes)->UseRealTime(); -BENCHMARK(BM_ExpNormalize_ThreadPool)->Apply(ChainedSizes)->UseRealTime(); -BENCHMARK(BM_BatchNorm_ThreadPool)->Apply(ChainedSizes)->UseRealTime(); +BENCHMARK(BM_Copy_ThreadPool) CHAINED_SIZES->UseRealTime(); +BENCHMARK(BM_BiasReLU_ThreadPool) CHAINED_SIZES->UseRealTime(); +BENCHMARK(BM_Polynomial_ThreadPool) CHAINED_SIZES->UseRealTime(); +BENCHMARK(BM_ExpNormalize_ThreadPool) CHAINED_SIZES->UseRealTime(); +BENCHMARK(BM_BatchNorm_ThreadPool) CHAINED_SIZES->UseRealTime();
diff --git a/unsupported/benchmarks/Tensor/bench_coefficient_wise.cpp b/unsupported/benchmarks/Tensor/bench_coefficient_wise.cpp index b6e4c14..df8bb11 100644 --- a/unsupported/benchmarks/Tensor/bench_coefficient_wise.cpp +++ b/unsupported/benchmarks/Tensor/bench_coefficient_wise.cpp
@@ -206,39 +206,33 @@ state.SetBytesProcessed(state.iterations() * batch * C * H * H * sizeof(Scalar) * 2); } -static void CwiseSizes(::benchmark::Benchmark* b) { - for (int size : {256, 1024}) { - b->Args({size, size}); - } -} +// clang-format off +#define CWISE_SIZES \ + ->Args({256, 256})->Args({1024, 1024}) -static void CwiseThreadPoolSizes(::benchmark::Benchmark* b) { - for (int size : {256, 1024}) { - for (int threads : {1, 2, 4, 8, 12, 16}) { - b->Args({size, size, threads}); - } - } -} +#define CWISE_THREADPOOL_SIZES \ + ->Args({256, 256, 1})->Args({256, 256, 2})->Args({256, 256, 4}) \ + ->Args({256, 256, 8})->Args({256, 256, 12})->Args({256, 256, 16}) \ + ->Args({1024, 1024, 1})->Args({1024, 1024, 2})->Args({1024, 1024, 4}) \ + ->Args({1024, 1024, 8})->Args({1024, 1024, 12})->Args({1024, 1024, 16}) -static void Rank4Sizes(::benchmark::Benchmark* b) { - b->Args({32, 64, 16}); - b->Args({8, 128, 32}); - b->Args({1, 256, 64}); -} +#define RANK4_SIZES \ + ->Args({32, 64, 16})->Args({8, 128, 32})->Args({1, 256, 64}) +// clang-format on -BENCHMARK(BM_Exp)->Apply(CwiseSizes); -BENCHMARK(BM_Log)->Apply(CwiseSizes); -BENCHMARK(BM_Tanh)->Apply(CwiseSizes); -BENCHMARK(BM_Sigmoid)->Apply(CwiseSizes); -BENCHMARK(BM_ReLU)->Apply(CwiseSizes); -BENCHMARK(BM_Sqrt)->Apply(CwiseSizes); -BENCHMARK(BM_Add)->Apply(CwiseSizes); -BENCHMARK(BM_Mul)->Apply(CwiseSizes); -BENCHMARK(BM_FMA)->Apply(CwiseSizes); -BENCHMARK(BM_ReLU_Rank4)->Apply(Rank4Sizes); -BENCHMARK(BM_Add_ThreadPool)->Apply(CwiseThreadPoolSizes)->UseRealTime(); -BENCHMARK(BM_Mul_ThreadPool)->Apply(CwiseThreadPoolSizes)->UseRealTime(); -BENCHMARK(BM_FMA_ThreadPool)->Apply(CwiseThreadPoolSizes)->UseRealTime(); -BENCHMARK(BM_Exp_ThreadPool)->Apply(CwiseThreadPoolSizes)->UseRealTime(); -BENCHMARK(BM_Tanh_ThreadPool)->Apply(CwiseThreadPoolSizes)->UseRealTime(); -BENCHMARK(BM_ReLU_ThreadPool)->Apply(CwiseThreadPoolSizes)->UseRealTime(); +BENCHMARK(BM_Exp) CWISE_SIZES; +BENCHMARK(BM_Log) CWISE_SIZES; +BENCHMARK(BM_Tanh) 
CWISE_SIZES; +BENCHMARK(BM_Sigmoid) CWISE_SIZES; +BENCHMARK(BM_ReLU) CWISE_SIZES; +BENCHMARK(BM_Sqrt) CWISE_SIZES; +BENCHMARK(BM_Add) CWISE_SIZES; +BENCHMARK(BM_Mul) CWISE_SIZES; +BENCHMARK(BM_FMA) CWISE_SIZES; +BENCHMARK(BM_ReLU_Rank4) RANK4_SIZES; +BENCHMARK(BM_Add_ThreadPool) CWISE_THREADPOOL_SIZES->UseRealTime(); +BENCHMARK(BM_Mul_ThreadPool) CWISE_THREADPOOL_SIZES->UseRealTime(); +BENCHMARK(BM_FMA_ThreadPool) CWISE_THREADPOOL_SIZES->UseRealTime(); +BENCHMARK(BM_Exp_ThreadPool) CWISE_THREADPOOL_SIZES->UseRealTime(); +BENCHMARK(BM_Tanh_ThreadPool) CWISE_THREADPOOL_SIZES->UseRealTime(); +BENCHMARK(BM_ReLU_ThreadPool) CWISE_THREADPOOL_SIZES->UseRealTime();
diff --git a/unsupported/benchmarks/Tensor/bench_contraction.cpp b/unsupported/benchmarks/Tensor/bench_contraction.cpp index 83b1f19..9b1b45d 100644 --- a/unsupported/benchmarks/Tensor/bench_contraction.cpp +++ b/unsupported/benchmarks/Tensor/bench_contraction.cpp
@@ -120,32 +120,29 @@ benchmark::Counter(2.0 * M * N * K, benchmark::Counter::kIsIterationInvariantRate, benchmark::Counter::kIs1000); } -static void ContractionSizes(::benchmark::Benchmark* b) { - for (int size : {32, 64, 128, 256, 512, 1024}) { - b->Args({size, size, size}); - } - // Non-square - b->Args({256, 256, 1024}); - b->Args({1024, 64, 64}); -} +// clang-format off +#define CONTRACTION_SIZES \ + ->Args({32, 32, 32})->Args({64, 64, 64})->Args({128, 128, 128}) \ + ->Args({256, 256, 256})->Args({512, 512, 512})->Args({1024, 1024, 1024}) \ + ->Args({256, 256, 1024})->Args({1024, 64, 64}) -static void ThreadPoolSizes(::benchmark::Benchmark* b) { - for (int size : {64, 256, 512, 1024}) { - for (int threads : {1, 2, 4, 8, 16}) { - b->Args({size, size, size, threads}); - } - } -} +#define CONTRACTION_THREADPOOL_SIZES \ + ->Args({64, 64, 64, 1})->Args({64, 64, 64, 2})->Args({64, 64, 64, 4}) \ + ->Args({64, 64, 64, 8})->Args({64, 64, 64, 16}) \ + ->Args({256, 256, 256, 1})->Args({256, 256, 256, 2})->Args({256, 256, 256, 4}) \ + ->Args({256, 256, 256, 8})->Args({256, 256, 256, 16}) \ + ->Args({512, 512, 512, 1})->Args({512, 512, 512, 2})->Args({512, 512, 512, 4}) \ + ->Args({512, 512, 512, 8})->Args({512, 512, 512, 16}) \ + ->Args({1024, 1024, 1024, 1})->Args({1024, 1024, 1024, 2})->Args({1024, 1024, 1024, 4}) \ + ->Args({1024, 1024, 1024, 8})->Args({1024, 1024, 1024, 16}) -static void BatchSizes(::benchmark::Benchmark* b) { - for (int batch : {1, 8, 32}) { - for (int size : {64, 256}) { - b->Args({batch, size, size, size}); - } - } -} +#define BATCH_SIZES \ + ->Args({1, 64, 64, 64})->Args({1, 256, 256, 256}) \ + ->Args({8, 64, 64, 64})->Args({8, 256, 256, 256}) \ + ->Args({32, 64, 64, 64})->Args({32, 256, 256, 256}) +// clang-format on -BENCHMARK(BM_Contraction)->Apply(ContractionSizes); -BENCHMARK(BM_Contraction_RowMajor)->Apply(ContractionSizes); -BENCHMARK(BM_Contraction_ThreadPool)->Apply(ThreadPoolSizes); -BENCHMARK(BM_BatchContraction)->Apply(BatchSizes); 
+BENCHMARK(BM_Contraction) CONTRACTION_SIZES; +BENCHMARK(BM_Contraction_RowMajor) CONTRACTION_SIZES; +BENCHMARK(BM_Contraction_ThreadPool) CONTRACTION_THREADPOOL_SIZES; +BENCHMARK(BM_BatchContraction) BATCH_SIZES;
diff --git a/unsupported/benchmarks/Tensor/bench_convolution.cpp b/unsupported/benchmarks/Tensor/bench_convolution.cpp index 46e44ee..c6a1384 100644 --- a/unsupported/benchmarks/Tensor/bench_convolution.cpp +++ b/unsupported/benchmarks/Tensor/bench_convolution.cpp
@@ -109,43 +109,21 @@ state.counters["threads"] = threads; } -static void Conv1DSizes(::benchmark::Benchmark* b) { - for (int input : {128, 512, 2048}) { - for (int kernel : {3, 5, 11}) { - b->Args({input, kernel}); - } - } -} +// {input, kernel}, {channels, hw, k}, {hw, k, threads}: pure Cartesian products. +#define CONV1D_SIZES ->ArgsProduct({{128, 512, 2048}, {3, 5, 11}}) +#define CONV2D_CHANNEL_SIZES ->ArgsProduct({{3, 64, 128}, {16, 32, 56}, {3, 5}}) +#define CONV2D_THREADPOOL_SIZES ->ArgsProduct({{64, 128, 224}, {3, 5}, {2, 4, 8}}) -static void Conv2DSizes(::benchmark::Benchmark* b) { - for (int hw : {32, 64, 128, 224}) { - for (int k : {3, 5, 7}) { - b->Args({hw, hw, k, k}); - } - } -} +// {hw, hw, k, k}: explicit because hw and k are repeated. +// clang-format off +#define CONV2D_SIZES \ + ->Args({32, 32, 3, 3})->Args({32, 32, 5, 5})->Args({32, 32, 7, 7}) \ + ->Args({64, 64, 3, 3})->Args({64, 64, 5, 5})->Args({64, 64, 7, 7}) \ + ->Args({128, 128, 3, 3})->Args({128, 128, 5, 5})->Args({128, 128, 7, 7}) \ + ->Args({224, 224, 3, 3})->Args({224, 224, 5, 5})->Args({224, 224, 7, 7}) +// clang-format on -static void Conv2DChannelSizes(::benchmark::Benchmark* b) { - for (int c : {3, 64, 128}) { - for (int hw : {16, 32, 56}) { - for (int k : {3, 5}) { - b->Args({c, hw, k}); - } - } - } -} - -static void Conv2DThreadPoolSizes(::benchmark::Benchmark* b) { - for (int hw : {64, 128, 224}) { - for (int k : {3, 5}) { - for (int threads : {2, 4, 8}) { - b->Args({hw, k, threads}); - } - } - } -} - -BENCHMARK(BM_Convolve1D)->Apply(Conv1DSizes); -BENCHMARK(BM_Convolve2D)->Apply(Conv2DSizes); -BENCHMARK(BM_Convolve2D_Channels)->Apply(Conv2DChannelSizes); -BENCHMARK(BM_Convolve2D_ThreadPool)->Apply(Conv2DThreadPoolSizes); +BENCHMARK(BM_Convolve1D) CONV1D_SIZES; +BENCHMARK(BM_Convolve2D) CONV2D_SIZES; +BENCHMARK(BM_Convolve2D_Channels) CONV2D_CHANNEL_SIZES; +BENCHMARK(BM_Convolve2D_ThreadPool) CONV2D_THREADPOOL_SIZES;
diff --git a/unsupported/benchmarks/Tensor/bench_image_patch.cpp b/unsupported/benchmarks/Tensor/bench_image_patch.cpp index 2ae31f6..fb8a113 100644 --- a/unsupported/benchmarks/Tensor/bench_image_patch.cpp +++ b/unsupported/benchmarks/Tensor/bench_image_patch.cpp
@@ -198,111 +198,48 @@ state.counters["threads"] = threads; } -// --- Size generators --- +// --- Size configurations --- -static void PatchSizes(::benchmark::internal::Benchmark* b) { - // channels, H, W, kH, kW - for (int c : {3, 32, 64}) { - for (int hw : {32, 64, 128}) { - for (int k : {3, 5, 7}) { - b->Args({c, hw, hw, k, k}); - } - } - } -} +// channels, H, W, kH, kW (H==W and kH==kW); explicit because of duplicated dims. +// clang-format off +#define PATCH_SIZES \ + ->Args({3, 32, 32, 3, 3})->Args({3, 32, 32, 5, 5})->Args({3, 32, 32, 7, 7}) \ + ->Args({3, 64, 64, 3, 3})->Args({3, 64, 64, 5, 5})->Args({3, 64, 64, 7, 7}) \ + ->Args({3, 128, 128, 3, 3})->Args({3, 128, 128, 5, 5})->Args({3, 128, 128, 7, 7}) \ + ->Args({32, 32, 32, 3, 3})->Args({32, 32, 32, 5, 5})->Args({32, 32, 32, 7, 7}) \ + ->Args({32, 64, 64, 3, 3})->Args({32, 64, 64, 5, 5})->Args({32, 64, 64, 7, 7}) \ + ->Args({32, 128, 128, 3, 3})->Args({32, 128, 128, 5, 5})->Args({32, 128, 128, 7, 7}) \ + ->Args({64, 32, 32, 3, 3})->Args({64, 32, 32, 5, 5})->Args({64, 32, 32, 7, 7}) \ + ->Args({64, 64, 64, 3, 3})->Args({64, 64, 64, 5, 5})->Args({64, 64, 64, 7, 7}) \ + ->Args({64, 128, 128, 3, 3})->Args({64, 128, 128, 5, 5})->Args({64, 128, 128, 7, 7}) -static void StridedSizes(::benchmark::internal::Benchmark* b) { - // channels, H, kH, stride - for (int c : {3, 64}) { - for (int hw : {56, 112, 224}) { - for (int k : {3, 5}) { - for (int s : {1, 2}) { - b->Args({c, hw, k, s}); - } - } - } - } -} +// channels, H, W, kH (H==W); explicit because of duplicated H/W dim. 
+#define EXPLICIT_PADDING_SIZES \ + ->Args({3, 32, 32, 3})->Args({3, 32, 32, 5})->Args({3, 64, 64, 3})->Args({3, 64, 64, 5}) \ + ->Args({3, 128, 128, 3})->Args({3, 128, 128, 5})->Args({64, 32, 32, 3})->Args({64, 32, 32, 5}) \ + ->Args({64, 64, 64, 3})->Args({64, 64, 64, 5})->Args({64, 128, 128, 3})->Args({64, 128, 128, 5}) -static void DilatedSizes(::benchmark::internal::Benchmark* b) { - // channels, H, kH, dilation - for (int c : {3, 64}) { - for (int hw : {32, 64}) { - for (int k : {3, 5}) { - for (int d : {2, 4}) { - b->Args({c, hw, k, d}); - } - } - } - } -} +// {channels, spatial, kernel, stride/dilation/threads/batch}: pure Cartesian products. +#define STRIDED_SIZES ->ArgsProduct({{3, 64}, {56, 112, 224}, {3, 5}, {1, 2}}) +#define DILATED_SIZES ->ArgsProduct({{3, 64}, {32, 64}, {3, 5}, {2, 4}}) +#define BATCHED_SIZES ->ArgsProduct({{3, 64}, {32, 56}, {3, 5}, {4, 16, 32}}) +#define THREAD_POOL_SIZES ->ArgsProduct({{64, 128}, {56, 112}, {3, 5}, {2, 4, 8}}) -static void ExplicitPaddingSizes(::benchmark::internal::Benchmark* b) { - // channels, H, W, kH - for (int c : {3, 64}) { - for (int hw : {32, 64, 128}) { - for (int k : {3, 5}) { - b->Args({c, hw, hw, k}); - } - } - } -} +// Realistic CNN layer configurations: channels, spatial_size, kernel, stride. +// AlexNet conv1; VGG, VGG deeper x2; ResNet, ResNet downsample, ResNet deeper x2; +// MobileNet depthwise; Inception 1x1 (degenerate patch). 
+#define IMAGENET_SIZES \ + ->Args({3, 227, 11, 4}) \ + ->Args({64, 224, 3, 1})->Args({128, 112, 3, 1})->Args({256, 56, 3, 1}) \ + ->Args({64, 56, 3, 1})->Args({128, 56, 3, 2})->Args({256, 28, 3, 1})->Args({512, 14, 3, 1}) \ + ->Args({32, 112, 3, 1})->Args({192, 28, 1, 1}) +// clang-format on -static void BatchedSizes(::benchmark::internal::Benchmark* b) { - // channels, H, kH, batch - for (int c : {3, 64}) { - for (int hw : {32, 56}) { - for (int k : {3, 5}) { - for (int batch : {4, 16, 32}) { - b->Args({c, hw, k, batch}); - } - } - } - } -} - -static void ImageNetSizes(::benchmark::internal::Benchmark* b) { - // Realistic CNN layer configurations: channels, spatial_size, kernel, stride - // AlexNet conv1: 3x227x227, 11x11, stride 4 - b->Args({3, 227, 11, 4}); - // VGG-style: 64x224x224, 3x3, stride 1 - b->Args({64, 224, 3, 1}); - // VGG deeper: 128x112x112, 3x3, stride 1 - b->Args({128, 112, 3, 1}); - // VGG deeper: 256x56x56, 3x3, stride 1 - b->Args({256, 56, 3, 1}); - // ResNet: 64x56x56, 3x3, stride 1 - b->Args({64, 56, 3, 1}); - // ResNet downsample: 128x56x56, 3x3, stride 2 - b->Args({128, 56, 3, 2}); - // ResNet: 256x28x28, 3x3, stride 1 - b->Args({256, 28, 3, 1}); - // ResNet: 512x14x14, 3x3, stride 1 - b->Args({512, 14, 3, 1}); - // MobileNet depthwise: 32x112x112, 3x3, stride 1 - b->Args({32, 112, 3, 1}); - // Inception 1x1 (degenerate patch): 192x28x28, 1x1, stride 1 - b->Args({192, 28, 1, 1}); -} - -static void ThreadPoolSizes(::benchmark::internal::Benchmark* b) { - // channels, H, kH, threads - for (int c : {64, 128}) { - for (int hw : {56, 112}) { - for (int k : {3, 5}) { - for (int threads : {2, 4, 8}) { - b->Args({c, hw, k, threads}); - } - } - } - } -} - -BENCHMARK(BM_ImagePatch_Valid)->Apply(PatchSizes); -BENCHMARK(BM_ImagePatch_Same)->Apply(PatchSizes); -BENCHMARK(BM_ImagePatch_Strided)->Apply(StridedSizes); -BENCHMARK(BM_ImagePatch_Dilated)->Apply(DilatedSizes); -BENCHMARK(BM_ImagePatch_ExplicitPadding)->Apply(ExplicitPaddingSizes); 
-BENCHMARK(BM_ImagePatch_Batched)->Apply(BatchedSizes); -BENCHMARK(BM_ImagePatch_ImageNet)->Apply(ImageNetSizes); -BENCHMARK(BM_ImagePatch_ThreadPool)->Apply(ThreadPoolSizes); +BENCHMARK(BM_ImagePatch_Valid) PATCH_SIZES; +BENCHMARK(BM_ImagePatch_Same) PATCH_SIZES; +BENCHMARK(BM_ImagePatch_Strided) STRIDED_SIZES; +BENCHMARK(BM_ImagePatch_Dilated) DILATED_SIZES; +BENCHMARK(BM_ImagePatch_ExplicitPadding) EXPLICIT_PADDING_SIZES; +BENCHMARK(BM_ImagePatch_Batched) BATCHED_SIZES; +BENCHMARK(BM_ImagePatch_ImageNet) IMAGENET_SIZES; +BENCHMARK(BM_ImagePatch_ThreadPool) THREAD_POOL_SIZES;
diff --git a/unsupported/benchmarks/Tensor/bench_layout_swap.cpp b/unsupported/benchmarks/Tensor/bench_layout_swap.cpp index 8caeb57..c51d1de 100644 --- a/unsupported/benchmarks/Tensor/bench_layout_swap.cpp +++ b/unsupported/benchmarks/Tensor/bench_layout_swap.cpp
@@ -60,18 +60,10 @@ state.SetBytesProcessed(state.iterations() * 3ll * static_cast<int64_t>(M) * N * sizeof(Scalar)); } -static void LayoutSwapSizes(::benchmark::Benchmark* b) { - for (int size : {64, 256, 1024}) { - b->Args({size, size}); - } -} +// {n, n} and {n, n, n}: explicit because dims are repeated. +#define LAYOUT_SWAP_SIZES ->Args({64, 64})->Args({256, 256})->Args({1024, 1024}) +#define LAYOUT_SWAP_3D_SIZES ->Args({32, 32, 32})->Args({64, 64, 64})->Args({128, 128, 128}) -static void LayoutSwap3DSizes(::benchmark::Benchmark* b) { - b->Args({32, 32, 32}); - b->Args({64, 64, 64}); - b->Args({128, 128, 128}); -} - -BENCHMARK(BM_LayoutSwap_2D)->Apply(LayoutSwapSizes); -BENCHMARK(BM_LayoutSwap_3D)->Apply(LayoutSwap3DSizes); -BENCHMARK(BM_LayoutSwap_Composed)->Apply(LayoutSwapSizes); +BENCHMARK(BM_LayoutSwap_2D) LAYOUT_SWAP_SIZES; +BENCHMARK(BM_LayoutSwap_3D) LAYOUT_SWAP_3D_SIZES; +BENCHMARK(BM_LayoutSwap_Composed) LAYOUT_SWAP_SIZES;
diff --git a/unsupported/benchmarks/Tensor/bench_morphing.cpp b/unsupported/benchmarks/Tensor/bench_morphing.cpp index ff7e17f..fa2b5f2 100644 --- a/unsupported/benchmarks/Tensor/bench_morphing.cpp +++ b/unsupported/benchmarks/Tensor/bench_morphing.cpp
@@ -168,46 +168,32 @@ state.counters["threads"] = threads; } -static void MorphSizes(::benchmark::Benchmark* b) { - for (int size : {256, 1024}) { - b->Args({size, size}); - } -} +// clang-format off +#define MORPH_SIZES \ + ->Args({256, 256})->Args({1024, 1024}) -static void ChipSizes(::benchmark::Benchmark* b) { - b->Args({32, 256, 256}); - b->Args({64, 128, 128}); - b->Args({8, 512, 512}); -} +#define CHIP_SIZES \ + ->Args({32, 256, 256})->Args({64, 128, 128})->Args({8, 512, 512}) -static void PadSizes(::benchmark::Benchmark* b) { - for (int size : {256, 1024}) { - for (int pad : {1, 4, 16}) { - b->Args({size, size, pad}); - } - } -} +#define PAD_SIZES \ + ->Args({256, 256, 1})->Args({256, 256, 4})->Args({256, 256, 16}) \ + ->Args({1024, 1024, 1})->Args({1024, 1024, 4})->Args({1024, 1024, 16}) -static void StrideSizes(::benchmark::Benchmark* b) { - for (int size : {256, 1024}) { - for (int stride : {2, 4}) { - b->Args({size, size, stride}); - } - } -} +#define STRIDE_SIZES \ + ->Args({256, 256, 2})->Args({256, 256, 4}) \ + ->Args({1024, 1024, 2})->Args({1024, 1024, 4}) -static void MorphThreadPoolSizes(::benchmark::Benchmark* b) { - for (int size : {256, 1024}) { - for (int threads : {1, 2, 4, 8, 12, 16}) { - b->Args({size, size, threads}); - } - } -} +#define MORPH_THREADPOOL_SIZES \ + ->Args({256, 256, 1})->Args({256, 256, 2})->Args({256, 256, 4}) \ + ->Args({256, 256, 8})->Args({256, 256, 12})->Args({256, 256, 16}) \ + ->Args({1024, 1024, 1})->Args({1024, 1024, 2})->Args({1024, 1024, 4}) \ + ->Args({1024, 1024, 8})->Args({1024, 1024, 12})->Args({1024, 1024, 16}) +// clang-format on -BENCHMARK(BM_Reshape)->Apply(MorphSizes); -BENCHMARK(BM_Slice)->Apply(MorphSizes); -BENCHMARK(BM_Chip)->Apply(ChipSizes); -BENCHMARK(BM_Pad)->Apply(PadSizes); -BENCHMARK(BM_Stride)->Apply(StrideSizes); -BENCHMARK(BM_Slice_ThreadPool)->Apply(MorphThreadPoolSizes)->UseRealTime(); -BENCHMARK(BM_Pad_ThreadPool)->Apply(MorphThreadPoolSizes)->UseRealTime(); +BENCHMARK(BM_Reshape) 
MORPH_SIZES; +BENCHMARK(BM_Slice) MORPH_SIZES; +BENCHMARK(BM_Chip) CHIP_SIZES; +BENCHMARK(BM_Pad) PAD_SIZES; +BENCHMARK(BM_Stride) STRIDE_SIZES; +BENCHMARK(BM_Slice_ThreadPool) MORPH_THREADPOOL_SIZES->UseRealTime(); +BENCHMARK(BM_Pad_ThreadPool) MORPH_THREADPOOL_SIZES->UseRealTime();
diff --git a/unsupported/benchmarks/Tensor/bench_reduction.cpp b/unsupported/benchmarks/Tensor/bench_reduction.cpp index 795c95c..cc7f5ce 100644 --- a/unsupported/benchmarks/Tensor/bench_reduction.cpp +++ b/unsupported/benchmarks/Tensor/bench_reduction.cpp
@@ -125,34 +125,22 @@ state.SetBytesProcessed(state.iterations() * M * N * sizeof(Scalar)); } -static void ReductionSizes(::benchmark::Benchmark* b) { - for (int size : {64, 256, 1024}) { - b->Args({size, size}); - } -} +// clang-format off +#define REDUCTION_SIZES \ + ->Args({64, 64})->Args({256, 256})->Args({1024, 1024}) -static void ThreadPoolReductionSizes(::benchmark::Benchmark* b) { - for (int size : {256, 1024}) { - for (int threads : {2, 4, 8}) { - b->Args({size, size, threads}); - } - } -} +#define THREADPOOL_REDUCTION_SIZES \ + ->Args({256, 256, 2})->Args({256, 256, 4})->Args({256, 256, 8}) \ + ->Args({1024, 1024, 2})->Args({1024, 1024, 4})->Args({1024, 1024, 8}) -static void SpatialSizes(::benchmark::Benchmark* b) { - for (int batch : {1, 8, 32}) { - for (int c : {64, 128}) { - for (int h : {16, 32}) { - b->Args({batch, c, h}); - } - } - } -} +// {batch, channels, h}: pure Cartesian product. +#define SPATIAL_SIZES ->ArgsProduct({{1, 8, 32}, {64, 128}, {16, 32}}) +// clang-format on -BENCHMARK(BM_FullReduction<internal::SumReducer<Scalar>>)->Apply(ReductionSizes)->Name("SumReduction"); -BENCHMARK(BM_FullReduction<internal::MaxReducer<Scalar>>)->Apply(ReductionSizes)->Name("MaxReduction_Full"); -BENCHMARK(BM_MaxReduction)->Apply(ReductionSizes); -BENCHMARK(BM_ReduceInner)->Apply(ReductionSizes); -BENCHMARK(BM_ReduceOuter)->Apply(ReductionSizes); -BENCHMARK(BM_ReduceSpatial)->Apply(SpatialSizes); -BENCHMARK(BM_FullReduction_ThreadPool)->Apply(ThreadPoolReductionSizes); +BENCHMARK(BM_FullReduction<internal::SumReducer<Scalar>>) REDUCTION_SIZES->Name("SumReduction"); +BENCHMARK(BM_FullReduction<internal::MaxReducer<Scalar>>) REDUCTION_SIZES->Name("MaxReduction_Full"); +BENCHMARK(BM_MaxReduction) REDUCTION_SIZES; +BENCHMARK(BM_ReduceInner) REDUCTION_SIZES; +BENCHMARK(BM_ReduceOuter) REDUCTION_SIZES; +BENCHMARK(BM_ReduceSpatial) SPATIAL_SIZES; +BENCHMARK(BM_FullReduction_ThreadPool) THREADPOOL_REDUCTION_SIZES;
diff --git a/unsupported/benchmarks/Tensor/bench_reverse.cpp b/unsupported/benchmarks/Tensor/bench_reverse.cpp index 852df68..999266c 100644 --- a/unsupported/benchmarks/Tensor/bench_reverse.cpp +++ b/unsupported/benchmarks/Tensor/bench_reverse.cpp
@@ -85,19 +85,16 @@ // 64x64 = 16 KB (L1) // 256x256 = 256 KB (L2) // 1024x1024 = 4 MB (LLC / DRAM) -static void ReverseSizes(::benchmark::Benchmark* b) { - for (int size : {64, 256, 1024}) { - b->Args({size, size}); - } -} +// clang-format off +#define REVERSE_SIZES \ + ->Args({64, 64})->Args({256, 256})->Args({1024, 1024}) -static void Reverse3DSizes(::benchmark::Benchmark* b) { - b->Args({32, 32, 32}); // 128 KB - b->Args({64, 64, 64}); // 1 MB - b->Args({128, 128, 128}); // 8 MB -} +// 128 KB / 1 MB / 8 MB +#define REVERSE_3D_SIZES \ + ->Args({32, 32, 32})->Args({64, 64, 64})->Args({128, 128, 128}) +// clang-format on -BENCHMARK(BM_Reverse_Inner)->Apply(ReverseSizes); -BENCHMARK(BM_Reverse_Outer)->Apply(ReverseSizes); -BENCHMARK(BM_Reverse_All)->Apply(ReverseSizes); -BENCHMARK(BM_Reverse_3D_Inner)->Apply(Reverse3DSizes); +BENCHMARK(BM_Reverse_Inner) REVERSE_SIZES; +BENCHMARK(BM_Reverse_Outer) REVERSE_SIZES; +BENCHMARK(BM_Reverse_All) REVERSE_SIZES; +BENCHMARK(BM_Reverse_3D_Inner) REVERSE_3D_SIZES;
diff --git a/unsupported/benchmarks/Tensor/bench_roll.cpp b/unsupported/benchmarks/Tensor/bench_roll.cpp index 219a8ff..6724374 100644 --- a/unsupported/benchmarks/Tensor/bench_roll.cpp +++ b/unsupported/benchmarks/Tensor/bench_roll.cpp
@@ -83,21 +83,17 @@ state.SetBytesProcessed(state.iterations() * static_cast<int64_t>(D0) * D1 * D2 * sizeof(Scalar)); } -static void RollSizes(::benchmark::Benchmark* b) { - for (int size : {64, 256, 1024}) { - for (int shift : {1, 13}) { - b->Args({size, size, shift}); - } - } -} +// clang-format off +#define ROLL_SIZES \ + ->Args({64, 64, 1})->Args({64, 64, 13}) \ + ->Args({256, 256, 1})->Args({256, 256, 13}) \ + ->Args({1024, 1024, 1})->Args({1024, 1024, 13}) -static void Roll3DSizes(::benchmark::Benchmark* b) { - b->Args({32, 32, 32}); - b->Args({64, 64, 64}); - b->Args({128, 128, 128}); -} +#define ROLL_3D_SIZES \ + ->Args({32, 32, 32})->Args({64, 64, 64})->Args({128, 128, 128}) +// clang-format on -BENCHMARK(BM_Roll_Inner)->Apply(RollSizes); -BENCHMARK(BM_Roll_Outer)->Apply(RollSizes); -BENCHMARK(BM_Roll_All)->Apply(RollSizes); -BENCHMARK(BM_Roll_3D_Inner)->Apply(Roll3DSizes); +BENCHMARK(BM_Roll_Inner) ROLL_SIZES; +BENCHMARK(BM_Roll_Outer) ROLL_SIZES; +BENCHMARK(BM_Roll_All) ROLL_SIZES; +BENCHMARK(BM_Roll_3D_Inner) ROLL_3D_SIZES;
diff --git a/unsupported/benchmarks/Tensor/bench_shuffling.cpp b/unsupported/benchmarks/Tensor/bench_shuffling.cpp index 6de76a8..b824676 100644 --- a/unsupported/benchmarks/Tensor/bench_shuffling.cpp +++ b/unsupported/benchmarks/Tensor/bench_shuffling.cpp
@@ -138,53 +138,30 @@ state.counters["threads"] = threads; } -static void Shuffle2DSizes(::benchmark::Benchmark* b) { - for (int size : {256, 1024}) { - b->Args({size, size}); - } - b->Args({64, 4096}); - b->Args({4096, 64}); -} +// clang-format off +#define SHUFFLE_2D_SIZES \ + ->Args({256, 256})->Args({1024, 1024}) \ + ->Args({64, 4096})->Args({4096, 64}) -static void Shuffle3DSizes(::benchmark::Benchmark* b) { - b->Args({64, 64, 64}); - b->Args({128, 128, 64}); - b->Args({32, 256, 256}); -} +#define SHUFFLE_3D_SIZES \ + ->Args({64, 64, 64})->Args({128, 128, 64})->Args({32, 256, 256}) -static void Shuffle4DSizes(::benchmark::Benchmark* b) { - for (int batch : {1, 8}) { - for (int c : {3, 64}) { - for (int h : {32, 64}) { - b->Args({batch, c, h}); - } - } - } -} +// {batch, channels, h}: pure Cartesian product. +#define SHUFFLE_4D_SIZES ->ArgsProduct({{1, 8}, {3, 64}, {32, 64}}) -static void Shuffle2DThreadPoolSizes(::benchmark::Benchmark* b) { - for (int size : {256, 1024}) { - for (int threads : {1, 2, 4, 8, 12, 16}) { - b->Args({size, size, threads}); - } - } -} +#define SHUFFLE_2D_THREADPOOL_SIZES \ + ->Args({256, 256, 1})->Args({256, 256, 2})->Args({256, 256, 4}) \ + ->Args({256, 256, 8})->Args({256, 256, 12})->Args({256, 256, 16}) \ + ->Args({1024, 1024, 1})->Args({1024, 1024, 2})->Args({1024, 1024, 4}) \ + ->Args({1024, 1024, 8})->Args({1024, 1024, 12})->Args({1024, 1024, 16}) -static void Shuffle4DThreadPoolSizes(::benchmark::Benchmark* b) { - for (int batch : {1, 8}) { - for (int c : {64}) { - for (int h : {32, 64}) { - for (int threads : {1, 2, 4, 8, 12, 16}) { - b->Args({batch, c, h, threads}); - } - } - } - } -} +// {batch, channels, h, threads}: pure Cartesian product. 
+#define SHUFFLE_4D_THREADPOOL_SIZES ->ArgsProduct({{1, 8}, {64}, {32, 64}, {1, 2, 4, 8, 12, 16}}) +// clang-format on -BENCHMARK(BM_Shuffle2D)->Apply(Shuffle2DSizes); -BENCHMARK(BM_ShuffleIdentity)->Apply(Shuffle2DSizes); -BENCHMARK(BM_Shuffle3D)->Apply(Shuffle3DSizes); -BENCHMARK(BM_Shuffle4D_NCHW_to_NHWC)->Apply(Shuffle4DSizes); -BENCHMARK(BM_Shuffle2D_ThreadPool)->Apply(Shuffle2DThreadPoolSizes)->UseRealTime(); -BENCHMARK(BM_Shuffle4D_NCHW_to_NHWC_ThreadPool)->Apply(Shuffle4DThreadPoolSizes)->UseRealTime(); +BENCHMARK(BM_Shuffle2D) SHUFFLE_2D_SIZES; +BENCHMARK(BM_ShuffleIdentity) SHUFFLE_2D_SIZES; +BENCHMARK(BM_Shuffle3D) SHUFFLE_3D_SIZES; +BENCHMARK(BM_Shuffle4D_NCHW_to_NHWC) SHUFFLE_4D_SIZES; +BENCHMARK(BM_Shuffle2D_ThreadPool) SHUFFLE_2D_THREADPOOL_SIZES->UseRealTime(); +BENCHMARK(BM_Shuffle4D_NCHW_to_NHWC_ThreadPool) SHUFFLE_4D_THREADPOOL_SIZES->UseRealTime();
diff --git a/unsupported/benchmarks/Tensor/bench_tensor_fft.cpp b/unsupported/benchmarks/Tensor/bench_tensor_fft.cpp index 26ff64b..39850eb 100644 --- a/unsupported/benchmarks/Tensor/bench_tensor_fft.cpp +++ b/unsupported/benchmarks/Tensor/bench_tensor_fft.cpp
@@ -69,12 +69,10 @@ benchmark::Counter(mflops, benchmark::Counter::kIsIterationInvariantRate, benchmark::Counter::kIs1000); } -static void FFTSizes(::benchmark::Benchmark* b) { - for (int n : {64, 256, 1024, 4096}) { - b->Arg(n); - } -} +// clang-format off +#define FFT_SIZES ->Arg(64)->Arg(256)->Arg(1024)->Arg(4096) +// clang-format on -BENCHMARK(BM_TensorFFT_1D)->Apply(FFTSizes); -BENCHMARK(BM_TensorFFT_2D)->Apply(FFTSizes); -BENCHMARK(BM_TensorIFFT_1D)->Apply(FFTSizes); +BENCHMARK(BM_TensorFFT_1D) FFT_SIZES; +BENCHMARK(BM_TensorFFT_2D) FFT_SIZES; +BENCHMARK(BM_TensorIFFT_1D) FFT_SIZES;