Argh, the previous benchmark results were corrupted by overflow. With that fixed, slice vectorization gives about a +25% speedup, which is still nice, as I expected zero or even negative benefit.
diff --git a/bench/benchmarkSlice.cpp b/bench/benchmarkSlice.cpp index 197fe24..11cbf09 100644 --- a/bench/benchmarkSlice.cpp +++ b/bench/benchmarkSlice.cpp
@@ -29,6 +29,7 @@ m.block(r,c,nr,nc) += Mat::ones(nr,nc); m.block(r,c,nr,nc) *= SCALAR(10); m.block(r,c,nr,nc) -= Mat::constant(nr,nc,10); + m.block(r,c,nr,nc) /= SCALAR(10); } cout << m[0] << endl; return 0;