Raah, the benchmark results were corrupted by overflow. Slice vectorization
now gives about a +25% speedup, which is still nice, as I expected zero or
even negative benefit.
diff --git a/bench/benchmarkSlice.cpp b/bench/benchmarkSlice.cpp
index 197fe24..11cbf09 100644
--- a/bench/benchmarkSlice.cpp
+++ b/bench/benchmarkSlice.cpp
@@ -29,6 +29,7 @@
     m.block(r,c,nr,nc) += Mat::ones(nr,nc);
     m.block(r,c,nr,nc) *= SCALAR(10);
     m.block(r,c,nr,nc) -= Mat::constant(nr,nc,10);
+    m.block(r,c,nr,nc) /= SCALAR(10);
   }
   cout << m[0] << endl;
   return 0;