* updated benchmark files according to recent renamings
* various improvements in BTL including trisolver and cholesky bench
diff --git a/bench/BenchTimer.h b/bench/BenchTimer.h
index b7791fc..f9fda03 100644
--- a/bench/BenchTimer.h
+++ b/bench/BenchTimer.h
@@ -29,6 +29,7 @@
 #include <sys/time.h>
 #include <unistd.h>
 #include <cstdlib>
+#include <numeric>
 
 namespace Eigen
 {
@@ -39,11 +40,11 @@
 {
 public:
 
-  BenchTimer() : m_best(1e12) {}
+  BenchTimer() { reset(); }
 
   ~BenchTimer() {}
 
-  inline void reset(void) {m_best = 1e12;}
+  inline void reset(void) {m_best = 1e6;}
   inline void start(void) {m_start = getTime();}
   inline void stop(void)
   {
diff --git a/bench/benchBlasGemm.cpp b/bench/benchBlasGemm.cpp
index 5455b6e..25458f8 100644
--- a/bench/benchBlasGemm.cpp
+++ b/bench/benchBlasGemm.cpp
@@ -99,9 +99,9 @@
 
   Scalar alpha, beta;
   MyMatrix ma(M,K), mb(K,N), mc(M,N);
-  ma = MyMatrix::random(M,K);
-  mb = MyMatrix::random(K,N);
-  mc = MyMatrix::random(M,N);
+  ma = MyMatrix::Random(M,K);
+  mb = MyMatrix::Random(K,N);
+  mc = MyMatrix::Random(M,N);
 
   Eigen::BenchTimer timer;
 
@@ -132,9 +132,9 @@
   }
 
   // clear
-  ma = MyMatrix::random(M,K);
-  mb = MyMatrix::random(K,N);
-  mc = MyMatrix::random(M,N);
+  ma = MyMatrix::Random(M,K);
+  mb = MyMatrix::Random(K,N);
+  mc = MyMatrix::Random(M,N);
 
   // eigen
 //   if (!(std::string(argv[1])=="auto"))
@@ -153,9 +153,9 @@
   }
 
   // clear
-  ma = MyMatrix::random(M,K);
-  mb = MyMatrix::random(K,N);
-  mc = MyMatrix::random(M,N);
+  ma = MyMatrix::Random(M,K);
+  mb = MyMatrix::Random(K,N);
+  mc = MyMatrix::Random(M,N);
 
   // eigen normal
   if (!(std::string(argv[1])=="auto"))
@@ -193,11 +193,11 @@
 void check_product(int M, int N, int K)
 {
   MyMatrix ma(M,K), mb(K,N), mc(M,N), maT(K,M), mbT(N,K), meigen(M,N), mref(M,N);
-  ma = MyMatrix::random(M,K);
-  mb = MyMatrix::random(K,N);
+  ma = MyMatrix::Random(M,K);
+  mb = MyMatrix::Random(K,N);
   maT = ma.transpose();
   mbT = mb.transpose();
-  mc = MyMatrix::random(M,N);
+  mc = MyMatrix::Random(M,N);
 
   MyMatrix::Scalar eps = 1e-4;
 
diff --git a/bench/benchEigenSolver.cpp b/bench/benchEigenSolver.cpp
index a62ee41..395bff3 100644
--- a/bench/benchEigenSolver.cpp
+++ b/bench/benchEigenSolver.cpp
@@ -40,7 +40,7 @@
   typedef typename MatrixType::Scalar Scalar;
   typedef Matrix<Scalar, MatrixType::RowsAtCompileTime, MatrixType::RowsAtCompileTime> SquareMatrixType;
 
-  MatrixType a = MatrixType::random(rows,cols);
+  MatrixType a = MatrixType::Random(rows,cols);
   SquareMatrixType covMat =  a * a.adjoint();
 
   BenchTimer timerSa, timerStd;
diff --git a/bench/benchmark.cpp b/bench/benchmark.cpp
index 5aeb2d3..90e04dd 100644
--- a/bench/benchmark.cpp
+++ b/bench/benchmark.cpp
@@ -18,7 +18,7 @@
 
 int main(int argc, char *argv[])
 {
-    Matrix<SCALAR,MATSIZE,MATSIZE> I = Matrix<SCALAR,MATSIZE,MATSIZE>::ones();
+    Matrix<SCALAR,MATSIZE,MATSIZE> I = Matrix<SCALAR,MATSIZE,MATSIZE>::Ones();
     Matrix<SCALAR,MATSIZE,MATSIZE> m;
     for(int i = 0; i < MATSIZE; i++)
         for(int j = 0; j < MATSIZE; j++)
@@ -28,7 +28,7 @@
     asm("#begin");
     for(int a = 0; a < REPEAT; a++)
     {
-        m = Matrix<SCALAR,MATSIZE,MATSIZE>::ones() + 0.00005 * (m + (m*m));
+        m = Matrix<SCALAR,MATSIZE,MATSIZE>::Ones() + 0.00005 * (m + (m*m));
     }
     asm("#end");
     cout << m << endl;
diff --git a/bench/benchmarkSlice.cpp b/bench/benchmarkSlice.cpp
index 11cbf09..5595d0a 100644
--- a/bench/benchmarkSlice.cpp
+++ b/bench/benchmarkSlice.cpp
@@ -26,7 +26,7 @@
     c = Eigen::ei_random<int>(0,10);
     nr = Eigen::ei_random<int>(50,80);
     nc = Eigen::ei_random<int>(50,80);
-    m.block(r,c,nr,nc) += Mat::ones(nr,nc);
+    m.block(r,c,nr,nc) += Mat::Ones(nr,nc);
     m.block(r,c,nr,nc) *= SCALAR(10);
     m.block(r,c,nr,nc) -= Mat::constant(nr,nc,10);
     m.block(r,c,nr,nc) /= SCALAR(10);
diff --git a/bench/benchmarkX.cpp b/bench/benchmarkX.cpp
index a269f69..18152e9 100644
--- a/bench/benchmarkX.cpp
+++ b/bench/benchmarkX.cpp
@@ -18,7 +18,7 @@
 
 int main(int argc, char *argv[])
 {
-	MATTYPE I = MATTYPE::ones(MATSIZE,MATSIZE);
+	MATTYPE I = MATTYPE::Ones(MATSIZE,MATSIZE);
 	MATTYPE m(MATSIZE,MATSIZE);
 	for(int i = 0; i < MATSIZE; i++) for(int j = 0; j < MATSIZE; j++)
 	{
diff --git a/bench/benchmarkXcwise.cpp b/bench/benchmarkXcwise.cpp
index b54f1f4..bc9b8e7 100644
--- a/bench/benchmarkXcwise.cpp
+++ b/bench/benchmarkXcwise.cpp
@@ -19,7 +19,7 @@
 
 int main(int argc, char *argv[])
 {
-	VECTYPE I = VECTYPE::ones(VECSIZE);
+	VECTYPE I = VECTYPE::Ones(VECSIZE);
 	VECTYPE m(VECSIZE,1);
 	for(int i = 0; i < VECSIZE; i++)
 	{
@@ -27,7 +27,7 @@
 	}
 	for(int a = 0; a < REPEAT; a++)
 	{
-		m = VECTYPE::ones(VECSIZE) + 0.00005 * (m.cwise().square() + m/4);
+		m = VECTYPE::Ones(VECSIZE) + 0.00005 * (m.cwise().square() + m/4);
 	}
 	cout << m[0] << endl;
 	return 0;
diff --git a/bench/btl/CMakeLists.txt b/bench/btl/CMakeLists.txt
index 600fc38..a7f90a2 100644
--- a/bench/btl/CMakeLists.txt
+++ b/bench/btl/CMakeLists.txt
@@ -26,6 +26,11 @@
   ${PROJECT_SOURCE_DIR}/generic_bench/utils
   ${PROJECT_SOURCE_DIR}/libs/STL)
 
+# find_package(MKL)
+# if (MKL_FOUND)
+#   add_definitions(-DHAVE_MKL)
+#   set(DEFAULT_LIBRARIES ${MKL_LIBRARIES})
+# endif (MKL_FOUND)
 
 MACRO(BTL_ADD_BENCH targetname)
 
@@ -49,6 +54,7 @@
   IF(BUILD_${targetname})
     ADD_EXECUTABLE(${targetname} ${_sources})
     ADD_TEST(${targetname} "${targetname}")
+    target_link_libraries(${targetname} ${DEFAULT_LIBRARIES})
   ENDIF(BUILD_${targetname})
 
 ENDMACRO(BTL_ADD_BENCH)
diff --git a/bench/btl/README b/bench/btl/README
index 6d5712b..787002f 100644
--- a/bench/btl/README
+++ b/bench/btl/README
@@ -29,7 +29,18 @@
 
   $ ctest -V
 
-You can also run a single bench, e.g.: ctest -V -R eigen
+You can run the benchmarks only on libraries matching a given regular expression:
+  ctest -V -R <regexp>
+For instance:
+  ctest -V -R eigen2
+
+You can also select a given set of actions defining the environment variable BTL_CONFIG this way:
+  BTL_CONFIG="-a action1{:action2}*" ctest -V
+An exemple:
+  BTL_CONFIG="-a axpy:vector_matrix:trisolve:ata" ctest -V -R eigen2
+
+Finally, if bench results already exist (the bench*.dat files) then they merges by keeping the best for each matrix size. If you want to overwrite the previous ones you can simply add the "--overwrite" option:
+  BTL_CONFIG="-a axpy:vector_matrix:trisolve:ata --overwrite" ctest -V -R eigen2
 
 4 : Analyze the result. different data files (.dat) are produced in each libs directories.
  If gnuplot is available, choose a directory name in the data directory to store the results and type
diff --git a/bench/btl/actions/action_axpby.hh b/bench/btl/actions/action_axpby.hh
new file mode 100644
index 0000000..7a4f158
--- /dev/null
+++ b/bench/btl/actions/action_axpby.hh
@@ -0,0 +1,124 @@
+//=====================================================
+// File   :  action_axpby.hh
+// Copyright (C) 2008 Gael Guennebaud <g.gael@free.fr>
+//=====================================================
+//
+// This program is free software; you can redistribute it and/or
+// modify it under the terms of the GNU General Public License
+// as published by the Free Software Foundation; either version 2
+// of the License, or (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+// You should have received a copy of the GNU General Public License
+// along with this program; if not, write to the Free Software
+// Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
+//
+#ifndef ACTION_AXPBY
+#define ACTION_AXPBY
+#include "utilities.h"
+#include "STL_interface.hh"
+#include <string>
+#include "init/init_function.hh"
+#include "init/init_vector.hh"
+#include "init/init_matrix.hh"
+
+using namespace std;
+
+template<class Interface>
+class Action_axpby {
+
+public :
+
+  // Ctor
+  Action_axpby( int size ):_size(size),_alpha(0.5),_beta(0.95)
+  {
+    MESSAGE("Action_axpby Ctor");
+
+    // STL vector initialization
+    init_vector<pseudo_random>(X_stl,_size);
+    init_vector<pseudo_random>(Y_stl,_size);
+    init_vector<null_function>(resu_stl,_size);
+
+    // generic matrix and vector initialization
+    Interface::vector_from_stl(X_ref,X_stl);
+    Interface::vector_from_stl(Y_ref,Y_stl);
+
+    Interface::vector_from_stl(X,X_stl);
+    Interface::vector_from_stl(Y,Y_stl);
+  }
+
+  // invalidate copy ctor
+  Action_axpby( const  Action_axpby & )
+  {
+    INFOS("illegal call to Action_axpby Copy Ctor");
+    exit(1);
+  }
+
+  // Dtor
+  ~Action_axpby( void ){
+    MESSAGE("Action_axpby Dtor");
+
+    // deallocation
+    Interface::free_vector(X_ref);
+    Interface::free_vector(Y_ref);
+
+    Interface::free_vector(X);
+    Interface::free_vector(Y);
+  }
+
+  // action name
+  static inline std::string name( void )
+  {
+    return "axpby_"+Interface::name();
+  }
+
+  double nb_op_base( void ){
+    return 3.0*_size;
+  }
+
+  inline void initialize( void ){
+    Interface::copy_vector(X_ref,X,_size);
+    Interface::copy_vector(Y_ref,Y,_size);
+  }
+
+  inline void calculate( void ) {
+    Interface::axpby(_alpha,X,_beta,Y,_size);
+  }
+
+  void check_result( void ){
+    // calculation check
+    Interface::vector_to_stl(Y,resu_stl);
+
+    STL_interface<typename Interface::real_type>::axpby(_alpha,X_stl,_beta,Y_stl,_size);
+
+    typename Interface::real_type error=
+      STL_interface<typename Interface::real_type>::norm_diff(Y_stl,resu_stl);
+
+    if (error>1.e-6){
+      INFOS("WRONG CALCULATION...residual=" << error);
+      exit(2);
+    }
+  }
+
+private :
+
+  typename Interface::stl_vector X_stl;
+  typename Interface::stl_vector Y_stl;
+  typename Interface::stl_vector resu_stl;
+
+  typename Interface::gene_vector X_ref;
+  typename Interface::gene_vector Y_ref;
+
+  typename Interface::gene_vector X;
+  typename Interface::gene_vector Y;
+
+  typename Interface::real_type _alpha;
+  typename Interface::real_type _beta;
+
+  int _size;
+};
+
+#endif
diff --git a/bench/btl/actions/action_cholesky.hh b/bench/btl/actions/action_cholesky.hh
new file mode 100644
index 0000000..67495b9
--- /dev/null
+++ b/bench/btl/actions/action_cholesky.hh
@@ -0,0 +1,132 @@
+//=====================================================
+// File   :  action_cholesky.hh
+// Copyright (C) 2008 Gael Guennebaud <g.gael@free.fr>
+//=====================================================
+//
+// This program is free software; you can redistribute it and/or
+// modify it under the terms of the GNU General Public License
+// as published by the Free Software Foundation; either version 2
+// of the License, or (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+// You should have received a copy of the GNU General Public License
+// along with this program; if not, write to the Free Software
+// Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
+//
+#ifndef ACTION_CHOLESKY
+#define ACTION_CHOLESKY
+#include "utilities.h"
+#include "STL_interface.hh"
+#include <string>
+#include "init/init_function.hh"
+#include "init/init_vector.hh"
+#include "init/init_matrix.hh"
+
+using namespace std;
+
+template<class Interface>
+class Action_cholesky {
+
+public :
+
+  // Ctor
+
+  Action_cholesky( int size ):_size(size)
+  {
+    MESSAGE("Action_cholesky Ctor");
+
+    // STL vector initialization
+    typename Interface::stl_matrix tmp;
+    init_matrix<pseudo_random>(tmp,_size);
+    init_matrix<null_function>(X_stl,_size);
+    STL_interface<typename Interface::real_type>::ata_product(tmp,X_stl,_size);
+    
+    init_matrix<null_function>(C_stl,_size);
+    init_matrix<null_function>(resu_stl,_size);
+
+    // generic matrix and vector initialization
+    Interface::matrix_from_stl(X_ref,X_stl);
+    Interface::matrix_from_stl(X,X_stl);
+    Interface::matrix_from_stl(C,C_stl);
+
+    _cost = 0;
+    for (int j=0; j<_size; ++j)
+    {
+      int r = std::max(_size - j -1,0);
+      _cost += 2*(r*j+r+j);
+    }
+  }
+
+  // invalidate copy ctor
+
+  Action_cholesky( const  Action_cholesky & )
+  {
+    INFOS("illegal call to Action_cholesky Copy Ctor");
+    exit(1);
+  }
+
+  // Dtor
+
+  ~Action_cholesky( void ){
+
+    MESSAGE("Action_cholesky Dtor");
+
+    // deallocation
+    Interface::free_matrix(X_ref,_size);
+    Interface::free_matrix(X,_size);
+    Interface::free_matrix(C,_size);
+  }
+
+  // action name
+
+  static inline std::string name( void )
+  {
+    return "cholesky_"+Interface::name();
+  }
+
+  double nb_op_base( void ){
+    return _cost;
+  }
+
+  inline void initialize( void ){
+    Interface::copy_matrix(X_ref,X,_size);
+  }
+
+  inline void calculate( void ) {
+      Interface::cholesky(X,C,_size);
+  }
+
+  void check_result( void ){
+    // calculation check
+    Interface::matrix_to_stl(C,resu_stl);
+
+//     STL_interface<typename Interface::real_type>::cholesky(X_stl,C_stl,_size);
+// 
+//     typename Interface::real_type error=
+//       STL_interface<typename Interface::real_type>::norm_diff(C_stl,resu_stl);
+// 
+//     if (error>1.e-6){
+//       INFOS("WRONG CALCULATION...residual=" << error);
+//       exit(0);
+//     }
+
+  }
+
+private :
+
+  typename Interface::stl_matrix X_stl;
+  typename Interface::stl_matrix C_stl;
+  typename Interface::stl_matrix resu_stl;
+
+  typename Interface::gene_matrix X_ref;
+  typename Interface::gene_matrix X;
+  typename Interface::gene_matrix C;
+
+  int _size;
+  int _cost;
+};
+
+#endif
diff --git a/bench/btl/actions/action_matrix_vector_product.hh b/bench/btl/actions/action_matrix_vector_product.hh
index 3a172f2..202bc1a 100644
--- a/bench/btl/actions/action_matrix_vector_product.hh
+++ b/bench/btl/actions/action_matrix_vector_product.hh
@@ -49,11 +49,10 @@
     // generic matrix and vector initialization
 
     Interface::matrix_from_stl(A_ref,A_stl);
-    Interface::vector_from_stl(B_ref,B_stl);
-    Interface::vector_from_stl(X_ref,X_stl);
-
     Interface::matrix_from_stl(A,A_stl);
+    Interface::vector_from_stl(B_ref,B_stl);
     Interface::vector_from_stl(B,B_stl);
+    Interface::vector_from_stl(X_ref,X_stl);
     Interface::vector_from_stl(X,X_stl);
 
   }
diff --git a/bench/btl/actions/action_trisolve.hh b/bench/btl/actions/action_trisolve.hh
new file mode 100644
index 0000000..f97f7db
--- /dev/null
+++ b/bench/btl/actions/action_trisolve.hh
@@ -0,0 +1,136 @@
+//=====================================================
+// File   :  action_trisolve.hh
+// Copyright (C) 2008 Gael Guennebaud <g.gael@free.fr>
+//=====================================================
+//
+// This program is free software; you can redistribute it and/or
+// modify it under the terms of the GNU General Public License
+// as published by the Free Software Foundation; either version 2
+// of the License, or (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+// You should have received a copy of the GNU General Public License
+// along with this program; if not, write to the Free Software
+// Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
+//
+#ifndef ACTION_TRISOLVE
+#define ACTION_TRISOLVE
+#include "utilities.h"
+#include "STL_interface.hh"
+#include <string>
+#include "init/init_function.hh"
+#include "init/init_vector.hh"
+#include "init/init_matrix.hh"
+
+using namespace std;
+
+template<class Interface>
+class Action_trisolve {
+
+public :
+
+  // Ctor
+
+  Action_trisolve( int size ):_size(size)
+  {
+    MESSAGE("Action_trisolve Ctor");
+
+    // STL vector initialization
+    init_matrix<pseudo_random>(L_stl,_size);
+    init_vector<pseudo_random>(B_stl,_size);
+    init_vector<null_function>(X_stl,_size);
+    for (int j=0; j<_size; ++j)
+    {
+      for (int i=0; i<j; ++i)
+        L_stl[j][i] = 0;
+      L_stl[j][j] += 3;
+    }
+
+    init_vector<null_function>(resu_stl,_size);
+
+    // generic matrix and vector initialization
+    Interface::matrix_from_stl(L,L_stl);
+    Interface::vector_from_stl(X,X_stl);
+    Interface::vector_from_stl(B,B_stl);
+
+    _cost = 0;
+    for (int j=0; j<_size; ++j)
+    {
+      _cost += 2*j + 1;
+    }
+  }
+
+  // invalidate copy ctor
+
+  Action_trisolve( const  Action_trisolve & )
+  {
+    INFOS("illegal call to Action_trisolve Copy Ctor");
+    exit(1);
+  }
+
+  // Dtor
+
+  ~Action_trisolve( void ){
+
+    MESSAGE("Action_trisolve Dtor");
+
+    // deallocation
+    Interface::free_matrix(L,_size);
+    Interface::free_vector(B);
+    Interface::free_vector(X);
+  }
+
+  // action name
+
+  static inline std::string name( void )
+  {
+    return "trisolve_"+Interface::name();
+  }
+
+  double nb_op_base( void ){
+    return _cost;
+  }
+
+  inline void initialize( void ){
+    //Interface::copy_vector(X_ref,X,_size);
+  }
+
+  inline void calculate( void ) {
+      Interface::trisolve_lower(L,B,X,_size);
+  }
+
+  void check_result( void ){
+    // calculation check
+    Interface::vector_to_stl(X,resu_stl);
+
+    STL_interface<typename Interface::real_type>::trisolve_lower(L_stl,B_stl,X_stl,_size);
+
+    typename Interface::real_type error=
+      STL_interface<typename Interface::real_type>::norm_diff(X_stl,resu_stl);
+
+    if (error>1.e-4){
+      INFOS("WRONG CALCULATION...residual=" << error);
+      exit(2);
+    } //else INFOS("CALCULATION OK...residual=" << error);
+
+  }
+
+private :
+
+  typename Interface::stl_matrix L_stl;
+  typename Interface::stl_vector X_stl;
+  typename Interface::stl_vector B_stl;
+  typename Interface::stl_vector resu_stl;
+
+  typename Interface::gene_matrix L;
+  typename Interface::gene_vector X;
+  typename Interface::gene_vector B;
+
+  int _size;
+  int _cost;
+};
+
+#endif
diff --git a/bench/btl/actions/basic_actions.hh b/bench/btl/actions/basic_actions.hh
new file mode 100644
index 0000000..fd2e749
--- /dev/null
+++ b/bench/btl/actions/basic_actions.hh
@@ -0,0 +1,15 @@
+
+#include "action_axpy.hh"
+#include "action_axpby.hh"
+
+#include "action_matrix_vector_product.hh"
+#include "action_atv_product.hh"
+
+#include "action_matrix_matrix_product.hh"
+#include "action_ata_product.hh"
+#include "action_aat_product.hh"
+
+#include "action_trisolve.hh"
+
+// #include "action_lu_solve.hh"
+
diff --git a/bench/btl/cmake/FindATLAS.cmake b/bench/btl/cmake/FindATLAS.cmake
new file mode 100644
index 0000000..1a7d7e8
--- /dev/null
+++ b/bench/btl/cmake/FindATLAS.cmake
@@ -0,0 +1,35 @@
+# include(FindLibraryWithDebug)
+
+if (ATLAS_INCLUDES AND ATLAS_LIBRARIES)
+  set(ATLAS_FIND_QUIETLY TRUE)
+endif (ATLAS_INCLUDES AND ATLAS_LIBRARIES)
+
+find_path(ATLAS_INCLUDES
+  NAMES
+  cblas.h
+  PATHS
+  $ENV{ATLASDIR}/include
+  ${INCLUDE_INSTALL_DIR}
+)
+
+find_file(ATLAS_LIB libatlas.so.3 PATHS /usr/lib $ENV{ATLASDIR} ${LIB_INSTALL_DIR})
+find_library(ATLAS_LIB atlas PATHS $ENV{ATLASDIR} ${LIB_INSTALL_DIR})
+
+find_file(ATLAS_CBLAS libcblas.so.3 PATHS /usr/lib $ENV{ATLASDIR} ${LIB_INSTALL_DIR})
+find_library(ATLAS_CBLAS cblas PATHS $ENV{ATLASDIR} ${LIB_INSTALL_DIR})
+
+find_file(ATLAS_LAPACK liblapack_atlas.so.3 PATHS /usr/lib $ENV{ATLASDIR} ${LIB_INSTALL_DIR})
+find_library(ATLAS_LAPACK lapack_atlas PATHS $ENV{ATLASDIR} ${LIB_INSTALL_DIR})
+
+find_file(ATLAS_F77BLAS libf77blas.so.3 PATHS /usr/lib $ENV{ATLASDIR} ${LIB_INSTALL_DIR})
+find_library(ATLAS_F77BLAS f77blas PATHS $ENV{ATLASDIR} ${LIB_INSTALL_DIR})
+
+if(ATLAS_LIB AND ATLAS_CBLAS AND ATLAS_LAPACK AND ATLAS_F77BLAS)
+set(ATLAS_LIBRARIES ${ATLAS_LIB} ${ATLAS_CBLAS} ${ATLAS_LAPACK} ${ATLAS_F77BLAS})
+endif(ATLAS_LIB AND ATLAS_CBLAS AND ATLAS_LAPACK AND ATLAS_F77BLAS)
+
+include(FindPackageHandleStandardArgs)
+find_package_handle_standard_args(ATLAS DEFAULT_MSG
+                                  ATLAS_INCLUDES ATLAS_LIBRARIES)
+
+mark_as_advanced(ATLAS_INCLUDES ATLAS_LIBRARIES)
diff --git a/bench/btl/data/CMakeLists.txt b/bench/btl/data/CMakeLists.txt
index decfc45..9b7e627 100644
--- a/bench/btl/data/CMakeLists.txt
+++ b/bench/btl/data/CMakeLists.txt
@@ -1,7 +1,7 @@
 
 ADD_CUSTOM_TARGET(copy_scripts)
 
-SET(script_files go_mean aat.hh ata.hh axpy.hh order_lib mk_mean_script.sh mk_new_gnuplot.sh mk_gnuplot_script.sh matrix_matrix.hh matrix_vector.hh atv.hh perlib_plot_settings.txt gnuplot_common_settings.hh )
+SET(script_files go_mean aat.hh ata.hh axpy.hh axpby.hh cholesky.hh trisolve.hh order_lib mk_mean_script.sh mk_new_gnuplot.sh mk_gnuplot_script.sh matrix_matrix.hh matrix_vector.hh atv.hh perlib_plot_settings.txt gnuplot_common_settings.hh )
 
 FOREACH(script_file ${script_files})
 ADD_CUSTOM_COMMAND(
diff --git a/bench/btl/data/axpby.hh b/bench/btl/data/axpby.hh
new file mode 100644
index 0000000..744e306
--- /dev/null
+++ b/bench/btl/data/axpby.hh
@@ -0,0 +1,3 @@
+set title "{/*1.5 Y = alpha * X + beta * Y}" 0.000000,0.000000
+set xlabel "vector size" 0.000000,0.000000
+set xrange [1:1000000]
diff --git a/bench/btl/data/cholesky.hh b/bench/btl/data/cholesky.hh
new file mode 100644
index 0000000..78336b4
--- /dev/null
+++ b/bench/btl/data/cholesky.hh
@@ -0,0 +1,3 @@
+set title "{/*1.5 Cholesky decomposition}" 0.000000,0.000000
+set xlabel "matrix size" 0.000000,0.000000
+set xrange [6:1000]
diff --git a/bench/btl/data/go_mean b/bench/btl/data/go_mean
index 422b357..05835a8 100755
--- a/bench/btl/data/go_mean
+++ b/bench/btl/data/go_mean
@@ -17,11 +17,13 @@
   '</p>'  >> $webpagefilename
 
 source mk_mean_script.sh axpy $1 11 2500 100000 250000 > $1/axpy.html
-source mk_mean_script.sh matrix_vector $1 11 50 300 500 > $1/matrix_vector.html
-source mk_mean_script.sh atv $1 11 50 300 500 > $1/atv.html
-# source mk_mean_script.sh matrix_matrix $1 11 100 300 500 > $1/matrix_matrix.html
-# source mk_mean_script.sh aat $1 11 100 300 1000 > $1/aat.html
-# source mk_mean_script.sh ata $1 11 100 300 1000 > $1/ata.html
+source mk_mean_script.sh matrix_vector $1 11 50 300 1000 > $1/matrix_vector.html
+source mk_mean_script.sh atv $1 11 50 300 1000 > $1/atv.html
+source mk_mean_script.sh matrix_matrix $1 11 100 300 1000 > $1/matrix_matrix.html
+source mk_mean_script.sh aat $1 11 100 300 1000 > $1/aat.html
+source mk_mean_script.sh ata $1 11 100 300 1000 > $1/ata.html
+source mk_mean_script.sh trisolve $1 11 100 300 1000 > $1/trisolve.html
+source mk_mean_script.sh cholesky $1 11 100 300 1000 > $1/cholesky.html
 
 ## compile the web page ##
 
diff --git a/bench/btl/data/mean.cxx b/bench/btl/data/mean.cxx
index 96d37dc..3e1a82d 100644
--- a/bench/btl/data/mean.cxx
+++ b/bench/btl/data/mean.cxx
@@ -23,11 +23,11 @@
 #include <iostream>
 #include <fstream>
 #include "bench_parameter.hh"
+#include "utils/xy_file.hh"
 #include <set>
 
 using namespace std;
 
-void read_xy_file(const string & filename, vector<int> & tab_sizes, vector<double> & tab_mflops);
 double mean_calc(const vector<int> & tab_sizes, const vector<double> & tab_mflops, const int size_min, const int size_max);
 
 class Lib_Mean{
@@ -152,29 +152,6 @@
 
 }
 
-void read_xy_file(const string & filename, vector<int> & tab_sizes, vector<double> & tab_mflops){
-
-  ifstream input_file (filename.c_str(),ios::in) ;
-
-  if (!input_file){
-    INFOS("!!! Error opening "<<filename);
-    exit(0);
-  }
-  
-  int nb_point=0;
-  int size=0;
-  double mflops=0;
-
-  while (input_file >> size >> mflops ){
-    nb_point++;
-    tab_sizes.push_back(size);
-    tab_mflops.push_back(mflops);
-  }
-  SCRUTE(nb_point);
-
-  input_file.close();
-}
-
 double mean_calc(const vector<int> & tab_sizes, const vector<double> & tab_mflops, const int size_min, const int size_max){
   
   int size=tab_sizes.size();
diff --git a/bench/btl/data/trisolve.hh b/bench/btl/data/trisolve.hh
new file mode 100644
index 0000000..6c3000f
--- /dev/null
+++ b/bench/btl/data/trisolve.hh
@@ -0,0 +1,3 @@
+set title "{/*1.5 triangular solver (X = inv(L) * X)}" 0.000000,0.000000
+set xlabel "matrix-vector size" 0.000000,0.000000
+set xrange [6:1000]
diff --git a/bench/btl/generic_bench/bench.hh b/bench/btl/generic_bench/bench.hh
index cace269..bb3ca66 100644
--- a/bench/btl/generic_bench/bench.hh
+++ b/bench/btl/generic_bench/bench.hh
@@ -25,14 +25,16 @@
 #include <iostream>
 #include "utilities.h"
 #include "size_lin_log.hh"
-#include "dump_file_x_y.hh"
+#include "xy_file.hh"
 #include <vector>
 #include <string>
 #include "timers/portable_perf_analyzer.hh"
-//#include "timers/mixed_perf_analyzer.hh"
-//#include "timers/x86_perf_analyzer.hh"
-//#include "timers/STL_perf_analyzer.hh"
-
+// #include "timers/mixed_perf_analyzer.hh"
+// #include "timers/x86_perf_analyzer.hh"
+// #include "timers/STL_perf_analyzer.hh"
+#ifdef HAVE_MKL
+extern "C" void cblas_saxpy(const int, const float, const float*, const int, float *, const int);
+#endif
 using namespace std;
 
 template <template<class> class Perf_Analyzer, class Action>
@@ -41,8 +43,6 @@
   if (BtlConfig::skipAction(Action::name()))
     return;
 
-  BTL_DISABLE_SSE_EXCEPTIONS();
-
   string filename="bench_"+Action::name()+".dat";
 
   INFOS("starting " <<filename);
@@ -64,14 +64,71 @@
   {
     //INFOS("size=" <<tab_sizes[i]<<"   ("<<nb_point-i<<"/"<<nb_point<<")");
     std::cout << " " << "size = " << tab_sizes[i] << "  " << std::flush;
+
+    BTL_DISABLE_SSE_EXCEPTIONS();
+    #ifdef HAVE_MKL
+    {
+      float dummy;
+      cblas_saxpy(1,0,&dummy,1,&dummy,1);
+    }
+    #endif
+
     tab_mflops[i] = perf_action.eval_mflops(tab_sizes[i]);
     std::cout << tab_mflops[i] << " MFlops    (" << nb_point-i << "/" << nb_point << ")" << std::endl;
   }
-//   std::cout << "\n";
+
+  if (!BtlConfig::Instance.overwriteResults)
+  {
+    std::vector<int> oldSizes;
+    std::vector<double> oldFlops;
+    if (read_xy_file(filename, oldSizes, oldFlops, true))
+    {
+      // merge the two data
+      std::vector<int> newSizes;
+      std::vector<double> newFlops;
+      int i=0;
+      int j=0;
+      while (i<tab_sizes.size() && j<oldSizes.size())
+      {
+        if (tab_sizes[i] == oldSizes[j])
+        {
+          newSizes.push_back(tab_sizes[i]);
+          newFlops.push_back(std::max(tab_mflops[i], oldFlops[j]));
+          ++i;
+          ++j;
+        }
+        else if (tab_sizes[i] < oldSizes[j])
+        {
+          newSizes.push_back(tab_sizes[i]);
+          newFlops.push_back(tab_mflops[i]);
+          ++i;
+        }
+        else
+        {
+          newSizes.push_back(oldSizes[j]);
+          newFlops.push_back(oldFlops[j]);
+          ++j;
+        }
+      }
+      while (i<tab_sizes.size())
+      {
+        newSizes.push_back(tab_sizes[i]);
+        newFlops.push_back(tab_mflops[i]);
+        ++i;
+      }
+      while (j<oldSizes.size())
+      {
+        newSizes.push_back(oldSizes[j]);
+        newFlops.push_back(oldFlops[j]);
+        ++j;
+      }
+      tab_mflops = newFlops;
+      tab_sizes = newSizes;
+    }
+  }
 
   // dump the result in a file  :
-
-  dump_file_x_y(tab_sizes,tab_mflops,filename);
+  dump_xy_file(tab_sizes,tab_mflops,filename);
 
 }
 
@@ -87,8 +144,8 @@
 
 
   // Only for small problem size. Otherwize it will be too long
-  //bench<X86_Perf_Analyzer,Action>(size_min,size_max,nb_point);
-  //bench<STL_Perf_Analyzer,Action>(size_min,size_max,nb_point);
+//   bench<X86_Perf_Analyzer,Action>(size_min,size_max,nb_point);
+//   bench<STL_Perf_Analyzer,Action>(size_min,size_max,nb_point);
 
 }
 
diff --git a/bench/btl/generic_bench/bench_parameter.hh b/bench/btl/generic_bench/bench_parameter.hh
index 5714cb3..62ba20e 100644
--- a/bench/btl/generic_bench/bench_parameter.hh
+++ b/bench/btl/generic_bench/bench_parameter.hh
@@ -23,7 +23,7 @@
 // minimal time for each measurement
 #define REAL_TYPE float
 // minimal time for each measurement
-#define MIN_TIME 0.5
+#define MIN_TIME 1.0
 // nb of point on bench curves
 #define NB_POINT 100
 // min vector size for axpy bench
diff --git a/bench/btl/generic_bench/btl.hh b/bench/btl/generic_bench/btl.hh
index 7847024..6d6e048 100644
--- a/bench/btl/generic_bench/btl.hh
+++ b/bench/btl/generic_bench/btl.hh
@@ -48,7 +48,7 @@
   : : [aux] "m" (aux)); \
 }
 #else
-#define DISABLE_SSE_EXCEPTIONS()
+#define BTL_DISABLE_SSE_EXCEPTIONS()
 #endif
 
 /** Enhanced std::string
@@ -163,7 +163,7 @@
 {
 public:
   BtlConfig()
-    : m_runSingleAction(false)
+    : overwriteResults(false)
   {
     char * _config;
     _config = getenv ("BTL_CONFIG");
@@ -179,33 +179,38 @@
             std::cerr << "error processing option: " << config[i] << "\n";
             exit(2);
           }
-          Instance.m_runSingleAction = true;
-          Instance.m_singleActionName = config[i+1];
+          Instance.m_selectedActionNames = config[i+1].split(":");
 
           i += 1;
         }
+        else if (config[i].beginsWith("--overwrite"))
+        {
+          Instance.overwriteResults = true;
+        }
       }
     }
 
     BTL_DISABLE_SSE_EXCEPTIONS();
   }
 
-  BTL_DONT_INLINE static bool skipAction(const std::string& name)
+  BTL_DONT_INLINE static bool skipAction(const std::string& _name)
   {
-    if (Instance.m_runSingleAction)
-    {
-      return !BtlString(name).contains(Instance.m_singleActionName);
-    }
+    if (Instance.m_selectedActionNames.empty())
+      return false;
 
-    return false;
+    BtlString name(_name);
+    for (int i=0; i<Instance.m_selectedActionNames.size(); ++i)
+      if (name.contains(Instance.m_selectedActionNames[i]))
+        return false;
+
+    return true;
   }
 
+  static BtlConfig Instance;
+  bool overwriteResults;
 
 protected:
-  bool m_runSingleAction;
-  BtlString m_singleActionName;
-
-  static BtlConfig Instance;
+  std::vector<BtlString> m_selectedActionNames;
 };
 
 #define BTL_MAIN \
diff --git a/bench/btl/generic_bench/init/init_vector.hh b/bench/btl/generic_bench/init/init_vector.hh
index faf14f9..efaf0c9 100644
--- a/bench/btl/generic_bench/init/init_vector.hh
+++ b/bench/btl/generic_bench/init/init_vector.hh
@@ -1,14 +1,14 @@
 //=====================================================
 // File   :  init_vector.hh
-// Author :  L. Plagne <laurent.plagne@edf.fr)>        
+// Author :  L. Plagne <laurent.plagne@edf.fr)>
 // Copyright (C) EDF R&D,  lun sep 30 14:23:18 CEST 2002
 //=====================================================
-// 
+//
 // This program is free software; you can redistribute it and/or
 // modify it under the terms of the GNU General Public License
 // as published by the Free Software Foundation; either version 2
 // of the License, or (at your option) any later version.
-// 
+//
 // This program is distributed in the hope that it will be useful,
 // but WITHOUT ANY WARRANTY; without even the implied warranty of
 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
@@ -16,7 +16,7 @@
 // You should have received a copy of the GNU General Public License
 // along with this program; if not, write to the Free Software
 // Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
-// 
+//
 #ifndef INIT_VECTOR_HH
 #define INIT_VECTOR_HH
 
diff --git a/bench/btl/generic_bench/static/bench_static.hh b/bench/btl/generic_bench/static/bench_static.hh
index cdb645f..730590c 100644
--- a/bench/btl/generic_bench/static/bench_static.hh
+++ b/bench/btl/generic_bench/static/bench_static.hh
@@ -24,7 +24,7 @@
 #include "bench_parameter.hh"
 #include <iostream>
 #include "utilities.h"
-#include "dump_file_x_y.hh"
+#include "xy_file.hh"
 #include "static/static_size_generator.hh"
 #include "timers/portable_perf_analyzer.hh"
 #include "timers/mixed_perf_analyzer.hh"
diff --git a/bench/btl/generic_bench/timers/portable_perf_analyzer.hh b/bench/btl/generic_bench/timers/portable_perf_analyzer.hh
index d716154..d0fe95c 100644
--- a/bench/btl/generic_bench/timers/portable_perf_analyzer.hh
+++ b/bench/btl/generic_bench/timers/portable_perf_analyzer.hh
@@ -38,16 +38,16 @@
     MESSAGE("Portable_Perf_Analyzer Dtor");
   };
 
-
-
   BTL_DONT_INLINE  double eval_mflops(int size)
   {
     Action action(size);
 
-
-    double time_action = time_calculate(action);
+    double time_action = 0;
+    action.initialize();
+    time_action = time_calculate(action);
     while (time_action < MIN_TIME)
     {
+      //Action action(size);
       _nb_calc *= 2;
       action.initialize();
       time_action = time_calculate(action);
@@ -56,8 +56,10 @@
     // optimize
     for (int i=1; i<NB_TRIES; ++i)
     {
-      action.initialize();
-      time_action = std::min(time_action, time_calculate(action));
+      Action _action(size);
+      std::cout << " " << _action.nb_op_base()*_nb_calc/(time_action*1e6) << " ";
+      _action.initialize();
+      time_action = std::min(time_action, time_calculate(_action));
     }
 
     time_action = time_action / (double(_nb_calc));
@@ -66,13 +68,13 @@
     action.initialize();
     action.calculate();
     action.check_result();
-
     return action.nb_op_base()/(time_action*1000000.0);
   }
 
   BTL_DONT_INLINE double time_calculate(Action & action)
   {
     // time measurement
+    action.calculate();
     _chronos.start();
     for (int ii=0;ii<_nb_calc;ii++)
     {
diff --git a/bench/btl/generic_bench/timers/portable_timer.hh b/bench/btl/generic_bench/timers/portable_timer.hh
index 7d40594..de50394 100755
--- a/bench/btl/generic_bench/timers/portable_timer.hh
+++ b/bench/btl/generic_bench/timers/portable_timer.hh
@@ -39,6 +39,38 @@
 
 //  A timer object measures CPU time.
 
+// class Portable_Timer
+// {
+//  public:
+//
+//   Portable_Timer( void )
+//   {
+//   }
+//
+//
+//   void start() { m_val = getTime(); }
+//
+//   void stop() { m_val = getTime() - m_val; }
+//
+//   double elapsed() { return  m_val; }
+//
+//   double user_time() { return elapsed(); }
+//
+//
+// private:
+//
+//   static inline double getTime(void)
+//   {
+//       struct timeval tv;
+//       struct timezone tz;
+//       gettimeofday(&tv, &tz);
+//       return (double)tv.tv_sec + 1.e-6 * (double)tv.tv_usec;
+//   }
+//
+//   double m_val;
+//
+// }; // Portable_Timer
+
 class Portable_Timer
 {
  public:
@@ -46,42 +78,42 @@
   Portable_Timer( void ):_utime_sec_start(-1),
 		_utime_usec_start(-1),
 		_utime_sec_stop(-1),
-		_utime_usec_stop(-1)
+		_utime_usec_stop(-1)/*,
+        m_prev_cs(-1)*/
   {
   }
 
 
   void   start()
   {
-
     int status=getrusage(RUSAGE_SELF, &resourcesUsage) ;
-
-    _start_time = std::clock();
-
+//     _start_time = std::clock();
     _utime_sec_start  =  resourcesUsage.ru_utime.tv_sec ;
     _utime_usec_start =  resourcesUsage.ru_utime.tv_usec ;
+//     m_prev_cs = resourcesUsage.ru_nivcsw;
 
   }
 
   void stop()
   {
-
     int status=getrusage(RUSAGE_SELF, &resourcesUsage) ;
-
-    _stop_time = std::clock();
-
+//     _stop_time = std::clock();
     _utime_sec_stop  =  resourcesUsage.ru_utime.tv_sec ;
     _utime_usec_stop =  resourcesUsage.ru_utime.tv_usec ;
 
+//     m_prev_cs = resourcesUsage.ru_nivcsw - m_prev_cs;
+//     std::cerr << resourcesUsage.ru_nvcsw << " + " << resourcesUsage.ru_nivcsw << "\n";
+
   }
 
   double elapsed()
   {
-    return  double(_stop_time - _start_time) / CLOCKS_PER_SEC;
+    return  user_time();//double(_stop_time - _start_time) / CLOCKS_PER_SEC;
   }
 
   double user_time()
   {
+//     std::cout << m_prev_cs << "\n";
     long tot_utime_sec=_utime_sec_stop-_utime_sec_start;
     long tot_utime_usec=_utime_usec_stop-_utime_usec_start;
     return double(tot_utime_sec)+ double(tot_utime_usec)/double(USEC_IN_SEC) ;
@@ -98,6 +130,8 @@
   long _utime_sec_stop ;
   long _utime_usec_stop ;
 
+//   long m_prev_cs;
+
   std::clock_t _start_time;
   std::clock_t _stop_time;
 
diff --git a/bench/btl/generic_bench/utils/dump_file_x_y.hh b/bench/btl/generic_bench/utils/xy_file.hh
similarity index 63%
rename from bench/btl/generic_bench/utils/dump_file_x_y.hh
rename to bench/btl/generic_bench/utils/xy_file.hh
index 6b99dcb..4571bed 100644
--- a/bench/btl/generic_bench/utils/dump_file_x_y.hh
+++ b/bench/btl/generic_bench/utils/xy_file.hh
@@ -17,10 +17,41 @@
 // along with this program; if not, write to the Free Software
 // Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
 // 
-#ifndef DUMP_FILE_X_Y_HH
-#define DUMP_FILE_X_Y_HH
+#ifndef XY_FILE_HH
+#define XY_FILE_HH
 #include <fstream>
+#include <iostream>
 #include <string>
+#include <vector>
+using namespace std;
+
+bool read_xy_file(const std::string & filename, std::vector<int> & tab_sizes,
+                  std::vector<double> & tab_mflops, bool quiet = false)
+{
+
+  std::ifstream input_file (filename.c_str(),std::ios::in);
+
+  if (!input_file){
+    if (!quiet) {
+      INFOS("!!! Error opening "<<filename);
+    }
+    return false;
+  }
+
+  int nb_point=0;
+  int size=0;
+  double mflops=0;
+
+  while (input_file >> size >> mflops ){
+    nb_point++;
+    tab_sizes.push_back(size);
+    tab_mflops.push_back(mflops);
+  }
+  SCRUTE(nb_point);
+
+  input_file.close();
+  return true;
+}
 
 // The Vector class must satisfy the following part of STL vector concept :
 //            resize() method
@@ -30,16 +61,13 @@
 using namespace std;
 
 template<class Vector_A, class Vector_B>
-void dump_file_x_y(const Vector_A & X, const Vector_B & Y, const std::string & filename){
+void dump_xy_file(const Vector_A & X, const Vector_B & Y, const std::string & filename){
   
   ofstream outfile (filename.c_str(),ios::out) ;
   int size=X.size();
   
-  for (int i=0;i<size;i++){
-
-      outfile << X[i] << " " << Y[i] << endl ;
-
-  }
+  for (int i=0;i<size;i++)
+    outfile << X[i] << " " << Y[i] << endl;
 
   outfile.close();
 } 
diff --git a/bench/btl/libs/C_BLAS/CMakeLists.txt b/bench/btl/libs/C_BLAS/CMakeLists.txt
index 73e5cc4..f72e9ca 100644
--- a/bench/btl/libs/C_BLAS/CMakeLists.txt
+++ b/bench/btl/libs/C_BLAS/CMakeLists.txt
@@ -1,13 +1,13 @@
 
-find_package(CBLAS)
-if (CBLAS_FOUND)
-  include_directories(${CBLAS_INCLUDES} ${PROJECT_SOURCE_DIR}/libs/f77)
-  btl_add_bench(btl_cblas main.cpp)
-  if(BUILD_btl_cblas)
-    target_link_libraries(btl_cblas ${CBLAS_LIBRARIES})
-    set_target_properties(btl_cblas PROPERTIES COMPILE_FLAGS "-DCBLASNAME=ATLAS")
-  endif(BUILD_btl_cblas)
-endif (CBLAS_FOUND)
+find_package(ATLAS)
+if (ATLAS_FOUND)
+  include_directories(${ATLAS_INCLUDES} ${PROJECT_SOURCE_DIR}/libs/f77)
+  btl_add_bench(btl_atlas main.cpp)
+  if(BUILD_btl_atlas)
+    target_link_libraries(btl_atlas ${ATLAS_LIBRARIES})
+    set_target_properties(btl_atlas PROPERTIES COMPILE_FLAGS "-DCBLASNAME=ATLAS -DHAS_LAPACK=1")
+  endif(BUILD_btl_atlas)
+endif (ATLAS_FOUND)
 
 find_package(MKL)
 if (MKL_FOUND)
@@ -15,6 +15,6 @@
   btl_add_bench(btl_mkl main.cpp)
   if(BUILD_btl_mkl)
     target_link_libraries(btl_mkl ${MKL_LIBRARIES})
-    set_target_properties(btl_mkl PROPERTIES COMPILE_FLAGS "-DCBLASNAME=INTEL_MKL")
+    set_target_properties(btl_mkl PROPERTIES COMPILE_FLAGS "-DCBLASNAME=INTEL_MKL -DHAS_LAPACK=1")
   endif(BUILD_btl_mkl)
 endif (MKL_FOUND)
diff --git a/bench/btl/libs/C_BLAS/C_BLAS_interface.hh b/bench/btl/libs/C_BLAS/C_BLAS_interface.hh
index eadbf0d..49ae90f 100644
--- a/bench/btl/libs/C_BLAS/C_BLAS_interface.hh
+++ b/bench/btl/libs/C_BLAS/C_BLAS_interface.hh
@@ -25,6 +25,11 @@
 extern "C"
 {
 #include "cblas.h"
+#ifdef HAS_LAPACK
+  // Cholesky Factorization
+  void spotrf_(const char* uplo, const int* n, float *a, const int* ld, int* info);
+  void dpotrf_(const char* uplo, const int* n, double *a, const int* ld, int* info);
+#endif
 }
 
 #define MAKE_STRING2(S) #S
@@ -53,31 +58,31 @@
     cblas_dgemv(CblasColMajor,CblasTrans,N,N,1.0,A,N,B,1,0.0,X,1);
   }
 
-  static  inline void matrix_matrix_product(gene_matrix & A, gene_matrix & B, gene_matrix & X, int N)
-  {
+  static  inline void matrix_matrix_product(gene_matrix & A, gene_matrix & B, gene_matrix & X, int N){
     cblas_dgemm(CblasColMajor,CblasNoTrans,CblasNoTrans,N,N,N,1.0,A,N,B,N,0.0,X,N);
   }
 
-  static  inline void transposed_matrix_matrix_product(gene_matrix & A, gene_matrix & B, gene_matrix & X, int N)
-  {
+  static  inline void transposed_matrix_matrix_product(gene_matrix & A, gene_matrix & B, gene_matrix & X, int N){
     cblas_dgemm(CblasColMajor,CblasTrans,CblasTrans,N,N,N,1.0,A,N,B,N,0.0,X,N);
   }
 
-  static  inline void ata_product(gene_matrix & A, gene_matrix & X, int N)
-  {
+  static  inline void ata_product(gene_matrix & A, gene_matrix & X, int N){
     cblas_dgemm(CblasColMajor,CblasTrans,CblasNoTrans,N,N,N,1.0,A,N,A,N,0.0,X,N);
   }
 
-  static  inline void aat_product(gene_matrix & A, gene_matrix & X, int N)
-  {
+  static  inline void aat_product(gene_matrix & A, gene_matrix & X, int N){
     cblas_dgemm(CblasColMajor,CblasNoTrans,CblasTrans,N,N,N,1.0,A,N,A,N,0.0,X,N);
   }
 
-  static  inline void axpy(real coef, const gene_vector & X, gene_vector & Y, int N)
-  {
+  static  inline void axpy(real coef, const gene_vector & X, gene_vector & Y, int N){
     cblas_daxpy(N,coef,X,1,Y,1);
   }
 
+  static inline void axpby(real a, const gene_vector & X, real b, gene_vector & Y, int N){
+    cblas_dscal(N,b,Y,1);
+    cblas_daxpy(N,a,X,1,Y,1);
+  }
+
 };
 
 template<>
@@ -90,41 +95,62 @@
     return MAKE_STRING(CBLASNAME);
   }
 
-  static  inline void matrix_vector_product(gene_matrix & A, gene_vector & B, gene_vector & X, int N)
-  {
+  static inline void matrix_vector_product(gene_matrix & A, gene_vector & B, gene_vector & X, int N){
     cblas_sgemv(CblasColMajor,CblasNoTrans,N,N,1.0,A,N,B,1,0.0,X,1);
   }
 
-  static  inline void atv_product(gene_matrix & A, gene_vector & B, gene_vector & X, int N)
-  {
+  static inline void atv_product(gene_matrix & A, gene_vector & B, gene_vector & X, int N){
     cblas_sgemv(CblasColMajor,CblasTrans,N,N,1.0,A,N,B,1,0.0,X,1);
   }
 
-  static  inline void matrix_matrix_product(gene_matrix & A, gene_matrix & B, gene_matrix & X, int N)
-  {
+  static inline void matrix_matrix_product(gene_matrix & A, gene_matrix & B, gene_matrix & X, int N){
     cblas_sgemm(CblasColMajor,CblasNoTrans,CblasNoTrans,N,N,N,1.0,A,N,B,N,0.0,X,N);
   }
 
-  static  inline void transposed_matrix_matrix_product(gene_matrix & A, gene_matrix & B, gene_matrix & X, int N)
-  {
+  static inline void transposed_matrix_matrix_product(gene_matrix & A, gene_matrix & B, gene_matrix & X, int N){
     cblas_sgemm(CblasColMajor,CblasNoTrans,CblasNoTrans,N,N,N,1.0,A,N,B,N,0.0,X,N);
   }
 
-  static  inline void ata_product(gene_matrix & A, gene_matrix & X, int N)
-  {
+  static inline void ata_product(gene_matrix & A, gene_matrix & X, int N){
     cblas_sgemm(CblasColMajor,CblasTrans,CblasNoTrans,N,N,N,1.0,A,N,A,N,0.0,X,N);
   }
 
-  static  inline void aat_product(gene_matrix & A, gene_matrix & X, int N)
-  {
+  static inline void aat_product(gene_matrix & A, gene_matrix & X, int N){
     cblas_sgemm(CblasColMajor,CblasNoTrans,CblasTrans,N,N,N,1.0,A,N,A,N,0.0,X,N);
   }
 
-  static  inline void axpy(float coef, const gene_vector & X, gene_vector & Y, int N)
-  {
+  static inline void axpy(float coef, const gene_vector & X, gene_vector & Y, int N){
     cblas_saxpy(N,coef,X,1,Y,1);
   }
 
+  static inline void axpby(float a, const gene_vector & X, float b, gene_vector & Y, int N){
+    cblas_sscal(N,b,Y,1);
+    cblas_saxpy(N,a,X,1,Y,1);
+  }
+
+  #ifdef HAS_LAPACK
+  static inline void cholesky(const gene_vector & X, gene_vector & C, int N){
+    cblas_scopy(N*N, X, 1, C, 1);
+    char uplo = 'L';
+    int info = 0;
+    spotrf_(&uplo, &N, C, &N, &info);
+//     float tmp[N];
+//     for (int j = 1; j < N; ++j)
+//     {
+//       int endSize = N-j-1;
+//       if (endSize>0) {
+          //cblas_sgemv(CblasColMajor, CblasNoTrans, N-j-1, j, ATL_rnone, A+j+1, lda, A+j, lda, ATL_rone, Ac+j+1,       1);
+//          cblas_sgemv(CblasColMajor, CblasNoTrans,endSize,j, 1.0,     &(C[j+1]),N, &(C[j]),N, 0.0,      &(C[j+1+N*j]),1);
+//       }
+//     }
+  }
+  #endif
+
+  static inline void trisolve_lower(const gene_matrix & L, const gene_vector& B, gene_vector & X, int N){
+    cblas_scopy(N, B, 1, X, 1);
+    cblas_strsv(CblasColMajor, CblasLower, CblasNoTrans, CblasNonUnit, N, L, N, X, 1);
+  }
+
 };
 
 
diff --git a/bench/btl/libs/C_BLAS/main.cpp b/bench/btl/libs/C_BLAS/main.cpp
index bef5d30..3e38d69 100644
--- a/bench/btl/libs/C_BLAS/main.cpp
+++ b/bench/btl/libs/C_BLAS/main.cpp
@@ -20,13 +20,11 @@
 #include "utilities.h"
 #include "C_BLAS_interface.hh"
 #include "bench.hh"
-#include "action_matrix_vector_product.hh"
-#include "action_matrix_matrix_product.hh"
-#include "action_atv_product.hh"
-#include "action_axpy.hh"
-#include "action_lu_solve.hh"
-#include "action_ata_product.hh"
-#include "action_aat_product.hh"
+#include "basic_actions.hh"
+
+#ifdef HAS_LAPACK
+#include "action_cholesky.hh"
+#endif
 
 BTL_MAIN;
 
@@ -34,17 +32,21 @@
 {
 
   bench<Action_axpy<C_BLAS_interface<REAL_TYPE> > >(MIN_AXPY,MAX_AXPY,NB_POINT);
+  bench<Action_axpby<C_BLAS_interface<REAL_TYPE> > >(MIN_AXPY,MAX_AXPY,NB_POINT);
 
   bench<Action_matrix_vector_product<C_BLAS_interface<REAL_TYPE> > >(MIN_MV,MAX_MV,NB_POINT);
-
   bench<Action_atv_product<C_BLAS_interface<REAL_TYPE> > >(MIN_MV,MAX_MV,NB_POINT);
 
   bench<Action_matrix_matrix_product<C_BLAS_interface<REAL_TYPE> > >(MIN_MM,MAX_MM,NB_POINT);
-
   bench<Action_ata_product<C_BLAS_interface<REAL_TYPE> > >(MIN_MM,MAX_MM,NB_POINT);
-
   bench<Action_aat_product<C_BLAS_interface<REAL_TYPE> > >(MIN_MM,MAX_MM,NB_POINT);
 
+  bench<Action_trisolve<C_BLAS_interface<REAL_TYPE> > >(MIN_MM,MAX_MM,NB_POINT);
+
+  #ifdef HAS_LAPACK
+  bench<Action_cholesky<C_BLAS_interface<REAL_TYPE> > >(MIN_MM,MAX_MM,NB_POINT);
+  #endif
+
   //bench<Action_lu_solve<C_BLAS_LU_solve_interface<REAL_TYPE> > >(MIN_LU,MAX_LU,NB_POINT);
 
   return 0;
diff --git a/bench/btl/libs/STL/STL_interface.hh b/bench/btl/libs/STL/STL_interface.hh
index 5b1a384..d7ef9a2 100644
--- a/bench/btl/libs/STL/STL_interface.hh
+++ b/bench/btl/libs/STL/STL_interface.hh
@@ -146,6 +146,22 @@
       Y[i]+=coef*X[i];
   }
 
+  static inline void axpby(real a, const gene_vector & X, real b, gene_vector & Y, int N){
+    for (int i=0;i<N;i++)
+      Y[i] = a*X[i] + b*Y[i];
+  }
+
+  static inline void trisolve_lower(const gene_matrix & L, const gene_vector & B, gene_vector & X, int N){
+    copy_vector(B,X,N);
+    for(int i=0; i<N; ++i)
+    {
+      X[i] /= L[i][i];
+      real tmp = X[i];
+      for (int j=i+1; j<N; ++j)
+        X[j] -= tmp * L[i][j];
+    }
+  }
+
   static inline real norm_diff(const stl_vector & A, const stl_vector & B)
   {
     int N=A.size();
diff --git a/bench/btl/libs/STL/main.cpp b/bench/btl/libs/STL/main.cpp
index 4cb8466..35f1864 100644
--- a/bench/btl/libs/STL/main.cpp
+++ b/bench/btl/libs/STL/main.cpp
@@ -20,19 +20,14 @@
 #include "utilities.h"
 #include "STL_interface.hh"
 #include "bench.hh"
-#include "action_matrix_vector_product.hh"
-#include "action_matrix_matrix_product.hh"
-#include "action_axpy.hh"
-#include "action_lu_solve.hh"
-#include "action_ata_product.hh"
-#include "action_aat_product.hh"
-#include "action_atv_product.hh"
+#include "basic_actions.hh"
 
 BTL_MAIN;
 
 int main()
 {
   bench<Action_axpy<STL_interface<REAL_TYPE> > >(MIN_AXPY,MAX_AXPY,NB_POINT);
+  bench<Action_axpby<STL_interface<REAL_TYPE> > >(MIN_AXPY,MAX_AXPY,NB_POINT);
   bench<Action_matrix_vector_product<STL_interface<REAL_TYPE> > >(MIN_MV,MAX_MV,NB_POINT);
   bench<Action_atv_product<STL_interface<REAL_TYPE> > >(MIN_MV,MAX_MV,NB_POINT);
   bench<Action_matrix_matrix_product<STL_interface<REAL_TYPE> > >(MIN_MM,MAX_MM,NB_POINT);
diff --git a/bench/btl/libs/eigen2/CMakeLists.txt b/bench/btl/libs/eigen2/CMakeLists.txt
index ba55ef5..f452dc9 100644
--- a/bench/btl/libs/eigen2/CMakeLists.txt
+++ b/bench/btl/libs/eigen2/CMakeLists.txt
@@ -3,13 +3,28 @@
 if (EIGEN2_FOUND)
 
   include_directories(${EIGEN2_INCLUDE_DIR})
-  btl_add_bench(btl_eigen2 main.cpp)
+  btl_add_bench(btl_eigen2_linear main_linear.cpp)
+  btl_add_bench(btl_eigen2_vecmat main_vecmat.cpp)
+  btl_add_bench(btl_eigen2_matmat main_matmat.cpp)
+  btl_add_bench(btl_eigen2_adv main_adv.cpp)
 
   IF(NOT BTL_NOVEC)
-    btl_add_bench(btl_eigen2_novec main.cpp)
-    if(BUILD_btl_eigen2_novec)
-      set_target_properties(btl_eigen2_novec PROPERTIES COMPILE_FLAGS "-DEIGEN_DONT_VECTORIZE")
-    endif(BUILD_btl_eigen2_novec)
+    btl_add_bench(btl_eigen2_novec_linear main_linear.cpp)
+    btl_add_bench(btl_eigen2_novec_vecmat main_vecmat.cpp)
+    btl_add_bench(btl_eigen2_novec_matmat main_matmat.cpp)
+    btl_add_bench(btl_eigen2_novec_adv main_adv.cpp)
+    if(BUILD_btl_eigen2_novec_linear)
+      set_target_properties(btl_eigen2_novec_linear PROPERTIES COMPILE_FLAGS "-DEIGEN_DONT_VECTORIZE")
+    endif(BUILD_btl_eigen2_novec_linear)
+    if(BUILD_btl_eigen2_novec_vecmat)
+      set_target_properties(btl_eigen2_novec_vecmat PROPERTIES COMPILE_FLAGS "-DEIGEN_DONT_VECTORIZE")
+    endif(BUILD_btl_eigen2_novec_vecmat)
+    if(BUILD_btl_eigen2_novec_matmat)
+      set_target_properties(btl_eigen2_novec_matmat PROPERTIES COMPILE_FLAGS "-DEIGEN_DONT_VECTORIZE")
+    endif(BUILD_btl_eigen2_novec_matmat)
+    if(BUILD_btl_eigen2_novec_adv)
+      set_target_properties(btl_eigen2_novec_adv PROPERTIES COMPILE_FLAGS "-DEIGEN_DONT_VECTORIZE")
+    endif(BUILD_btl_eigen2_novec_adv)
   ENDIF(NOT BTL_NOVEC)
 
   btl_add_bench(btl_tiny_eigen2 btl_tiny_eigen2.cpp OFF)
diff --git a/bench/btl/libs/eigen2/eigen2_interface.hh b/bench/btl/libs/eigen2/eigen2_interface.hh
index 4ce4af1..13c7058 100644
--- a/bench/btl/libs/eigen2/eigen2_interface.hh
+++ b/bench/btl/libs/eigen2/eigen2_interface.hh
@@ -19,6 +19,7 @@
 #define EIGEN2_INTERFACE_HH
 
 #include <Eigen/Core>
+#include <Eigen/Cholesky>
 #include <vector>
 #include "btl.hh"
 
@@ -107,17 +108,21 @@
   }
 
   static inline void matrix_vector_product(const gene_matrix &  __restrict__  A, const gene_vector & __restrict__ B, gene_vector &  __restrict__ X, int N){
-    X = (A*B).lazy();
+    X = (A*B)/*.lazy()*/;
   }
 
   static inline void atv_product(gene_matrix & A, gene_vector & B, gene_vector & X, int N){
-    X = (A.transpose()*B).lazy();
+    X = (A.transpose()*B)/*.lazy()*/;
   }
 
-  static inline void axpy(const real coef, const gene_vector & X, gene_vector & Y, int N){
+  static inline void axpy(real coef, const gene_vector & X, gene_vector & Y, int N){
     Y += coef * X;
   }
 
+  static inline void axpby(real a, const gene_vector & X, real b, gene_vector & Y, int N){
+    Y = a*X + b*Y;
+  }
+
   static inline void copy_matrix(const gene_matrix & source, gene_matrix & cible, int N){
     cible = source;
   }
@@ -126,6 +131,16 @@
     cible = source;
   }
 
+  static inline void trisolve_lower(const gene_matrix & L, const gene_vector& B, gene_vector& X, int N){
+    X = L.template marked<Lower>().inverseProduct(B);
+  }
+
+  static inline void cholesky(const gene_matrix & X, gene_matrix & C, int N){
+//     C = X;
+//     Cholesky<gene_matrix>::computeInPlace(C);
+    C = X.cholesky().matrixL();
+  }
+
 };
 
 #endif
diff --git a/bench/btl/libs/eigen2/main_adv.cpp b/bench/btl/libs/eigen2/main_adv.cpp
new file mode 100644
index 0000000..db38b67
--- /dev/null
+++ b/bench/btl/libs/eigen2/main_adv.cpp
@@ -0,0 +1,34 @@
+//=====================================================
+// Copyright (C) 2008 Gael Guennebaud <g.gael@free.fr>
+//=====================================================
+//
+// This program is free software; you can redistribute it and/or
+// modify it under the terms of the GNU General Public License
+// as published by the Free Software Foundation; either version 2
+// of the License, or (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+// You should have received a copy of the GNU General Public License
+// along with this program; if not, write to the Free Software
+// Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
+//
+#include "utilities.h"
+#include "eigen2_interface.hh"
+#include "bench.hh"
+#include "action_trisolve.hh"
+#include "action_cholesky.hh"
+
+BTL_MAIN;
+
+int main()
+{
+  bench<Action_trisolve<eigen2_interface<REAL_TYPE> > >(MIN_MM,MAX_MM,NB_POINT);
+  bench<Action_cholesky<eigen2_interface<REAL_TYPE> > >(MIN_MM,MAX_MM,NB_POINT);
+
+  return 0;
+}
+
+
diff --git a/bench/btl/libs/eigen2/main_linear.cpp b/bench/btl/libs/eigen2/main_linear.cpp
new file mode 100644
index 0000000..e79927b
--- /dev/null
+++ b/bench/btl/libs/eigen2/main_linear.cpp
@@ -0,0 +1,34 @@
+//=====================================================
+// Copyright (C) 2008 Gael Guennebaud <g.gael@free.fr>
+//=====================================================
+//
+// This program is free software; you can redistribute it and/or
+// modify it under the terms of the GNU General Public License
+// as published by the Free Software Foundation; either version 2
+// of the License, or (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+// You should have received a copy of the GNU General Public License
+// along with this program; if not, write to the Free Software
+// Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
+//
+#include "utilities.h"
+#include "eigen2_interface.hh"
+#include "bench.hh"
+#include "basic_actions.hh"
+
+BTL_MAIN;
+
+int main()
+{
+
+  bench<Action_axpy<eigen2_interface<REAL_TYPE> > >(MIN_AXPY,MAX_AXPY,NB_POINT);
+  bench<Action_axpby<eigen2_interface<REAL_TYPE> > >(MIN_AXPY,MAX_AXPY,NB_POINT);
+  
+  return 0;
+}
+
+
diff --git a/bench/btl/libs/eigen2/main_matmat.cpp b/bench/btl/libs/eigen2/main_matmat.cpp
new file mode 100644
index 0000000..bdebc08
--- /dev/null
+++ b/bench/btl/libs/eigen2/main_matmat.cpp
@@ -0,0 +1,34 @@
+//=====================================================
+// Copyright (C) 2008 Gael Guennebaud <g.gael@free.fr>
+//=====================================================
+//
+// This program is free software; you can redistribute it and/or
+// modify it under the terms of the GNU General Public License
+// as published by the Free Software Foundation; either version 2
+// of the License, or (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+// You should have received a copy of the GNU General Public License
+// along with this program; if not, write to the Free Software
+// Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
+//
+#include "utilities.h"
+#include "eigen2_interface.hh"
+#include "bench.hh"
+#include "basic_actions.hh"
+
+BTL_MAIN;
+
+int main()
+{
+  bench<Action_matrix_matrix_product<eigen2_interface<REAL_TYPE> > >(MIN_MM,MAX_MM,NB_POINT);
+  bench<Action_ata_product<eigen2_interface<REAL_TYPE> > >(MIN_MM,MAX_MM,NB_POINT);
+  bench<Action_aat_product<eigen2_interface<REAL_TYPE> > >(MIN_MM,MAX_MM,NB_POINT);
+
+  return 0;
+}
+
+
diff --git a/bench/btl/libs/eigen2/main.cpp b/bench/btl/libs/eigen2/main_vecmat.cpp
similarity index 63%
rename from bench/btl/libs/eigen2/main.cpp
rename to bench/btl/libs/eigen2/main_vecmat.cpp
index 086dd75..eadc9a8 100644
--- a/bench/btl/libs/eigen2/main.cpp
+++ b/bench/btl/libs/eigen2/main_vecmat.cpp
@@ -18,29 +18,15 @@
 #include "utilities.h"
 #include "eigen2_interface.hh"
 #include "bench.hh"
-#include "action_matrix_vector_product.hh"
-#include "action_matrix_matrix_product.hh"
-#include "action_axpy.hh"
-#include "action_lu_solve.hh"
-#include "action_ata_product.hh"
-#include "action_aat_product.hh"
-#include "action_atv_product.hh"
+#include "basic_actions.hh"
 
 BTL_MAIN;
 
 int main()
 {
-
   bench<Action_matrix_vector_product<eigen2_interface<REAL_TYPE> > >(MIN_MV,MAX_MV,NB_POINT);
-
-  bench<Action_axpy<eigen2_interface<REAL_TYPE> > >(MIN_AXPY,MAX_AXPY,NB_POINT);
-//  bench<Action_matrix_matrix_product<eigen2_interface<REAL_TYPE> > >(MIN_MM,MAX_MM,NB_POINT);
-//  bench<Action_ata_product<eigen2_interface<REAL_TYPE> > >(MIN_MM,MAX_MM,NB_POINT);
-//  bench<Action_aat_product<eigen2_interface<REAL_TYPE> > >(MIN_MM,MAX_MM,NB_POINT);
   bench<Action_atv_product<eigen2_interface<REAL_TYPE> > >(MIN_MV,MAX_MV,NB_POINT);
 
-  //bench<Action_lu_solve<blitz_LU_solve_interface<REAL_TYPE> > >(MIN_LU,MAX_LU,NB_POINT);
-
   return 0;
 }
 
diff --git a/bench/btl/libs/gmm/gmm_interface.hh b/bench/btl/libs/gmm/gmm_interface.hh
index 6a81fe9..7ef41c0 100644
--- a/bench/btl/libs/gmm/gmm_interface.hh
+++ b/bench/btl/libs/gmm/gmm_interface.hh
@@ -106,6 +106,10 @@
     gmm::add(gmm::scaled(X,coef), Y);
   }
 
+  static inline void axpby(real a, const gene_vector & X, real b, gene_vector & Y, int N){
+    gmm::add(gmm::scaled(X,a), gmm::scaled(Y,b), Y);
+  }
+
   static inline void copy_matrix(const gene_matrix & source, gene_matrix & cible, int N){
     gmm::copy(source,cible);
   }
@@ -114,6 +118,12 @@
     gmm::copy(source,cible);
   }
 
+  static inline void trisolve_lower(const gene_matrix & L, const gene_vector& B, gene_vector & X, int N){
+    gmm::copy(B,X);
+    gmm::lower_tri_solve(L, X, false);
+  }
+
+
 };
 
 #endif
diff --git a/bench/btl/libs/gmm/main.cpp b/bench/btl/libs/gmm/main.cpp
index a05fd1b..26fdf76 100644
--- a/bench/btl/libs/gmm/main.cpp
+++ b/bench/btl/libs/gmm/main.cpp
@@ -18,13 +18,7 @@
 #include "utilities.h"
 #include "gmm_interface.hh"
 #include "bench.hh"
-#include "action_matrix_vector_product.hh"
-#include "action_matrix_matrix_product.hh"
-#include "action_axpy.hh"
-#include "action_lu_solve.hh"
-#include "action_ata_product.hh"
-#include "action_aat_product.hh"
-#include "action_atv_product.hh"
+#include "basic_actions.hh"
 
 BTL_MAIN;
 
@@ -32,12 +26,16 @@
 {
 
   bench<Action_axpy<gmm_interface<REAL_TYPE> > >(MIN_AXPY,MAX_AXPY,NB_POINT);
+  bench<Action_axpby<gmm_interface<REAL_TYPE> > >(MIN_AXPY,MAX_AXPY,NB_POINT);
+
   bench<Action_matrix_vector_product<gmm_interface<REAL_TYPE> > >(MIN_MV,MAX_MV,NB_POINT);
   bench<Action_atv_product<gmm_interface<REAL_TYPE> > >(MIN_MV,MAX_MV,NB_POINT);
+
   bench<Action_matrix_matrix_product<gmm_interface<REAL_TYPE> > >(MIN_MM,MAX_MM,NB_POINT);
   bench<Action_ata_product<gmm_interface<REAL_TYPE> > >(MIN_MM,MAX_MM,NB_POINT);
   bench<Action_aat_product<gmm_interface<REAL_TYPE> > >(MIN_MM,MAX_MM,NB_POINT);
 
+  bench<Action_trisolve<gmm_interface<REAL_TYPE> > >(MIN_MM,MAX_MM,NB_POINT);
   //bench<Action_lu_solve<blitz_LU_solve_interface<REAL_TYPE> > >(MIN_LU,MAX_LU,NB_POINT);
 
   return 0;
diff --git a/bench/btl/libs/hand_vec/hand_vec_interface.hh b/bench/btl/libs/hand_vec/hand_vec_interface.hh
index 538c03b..fb2e61c 100755
--- a/bench/btl/libs/hand_vec/hand_vec_interface.hh
+++ b/bench/btl/libs/hand_vec/hand_vec_interface.hh
@@ -68,96 +68,152 @@
     #endif
   }
 
-static inline void matrix_vector_product(const gene_matrix & A, const gene_vector & B, gene_vector & X, int N)
+  static inline void matrix_vector_product(const gene_matrix & A, const gene_vector & B, gene_vector & X, int N)
   {
     asm("#begin matrix_vector_product");
     int AN = (N/PacketSize)*PacketSize;
-    int ANP = (AN/(4*PacketSize))*4*PacketSize;
+    int ANP = (AN/(2*PacketSize))*2*PacketSize;
     int bound = (N/4)*4;
     for (int i=0;i<N;i++)
       X[i] = 0;
 
     for (int i=0;i<bound;i+=4)
     {
-      real tmp0 = B[i];
-      Packet ptmp0 = ei_pset1(tmp0);
-      real tmp1 = B[i+1];
-      Packet ptmp1 = ei_pset1(tmp1);
-      real tmp2 = B[i+2];
-      Packet ptmp2 = ei_pset1(tmp2);
-      real tmp3 = B[i+3];
-      Packet ptmp3 = ei_pset1(tmp3);
-      int iN0 = i*N;
-      int iN1 = (i+1)*N;
-      int iN2 = (i+2)*N;
-      int iN3 = (i+3)*N;
+      register real* __restrict__ A0 = A + i*N;
+      register real* __restrict__ A1 = A + (i+1)*N;
+      register real* __restrict__ A2 = A + (i+2)*N;
+      register real* __restrict__ A3 = A + (i+3)*N;
+
+      Packet ptmp0 = ei_pset1(B[i]);
+      Packet ptmp1 = ei_pset1(B[i+1]);
+      Packet ptmp2 = ei_pset1(B[i+2]);
+      Packet ptmp3 = ei_pset1(B[i+3]);
+//       register Packet ptmp0, ptmp1, ptmp2, ptmp3;
+//       asm(
+//
+//           "movss     (%[B],%[j],4), %[ptmp0]  \n\t"
+//           "shufps   $0,%[ptmp0],%[ptmp0] \n\t"
+//           "movss    4(%[B],%[j],4), %[ptmp1]  \n\t"
+//           "shufps   $0,%[ptmp1],%[ptmp1] \n\t"
+//           "movss    8(%[B],%[j],4), %[ptmp2]  \n\t"
+//           "shufps   $0,%[ptmp2],%[ptmp2] \n\t"
+//           "movss   12(%[B],%[j],4), %[ptmp3]  \n\t"
+//           "shufps   $0,%[ptmp3],%[ptmp3] \n\t"
+//           : [ptmp0] "=x" (ptmp0),
+//             [ptmp1] "=x" (ptmp1),
+//             [ptmp2] "=x" (ptmp2),
+//             [ptmp3] "=x" (ptmp3)
+//           : [B] "r" (B),
+//             [j] "r" (size_t(i))
+//           : );
+
       if (AN>0)
       {
-//         int aligned0 = (iN0 % PacketSize);
-        int aligned1 = (iN1 % PacketSize);
+//         for (size_t j = 0;j<ANP;j+=8)
+//         {
+//           asm(
+//
+//           "movaps     (%[A0],%[j],4), %%xmm8  \n\t"
+//           "movaps   16(%[A0],%[j],4), %%xmm12 \n\t"
+//           "movups     (%[A3],%[j],4), %%xmm11 \n\t"
+//           "movups   16(%[A3],%[j],4), %%xmm15 \n\t"
+//           "movups     (%[A2],%[j],4), %%xmm10 \n\t"
+//           "movups   16(%[A2],%[j],4), %%xmm14 \n\t"
+//           "movups     (%[A1],%[j],4), %%xmm9  \n\t"
+//           "movups   16(%[A1],%[j],4), %%xmm13 \n\t"
+//
+//           "mulps %[ptmp0], %%xmm8  \n\t"
+//           "addps (%[res0],%[j],4), %%xmm8  \n\t"
+//           "mulps %[ptmp3], %%xmm11 \n\t"
+//           "addps %%xmm11, %%xmm8  \n\t"
+//           "mulps %[ptmp2], %%xmm10 \n\t"
+//           "addps %%xmm10, %%xmm8  \n\t"
+//           "mulps %[ptmp1], %%xmm9  \n\t"
+//           "addps %%xmm9, %%xmm8   \n\t"
+//           "movaps %%xmm8, (%[res0],%[j],4)  \n\t"
+//
+//           "mulps %[ptmp0], %%xmm12 \n\t"
+//           "addps 16(%[res0],%[j],4), %%xmm12  \n\t"
+//           "mulps %[ptmp3], %%xmm15 \n\t"
+//           "addps %%xmm15, %%xmm12  \n\t"
+//           "mulps %[ptmp2], %%xmm14 \n\t"
+//           "addps %%xmm14, %%xmm12  \n\t"
+//           "mulps %[ptmp1], %%xmm13 \n\t"
+//           "addps %%xmm13, %%xmm12  \n\t"
+//           "movaps %%xmm12, 16(%[res0],%[j],4) \n\t"
+//           :
+//           : [res0] "r" (X), [j] "r" (j),[A0] "r" (A0),
+//             [A1] "r" (A1),
+//             [A2] "r" (A2),
+//             [A3] "r" (A3),
+//             [ptmp0] "x" (ptmp0),
+//             [ptmp1] "x" (ptmp1),
+//             [ptmp2] "x" (ptmp2),
+//             [ptmp3] "x" (ptmp3)
+//           : "%xmm8", "%xmm9", "%xmm10", "%xmm11", "%xmm12", "%xmm13", "%xmm14", "%xmm15", "%r14");
+//         }
+          register Packet A00;
+          register Packet A01;
+          register Packet A02;
+          register Packet A03;
+          register Packet A10;
+          register Packet A11;
+          register Packet A12;
+          register Packet A13;
+          for (int j = 0;j<ANP;j+=2*PacketSize)
+          {
+//             A00 = ei_pload(&A0[j]);
+//             A01 = ei_ploadu(&A1[j]);
+//             A02 = ei_ploadu(&A2[j]);
+//             A03 = ei_ploadu(&A3[j]);
+//             A10 = ei_pload(&A0[j+PacketSize]);
+//             A11 = ei_ploadu(&A1[j+PacketSize]);
+//             A12 = ei_ploadu(&A2[j+PacketSize]);
+//             A13 = ei_ploadu(&A3[j+PacketSize]);
+//
+//             A00 = ei_pmul(ptmp0, A00);
+//             A01 = ei_pmul(ptmp1, A01);
+//             A02 = ei_pmul(ptmp2, A02);
+//             A03 = ei_pmul(ptmp3, A03);
+//             A10 = ei_pmul(ptmp0, A10);
+//             A11 = ei_pmul(ptmp1, A11);
+//             A12 = ei_pmul(ptmp2, A12);
+//             A13 = ei_pmul(ptmp3, A13);
+//
+//             A00 = ei_padd(A00,A01);
+//             A02 = ei_padd(A02,A03);
+//             A00 = ei_padd(A00,ei_pload(&X[j]));
+//             A00 = ei_padd(A00,A02);
+//             ei_pstore(&X[j],A00);
+//
+//             A10 = ei_padd(A10,A11);
+//             A12 = ei_padd(A12,A13);
+//             A10 = ei_padd(A10,ei_pload(&X[j+PacketSize]));
+//             A10 = ei_padd(A10,A12);
+//             ei_pstore(&X[j+PacketSize],A10);
 
-        if (aligned1==0)
-        {
-          for (int j = 0;j<AN;j+=PacketSize)
-          {
             ei_pstore(&X[j],
               ei_padd(ei_pload(&X[j]),
                 ei_padd(
-                  ei_padd(ei_pmul(ptmp0,ei_pload(&A[j+iN0])),ei_pmul(ptmp1,ei_pload(&A[j+iN1]))),
-                  ei_padd(ei_pmul(ptmp2,ei_pload(&A[j+iN2])),ei_pmul(ptmp3,ei_pload(&A[j+iN3]))) )));
-          }
-        }
-        else if (aligned1==2)
-        {
-          for (int j = 0;j<AN;j+=PacketSize)
-          {
-            ei_pstore(&X[j],
-              ei_padd(ei_pload(&X[j]),
-                ei_padd(
-                  ei_padd(ei_pmul(ptmp0,ei_pload(&A[j+iN0])),ei_pmul(ptmp1,ei_ploadu(&A[j+iN1]))),
-                  ei_padd(ei_pmul(ptmp2,ei_pload(&A[j+iN2])),ei_pmul(ptmp3,ei_ploadu(&A[j+iN3]))) )));
-          }
-        }
-        else
-        {
-          for (int j = 0;j<ANP;j+=4*PacketSize)
-          {
-            ei_pstore(&X[j],
-              ei_padd(ei_pload(&X[j]),
-                ei_padd(
-                  ei_padd(ei_pmul(ptmp0,ei_pload(&A[j+iN0])),ei_pmul(ptmp1,ei_ploadu(&A[j+iN1]))),
-                  ei_padd(ei_pmul(ptmp2,ei_ploadu(&A[j+iN2])),ei_pmul(ptmp3,ei_ploadu(&A[j+iN3]))) )));
+                  ei_padd(ei_pmul(ptmp0,ei_pload(&A0[j])),ei_pmul(ptmp1,ei_ploadu(&A1[j]))),
+                  ei_padd(ei_pmul(ptmp2,ei_ploadu(&A2[j])),ei_pmul(ptmp3,ei_ploadu(&A3[j]))) )));
 
             ei_pstore(&X[j+PacketSize],
               ei_padd(ei_pload(&X[j+PacketSize]),
                 ei_padd(
-                  ei_padd(ei_pmul(ptmp0,ei_pload(&A[j+PacketSize+iN0])),ei_pmul(ptmp1,ei_ploadu(&A[j+PacketSize+iN1]))),
-                  ei_padd(ei_pmul(ptmp2,ei_ploadu(&A[j+PacketSize+iN2])),ei_pmul(ptmp3,ei_ploadu(&A[j+PacketSize+iN3]))) )));
-
-            ei_pstore(&X[j+2*PacketSize],
-              ei_padd(ei_pload(&X[j+2*PacketSize]),
-                ei_padd(
-                  ei_padd(ei_pmul(ptmp0,ei_pload(&A[j+2*PacketSize+iN0])),ei_pmul(ptmp1,ei_ploadu(&A[j+2*PacketSize+iN1]))),
-                  ei_padd(ei_pmul(ptmp2,ei_ploadu(&A[j+2*PacketSize+iN2])),ei_pmul(ptmp3,ei_ploadu(&A[j+2*PacketSize+iN3]))) )));
-
-            ei_pstore(&X[j+3*PacketSize],
-              ei_padd(ei_pload(&X[j+3*PacketSize]),
-                ei_padd(
-                  ei_padd(ei_pmul(ptmp0,ei_pload(&A[j+3*PacketSize+iN0])),ei_pmul(ptmp1,ei_ploadu(&A[j+3*PacketSize+iN1]))),
-                  ei_padd(ei_pmul(ptmp2,ei_ploadu(&A[j+3*PacketSize+iN2])),ei_pmul(ptmp3,ei_ploadu(&A[j+3*PacketSize+iN3]))) )));
-
+                  ei_padd(ei_pmul(ptmp0,ei_pload(&A0[j+PacketSize])),ei_pmul(ptmp1,ei_ploadu(&A1[j+PacketSize]))),
+                  ei_padd(ei_pmul(ptmp2,ei_ploadu(&A2[j+PacketSize])),ei_pmul(ptmp3,ei_ploadu(&A3[j+PacketSize]))) )));
           }
           for (int j = ANP;j<AN;j+=PacketSize)
             ei_pstore(&X[j],
               ei_padd(ei_pload(&X[j]),
                 ei_padd(
-                  ei_padd(ei_pmul(ptmp0,ei_pload(&A[j+iN0])),ei_pmul(ptmp1,ei_ploadu(&A[j+iN1]))),
-                  ei_padd(ei_pmul(ptmp2,ei_ploadu(&A[j+iN2])),ei_pmul(ptmp3,ei_ploadu(&A[j+iN3]))) )));
-        }
+                  ei_padd(ei_pmul(ptmp0,ei_pload(&A0[j])),ei_pmul(ptmp1,ei_ploadu(&A1[j]))),
+                  ei_padd(ei_pmul(ptmp2,ei_ploadu(&A2[j])),ei_pmul(ptmp3,ei_ploadu(&A3[j]))) )));
       }
       // process remaining scalars
       for (int j=AN;j<N;j++)
-        X[j] += tmp0 * A[j+iN0] + tmp1 * A[j+iN1];
+        X[j] += ei_pfirst(ptmp0) * A0[j] + ei_pfirst(ptmp1) * A1[j] + ei_pfirst(ptmp2) * A2[j] + ei_pfirst(ptmp3) * A3[j];
     }
     for (int i=bound;i<N;i++)
     {
@@ -185,6 +241,119 @@
 //   {
 //     asm("#begin matrix_vector_product");
 //     int AN = (N/PacketSize)*PacketSize;
+//     int ANP = (AN/(2*PacketSize))*2*PacketSize;
+//     int bound = (N/4)*4;
+//     for (int i=0;i<N;i++)
+//       X[i] = 0;
+//
+//     for (int i=0;i<bound;i+=4)
+//     {
+//       real tmp0 = B[i];
+//       Packet ptmp0 = ei_pset1(tmp0);
+//       real tmp1 = B[i+1];
+//       Packet ptmp1 = ei_pset1(tmp1);
+//       real tmp2 = B[i+2];
+//       Packet ptmp2 = ei_pset1(tmp2);
+//       real tmp3 = B[i+3];
+//       Packet ptmp3 = ei_pset1(tmp3);
+//       int iN0 = i*N;
+//       int iN1 = (i+1)*N;
+//       int iN2 = (i+2)*N;
+//       int iN3 = (i+3)*N;
+//       if (AN>0)
+//       {
+// //         int aligned0 = (iN0 % PacketSize);
+//         int aligned1 = (iN1 % PacketSize);
+//
+//         if (aligned1==0)
+//         {
+//           for (int j = 0;j<AN;j+=PacketSize)
+//           {
+//             ei_pstore(&X[j],
+//               ei_padd(ei_pload(&X[j]),
+//                 ei_padd(
+//                   ei_padd(ei_pmul(ptmp0,ei_pload(&A[j+iN0])),ei_pmul(ptmp1,ei_pload(&A[j+iN1]))),
+//                   ei_padd(ei_pmul(ptmp2,ei_pload(&A[j+iN2])),ei_pmul(ptmp3,ei_pload(&A[j+iN3]))) )));
+//           }
+//         }
+//         else if (aligned1==2)
+//         {
+//           for (int j = 0;j<AN;j+=PacketSize)
+//           {
+//             ei_pstore(&X[j],
+//               ei_padd(ei_pload(&X[j]),
+//                 ei_padd(
+//                   ei_padd(ei_pmul(ptmp0,ei_pload(&A[j+iN0])),ei_pmul(ptmp1,ei_ploadu(&A[j+iN1]))),
+//                   ei_padd(ei_pmul(ptmp2,ei_pload(&A[j+iN2])),ei_pmul(ptmp3,ei_ploadu(&A[j+iN3]))) )));
+//           }
+//         }
+//         else
+//         {
+//           for (int j = 0;j<ANP;j+=2*PacketSize)
+//           {
+//             ei_pstore(&X[j],
+//               ei_padd(ei_pload(&X[j]),
+//                 ei_padd(
+//                   ei_padd(ei_pmul(ptmp0,ei_pload(&A[j+iN0])),ei_pmul(ptmp1,ei_ploadu(&A[j+iN1]))),
+//                   ei_padd(ei_pmul(ptmp2,ei_ploadu(&A[j+iN2])),ei_pmul(ptmp3,ei_ploadu(&A[j+iN3]))) )));
+//
+//             ei_pstore(&X[j+PacketSize],
+//               ei_padd(ei_pload(&X[j+PacketSize]),
+//                 ei_padd(
+//                   ei_padd(ei_pmul(ptmp0,ei_pload(&A[j+PacketSize+iN0])),ei_pmul(ptmp1,ei_ploadu(&A[j+PacketSize+iN1]))),
+//                   ei_padd(ei_pmul(ptmp2,ei_ploadu(&A[j+PacketSize+iN2])),ei_pmul(ptmp3,ei_ploadu(&A[j+PacketSize+iN3]))) )));
+//
+// //             ei_pstore(&X[j+2*PacketSize],
+// //               ei_padd(ei_pload(&X[j+2*PacketSize]),
+// //                 ei_padd(
+// //                   ei_padd(ei_pmul(ptmp0,ei_pload(&A[j+2*PacketSize+iN0])),ei_pmul(ptmp1,ei_ploadu(&A[j+2*PacketSize+iN1]))),
+// //                   ei_padd(ei_pmul(ptmp2,ei_ploadu(&A[j+2*PacketSize+iN2])),ei_pmul(ptmp3,ei_ploadu(&A[j+2*PacketSize+iN3]))) )));
+// //
+// //             ei_pstore(&X[j+3*PacketSize],
+// //               ei_padd(ei_pload(&X[j+3*PacketSize]),
+// //                 ei_padd(
+// //                   ei_padd(ei_pmul(ptmp0,ei_pload(&A[j+3*PacketSize+iN0])),ei_pmul(ptmp1,ei_ploadu(&A[j+3*PacketSize+iN1]))),
+// //                   ei_padd(ei_pmul(ptmp2,ei_ploadu(&A[j+3*PacketSize+iN2])),ei_pmul(ptmp3,ei_ploadu(&A[j+3*PacketSize+iN3]))) )));
+//
+//           }
+//           for (int j = ANP;j<AN;j+=PacketSize)
+//             ei_pstore(&X[j],
+//               ei_padd(ei_pload(&X[j]),
+//                 ei_padd(
+//                   ei_padd(ei_pmul(ptmp0,ei_ploadu(&A[j+iN0])),ei_pmul(ptmp1,ei_ploadu(&A[j+iN1]))),
+//                   ei_padd(ei_pmul(ptmp2,ei_ploadu(&A[j+iN2])),ei_pmul(ptmp3,ei_ploadu(&A[j+iN3]))) )));
+//         }
+//       }
+//       // process remaining scalars
+//       for (int j=AN;j<N;j++)
+//         X[j] += tmp0 * A[j+iN0] + tmp1 * A[j+iN1] + tmp2 * A[j+iN2] + tmp3 * A[j+iN3];
+//     }
+//     for (int i=bound;i<N;i++)
+//     {
+//       real tmp0 = B[i];
+//       Packet ptmp0 = ei_pset1(tmp0);
+//       int iN0 = i*N;
+//       if (AN>0)
+//       {
+//         bool aligned0 = (iN0 % PacketSize) == 0;
+//         if (aligned0)
+//           for (int j = 0;j<AN;j+=PacketSize)
+//             ei_pstore(&X[j], ei_padd(ei_pmul(ptmp0,ei_pload(&A[j+iN0])),ei_pload(&X[j])));
+//         else
+//           for (int j = 0;j<AN;j+=PacketSize)
+//             ei_pstore(&X[j], ei_padd(ei_pmul(ptmp0,ei_ploadu(&A[j+iN0])),ei_pload(&X[j])));
+//       }
+//       // process remaining scalars
+//       for (int j=AN;j<N;j++)
+//         X[j] += tmp0 * A[j+iN0];
+//     }
+//     asm("#end matrix_vector_product");
+//   }
+
+//   static inline void matrix_vector_product(const gene_matrix & A, const gene_vector & B, gene_vector & X, int N)
+//   {
+//     asm("#begin matrix_vector_product");
+//     int AN = (N/PacketSize)*PacketSize;
 //     for (int i=0;i<N;i++)
 //       X[i] = 0;
 //
diff --git a/bench/btl/libs/mtl4/main.cpp b/bench/btl/libs/mtl4/main.cpp
index 8cf1f6f..94c81be 100644
--- a/bench/btl/libs/mtl4/main.cpp
+++ b/bench/btl/libs/mtl4/main.cpp
@@ -18,13 +18,8 @@
 #include "utilities.h"
 #include "mtl4_interface.hh"
 #include "bench.hh"
-#include "action_matrix_vector_product.hh"
-#include "action_matrix_matrix_product.hh"
-#include "action_axpy.hh"
-#include "action_lu_solve.hh"
-#include "action_ata_product.hh"
-#include "action_aat_product.hh"
-#include "action_atv_product.hh"
+#include "basic_actions.hh"
+#include "action_cholesky.hh"
 
 BTL_MAIN;
 
@@ -32,12 +27,17 @@
 {
 
   bench<Action_axpy<mtl4_interface<REAL_TYPE> > >(MIN_AXPY,MAX_AXPY,NB_POINT);
+  bench<Action_axpby<mtl4_interface<REAL_TYPE> > >(MIN_AXPY,MAX_AXPY,NB_POINT);
+
   bench<Action_matrix_vector_product<mtl4_interface<REAL_TYPE> > >(MIN_MV,MAX_MV,NB_POINT);
   bench<Action_atv_product<mtl4_interface<REAL_TYPE> > >(MIN_MV,MAX_MV,NB_POINT);
   bench<Action_matrix_matrix_product<mtl4_interface<REAL_TYPE> > >(MIN_MM,MAX_MM,NB_POINT);
 //   bench<Action_ata_product<mtl4_interface<REAL_TYPE> > >(MIN_MM,MAX_MM,NB_POINT);
 //   bench<Action_aat_product<mtl4_interface<REAL_TYPE> > >(MIN_MM,MAX_MM,NB_POINT);
 
+  bench<Action_trisolve<mtl4_interface<REAL_TYPE> > >(MIN_MM,MAX_MM,NB_POINT);
+  bench<Action_cholesky<mtl4_interface<REAL_TYPE> > >(MIN_MM,MAX_MM,NB_POINT);
+
   return 0;
 }
 
diff --git a/bench/btl/libs/mtl4/mtl4_interface.hh b/bench/btl/libs/mtl4/mtl4_interface.hh
index 5beb936..a299fa8 100644
--- a/bench/btl/libs/mtl4/mtl4_interface.hh
+++ b/bench/btl/libs/mtl4/mtl4_interface.hh
@@ -19,6 +19,7 @@
 #define MTL4_INTERFACE_HH
 
 #include <boost/numeric/mtl/mtl.hpp>
+#include <boost/numeric/mtl/operation/cholesky.hpp>
 #include <vector>
 
 using namespace mtl;
@@ -81,6 +82,9 @@
 
   static inline void matrix_matrix_product(const gene_matrix & A, const gene_matrix & B, gene_matrix & X, int N){
     X = (A*B);
+//     morton_dense<double, doppled_64_row_mask> C(N,N);
+//     C = B;
+//     X = (A*C);
   }
 
   static inline void transposed_matrix_matrix_product(const gene_matrix & A, const gene_matrix & B, gene_matrix & X, int N){
@@ -88,11 +92,11 @@
   }
 
   static inline void ata_product(const gene_matrix & A, gene_matrix & X, int N){
-//     X = (trans(A)*A);
+    X = (trans(A)*A);
   }
 
   static inline void aat_product(const gene_matrix & A, gene_matrix & X, int N){
-//     X = (A*trans(A));
+    X = (A*trans(A));
   }
 
   static inline void matrix_vector_product(gene_matrix & A, gene_vector & B, gene_vector & X, int N){
@@ -107,6 +111,19 @@
     Y += coef * X;
   }
 
+  static inline void axpby(real a, const gene_vector & X, real b, gene_vector & Y, int N){
+    Y = a*X + b*Y;
+  }
+
+  static inline void cholesky(const gene_matrix & X, gene_matrix & C, int N){
+    C = X;
+    recursive_cholesky(C);
+  }
+
+  static inline void trisolve_lower(const gene_matrix & L, const gene_vector& B, gene_vector & X, int N){
+    X = lower_trisolve(L, B);
+  }
+
   static inline void copy_matrix(const gene_matrix & source, gene_matrix & cible, int N){
     cible = source;
   }
diff --git a/bench/btl/libs/ublas/main.cpp b/bench/btl/libs/ublas/main.cpp
index acc5936..6d286e1 100644
--- a/bench/btl/libs/ublas/main.cpp
+++ b/bench/btl/libs/ublas/main.cpp
@@ -20,18 +20,15 @@
 #include "utilities.h"
 #include "ublas_interface.hh"
 #include "bench.hh"
-#include "action_matrix_vector_product.hh"
-#include "action_matrix_matrix_product.hh"
-#include "action_axpy.hh"
-#include "action_ata_product.hh"
-#include "action_aat_product.hh"
-#include "action_atv_product.hh"
+#include "basic_actions.hh"
+#include "basic_actions.hh"
 
 BTL_MAIN;
 
 int main()
 {
   bench<Action_axpy<ublas_interface<REAL_TYPE> > >(MIN_AXPY,MAX_AXPY,NB_POINT);
+  bench<Action_axpby<ublas_interface<REAL_TYPE> > >(MIN_AXPY,MAX_AXPY,NB_POINT);
 
   bench<Action_matrix_vector_product<ublas_interface<REAL_TYPE> > >(MIN_MV,MAX_MV,NB_POINT);
   bench<Action_atv_product<ublas_interface<REAL_TYPE> > >(MIN_MV,MAX_MV,NB_POINT);
@@ -40,6 +37,8 @@
   bench<Action_ata_product<ublas_interface<REAL_TYPE> > >(MIN_MM,MAX_MM,NB_POINT);
   bench<Action_aat_product<ublas_interface<REAL_TYPE> > >(MIN_MM,MAX_MM,NB_POINT);
 
+  bench<Action_trisolve<ublas_interface<REAL_TYPE> > >(MIN_MM,MAX_MM,NB_POINT);
+
   return 0;
 }
 
diff --git a/bench/btl/libs/ublas/ublas_interface.hh b/bench/btl/libs/ublas/ublas_interface.hh
index 2572f8c..95cad51 100644
--- a/bench/btl/libs/ublas/ublas_interface.hh
+++ b/bench/btl/libs/ublas/ublas_interface.hh
@@ -23,7 +23,9 @@
 #include <boost/numeric/ublas/vector.hpp>
 #include <boost/numeric/ublas/matrix.hpp>
 #include <boost/numeric/ublas/io.hpp>
+#include <boost/numeric/ublas/triangular.hpp>
 
+using namespace boost::numeric;
 
 template <class real>
 class ublas_interface{
@@ -116,6 +118,10 @@
     Y.plus_assign(coef*X);
   }
 
+  static inline void axpby(real a, const gene_vector & X, real b, gene_vector & Y, int N){
+    Y = a*X + b*Y;
+  }
+
   static inline void ata_product(gene_matrix & A, gene_matrix & X, int N){
     // X =  prod(trans(A),A);
     X.assign(prod(trans(A),A));
@@ -126,6 +132,10 @@
     X.assign(prod(A,trans(A)));
   }
 
+  static inline void trisolve_lower(const gene_matrix & L, const gene_vector& B, gene_vector & X, int N){
+    X = solve(L, B, ublas::lower_tag ());
+  }
+
 };
 
 #endif
diff --git a/bench/sparse_trisolver.cpp b/bench/sparse_trisolver.cpp
index 27362c1..c6b29e8 100644
--- a/bench/sparse_trisolver.cpp
+++ b/bench/sparse_trisolver.cpp
@@ -60,8 +60,8 @@
   BenchTimer timer;
   #if 1
   EigenSparseTriMatrix sm1(rows,cols);
-  VectorXf b = VectorXf::random(cols);
-  VectorXf x = VectorXf::random(cols);
+  VectorXf b = VectorXf::Random(cols);
+  VectorXf x = VectorXf::Random(cols);
 
   bool densedone = false;
 
@@ -171,9 +171,9 @@
     {
       timer.reset();
       for (int _j=0; _j<10; ++_j) {
-        Matrix4f m = Matrix4f::random();
-        Vector4f b = Vector4f::random();
-        Vector4f x = Vector4f::random();
+        Matrix4f m = Matrix4f::Random();
+        Vector4f b = Vector4f::Random();
+        Vector4f x = Vector4f::Random();
         timer.start();
         for (int _k=0; _k<1000000; ++_k) {
           b = m.inverseProduct(b);
@@ -186,9 +186,9 @@
     {
       timer.reset();
       for (int _j=0; _j<10; ++_j) {
-        Matrix4f m = Matrix4f::random();
-        Vector4f b = Vector4f::random();
-        Vector4f x = Vector4f::random();
+        Matrix4f m = Matrix4f::Random();
+        Vector4f b = Vector4f::Random();
+        Vector4f x = Vector4f::Random();
         timer.start();
         for (int _k=0; _k<1000000; ++_k) {
           m.inverseProductInPlace(x);