Add a getPool() accessor returning the ThreadPoolInterface* of ThreadPoolDevice, and fix a comment typo ("choosen" -> "chosen").

diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceThreadPool.h b/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceThreadPool.h
index ec6802e..ca9ba40 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceThreadPool.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceThreadPool.h

@@ -169,7 +169,7 @@
 
   // parallelFor executes f with [0, n) arguments in parallel and waits for
   // completion. F accepts a half-open interval [first, last).
-  // Block size is choosen based on the iteration cost and resulting parallel
+  // Block size is chosen based on the iteration cost and resulting parallel
   // efficiency. If block_align is not nullptr, it is called to round up the
   // block size.
   void parallelFor(Index n, const TensorOpCost& cost,
@@ -261,6 +261,9 @@
     parallelFor(n, cost, nullptr, std::move(f));
   }
 
+  // Accessor for the underlying thread pool: returns the stored pool_ pointer.
+  ThreadPoolInterface* getPool() const { return pool_; }
+
  private:
   ThreadPoolInterface* pool_;
   int num_threads_;