Fix a race in async tensor evaluation: don't invoke on_done() until device.deallocate() / evaluator.cleanup() have completed, since the device may be destroyed as soon as on_done() has run.
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorExecutor.h b/unsupported/Eigen/CXX11/src/Tensor/TensorExecutor.h index 4f72156..0fb0a92 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorExecutor.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorExecutor.h
@@ -592,8 +592,8 @@ : evaluator(expr, thread_pool), on_done(std::move(done)) {} ~TensorAsyncExecutorContext() { - on_done(); evaluator.cleanup(); + on_done(); } Evaluator evaluator; @@ -674,9 +674,9 @@ on_done(std::move(done)) {} ~TensorAsyncExecutorContext() { - on_done(); device.deallocate(tiling.buffer); evaluator.cleanup(); + on_done(); } const ThreadPoolDevice& device; @@ -755,9 +755,9 @@ on_done(std::move(done)) {} ~TensorAsyncExecutorContext() { - on_done(); device.deallocate(tiling.buffer); evaluator.cleanup(); + on_done(); } const ThreadPoolDevice& device;