From 5e21e0fe750a8f4eb5644a4d5637a9bb19a3929c Mon Sep 17 00:00:00 2001 From: Dmytro Dzhulgakov Date: Mon, 28 Jan 2019 23:39:17 -0800 Subject: [PATCH] Improve c2-aten tensor interop and add proper testing (#15860) Summary: Pull Request resolved: https://github.com/pytorch/pytorch/pull/15860 Few changes (which are harder to split in separate diffs, so together): - make conversion explicit (as they can throw to avoid surprises) - fix tensor legacy dispatch not initialized when tensor is created on C2 side - add a bunch of invariants to enforce Reviewed By: ezyang Differential Revision: D13596031 fbshipit-source-id: d20b601e06ba47aeff2f6e8e15769840e2d46108 --- aten/src/ATen/core/LegacyTypeDispatch.h | 17 +- aten/src/ATen/core/Tensor.cpp | 21 ++ aten/src/ATen/core/Tensor.h | 27 ++- aten/src/ATen/templates/Tensor.h | 27 ++- aten/src/ATen/test/CMakeLists.txt | 3 +- .../ATen/test/cuda_tensor_interop_test.cpp | 164 +++++++++++++++ aten/src/ATen/test/tensor_interop_test.cpp | 190 ++++++++++++++---- .../src/THC/generic/THCTensorMathPointwise.cu | 2 +- aten/tools/run_tests.sh | 3 + caffe2/core/tensor.cc | 16 ++ caffe2/core/tensor.h | 44 ++-- caffe2/operators/layer_norm_op.cc | 6 +- 12 files changed, 450 insertions(+), 70 deletions(-) create mode 100644 aten/src/ATen/test/cuda_tensor_interop_test.cpp diff --git a/aten/src/ATen/core/LegacyTypeDispatch.h b/aten/src/ATen/core/LegacyTypeDispatch.h index 9c9d44e84f..66bfcd65d5 100644 --- a/aten/src/ATen/core/LegacyTypeDispatch.h +++ b/aten/src/ATen/core/LegacyTypeDispatch.h @@ -176,9 +176,20 @@ struct CAFFE2_API AutoNonVariableTypeMode { inline Type& legacyTensorType(const TensorImpl& tensor) { // NB: It's valid to use getTypeRaw here, because the TensorImpl // could not have been created without initializing the Type first. - // TODO: This is not actually true via the Caffe2 codepath! Make - // it so. - return *globalLegacyTypeDispatch().getTypeRaw(tensorTypeIdToBackend(tensor.type_id()), typeMetaToScalarType(tensor.dtype()), tensor.is_variable() && !at::NonVariableTypeMode::is_enabled()); + // NB: This is not actually true via the Caffe2 codepath! But we call + // initializeLegacyTypeDispatchFor in the right place. + return *globalLegacyTypeDispatch().getTypeRaw( + tensorTypeIdToBackend(tensor.type_id()), + typeMetaToScalarType(tensor.dtype()), + tensor.is_variable() && !at::NonVariableTypeMode::is_enabled()); +} + +inline void initializeLegacyTypeDispatchFor(const TensorImpl& tensor) { + // getType calls the right initialization + globalLegacyTypeDispatch().getType( + tensorTypeIdToBackend(tensor.type_id()), + typeMetaToScalarType(tensor.dtype()), + tensor.is_variable() && !at::NonVariableTypeMode::is_enabled()); } } // namespace at diff --git a/aten/src/ATen/core/Tensor.cpp b/aten/src/ATen/core/Tensor.cpp index 924688d40b..a6489c0a4e 100644 --- a/aten/src/ATen/core/Tensor.cpp +++ b/aten/src/ATen/core/Tensor.cpp @@ -6,6 +6,27 @@ namespace at { +void Tensor::enforce_invariants() { + if (impl_.get() == nullptr) { + throw std::runtime_error("TensorImpl with nullptr is not supported"); + } + // Following line throws if the method is not a POD data type or is not + // supported by ATen + scalar_type(); + if (defined()) { + AT_ASSERTM( + impl_->dtype_initialized(), + "Partially-initialized tensor not supported by at::Tensor"); + AT_ASSERTM( + impl_->storage_initialized(), + "Partially-initialized tensor not supported by at::Tensor"); + // Ensure LegacyTypeDispatch is initialized. 
In ATen it's done in tensor + // factory functions, but when we get a tensor from Caffe2 we might bypass + // those factory functions. + initializeLegacyTypeDispatchFor(*impl_); + } +} + void Tensor::print() const { if (defined()) { std::cerr << "[" << type().toString() << " " << sizes() << "]" << std::endl; diff --git a/aten/src/ATen/core/Tensor.h b/aten/src/ATen/core/Tensor.h index 6abf5fcbe9..ba4ccb4393 100644 --- a/aten/src/ATen/core/Tensor.h +++ b/aten/src/ATen/core/Tensor.h @@ -46,20 +46,34 @@ using TensorList = ArrayRef; // Note that Tensor can also be NULL, i.e. it is not associated with any underlying TensorImpl, and // special care must be taken to handle this. class CAFFE2_API Tensor { -public: + public: Tensor(){}; - Tensor(c10::intrusive_ptr tensor_impl) + // This constructor should not be used by end users and is an implementation + // detail invoked by autogenerated code. + explicit Tensor( + c10::intrusive_ptr tensor_impl) : impl_(std::move(tensor_impl)) { if (impl_.get() == nullptr) { - throw std::runtime_error("TensorBaseImpl with nullptr not supported"); + throw std::runtime_error("TensorImpl with nullptr is not supported"); } } - Tensor(const Tensor&) = default; Tensor(Tensor&&) = default; - explicit Tensor(C10Tensor tensor) - : impl_(std::move(tensor).impl()) {} + + public: + // Creates a new wrapper from TensorImpl. Intentionally a free method because + // it should be used with care. Checks necessary invariants + static Tensor wrap_tensor_impl( + c10::intrusive_ptr tensor_impl) { + Tensor r(std::move(tensor_impl)); + r.enforce_invariants(); + return r; + } + + explicit Tensor(C10Tensor tensor) : impl_(std::move(tensor).impl()) { + enforce_invariants(); + } explicit operator C10Tensor() const & { return C10Tensor(impl_); @@ -708,6 +722,7 @@ class CAFFE2_API Tensor { friend struct WeakTensor; protected: + void enforce_invariants(); c10::intrusive_ptr impl_; }; diff --git a/aten/src/ATen/templates/Tensor.h b/aten/src/ATen/templates/Tensor.h index fe0cd0c157..ce69ef4bd5 100644 --- a/aten/src/ATen/templates/Tensor.h +++ b/aten/src/ATen/templates/Tensor.h @@ -46,20 +46,34 @@ using TensorList = ArrayRef; // Note that Tensor can also be NULL, i.e. it is not associated with any underlying TensorImpl, and // special care must be taken to handle this. class CAFFE2_API Tensor { -public: + public: Tensor(){}; - Tensor(c10::intrusive_ptr tensor_impl) + // This constructor should not be used by end users and is an implementation + // detail invoked by autogenerated code. + explicit Tensor( + c10::intrusive_ptr tensor_impl) : impl_(std::move(tensor_impl)) { if (impl_.get() == nullptr) { - throw std::runtime_error("TensorBaseImpl with nullptr not supported"); + throw std::runtime_error("TensorImpl with nullptr is not supported"); } } - Tensor(const Tensor&) = default; Tensor(Tensor&&) = default; - explicit Tensor(C10Tensor tensor) - : impl_(std::move(tensor).impl()) {} + + public: + // Creates a new wrapper from TensorImpl. Intentionally a free method because + // it should be used with care. 
Checks necessary invariants + static Tensor wrap_tensor_impl( + c10::intrusive_ptr tensor_impl) { + Tensor r(std::move(tensor_impl)); + r.enforce_invariants(); + return r; + } + + explicit Tensor(C10Tensor tensor) : impl_(std::move(tensor).impl()) { + enforce_invariants(); + } explicit operator C10Tensor() const & { return C10Tensor(impl_); @@ -313,6 +327,7 @@ class CAFFE2_API Tensor { friend struct WeakTensor; protected: + void enforce_invariants(); c10::intrusive_ptr impl_; }; diff --git a/aten/src/ATen/test/CMakeLists.txt b/aten/src/ATen/test/CMakeLists.txt index d3f0d0134b..1d2457154b 100644 --- a/aten/src/ATen/test/CMakeLists.txt +++ b/aten/src/ATen/test/CMakeLists.txt @@ -29,7 +29,8 @@ list(APPEND ATen_CUDA_TEST_SRCS ${CMAKE_CURRENT_SOURCE_DIR}/cuda_stream_test.cpp ${CMAKE_CURRENT_SOURCE_DIR}/cuda_half_test.cu ${CMAKE_CURRENT_SOURCE_DIR}/cuda_optional_test.cu - ${CMAKE_CURRENT_SOURCE_DIR}/cuda_packedtensoraccessor_test.cu) + ${CMAKE_CURRENT_SOURCE_DIR}/cuda_packedtensoraccessor_test.cu + ${CMAKE_CURRENT_SOURCE_DIR}/cuda_tensor_interop_test.cpp) if (CUDNN_FOUND) list(APPEND ATen_CUDA_TEST_SRCS ${CMAKE_CURRENT_SOURCE_DIR}/cuda_cudnn_test.cpp) diff --git a/aten/src/ATen/test/cuda_tensor_interop_test.cpp b/aten/src/ATen/test/cuda_tensor_interop_test.cpp new file mode 100644 index 0000000000..9877fd2e03 --- /dev/null +++ b/aten/src/ATen/test/cuda_tensor_interop_test.cpp @@ -0,0 +1,164 @@ +#include "gtest/gtest.h" + +#include "ATen/ATen.h" +#include +#include +#include +#include +#include + +// dumbest possible copies +template +T cuda_get(T* addr) { + T result; + CUDA_ENFORCE(cudaMemcpy(&result, addr, sizeof(T), cudaMemcpyDefault)); + return result; +} + +template +void cuda_set(T* addr, T value) { + CUDA_ENFORCE(cudaMemcpy(addr, &value, sizeof(T), cudaMemcpyDefault)); +} + +TEST(CUDACaffe2ToPytorch, SimpleLegacy) { + if (!at::cuda::is_available()) return; + caffe2::Tensor c2_tensor(caffe2::CUDA); + c2_tensor.Resize(4, 4); + auto data = c2_tensor.mutable_data(); + { + caffe2::CUDAContext context; + caffe2::math::Set(16, 777, data, &context); + } + at::Tensor at_tensor(c2_tensor); + ASSERT_TRUE(&at_tensor.type() != nullptr); + ASSERT_TRUE(at_tensor.is_cuda()); + + auto at_cpu = at_tensor.cpu(); + auto it = at_cpu.data(); + for (int64_t i = 0; i < 16; i++) { + ASSERT_EQ(it[i], 777); + } +} + +TEST(CUDACaffe2ToPytorch, Simple) { + if (!at::cuda::is_available()) return; + caffe2::Tensor c2_tensor = + caffe2::empty({4, 4}, at::dtype().device(caffe2::CUDA)); + auto data = c2_tensor.mutable_data(); + { + caffe2::CUDAContext context; + caffe2::math::Set(16, 777, data, &context); + } + at::Tensor at_tensor(c2_tensor); + ASSERT_TRUE(&at_tensor.type() != nullptr); + ASSERT_TRUE(at_tensor.is_cuda()); + + auto at_cpu = at_tensor.cpu(); + auto it = at_cpu.data(); + for (int64_t i = 0; i < 16; i++) { + ASSERT_EQ(it[i], 777); + } +} + +TEST(CUDACaffe2ToPytorch, Op) { + if (!at::cuda::is_available()) return; + caffe2::Tensor c2_tensor = + caffe2::empty({3, 3}, at::dtype().device(caffe2::CUDA)); + auto data = c2_tensor.mutable_data(); + { + caffe2::CUDAContext context; + caffe2::math::Set(9, 111, data, &context); + } + at::Tensor at_tensor(c2_tensor); + ASSERT_TRUE(at_tensor.is_cuda()); + + ASSERT_EQ(at::sum(at_tensor).item(), 999); +} + +TEST(CUDAPytorchToCaffe2, Op) { + if (!at::cuda::is_available()) return; + caffe2::Workspace workspace; + caffe2::NetDef net; + + auto at_tensor_a = at::ones({5, 5}, at::dtype(at::kFloat).device(at::kCUDA)); + auto at_tensor_b = at::ones({5, 5}, 
at::dtype(at::kFloat).device(at::kCUDA)); + auto at_tensor_c = at::ones({5, 5}, at::dtype(at::kFloat).device(at::kCUDA)); + + auto* c2_tensor_a = BlobSetTensor(workspace.CreateBlob("a"), caffe2::Tensor(at_tensor_a)); + auto* c2_tensor_b = BlobSetTensor(workspace.CreateBlob("b"), caffe2::Tensor(at_tensor_b)); + + // Test Alias + { + caffe2::Tensor c2_tensor_from_aten(at_tensor_c); + BlobSetTensor(workspace.CreateBlob("c"), c2_tensor_from_aten.Alias()); + } + + { + auto op = net.add_op(); + op->set_type("Sum"); + op->add_input("a"); + op->add_input("b"); + op->add_input("c"); + op->add_output("d"); + op->mutable_device_option()->set_device_type(caffe2::PROTO_CUDA); + } + + workspace.RunNetOnce(net); + + const auto& result = workspace.GetBlob("d")->Get(); + ASSERT_EQ(result.GetDeviceType(), caffe2::CUDA); + + auto data = result.data(); + for (int64_t i = 0; i < 25; i++) { + ASSERT_EQ(cuda_get(data + i), 3.0); + } + at::Tensor at_result(result); + ASSERT_TRUE(at_result.is_cuda()); + ASSERT_EQ(at::sum(at_result).item(), 75); +} + +TEST(CUDAPytorchToCaffe2, SharedStorageWrite) { + if (!at::cuda::is_available()) return; + auto at_tensor_a = at::ones({5, 5}, at::dtype(at::kFloat).device(at::kCUDA)); + auto at_tensor_b = at_tensor_a.view({25}); + + caffe2::Tensor c2_tensor_a(at_tensor_a); + caffe2::Tensor c2_tensor_b(at_tensor_b); + + // change is visible everywhere + cuda_set(c2_tensor_a.mutable_data() + 1, 123); + ASSERT_EQ(cuda_get(c2_tensor_b.mutable_data() + 1), 123); + ASSERT_EQ(at_tensor_a[0][1].item().to(), 123); + ASSERT_EQ(at_tensor_b[1].item().to(), 123); +} + +TEST(CUDAPytorchToCaffe2, MutualResizes) { + if (!at::cuda::is_available()) return; + auto at_tensor = at::ones({5, 5}, at::dtype(at::kFloat).device(at::kCUDA)); + + caffe2::Tensor c2_tensor(at_tensor); + + // change is visible + cuda_set(c2_tensor.mutable_data(), 123); + ASSERT_EQ(at_tensor[0][0].item().to(), 123); + + // resize PT tensor in smaller direction - storage is preserved + at_tensor.resize_({4, 4}); + cuda_set(c2_tensor.mutable_data() + 1, 234); + ASSERT_EQ(at_tensor[0][1].item().to(), 234); + + // resize PT tensor in larger direction - storage is preserved + at_tensor.resize_({6, 6}); + cuda_set(c2_tensor.mutable_data() + 2, 345); + ASSERT_EQ(at_tensor[0][2].item().to(), 345); + ASSERT_EQ(c2_tensor.sizes()[0], 6); + ASSERT_EQ(c2_tensor.sizes()[1], 6); + + // resize Caffe2 tensor - semantics are to NOT preserve the data, but the + // TensorImpl is still shared + c2_tensor.Resize(7, 7); + cuda_set(c2_tensor.mutable_data() + 3, 456); + ASSERT_EQ(at_tensor[0][3].item().to(), 456); + ASSERT_EQ(at_tensor.sizes()[0], 7); + ASSERT_EQ(at_tensor.sizes()[1], 7); +} diff --git a/aten/src/ATen/test/tensor_interop_test.cpp b/aten/src/ATen/test/tensor_interop_test.cpp index f926312b5f..6016142db1 100644 --- a/aten/src/ATen/test/tensor_interop_test.cpp +++ b/aten/src/ATen/test/tensor_interop_test.cpp @@ -4,58 +4,83 @@ #include #include -TEST(TestTensorInterop, Caffe2ToPytorchSimpleLegacy) { +TEST(Caffe2ToPytorch, SimpleLegacy) { caffe2::Tensor c2_tensor(caffe2::CPU); c2_tensor.Resize(4, 4); auto data = c2_tensor.mutable_data(); for (int64_t i = 0; i < 16; i++) { data[i] = i; } + at::Tensor at_tensor(c2_tensor); + ASSERT_TRUE(&at_tensor.type() != nullptr); - // TODO: find out why calling data on tensor doesn't work - at::Tensor at_tensor(c2_tensor.getIntrusivePtr()); - at::TensorImpl* impl = at_tensor.unsafeGetTensorImpl(); - - auto it = impl->data(); + auto it = at_tensor.data(); for (int64_t i = 0; i < 16; i++) { ASSERT_EQ(it[i], 
i); } } -TEST(TestTensorInterop, Caffe2ToPytorchSimple) { +TEST(Caffe2ToPytorch, Simple) { caffe2::Tensor c2_tensor = caffe2::empty({4, 4}, at::kLong); auto data = c2_tensor.mutable_data(); for (int64_t i = 0; i < 16; i++) { data[i] = i; } - at::Tensor at_tensor(c2_tensor.getIntrusivePtr()); - at::TensorImpl* impl = at_tensor.unsafeGetTensorImpl(); + at::Tensor at_tensor(c2_tensor); + ASSERT_TRUE(&at_tensor.type() != nullptr); - auto it = impl->data(); + auto it = at_tensor.data(); for (int64_t i = 0; i < 16; i++) { ASSERT_EQ(it[i], i); } } -TEST(TestTensorInterop, Caffe2ToPytorchOp) { +TEST(Caffe2ToPytorch, Op) { caffe2::Tensor c2_tensor(caffe2::CPU); c2_tensor.Resize(3, 3); auto data = c2_tensor.mutable_data(); for (int64_t i = 0; i < 9; i++) { data[i] = i; } - at::Tensor at_tensor(c2_tensor.getIntrusivePtr()); + at::Tensor at_tensor(c2_tensor); ASSERT_EQ(at::sum(at_tensor).item(), 36); } -TEST(TestTensorInterop, Caffe2ToPytorchUnsupportedDevice) { - caffe2::Tensor c2_tensor(caffe2::IDEEP); - at::Tensor at_tensor(c2_tensor.getIntrusivePtr()); - ASSERT_ANY_THROW(at::sum(at_tensor)); +// Caffe2 doesn't actually have another always-on backend that is not CPU or GPU +// TEST(Caffe2ToPytorch, UnsupportedDevice) { +// caffe2::Tensor c2_tensor(caffe2::OPENGL); +// c2_tensor.Resize(4, 4); +// c2_tensor.mutable_data(); +// at::Tensor at_tensor(c2_tensor); +// ASSERT_ANY_THROW(at::sum(at_tensor)); +// } + +TEST(Caffe2ToPytorch, PartiallyInitialized) { + // These APIs for partially initialized tensors should go away soon, in the + // meantime ensure they are caught + { + // no dtype, no storage + caffe2::Tensor c2_tensor(caffe2::CPU); + ASSERT_ANY_THROW(at::Tensor at_tensor(c2_tensor)); + } + { + // storage, no dtype + caffe2::Tensor c2_tensor(caffe2::CPU); + c2_tensor.Resize(4,4); + ASSERT_ANY_THROW(at::Tensor at_tensor(c2_tensor)); + } + { + // dtype, no storage + caffe2::Tensor c2_tensor(caffe2::CPU); + c2_tensor.Resize(4,4); + c2_tensor.mutable_data(); + c2_tensor.FreeMemory(); + ASSERT_ANY_THROW(at::Tensor at_tensor(c2_tensor)); + } } -TEST(TestTensorInterop, PytorchToCaffe2Op) { +TEST(PytorchToCaffe2, Op) { caffe2::Workspace workspace; caffe2::NetDef net; @@ -63,14 +88,13 @@ TEST(TestTensorInterop, PytorchToCaffe2Op) { auto at_tensor_b = at::ones({5, 5}, at::dtype(at::kFloat)); auto at_tensor_c = at::ones({5, 5}, at::dtype(at::kFloat)); - auto* c2_tensor_a = BlobSetTensor(workspace.CreateBlob("a"), at_tensor_a.getIntrusivePtr()); - auto* c2_tensor_b = BlobSetTensor(workspace.CreateBlob("b"), at_tensor_b.getIntrusivePtr()); + auto* c2_tensor_a = BlobSetTensor(workspace.CreateBlob("a"), caffe2::Tensor(at_tensor_a)); + auto* c2_tensor_b = BlobSetTensor(workspace.CreateBlob("b"), caffe2::Tensor(at_tensor_b)); // Test Alias { - caffe2::Tensor c2_tensor_from_aten(at_tensor_c.getIntrusivePtr()); + caffe2::Tensor c2_tensor_from_aten(at_tensor_c); BlobSetTensor(workspace.CreateBlob("c"), c2_tensor_from_aten.Alias()); - } { @@ -90,19 +114,19 @@ TEST(TestTensorInterop, PytorchToCaffe2Op) { for (int64_t i = 0; i < 25; i++) { ASSERT_EQ(it[i], 3.0); } - at::Tensor at_result(result.getIntrusivePtr()); + at::Tensor at_result(result); ASSERT_EQ(at::sum(at_result).item(), 75); } -TEST(TestTensorInterop, PytorchToCaffe2SharedStorage) { +TEST(PytorchToCaffe2, SharedStorageRead) { caffe2::Workspace workspace; caffe2::NetDef net; auto at_tensor_a = at::ones({5, 5}, at::dtype(at::kFloat)); auto at_tensor_b = at_tensor_a.view({5, 5}); - auto* c2_tensor_a = BlobSetTensor(workspace.CreateBlob("a"), 
at_tensor_a.getIntrusivePtr()); - auto* c2_tensor_b = BlobSetTensor(workspace.CreateBlob("b"), at_tensor_b.getIntrusivePtr()); + auto* c2_tensor_a = BlobSetTensor(workspace.CreateBlob("a"), caffe2::Tensor(at_tensor_a)); + auto* c2_tensor_b = BlobSetTensor(workspace.CreateBlob("b"), caffe2::Tensor(at_tensor_b)); { auto op = net.add_op(); @@ -119,23 +143,115 @@ TEST(TestTensorInterop, PytorchToCaffe2SharedStorage) { for (int64_t i = 0; i < 25; i++) { ASSERT_EQ(it[i], 2.0); } - at::Tensor at_result(result.getIntrusivePtr()); + at::Tensor at_result(result); ASSERT_EQ(at::sum(at_result).item(), 50); } -TEST(TestTensorInterop, PytorchToCaffe2Strided) { - caffe2::Workspace workspace; - caffe2::NetDef net; +TEST(PytorchToCaffe2, SharedStorageWrite) { + auto at_tensor_a = at::ones({5, 5}, at::dtype(at::kFloat)); + auto at_tensor_b = at_tensor_a.view({25}); - auto at_tensor = at::ones({5, 5}, at::dtype(at::kFloat)).t(); - auto* c2_tensor = BlobSetTensor(workspace.CreateBlob("blob"), at_tensor.getIntrusivePtr()); + caffe2::Tensor c2_tensor_a(at_tensor_a); + caffe2::Tensor c2_tensor_b(at_tensor_b); - { - auto op = net.add_op(); - op->set_type("Sum"); - op->add_input("blob"); - op->add_output("out"); + // change is visible everywhere + c2_tensor_a.mutable_data()[1] = 123; + ASSERT_EQ(c2_tensor_b.mutable_data()[1], 123); + ASSERT_EQ(at_tensor_a[0][1].item().to(), 123); + ASSERT_EQ(at_tensor_b[1].item().to(), 123); +} + +TEST(PytorchToCaffe2, MutualResizes) { + auto at_tensor = at::ones({5, 5}, at::dtype(at::kFloat)); + + caffe2::Tensor c2_tensor(at_tensor); + + // change is visible + c2_tensor.mutable_data()[0] = 123; + ASSERT_EQ(at_tensor[0][0].item().to(), 123); + + // resize PT tensor in smaller direction - storage is preserved + at_tensor.resize_({4, 4}); + c2_tensor.mutable_data()[1] = 234; + ASSERT_EQ(at_tensor[0][1].item().to(), 234); + + // resize PT tensor in larger direction - storage is preserved + at_tensor.resize_({6, 6}); + c2_tensor.mutable_data()[2] = 345; + ASSERT_EQ(at_tensor[0][2].item().to(), 345); + ASSERT_EQ(c2_tensor.sizes()[0], 6); + ASSERT_EQ(c2_tensor.sizes()[1], 6); + + // resize Caffe2 tensor - semantics are to NOT preserve the data, but the + // TensorImpl is still shared + c2_tensor.Resize(7, 7); + c2_tensor.mutable_data()[3] = 456; + ASSERT_EQ(at_tensor[0][3].item().to(), 456); + ASSERT_EQ(at_tensor.sizes()[0], 7); + ASSERT_EQ(at_tensor.sizes()[1], 7); +} + +TEST(PytorchToCaffe2, Strided) { + auto at_tensor = at::ones({5, 5}, at::dtype(at::kFloat)).t(); + ASSERT_ANY_THROW(caffe2::Tensor c2_tensor(at_tensor)); + // but calling contiguous is fine + caffe2::Tensor c2_tensor(at_tensor.contiguous()); + for (int64_t i = 0; i < 25; i++) { + ASSERT_EQ(c2_tensor.data()[i], 1.0); } +} + +TEST(PytorchToCaffe2, InplaceStrided) { + auto at_tensor = at::zeros({2, 5}, at::dtype(at::kFloat)); + caffe2::Tensor c2_tensor(at_tensor); + ASSERT_EQ(c2_tensor.sizes()[0], 2); + ASSERT_EQ(c2_tensor.sizes()[1], 5); + + c2_tensor.mutable_data()[1] = 234; + ASSERT_EQ(at_tensor[0][1].item().to(), 234); + + at_tensor.t_(); + ASSERT_EQ(c2_tensor.sizes()[0], 5); + ASSERT_EQ(c2_tensor.sizes()[1], 2); + // This is BROKEN situation, however checking is_contiguous on every data + // access is expensive. We rely on user to not do crazy stuff. 
+ ASSERT_EQ(at_tensor[1][0].item().to(), 234); + ASSERT_EQ(c2_tensor.data()[1], 234); +} + +TEST(PytorchToCaffe2, NonRegularTensor) { + at::Tensor at_tensor = + at::empty({2, 3}, at::dtype().layout(at::kSparse)); + ASSERT_TRUE(at_tensor.is_sparse()); + ASSERT_ANY_THROW(caffe2::Tensor c2_tensor(at_tensor)); +} + +// With current build system it's too bothersome to set it up, but the test +// passes +// TEST(PytorchToCaffe2, Variable) { +// at::Tensor var = +// torch::autograd::make_variable(at::empty({2, 3}, at::dtype())); +// ASSERT_TRUE(var.is_variable()); +// ASSERT_ANY_THROW(caffe2::Tensor c2_tensor(var)); +// } + +TEST(Caffe2ToPytorch, NonPOD) { + caffe2::Tensor c2_tensor = caffe2::empty({1}, at::dtype()); + auto data = c2_tensor.mutable_data(); + *data = "test"; + ASSERT_ANY_THROW(at::Tensor at_tensor(c2_tensor)); +} + +TEST(Caffe2ToPytorch, Nullptr) { + caffe2::Tensor c2_tensor; + ASSERT_FALSE(c2_tensor.defined()); + at::Tensor at_tensor(c2_tensor); + ASSERT_FALSE(at_tensor.defined()); +} - ASSERT_ANY_THROW(workspace.RunNetOnce(net)); +TEST(PytorchToCaffe2, Nullptr) { + at::Tensor at_tensor; + ASSERT_FALSE(at_tensor.defined()); + caffe2::Tensor c2_tensor(at_tensor); + ASSERT_FALSE(c2_tensor.defined()); } diff --git a/aten/src/THC/generic/THCTensorMathPointwise.cu b/aten/src/THC/generic/THCTensorMathPointwise.cu index e3ef097515..82fdd2ac1d 100644 --- a/aten/src/THC/generic/THCTensorMathPointwise.cu +++ b/aten/src/THC/generic/THCTensorMathPointwise.cu @@ -241,7 +241,7 @@ void THCTensor_(cadd)(THCState *state, THCTensor *self_, THCTensor* src1, scalar #else auto alpha = value; #endif - at::add_out(out, retainTensorImpl(src1), retainTensorImpl(src2), alpha); + at::add_out(out, at::Tensor(retainTensorImpl(src1)), at::Tensor(retainTensorImpl(src2)), alpha); } void THCTensor_(csub)(THCState *state, THCTensor *self_, THCTensor* src1, scalar_t value, THCTensor *src2) diff --git a/aten/tools/run_tests.sh b/aten/tools/run_tests.sh index c2a0d2f47f..8ed5a21e2d 100755 --- a/aten/tools/run_tests.sh +++ b/aten/tools/run_tests.sh @@ -35,6 +35,9 @@ fi if [[ -x ./cuda_optional_test ]]; then ./cuda_optional_test fi +if [[ -x ./cuda_tensor_interop_test ]]; then + ./cuda_tensor_interop_test +fi if [ "$VALGRIND" == "ON" ] then valgrind --suppressions="$VALGRIND_SUP" --error-exitcode=1 ./basic "[cpu]" diff --git a/caffe2/core/tensor.cc b/caffe2/core/tensor.cc index 86b591ea73..8bd5190710 100644 --- a/caffe2/core/tensor.cc +++ b/caffe2/core/tensor.cc @@ -179,6 +179,22 @@ void ReinitializeAndCopyFrom( t->CopyFrom(src, async); } +void Tensor::enforce_invariants() { + if (impl_.get() == nullptr) { + throw std::runtime_error("TensorImpl with nullptr is not supported"); + } + CAFFE_ENFORCE( + !impl_->is_variable(), + "Caffe2 tensor wrapper doesn't support autograd variables"); + CAFFE_ENFORCE_EQ( + impl_->layout(), + at::kStrided, + "Caffe2 tensor wrapper supports only regular non-sparse tensors"); + CAFFE_ENFORCE( + impl_->is_contiguous(), + "Caffe2 tensor wrapper supports only contiguous tensors"); +} + namespace { struct TensorStatGetter : BlobStatGetter { diff --git a/caffe2/core/tensor.h b/caffe2/core/tensor.h index aca14feea0..58a8f29e4d 100644 --- a/caffe2/core/tensor.h +++ b/caffe2/core/tensor.h @@ -31,14 +31,10 @@ class CAFFE2_API Tensor final { using TensorImplPtr = c10::intrusive_ptr; TensorImplPtr impl_; + void enforce_invariants(); + public: Tensor() : impl_() {} - Tensor(c10::intrusive_ptr tensor_impl) - : impl_(std::move(tensor_impl)) { - if (impl_.get() == nullptr) { - throw 
std::runtime_error("TensorBaseImpl with nullptr not supported"); - } - } // caffe2::Tensor is explicitly marked as moveable-only because before // the refactoring the class used to be a value type and a lot of user code @@ -81,12 +77,6 @@ class CAFFE2_API Tensor final { )) { } - /** - * @brief Creates a caffe2 tensor from an ATen tensor - */ - explicit Tensor(const at::Tensor& tensor) - : impl_(std::move(tensor.getIntrusivePtr())) {} - /** * @brief Creates a tensor of the given dimension. * @@ -121,8 +111,34 @@ class CAFFE2_API Tensor final { CopyFrom(src); } - explicit Tensor(C10Tensor tensor) - : impl_(std::move(tensor).impl()) {} + /** + * @brief Mutual conversion with at::Tensor + * + * The tensor will share the same instance (data, strides, sizes, etc) but + * a different subset of APIs would be available + */ + explicit Tensor(const at::Tensor& tensor) + : impl_(std::move(tensor.getIntrusivePtr())) { + enforce_invariants(); + } + + explicit operator at::Tensor() const& { + return at::Tensor::wrap_tensor_impl(impl_); + } + + explicit operator at::Tensor() && { + return at::Tensor::wrap_tensor_impl(std::move(impl_)); + } + + /** + * @brief Mutual conversion with C10Tensor + * + * The tensor will share the same instance (data, strides, sizes, etc) but + * a different subset of APIs would be available + */ + explicit Tensor(C10Tensor tensor) : impl_(std::move(tensor).impl()) { + enforce_invariants(); + } explicit operator C10Tensor() const & { return C10Tensor(impl_); diff --git a/caffe2/operators/layer_norm_op.cc b/caffe2/operators/layer_norm_op.cc index c06bbdc844..9364883fcf 100644 --- a/caffe2/operators/layer_norm_op.cc +++ b/caffe2/operators/layer_norm_op.cc @@ -1,5 +1,7 @@ #include "caffe2/operators/layer_norm_op.h" #include "caffe2/utils/eigen_utils.h" +#include +#include #include #include #include @@ -202,10 +204,10 @@ c10::IValue layer_norm_c10(c10::ArrayRef inputs, c10::KernelState* caffe2::CPUContext context; State* cache = static_cast(state); if (!cache->scale.has_value()) { - cache->scale = at::Tensor(c10::C10Tensor(caffe2::Tensor{caffe2::CPU})); + cache->scale = at::empty({0}, at::dtype()); } if (!cache->bias.has_value()) { - cache->bias = at::Tensor(c10::C10Tensor(caffe2::Tensor{caffe2::CPU})); + cache->bias = at::empty({0}, at::dtype()); } caffe2::Tensor scale(*cache->scale); caffe2::Tensor bias(*cache->bias);
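
For reference, a minimal usage sketch of the now-explicit conversions described in the summary, adapted from tensor_interop_test.cpp in this patch. This is illustrative only and not part of the patch itself; the function name interop_sketch and the exact include paths are assumptions.

#include "caffe2/core/tensor.h"
#include <ATen/ATen.h>

void interop_sketch() {
  // Caffe2 -> ATen: the constructor is now explicit and runs
  // enforce_invariants(), so a fully-initialized tensor converts cleanly.
  caffe2::Tensor c2_tensor = caffe2::empty({4, 4}, at::kLong);
  at::Tensor at_tensor(c2_tensor);

  // A partially-initialized tensor (no dtype, no storage) throws at
  // conversion time instead of producing a broken at::Tensor.
  caffe2::Tensor uninit(caffe2::CPU);
  try {
    at::Tensor bad(uninit);
    (void)bad;
  } catch (const std::exception&) {
    // expected: invariants are enforced when wrapping the TensorImpl
  }

  // ATen -> Caffe2 is likewise explicit; non-contiguous (e.g. transposed)
  // tensors are rejected, so call .contiguous() first.
  auto strided = at::ones({5, 5}, at::dtype(at::kFloat)).t();
  caffe2::Tensor c2_from_at(strided.contiguous());
}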