Improve c2-aten tensor interop and add proper testing (#15860)
Summary:
Pull Request resolved: pytorch/pytorch#15860

A few changes (which are hard to split into separate diffs, so they come together):
- make the conversions explicit (they can throw, so explicitness avoids surprises); see the sketch below
- fix the legacy type dispatch not being initialized when a tensor is created on the C2 side
- add a set of invariants and enforce them

Reviewed By: ezyang

Differential Revision: D13596031

fbshipit-source-id: d20b601e06ba47aeff2f6e8e15769840e2d46108
Dmytro Dzhulgakov authored and facebook-github-bot committed Jan 29, 2019
1 parent 9d6be6a commit 5e21e0f
Showing 12 changed files with 450 additions and 70 deletions.
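
Before the per-file diffs, a rough sketch of what the now-explicit conversions look like from the caller's side. This is a minimal illustration based on the tests added below, with the CPU device substituted for brevity; the exact CPU setup is an assumption, not part of this commit.

#include <ATen/ATen.h>
#include <caffe2/core/tensor.h>

int main() {
  // Caffe2 -> ATen: the conversion is explicit and may throw if the tensor
  // violates at::Tensor's invariants (see enforce_invariants below).
  caffe2::Tensor c2_tensor =
      caffe2::empty({4, 4}, at::dtype<int64_t>().device(caffe2::CPU));
  c2_tensor.mutable_data<int64_t>();  // materialize dtype and storage
  at::Tensor at_tensor(c2_tensor);

  // ATen -> Caffe2: also explicit; both wrappers share the same TensorImpl.
  caffe2::Tensor back(at_tensor);
  return back.numel() == 16 ? 0 : 1;
}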
aten/src/ATen/core/LegacyTypeDispatch.h (14 additions, 3 deletions)
@@ -176,9 +176,20 @@ struct CAFFE2_API AutoNonVariableTypeMode {
 inline Type& legacyTensorType(const TensorImpl& tensor) {
   // NB: It's valid to use getTypeRaw here, because the TensorImpl
   // could not have been created without initializing the Type first.
-  // TODO: This is not actually true via the Caffe2 codepath! Make
-  // it so.
-  return *globalLegacyTypeDispatch().getTypeRaw(tensorTypeIdToBackend(tensor.type_id()), typeMetaToScalarType(tensor.dtype()), tensor.is_variable() && !at::NonVariableTypeMode::is_enabled());
+  // NB: This is not actually true via the Caffe2 codepath! But we call
+  // initializeLegacyTypeDispatchFor in the right place.
+  return *globalLegacyTypeDispatch().getTypeRaw(
+      tensorTypeIdToBackend(tensor.type_id()),
+      typeMetaToScalarType(tensor.dtype()),
+      tensor.is_variable() && !at::NonVariableTypeMode::is_enabled());
 }
+
+inline void initializeLegacyTypeDispatchFor(const TensorImpl& tensor) {
+  // getType calls the right initialization
+  globalLegacyTypeDispatch().getType(
+      tensorTypeIdToBackend(tensor.type_id()),
+      typeMetaToScalarType(tensor.dtype()),
+      tensor.is_variable() && !at::NonVariableTypeMode::is_enabled());
+}
 
 } // namespace at
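
To make the initialization contract concrete, a hedged sketch of the intended call order (illustrative only; the helper name use_after_init is not part of the diff):

#include <ATen/core/LegacyTypeDispatch.h>

// Illustrative: for a TensorImpl that arrived from Caffe2, run the
// initialization before any legacyTensorType lookup; enforce_invariants()
// (added in Tensor.cpp below) does exactly this.
void use_after_init(const at::TensorImpl& impl) {
  at::initializeLegacyTypeDispatchFor(impl);   // getType registers the Type
  at::Type& type = at::legacyTensorType(impl); // getTypeRaw is now safe
  (void)type;
}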
aten/src/ATen/core/Tensor.cpp (21 additions)
@@ -6,6 +6,27 @@

namespace at {

void Tensor::enforce_invariants() {
  if (impl_.get() == nullptr) {
    throw std::runtime_error("TensorImpl with nullptr is not supported");
  }
  // The following line throws if the dtype is not a POD data type or is not
  // supported by ATen
  scalar_type();
  if (defined()) {
    AT_ASSERTM(
        impl_->dtype_initialized(),
        "Partially-initialized tensor not supported by at::Tensor");
    AT_ASSERTM(
        impl_->storage_initialized(),
        "Partially-initialized tensor not supported by at::Tensor");
    // Ensure LegacyTypeDispatch is initialized. In ATen it's done in tensor
    // factory functions, but when we get a tensor from Caffe2 we might bypass
    // those factory functions.
    initializeLegacyTypeDispatchFor(*impl_);
  }
}

void Tensor::print() const {
  if (defined()) {
    std::cerr << "[" << type().toString() << " " << sizes() << "]" << std::endl;
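
For contrast, a minimal sketch of the kind of tensor enforce_invariants is meant to reject (the test name and CPU setup are assumptions, not part of this commit):

#include <gtest/gtest.h>
#include <ATen/ATen.h>
#include <caffe2/core/tensor.h>

TEST(Caffe2ToPytorch, PartiallyInitialized) {
  // Shape is set, but mutable_data<T>() was never called, so neither
  // dtype nor storage is initialized; the explicit conversion must throw.
  caffe2::Tensor c2_tensor(caffe2::CPU);
  c2_tensor.Resize(4, 4);
  ASSERT_ANY_THROW(at::Tensor at_tensor(c2_tensor));
}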
aten/src/ATen/core/Tensor.h (21 additions, 6 deletions)
@@ -46,20 +46,34 @@ using TensorList = ArrayRef<Tensor>;
 // Note that Tensor can also be NULL, i.e. it is not associated with any underlying TensorImpl, and
 // special care must be taken to handle this.
 class CAFFE2_API Tensor {
-public:
+ public:
   Tensor(){};
-  Tensor(c10::intrusive_ptr<TensorImpl, UndefinedTensorImpl> tensor_impl)
+  // This constructor should not be used by end users and is an implementation
+  // detail invoked by autogenerated code.
+  explicit Tensor(
+      c10::intrusive_ptr<TensorImpl, UndefinedTensorImpl> tensor_impl)
       : impl_(std::move(tensor_impl)) {
     if (impl_.get() == nullptr) {
-      throw std::runtime_error("TensorBaseImpl with nullptr not supported");
+      throw std::runtime_error("TensorImpl with nullptr is not supported");
     }
   }

   Tensor(const Tensor&) = default;
   Tensor(Tensor&&) = default;

-  explicit Tensor(C10Tensor tensor)
-      : impl_(std::move(tensor).impl()) {}
+ public:
+  // Creates a new wrapper from TensorImpl. Intentionally a free method because
+  // it should be used with care. Checks necessary invariants
+  static Tensor wrap_tensor_impl(
+      c10::intrusive_ptr<TensorImpl, UndefinedTensorImpl> tensor_impl) {
+    Tensor r(std::move(tensor_impl));
+    r.enforce_invariants();
+    return r;
+  }
+
+  explicit Tensor(C10Tensor tensor) : impl_(std::move(tensor).impl()) {
+    enforce_invariants();
+  }

   explicit operator C10Tensor() const & {
     return C10Tensor(impl_);
@@ -708,6 +722,7 @@ class CAFFE2_API Tensor {
   friend struct WeakTensor;

 protected:
+  void enforce_invariants();
   c10::intrusive_ptr<TensorImpl, UndefinedTensorImpl> impl_;
 };
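
A brief usage sketch for the new checked entry point, re-wrapping an existing tensor's impl purely for illustration (getIntrusivePtr() is the accessor assumed here):

#include <ATen/ATen.h>

int main() {
  at::Tensor src = at::ones({2, 2});
  // wrap_tensor_impl runs enforce_invariants(), unlike the raw
  // intrusive_ptr constructor, which is reserved for autogenerated code.
  at::Tensor checked = at::Tensor::wrap_tensor_impl(src.getIntrusivePtr());
  return checked.defined() ? 0 : 1;
}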
aten/src/ATen/templates/Tensor.h (21 additions, 6 deletions)
@@ -46,20 +46,34 @@ using TensorList = ArrayRef<Tensor>;
 // Note that Tensor can also be NULL, i.e. it is not associated with any underlying TensorImpl, and
 // special care must be taken to handle this.
 class CAFFE2_API Tensor {
-public:
+ public:
   Tensor(){};
-  Tensor(c10::intrusive_ptr<TensorImpl, UndefinedTensorImpl> tensor_impl)
+  // This constructor should not be used by end users and is an implementation
+  // detail invoked by autogenerated code.
+  explicit Tensor(
+      c10::intrusive_ptr<TensorImpl, UndefinedTensorImpl> tensor_impl)
       : impl_(std::move(tensor_impl)) {
     if (impl_.get() == nullptr) {
-      throw std::runtime_error("TensorBaseImpl with nullptr not supported");
+      throw std::runtime_error("TensorImpl with nullptr is not supported");
     }
   }

   Tensor(const Tensor&) = default;
   Tensor(Tensor&&) = default;

-  explicit Tensor(C10Tensor tensor)
-      : impl_(std::move(tensor).impl()) {}
+ public:
+  // Creates a new wrapper from TensorImpl. Intentionally a free method because
+  // it should be used with care. Checks necessary invariants
+  static Tensor wrap_tensor_impl(
+      c10::intrusive_ptr<TensorImpl, UndefinedTensorImpl> tensor_impl) {
+    Tensor r(std::move(tensor_impl));
+    r.enforce_invariants();
+    return r;
+  }
+
+  explicit Tensor(C10Tensor tensor) : impl_(std::move(tensor).impl()) {
+    enforce_invariants();
+  }

   explicit operator C10Tensor() const & {
     return C10Tensor(impl_);
@@ -313,6 +327,7 @@ class CAFFE2_API Tensor {
   friend struct WeakTensor;

 protected:
+  void enforce_invariants();
   c10::intrusive_ptr<TensorImpl, UndefinedTensorImpl> impl_;
 };
aten/src/ATen/test/CMakeLists.txt (2 additions, 1 deletion)
@@ -29,7 +29,8 @@ list(APPEND ATen_CUDA_TEST_SRCS
   ${CMAKE_CURRENT_SOURCE_DIR}/cuda_stream_test.cpp
   ${CMAKE_CURRENT_SOURCE_DIR}/cuda_half_test.cu
   ${CMAKE_CURRENT_SOURCE_DIR}/cuda_optional_test.cu
-  ${CMAKE_CURRENT_SOURCE_DIR}/cuda_packedtensoraccessor_test.cu)
+  ${CMAKE_CURRENT_SOURCE_DIR}/cuda_packedtensoraccessor_test.cu
+  ${CMAKE_CURRENT_SOURCE_DIR}/cuda_tensor_interop_test.cpp)
 if (CUDNN_FOUND)
   list(APPEND ATen_CUDA_TEST_SRCS
     ${CMAKE_CURRENT_SOURCE_DIR}/cuda_cudnn_test.cpp)
aten/src/ATen/test/cuda_tensor_interop_test.cpp (164 additions, new file)
@@ -0,0 +1,164 @@
#include "gtest/gtest.h"

#include "ATen/ATen.h"
#include <ATen/cuda/CUDAContext.h>
#include <caffe2/core/init.h>
#include <caffe2/core/operator.h>
#include <caffe2/core/context_gpu.h>
#include <caffe2/utils/math.h>

// dumbest possible copies
template<typename T>
T cuda_get(T* addr) {
T result;
CUDA_ENFORCE(cudaMemcpy(&result, addr, sizeof(T), cudaMemcpyDefault));
return result;
}

template<typename T>
void cuda_set(T* addr, T value) {
CUDA_ENFORCE(cudaMemcpy(addr, &value, sizeof(T), cudaMemcpyDefault));
}

TEST(CUDACaffe2ToPytorch, SimpleLegacy) {
if (!at::cuda::is_available()) return;
caffe2::Tensor c2_tensor(caffe2::CUDA);
c2_tensor.Resize(4, 4);
auto data = c2_tensor.mutable_data<int64_t>();
{
caffe2::CUDAContext context;
caffe2::math::Set<int64_t>(16, 777, data, &context);
}
at::Tensor at_tensor(c2_tensor);
ASSERT_TRUE(&at_tensor.type() != nullptr);
ASSERT_TRUE(at_tensor.is_cuda());

auto at_cpu = at_tensor.cpu();
auto it = at_cpu.data<int64_t>();
for (int64_t i = 0; i < 16; i++) {
ASSERT_EQ(it[i], 777);
}
}

TEST(CUDACaffe2ToPytorch, Simple) {
if (!at::cuda::is_available()) return;
caffe2::Tensor c2_tensor =
caffe2::empty({4, 4}, at::dtype<int64_t>().device(caffe2::CUDA));
auto data = c2_tensor.mutable_data<int64_t>();
{
caffe2::CUDAContext context;
caffe2::math::Set<int64_t>(16, 777, data, &context);
}
at::Tensor at_tensor(c2_tensor);
ASSERT_TRUE(&at_tensor.type() != nullptr);
ASSERT_TRUE(at_tensor.is_cuda());

auto at_cpu = at_tensor.cpu();
auto it = at_cpu.data<int64_t>();
for (int64_t i = 0; i < 16; i++) {
ASSERT_EQ(it[i], 777);
}
}

TEST(CUDACaffe2ToPytorch, Op) {
if (!at::cuda::is_available()) return;
caffe2::Tensor c2_tensor =
caffe2::empty({3, 3}, at::dtype<int64_t>().device(caffe2::CUDA));
auto data = c2_tensor.mutable_data<int64_t>();
{
caffe2::CUDAContext context;
caffe2::math::Set<int64_t>(9, 111, data, &context);
}
at::Tensor at_tensor(c2_tensor);
ASSERT_TRUE(at_tensor.is_cuda());

ASSERT_EQ(at::sum(at_tensor).item<int64_t>(), 999);
}

TEST(CUDAPytorchToCaffe2, Op) {
if (!at::cuda::is_available()) return;
caffe2::Workspace workspace;
caffe2::NetDef net;

auto at_tensor_a = at::ones({5, 5}, at::dtype(at::kFloat).device(at::kCUDA));
auto at_tensor_b = at::ones({5, 5}, at::dtype(at::kFloat).device(at::kCUDA));
auto at_tensor_c = at::ones({5, 5}, at::dtype(at::kFloat).device(at::kCUDA));

auto* c2_tensor_a = BlobSetTensor(workspace.CreateBlob("a"), caffe2::Tensor(at_tensor_a));
auto* c2_tensor_b = BlobSetTensor(workspace.CreateBlob("b"), caffe2::Tensor(at_tensor_b));

// Test Alias
{
caffe2::Tensor c2_tensor_from_aten(at_tensor_c);
BlobSetTensor(workspace.CreateBlob("c"), c2_tensor_from_aten.Alias());
}

{
auto op = net.add_op();
op->set_type("Sum");
op->add_input("a");
op->add_input("b");
op->add_input("c");
op->add_output("d");
op->mutable_device_option()->set_device_type(caffe2::PROTO_CUDA);
}

workspace.RunNetOnce(net);

const auto& result = workspace.GetBlob("d")->Get<caffe2::Tensor>();
ASSERT_EQ(result.GetDeviceType(), caffe2::CUDA);

auto data = result.data<float>();
for (int64_t i = 0; i < 25; i++) {
ASSERT_EQ(cuda_get(data + i), 3.0);
}
at::Tensor at_result(result);
ASSERT_TRUE(at_result.is_cuda());
ASSERT_EQ(at::sum(at_result).item<float>(), 75);
}

TEST(CUDAPytorchToCaffe2, SharedStorageWrite) {
if (!at::cuda::is_available()) return;
auto at_tensor_a = at::ones({5, 5}, at::dtype(at::kFloat).device(at::kCUDA));
auto at_tensor_b = at_tensor_a.view({25});

caffe2::Tensor c2_tensor_a(at_tensor_a);
caffe2::Tensor c2_tensor_b(at_tensor_b);

// change is visible everywhere
cuda_set<float>(c2_tensor_a.mutable_data<float>() + 1, 123);
ASSERT_EQ(cuda_get(c2_tensor_b.mutable_data<float>() + 1), 123);
ASSERT_EQ(at_tensor_a[0][1].item().to<float>(), 123);
ASSERT_EQ(at_tensor_b[1].item().to<float>(), 123);
}

TEST(CUDAPytorchToCaffe2, MutualResizes) {
if (!at::cuda::is_available()) return;
auto at_tensor = at::ones({5, 5}, at::dtype(at::kFloat).device(at::kCUDA));

caffe2::Tensor c2_tensor(at_tensor);

// change is visible
cuda_set<float>(c2_tensor.mutable_data<float>(), 123);
ASSERT_EQ(at_tensor[0][0].item().to<float>(), 123);

// resize PT tensor in smaller direction - storage is preserved
at_tensor.resize_({4, 4});
cuda_set<float>(c2_tensor.mutable_data<float>() + 1, 234);
ASSERT_EQ(at_tensor[0][1].item().to<float>(), 234);

// resize PT tensor in larger direction - storage is preserved
at_tensor.resize_({6, 6});
cuda_set<float>(c2_tensor.mutable_data<float>() + 2, 345);
ASSERT_EQ(at_tensor[0][2].item().to<float>(), 345);
ASSERT_EQ(c2_tensor.sizes()[0], 6);
ASSERT_EQ(c2_tensor.sizes()[1], 6);

// resize Caffe2 tensor - semantics are to NOT preserve the data, but the
// TensorImpl is still shared
c2_tensor.Resize(7, 7);
cuda_set<float>(c2_tensor.mutable_data<float>() + 3, 456);
ASSERT_EQ(at_tensor[0][3].item().to<float>(), 456);
ASSERT_EQ(at_tensor.sizes()[0], 7);
ASSERT_EQ(at_tensor.sizes()[1], 7);
}