Skip to content

Commit

Permalink
Re-land sym_numel (pytorch#82374) (pytorch#82726) (pytorch#82731) (pytorch#82855)
Browse files Browse the repository at this point in the history

### Description
This is a reland of (pytorch#82374) (pytorch#82726) (pytorch#82731)
This PR has no extra fixes, it simply updates the **correct** pin to point to the XLA side that has the corresponding changes.

### Issue
<!-- Link to Issue ticket or RFP -->

### Testing
<!-- How did you test your change? -->

Pull Request resolved: pytorch#82855
Approved by: https://github.com/ezyang, https://github.com/qihqi
  • Loading branch information
Krovatkin authored and pytorchmergebot committed Aug 5, 2022
1 parent bdb0abb commit bfebf25
Show file tree
Hide file tree
Showing 16 changed files with 161 additions and 51 deletions.
2 changes: 1 addition & 1 deletion .github/ci_commit_pins/xla.txt
Original file line number Diff line number Diff line change
@@ -1 +1 @@
5d294a9ecf40d49ef68342e4ccdb70e23cae39f2
2c446ff14cf81f5bc31d8999e37c6a1d355882f8
5 changes: 5 additions & 0 deletions aten/src/ATen/NestedTensorImpl.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -192,6 +192,11 @@ int64_t NestedTensorImpl::numel_custom() const {
return static_cast<int64_t>(num_elements);
}


c10::SymInt NestedTensorImpl::sym_numel_custom() const {
  // A nested tensor's element count is always concrete (it is computed
  // eagerly from the nested sizes), so wrap the plain integer in a SymInt.
  const int64_t concrete_numel = NestedTensorImpl::numel_custom();
  return c10::SymInt(concrete_numel);
}

// Contiguity queries are intentionally unsupported for nested tensors: a
// nested tensor has no single well-defined strided layout, so this always
// throws rather than returning a misleading answer.
bool NestedTensorImpl::is_contiguous_custom(MemoryFormat) const {
TORCH_CHECK(false, "is_contiguous is disabled.");
}
Expand Down
1 change: 1 addition & 0 deletions aten/src/ATen/NestedTensorImpl.h
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,7 @@ struct TORCH_API NestedTensorImpl : public c10::TensorImpl {
// TODO: numel_custom and is_contiguous_custom can be profitably overridden
// with real implementations
int64_t numel_custom() const override;
c10::SymInt sym_numel_custom() const override;
bool is_contiguous_custom(MemoryFormat) const override;
int64_t size_custom(int64_t d) const override {
return this->size(d);
Expand Down
4 changes: 4 additions & 0 deletions aten/src/ATen/core/TensorBase.h
Original file line number Diff line number Diff line change
Expand Up @@ -286,6 +286,10 @@ class TORCH_API TensorBase {
return impl_->numel();
}

// Total number of elements as a SymInt; unlike numel(), the result may be
// a symbolic expression when the underlying impl has symbolic sizes.
c10::SymInt sym_numel() const {
return impl_->sym_numel();
}

// Length of one array element in bytes. This is the traditional
// Numpy naming.
size_t itemsize() const {
Expand Down
32 changes: 23 additions & 9 deletions c10/core/TensorImpl.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -209,16 +209,20 @@ void TensorImpl::HandleResize() {
// If needed, we will free the data. the next mutable_data() call
// will create the data storage.
bool reset_tensor = false;

TORCH_CHECK(!numel_.is_symbolic(), "CAFFE2 doesn't support SymInts");
int concrete_numel = numel_.as_int_unchecked();
if (reserved_) {
// If tensor is reserved then don't claim its memeory unless nbytes()
// is smaller than new size
reset_tensor =
storage_.nbytes() < (storage_offset_ + numel_) * data_type_.itemsize();
reset_tensor = storage_.nbytes() <
(storage_offset_ + concrete_numel) * data_type_.itemsize();
} else {
reset_tensor = storage_.nbytes() <
(storage_offset_ + numel_) * data_type_.itemsize() ||
(storage_offset_ + concrete_numel) * data_type_.itemsize() ||
!FLAGS_caffe2_keep_on_shrink ||
storage_.nbytes() - (storage_offset_ + numel_) * data_type_.itemsize() >
storage_.nbytes() -
(storage_offset_ + concrete_numel) * data_type_.itemsize() >
static_cast<size_t>(FLAGS_caffe2_max_keep_on_shrink_memory);
}

Expand Down Expand Up @@ -419,6 +423,13 @@ c10::SymIntArrayRef TensorImpl::sym_sizes_custom() const {
return sym_sizes_default();
}

c10::SymInt TensorImpl::sym_numel_custom() const {
  // Tensors overridden from Python forward the query to the interpreter;
  // everything else just reads the cached numel_ via the default path.
  if (C10_LIKELY(!is_python_dispatch())) {
    return sym_numel_default();
  }
  return load_pyobj_interpreter()->sym_numel(this);
}

c10::Device TensorImpl::device_custom() const {
if (is_python_dispatch()) {
return load_pyobj_interpreter()->device(this);
Expand Down Expand Up @@ -690,7 +701,7 @@ void TensorImpl::Extend(int64_t num, float growthPct) {
sizes_and_strides_.size_at_unchecked(0).as_int_unchecked() *
(1 + growthPct / 100))));
auto oldData = std::move(storage_.data_ptr());
auto oldSize = numel_;
auto oldSize = numel_.as_int_unchecked();
Resize(newCapacity);
auto* newData = raw_mutable_data(data_type_);
if (data_type_.copy()) {
Expand Down Expand Up @@ -726,7 +737,7 @@ void TensorImpl::ReserveSpace(int64_t outer_dim) {
"Right now ReserveSpace is only supported for contiguous Tensor.");
TORCH_CHECK(
!has_symbolic_sizes_strides_,
"ReserveSpace() called on tensor with symbolic shape")
"ReserveSpace() called on tensor with symbolic shape");

TORCH_CHECK(storage_.unique(), "Can't call ReserveSpace on shared storage.");
// TODO: eliminate newCapacity.
Expand Down Expand Up @@ -758,15 +769,15 @@ void TensorImpl::Reshape(const std::vector<int64_t>& dims) {
"Right now Reshape is only supported for contiguous Tensor.");
TORCH_CHECK(
!has_symbolic_sizes_strides_,
"Reshape() called on tensor with symbolic shape")
"Reshape() called on tensor with symbolic shape");

int64_t new_size = 1;
for (auto d : dims) {
TORCH_CHECK(d >= 0);
new_size *= d;
}
TORCH_CHECK(
new_size == numel_,
new_size == numel_.as_int_unchecked(),
"New size and old size are not equal. You cannot use Reshape, "
"but should use Resize."
// TODO(jiayq): remove the following warning after pending diffs
Expand Down Expand Up @@ -828,8 +839,11 @@ void TensorImpl::ShareExternalPointer(
data_type != ScalarType::Undefined,
"To share with a raw external pointer you need to pass in an "
"initialized data_type(TypeMeta).");
TORCH_CHECK(
!has_symbolic_sizes_strides_,
"ReserveSpace() called on tensor with symbolic shape");
if (!size_bytes) {
size_bytes = numel_ * data_type.itemsize();
size_bytes = numel_.as_int_unchecked() * data_type.itemsize();
}
if (storage_.unique()) {
storage_.UniqueStorageShareExternalPointer(std::move(data_ptr), size_bytes);
Expand Down
43 changes: 31 additions & 12 deletions c10/core/TensorImpl.h
Original file line number Diff line number Diff line change
Expand Up @@ -564,6 +564,21 @@ struct C10_API TensorImpl : public c10::intrusive_ptr_target {

virtual c10::SymIntArrayRef sym_sizes_custom() const;

// Element count as a SymInt.  Subclasses that opt into
// SizesStridesPolicy::CustomSizes (e.g. Python-dispatch or nested tensors)
// are routed to the virtual sym_numel_custom(); everyone else reads the
// cached numel_ directly via sym_numel_default().
c10::SymInt sym_numel() const {
if (C10_UNLIKELY(
sizes_strides_policy_ >=
static_cast<uint8_t>(SizesStridesPolicy::CustomSizes))) {
return sym_numel_custom();
}
return sym_numel_default();
}

// Default implementation: return the cached numel_, which may be symbolic.
inline c10::SymInt sym_numel_default() const {
return numel_;
}

virtual c10::SymInt sym_numel_custom() const;

/**
* Return a reference to the strides of this tensor. This reference remains
* valid as long as the tensor is live and not restrided.
Expand Down Expand Up @@ -746,9 +761,9 @@ struct C10_API TensorImpl : public c10::intrusive_ptr_target {

// Non-symbolic fast path: callers reach here only when numel_ is known to
// hold a concrete integer, so unwrap it without a symbolic-int check.
inline int64_t numel_default() const {
#ifdef DEBUG
// Debug builds cross-check the cached value against a fresh recompute.
TORCH_INTERNAL_ASSERT(compute_numel() == numel_.as_int_unchecked());
#endif
return numel_.as_int_unchecked();
}

public:
Expand Down Expand Up @@ -1926,6 +1941,10 @@ struct C10_API TensorImpl : public c10::intrusive_ptr_target {
* and a new storage will be created.
*/
inline void* raw_mutable_data(const caffe2::TypeMeta meta) {
auto concrete_numel = numel_.expect_int();
#ifdef DEBUG
TORCH_INTERNAL_ASSERT(compute_numel() == concrete_numel);
#endif
// For 0-size tensors it's fine to return any pointer (including nullptr)
if (data_type_ == meta && storage_initialized()) {
return static_cast<void*>(
Expand All @@ -1940,9 +1959,9 @@ struct C10_API TensorImpl : public c10::intrusive_ptr_target {
// We can reuse the existing buffer if the current data does not have
// a special destructor and the new data doesn't have a special
// constructor.
if (numel_ == 0 ||
if (concrete_numel == 0 ||
(meta.placementNew() == nullptr && !had_special_dtor &&
(storage_.nbytes() >= (numel_ * data_type_.itemsize())))) {
(storage_.nbytes() >= (concrete_numel * data_type_.itemsize())))) {
TORCH_INTERNAL_ASSERT(
storage_offset_ == 0); // because we just reallocated
return storage_.data();
Expand All @@ -1959,18 +1978,18 @@ struct C10_API TensorImpl : public c10::intrusive_ptr_target {
// For types that need placement new, we will call it, as well as
// making sure that when the data is freed, it calls the right
// destruction procedure.
auto size = numel_;
auto dtor = data_type_.placementDelete();
auto data_ptr = allocator->allocate(numel_ * data_type_.itemsize());
auto data_ptr =
allocator->allocate(concrete_numel * data_type_.itemsize());
storage_.set_data_ptr_noswap(PlacementDeleteContext::makeDataPtr(
std::move(data_ptr), dtor, size, storage_.device()));
data_type_.placementNew()(storage_.data(), numel_);
std::move(data_ptr), dtor, concrete_numel, storage_.device()));
data_type_.placementNew()(storage_.data(), concrete_numel);
} else {
// For fundamental type, new and delete is easier.
storage_.set_data_ptr_noswap(
allocator->allocate(numel_ * data_type_.itemsize()));
allocator->allocate(concrete_numel * data_type_.itemsize()));
}
storage_.set_nbytes(numel_ * data_type_.itemsize());
storage_.set_nbytes(concrete_numel * data_type_.itemsize());
TORCH_INTERNAL_ASSERT(
storage_offset_ == 0); // because we just reallocated
device_opt_ = storage_.device();
Expand Down Expand Up @@ -2045,7 +2064,7 @@ struct C10_API TensorImpl : public c10::intrusive_ptr_target {
"empty_tensor_restride() called on tensor with symbolic shape")
#ifdef DEBUG
TORCH_INTERNAL_ASSERT(
compute_numel() == numel_,
compute_numel() == numel_.as_int_unchecked(),
"If you are seeing this error, that means empty_tensor_restride was "
"called before setting correct numel");
#endif
Expand Down Expand Up @@ -2469,7 +2488,7 @@ struct C10_API TensorImpl : public c10::intrusive_ptr_target {
// time, we will immediately set sizes to {0} and reset numel to 0.
// (Can't do that in the default initializers, because there's no way to
// spell "allocate a one-element array" for strides_).
int64_t numel_ = 1;
SymInt numel_ = c10::SymInt(1);

// INVARIANT: When storage is non-null, this type meta must
// agree with the type meta in storage
Expand Down
7 changes: 7 additions & 0 deletions c10/core/impl/PyInterpreter.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,12 @@ static c10::Layout noop_layout_fn(const PyInterpreter*, const TensorImpl*) {
"attempted to call `layout` on Tensor with nontrivial PyObject after corresponding interpreter died");
}

// Stub installed by disarm(): any late call to sym_numel through an
// interpreter whose Python side has died is a hard internal error rather
// than a use-after-free.
static c10::SymInt noop_sym_numel_fn(const PyInterpreter*, const TensorImpl*) {
TORCH_INTERNAL_ASSERT(
0,
"attempted to call `sym_numel` on Tensor with nontrivial PyObject after corresponding interpreter died");
}

void PyInterpreter::disarm() noexcept {
name_fn_ = &noop_name_fn;
decref_fn_ = &noop_decref_fn;
Expand All @@ -88,6 +94,7 @@ void PyInterpreter::disarm() noexcept {
sizes_fn_ = &noop_sizes_fn;
sym_sizes_fn_ = &noop_sym_sizes_fn;
layout_fn_ = &noop_layout_fn;
sym_numel_fn_ = &noop_sym_numel_fn;
}

} // namespace impl
Expand Down
13 changes: 11 additions & 2 deletions c10/core/impl/PyInterpreter.h
Original file line number Diff line number Diff line change
Expand Up @@ -136,6 +136,7 @@ struct C10_API PyInterpreter {
using sym_sizes_sig =
c10::SymIntArrayRef(const PyInterpreter*, const TensorImpl*);
using layout_sig = c10::Layout(const PyInterpreter*, const TensorImpl*);
using sym_numel_sig = c10::SymInt(const PyInterpreter*, const TensorImpl*);

PyInterpreter(
name_sig* name_fn,
Expand All @@ -148,7 +149,8 @@ struct C10_API PyInterpreter {
strides_sig* strides,
sizes_sig* sizes,
sym_sizes_sig* sym_sizes,
layout_sig* layout)
layout_sig* layout,
sym_numel_sig* sym_numel)
: name_fn_(name_fn),
decref_fn_(decref_fn),
detach_fn_(detach),
Expand All @@ -159,7 +161,8 @@ struct C10_API PyInterpreter {
strides_fn_(strides),
sizes_fn_(sizes),
sym_sizes_fn_(sym_sizes),
layout_fn_(layout) {}
layout_fn_(layout),
sym_numel_fn_(sym_numel) {}

name_sig* name_fn_;
decref_sig* decref_fn_;
Expand All @@ -172,6 +175,7 @@ struct C10_API PyInterpreter {
sizes_sig* sizes_fn_;
sym_sizes_sig* sym_sizes_fn_;
layout_sig* layout_fn_;
sym_numel_sig* sym_numel_fn_;

// UBSAN suppression fixes: "call to function
// (anonymous namespace)::concrete_decref_fn(c10::impl::PyInterpreter const*,
Expand Down Expand Up @@ -236,6 +240,11 @@ struct C10_API PyInterpreter {
return (*layout_fn_)(this, self);
}

// Forwards a sym_numel query to the registered interpreter hook (replaced
// with a throwing noop after disarm()).
__ubsan_ignore_function__ c10::SymInt sym_numel(
const TensorImpl* self) const {
return (*sym_numel_fn_)(this, self);
}

// Disarm this PyInterpreter, making all of its methods noops.
// Because the function pointers are raw pointers (not atomics),
// a disarm() invocation that is concurrent with active destructors
Expand Down
4 changes: 4 additions & 0 deletions caffe2/core/tensor.h
Original file line number Diff line number Diff line change
Expand Up @@ -439,6 +439,10 @@ class TORCH_API Tensor final {
return impl_->sym_sizes();
}

// Element count as a SymInt, delegating to the underlying TensorImpl.
inline c10::SymInt sym_numel() const {
return impl_->sym_numel();
}

inline int64_t size_from_dim(int k) const {
return size_from_dim_(k, impl_->sizes());
}
Expand Down
Loading

0 comments on commit bfebf25

Please sign in to comment.