Skip to content

Commit

Permalink
Re-land sym_numel (pytorch#82374) (pytorch#82726) (pytorch#82731) (pytorch#82855)
Browse files Browse the repository at this point in the history

### Description
This is a reland of (pytorch#82374) (pytorch#82726) (pytorch#82731)
This PR has no extra fixes, it simply updates the **correct** pin to point to the XLA side that has the corresponding changes.

### Issue
<!-- Link to Issue ticket or RFP -->

### Testing
<!-- How did you test your change? -->

Pull Request resolved: pytorch#82855
Approved by: https://github.com/ezyang, https://github.com/qihqi
  • Loading branch information
Krovatkin authored and pytorchmergebot committed Aug 5, 2022
1 parent bdb0abb commit bfebf25
Show file tree
Hide file tree
Showing 16 changed files with 161 additions and 51 deletions.
2 changes: 1 addition & 1 deletion .github/ci_commit_pins/xla.txt
Original file line number Diff line number Diff line change
@@ -1 +1 @@
5d294a9ecf40d49ef68342e4ccdb70e23cae39f2
2c446ff14cf81f5bc31d8999e37c6a1d355882f8
5 changes: 5 additions & 0 deletions aten/src/ATen/NestedTensorImpl.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -192,6 +192,11 @@ int64_t NestedTensorImpl::numel_custom() const {
return static_cast<int64_t>(num_elements);
}


c10::SymInt NestedTensorImpl::sym_numel_custom() const {
  // A nested tensor's element count is always concrete (it is computed
  // eagerly from the nested sizes), so wrap the plain integer in a SymInt.
  const int64_t concrete_numel = NestedTensorImpl::numel_custom();
  return c10::SymInt(concrete_numel);
}

// Contiguity queries are intentionally unsupported for nested tensors: a
// nested tensor has no single well-defined strided layout, so this always
// throws rather than returning a misleading answer.
bool NestedTensorImpl::is_contiguous_custom(MemoryFormat) const {
TORCH_CHECK(false, "is_contiguous is disabled.");
}
Expand Down
1 change: 1 addition & 0 deletions aten/src/ATen/NestedTensorImpl.h
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,7 @@ struct TORCH_API NestedTensorImpl : public c10::TensorImpl {
// TODO: numel_custom and is_contiguous_custom can be profitably overridden
// with real implementations
int64_t numel_custom() const override;
c10::SymInt sym_numel_custom() const override;
bool is_contiguous_custom(MemoryFormat) const override;
int64_t size_custom(int64_t d) const override {
return this->size(d);
Expand Down
4 changes: 4 additions & 0 deletions aten/src/ATen/core/TensorBase.h
Original file line number Diff line number Diff line change
Expand Up @@ -286,6 +286,10 @@ class TORCH_API TensorBase {
return impl_->numel();
}

// Total number of elements as a SymInt; unlike numel(), the result may be
// a symbolic expression when the underlying impl has symbolic sizes.
c10::SymInt sym_numel() const {
return impl_->sym_numel();
}

// Length of one array element in bytes. This is the traditional
// Numpy naming.
size_t itemsize() const {
Expand Down
32 changes: 23 additions & 9 deletions c10/core/TensorImpl.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -209,16 +209,20 @@ void TensorImpl::HandleResize() {
// If needed, we will free the data. the next mutable_data() call
// will create the data storage.
bool reset_tensor = false;

TORCH_CHECK(!numel_.is_symbolic(), "CAFFE2 doesn't support SymInts");
int concrete_numel = numel_.as_int_unchecked();
if (reserved_) {
// If tensor is reserved then don't claim its memeory unless nbytes()
// is smaller than new size
reset_tensor =
storage_.nbytes() < (storage_offset_ + numel_) * data_type_.itemsize();
reset_tensor = storage_.nbytes() <
(storage_offset_ + concrete_numel) * data_type_.itemsize();
} else {
reset_tensor = storage_.nbytes() <
(storage_offset_ + numel_) * data_type_.itemsize() ||
(storage_offset_ + concrete_numel) * data_type_.itemsize() ||
!FLAGS_caffe2_keep_on_shrink ||
storage_.nbytes() - (storage_offset_ + numel_) * data_type_.itemsize() >
storage_.nbytes() -
(storage_offset_ + concrete_numel) * data_type_.itemsize() >
static_cast<size_t>(FLAGS_caffe2_max_keep_on_shrink_memory);
}

Expand Down Expand Up @@ -419,6 +423,13 @@ c10::SymIntArrayRef TensorImpl::sym_sizes_custom() const {
return sym_sizes_default();
}

c10::SymInt TensorImpl::sym_numel_custom() const {
  // Tensors overridden from Python forward the query to the interpreter;
  // everything else just reads the cached numel_ via the default path.
  if (C10_LIKELY(!is_python_dispatch())) {
    return sym_numel_default();
  }
  return load_pyobj_interpreter()->sym_numel(this);
}

c10::Device TensorImpl::device_custom() const {
if (is_python_dispatch()) {
return load_pyobj_interpreter()->device(this);
Expand Down Expand Up @@ -690,7 +701,7 @@ void TensorImpl::Extend(int64_t num, float growthPct) {
sizes_and_strides_.size_at_unchecked(0).as_int_unchecked() *
(1 + growthPct / 100))));
auto oldData = std::move(storage_.data_ptr());
auto oldSize = numel_;
auto oldSize = numel_.as_int_unchecked();
Resize(newCapacity);
auto* newData = raw_mutable_data(data_type_);
if (data_type_.copy()) {
Expand Down Expand Up @@ -726,7 +737,7 @@ void TensorImpl::ReserveSpace(int64_t outer_dim) {
"Right now ReserveSpace is only supported for contiguous Tensor.");
TORCH_CHECK(
!has_symbolic_sizes_strides_,
"ReserveSpace() called on tensor with symbolic shape")
"ReserveSpace() called on tensor with symbolic shape");

TORCH_CHECK(storage_.unique(), "Can't call ReserveSpace on shared storage.");
// TODO: eliminate newCapacity.
Expand Down Expand Up @@ -758,15 +769,15 @@ void TensorImpl::Reshape(const std::vector<int64_t>& dims) {
"Right now Reshape is only supported for contiguous Tensor.");
TORCH_CHECK(
!has_symbolic_sizes_strides_,
"Reshape() called on tensor with symbolic shape")
"Reshape() called on tensor with symbolic shape");

int64_t new_size = 1;
for (auto d : dims) {
TORCH_CHECK(d >= 0);
new_size *= d;
}
TORCH_CHECK(
new_size == numel_,
new_size == numel_.as_int_unchecked(),
"New size and old size are not equal. You cannot use Reshape, "
"but should use Resize."
// TODO(jiayq): remove the following warning after pending diffs
Expand Down Expand Up @@ -828,8 +839,11 @@ void TensorImpl::ShareExternalPointer(
data_type != ScalarType::Undefined,
"To share with a raw external pointer you need to pass in an "
"initialized data_type(TypeMeta).");
TORCH_CHECK(
!has_symbolic_sizes_strides_,
"ReserveSpace() called on tensor with symbolic shape");
if (!size_bytes) {
size_bytes = numel_ * data_type.itemsize();
size_bytes = numel_.as_int_unchecked() * data_type.itemsize();
}
if (storage_.unique()) {
storage_.UniqueStorageShareExternalPointer(std::move(data_ptr), size_bytes);
Expand Down
43 changes: 31 additions & 12 deletions c10/core/TensorImpl.h
Original file line number Diff line number Diff line change
Expand Up @@ -564,6 +564,21 @@ struct C10_API TensorImpl : public c10::intrusive_ptr_target {

virtual c10::SymIntArrayRef sym_sizes_custom() const;

// Element count as a SymInt.  Subclasses that opt into
// SizesStridesPolicy::CustomSizes (e.g. Python-dispatch or nested tensors)
// are routed to the virtual sym_numel_custom(); everyone else reads the
// cached numel_ directly via sym_numel_default().
c10::SymInt sym_numel() const {
if (C10_UNLIKELY(
sizes_strides_policy_ >=
static_cast<uint8_t>(SizesStridesPolicy::CustomSizes))) {
return sym_numel_custom();
}
return sym_numel_default();
}

// Default implementation: return the cached numel_, which may be symbolic.
inline c10::SymInt sym_numel_default() const {
return numel_;
}

virtual c10::SymInt sym_numel_custom() const;

/**
* Return a reference to the strides of this tensor. This reference remains
* valid as long as the tensor is live and not restrided.
Expand Down Expand Up @@ -746,9 +761,9 @@ struct C10_API TensorImpl : public c10::intrusive_ptr_target {

// Non-symbolic fast path: callers reach here only when numel_ is known to
// hold a concrete integer, so unwrap it without a symbolic-int check.
inline int64_t numel_default() const {
#ifdef DEBUG
// Debug builds cross-check the cached value against a fresh recompute.
TORCH_INTERNAL_ASSERT(compute_numel() == numel_.as_int_unchecked());
#endif
return numel_.as_int_unchecked();
}

public:
Expand Down Expand Up @@ -1926,6 +1941,10 @@ struct C10_API TensorImpl : public c10::intrusive_ptr_target {
* and a new storage will be created.
*/
inline void* raw_mutable_data(const caffe2::TypeMeta meta) {
auto concrete_numel = numel_.expect_int();
#ifdef DEBUG
TORCH_INTERNAL_ASSERT(compute_numel() == concrete_numel);
#endif
// For 0-size tensors it's fine to return any pointer (including nullptr)
if (data_type_ == meta && storage_initialized()) {
return static_cast<void*>(
Expand All @@ -1940,9 +1959,9 @@ struct C10_API TensorImpl : public c10::intrusive_ptr_target {
// We can reuse the existing buffer if the current data does not have
// a special destructor and the new data doesn't have a special
// constructor.
if (numel_ == 0 ||
if (concrete_numel == 0 ||
(meta.placementNew() == nullptr && !had_special_dtor &&
(storage_.nbytes() >= (numel_ * data_type_.itemsize())))) {
(storage_.nbytes() >= (concrete_numel * data_type_.itemsize())))) {
TORCH_INTERNAL_ASSERT(
storage_offset_ == 0); // because we just reallocated
return storage_.data();
Expand All @@ -1959,18 +1978,18 @@ struct C10_API TensorImpl : public c10::intrusive_ptr_target {
// For types that need placement new, we will call it, as well as
// making sure that when the data is freed, it calls the right
// destruction procedure.
auto size = numel_;
auto dtor = data_type_.placementDelete();
auto data_ptr = allocator->allocate(numel_ * data_type_.itemsize());
auto data_ptr =
allocator->allocate(concrete_numel * data_type_.itemsize());
storage_.set_data_ptr_noswap(PlacementDeleteContext::makeDataPtr(
std::move(data_ptr), dtor, size, storage_.device()));
data_type_.placementNew()(storage_.data(), numel_);
std::move(data_ptr), dtor, concrete_numel, storage_.device()));
data_type_.placementNew()(storage_.data(), concrete_numel);
} else {
// For fundamental type, new and delete is easier.
storage_.set_data_ptr_noswap(
allocator->allocate(numel_ * data_type_.itemsize()));
allocator->allocate(concrete_numel * data_type_.itemsize()));
}
storage_.set_nbytes(numel_ * data_type_.itemsize());
storage_.set_nbytes(concrete_numel * data_type_.itemsize());
TORCH_INTERNAL_ASSERT(
storage_offset_ == 0); // because we just reallocated
device_opt_ = storage_.device();
Expand Down Expand Up @@ -2045,7 +2064,7 @@ struct C10_API TensorImpl : public c10::intrusive_ptr_target {
"empty_tensor_restride() called on tensor with symbolic shape")
#ifdef DEBUG
TORCH_INTERNAL_ASSERT(
compute_numel() == numel_,
compute_numel() == numel_.as_int_unchecked(),
"If you are seeing this error, that means empty_tensor_restride was "
"called before setting correct numel");
#endif
Expand Down Expand Up @@ -2469,7 +2488,7 @@ struct C10_API TensorImpl : public c10::intrusive_ptr_target {
// time, we will immediately set sizes to {0} and reset numel to 0.
// (Can't do that in the default initializers, because there's no way to
// spell "allocate a one-element array" for strides_).
int64_t numel_ = 1;
SymInt numel_ = c10::SymInt(1);

// INVARIANT: When storage is non-null, this type meta must
// agree with the type meta in storage
Expand Down
7 changes: 7 additions & 0 deletions c10/core/impl/PyInterpreter.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,12 @@ static c10::Layout noop_layout_fn(const PyInterpreter*, const TensorImpl*) {
"attempted to call `layout` on Tensor with nontrivial PyObject after corresponding interpreter died");
}

// Stub installed by disarm(): any late call to sym_numel through an
// interpreter whose Python side has died is a hard internal error rather
// than a use-after-free.
static c10::SymInt noop_sym_numel_fn(const PyInterpreter*, const TensorImpl*) {
TORCH_INTERNAL_ASSERT(
0,
"attempted to call `sym_numel` on Tensor with nontrivial PyObject after corresponding interpreter died");
}

void PyInterpreter::disarm() noexcept {
name_fn_ = &noop_name_fn;
decref_fn_ = &noop_decref_fn;
Expand All @@ -88,6 +94,7 @@ void PyInterpreter::disarm() noexcept {
sizes_fn_ = &noop_sizes_fn;
sym_sizes_fn_ = &noop_sym_sizes_fn;
layout_fn_ = &noop_layout_fn;
sym_numel_fn_ = &noop_sym_numel_fn;
}

} // namespace impl
Expand Down
13 changes: 11 additions & 2 deletions c10/core/impl/PyInterpreter.h
Original file line number Diff line number Diff line change
Expand Up @@ -136,6 +136,7 @@ struct C10_API PyInterpreter {
using sym_sizes_sig =
c10::SymIntArrayRef(const PyInterpreter*, const TensorImpl*);
using layout_sig = c10::Layout(const PyInterpreter*, const TensorImpl*);
using sym_numel_sig = c10::SymInt(const PyInterpreter*, const TensorImpl*);

PyInterpreter(
name_sig* name_fn,
Expand All @@ -148,7 +149,8 @@ struct C10_API PyInterpreter {
strides_sig* strides,
sizes_sig* sizes,
sym_sizes_sig* sym_sizes,
layout_sig* layout)
layout_sig* layout,
sym_numel_sig* sym_numel)
: name_fn_(name_fn),
decref_fn_(decref_fn),
detach_fn_(detach),
Expand All @@ -159,7 +161,8 @@ struct C10_API PyInterpreter {
strides_fn_(strides),
sizes_fn_(sizes),
sym_sizes_fn_(sym_sizes),
layout_fn_(layout) {}
layout_fn_(layout),
sym_numel_fn_(sym_numel) {}

name_sig* name_fn_;
decref_sig* decref_fn_;
Expand All @@ -172,6 +175,7 @@ struct C10_API PyInterpreter {
sizes_sig* sizes_fn_;
sym_sizes_sig* sym_sizes_fn_;
layout_sig* layout_fn_;
sym_numel_sig* sym_numel_fn_;

// UBSAN suppression fixes: "call to function
// (anonymous namespace)::concrete_decref_fn(c10::impl::PyInterpreter const*,
Expand Down Expand Up @@ -236,6 +240,11 @@ struct C10_API PyInterpreter {
return (*layout_fn_)(this, self);
}

// Forwards a sym_numel query to the registered interpreter hook (replaced
// with a throwing noop after disarm()).
__ubsan_ignore_function__ c10::SymInt sym_numel(
const TensorImpl* self) const {
return (*sym_numel_fn_)(this, self);
}

// Disarm this PyInterpreter, making all of its methods noops.
// Because the function pointers are raw pointers (not atomics),
// a disarm() invocation that is concurrent with active destructors
Expand Down
4 changes: 4 additions & 0 deletions caffe2/core/tensor.h
Original file line number Diff line number Diff line change
Expand Up @@ -439,6 +439,10 @@ class TORCH_API Tensor final {
return impl_->sym_sizes();
}

// Element count as a SymInt, delegating to the underlying TensorImpl.
inline c10::SymInt sym_numel() const {
return impl_->sym_numel();
}

inline int64_t size_from_dim(int k) const {
return size_from_dim_(k, impl_->sizes());
}
Expand Down
Loading

0 comments on commit bfebf25

Please sign in to comment.