Revert "Enable Intel® VTune™ Profiler's Instrumentation and Tracing T…
Browse files Browse the repository at this point in the history
…echnology APIs (ITT) to PyTorch (pytorch#63289)"

This reverts commit f988aa2.

Reverted pytorch#63289 on behalf of https://github.com/malfet due to broke trunk, see https://hud.pytorch.org/pytorch/pytorch/commit/f988aa2b3ff77d5aa010bdaae4e52c6ee345c04d
pytorchmergebot committed Jun 30, 2022
1 parent c980fc3 commit 1454515
Showing 39 changed files with 50 additions and 534 deletions.
3 changes: 0 additions & 3 deletions .gitmodules
@@ -139,9 +139,6 @@
 [submodule "third_party/pocketfft"]
 	path = third_party/pocketfft
 	url = https://github.com/mreineck/pocketfft
-[submodule "third_party/ittapi"]
-	path = third_party/ittapi
-	url = https://github.com/intel/ittapi.git
 [submodule "third_party/flatbuffers"]
 	path = third_party/flatbuffers
 	url = https://github.com/google/flatbuffers.git
4 changes: 0 additions & 4 deletions CMakeLists.txt
@@ -291,10 +291,6 @@ if(NOT USE_XNNPACK AND CMAKE_VERSION VERSION_LESS ${XNNPACK_MIN_CMAKE_VER})
 endif()
 option(USE_ZMQ "Use ZMQ" OFF)
 option(USE_ZSTD "Use ZSTD" OFF)
-# Ensure that an ITT build is the default for x86 CPUs
-cmake_dependent_option(
-  USE_ITT "Use Intel(R) VTune Profiler ITT functionality" ON
-  "CPU_INTEL" OFF)
 # Ensure that an MKLDNN build is the default for x86 CPUs
 # but optional for AArch64 (dependent on -DUSE_MKLDNN).
 cmake_dependent_option(
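For readers unfamiliar with cmake_dependent_option: the block removed above made USE_ITT default to ON only when CPU_INTEL is set, and forced it OFF everywhere else. A rough Python model of that behavior (illustrative only; the function and names below are hypothetical, not part of CMake or PyTorch):

    def cmake_dependent_option(cache, name, default, condition, force_value):
        # Mirrors CMake's cmake_dependent_option(): when `condition` holds,
        # expose the option with `default` (a cached user override wins);
        # otherwise force it to `force_value` and hide it from the user.
        if condition:
            cache.setdefault(name, default)
        else:
            cache[name] = force_value
        return cache

    options = {}
    cpu_intel = True  # assumption: configuring on an x86 host
    cmake_dependent_option(options, "USE_ITT", True, cpu_intel, False)
    print(options)  # {'USE_ITT': True} -> ITT was on by default for x86 builds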
1 change: 0 additions & 1 deletion build_variables.bzl
@@ -132,7 +132,6 @@ libtorch_profiler_sources = [
     "torch/csrc/profiler/kineto_shim.cpp",
     "torch/csrc/profiler/nvtx_observer.cpp",
     "torch/csrc/profiler/kineto_client_interface.cpp",
-    "torch/csrc/profiler/itt_observer.cpp",
     "torch/csrc/monitor/counters.cpp",
     "torch/csrc/monitor/events.cpp",
 ]
7 changes: 0 additions & 7 deletions caffe2/CMakeLists.txt
@@ -605,13 +605,6 @@ if(NOT INTERN_BUILD_MOBILE OR NOT BUILD_CAFFE2_MOBILE)
   )
 endif()
 
-if(${USE_ITT})
-  list(APPEND TORCH_SRCS
-    ${TORCH_SRC_DIR}/csrc/itt_wrapper.cpp
-    ${TORCH_SRC_DIR}/csrc/profiler/itt.cpp
-  )
-endif()
-
 if(NOT INTERN_BUILD_MOBILE AND NOT BUILD_LITE_INTERPRETER)
   list(APPEND TORCH_SRCS
     ${TORCH_SRC_DIR}/csrc/api/src/jit.cpp
2 changes: 0 additions & 2 deletions caffe2/core/macros.h.in
@@ -42,7 +42,6 @@ static_assert(
 #cmakedefine CAFFE2_USE_MKL
 #cmakedefine CAFFE2_USE_MKLDNN
 #cmakedefine CAFFE2_USE_NVTX
-#cmakedefine CAFFE2_USE_ITT
 #cmakedefine CAFFE2_USE_TRT
 
 #ifndef EIGEN_MPL2_ONLY
@@ -83,6 +82,5 @@ static_assert(
   {"USE_MKL", "${CAFFE2_USE_MKL}"}, \
   {"USE_MKLDNN", "${CAFFE2_USE_MKLDNN}"}, \
   {"USE_NVTX", "${CAFFE2_USE_NVTX}"}, \
-  {"USE_ITT", "${CAFFE2_USE_ITT}"}, \
   {"USE_TRT", "${CAFFE2_USE_TRT}"}, \
 }
13 changes: 0 additions & 13 deletions cmake/Dependencies.cmake
@@ -961,19 +961,6 @@ if(USE_FFMPEG)
   endif()
 endif()
 
-if(USE_ITT)
-  find_package(ITT)
-  if(ITT_FOUND)
-    include_directories(SYSTEM ${ITT_INCLUDE_DIR})
-    list(APPEND Caffe2_DEPENDENCY_LIBS ${ITT_LIBRARIES})
-    list(APPEND TORCH_PYTHON_LINK_LIBRARIES ${ITT_LIBRARIES})
-  else()
-    message(WARNING "Not compiling with ITT. Suppress this warning with -DUSE_ITT=OFF")
-    set(USE_ITT OFF CACHE BOOL "" FORCE)
-    caffe2_update_option(USE_ITT OFF)
-  endif()
-endif()
-
 # ---[ Caffe2 depends on FP16 library for half-precision conversions
 if(NOT TARGET fp16 AND NOT USE_SYSTEM_FP16)
   set(CAFFE2_THIRD_PARTY_ROOT "${PROJECT_SOURCE_DIR}/third_party")
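The block removed here follows a common build pattern: probe for an optional dependency, link it when found, otherwise warn and force the feature off. A Python sketch of the same pattern (hypothetical helper, for illustration only):

    import importlib.util
    import warnings

    def probe_optional_feature(module_name, options):
        # Analogue of the find_package()/caffe2_update_option() dance above:
        # enable the feature only if its dependency is actually available,
        # otherwise emit a warning and force the flag off.
        flag = "USE_" + module_name.upper()
        if importlib.util.find_spec(module_name) is not None:
            options[flag] = True
        else:
            warnings.warn(f"Not compiling with {module_name}. "
                          f"Suppress this warning with {flag}=0")
            options[flag] = False
        return options

    print(probe_optional_feature("numpy", {}))  # {'USE_NUMPY': True} if installed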
21 changes: 0 additions & 21 deletions cmake/Modules/FindITT.cmake

This file was deleted.

1 change: 0 additions & 1 deletion cmake/Summary.cmake
@@ -146,7 +146,6 @@ function(caffe2_print_configuration_summary)
     message(STATUS " USE_MKLDNN_ACL : ${USE_MKLDNN_ACL}")
     message(STATUS " USE_MKLDNN_CBLAS : ${USE_MKLDNN_CBLAS}")
   endif()
-  message(STATUS " USE_ITT : ${USE_ITT}")
   message(STATUS " USE_NCCL : ${USE_NCCL}")
   if(${USE_NCCL})
     message(STATUS " USE_SYSTEM_NCCL : ${USE_SYSTEM_NCCL}")
7 changes: 2 additions & 5 deletions docs/source/autograd.rst
@@ -223,12 +223,10 @@ Profiler
 ^^^^^^^^
 
 Autograd includes a profiler that lets you inspect the cost of different
-operators inside your model - both on the CPU and GPU. There are three modes
+operators inside your model - both on the CPU and GPU. There are two modes
 implemented at the moment - CPU-only using :class:`~torch.autograd.profiler.profile`.
-nvprof based (registers both CPU and GPU activity) using
+and nvprof based (registers both CPU and GPU activity) using
 :class:`~torch.autograd.profiler.emit_nvtx`.
-and vtune profiler based using
-:class:`~torch.autograd.profiler.emit_itt`.
 
 .. autoclass:: torch.autograd.profiler.profile
 
@@ -242,7 +240,6 @@ and vtune profiler based using
     profiler.profile.total_average
 
 .. autoclass:: torch.autograd.profiler.emit_nvtx
-.. autoclass:: torch.autograd.profiler.emit_itt
 
 
 .. autosummary::
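To make the surviving modes concrete, here is a minimal sketch using only the public torch.autograd.profiler API (the emit_itt third mode is exactly what this revert drops):

    import torch
    from torch.autograd import profiler

    model = torch.nn.Linear(16, 16)
    x = torch.randn(4, 16)

    # Mode 1: CPU-only, collected in-process by profiler.profile.
    with profiler.profile(record_shapes=True) as prof:
        model(x)
    print(prof.key_averages().table(sort_by="cpu_time_total", row_limit=5))

    # Mode 2: NVTX ranges, recorded by an external NVIDIA profiler;
    # only meaningful on a CUDA build run under nvprof/nsys.
    if torch.cuda.is_available():
        with profiler.emit_nvtx():
            model.cuda()(x.cuda())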
4 changes: 1 addition & 3 deletions docs/source/bottleneck.rst
@@ -47,9 +47,7 @@ where [args] are any number of arguments to `script.py`, or run
 evaluating. If the profiler outputs don't help, you could try looking at
 the result of :func:`torch.autograd.profiler.emit_nvtx()` with ``nvprof``.
 However, please take into account that the NVTX overhead is very high and
-often gives a heavily skewed timeline. Similarly, Intel VTune Profiler helps
-to analyze performance on Intel platforms further with
-:func:`torch.autograd.profiler.emit_nvtx()`.
+often gives a heavily skewed timeline.
 
 .. warning::
     If you are profiling CUDA code, the first profiler that ``bottleneck`` runs
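The workflow described in this file is command-line driven; as a sketch, a script prepared for both tools mentioned above might look like this (the commands in the comments are the documented invocations; the model is a placeholder):

    # profile with: python -m torch.utils.bottleneck script.py [args]
    # or, for an NVTX timeline: nvprof python script.py [args]
    import torch
    from torch.autograd import profiler

    model = torch.nn.Conv2d(3, 8, kernel_size=3)
    x = torch.randn(1, 3, 32, 32)

    # emit_nvtx only produces a timeline when an NVIDIA profiler is attached;
    # enabled=torch.cuda.is_available() keeps this runnable on CPU-only hosts.
    with profiler.emit_nvtx(enabled=torch.cuda.is_available()):
        model(x)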
1 change: 0 additions & 1 deletion scripts/build_android.sh
@@ -135,7 +135,6 @@ else
 fi
 # Disable unused dependencies
 CMAKE_ARGS+=("-DUSE_CUDA=OFF")
-CMAKE_ARGS+=("-DUSE_ITT=OFF")
 CMAKE_ARGS+=("-DUSE_GFLAGS=OFF")
 CMAKE_ARGS+=("-DUSE_OPENCV=OFF")
 CMAKE_ARGS+=("-DUSE_LMDB=OFF")
1 change: 0 additions & 1 deletion scripts/build_ios.sh
@@ -104,7 +104,6 @@ CMAKE_ARGS+=("-DBUILD_PYTHON=OFF")
 
 # Disable unused dependencies
 CMAKE_ARGS+=("-DUSE_CUDA=OFF")
-CMAKE_ARGS+=("-DUSE_ITT=OFF")
 CMAKE_ARGS+=("-DUSE_GFLAGS=OFF")
 CMAKE_ARGS+=("-DUSE_OPENCV=OFF")
 CMAKE_ARGS+=("-DUSE_LMDB=OFF")
1 change: 0 additions & 1 deletion scripts/build_mobile.sh
@@ -38,7 +38,6 @@ fi
 # Disable unused dependencies
 CMAKE_ARGS+=("-DUSE_ROCM=OFF")
 CMAKE_ARGS+=("-DUSE_CUDA=OFF")
-CMAKE_ARGS+=("-DUSE_ITT=OFF")
 CMAKE_ARGS+=("-DUSE_GFLAGS=OFF")
 CMAKE_ARGS+=("-DUSE_OPENCV=OFF")
 CMAKE_ARGS+=("-DUSE_LMDB=OFF")
1 change: 0 additions & 1 deletion scripts/build_tizen.sh
@@ -112,7 +112,6 @@ cd $BUILD_ROOT
 cmake "$CAFFE2_ROOT" \
     -DCMAKE_VERBOSE_MAKEFILE=1 \
     -DUSE_CUDA=OFF \
-    -DUSE_ITT=OFF \
     -DUSE_OPENCV=OFF \
     -DUSE_LMDB=OFF \
     -DCAFFE2_CPU_FLAGS="-mfpu=neon -mfloat-abi=soft" \
7 changes: 0 additions & 7 deletions setup.py
@@ -52,8 +52,6 @@
 #
 #   USE_STATIC_MKL
 #     Prefer to link with MKL statically - Unix only
-#   USE_ITT=0
-#     disable use of Intel(R) VTune Profiler's ITT functionality
 #
 #   USE_NNPACK=0
 #     disables NNPACK build
@@ -543,11 +541,6 @@ def run(self):
         if cmake_cache_vars['USE_LIGHTWEIGHT_DISPATCH']:
             report('-- Using lightweight dispatch')
 
-        if cmake_cache_vars['USE_ITT']:
-            report('-- Using ITT')
-        else:
-            report('-- Not using ITT')
-
         # Do not use clang to compile extensions if `-fstack-clash-protection` is defined
         # in system CFLAGS
         c_flags = str(os.getenv('CFLAGS', ''))
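One side effect of this revert is that USE_ITT disappears from the feature flags reported at build time. To check which optional features a given PyTorch binary was actually compiled with, the public torch.__config__.show() helper prints the baked-in CMake summary (a minimal sketch):

    import torch

    # torch.__config__.show() returns the compile-time configuration string,
    # including the USE_* flags assembled in caffe2/core/macros.h.in above.
    for line in torch.__config__.show().splitlines():
        if "USE_" in line:
            print(line)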
1 change: 0 additions & 1 deletion third_party/ittapi
Submodule ittapi deleted from 5b8a7d
7 changes: 0 additions & 7 deletions torch/CMakeLists.txt
@@ -117,13 +117,6 @@ if("${CMAKE_CXX_COMPILER_ID}" MATCHES "Clang")
     -Wno-writable-strings)
 endif()
 
-if(USE_ITT)
-  list(APPEND TORCH_PYTHON_SRCS
-    ${TORCH_SRC_DIR}/csrc/itt.cpp
-  )
-  list(APPEND TORCH_PYTHON_COMPILE_DEFINITIONS USE_ITT)
-endif()
-
 if(USE_CUDA)
   include(${TORCH_ROOT}/cmake/public/cuda.cmake)
   append_filelist("libtorch_python_cuda_core_sources" TORCH_PYTHON_SRCS)
1 change: 0 additions & 1 deletion torch/_C/_autograd.pyi
@@ -10,7 +10,6 @@ class ProfilerState(Enum):
     CPU = ...
     CUDA = ...
     NVTX = ...
-    ITT = ...
     KINETO = ...
     KINETO_GPU_FALLBACK = ...
 
4 changes: 0 additions & 4 deletions torch/_C/_itt.pyi

This file was deleted.

64 changes: 0 additions & 64 deletions torch/autograd/profiler.py
@@ -479,70 +479,6 @@ def _call_end_callbacks_on_future(self, fut: Future[Any]) -> Future[Any]:
         return profiled_future
 
 
-class emit_itt(object):
-    """Context manager that makes every autograd operation emit an ITT range.
-
-    It is useful when running the program under Intel(R) VTune Profiler::
-
-        vtune <--vtune_flags> <regular command here>
-
-    The Instrumentation and Tracing Technology (ITT) API enables your application to generate and
-    control the collection of trace data during its execution across different Intel tools.
-    This context manager is to annotate Intel(R) VTune Profiling trace. With help of this context manager,
-    you will be able to see labled ranges in Intel(R) VTune Profiler GUI.
-
-    .. warning:
-        This context manager should not be called recursively, i.e. at most one
-        instance should be enabled at any given time.
-
-    Args:
-        enabled (bool, optional, default=True): Setting ``enabled=False`` makes this context manager a no-op.
-            Default: ``True``.
-        record_shapes (bool, optional, default=False): If ``record_shapes=True``, the itt range wrapping
-            each autograd op will append information about the sizes of Tensor arguments received
-            by that op, in the following format:
-            ``[[arg0.size(0), arg0.size(1), ...], [arg1.size(0), arg1.size(1), ...], ...]``
-            Non-tensor arguments will be represented by ``[]``.
-            Arguments will be listed in the order they are received by the backend op.
-            Please note that this order may not match the order in which those arguments were passed
-            on the Python side. Also note that shape recording may increase the overhead of itt range creation.
-
-    Example:
-        >>> with torch.autograd.profiler.emit_itt():
-        ...     model(x)
-    """
-    def __init__(self, enabled=True, record_shapes=False):
-        self.enabled = enabled
-        self.entered = False
-        self.record_shapes = record_shapes
-
-    def __enter__(self):
-        if not self.enabled:
-            return
-        if self.entered:
-            raise RuntimeError("ITT annotation context manager is not reentrant")
-        self.entered = True
-        _enable_profiler(
-            ProfilerConfig(
-                ProfilerState.ITT,
-                self.record_shapes,
-                False,
-                False,
-                False,
-                False,
-                _ExperimentalConfig()),
-            set()
-        )
-        return self
-
-    def __exit__(self, exc_type, exc_val, exc_tb):
-        if not self.enabled:
-            return
-        _disable_profiler()
-        return False
-
-
 class emit_nvtx(object):
     """Context manager that makes every autograd operation emit an NVTX range.
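Before the revert, the deleted class above was used like this (a sketch assembled from its own docstring; model and x are placeholders, and the vtune command in the comment follows the docstring's usage):

    import torch
    from torch.autograd import profiler

    model = torch.nn.Linear(8, 8)
    x = torch.randn(2, 8)

    # Launch under VTune to record the ranges, e.g.:
    #   vtune -collect hotspots -- python this_script.py
    # Each autograd op then shows up as a labeled ITT range in the VTune GUI.
    with profiler.emit_itt(record_shapes=True):
        model(x)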
11 changes: 0 additions & 11 deletions torch/csrc/Module.cpp
@@ -910,14 +910,6 @@ void initModule(PyObject* module);
 } // namespace torch
 #endif
 
-#ifdef USE_ITT
-namespace torch {
-namespace profiler {
-void initIttBindings(PyObject* module);
-} // namespace profiler
-} // namespace torch
-#endif
-
 static std::vector<PyMethodDef> methods;
 
 // In Python we can't use the trick of C10_LOG_API_USAGE_ONCE
@@ -1016,9 +1008,6 @@ PyObject* initModule() {
   torch::autograd::init_legacy_variable(module);
   torch::python::init_bindings(module);
   torch::lazy::initLazyBindings(module);
-#ifdef USE_ITT
-  torch::profiler::initIttBindings(module);
-#endif
 #ifdef USE_CUDA
   torch::cuda::initModule(module);
 #endif
1 change: 0 additions & 1 deletion torch/csrc/autograd/init.cpp
@@ -71,7 +71,6 @@ PyObject* THPAutograd_initExtension(PyObject* _unused, PyObject* unused) {
       .value("CPU", ProfilerState::CPU)
      .value("CUDA", ProfilerState::CUDA)
       .value("NVTX", ProfilerState::NVTX)
-      .value("ITT", ProfilerState::ITT)
       .value("KINETO", ProfilerState::KINETO)
       .value("KINETO_GPU_FALLBACK", ProfilerState::KINETO_GPU_FALLBACK);
 
13 changes: 2 additions & 11 deletions torch/csrc/autograd/profiler_kineto.cpp
@@ -11,7 +11,6 @@
 #include <torch/csrc/profiler/api.h>
 #include <torch/csrc/profiler/collection.h>
 #include <torch/csrc/profiler/containers.h>
-#include <torch/csrc/profiler/itt_observer.h>
 #include <torch/csrc/profiler/kineto_shim.h>
 #include <torch/csrc/profiler/nvtx_observer.h>
 
@@ -624,8 +623,7 @@ void reportBackendEventToActiveKinetoProfiler(
 void prepareProfiler(
     const torch::profiler::impl::ProfilerConfig& config,
     const std::set<torch::profiler::impl::ActivityType>& activities) {
-  if (config.state == ProfilerState::NVTX ||
-      config.state == ProfilerState::ITT) {
+  if (config.state == ProfilerState::NVTX) {
     return;
   }
   TORCH_CHECK(
@@ -644,9 +642,6 @@ void enableProfilerWithEventPostProcess(
   TORCH_CHECK(
       config.state != ProfilerState::NVTX,
       "NVTX does not support post processing callback.");
-  TORCH_CHECK(
-      config.state != ProfilerState::ITT,
-      "ITT does not support post processing callback.");
   TORCH_INTERNAL_ASSERT(
       GlobalStateManager::get() == nullptr,
       "On-demand profiling does not support post processing callback");
@@ -664,9 +659,6 @@ void enableProfiler(
   if (config.state == ProfilerState::NVTX) {
     torch::profiler::impl::pushNVTXCallbacks(config, scopes);
     return;
-  } else if (config.state == ProfilerState::ITT) {
-    torch::profiler::impl::pushITTCallbacks(config, scopes);
-    return;
   }
 
   TORCH_CHECK(
@@ -710,8 +702,7 @@ std::unique_ptr<ProfilerResult> disableProfiler() {
       (config.state == ProfilerState::KINETO ||
        config.state == ProfilerState::KINETO_GPU_FALLBACK ||
        config.state == ProfilerState::KINETO_ONDEMAND ||
-       config.state == ProfilerState::NVTX ||
-       config.state == ProfilerState::ITT),
+       config.state == ProfilerState::NVTX),
       "Can't disable Kineto profiler when it's not running");
 
   if (state_ptr->hasCallbackHandle()) {
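As the hunks above show, the NVTX state (and, before this revert, the ITT state) short-circuits prepareProfiler and never reaches Kineto; the Kineto-backed path is what torch.profiler drives from Python. A minimal sketch of that path:

    import torch
    from torch.profiler import ProfilerActivity, profile

    model = torch.nn.Linear(8, 8)
    x = torch.randn(2, 8)

    # This run goes through prepareProfiler()/enableProfiler() with
    # ProfilerState::KINETO rather than the NVTX/ITT passthrough states.
    with profile(activities=[ProfilerActivity.CPU]) as prof:
        model(x)
    print(prof.key_averages().table(sort_by="cpu_time_total", row_limit=5))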
4 changes: 2 additions & 2 deletions torch/csrc/autograd/profiler_kineto.h
@@ -273,8 +273,8 @@ struct TORCH_API KinetoEvent {
   int64_t debug_handle_{-1};
   std::string backend_;
 
-  torch::profiler::impl::ProfilerEventStub cuda_event_start_ = nullptr;
-  torch::profiler::impl::ProfilerEventStub cuda_event_end_ = nullptr;
+  torch::profiler::impl::CUDAEventStub cuda_event_start_ = nullptr;
+  torch::profiler::impl::CUDAEventStub cuda_event_end_ = nullptr;
   bool is_python_function_;
 };
 
(Diffs for the remaining changed files in this commit were not loaded.)
