Revert "Enable Intel® VTune™ Profiler's Instrumentation and Tracing T…
Browse files Browse the repository at this point in the history
…echnology APIs (ITT) to PyTorch (pytorch#63289)"

This reverts commit f988aa2.

Reverted pytorch#63289 on behalf of https://github.com/malfet due to broke trunk, see https://hud.pytorch.org/pytorch/pytorch/commit/f988aa2b3ff77d5aa010bdaae4e52c6ee345c04d
pytorchmergebot committed Jun 30, 2022
1 parent c980fc3 commit 1454515
Showing 39 changed files with 50 additions and 534 deletions.
3 changes: 0 additions & 3 deletions .gitmodules
@@ -139,9 +139,6 @@
 [submodule "third_party/pocketfft"]
 	path = third_party/pocketfft
 	url = https://github.com/mreineck/pocketfft
-[submodule "third_party/ittapi"]
-	path = third_party/ittapi
-	url = https://github.com/intel/ittapi.git
 [submodule "third_party/flatbuffers"]
 	path = third_party/flatbuffers
 	url = https://github.com/google/flatbuffers.git
4 changes: 0 additions & 4 deletions CMakeLists.txt
@@ -291,10 +291,6 @@ if(NOT USE_XNNPACK AND CMAKE_VERSION VERSION_LESS ${XNNPACK_MIN_CMAKE_VER})
 endif()
 option(USE_ZMQ "Use ZMQ" OFF)
 option(USE_ZSTD "Use ZSTD" OFF)
-# Ensure that an ITT build is the default for x86 CPUs
-cmake_dependent_option(
-  USE_ITT "Use Intel(R) VTune Profiler ITT functionality" ON
-  "CPU_INTEL" OFF)
 # Ensure that an MKLDNN build is the default for x86 CPUs
 # but optional for AArch64 (dependent on -DUSE_MKLDNN).
 cmake_dependent_option(
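For readers unfamiliar with cmake_dependent_option: the block removed above made USE_ITT default to ON only when CPU_INTEL is set, and forced it OFF everywhere else. A rough Python model of that behavior (illustrative only; the function and names below are hypothetical, not part of CMake or PyTorch):

    def cmake_dependent_option(cache, name, default, condition, force_value):
        # Mirrors CMake's cmake_dependent_option(): when `condition` holds,
        # expose the option with `default` (a cached user override wins);
        # otherwise force it to `force_value` and hide it from the user.
        if condition:
            cache.setdefault(name, default)
        else:
            cache[name] = force_value
        return cache

    options = {}
    cpu_intel = True  # assumption: configuring on an x86 host
    cmake_dependent_option(options, "USE_ITT", True, cpu_intel, False)
    print(options)  # {'USE_ITT': True} -> ITT was on by default for x86 builds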
1 change: 0 additions & 1 deletion build_variables.bzl
@@ -132,7 +132,6 @@ libtorch_profiler_sources = [
     "torch/csrc/profiler/kineto_shim.cpp",
     "torch/csrc/profiler/nvtx_observer.cpp",
     "torch/csrc/profiler/kineto_client_interface.cpp",
-    "torch/csrc/profiler/itt_observer.cpp",
     "torch/csrc/monitor/counters.cpp",
     "torch/csrc/monitor/events.cpp",
 ]
7 changes: 0 additions & 7 deletions caffe2/CMakeLists.txt
@@ -605,13 +605,6 @@ if(NOT INTERN_BUILD_MOBILE OR NOT BUILD_CAFFE2_MOBILE)
   )
 endif()
 
-if(${USE_ITT})
-  list(APPEND TORCH_SRCS
-    ${TORCH_SRC_DIR}/csrc/itt_wrapper.cpp
-    ${TORCH_SRC_DIR}/csrc/profiler/itt.cpp
-  )
-endif()
-
 if(NOT INTERN_BUILD_MOBILE AND NOT BUILD_LITE_INTERPRETER)
   list(APPEND TORCH_SRCS
     ${TORCH_SRC_DIR}/csrc/api/src/jit.cpp
2 changes: 0 additions & 2 deletions caffe2/core/macros.h.in
@@ -42,7 +42,6 @@ static_assert(
 #cmakedefine CAFFE2_USE_MKL
 #cmakedefine CAFFE2_USE_MKLDNN
 #cmakedefine CAFFE2_USE_NVTX
-#cmakedefine CAFFE2_USE_ITT
 #cmakedefine CAFFE2_USE_TRT
 
 #ifndef EIGEN_MPL2_ONLY
@@ -83,6 +82,5 @@ static_assert(
   {"USE_MKL", "${CAFFE2_USE_MKL}"}, \
   {"USE_MKLDNN", "${CAFFE2_USE_MKLDNN}"}, \
   {"USE_NVTX", "${CAFFE2_USE_NVTX}"}, \
-  {"USE_ITT", "${CAFFE2_USE_ITT}"}, \
   {"USE_TRT", "${CAFFE2_USE_TRT}"}, \
 }
13 changes: 0 additions & 13 deletions cmake/Dependencies.cmake
@@ -961,19 +961,6 @@ if(USE_FFMPEG)
   endif()
 endif()
 
-if(USE_ITT)
-  find_package(ITT)
-  if(ITT_FOUND)
-    include_directories(SYSTEM ${ITT_INCLUDE_DIR})
-    list(APPEND Caffe2_DEPENDENCY_LIBS ${ITT_LIBRARIES})
-    list(APPEND TORCH_PYTHON_LINK_LIBRARIES ${ITT_LIBRARIES})
-  else()
-    message(WARNING "Not compiling with ITT. Suppress this warning with -DUSE_ITT=OFF")
-    set(USE_ITT OFF CACHE BOOL "" FORCE)
-    caffe2_update_option(USE_ITT OFF)
-  endif()
-endif()
-
 # ---[ Caffe2 depends on FP16 library for half-precision conversions
 if(NOT TARGET fp16 AND NOT USE_SYSTEM_FP16)
   set(CAFFE2_THIRD_PARTY_ROOT "${PROJECT_SOURCE_DIR}/third_party")
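The block removed here follows a common build pattern: probe for an optional dependency, link it when found, otherwise warn and force the feature off. A Python sketch of the same pattern (hypothetical helper, for illustration only):

    import importlib.util
    import warnings

    def probe_optional_feature(module_name, options):
        # Analogue of the find_package()/caffe2_update_option() dance above:
        # enable the feature only if its dependency is actually available,
        # otherwise emit a warning and force the flag off.
        flag = "USE_" + module_name.upper()
        if importlib.util.find_spec(module_name) is not None:
            options[flag] = True
        else:
            warnings.warn(f"Not compiling with {module_name}. "
                          f"Suppress this warning with {flag}=0")
            options[flag] = False
        return options

    print(probe_optional_feature("numpy", {}))  # {'USE_NUMPY': True} if installed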
21 changes: 0 additions & 21 deletions cmake/Modules/FindITT.cmake

This file was deleted.

1 change: 0 additions & 1 deletion cmake/Summary.cmake
@@ -146,7 +146,6 @@ function(caffe2_print_configuration_summary)
     message(STATUS " USE_MKLDNN_ACL : ${USE_MKLDNN_ACL}")
     message(STATUS " USE_MKLDNN_CBLAS : ${USE_MKLDNN_CBLAS}")
   endif()
-  message(STATUS " USE_ITT : ${USE_ITT}")
   message(STATUS " USE_NCCL : ${USE_NCCL}")
   if(${USE_NCCL})
     message(STATUS " USE_SYSTEM_NCCL : ${USE_SYSTEM_NCCL}")
7 changes: 2 additions & 5 deletions docs/source/autograd.rst
@@ -223,12 +223,10 @@ Profiler
 ^^^^^^^^
 
 Autograd includes a profiler that lets you inspect the cost of different
-operators inside your model - both on the CPU and GPU. There are three modes
+operators inside your model - both on the CPU and GPU. There are two modes
 implemented at the moment - CPU-only using :class:`~torch.autograd.profiler.profile`.
-nvprof based (registers both CPU and GPU activity) using
+and nvprof based (registers both CPU and GPU activity) using
 :class:`~torch.autograd.profiler.emit_nvtx`.
-and vtune profiler based using
-:class:`~torch.autograd.profiler.emit_itt`.
 
 .. autoclass:: torch.autograd.profiler.profile
 
@@ -242,7 +240,6 @@ and vtune profiler based using
     profiler.profile.total_average
 
 .. autoclass:: torch.autograd.profiler.emit_nvtx
-.. autoclass:: torch.autograd.profiler.emit_itt
 
 
 .. autosummary::
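To make the surviving modes concrete, here is a minimal sketch using only the public torch.autograd.profiler API (the emit_itt third mode is exactly what this revert drops):

    import torch
    from torch.autograd import profiler

    model = torch.nn.Linear(16, 16)
    x = torch.randn(4, 16)

    # Mode 1: CPU-only, collected in-process by profiler.profile.
    with profiler.profile(record_shapes=True) as prof:
        model(x)
    print(prof.key_averages().table(sort_by="cpu_time_total", row_limit=5))

    # Mode 2: NVTX ranges, recorded by an external NVIDIA profiler;
    # only meaningful on a CUDA build run under nvprof/nsys.
    if torch.cuda.is_available():
        with profiler.emit_nvtx():
            model.cuda()(x.cuda())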
4 changes: 1 addition & 3 deletions docs/source/bottleneck.rst
@@ -47,9 +47,7 @@ where [args] are any number of arguments to `script.py`, or run
 evaluating. If the profiler outputs don't help, you could try looking at
 the result of :func:`torch.autograd.profiler.emit_nvtx()` with ``nvprof``.
 However, please take into account that the NVTX overhead is very high and
-often gives a heavily skewed timeline. Similarly, Intel VTune Profiler helps
-to analyze performance on Intel platforms further with
-:func:`torch.autograd.profiler.emit_nvtx()`.
+often gives a heavily skewed timeline.
 
 .. warning::
     If you are profiling CUDA code, the first profiler that ``bottleneck`` runs
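The workflow described in this file is command-line driven; as a sketch, a script prepared for both tools mentioned above might look like this (the commands in the comments are the documented invocations; the model is a placeholder):

    # profile with: python -m torch.utils.bottleneck script.py [args]
    # or, for an NVTX timeline: nvprof python script.py [args]
    import torch
    from torch.autograd import profiler

    model = torch.nn.Conv2d(3, 8, kernel_size=3)
    x = torch.randn(1, 3, 32, 32)

    # emit_nvtx only produces a timeline when an NVIDIA profiler is attached;
    # enabled=torch.cuda.is_available() keeps this runnable on CPU-only hosts.
    with profiler.emit_nvtx(enabled=torch.cuda.is_available()):
        model(x)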
1 change: 0 additions & 1 deletion scripts/build_android.sh
@@ -135,7 +135,6 @@ else
 fi
 # Disable unused dependencies
 CMAKE_ARGS+=("-DUSE_CUDA=OFF")
-CMAKE_ARGS+=("-DUSE_ITT=OFF")
 CMAKE_ARGS+=("-DUSE_GFLAGS=OFF")
 CMAKE_ARGS+=("-DUSE_OPENCV=OFF")
 CMAKE_ARGS+=("-DUSE_LMDB=OFF")
1 change: 0 additions & 1 deletion scripts/build_ios.sh
@@ -104,7 +104,6 @@ CMAKE_ARGS+=("-DBUILD_PYTHON=OFF")
 
 # Disable unused dependencies
 CMAKE_ARGS+=("-DUSE_CUDA=OFF")
-CMAKE_ARGS+=("-DUSE_ITT=OFF")
 CMAKE_ARGS+=("-DUSE_GFLAGS=OFF")
 CMAKE_ARGS+=("-DUSE_OPENCV=OFF")
 CMAKE_ARGS+=("-DUSE_LMDB=OFF")
1 change: 0 additions & 1 deletion scripts/build_mobile.sh
@@ -38,7 +38,6 @@ fi
 # Disable unused dependencies
 CMAKE_ARGS+=("-DUSE_ROCM=OFF")
 CMAKE_ARGS+=("-DUSE_CUDA=OFF")
-CMAKE_ARGS+=("-DUSE_ITT=OFF")
 CMAKE_ARGS+=("-DUSE_GFLAGS=OFF")
 CMAKE_ARGS+=("-DUSE_OPENCV=OFF")
 CMAKE_ARGS+=("-DUSE_LMDB=OFF")
1 change: 0 additions & 1 deletion scripts/build_tizen.sh
@@ -112,7 +112,6 @@ cd $BUILD_ROOT
 cmake "$CAFFE2_ROOT" \
     -DCMAKE_VERBOSE_MAKEFILE=1 \
     -DUSE_CUDA=OFF \
-    -DUSE_ITT=OFF \
     -DUSE_OPENCV=OFF \
     -DUSE_LMDB=OFF \
     -DCAFFE2_CPU_FLAGS="-mfpu=neon -mfloat-abi=soft" \
7 changes: 0 additions & 7 deletions setup.py
@@ -52,8 +52,6 @@
 #
 #   USE_STATIC_MKL
 #     Prefer to link with MKL statically - Unix only
-#   USE_ITT=0
-#     disable use of Intel(R) VTune Profiler's ITT functionality
 #
 #   USE_NNPACK=0
 #     disables NNPACK build
@@ -543,11 +541,6 @@ def run(self):
         if cmake_cache_vars['USE_LIGHTWEIGHT_DISPATCH']:
             report('-- Using lightweight dispatch')
 
-        if cmake_cache_vars['USE_ITT']:
-            report('-- Using ITT')
-        else:
-            report('-- Not using ITT')
-
         # Do not use clang to compile extensions if `-fstack-clash-protection` is defined
         # in system CFLAGS
         c_flags = str(os.getenv('CFLAGS', ''))
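One side effect of this revert is that USE_ITT disappears from the feature flags reported at build time. To check which optional features a given PyTorch binary was actually compiled with, the public torch.__config__.show() helper prints the baked-in CMake summary (a minimal sketch):

    import torch

    # torch.__config__.show() returns the compile-time configuration string,
    # including the USE_* flags assembled in caffe2/core/macros.h.in above.
    for line in torch.__config__.show().splitlines():
        if "USE_" in line:
            print(line)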
1 change: 0 additions & 1 deletion third_party/ittapi
Submodule ittapi deleted from 5b8a7d
7 changes: 0 additions & 7 deletions torch/CMakeLists.txt
@@ -117,13 +117,6 @@ if("${CMAKE_CXX_COMPILER_ID}" MATCHES "Clang")
     -Wno-writable-strings)
 endif()
 
-if(USE_ITT)
-  list(APPEND TORCH_PYTHON_SRCS
-    ${TORCH_SRC_DIR}/csrc/itt.cpp
-  )
-  list(APPEND TORCH_PYTHON_COMPILE_DEFINITIONS USE_ITT)
-endif()
-
 if(USE_CUDA)
   include(${TORCH_ROOT}/cmake/public/cuda.cmake)
   append_filelist("libtorch_python_cuda_core_sources" TORCH_PYTHON_SRCS)
1 change: 0 additions & 1 deletion torch/_C/_autograd.pyi
@@ -10,7 +10,6 @@ class ProfilerState(Enum):
     CPU = ...
     CUDA = ...
     NVTX = ...
-    ITT = ...
     KINETO = ...
     KINETO_GPU_FALLBACK = ...
 
4 changes: 0 additions & 4 deletions torch/_C/_itt.pyi

This file was deleted.

64 changes: 0 additions & 64 deletions torch/autograd/profiler.py
@@ -479,70 +479,6 @@ def _call_end_callbacks_on_future(self, fut: Future[Any]) -> Future[Any]:
         return profiled_future
 
 
-class emit_itt(object):
-    """Context manager that makes every autograd operation emit an ITT range.
-
-    It is useful when running the program under Intel(R) VTune Profiler::
-
-        vtune <--vtune_flags> <regular command here>
-
-    The Instrumentation and Tracing Technology (ITT) API enables your application to generate and
-    control the collection of trace data during its execution across different Intel tools.
-    This context manager is to annotate Intel(R) VTune Profiling trace. With help of this context manager,
-    you will be able to see labled ranges in Intel(R) VTune Profiler GUI.
-
-    .. warning:
-        This context manager should not be called recursively, i.e. at most one
-        instance should be enabled at any given time.
-
-    Args:
-        enabled (bool, optional, default=True): Setting ``enabled=False`` makes this context manager a no-op.
-            Default: ``True``.
-        record_shapes (bool, optional, default=False): If ``record_shapes=True``, the itt range wrapping
-            each autograd op will append information about the sizes of Tensor arguments received
-            by that op, in the following format:
-            ``[[arg0.size(0), arg0.size(1), ...], [arg1.size(0), arg1.size(1), ...], ...]``
-            Non-tensor arguments will be represented by ``[]``.
-            Arguments will be listed in the order they are received by the backend op.
-            Please note that this order may not match the order in which those arguments were passed
-            on the Python side. Also note that shape recording may increase the overhead of itt range creation.
-
-    Example:
-        >>> with torch.autograd.profiler.emit_itt():
-        ...     model(x)
-    """
-    def __init__(self, enabled=True, record_shapes=False):
-        self.enabled = enabled
-        self.entered = False
-        self.record_shapes = record_shapes
-
-    def __enter__(self):
-        if not self.enabled:
-            return
-        if self.entered:
-            raise RuntimeError("ITT annotation context manager is not reentrant")
-        self.entered = True
-        _enable_profiler(
-            ProfilerConfig(
-                ProfilerState.ITT,
-                self.record_shapes,
-                False,
-                False,
-                False,
-                False,
-                _ExperimentalConfig()),
-            set()
-        )
-        return self
-
-    def __exit__(self, exc_type, exc_val, exc_tb):
-        if not self.enabled:
-            return
-        _disable_profiler()
-        return False
-
-
 class emit_nvtx(object):
     """Context manager that makes every autograd operation emit an NVTX range.
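Before the revert, the deleted class above was used like this (a sketch assembled from its own docstring; model and x are placeholders, and the vtune command in the comment follows the docstring's usage):

    import torch
    from torch.autograd import profiler

    model = torch.nn.Linear(8, 8)
    x = torch.randn(2, 8)

    # Launch under VTune to record the ranges, e.g.:
    #   vtune -collect hotspots -- python this_script.py
    # Each autograd op then shows up as a labeled ITT range in the VTune GUI.
    with profiler.emit_itt(record_shapes=True):
        model(x)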
11 changes: 0 additions & 11 deletions torch/csrc/Module.cpp
@@ -910,14 +910,6 @@ void initModule(PyObject* module);
 } // namespace torch
 #endif
 
-#ifdef USE_ITT
-namespace torch {
-namespace profiler {
-void initIttBindings(PyObject* module);
-} // namespace profiler
-} // namespace torch
-#endif
-
 static std::vector<PyMethodDef> methods;
 
 // In Python we can't use the trick of C10_LOG_API_USAGE_ONCE
@@ -1016,9 +1008,6 @@ PyObject* initModule() {
   torch::autograd::init_legacy_variable(module);
   torch::python::init_bindings(module);
   torch::lazy::initLazyBindings(module);
-#ifdef USE_ITT
-  torch::profiler::initIttBindings(module);
-#endif
 #ifdef USE_CUDA
   torch::cuda::initModule(module);
 #endif
1 change: 0 additions & 1 deletion torch/csrc/autograd/init.cpp
@@ -71,7 +71,6 @@ PyObject* THPAutograd_initExtension(PyObject* _unused, PyObject* unused) {
       .value("CPU", ProfilerState::CPU)
      .value("CUDA", ProfilerState::CUDA)
       .value("NVTX", ProfilerState::NVTX)
-      .value("ITT", ProfilerState::ITT)
       .value("KINETO", ProfilerState::KINETO)
       .value("KINETO_GPU_FALLBACK", ProfilerState::KINETO_GPU_FALLBACK);
 
13 changes: 2 additions & 11 deletions torch/csrc/autograd/profiler_kineto.cpp
@@ -11,7 +11,6 @@
 #include <torch/csrc/profiler/api.h>
 #include <torch/csrc/profiler/collection.h>
 #include <torch/csrc/profiler/containers.h>
-#include <torch/csrc/profiler/itt_observer.h>
 #include <torch/csrc/profiler/kineto_shim.h>
 #include <torch/csrc/profiler/nvtx_observer.h>
 
@@ -624,8 +623,7 @@ void reportBackendEventToActiveKinetoProfiler(
 void prepareProfiler(
     const torch::profiler::impl::ProfilerConfig& config,
     const std::set<torch::profiler::impl::ActivityType>& activities) {
-  if (config.state == ProfilerState::NVTX ||
-      config.state == ProfilerState::ITT) {
+  if (config.state == ProfilerState::NVTX) {
     return;
   }
   TORCH_CHECK(
@@ -644,9 +642,6 @@ void enableProfilerWithEventPostProcess(
   TORCH_CHECK(
       config.state != ProfilerState::NVTX,
       "NVTX does not support post processing callback.");
-  TORCH_CHECK(
-      config.state != ProfilerState::ITT,
-      "ITT does not support post processing callback.");
   TORCH_INTERNAL_ASSERT(
       GlobalStateManager::get() == nullptr,
       "On-demand profiling does not support post processing callback");
@@ -664,9 +659,6 @@ void enableProfiler(
   if (config.state == ProfilerState::NVTX) {
     torch::profiler::impl::pushNVTXCallbacks(config, scopes);
     return;
-  } else if (config.state == ProfilerState::ITT) {
-    torch::profiler::impl::pushITTCallbacks(config, scopes);
-    return;
   }
 
   TORCH_CHECK(
@@ -710,8 +702,7 @@ std::unique_ptr<ProfilerResult> disableProfiler() {
       (config.state == ProfilerState::KINETO ||
        config.state == ProfilerState::KINETO_GPU_FALLBACK ||
        config.state == ProfilerState::KINETO_ONDEMAND ||
-       config.state == ProfilerState::NVTX ||
-       config.state == ProfilerState::ITT),
+       config.state == ProfilerState::NVTX),
       "Can't disable Kineto profiler when it's not running");
 
   if (state_ptr->hasCallbackHandle()) {
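As the hunks above show, the NVTX state (and, before this revert, the ITT state) short-circuits prepareProfiler and never reaches Kineto; the Kineto-backed path is what torch.profiler drives from Python. A minimal sketch of that path:

    import torch
    from torch.profiler import ProfilerActivity, profile

    model = torch.nn.Linear(8, 8)
    x = torch.randn(2, 8)

    # This run goes through prepareProfiler()/enableProfiler() with
    # ProfilerState::KINETO rather than the NVTX/ITT passthrough states.
    with profile(activities=[ProfilerActivity.CPU]) as prof:
        model(x)
    print(prof.key_averages().table(sort_by="cpu_time_total", row_limit=5))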
4 changes: 2 additions & 2 deletions torch/csrc/autograd/profiler_kineto.h
@@ -273,8 +273,8 @@ struct TORCH_API KinetoEvent {
   int64_t debug_handle_{-1};
   std::string backend_;
 
-  torch::profiler::impl::ProfilerEventStub cuda_event_start_ = nullptr;
-  torch::profiler::impl::ProfilerEventStub cuda_event_end_ = nullptr;
+  torch::profiler::impl::CUDAEventStub cuda_event_start_ = nullptr;
+  torch::profiler::impl::CUDAEventStub cuda_event_end_ = nullptr;
   bool is_python_function_;
 };
 
(Diffs for the remaining changed files in this commit were not loaded.)
