Back out "Only keep fbgemm namespace for torch.ops" (pytorch#1026)
Summary:
Pull Request resolved: pytorch#1026

Original commit changeset: 88ee7df864d6

Original Phabricator Diff: D35104996 (pytorch@356ca9c)

Background: https://fb.workplace.com/groups/3440841732711443/permalink/4698534793608791/

Differential Revision: D35261680

fbshipit-source-id: 6f08f7fd2710b72da23c8861098156ff069a9918
jianyuh authored and facebook-github-bot committed Apr 1, 2022
1 parent 6e6204c commit 8a0c2cc
Showing 8 changed files with 105 additions and 0 deletions.
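
The change restores duplicate operator registration: each op is registered under both the legacy "fb" namespace and the newer "fbgemm" namespace, so existing callers of torch.ops.fb.* keep working while new code migrates to torch.ops.fbgemm.*. A minimal sketch of the pattern, using a hypothetical op add_one that is not part of this PR:

#include <ATen/ATen.h>
#include <torch/library.h>

namespace {

// Trivial stand-in kernel; the real FBGEMM ops are far more involved.
at::Tensor add_one(const at::Tensor& t) {
  return t + 1;
}

} // namespace

// Legacy namespace, kept only for backward compatibility.
TORCH_LIBRARY_FRAGMENT(fb, m) {
  m.def("add_one(Tensor t) -> Tensor", TORCH_FN(add_one));
}

// Preferred namespace going forward.
TORCH_LIBRARY_FRAGMENT(fbgemm, m) {
  m.def("add_one(Tensor t) -> Tensor", TORCH_FN(add_one));
}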
7 changes: 7 additions & 0 deletions fbgemm_gpu/codegen/embedding_backward_dense_host.cpp
@@ -392,6 +392,13 @@ Tensor split_embedding_codegen_lookup_dense_function(
}
}

// Deprecated for fb namespace! Please use fbgemm namespace instead!
TORCH_LIBRARY_FRAGMENT(fb, m) {
DISPATCH_TO_CUDA(
"dense_embedding_codegen_lookup_function",
split_embedding_codegen_lookup_dense_function);
}

TORCH_LIBRARY_FRAGMENT(fbgemm, m) {
DISPATCH_TO_CUDA(
"dense_embedding_codegen_lookup_function",
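
DISPATCH_TO_CUDA and DISPATCH_TO_CPU are FBGEMM helper macros defined elsewhere in the repository. A plausible expansion (an assumption for illustration, not the verbatim source) binds an already-declared schema to a kernel under a single dispatch key:

// Hedged sketch of what the helper macros likely expand to.
#define DISPATCH_TO_CUDA(name, function) \
  m.impl(name, torch::dispatch(c10::DispatchKey::CUDA, TORCH_FN(function)))

#define DISPATCH_TO_CPU(name, function) \
  m.impl(name, torch::dispatch(c10::DispatchKey::CPU, TORCH_FN(function)))

Note that this CUDA file registers only implementations; the matching m.def(...) schema declarations appear once, in the CPU counterpart below.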
9 changes: 9 additions & 0 deletions fbgemm_gpu/codegen/embedding_backward_dense_host_cpu.cpp
@@ -175,6 +175,15 @@ Tensor split_embedding_codegen_lookup_dense_function(
feature_requires_grad)[0];
}

// Deprecated for fb namespace! Please use fbgemm namespace instead!
TORCH_LIBRARY_FRAGMENT(fb, m) {
m.def(
"dense_embedding_codegen_lookup_function(Tensor dev_weights, Tensor weights_offsets, Tensor D_offsets, int total_D, int max_D, Tensor hash_size_cumsum, int total_hash_size_bits, Tensor indices, Tensor offsets, int pooling_mode, Tensor? indice_weights, Tensor? feature_requires_grad) -> Tensor");
DISPATCH_TO_CPU(
"dense_embedding_codegen_lookup_function",
split_embedding_codegen_lookup_dense_function);
}

TORCH_LIBRARY_FRAGMENT(fbgemm, m) {
m.def(
"dense_embedding_codegen_lookup_function(Tensor dev_weights, Tensor weights_offsets, Tensor D_offsets, int total_D, int max_D, Tensor hash_size_cumsum, int total_hash_size_bits, Tensor indices, Tensor offsets, int pooling_mode, Tensor? indice_weights, Tensor? feature_requires_grad) -> Tensor");
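
Under PyTorch's standard schema-to-C++ type mapping (Tensor maps to at::Tensor, int to int64_t, Tensor? to c10::optional<at::Tensor>), the schema string above corresponds to a kernel signature roughly like the following sketch (illustrating the mapping, not the actual FBGEMM declaration):

#include <ATen/ATen.h>
#include <c10/util/Optional.h>

// Declaration sketch derived from the registered schema string.
at::Tensor dense_embedding_codegen_lookup_function(
    at::Tensor dev_weights,
    at::Tensor weights_offsets,
    at::Tensor D_offsets,
    int64_t total_D,
    int64_t max_D,
    at::Tensor hash_size_cumsum,
    int64_t total_hash_size_bits,
    at::Tensor indices,
    at::Tensor offsets,
    int64_t pooling_mode,
    c10::optional<at::Tensor> indice_weights,
    c10::optional<at::Tensor> feature_requires_grad);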
8 changes: 8 additions & 0 deletions fbgemm_gpu/codegen/embedding_backward_split_host_cpu_template.cpp
@@ -216,6 +216,14 @@ Tensor split_embedding_codegen_lookup_{{ optimizer }}_function_cpu(
output_dtype)[0];
}

// Deprecated for fb namespace! Please use fbgemm namespace instead!
TORCH_LIBRARY_FRAGMENT(fb, m) {
m.def("split_embedding_codegen_lookup_{{ optimizer }}_function_cpu(Tensor host_weights, Tensor weights_placements, Tensor weights_offsets, Tensor D_offsets, int total_D, int max_D, Tensor hash_size_cumsum, int total_hash_size_bits, Tensor indices, Tensor offsets, int pooling_mode, Tensor? indice_weights, Tensor? feature_requires_grad, bool gradient_clipping, float max_gradient, bool stochastic_rounding, {{ args.split_function_schemas | join(", ") }}, int output_dtype=0) -> Tensor");
DISPATCH_TO_CPU(
"split_embedding_codegen_lookup_{{ optimizer }}_function_cpu",
split_embedding_codegen_lookup_{{ optimizer }}_function_cpu);
}

TORCH_LIBRARY_FRAGMENT(fbgemm, m) {
m.def("split_embedding_codegen_lookup_{{ optimizer }}_function_cpu(Tensor host_weights, Tensor weights_placements, Tensor weights_offsets, Tensor D_offsets, int total_D, int max_D, Tensor hash_size_cumsum, int total_hash_size_bits, Tensor indices, Tensor offsets, int pooling_mode, Tensor? indice_weights, Tensor? feature_requires_grad, bool gradient_clipping, float max_gradient, bool stochastic_rounding, {{ args.split_function_schemas | join(", ") }}, int output_dtype=0) -> Tensor");
DISPATCH_TO_CPU(
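
This file is a Jinja template: {{ optimizer }} and {{ args.split_function_schemas | join(", ") }} are substituted at codegen time, once per optimizer. As an illustration, assuming a hypothetical optimizer named sgd whose only optimizer-specific schema entry is float learning_rate (both assumptions, not taken from this PR), the fb fragment would render to something like:

TORCH_LIBRARY_FRAGMENT(fb, m) {
  m.def("split_embedding_codegen_lookup_sgd_function_cpu(Tensor host_weights, Tensor weights_placements, Tensor weights_offsets, Tensor D_offsets, int total_D, int max_D, Tensor hash_size_cumsum, int total_hash_size_bits, Tensor indices, Tensor offsets, int pooling_mode, Tensor? indice_weights, Tensor? feature_requires_grad, bool gradient_clipping, float max_gradient, bool stochastic_rounding, float learning_rate, int output_dtype=0) -> Tensor");
  DISPATCH_TO_CPU(
      "split_embedding_codegen_lookup_sgd_function_cpu",
      split_embedding_codegen_lookup_sgd_function_cpu);
}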
6 changes: 6 additions & 0 deletions fbgemm_gpu/codegen/embedding_backward_split_host_template.cpp
@@ -491,6 +491,12 @@ Tensor split_embedding_codegen_lookup_{{ optimizer }}_function(
}
}

// Deprecated for fb namespace! Please use fbgemm namespace instead!
TORCH_LIBRARY_FRAGMENT(fb, m) {
m.def("split_embedding_codegen_lookup_{{ optimizer }}_function(Tensor placeholder_autograd_tensor, Tensor dev_weights, Tensor uvm_weights, Tensor lxu_cache_weights, Tensor weights_placements, Tensor weights_offsets, Tensor D_offsets, int total_D, int max_D, Tensor hash_size_cumsum, int total_hash_size_bits, Tensor indices, Tensor offsets, int pooling_mode, Tensor? indice_weights, Tensor? feature_requires_grad, Tensor lxu_cache_locations, bool gradient_clipping, float max_gradient, bool stochastic_rounding, {{ args.split_function_schemas | join(", ") }}, int output_dtype=0) -> Tensor");
DISPATCH_TO_CUDA("split_embedding_codegen_lookup_{{ optimizer }}_function", split_embedding_codegen_lookup_{{ optimizer }}_function);
}

TORCH_LIBRARY_FRAGMENT(fbgemm, m) {
m.def("split_embedding_codegen_lookup_{{ optimizer }}_function(Tensor placeholder_autograd_tensor, Tensor dev_weights, Tensor uvm_weights, Tensor lxu_cache_weights, Tensor weights_placements, Tensor weights_offsets, Tensor D_offsets, int total_D, int max_D, Tensor hash_size_cumsum, int total_hash_size_bits, Tensor indices, Tensor offsets, int pooling_mode, Tensor? indice_weights, Tensor? feature_requires_grad, Tensor lxu_cache_locations, bool gradient_clipping, float max_gradient, bool stochastic_rounding, {{ args.split_function_schemas | join(", ") }}, int output_dtype=0) -> Tensor");
DISPATCH_TO_CUDA("split_embedding_codegen_lookup_{{ optimizer }}_function", split_embedding_codegen_lookup_{{ optimizer }}_function);
5 changes: 5 additions & 0 deletions fbgemm_gpu/codegen/embedding_bounds_check_host.cpp
@@ -21,6 +21,11 @@ void bounds_check_indices_cuda(
int64_t bounds_check_mode,
Tensor warning);

// Deprecated for fb namespace! Please use fbgemm namespace instead!
TORCH_LIBRARY_FRAGMENT(fb, m) {
DISPATCH_TO_CUDA("bounds_check_indices", bounds_check_indices_cuda);
}

TORCH_LIBRARY_FRAGMENT(fbgemm, m) {
DISPATCH_TO_CUDA("bounds_check_indices", bounds_check_indices_cuda);
}
9 changes: 9 additions & 0 deletions fbgemm_gpu/codegen/embedding_bounds_check_host_cpu.cpp
@@ -105,6 +105,15 @@ void bounds_check_indices_cpu(
}
} // namespace

// Deprecated for fb namespace! Please use fbgemm namespace instead!
TORCH_LIBRARY_FRAGMENT(fb, m) {
// The (a!) tells PyTorch this is an impure operation and so cannot be CSE'd
// or DCE'd, etc.
m.def(
"bounds_check_indices(Tensor rows_per_table, Tensor(a!) indices, Tensor(a!) offsets, int bounds_check_mode, Tensor(a!) warning) -> ()");
DISPATCH_TO_CPU("bounds_check_indices", bounds_check_indices_cpu);
}

TORCH_LIBRARY_FRAGMENT(fbgemm, m) {
// The (a!) tells PyTorch this is an impure operation and so cannot be CSE'd
// or DCE'd, etc.
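
The Tensor(a!) alias annotations in the schema above declare arguments that the op mutates in place; without them, PyTorch could treat the call as pure and eliminate or reorder it. A minimal sketch of the same idea with a hypothetical op that is not part of this PR:

#include <ATen/ATen.h>
#include <torch/library.h>

namespace {

// Mutates the caller's tensor in place: the visible side effect
// that the (a!) annotation declares to the dispatcher and JIT.
void bump_count(at::Tensor counts, int64_t idx) {
  counts[idx] += 1;
}

} // namespace

TORCH_LIBRARY_FRAGMENT(fb, m) {
  m.def("bump_count(Tensor(a!) counts, int idx) -> ()", TORCH_FN(bump_count));
}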
31 changes: 31 additions & 0 deletions fbgemm_gpu/src/cumem_utils_host.cpp
@@ -15,6 +15,37 @@ using Tensor = at::Tensor;

namespace fbgemm_gpu {

// Deprecated for fb namespace! Please use fbgemm namespace instead!
TORCH_LIBRARY_FRAGMENT(fb, m) {
m.def("is_uvm_tensor(Tensor t) -> bool", TORCH_FN(is_uvm_tensor));
m.def("uvm_storage(Tensor t) -> bool", TORCH_FN(uvm_storage));
m.def(
"uvm_to_device(Tensor self, Tensor prototype) -> Tensor",
TORCH_FN(uvm_to_device));
m.def("uvm_to_cpu(Tensor t) -> Tensor");
DISPATCH_TO_CUDA("uvm_to_cpu", uvm_to_cpu);
m.def("new_managed_tensor(Tensor self, int[] sizes) -> Tensor");
DISPATCH_TO_CUDA("new_managed_tensor", new_managed_tensor);
m.def("new_vanilla_managed_tensor(Tensor self, int[] sizes) -> Tensor");
DISPATCH_TO_CUDA("new_vanilla_managed_tensor", new_vanilla_managed_tensor);
m.def(
"cuda_mem_advise(Tensor t, int advice) -> ()",
TORCH_FN(uvm_cuda_mem_advise));
m.def(
"cuda_mem_prefetch_async(Tensor t, Tensor? device_t) -> ()",
TORCH_FN(uvm_cuda_mem_prefetch_async));
m.def(
"uvm_mem_advice_dont_fork(Tensor t) -> ()",
TORCH_FN(uvm_mem_advice_dont_fork));

m.def("uvm_to_cpu_clone(Tensor t) -> Tensor", TORCH_FN(uvm_to_cpu_clone));

#ifndef __HIP_PLATFORM_HCC__
// FIXME: some advanced "cudaMemAdvise" flags are not supported by HIP.
m.def(FBGEMM_GPU_ENUM_OP(uvm, fbgemm_gpu_uvm_enum_query));
#endif
}

TORCH_LIBRARY_FRAGMENT(fbgemm, m) {
m.def("is_uvm_tensor(Tensor t) -> bool", TORCH_FN(is_uvm_tensor));
m.def("uvm_storage(Tensor t) -> bool", TORCH_FN(uvm_storage));
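
Two registration styles are mixed above: m.def(schema, TORCH_FN(fn)) binds a single catch-all kernel that runs on every backend (fine for device-agnostic queries such as is_uvm_tensor), while m.def(schema) followed by DISPATCH_TO_CUDA(...) declares the schema and binds a kernel only for the CUDA dispatch key. A sketch contrasting the two with hypothetical ops:

#include <ATen/ATen.h>
#include <torch/library.h>

namespace {

// Device-agnostic logic: one catch-all kernel suffices.
bool is_empty_tensor(const at::Tensor& t) {
  return t.numel() == 0;
}

// Stand-in for a kernel that would really live in a .cu file.
at::Tensor scale_by_two(const at::Tensor& t) {
  return t * 2;
}

} // namespace

TORCH_LIBRARY_FRAGMENT(fb, m) {
  // Style 1: schema plus catch-all implementation in one call.
  m.def("is_empty_tensor(Tensor t) -> bool", TORCH_FN(is_empty_tensor));

  // Style 2: schema first, then an implementation bound to one key.
  m.def("scale_by_two(Tensor t) -> Tensor");
  m.impl(
      "scale_by_two",
      torch::dispatch(c10::DispatchKey::CUDA, TORCH_FN(scale_by_two)));
}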
30 changes: 30 additions & 0 deletions fbgemm_gpu/src/split_table_batched_embeddings.cpp
@@ -107,6 +107,36 @@ void lxu_cache_flush_cuda(

namespace {

// Deprecated for fb namespace! Please use fbgemm namespace instead!
TORCH_LIBRARY_FRAGMENT(fb, m) {
m.def(
"linearize_cache_indices(Tensor cache_hash_size_cumsum, Tensor indices, Tensor offsets) -> Tensor");
DISPATCH_TO_CUDA("linearize_cache_indices", linearize_cache_indices_cuda);
m.def(
"lru_cache_populate(Tensor weights, Tensor hash_size_cumsum, int total_cache_hash_size, Tensor cache_index_table_map, Tensor weights_offsets, Tensor D_offsets, Tensor linear_cache_indices, Tensor(a!) lxu_cache_state, Tensor(b!) lxu_cache_weights, int time_stamp, Tensor(c!) lru_state, bool stochastic_rounding) -> ()");
DISPATCH_TO_CUDA("lru_cache_populate", lru_cache_populate_cuda);
m.def(
"lru_cache_populate_byte(Tensor weights, Tensor hash_size_cumsum, int total_cache_hash_size, Tensor cache_index_table_map, Tensor weights_offsets, Tensor weights_tys, Tensor D_offsets, Tensor linear_cache_indices, Tensor(a!) lxu_cache_state, Tensor(b!) lxu_cache_weights, int time_stamp, Tensor(c!) lru_state) -> ()");
DISPATCH_TO_CUDA("lru_cache_populate_byte", lru_cache_populate_byte_cuda);
m.def(
"lfu_cache_populate(Tensor weights, Tensor cache_hash_size_cumsum, int total_cache_hash_size, Tensor cache_index_table_map, Tensor weights_offsets, Tensor D_offsets, Tensor linear_cache_indices, Tensor(a!) lxu_cache_state, Tensor(b!) lxu_cache_weights, Tensor(c!) lfu_state, bool stochastic_rounding) -> ()");
DISPATCH_TO_CUDA("lfu_cache_populate", lfu_cache_populate_cuda);
m.def(
"lfu_cache_populate_byte(Tensor weights, Tensor cache_hash_size_cumsum, int total_cache_hash_size, Tensor cache_index_table_map, Tensor weights_offsets, Tensor weights_tys, Tensor D_offsets, Tensor linear_cache_indices, Tensor(a!) lxu_cache_state, Tensor(b!) lxu_cache_weights, Tensor(c!) lfu_state) -> ()");
DISPATCH_TO_CUDA("lfu_cache_populate_byte", lfu_cache_populate_byte_cuda);
m.def(
"lxu_cache_lookup(Tensor linear_cache_indices, Tensor lxu_cache_state, int invalid_index = -1) -> Tensor");
DISPATCH_TO_CUDA("lxu_cache_lookup", lxu_cache_lookup_cuda);
m.def(
"lxu_cache_flush(Tensor(a!) uvm_weights, Tensor cache_hash_size_cumsum, Tensor cache_index_table_map, Tensor weights_offsets, Tensor D_offsets, int total_D, Tensor(b!) lxu_cache_state, Tensor(c!) lxu_cache_weights, bool stochastic_rounding) -> ()");
DISPATCH_TO_CUDA("lxu_cache_flush", lxu_cache_flush_cuda);
m.def("lxu_cache_slot(int h_in, int C) -> int");
m.impl(
"lxu_cache_slot",
torch::dispatch(
c10::DispatchKey::CatchAll, TORCH_FN(host_lxu_cache_slot)));
}

TORCH_LIBRARY_FRAGMENT(fbgemm, m) {
m.def(
"linearize_cache_indices(Tensor cache_hash_size_cumsum, Tensor indices, Tensor offsets) -> Tensor");
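
Once a fragment like this is loaded, its ops are reachable by name through the PyTorch dispatcher. A sketch of calling the simplest schema above, lxu_cache_slot, from C++ (the lookup boilerplate is standard dispatcher usage, not code from this PR):

#include <ATen/core/dispatch/Dispatcher.h>

int64_t call_lxu_cache_slot(int64_t h_in, int64_t C) {
  // Resolve "fb::lxu_cache_slot" once and cache the typed handle.
  static auto op = c10::Dispatcher::singleton()
      .findSchemaOrThrow("fb::lxu_cache_slot", "")
      .typed<int64_t(int64_t, int64_t)>();
  return op.call(h_in, C);
}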
