Only keep fbgemm namespace for torch.ops (pytorch#944)
Summary:
Pull Request resolved: pytorch#944

As titled: remove the duplicate torch.ops.fb registrations from fbgemm_gpu so that every operator is exposed only under the torch.ops.fbgemm namespace.

Reviewed By: jasonjk-park

Differential Revision: D34380797

fbshipit-source-id: 31aa37fa39f445fd9da77b5aa1fa74f119770220
jianyuh authored and facebook-github-bot committed Feb 24, 2022
1 parent c2d46af commit a556324
Showing 9 changed files with 0 additions and 105 deletions.
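For context, every file in this diff follows the same pattern: each op was registered twice, once under the fb namespace and once under fbgemm, and the commit deletes the fb copy. A minimal sketch of the pattern (the toy op name and body are assumptions for illustration, not code from this diff):

#include <ATen/ATen.h>
#include <torch/library.h>

// Toy stand-in for one of the fbgemm_gpu lookup ops.
at::Tensor toy_lookup(at::Tensor indices) {
  return indices.clone();
}

// Removed by this commit: the parallel "fb" registration.
// TORCH_LIBRARY_FRAGMENT(fb, m) {
//   m.def("toy_lookup(Tensor indices) -> Tensor");
//   m.impl("toy_lookup", TORCH_FN(toy_lookup));
// }

// Kept: a single registration, reachable from Python as
// torch.ops.fbgemm.toy_lookup. Call sites using torch.ops.fb.* must
// migrate to torch.ops.fbgemm.*.
TORCH_LIBRARY_FRAGMENT(fbgemm, m) {
  m.def("toy_lookup(Tensor indices) -> Tensor");
  m.impl("toy_lookup", TORCH_FN(toy_lookup));
}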
6 changes: 0 additions & 6 deletions fbgemm_gpu/codegen/embedding_backward_dense_host.cpp
@@ -392,12 +392,6 @@ Tensor split_embedding_codegen_lookup_dense_function(
}
}

TORCH_LIBRARY_FRAGMENT(fb, m) {
DISPATCH_TO_CUDA(
"dense_embedding_codegen_lookup_function",
split_embedding_codegen_lookup_dense_function);
}

TORCH_LIBRARY_FRAGMENT(fbgemm, m) {
DISPATCH_TO_CUDA(
"dense_embedding_codegen_lookup_function",
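A note on the helper used above: DISPATCH_TO_CUDA (and DISPATCH_TO_CPU in the CPU host files below) are fbgemm_gpu convenience macros, not PyTorch builtins, and their definitions are not part of this diff. A plausible shape, stated here as an assumption, is:

// `m` is the torch::Library handle in scope inside TORCH_LIBRARY_FRAGMENT;
// the macro binds `function` as the kernel for `name` on one backend.
#define DISPATCH_TO_CUDA(name, function) \
  m.impl(name, torch::dispatch(c10::DispatchKey::CUDA, TORCH_FN(function)))

#define DISPATCH_TO_CPU(name, function) \
  m.impl(name, torch::dispatch(c10::DispatchKey::CPU, TORCH_FN(function)))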
8 changes: 0 additions & 8 deletions fbgemm_gpu/codegen/embedding_backward_dense_host_cpu.cpp
@@ -175,14 +175,6 @@ Tensor split_embedding_codegen_lookup_dense_function(
feature_requires_grad)[0];
}

TORCH_LIBRARY_FRAGMENT(fb, m) {
m.def(
"dense_embedding_codegen_lookup_function(Tensor dev_weights, Tensor weights_offsets, Tensor D_offsets, int total_D, int max_D, Tensor hash_size_cumsum, int total_hash_size_bits, Tensor indices, Tensor offsets, int pooling_mode, Tensor? indice_weights, Tensor? feature_requires_grad) -> Tensor");
DISPATCH_TO_CPU(
"dense_embedding_codegen_lookup_function",
split_embedding_codegen_lookup_dense_function);
}

TORCH_LIBRARY_FRAGMENT(fbgemm, m) {
m.def(
"dense_embedding_codegen_lookup_function(Tensor dev_weights, Tensor weights_offsets, Tensor D_offsets, int total_D, int max_D, Tensor hash_size_cumsum, int total_hash_size_bits, Tensor indices, Tensor offsets, int pooling_mode, Tensor? indice_weights, Tensor? feature_requires_grad) -> Tensor");
7 changes: 0 additions & 7 deletions
@@ -216,13 +216,6 @@ Tensor split_embedding_codegen_lookup_{{ optimizer }}_function_cpu(
output_dtype)[0];
}

TORCH_LIBRARY_FRAGMENT(fb, m) {
m.def("split_embedding_codegen_lookup_{{ optimizer }}_function_cpu(Tensor host_weights, Tensor weights_placements, Tensor weights_offsets, Tensor D_offsets, int total_D, int max_D, Tensor hash_size_cumsum, int total_hash_size_bits, Tensor indices, Tensor offsets, int pooling_mode, Tensor? indice_weights, Tensor? feature_requires_grad, bool gradient_clipping, float max_gradient, bool stochastic_rounding, {{ args.split_function_schemas | join(", ") }}, int output_dtype=0) -> Tensor");
DISPATCH_TO_CPU(
"split_embedding_codegen_lookup_{{ optimizer }}_function_cpu",
split_embedding_codegen_lookup_{{ optimizer }}_function_cpu);
}

TORCH_LIBRARY_FRAGMENT(fbgemm, m) {
m.def("split_embedding_codegen_lookup_{{ optimizer }}_function_cpu(Tensor host_weights, Tensor weights_placements, Tensor weights_offsets, Tensor D_offsets, int total_D, int max_D, Tensor hash_size_cumsum, int total_hash_size_bits, Tensor indices, Tensor offsets, int pooling_mode, Tensor? indice_weights, Tensor? feature_requires_grad, bool gradient_clipping, float max_gradient, bool stochastic_rounding, {{ args.split_function_schemas | join(", ") }}, int output_dtype=0) -> Tensor");
DISPATCH_TO_CPU(
5 changes: 0 additions & 5 deletions fbgemm_gpu/codegen/embedding_backward_split_host_template.cpp
@@ -491,11 +491,6 @@ Tensor split_embedding_codegen_lookup_{{ optimizer }}_function(
}
}

TORCH_LIBRARY_FRAGMENT(fb, m) {
m.def("split_embedding_codegen_lookup_{{ optimizer }}_function(Tensor placeholder_autograd_tensor, Tensor dev_weights, Tensor uvm_weights, Tensor lxu_cache_weights, Tensor weights_placements, Tensor weights_offsets, Tensor D_offsets, int total_D, int max_D, Tensor hash_size_cumsum, int total_hash_size_bits, Tensor indices, Tensor offsets, int pooling_mode, Tensor? indice_weights, Tensor? feature_requires_grad, Tensor lxu_cache_locations, bool gradient_clipping, float max_gradient, bool stochastic_rounding, {{ args.split_function_schemas | join(", ") }}, int output_dtype=0) -> Tensor");
DISPATCH_TO_CUDA("split_embedding_codegen_lookup_{{ optimizer }}_function", split_embedding_codegen_lookup_{{ optimizer }}_function);
}

TORCH_LIBRARY_FRAGMENT(fbgemm, m) {
m.def("split_embedding_codegen_lookup_{{ optimizer }}_function(Tensor placeholder_autograd_tensor, Tensor dev_weights, Tensor uvm_weights, Tensor lxu_cache_weights, Tensor weights_placements, Tensor weights_offsets, Tensor D_offsets, int total_D, int max_D, Tensor hash_size_cumsum, int total_hash_size_bits, Tensor indices, Tensor offsets, int pooling_mode, Tensor? indice_weights, Tensor? feature_requires_grad, Tensor lxu_cache_locations, bool gradient_clipping, float max_gradient, bool stochastic_rounding, {{ args.split_function_schemas | join(", ") }}, int output_dtype=0) -> Tensor");
DISPATCH_TO_CUDA("split_embedding_codegen_lookup_{{ optimizer }}_function", split_embedding_codegen_lookup_{{ optimizer }}_function);
4 changes: 0 additions & 4 deletions fbgemm_gpu/codegen/embedding_bounds_check_host.cpp
@@ -21,10 +21,6 @@ void bounds_check_indices_cuda(
int64_t bounds_check_mode,
Tensor warning);

TORCH_LIBRARY_FRAGMENT(fb, m) {
DISPATCH_TO_CUDA("bounds_check_indices", bounds_check_indices_cuda);
}

TORCH_LIBRARY_FRAGMENT(fbgemm, m) {
DISPATCH_TO_CUDA("bounds_check_indices", bounds_check_indices_cuda);
}
8 changes: 0 additions & 8 deletions fbgemm_gpu/codegen/embedding_bounds_check_host_cpu.cpp
@@ -105,14 +105,6 @@ void bounds_check_indices_cpu(
}
} // namespace

TORCH_LIBRARY_FRAGMENT(fb, m) {
// The (a!) tells PyTorch this is an impure operation and so cannot be CSE'd
// or DCE'd, etc.
m.def(
"bounds_check_indices(Tensor rows_per_table, Tensor(a!) indices, Tensor(a!) offsets, int bounds_check_mode, Tensor(a!) warning) -> ()");
DISPATCH_TO_CPU("bounds_check_indices", bounds_check_indices_cpu);
}

TORCH_LIBRARY_FRAGMENT(fbgemm, m) {
// The (a!) tells PyTorch this is an impure operation and so cannot be CSE'd
// or DCE'd, etc.
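The (a!) annotation called out in the comment above is worth a standalone example: it marks an argument the op mutates in place, which is what stops the JIT from CSE'ing or DCE'ing calls. A self-contained sketch (the op name and body are hypothetical, not from this diff):

#include <ATen/ATen.h>
#include <torch/library.h>

// Hypothetical impure op: writes into `indices` and returns nothing.
void clamp_indices(at::Tensor rows_per_table, at::Tensor indices) {
  indices.clamp_(0, rows_per_table.numel() - 1); // in-place mutation
}

TORCH_LIBRARY_FRAGMENT(fbgemm, m) {
  // Tensor(a!) declares that `indices` is mutated, so a call whose
  // result is unused still cannot be optimized away.
  m.def("clamp_indices(Tensor rows_per_table, Tensor(a!) indices) -> ()");
  m.impl("clamp_indices", TORCH_FN(clamp_indices));
}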
11 changes: 0 additions & 11 deletions fbgemm_gpu/codegen/embedding_forward_quantized_host.cpp
@@ -177,17 +177,6 @@ Tensor pruned_array_lookup_cuda(
Tensor index_remappings,
Tensor index_remappings_offsets);

TORCH_LIBRARY_FRAGMENT(fb, m) {
DISPATCH_TO_CUDA(
"int_nbit_split_embedding_codegen_lookup_function",
int_nbit_split_embedding_codegen_lookup_function);

DISPATCH_TO_CUDA(
"pruned_hashmap_lookup", pruned_hashmap_lookup_unweighted_cuda);

DISPATCH_TO_CUDA("pruned_array_lookup", pruned_array_lookup_cuda);
}

TORCH_LIBRARY_FRAGMENT(fbgemm, m) {
DISPATCH_TO_CUDA(
"int_nbit_split_embedding_codegen_lookup_function",
27 changes: 0 additions & 27 deletions fbgemm_gpu/codegen/embedding_forward_quantized_host_cpu.cpp
@@ -120,33 +120,6 @@ Tensor pruned_array_lookup_cpu(
Tensor index_remappings,
Tensor index_remappings_offsets);

TORCH_LIBRARY_FRAGMENT(fb, m) {
m.def(
"int_nbit_split_embedding_codegen_lookup_function(Tensor dev_weights, Tensor uvm_weights, Tensor weights_placements, Tensor weights_offsets, Tensor weights_tys, Tensor D_offsets, int total_D, int max_int2_D, int max_int4_D, int max_int8_D, int max_float16_D, int max_float32_D, Tensor indices, Tensor offsets, int pooling_mode, Tensor? indice_weights, int output_dtype=1, Tensor? lxu_cache_weights=None, Tensor? lxu_cache_locations=None) -> Tensor");
DISPATCH_TO_CPU(
"int_nbit_split_embedding_codegen_lookup_function",
int_nbit_split_embedding_codegen_lookup_function_cpu);

// GPU version of pruned_hashmap needs to use CPU version of
// pruned_hashmap_insert
m.def(
"pruned_hashmap_insert(Tensor indices, Tensor dense_indices, Tensor offsets, Tensor hash_table, Tensor hash_table_offsets) -> ()");
DISPATCH_TO_CPU(
"pruned_hashmap_insert", pruned_hashmap_insert_unweighted_cpu);

// CPU version of hashmap Lookup isn't used. For CPUs, we should use
// PrunedMapCPU below.
m.def(
"pruned_hashmap_lookup(Tensor indices, Tensor offsets, Tensor hash_table, Tensor hash_table_offsets) -> Tensor");
DISPATCH_TO_CPU(
"pruned_hashmap_lookup", pruned_hashmap_lookup_unweighted_cpu);

// CPU version of array lookup.
m.def(
"pruned_array_lookup(Tensor indices, Tensor offsets, Tensor index_remappings, Tensor index_remappings_offsets) -> Tensor");
DISPATCH_TO_CPU("pruned_array_lookup", pruned_array_lookup_cpu);
}

TORCH_LIBRARY_FRAGMENT(fbgemm, m) {
m.def(
"int_nbit_split_embedding_codegen_lookup_function(Tensor dev_weights, Tensor uvm_weights, Tensor weights_placements, Tensor weights_offsets, Tensor weights_tys, Tensor D_offsets, int total_D, int max_int2_D, int max_int4_D, int max_int8_D, int max_float16_D, int max_float32_D, Tensor indices, Tensor offsets, int pooling_mode, Tensor? indice_weights, int output_dtype=1, Tensor? lxu_cache_weights=None, Tensor? lxu_cache_locations=None) -> Tensor");
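Since only the fbgemm namespace survives, C++ callers resolve these schemas through the dispatcher under that single name. A hedged sketch using the pruned_array_lookup schema shown above (the wrapper function is illustrative, not part of the commit):

#include <ATen/ATen.h>
#include <ATen/core/dispatch/Dispatcher.h>

at::Tensor call_pruned_array_lookup(
    at::Tensor indices,
    at::Tensor offsets,
    at::Tensor index_remappings,
    at::Tensor index_remappings_offsets) {
  // Look the op up by (namespace::name, overload); after this commit
  // only "fbgemm::pruned_array_lookup" exists.
  static auto op =
      c10::Dispatcher::singleton()
          .findSchemaOrThrow("fbgemm::pruned_array_lookup", "")
          .typed<at::Tensor(at::Tensor, at::Tensor, at::Tensor, at::Tensor)>();
  return op.call(indices, offsets, index_remappings, index_remappings_offsets);
}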
29 changes: 0 additions & 29 deletions fbgemm_gpu/src/split_table_batched_embeddings.cpp
@@ -106,35 +106,6 @@ void lxu_cache_flush_cuda(

namespace {

TORCH_LIBRARY_FRAGMENT(fb, m) {
m.def(
"linearize_cache_indices(Tensor cache_hash_size_cumsum, Tensor indices, Tensor offsets) -> Tensor");
DISPATCH_TO_CUDA("linearize_cache_indices", linearize_cache_indices_cuda);
m.def(
"lru_cache_populate(Tensor weights, Tensor hash_size_cumsum, int total_cache_hash_size, Tensor cache_index_table_map, Tensor weights_offsets, Tensor D_offsets, Tensor linear_cache_indices, Tensor(a!) lxu_cache_state, Tensor(b!) lxu_cache_weights, int time_stamp, Tensor(c!) lru_state, bool stochastic_rounding) -> ()");
DISPATCH_TO_CUDA("lru_cache_populate", lru_cache_populate_cuda);
m.def(
"lru_cache_populate_byte(Tensor weights, Tensor hash_size_cumsum, int total_cache_hash_size, Tensor cache_index_table_map, Tensor weights_offsets, Tensor weights_tys, Tensor D_offsets, Tensor linear_cache_indices, Tensor(a!) lxu_cache_state, Tensor(b!) lxu_cache_weights, int time_stamp, Tensor(c!) lru_state) -> ()");
DISPATCH_TO_CUDA("lru_cache_populate_byte", lru_cache_populate_byte_cuda);
m.def(
"lfu_cache_populate(Tensor weights, Tensor cache_hash_size_cumsum, int total_cache_hash_size, Tensor cache_index_table_map, Tensor weights_offsets, Tensor D_offsets, Tensor linear_cache_indices, Tensor(a!) lxu_cache_state, Tensor(b!) lxu_cache_weights, Tensor(c!) lfu_state, bool stochastic_rounding) -> ()");
DISPATCH_TO_CUDA("lfu_cache_populate", lfu_cache_populate_cuda);
m.def(
"lfu_cache_populate_byte(Tensor weights, Tensor cache_hash_size_cumsum, int total_cache_hash_size, Tensor cache_index_table_map, Tensor weights_offsets, Tensor weights_tys, Tensor D_offsets, Tensor linear_cache_indices, Tensor(a!) lxu_cache_state, Tensor(b!) lxu_cache_weights, Tensor(c!) lfu_state) -> ()");
DISPATCH_TO_CUDA("lfu_cache_populate_byte", lfu_cache_populate_byte_cuda);
m.def(
"lxu_cache_lookup(Tensor linear_cache_indices, Tensor lxu_cache_state) -> Tensor");
DISPATCH_TO_CUDA("lxu_cache_lookup", lxu_cache_lookup_cuda);
m.def(
"lxu_cache_flush(Tensor(a!) uvm_weights, Tensor cache_hash_size_cumsum, Tensor cache_index_table_map, Tensor weights_offsets, Tensor D_offsets, int total_D, Tensor(b!) lxu_cache_state, Tensor(c!) lxu_cache_weights, bool stochastic_rounding) -> ()");
DISPATCH_TO_CUDA("lxu_cache_flush", lxu_cache_flush_cuda);
m.def("lxu_cache_slot(int h_in, int C) -> int");
m.impl(
"lxu_cache_slot",
torch::dispatch(
c10::DispatchKey::CatchAll, TORCH_FN(host_lxu_cache_slot)));
}

TORCH_LIBRARY_FRAGMENT(fbgemm, m) {
m.def(
"linearize_cache_indices(Tensor cache_hash_size_cumsum, Tensor indices, Tensor offsets) -> Tensor");
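One registration in this file differs from the backend-dispatched ones: lxu_cache_slot takes no tensors, so there is no device to dispatch on, and it is registered under DispatchKey::CatchAll. A minimal sketch of that pattern (the slot arithmetic is a made-up stand-in, not the real implementation):

#include <cstdint>
#include <torch/library.h>

// Made-up stand-in: map a hashed index to a cache slot in [0, C).
int64_t host_cache_slot(int64_t h_in, int64_t C) {
  return h_in % C;
}

TORCH_LIBRARY_FRAGMENT(fbgemm, m) {
  m.def("host_cache_slot(int h_in, int C) -> int");
  // CatchAll: with no Tensor inputs, one host kernel serves all backends.
  m.impl(
      "host_cache_slot",
      torch::dispatch(c10::DispatchKey::CatchAll, TORCH_FN(host_cache_slot)));
}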
