Only keep fbgemm namespace for torch.ops (pytorch#944)
Summary:
Pull Request resolved: pytorch#944

As titled: remove the duplicate torch.ops.fb registrations from fbgemm_gpu so that every operator is exposed only under the torch.ops.fbgemm namespace.

Reviewed By: jasonjk-park

Differential Revision: D34380797

fbshipit-source-id: 31aa37fa39f445fd9da77b5aa1fa74f119770220
jianyuh authored and facebook-github-bot committed Feb 24, 2022
1 parent c2d46af commit a556324
Showing 9 changed files with 0 additions and 105 deletions.
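For context, every file in this diff follows the same pattern: each op was registered twice, once under the fb namespace and once under fbgemm, and the commit deletes the fb copy. A minimal sketch of the pattern (the toy op name and body are assumptions for illustration, not code from this diff):

#include <ATen/ATen.h>
#include <torch/library.h>

// Toy stand-in for one of the fbgemm_gpu lookup ops.
at::Tensor toy_lookup(at::Tensor indices) {
  return indices.clone();
}

// Removed by this commit: the parallel "fb" registration.
// TORCH_LIBRARY_FRAGMENT(fb, m) {
//   m.def("toy_lookup(Tensor indices) -> Tensor");
//   m.impl("toy_lookup", TORCH_FN(toy_lookup));
// }

// Kept: a single registration, reachable from Python as
// torch.ops.fbgemm.toy_lookup. Call sites using torch.ops.fb.* must
// migrate to torch.ops.fbgemm.*.
TORCH_LIBRARY_FRAGMENT(fbgemm, m) {
  m.def("toy_lookup(Tensor indices) -> Tensor");
  m.impl("toy_lookup", TORCH_FN(toy_lookup));
}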
6 changes: 0 additions & 6 deletions fbgemm_gpu/codegen/embedding_backward_dense_host.cpp
@@ -392,12 +392,6 @@ Tensor split_embedding_codegen_lookup_dense_function(
}
}

TORCH_LIBRARY_FRAGMENT(fb, m) {
DISPATCH_TO_CUDA(
"dense_embedding_codegen_lookup_function",
split_embedding_codegen_lookup_dense_function);
}

TORCH_LIBRARY_FRAGMENT(fbgemm, m) {
DISPATCH_TO_CUDA(
"dense_embedding_codegen_lookup_function",
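A note on the helper used above: DISPATCH_TO_CUDA (and DISPATCH_TO_CPU in the CPU host files below) are fbgemm_gpu convenience macros, not PyTorch builtins, and their definitions are not part of this diff. A plausible shape, stated here as an assumption, is:

// `m` is the torch::Library handle in scope inside TORCH_LIBRARY_FRAGMENT;
// the macro binds `function` as the kernel for `name` on one backend.
#define DISPATCH_TO_CUDA(name, function) \
  m.impl(name, torch::dispatch(c10::DispatchKey::CUDA, TORCH_FN(function)))

#define DISPATCH_TO_CPU(name, function) \
  m.impl(name, torch::dispatch(c10::DispatchKey::CPU, TORCH_FN(function)))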
8 changes: 0 additions & 8 deletions fbgemm_gpu/codegen/embedding_backward_dense_host_cpu.cpp
@@ -175,14 +175,6 @@ Tensor split_embedding_codegen_lookup_dense_function(
feature_requires_grad)[0];
}

TORCH_LIBRARY_FRAGMENT(fb, m) {
m.def(
"dense_embedding_codegen_lookup_function(Tensor dev_weights, Tensor weights_offsets, Tensor D_offsets, int total_D, int max_D, Tensor hash_size_cumsum, int total_hash_size_bits, Tensor indices, Tensor offsets, int pooling_mode, Tensor? indice_weights, Tensor? feature_requires_grad) -> Tensor");
DISPATCH_TO_CPU(
"dense_embedding_codegen_lookup_function",
split_embedding_codegen_lookup_dense_function);
}

TORCH_LIBRARY_FRAGMENT(fbgemm, m) {
m.def(
"dense_embedding_codegen_lookup_function(Tensor dev_weights, Tensor weights_offsets, Tensor D_offsets, int total_D, int max_D, Tensor hash_size_cumsum, int total_hash_size_bits, Tensor indices, Tensor offsets, int pooling_mode, Tensor? indice_weights, Tensor? feature_requires_grad) -> Tensor");
7 changes: 0 additions & 7 deletions
@@ -216,13 +216,6 @@ Tensor split_embedding_codegen_lookup_{{ optimizer }}_function_cpu(
output_dtype)[0];
}

TORCH_LIBRARY_FRAGMENT(fb, m) {
m.def("split_embedding_codegen_lookup_{{ optimizer }}_function_cpu(Tensor host_weights, Tensor weights_placements, Tensor weights_offsets, Tensor D_offsets, int total_D, int max_D, Tensor hash_size_cumsum, int total_hash_size_bits, Tensor indices, Tensor offsets, int pooling_mode, Tensor? indice_weights, Tensor? feature_requires_grad, bool gradient_clipping, float max_gradient, bool stochastic_rounding, {{ args.split_function_schemas | join(", ") }}, int output_dtype=0) -> Tensor");
DISPATCH_TO_CPU(
"split_embedding_codegen_lookup_{{ optimizer }}_function_cpu",
split_embedding_codegen_lookup_{{ optimizer }}_function_cpu);
}

TORCH_LIBRARY_FRAGMENT(fbgemm, m) {
m.def("split_embedding_codegen_lookup_{{ optimizer }}_function_cpu(Tensor host_weights, Tensor weights_placements, Tensor weights_offsets, Tensor D_offsets, int total_D, int max_D, Tensor hash_size_cumsum, int total_hash_size_bits, Tensor indices, Tensor offsets, int pooling_mode, Tensor? indice_weights, Tensor? feature_requires_grad, bool gradient_clipping, float max_gradient, bool stochastic_rounding, {{ args.split_function_schemas | join(", ") }}, int output_dtype=0) -> Tensor");
DISPATCH_TO_CPU(
5 changes: 0 additions & 5 deletions fbgemm_gpu/codegen/embedding_backward_split_host_template.cpp
@@ -491,11 +491,6 @@ Tensor split_embedding_codegen_lookup_{{ optimizer }}_function(
}
}

TORCH_LIBRARY_FRAGMENT(fb, m) {
m.def("split_embedding_codegen_lookup_{{ optimizer }}_function(Tensor placeholder_autograd_tensor, Tensor dev_weights, Tensor uvm_weights, Tensor lxu_cache_weights, Tensor weights_placements, Tensor weights_offsets, Tensor D_offsets, int total_D, int max_D, Tensor hash_size_cumsum, int total_hash_size_bits, Tensor indices, Tensor offsets, int pooling_mode, Tensor? indice_weights, Tensor? feature_requires_grad, Tensor lxu_cache_locations, bool gradient_clipping, float max_gradient, bool stochastic_rounding, {{ args.split_function_schemas | join(", ") }}, int output_dtype=0) -> Tensor");
DISPATCH_TO_CUDA("split_embedding_codegen_lookup_{{ optimizer }}_function", split_embedding_codegen_lookup_{{ optimizer }}_function);
}

TORCH_LIBRARY_FRAGMENT(fbgemm, m) {
m.def("split_embedding_codegen_lookup_{{ optimizer }}_function(Tensor placeholder_autograd_tensor, Tensor dev_weights, Tensor uvm_weights, Tensor lxu_cache_weights, Tensor weights_placements, Tensor weights_offsets, Tensor D_offsets, int total_D, int max_D, Tensor hash_size_cumsum, int total_hash_size_bits, Tensor indices, Tensor offsets, int pooling_mode, Tensor? indice_weights, Tensor? feature_requires_grad, Tensor lxu_cache_locations, bool gradient_clipping, float max_gradient, bool stochastic_rounding, {{ args.split_function_schemas | join(", ") }}, int output_dtype=0) -> Tensor");
DISPATCH_TO_CUDA("split_embedding_codegen_lookup_{{ optimizer }}_function", split_embedding_codegen_lookup_{{ optimizer }}_function);
4 changes: 0 additions & 4 deletions fbgemm_gpu/codegen/embedding_bounds_check_host.cpp
@@ -21,10 +21,6 @@ void bounds_check_indices_cuda(
int64_t bounds_check_mode,
Tensor warning);

TORCH_LIBRARY_FRAGMENT(fb, m) {
DISPATCH_TO_CUDA("bounds_check_indices", bounds_check_indices_cuda);
}

TORCH_LIBRARY_FRAGMENT(fbgemm, m) {
DISPATCH_TO_CUDA("bounds_check_indices", bounds_check_indices_cuda);
}
8 changes: 0 additions & 8 deletions fbgemm_gpu/codegen/embedding_bounds_check_host_cpu.cpp
@@ -105,14 +105,6 @@ void bounds_check_indices_cpu(
}
} // namespace

TORCH_LIBRARY_FRAGMENT(fb, m) {
// The (a!) tells PyTorch this is an impure operation and so cannot be CSE'd
// or DCE'd, etc.
m.def(
"bounds_check_indices(Tensor rows_per_table, Tensor(a!) indices, Tensor(a!) offsets, int bounds_check_mode, Tensor(a!) warning) -> ()");
DISPATCH_TO_CPU("bounds_check_indices", bounds_check_indices_cpu);
}

TORCH_LIBRARY_FRAGMENT(fbgemm, m) {
// The (a!) tells PyTorch this is an impure operation and so cannot be CSE'd
// or DCE'd, etc.
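The (a!) annotation called out in the comment above is worth a standalone example: it marks an argument the op mutates in place, which is what stops the JIT from CSE'ing or DCE'ing calls. A self-contained sketch (the op name and body are hypothetical, not from this diff):

#include <ATen/ATen.h>
#include <torch/library.h>

// Hypothetical impure op: writes into `indices` and returns nothing.
void clamp_indices(at::Tensor rows_per_table, at::Tensor indices) {
  indices.clamp_(0, rows_per_table.numel() - 1); // in-place mutation
}

TORCH_LIBRARY_FRAGMENT(fbgemm, m) {
  // Tensor(a!) declares that `indices` is mutated, so a call whose
  // result is unused still cannot be optimized away.
  m.def("clamp_indices(Tensor rows_per_table, Tensor(a!) indices) -> ()");
  m.impl("clamp_indices", TORCH_FN(clamp_indices));
}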
11 changes: 0 additions & 11 deletions fbgemm_gpu/codegen/embedding_forward_quantized_host.cpp
@@ -177,17 +177,6 @@ Tensor pruned_array_lookup_cuda(
Tensor index_remappings,
Tensor index_remappings_offsets);

TORCH_LIBRARY_FRAGMENT(fb, m) {
DISPATCH_TO_CUDA(
"int_nbit_split_embedding_codegen_lookup_function",
int_nbit_split_embedding_codegen_lookup_function);

DISPATCH_TO_CUDA(
"pruned_hashmap_lookup", pruned_hashmap_lookup_unweighted_cuda);

DISPATCH_TO_CUDA("pruned_array_lookup", pruned_array_lookup_cuda);
}

TORCH_LIBRARY_FRAGMENT(fbgemm, m) {
DISPATCH_TO_CUDA(
"int_nbit_split_embedding_codegen_lookup_function",
27 changes: 0 additions & 27 deletions fbgemm_gpu/codegen/embedding_forward_quantized_host_cpu.cpp
@@ -120,33 +120,6 @@ Tensor pruned_array_lookup_cpu(
Tensor index_remappings,
Tensor index_remappings_offsets);

TORCH_LIBRARY_FRAGMENT(fb, m) {
m.def(
"int_nbit_split_embedding_codegen_lookup_function(Tensor dev_weights, Tensor uvm_weights, Tensor weights_placements, Tensor weights_offsets, Tensor weights_tys, Tensor D_offsets, int total_D, int max_int2_D, int max_int4_D, int max_int8_D, int max_float16_D, int max_float32_D, Tensor indices, Tensor offsets, int pooling_mode, Tensor? indice_weights, int output_dtype=1, Tensor? lxu_cache_weights=None, Tensor? lxu_cache_locations=None) -> Tensor");
DISPATCH_TO_CPU(
"int_nbit_split_embedding_codegen_lookup_function",
int_nbit_split_embedding_codegen_lookup_function_cpu);

// GPU version of pruned_hashmap needs to use CPU version of
// pruned_hashmap_insert
m.def(
"pruned_hashmap_insert(Tensor indices, Tensor dense_indices, Tensor offsets, Tensor hash_table, Tensor hash_table_offsets) -> ()");
DISPATCH_TO_CPU(
"pruned_hashmap_insert", pruned_hashmap_insert_unweighted_cpu);

// CPU version of hashmap Lookup isn't used. For CPUs, we should use
// PrunedMapCPU below.
m.def(
"pruned_hashmap_lookup(Tensor indices, Tensor offsets, Tensor hash_table, Tensor hash_table_offsets) -> Tensor");
DISPATCH_TO_CPU(
"pruned_hashmap_lookup", pruned_hashmap_lookup_unweighted_cpu);

// CPU version of array lookup.
m.def(
"pruned_array_lookup(Tensor indices, Tensor offsets, Tensor index_remappings, Tensor index_remappings_offsets) -> Tensor");
DISPATCH_TO_CPU("pruned_array_lookup", pruned_array_lookup_cpu);
}

TORCH_LIBRARY_FRAGMENT(fbgemm, m) {
m.def(
"int_nbit_split_embedding_codegen_lookup_function(Tensor dev_weights, Tensor uvm_weights, Tensor weights_placements, Tensor weights_offsets, Tensor weights_tys, Tensor D_offsets, int total_D, int max_int2_D, int max_int4_D, int max_int8_D, int max_float16_D, int max_float32_D, Tensor indices, Tensor offsets, int pooling_mode, Tensor? indice_weights, int output_dtype=1, Tensor? lxu_cache_weights=None, Tensor? lxu_cache_locations=None) -> Tensor");
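Since only the fbgemm namespace survives, C++ callers resolve these schemas through the dispatcher under that single name. A hedged sketch using the pruned_array_lookup schema shown above (the wrapper function is illustrative, not part of the commit):

#include <ATen/ATen.h>
#include <ATen/core/dispatch/Dispatcher.h>

at::Tensor call_pruned_array_lookup(
    at::Tensor indices,
    at::Tensor offsets,
    at::Tensor index_remappings,
    at::Tensor index_remappings_offsets) {
  // Look the op up by (namespace::name, overload); after this commit
  // only "fbgemm::pruned_array_lookup" exists.
  static auto op =
      c10::Dispatcher::singleton()
          .findSchemaOrThrow("fbgemm::pruned_array_lookup", "")
          .typed<at::Tensor(at::Tensor, at::Tensor, at::Tensor, at::Tensor)>();
  return op.call(indices, offsets, index_remappings, index_remappings_offsets);
}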
29 changes: 0 additions & 29 deletions fbgemm_gpu/src/split_table_batched_embeddings.cpp
@@ -106,35 +106,6 @@ void lxu_cache_flush_cuda(

namespace {

TORCH_LIBRARY_FRAGMENT(fb, m) {
m.def(
"linearize_cache_indices(Tensor cache_hash_size_cumsum, Tensor indices, Tensor offsets) -> Tensor");
DISPATCH_TO_CUDA("linearize_cache_indices", linearize_cache_indices_cuda);
m.def(
"lru_cache_populate(Tensor weights, Tensor hash_size_cumsum, int total_cache_hash_size, Tensor cache_index_table_map, Tensor weights_offsets, Tensor D_offsets, Tensor linear_cache_indices, Tensor(a!) lxu_cache_state, Tensor(b!) lxu_cache_weights, int time_stamp, Tensor(c!) lru_state, bool stochastic_rounding) -> ()");
DISPATCH_TO_CUDA("lru_cache_populate", lru_cache_populate_cuda);
m.def(
"lru_cache_populate_byte(Tensor weights, Tensor hash_size_cumsum, int total_cache_hash_size, Tensor cache_index_table_map, Tensor weights_offsets, Tensor weights_tys, Tensor D_offsets, Tensor linear_cache_indices, Tensor(a!) lxu_cache_state, Tensor(b!) lxu_cache_weights, int time_stamp, Tensor(c!) lru_state) -> ()");
DISPATCH_TO_CUDA("lru_cache_populate_byte", lru_cache_populate_byte_cuda);
m.def(
"lfu_cache_populate(Tensor weights, Tensor cache_hash_size_cumsum, int total_cache_hash_size, Tensor cache_index_table_map, Tensor weights_offsets, Tensor D_offsets, Tensor linear_cache_indices, Tensor(a!) lxu_cache_state, Tensor(b!) lxu_cache_weights, Tensor(c!) lfu_state, bool stochastic_rounding) -> ()");
DISPATCH_TO_CUDA("lfu_cache_populate", lfu_cache_populate_cuda);
m.def(
"lfu_cache_populate_byte(Tensor weights, Tensor cache_hash_size_cumsum, int total_cache_hash_size, Tensor cache_index_table_map, Tensor weights_offsets, Tensor weights_tys, Tensor D_offsets, Tensor linear_cache_indices, Tensor(a!) lxu_cache_state, Tensor(b!) lxu_cache_weights, Tensor(c!) lfu_state) -> ()");
DISPATCH_TO_CUDA("lfu_cache_populate_byte", lfu_cache_populate_byte_cuda);
m.def(
"lxu_cache_lookup(Tensor linear_cache_indices, Tensor lxu_cache_state) -> Tensor");
DISPATCH_TO_CUDA("lxu_cache_lookup", lxu_cache_lookup_cuda);
m.def(
"lxu_cache_flush(Tensor(a!) uvm_weights, Tensor cache_hash_size_cumsum, Tensor cache_index_table_map, Tensor weights_offsets, Tensor D_offsets, int total_D, Tensor(b!) lxu_cache_state, Tensor(c!) lxu_cache_weights, bool stochastic_rounding) -> ()");
DISPATCH_TO_CUDA("lxu_cache_flush", lxu_cache_flush_cuda);
m.def("lxu_cache_slot(int h_in, int C) -> int");
m.impl(
"lxu_cache_slot",
torch::dispatch(
c10::DispatchKey::CatchAll, TORCH_FN(host_lxu_cache_slot)));
}

TORCH_LIBRARY_FRAGMENT(fbgemm, m) {
m.def(
"linearize_cache_indices(Tensor cache_hash_size_cumsum, Tensor indices, Tensor offsets) -> Tensor");
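One registration in this file differs from the backend-dispatched ones: lxu_cache_slot takes no tensors, so there is no device to dispatch on, and it is registered under DispatchKey::CatchAll. A minimal sketch of that pattern (the slot arithmetic is a made-up stand-in, not the real implementation):

#include <cstdint>
#include <torch/library.h>

// Made-up stand-in: map a hashed index to a cache slot in [0, C).
int64_t host_cache_slot(int64_t h_in, int64_t C) {
  return h_in % C;
}

TORCH_LIBRARY_FRAGMENT(fbgemm, m) {
  m.def("host_cache_slot(int h_in, int C) -> int");
  // CatchAll: with no Tensor inputs, one host kernel serves all backends.
  m.impl(
      "host_cache_slot",
      torch::dispatch(c10::DispatchKey::CatchAll, TORCH_FN(host_cache_slot)));
}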
