diff --git a/WORKSPACE b/WORKSPACE
index 581c832e5..3cb6bcc0d 100644
--- a/WORKSPACE
+++ b/WORKSPACE
@@ -2,6 +2,7 @@ workspace(name = "org_tensorflow_lite_support")
 
 load("@bazel_tools//tools/build_defs/repo:java.bzl", "java_import_external")
 load("@bazel_tools//tools/build_defs/repo:http.bzl", "http_archive")
+load("@//third_party/py:python_configure.bzl", "python_configure")
 
 http_archive(
     name = "io_bazel_rules_closure",
@@ -220,6 +221,37 @@ http_archive(
     build_file = "@//third_party:icu.BUILD",
 )
 
+http_archive(
+    name = "gemmlowp",
+    sha256 = "6678b484d929f2d0d3229d8ac4e3b815a950c86bb9f17851471d143f6d4f7834",
+    strip_prefix = "gemmlowp-12fed0cd7cfcd9e169bf1925bc3a7a58725fdcc3",
+    urls = [
+        "http://mirror.tensorflow.org/github.com/google/gemmlowp/archive/12fed0cd7cfcd9e169bf1925bc3a7a58725fdcc3.zip",
+        "https://github.com/google/gemmlowp/archive/12fed0cd7cfcd9e169bf1925bc3a7a58725fdcc3.zip",
+    ],
+)
+
+http_archive(
+    name = "fft2d",
+    build_file = "@//third_party/fft2d:fft2d.BUILD",
+    sha256 = "5f4dabc2ae21e1f537425d58a49cdca1c49ea11db0d6271e2a4b27e9697548eb",
+    strip_prefix = "OouraFFT-1.0",
+    urls = [
+        "https://storage.googleapis.com/mirror.tensorflow.org/github.com/petewarden/OouraFFT/archive/v1.0.tar.gz",
+        "https://github.com/petewarden/OouraFFT/archive/v1.0.tar.gz",
+    ],
+)
+
+http_archive(
+    name = "darts_clone",
+    build_file = "@//third_party:darts_clone.BUILD",
+    sha256 = "c97f55d05c98da6fcaf7f9ecc6a6dc6bc5b18b8564465f77abff8879d446491c",
+    strip_prefix = "darts-clone-e40ce4627526985a7767444b6ed6893ab6ff8983",
+    urls = [
+        "https://github.com/s-yata/darts-clone/archive/e40ce4627526985a7767444b6ed6893ab6ff8983.zip",
+    ],
+)
+
 # AutoValue 1.6+ shades Guava, Auto Common, and JavaPoet. That's OK
 # because none of these jars become runtime dependencies.
java_import_external( @@ -318,3 +350,5 @@ android_configure(name="local_config_android") load("@local_config_android//:android.bzl", "android_workspace") android_workspace() +python_configure(name = "local_config_python") + diff --git a/tensorflow_lite_support/codegen/python/BUILD b/tensorflow_lite_support/codegen/python/BUILD index 0ca1909b1..ee4dcbbdb 100644 --- a/tensorflow_lite_support/codegen/python/BUILD +++ b/tensorflow_lite_support/codegen/python/BUILD @@ -17,7 +17,7 @@ pybind_extension( deps = [ "//tensorflow_lite_support/codegen:android_java_generator", "//tensorflow_lite_support/codegen:code_generator", - "//third_party/python_runtime:headers", + "@local_config_python//:python_headers", "@pybind11", ], ) diff --git a/tensorflow_lite_support/custom_ops/kernel/sentencepiece/BUILD b/tensorflow_lite_support/custom_ops/kernel/sentencepiece/BUILD index 1e7423efa..3f0939b73 100644 --- a/tensorflow_lite_support/custom_ops/kernel/sentencepiece/BUILD +++ b/tensorflow_lite_support/custom_ops/kernel/sentencepiece/BUILD @@ -3,11 +3,7 @@ load("@flatbuffers//:build_defs.bzl", "flatbuffer_cc_library") load("@org_tensorflow//tensorflow:tensorflow.bzl", "tf_gen_op_wrapper_py") -load( - "//tensorflow_lite_support/custom_ops/kernel/sentencepiece:native.bzl", - "micore_tf_copts", - "micore_tf_deps", -) +load(":native.bzl", "micore_tf_copts", "micore_tf_deps") load("@org_tensorflow//tensorflow:tensorflow.bzl", "pybind_extension") package( @@ -63,7 +59,9 @@ cc_library( ], deps = [ ":encoder_config", - "//third_party/darts_clone", + "@com_google_absl//absl/memory", + "@com_google_absl//absl/strings", + "@darts_clone", ], ) @@ -90,9 +88,9 @@ cc_library( deps = [ ":double_array_trie_builder", ":encoder_config", - "//third_party/sentencepiece/src:sentencepiece_model_cc_proto", + "//tensorflow_lite_support/cc/port:statusor", "@com_google_absl//absl/status", - "@com_google_absl//absl/status:statusor", + "@com_google_sentencepiece//src:sentencepiece_model_cc_proto", ], ) @@ -131,6 +129,19 @@ cc_library( alwayslink = 1, ) +cc_binary( + name = "sentencepiece_tokenizer_op.so", + srcs = [ + "sentencepiece_tokenizer_op.cc", + ], + copts = micore_tf_copts(), + linkshared = 1, + deps = [ + ":sentencepiece_tokenizer_h", + ":optimized_encoder", + ] + micore_tf_deps(), +) + cc_library( name = "sentencepiece_tokenizer_tflite", srcs = ["sentencepiece_tokenizer_tflite.cc"], @@ -150,13 +161,6 @@ cc_library( ], ) -tf_gen_op_wrapper_py( - name = "gen_sentencepiece_tokenizer_op", - out = "gen_sentencepiece_tokenizer_op.py", - op_whitelist = ["TFSentencepieceTokenizeOp"], - deps = [":sentencepiece_tokenizer_op"], -) - cc_test( name = "optimized_encoder_test", srcs = [ @@ -170,9 +174,11 @@ cc_test( ":encoder_config", ":model_converter", ":optimized_encoder", - "//third_party/sentencepiece/src:sentencepiece_cc_proto", - "//third_party/sentencepiece/src:sentencepiece_processor", + "@com_google_absl//absl/status", + "@com_google_absl//absl/strings:str_format", "@com_google_googletest//:gtest_main", + "@com_google_sentencepiece//src:sentencepiece_cc_proto", + "@com_google_sentencepiece//src:sentencepiece_processor", "@org_tensorflow//tensorflow/core:lib", ], ) @@ -199,9 +205,99 @@ pybind_extension( module_name = "pywrap_tflite_registerer", deps = [ ":py_tflite_registerer", - "//third_party/pybind11", - "//third_party/python_runtime:headers", + "@local_config_python//:python_headers", "@org_tensorflow//tensorflow/lite:framework", "@org_tensorflow//tensorflow/lite/kernels:builtin_ops", + "@pybind11", ], ) + +pybind_extension( + name 
= "pywrap_model_converter", + srcs = ["pywrap_model_converter.cc"], + hdrs = ["model_converter.h"], + additional_exported_symbols = [ + "ConvertSentencepieceModel", + "GetVocabularySize", + ], + copts = ["-fexceptions"], + features = ["-use_header_modules"], + module_name = "pywrap_model_converter", + deps = [ + ":model_converter", + "@com_google_absl//absl/status", + "@local_config_python//:python_headers", + "@pybind11", + ], +) + +config_setting( + name = "armeabi_v7a_and_fastbuild", + values = { + "cpu": "armeabi-v7a", + "compilation_mode": "fastbuild", + }, + visibility = ["//visibility:public"], +) + +config_setting( + name = "armeabi_v7a_and_dbg", + values = { + "cpu": "armeabi-v7a", + "compilation_mode": "dbg", + }, + visibility = ["//visibility:public"], +) + +config_setting( + name = "android", + values = {"crosstool_top": "//external:android/crosstool"}, + visibility = ["//visibility:public"], +) + +config_setting( + name = "macos_i386", + values = { + "apple_platform_type": "macos", + "cpu": "darwin", + }, + visibility = ["//visibility:public"], +) + +config_setting( + name = "macos_x86_64", + values = { + "apple_platform_type": "macos", + "cpu": "darwin_x86_64", + }, + visibility = ["//visibility:public"], +) + +alias( + name = "macos", + actual = select({ + ":macos_i386": ":macos_i386", + ":macos_x86_64": ":macos_x86_64", + "//conditions:default": ":macos_i386", # Arbitrarily chosen from above. + }), + visibility = ["//visibility:public"], +) + +config_setting( + name = "ios", + values = { + "crosstool_top": "@bazel_tools//tools/cpp:toolchain", + "apple_platform_type": "ios", + }, + visibility = ["//visibility:public"], +) + +alias( + name = "apple", + actual = select({ + ":macos": ":macos", + ":ios": ":ios", + "//conditions:default": ":ios", # Arbitrarily chosen from above. + }), + visibility = ["//visibility:public"], +) diff --git a/tensorflow_lite_support/custom_ops/kernel/sentencepiece/double_array_trie_builder.cc b/tensorflow_lite_support/custom_ops/kernel/sentencepiece/double_array_trie_builder.cc index 381b8bd39..1f39663c2 100644 --- a/tensorflow_lite_support/custom_ops/kernel/sentencepiece/double_array_trie_builder.cc +++ b/tensorflow_lite_support/custom_ops/kernel/sentencepiece/double_array_trie_builder.cc @@ -17,7 +17,8 @@ limitations under the License. #include -#include "third_party/darts_clone/include/darts.h" +#include "absl/memory/memory.h" +#include "include/darts.h" namespace tflite { namespace support { diff --git a/tensorflow_lite_support/custom_ops/kernel/sentencepiece/model_converter.cc b/tensorflow_lite_support/custom_ops/kernel/sentencepiece/model_converter.cc index a3c44737c..efeef9e57 100644 --- a/tensorflow_lite_support/custom_ops/kernel/sentencepiece/model_converter.cc +++ b/tensorflow_lite_support/custom_ops/kernel/sentencepiece/model_converter.cc @@ -17,8 +17,8 @@ limitations under the License. 
#include "tensorflow_lite_support/custom_ops/kernel/sentencepiece/double_array_trie_builder.h" #include "tensorflow_lite_support/custom_ops/kernel/sentencepiece/encoder_config_generated.h" -#include "src/sentencepiece_model.proto.h" -#include "absl/status/status.h" +#include "src/sentencepiece_model.pb.h" + namespace tflite { namespace support { namespace ops { @@ -50,7 +50,7 @@ DecodePrecompiledCharsmap( std::vector(normalized_ptr, normalized_ptr + normalized_size)); } -absl::StatusOr ConvertSentencepieceModelToFlatBuffer( +tflite::support::StatusOr ConvertSentencepieceModelToFlatBuffer( const std::string& model_config_str, int encoding_offset) { ::sentencepiece::ModelProto model_config; if (!model_config.ParseFromString(model_config_str)) { diff --git a/tensorflow_lite_support/custom_ops/kernel/sentencepiece/model_converter.h b/tensorflow_lite_support/custom_ops/kernel/sentencepiece/model_converter.h index 13d62714a..1998b02a6 100644 --- a/tensorflow_lite_support/custom_ops/kernel/sentencepiece/model_converter.h +++ b/tensorflow_lite_support/custom_ops/kernel/sentencepiece/model_converter.h @@ -17,14 +17,14 @@ limitations under the License. #define THIRD_PARTY_TENSORFLOW_LITE_SUPPORT_CUSTOM_OPS_KERNEL_SENTENCEPIECE_MODEL_CONVERTER_H_ #include -#include "absl/status/statusor.h" +#include "tensorflow_lite_support/cc/port/statusor.h" namespace tflite { namespace support { namespace ops { // Converts Sentencepiece configuration to flatbuffer format. // encoding_offset is used by some encoders that combine different encodings. -absl::StatusOr ConvertSentencepieceModelToFlatBuffer( +tflite::support::StatusOr ConvertSentencepieceModelToFlatBuffer( const std::string& model_config_str, int encoding_offset = 0); // The functions that are provided for the Python wrapper. diff --git a/tensorflow_lite_support/custom_ops/kernel/sentencepiece/native.bzl b/tensorflow_lite_support/custom_ops/kernel/sentencepiece/native.bzl new file mode 100644 index 000000000..87695a46c --- /dev/null +++ b/tensorflow_lite_support/custom_ops/kernel/sentencepiece/native.bzl @@ -0,0 +1,86 @@ +"""Build definitions supporting platform-independent native build.""" + +load("@org_tensorflow//tensorflow:tensorflow.bzl", "tf_copts", "tf_opts_nortti_if_android") +load("@bazel_skylib//lib:selects.bzl", "selects") + +def micore_if(android, ios = [], default = []): + """Helper to create a select. + + Args: + android: what to return if compiling for Android. + ios: what to return if compiling for iOS. + default: what to return otherwise. + Returns: + the `android` list for Android compilation and the + `default` list otherwise. + """ + return select({ + ":android": android, + ":apple": ios, + "//conditions:default": default, + }) + +def micore_tf_copts(): + """C options for Tensorflow builds. + + Returns: + a list of copts which must be used by each cc_library which + refers to Tensorflow. Enables the library to compile both for + Android and for Linux. + """ + return tf_copts(android_optimization_level_override = None) + tf_opts_nortti_if_android() + [ + "-Wno-narrowing", + "-Wno-sign-compare", + "-Wno-overloaded-virtual", + ] + micore_if( + android = [ + # Set a define so Tensorflow's register_types.h + # adopts to support a rich set of types, to be pruned by + # selective registration. + "-DSUPPORT_SELECTIVE_REGISTRATION", + # Selective registration uses constexprs with recursive + # string comparisons; that can lead to compiler errors, so + # we increase the constexpr recursion depth. 
+ "-fconstexpr-depth=1024", + ], + ) + selects.with_or({ + # If building for armeabi-v7a, and if compilation_mode is 'fastbuild' + # or 'dbg' then forcefully add -Oz to the list compiler options. + # Without it, some TF dependencies can't build (b/112286436). If + # compilation_mode is 'opt' then rely on the toolchain default. + ( + ":armeabi_v7a_and_fastbuild", + ":armeabi_v7a_and_dbg", + ): ["-Oz"], + "//conditions:default": [], + }) + +def micore_tf_deps(): + """Dependencies for Tensorflow builds. + + Returns: + list of dependencies which must be used by each cc_library + which refers to Tensorflow. Enables the library to compile both for + Android and for Linux. Use this macro instead of directly + declaring dependencies on Tensorflow. + """ + return micore_if( + android = [ + # Link to library which does not contain any ops. + "@org_tensorflow//tensorflow/core:portable_tensorflow_lib_lite", + "@gemmlowp//:eight_bit_int_gemm", + "@fft2d//:fft2d", + ], + ios = [ + "@org_tensorflow//tensorflow/core:portable_tensorflow_lib", + "@gemmlowp//:eight_bit_int_gemm", + "@fft2d//:fft2d", + ], + default = [ + # Standard references for Tensorflow when building for Linux. We use + # an indirection via the alias targets below, to facilitate whitelisting + # these deps in the mobile license presubmit checks. + "@local_config_tf//:libtensorflow_framework", + "@local_config_tf//:tf_header_lib", + ], + ) diff --git a/tensorflow_lite_support/custom_ops/kernel/sentencepiece/optimized_encoder_test.cc b/tensorflow_lite_support/custom_ops/kernel/sentencepiece/optimized_encoder_test.cc index 56d975667..d768b9d4f 100644 --- a/tensorflow_lite_support/custom_ops/kernel/sentencepiece/optimized_encoder_test.cc +++ b/tensorflow_lite_support/custom_ops/kernel/sentencepiece/optimized_encoder_test.cc @@ -15,22 +15,52 @@ limitations under the License. 
#include "tensorflow_lite_support/custom_ops/kernel/sentencepiece/optimized_encoder.h" +#include + #include "tensorflow_lite_support/custom_ops/kernel/sentencepiece/double_array_trie_builder.h" #include "tensorflow_lite_support/custom_ops/kernel/sentencepiece/encoder_config_generated.h" #include "tensorflow_lite_support/custom_ops/kernel/sentencepiece/model_converter.h" #include #include -#include "src/sentencepiece.proto.h" +#include "absl/status/status.h" +#include "absl/strings/str_format.h" +#include "src/sentencepiece.pb.h" #include "src/sentencepiece_processor.h" #include "tensorflow/core/platform/env.h" + namespace tflite { namespace support { namespace ops { + + +namespace internal { + +tensorflow::Status TFReadFileToString( + const std::string& filepath, std::string* data) { + return tensorflow::ReadFileToString( + tensorflow::Env::Default(), /*test_path*/ filepath, data); +} + +absl::Status StdReadFileToString( + const std::string& filepath, std::string* data) { + std::ifstream infile(filepath); + if (!infile.is_open()) { + return absl::NotFoundError( + absl::StrFormat("Error when opening %s", filepath)); + } + std::string contents((std::istreambuf_iterator(infile)), + (std::istreambuf_iterator())); + data->append(contents); + infile.close(); + return absl::OkStatus(); +} +} // namespace internal + namespace { static char kConfigFilePath[] = - "/tensorflow_lite_support/custom_ops/kernel/" + "tensorflow_lite_support/custom_ops/kernel/" "sentencepiece/testdata/sentencepiece.model"; TEST(OptimizedEncoder, NormalizeStringWhitestpaces) { @@ -110,13 +140,11 @@ TEST(OptimizedEncoder, NormalizeStringWhitespacesRemove) { TEST(OptimizedEncoder, ConfigConverter) { std::string config; - auto status = tensorflow::ReadFileToString(tensorflow::Env::Default(), - FLAGS_test_srcdir + kConfigFilePath, &config); - + auto status = internal::StdReadFileToString(kConfigFilePath, &config); ASSERT_TRUE(status.ok()); ::sentencepiece::SentencePieceProcessor processor; - ASSERT_OK(processor.LoadFromSerializedProto(config)); + ASSERT_TRUE(processor.LoadFromSerializedProto(config).ok()); const auto converted_model = ConvertSentencepieceModel(config); const std::string test_string("Hello world!\\xF0\\x9F\\x8D\\x95"); const auto encoded = @@ -124,7 +152,7 @@ TEST(OptimizedEncoder, ConfigConverter) { ASSERT_EQ(encoded.codes.size(), encoded.offsets.size()); ::sentencepiece::SentencePieceText reference_encoded; - CHECK_OK(processor.Encode(test_string, &reference_encoded)); + ASSERT_TRUE(processor.Encode(test_string, &reference_encoded).ok()); EXPECT_EQ(encoded.codes.size(), reference_encoded.pieces_size()); for (int i = 0; i < encoded.codes.size(); ++i) { EXPECT_EQ(encoded.codes[i], reference_encoded.pieces(i).id()); diff --git a/tensorflow_lite_support/custom_ops/kernel/sentencepiece/py/BUILD b/tensorflow_lite_support/custom_ops/kernel/sentencepiece/py/BUILD deleted file mode 100644 index c9ec6c63b..000000000 --- a/tensorflow_lite_support/custom_ops/kernel/sentencepiece/py/BUILD +++ /dev/null @@ -1,18 +0,0 @@ -# Place holder for py clif macro. 
- -package( - default_visibility = [ - "//tensorflow_lite_support:users", - ], - licenses = ["notice"], # Apache 2.0 -) - -py_clif_cc( - name = "model_converter", - srcs = [ - "model_converter.clif", - ], - deps = [ - "//tensorflow_lite_support/custom_ops/kernel/sentencepiece:model_converter", - ], -) diff --git a/tensorflow_lite_support/custom_ops/kernel/sentencepiece/py/model_converter.clif b/tensorflow_lite_support/custom_ops/kernel/sentencepiece/py/model_converter.clif deleted file mode 100644 index 89a42263b..000000000 --- a/tensorflow_lite_support/custom_ops/kernel/sentencepiece/py/model_converter.clif +++ /dev/null @@ -1,21 +0,0 @@ -# Copyright 2020 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== -from "third_party/tensorflow_lite_support/custom_ops/kernel/sentencepiece/model_converter.h": - namespace `tflite::support::ops`: - def `ConvertSentencepieceModel` as - convert_sentencepiece_model(serialized_model:bytes) -> bytes - - def `GetVocabularySize` as - get_vocabulary_size(serialized_model:bytes) -> int diff --git a/tensorflow_lite_support/custom_ops/kernel/sentencepiece/pywrap_model_converter.cc b/tensorflow_lite_support/custom_ops/kernel/sentencepiece/pywrap_model_converter.cc new file mode 100644 index 000000000..0d3ece3fc --- /dev/null +++ b/tensorflow_lite_support/custom_ops/kernel/sentencepiece/pywrap_model_converter.cc @@ -0,0 +1,38 @@ +/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ + +#include "pybind11/pybind11.h" +#include "pybind11/stl.h" +#include "tensorflow_lite_support/custom_ops/kernel/sentencepiece/model_converter.h" + +namespace tflite { +namespace support { +namespace ops { + +namespace py = pybind11; + +PYBIND11_MODULE(pywrap_model_converter, m) { + m.def("convert_sentencepiece_model", [](py::bytes model_string) { + return py::bytes(ConvertSentencepieceModel(std::string(model_string))); + }); + + m.def("get_vocabulary_size", [](py::bytes model_string) { + return GetVocabularySize(std::string(model_string)); + }); +} + +} // namespace ops +} // namespace support +} // namespace tflite diff --git a/tensorflow_lite_support/custom_ops/kernel/sentencepiece/pywrap_tflite_registerer.cc b/tensorflow_lite_support/custom_ops/kernel/sentencepiece/pywrap_tflite_registerer.cc index 8741761c3..69860f021 100644 --- a/tensorflow_lite_support/custom_ops/kernel/sentencepiece/pywrap_tflite_registerer.cc +++ b/tensorflow_lite_support/custom_ops/kernel/sentencepiece/pywrap_tflite_registerer.cc @@ -14,8 +14,8 @@ limitations under the License. ==============================================================================*/ #include "tensorflow_lite_support/custom_ops/kernel/sentencepiece/py_tflite_registerer.h" -#include "third_party/pybind11/include/pybind11/pybind11.h" -#include "third_party/pybind11/include/pybind11/pytypes.h" +#include "pybind11/pybind11.h" +#include "pybind11/pytypes.h" PYBIND11_MODULE(pywrap_lingua_registerer, m) { m.doc() = R"pbdoc( diff --git a/tensorflow_lite_support/custom_ops/kernel/sentencepiece/sentencepiece_tokenizer_op.cc b/tensorflow_lite_support/custom_ops/kernel/sentencepiece/sentencepiece_tokenizer_op.cc index 24ac2ac6e..cce6ce7e4 100644 --- a/tensorflow_lite_support/custom_ops/kernel/sentencepiece/sentencepiece_tokenizer_op.cc +++ b/tensorflow_lite_support/custom_ops/kernel/sentencepiece/sentencepiece_tokenizer_op.cc @@ -22,7 +22,7 @@ limitations under the License. 
#include "tensorflow/core/framework/op_kernel.h" #include "tensorflow/core/framework/shape_inference.h" #include "tensorflow/core/framework/tensor.h" -#include "tensorflow/core/protobuf/error_codes.proto.h" +#include "tensorflow/core/protobuf/error_codes.pb.h" namespace tensorflow { namespace ops{ diff --git a/tensorflow_lite_support/custom_ops/kernel/whitespace_tokenizer_test.py b/tensorflow_lite_support/custom_ops/kernel/whitespace_tokenizer_test.py index 563117aef..2458add73 100644 --- a/tensorflow_lite_support/custom_ops/kernel/whitespace_tokenizer_test.py +++ b/tensorflow_lite_support/custom_ops/kernel/whitespace_tokenizer_test.py @@ -24,7 +24,7 @@ import tensorflow_text as tf_text # pylint: disable=g-direct-tensorflow-import from tensorflow.lite.python import interpreter as interpreter_wrapper -from third_party.tensorflow.python.platform import resource_loader +from tensorflow.python.platform import resource_loader TEST_CASES = [ ['this is a test'], diff --git a/tensorflow_lite_support/custom_ops/python/BUILD b/tensorflow_lite_support/custom_ops/python/BUILD index b2ebdd4f1..5f2f07345 100644 --- a/tensorflow_lite_support/custom_ops/python/BUILD +++ b/tensorflow_lite_support/custom_ops/python/BUILD @@ -17,12 +17,11 @@ py_library( py_library( name = "sentencepiece_tokenizer", srcs = ["sentencepiece_tokenizer.py"], + data = ["//tensorflow_lite_support/custom_ops/kernel/sentencepiece:sentencepiece_tokenizer_op.so"], srcs_version = "PY3", deps = [ "//tensorflow_lite_support/custom_ops:expect_tfpy_installed", - "//tensorflow_lite_support/custom_ops/kernel/sentencepiece:gen_sentencepiece_tokenizer_op", - "//tensorflow_lite_support/custom_ops/kernel/sentencepiece:sentencepiece_tokenizer_op", - "//tensorflow_lite_support/custom_ops/kernel/sentencepiece/py:model_converter", + "//tensorflow_lite_support/custom_ops/kernel/sentencepiece:pywrap_model_converter", ], ) @@ -38,6 +37,9 @@ py_test( "//tensorflow_lite_support/custom_ops:expect_tfpy_installed", "//tensorflow_lite_support/custom_ops:expect_tftext_installed", "//tensorflow_lite_support/custom_ops/kernel/sentencepiece:pywrap_tflite_registerer", + "@absl_py//absl:app", "@absl_py//absl/flags", + "@absl_py//absl/logging", + "@absl_py//absl/testing:parameterized", ], ) diff --git a/tensorflow_lite_support/custom_ops/python/sentencepiece_tokenizer.py b/tensorflow_lite_support/custom_ops/python/sentencepiece_tokenizer.py index 9e99a8a14..7100e9d7e 100644 --- a/tensorflow_lite_support/custom_ops/python/sentencepiece_tokenizer.py +++ b/tensorflow_lite_support/custom_ops/python/sentencepiece_tokenizer.py @@ -20,9 +20,13 @@ """ import tensorflow.compat.v2 as tf # pylint: disable=g-direct-tensorflow-import -from third_party.tensorflow.python.ops.ragged import ragged_tensor # pylint: disable=g-direct-tensorflow-import -from tensorflow_lite_support.custom_ops.kernel.sentencepiece import gen_sentencepiece_tokenizer_op -from tensorflow_lite_support.custom_ops.kernel.sentencepiece.py import model_converter +from tensorflow.python.ops.ragged import ragged_tensor # pylint: disable=g-direct-tensorflow-import +from tensorflow.python.framework import load_library +from tensorflow.python.platform import resource_loader +sentencepiece_tokenizer_op = load_library.load_op_library( + resource_loader.get_path_to_datafile('../kernel/sentencepiece/sentencepiece_tokenizer_op.so')) +gen_sentencepiece_tokenizer_op = sentencepiece_tokenizer_op.TFSentencepieceTokenizeOp +from tensorflow_lite_support.custom_ops.kernel.sentencepiece import pywrap_model_converter as 
model_converter class SentencepieceTokenizer: diff --git a/tensorflow_lite_support/custom_ops/python/sentencepiece_tokenizer_test.py b/tensorflow_lite_support/custom_ops/python/sentencepiece_tokenizer_test.py index 1e28484f7..1d72efe94 100644 --- a/tensorflow_lite_support/custom_ops/python/sentencepiece_tokenizer_test.py +++ b/tensorflow_lite_support/custom_ops/python/sentencepiece_tokenizer_test.py @@ -24,7 +24,7 @@ import tensorflow_text from tensorflow.lite.python import interpreter as interpreter_wrapper # pylint: disable=g-direct-tensorflow-import -from third_party.tensorflow.python.platform import resource_loader +from tensorflow.python.platform import resource_loader from tensorflow_lite_support.custom_ops.python import sentencepiece_tokenizer FLAGS = flags.FLAGS diff --git a/tensorflow_lite_support/custom_ops/tf_configure.sh b/tensorflow_lite_support/custom_ops/tf_configure.sh new file mode 100644 index 000000000..dbc96da73 --- /dev/null +++ b/tensorflow_lite_support/custom_ops/tf_configure.sh @@ -0,0 +1,60 @@ +# Copyright 2020 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== + +function write_action_env_to_bazelrc() { + echo "build --action_env $1=\"$2\"" >> .bazelrc +} + +function is_linux() { + [[ "${PLATFORM}" == "linux" ]] +} + +function is_macos() { + [[ "${PLATFORM}" == "darwin" ]] +} + +function is_windows() { + # On windows, the shell script is actually running in msys + [[ "${PLATFORM}" =~ msys_nt*|mingw*|cygwin*|uwin* ]] +} + +TF_CFLAGS=( $(python3 -c 'import tensorflow as tf; print(" ".join(tf.sysconfig.get_compile_flags()))') ) +TF_LFLAGS="$(python3 -c 'import tensorflow as tf; print(" ".join(tf.sysconfig.get_link_flags()))')" +HEADER_DIR=${TF_CFLAGS:2} +if is_windows; then + SHARED_LIBRARY_DIR=${SHARED_LIBRARY_DIR//\\//} + SHARED_LIBRARY_NAME=${SHARED_LIBRARY_NAME//\\//} + HEADER_DIR=${HEADER_DIR//\\//} +fi +if is_windows; then + # Use pywrap_tensorflow instead of tensorflow_framework on Windows + SHARED_LIBRARY_DIR=${TF_CFLAGS:2:-7}"python" +else + SHARED_LIBRARY_DIR=${TF_LFLAGS:2} +fi +SHARED_LIBRARY_NAME=$(echo $TF_LFLAGS | rev | cut -d":" -f1 | rev) +if ! [[ $TF_LFLAGS =~ .*:.* ]]; then + if is_macos; then + SHARED_LIBRARY_NAME="libtensorflow_framework.dylib" + elif is_windows; then + # Use pywrap_tensorflow's import library on Windows. It is in the same dir as the dll/pyd. 
+ SHARED_LIBRARY_NAME="_pywrap_tensorflow_internal.lib" + else + SHARED_LIBRARY_NAME="libtensorflow_framework.so" + fi +fi +write_action_env_to_bazelrc "TF_HEADER_DIR" ${HEADER_DIR} +write_action_env_to_bazelrc "TF_SHARED_LIBRARY_DIR" ${SHARED_LIBRARY_DIR} +write_action_env_to_bazelrc "TF_SHARED_LIBRARY_NAME" ${SHARED_LIBRARY_NAME} diff --git a/tensorflow_lite_support/metadata/flatbuffers_lib/BUILD b/tensorflow_lite_support/metadata/flatbuffers_lib/BUILD index 28f0c6c8d..d4171bf9d 100644 --- a/tensorflow_lite_support/metadata/flatbuffers_lib/BUILD +++ b/tensorflow_lite_support/metadata/flatbuffers_lib/BUILD @@ -15,8 +15,8 @@ pybind_extension( features = ["-use_header_modules"], module_name = "_pywrap_flatbuffers", deps = [ - "//third_party/python_runtime:headers", "@flatbuffers", + "@local_config_python//:python_headers", "@pybind11", ], ) diff --git a/tensorflow_lite_support/opensource/opensource_only.files b/tensorflow_lite_support/opensource/opensource_only.files index 1f964cdff..890a146f6 100644 --- a/tensorflow_lite_support/opensource/opensource_only.files +++ b/tensorflow_lite_support/opensource/opensource_only.files @@ -1,3 +1,4 @@ +tensorflow_lite_support/custom_ops/kernel/sentencepiece/native.bzl tensorflow_lite_support/opensource/BUILD tensorflow_lite_support/opensource/WORKSPACE tensorflow_lite_support/third_party/android/BUILD @@ -5,6 +6,12 @@ tensorflow_lite_support/third_party/android/android.bzl.tpl tensorflow_lite_support/third_party/android/android_configure.BUILD.tpl tensorflow_lite_support/third_party/android/android_configure.bzl tensorflow_lite_support/third_party/com_google_absl.BUILD +tensorflow_lite_support/third_party/darts_clone.BUILD +tensorflow_lite_support/third_party/fft2d/BUILD +tensorflow_lite_support/third_party/fft2d/LICENSE +tensorflow_lite_support/third_party/fft2d/fft.h +tensorflow_lite_support/third_party/fft2d/fft2d.BUILD +tensorflow_lite_support/third_party/fft2d/fft2d.h tensorflow_lite_support/third_party/google_toolbox_for_mac.BUILD tensorflow_lite_support/third_party/icu.BUILD tensorflow_lite_support/third_party/libyuv.BUILD diff --git a/third_party/darts_clone.BUILD b/third_party/darts_clone.BUILD new file mode 100644 index 000000000..1d95ec2fa --- /dev/null +++ b/third_party/darts_clone.BUILD @@ -0,0 +1,15 @@ +# Description: +# Darts-clone is a clone of Darts (Double-ARray Trie System). + +licenses(["notice"]) + +exports_files(["LICENSE"]) + +package(default_visibility = ["//visibility:public"]) + +cc_library( + name = "darts_clone", + hdrs = [ + "include/darts.h", + ], +) diff --git a/third_party/fft2d/BUILD b/third_party/fft2d/BUILD new file mode 100644 index 000000000..863a1cef9 --- /dev/null +++ b/third_party/fft2d/BUILD @@ -0,0 +1,48 @@ +# Headers for 2D Fast Fourier Transform package +# from http://momonga.t.u-tokyo.ac.jp/~ooura/fft2d.html +# This is a separate package because the original downloaded archive doesn't +# contain any header files. + +package( + default_visibility = ["//visibility:public"], +) + +# Unrestricted use; can only distribute original package. +# See fft/readme.txt +licenses(["notice"]) + +exports_files(["LICENSE"]) + +cc_library( + name = "fft2d_headers", + srcs = [ + "fft.h", + "fft2d.h", + ], +) + +objc_library( + name = "fft2d_headersd_ios", + srcs = [ + "fft.h", + "fft2d.h", + ], +) + +# Export the source code so that it could be compiled for Andoid native apps. 
+filegroup(
+    name = "fft2d_headers_srcs",
+    srcs = [
+        "fft.h",
+        "fft2d.h",
+    ],
+)
+
+filegroup(
+    name = "all_files",
+    srcs = glob(
+        ["**/*"],
+        exclude = ["**/OWNERS"],
+    ),
+    visibility = ["//third_party/tensorflow:__subpackages__"],
+)
diff --git a/third_party/fft2d/LICENSE b/third_party/fft2d/LICENSE
new file mode 100644
index 000000000..2bd85506a
--- /dev/null
+++ b/third_party/fft2d/LICENSE
@@ -0,0 +1,3 @@
+Copyright(C) 1997,2001 Takuya OOURA (email: ooura@kurims.kyoto-u.ac.jp).
+You may use, copy, modify this code for any purpose and
+without fee. You may distribute this ORIGINAL package.
diff --git a/third_party/fft2d/fft.h b/third_party/fft2d/fft.h
new file mode 100644
index 000000000..36d838b7f
--- /dev/null
+++ b/third_party/fft2d/fft.h
@@ -0,0 +1,36 @@
+/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+// Declarations for 1D FFT routines in third_party/fft2d/fft2d.
+
+#ifndef FFT2D_FFT_H__
+#define FFT2D_FFT_H__
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+extern void cdft(int, int, double *, int *, double *);
+extern void rdft(int, int, double *, int *, double *);
+extern void ddct(int, int, double *, int *, double *);
+extern void ddst(int, int, double *, int *, double *);
+extern void dfct(int, double *, double *, int *, double *);
+extern void dfst(int, double *, double *, int *, double *);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif  // FFT2D_FFT_H__
diff --git a/third_party/fft2d/fft2d.BUILD b/third_party/fft2d/fft2d.BUILD
new file mode 100644
index 000000000..9fa5097f3
--- /dev/null
+++ b/third_party/fft2d/fft2d.BUILD
@@ -0,0 +1,45 @@
+# 2D Fast Fourier Transform package
+# from http://momonga.t.u-tokyo.ac.jp/~ooura/fft2d.html
+
+package(
+    default_visibility = ["//visibility:public"],
+)
+
+# Unrestricted use; can only distribute original package.
+licenses(["notice"])
+
+exports_files(["readme2d.txt"])
+
+FFT2D_SRCS = [
+    "fftsg.c",
+    "fftsg2d.c",
+]
+
+config_setting(
+    name = "windows",
+    values = {"cpu": "x64_windows"},
+)
+
+# This is the main 2D FFT library. The 2D FFTs in this library call
+# 1D FFTs. In addition, fast DCTs are provided for the special case
+# of 8x8 and 16x16. This code in this library is referred to as
+# "Version II" on http://momonga.t.u-tokyo.ac.jp/~ooura/fft2d.html.
+cc_library(
+    name = "fft2d",
+    srcs = FFT2D_SRCS,
+    linkopts = select({
+        ":windows": [],
+        "//conditions:default": ["-lm"],
+    }),
+)
+
+objc_library(
+    name = "fft2d_ios",
+    srcs = FFT2D_SRCS,
+)
+
+# Export the source code so that it could be compiled for Android native apps.
+filegroup(
+    name = "fft2d_srcs",
+    srcs = FFT2D_SRCS,
+)
diff --git a/third_party/fft2d/fft2d.h b/third_party/fft2d/fft2d.h
new file mode 100644
index 000000000..d587b3b44
--- /dev/null
+++ b/third_party/fft2d/fft2d.h
@@ -0,0 +1,36 @@
+/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+// Declarations for 2D FFT routines in third_party/fft2d/fft2d.
+
+#ifndef FFT2D_FFT2D_H__
+#define FFT2D_FFT2D_H__
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+extern void cdft2d(int, int, int, double **, double *, int *, double *);
+extern void rdft2d(int, int, int, double **, double *, int *, double *);
+extern void ddct2d(int, int, int, double **, double *, int *, double *);
+extern void ddst2d(int, int, int, double **, double *, int *, double *);
+extern void ddct8x8s(int isgn, double **a);
+extern void ddct16x16s(int isgn, double **a);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif  // FFT2D_FFT2D_H__
diff --git a/third_party/py/BUILD b/third_party/py/BUILD
new file mode 100644
index 000000000..e69de29bb
diff --git a/third_party/py/BUILD.tpl b/third_party/py/BUILD.tpl
new file mode 100644
index 000000000..cc0e013bd
--- /dev/null
+++ b/third_party/py/BUILD.tpl
@@ -0,0 +1,31 @@
+licenses(["restricted"])
+
+package(default_visibility = ["//visibility:public"])
+
+# Point both runtimes to the same python binary to ensure we always
+# use the python binary specified by ./configure.py script.
+load("@bazel_tools//tools/python:toolchain.bzl", "py_runtime_pair")
+
+py_runtime(
+    name = "py2_runtime",
+    interpreter_path = "%{PYTHON_BIN_PATH}",
+    python_version = "PY2",
+)
+
+py_runtime(
+    name = "py3_runtime",
+    interpreter_path = "%{PYTHON_BIN_PATH}",
+    python_version = "PY3",
+)
+
+py_runtime_pair(
+    name = "py_runtime_pair",
+    py2_runtime = ":py2_runtime",
+    py3_runtime = ":py3_runtime",
+)
+
+toolchain(
+    name = "py_toolchain",
+    toolchain = ":py_runtime_pair",
+    toolchain_type = "@bazel_tools//tools/python:toolchain_type",
+)
diff --git a/third_party/py/python_configure.bzl b/third_party/py/python_configure.bzl
new file mode 100644
index 000000000..6601d7f2a
--- /dev/null
+++ b/third_party/py/python_configure.bzl
@@ -0,0 +1,71 @@
+"""Repository rule for Python autoconfiguration.
+
+`python_configure` depends on the following environment variables:
+
+  * `PYTHON_BIN_PATH`: location of python binary.
+""" + +_PYTHON_BIN_PATH = "PYTHON_BIN_PATH" + +def _tpl(repository_ctx, tpl, substitutions = {}, out = None): + if not out: + out = tpl + repository_ctx.template( + out, + Label("//third_party/py:%s.tpl" % tpl), + substitutions, + ) + +def _fail(msg): + """Output failure message when auto configuration fails.""" + red = "\033[0;31m" + no_color = "\033[0m" + fail("%sPython Configuration Error:%s %s\n" % (red, no_color, msg)) + +def _get_python_bin(repository_ctx): + """Gets the python bin path.""" + python_bin = repository_ctx.os.environ.get(_PYTHON_BIN_PATH) + if python_bin != None: + return python_bin + python_bin_path = repository_ctx.which("python") + if python_bin_path != None: + return str(python_bin_path) + _fail("Cannot find python in PATH, please make sure " + + "python is installed and add its directory in PATH, or --define " + + "%s='/something/else'.\nPATH=%s" % ( + _PYTHON_BIN_PATH, + repository_ctx.os.environ.get("PATH", ""), + )) + +def _create_local_python_repository(repository_ctx): + """Creates the repository containing files set up to build with Python.""" + python_bin = _get_python_bin(repository_ctx) + _tpl(repository_ctx, "BUILD", { + "%{PYTHON_BIN_PATH}": python_bin, + }) + +def _python_autoconf_impl(repository_ctx): + """Implementation of the python_autoconf repository rule.""" + _create_local_python_repository(repository_ctx) + +python_configure = repository_rule( + implementation = _python_autoconf_impl, + environ = [ + _PYTHON_BIN_PATH, + ], +) +"""Detects and configures the local Python toolchain. + +Add the following to your WORKSPACE FILE: + +```python +load("//third_party/py:python_configure.bzl", "python_configure") + +python_configure(name = "local_config_py_toolchain") + +register_toolchains("@local_config_py_toolchain//:py_toolchain") +``` + +Args: + name: A unique name for this workspace rule. +""" diff --git a/third_party/tensorflow/BUILD.tpl b/third_party/tensorflow/BUILD.tpl index aec61646a..095021ed2 100644 --- a/third_party/tensorflow/BUILD.tpl +++ b/third_party/tensorflow/BUILD.tpl @@ -9,7 +9,7 @@ cc_library( cc_library( name = "libtensorflow_framework", - srcs = [":libtensorflow_framework_so"], + srcs = [":libtensorflow_framework.so"], visibility = ["//visibility:public"], ) diff --git a/third_party/tensorflow/tf_configure.bzl b/third_party/tensorflow/tf_configure.bzl index a9b397c02..328262559 100644 --- a/third_party/tensorflow/tf_configure.bzl +++ b/third_party/tensorflow/tf_configure.bzl @@ -1,4 +1,4 @@ -"""Setup TensorFlow as external dependency.""" +"""Setup TensorFlow as external dependency""" _TF_HEADER_DIR = "TF_HEADER_DIR" _TF_SHARED_LIBRARY_DIR = "TF_SHARED_LIBRARY_DIR" @@ -9,7 +9,7 @@ def _tpl(repository_ctx, tpl, substitutions = {}, out = None): out = tpl repository_ctx.template( out, - Label("@org_tensorflow//tensorflow:%s.tpl" % tpl), + Label("//third_party/tensorflow:%s.tpl" % tpl), substitutions, ) @@ -129,7 +129,8 @@ def _symlink_genrule_for_dir( dest_dir, genrule_name, src_files = [], - dest_files = []): + dest_files = [], + tf_pip_dir_rename_pair = []): """Returns a genrule to symlink(or copy if on Windows) a set of files. If src_dir is passed, files will be read from the given directory; otherwise @@ -142,17 +143,29 @@ def _symlink_genrule_for_dir( genrule_name: genrule name. src_files: list of source files instead of src_dir. dest_files: list of corresonding destination files. - + tf_pip_dir_rename_pair: list of the pair of tf pip parent directory to + replace. 
For example, in TF pip package, the source code is under + "tensorflow_core", and we might want to replace it with + "tensorflow" to match the header includes. Returns: genrule target that creates the symlinks. """ + + # Check that tf_pip_dir_rename_pair has the right length + tf_pip_dir_rename_pair_len = len(tf_pip_dir_rename_pair) + if tf_pip_dir_rename_pair_len != 0 and tf_pip_dir_rename_pair_len != 2: + _fail("The size of argument tf_pip_dir_rename_pair should be either 0 or 2, but %d is given." % tf_pip_dir_rename_pair_len) + if src_dir != None: src_dir = _norm_path(src_dir) dest_dir = _norm_path(dest_dir) files = "\n".join(sorted(_read_dir(repository_ctx, src_dir).splitlines())) # Create a list with the src_dir stripped to use for outputs. - dest_files = files.replace(src_dir, "").splitlines() + if tf_pip_dir_rename_pair_len: + dest_files = files.replace(src_dir, "").replace(tf_pip_dir_rename_pair[0], tf_pip_dir_rename_pair[1]).splitlines() + else: + dest_files = files.replace(src_dir, "").splitlines() src_files = files.splitlines() command = [] outs = [] @@ -166,6 +179,7 @@ def _symlink_genrule_for_dir( cmd = "cp -f" command.append(cmd + ' "%s" "%s"' % (src_files[i], dest)) outs.append(' "' + dest_dir + dest_files[i] + '",') + dest_dir = "abc" genrule = _genrule( genrule_name, " && ".join(command), @@ -180,6 +194,7 @@ def _tf_pip_impl(repository_ctx): tf_header_dir, "include", "tf_header_include", + tf_pip_dir_rename_pair = ["tensorflow_core", "tensorflow"], ) tf_shared_library_dir = repository_ctx.os.environ[_TF_SHARED_LIBRARY_DIR] @@ -189,9 +204,9 @@ def _tf_pip_impl(repository_ctx): repository_ctx, None, "", - "libtensorflow_framework_so", + "libtensorflow_framework.so", [tf_shared_library_path], - [tf_shared_library_name], + ["_pywrap_tensorflow_internal.lib" if _is_windows(repository_ctx) else "libtensorflow_framework.so"], ) _tpl(repository_ctx, "BUILD", { @@ -204,5 +219,6 @@ tf_configure = repository_rule( environ = [ _TF_HEADER_DIR, _TF_SHARED_LIBRARY_DIR, + _TF_SHARED_LIBRARY_NAME, ], )
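Taken together, the patch replaces the CLIF-based Python bindings with a pybind11 extension and a TF custom op that is built as a shared object and loaded at runtime. A minimal usage sketch follows (not part of the patch; the model path and `.so` path below are illustrative assumptions, not files this change adds):

```python
# Sketch only: converting a SentencePiece model with the new pybind11 module
# and loading the tokenizer op shared object built by this change.
import tensorflow.compat.v2 as tf

from tensorflow_lite_support.custom_ops.kernel.sentencepiece import (
    pywrap_model_converter as model_converter,
)

# Serialized sentencepiece.ModelProto produced by the SentencePiece trainer.
with open("/tmp/sp.model", "rb") as f:  # hypothetical path
    serialized_model = f.read()

# Convert to the flatbuffer config consumed by the optimized TFLite encoder
# and query the vocabulary size, mirroring the functions exported by
# pywrap_model_converter.cc above.
flatbuffer_config = model_converter.convert_sentencepiece_model(serialized_model)
vocab_size = model_converter.get_vocabulary_size(serialized_model)
print("vocabulary size:", vocab_size)

# The TF-side tokenizer op now ships as sentencepiece_tokenizer_op.so;
# sentencepiece_tokenizer.py loads it via resource_loader, and it can also be
# loaded directly from the build output:
op_lib = tf.load_op_library(
    "bazel-bin/tensorflow_lite_support/custom_ops/kernel/sentencepiece/"
    "sentencepiece_tokenizer_op.so"  # hypothetical build-output path
)
```

Before building, `tensorflow_lite_support/custom_ops/tf_configure.sh` appends the `TF_HEADER_DIR`, `TF_SHARED_LIBRARY_DIR`, and `TF_SHARED_LIBRARY_NAME` action_env entries to `.bazelrc`, and the WORKSPACE change wires up `@local_config_python` so the pybind extensions can find the Python headers.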