Skip to content

Commit

Permalink
Remove contrib thread pool. (tensorflow#6175)
Browse files Browse the repository at this point in the history
* Remove contrib thread pool.

* Remove commented out contrib import.

* Fix lint issues.

* move tf.data.options higher. Tweak line breaks.
  • Loading branch information
tfboyd authored Feb 11, 2019
1 parent 27e8617 commit b6c0c7f
Show file tree
Hide file tree
Showing 6 changed files with 29 additions and 23 deletions.
20 changes: 9 additions & 11 deletions official/resnet/resnet_run_loop.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,6 @@
# pylint: disable=g-bad-import-order
from absl import flags
import tensorflow as tf
from tensorflow.contrib.data.python.ops import threadpool

from official.resnet import resnet_model
from official.utils.flags import core as flags_core
Expand Down Expand Up @@ -75,6 +74,15 @@ def process_record_dataset(dataset,
Returns:
Dataset of (image, label) pairs ready for iteration.
"""
# Defines a specific size thread pool for tf.data operations.
if datasets_num_private_threads:
options = tf.data.Options()
options.experimental_threading = tf.data.experimental.ThreadingOptions()
options.experimental_threading.private_threadpool_size = (
datasets_num_private_threads)
dataset = dataset.with_options(options)
tf.compat.v1.logging.info('datasets_num_private_threads: %s',
datasets_num_private_threads)

# Prefetches a batch at a time to smooth out the time taken to load input
# files for shuffling and processing.
Expand Down Expand Up @@ -102,16 +110,6 @@ def process_record_dataset(dataset,
# on how many devices are present.
dataset = dataset.prefetch(buffer_size=tf.data.experimental.AUTOTUNE)

# Defines a specific size thread pool for tf.data operations.
if datasets_num_private_threads:
tf.compat.v1.logging.info('datasets_num_private_threads: %s',
datasets_num_private_threads)
dataset = threadpool.override_threadpool(
dataset,
threadpool.PrivateThreadPool(
datasets_num_private_threads,
display_name='input_pipeline_thread_pool'))

return dataset


Expand Down
3 changes: 2 additions & 1 deletion official/utils/accelerator/tpu.py
Original file line number Diff line number Diff line change
Expand Up @@ -71,7 +71,8 @@ def host_call_fn(global_step, *args):
# expects [batch_size, ...] Tensors, thus reshape to introduce a batch
# dimension. These Tensors are implicitly concatenated to
# [params['batch_size']].
global_step_tensor = tf.reshape(tf.compat.v1.train.get_or_create_global_step(), [1])
global_step_tensor = tf.reshape(
tf.compat.v1.train.get_or_create_global_step(), [1])
other_tensors = [tf.reshape(metric_dict[key], [1]) for key in metric_names]

return host_call_fn, [global_step_tensor] + other_tensors
Expand Down
9 changes: 6 additions & 3 deletions official/utils/data/file_io.py
Original file line number Diff line number Diff line change
Expand Up @@ -156,6 +156,7 @@ def _serialize_shards(df_shards, columns, pool, writer):
for example in s:
writer.write(example)


def write_to_buffer(dataframe, buffer_path, columns, expected_size=None):
"""Write a dataframe to a binary file for a dataset to consume.
Expand All @@ -169,7 +170,8 @@ def write_to_buffer(dataframe, buffer_path, columns, expected_size=None):
Returns:
The path of the buffer.
"""
if tf.io.gfile.exists(buffer_path) and tf.io.gfile.stat(buffer_path).length > 0:
if (tf.io.gfile.exists(buffer_path) and
tf.io.gfile.stat(buffer_path).length > 0):
actual_size = tf.io.gfile.stat(buffer_path).length
if expected_size == actual_size:
return buffer_path
Expand All @@ -184,7 +186,8 @@ def write_to_buffer(dataframe, buffer_path, columns, expected_size=None):

tf.io.gfile.makedirs(os.path.split(buffer_path)[0])

tf.compat.v1.logging.info("Constructing TFRecordDataset buffer: {}".format(buffer_path))
tf.compat.v1.logging.info("Constructing TFRecordDataset buffer: {}"
.format(buffer_path))

count = 0
pool = multiprocessing.Pool(multiprocessing.cpu_count())
Expand All @@ -195,7 +198,7 @@ def write_to_buffer(dataframe, buffer_path, columns, expected_size=None):
_serialize_shards(df_shards, columns, pool, writer)
count += sum([len(s) for s in df_shards])
tf.compat.v1.logging.info("{}/{} examples written."
.format(str(count).ljust(8), len(dataframe)))
.format(str(count).ljust(8), len(dataframe)))
finally:
pool.terminate()

Expand Down
6 changes: 4 additions & 2 deletions official/utils/logs/hooks_helper.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,8 +57,9 @@ def get_train_hooks(name_list, use_tpu=False, **kwargs):
return []

if use_tpu:
tf.compat.v1.logging.warning("hooks_helper received name_list `{}`, but a TPU is "
"specified. No hooks will be used.".format(name_list))
tf.compat.v1.logging.warning('hooks_helper received name_list `{}`, but a '
'TPU is specified. No hooks will be used.'
.format(name_list))
return []

train_hooks = []
Expand Down Expand Up @@ -142,6 +143,7 @@ def get_logging_metric_hook(tensors_to_log=None,
names. If not set, log _TENSORS_TO_LOG by default.
every_n_secs: `int`, the frequency for logging the metric. Default to every
10 mins.
**kwargs: a dictionary of arguments.
Returns:
Returns a LoggingMetricHook that saves tensor values in a JSON format.
Expand Down
4 changes: 2 additions & 2 deletions official/utils/misc/model_helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -88,6 +88,6 @@ def generate_synthetic_data(

def apply_clean(flags_obj):
  """Remove an existing model directory when the --clean flag is set.

  Args:
    flags_obj: A flags object exposing `clean` (bool) and `model_dir` (str);
      presumably an absl FlagValues instance — confirm against callers.
  """
  # Nothing to do unless the user asked for a clean run AND the dir exists.
  if not flags_obj.clean:
    return
  model_dir = flags_obj.model_dir
  if tf.io.gfile.exists(model_dir):
    # Adjacent string literals concatenate: message is byte-identical
    # to the original single log line.
    tf.compat.v1.logging.info(
        "--clean flag set. Removing existing model dir:"
        " {}".format(model_dir))
    tf.io.gfile.rmtree(model_dir)
10 changes: 6 additions & 4 deletions official/utils/testing/reference_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -191,10 +191,11 @@ def _construct_and_save_reference_files(

if correctness_function is not None:
results = correctness_function(*eval_results)
with tf.io.gfile.GFile(os.path.join(data_dir, "results.json"), "w") as f:
result_json = os.path.join(data_dir, "results.json")
with tf.io.gfile.GFile(result_json, "w") as f:
json.dump(results, f)

with tf.io.gfile.GFile(os.path.join(data_dir, "tf_version.json"), "w") as f:
tf_version_json = os.path.join(data_dir, "tf_version.json")
with tf.io.gfile.GFile(tf_version_json, "w") as f:
json.dump([tf.version.VERSION, tf.version.GIT_VERSION], f)

def _evaluate_test_case(self, name, graph, ops_to_eval, correctness_function):
Expand Down Expand Up @@ -262,7 +263,8 @@ def _evaluate_test_case(self, name, graph, ops_to_eval, correctness_function):
eval_results = [op.eval() for op in ops_to_eval]
if correctness_function is not None:
results = correctness_function(*eval_results)
with tf.io.gfile.GFile(os.path.join(data_dir, "results.json"), "r") as f:
result_json = os.path.join(data_dir, "results.json")
with tf.io.gfile.GFile(result_json, "r") as f:
expected_results = json.load(f)
self.assertAllClose(results, expected_results)

Expand Down

0 comments on commit b6c0c7f

Please sign in to comment.