Merge branch 'master' into patch-1
tfboyd authored Jan 24, 2018
2 parents 5fd687c + 5fa2a4e commit 6b9d5fb
Showing 147 changed files with 3,261 additions and 347 deletions.
4 changes: 3 additions & 1 deletion CODEOWNERS
@@ -1,4 +1,4 @@
-/official/ @nealwu @k-w-w
+/official/ @nealwu @k-w-w @karmel
/research/adversarial_crypto/ @dave-andersen
/research/adversarial_text/ @rsepassi
/research/adv_imagenet_models/ @AlexeyKurakin
@@ -39,6 +39,8 @@
/research/video_prediction/ @cbfinn
+/research/fivo/ @dieterichlawson
/samples/ @MarkDaoust
+/samples/languages/java/ @asimshankar
/tutorials/embedding/ @zffchen78 @a-dai
/tutorials/image/ @sherrym @shlens
/tutorials/image/cifar10_estimator/ @tfboyd @protoget
/tutorials/rnn/ @lukaszkaiser @ebrevdo
10 changes: 5 additions & 5 deletions official/mnist/dataset.py
@@ -33,7 +33,7 @@ def read32(bytestream):

def check_image_file_header(filename):
  """Validate that filename corresponds to images for the MNIST dataset."""
-  with open(filename) as f:
+  with tf.gfile.Open(filename, 'rb') as f:
    magic = read32(f)
    num_images = read32(f)
    rows = read32(f)
@@ -49,7 +49,7 @@ def check_image_file_header(filename):

def check_labels_file_header(filename):
  """Validate that filename corresponds to labels for the MNIST dataset."""
-  with open(filename) as f:
+  with tf.gfile.Open(filename, 'rb') as f:
    magic = read32(f)
    num_items = read32(f)
    if magic != 2049:
@@ -58,12 +58,12 @@ def check_labels_file_header(filename):


def download(directory, filename):
-  """Download (and unzip) a file from the MNIST dataset, if it doesn't already exist."""
-  if not tf.gfile.Exists(directory):
-    tf.gfile.MakeDirs(directory)
+  """Download (and unzip) a file from the MNIST dataset if not already done."""
  filepath = os.path.join(directory, filename)
  if tf.gfile.Exists(filepath):
    return filepath
+  if not tf.gfile.Exists(directory):
+    tf.gfile.MakeDirs(directory)
  # CVDF mirror of http://yann.lecun.com/exdb/mnist/
  url = 'https://storage.googleapis.com/cvdf-datasets/mnist/' + filename + '.gz'
  zipped_filepath = filepath + '.gz'
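For context on why binary mode matters here: read32 (named in the hunk headers above) parses the MNIST IDX headers as raw big-endian unsigned 32-bit integers, which a text-mode open() can corrupt, notably on Windows and under Python 3. A minimal sketch of that decoding, assuming the read32 helper matches its use above (the io-based check is illustrative, not part of dataset.py):

import io

import numpy as np


def read32(bytestream):
  """Read 4 bytes and decode them as one big-endian unsigned 32-bit int."""
  dt = np.dtype(np.uint32).newbyteorder('>')
  return np.frombuffer(bytestream.read(4), dtype=dt)[0]


# MNIST IDX headers begin with a magic number: 2051 for images and 2049 for
# labels, matching the checks in the hunks above.
fake_header = io.BytesIO((2051).to_bytes(4, 'big'))
assert read32(fake_header) == 2051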
141 changes: 141 additions & 0 deletions official/mnist/mnist_tpu.py
@@ -0,0 +1,141 @@
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""MNIST model training using TPUs.
This program demonstrates training of the convolutional neural network model
defined in mnist.py on Google Cloud TPUs (https://cloud.google.com/tpu/).
If you are not interested in TPUs, you should ignore this file.
"""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import tensorflow as tf
import dataset
import mnist

tf.flags.DEFINE_string("data_dir", "",
"Path to directory containing the MNIST dataset")
tf.flags.DEFINE_string("model_dir", None, "Estimator model_dir")
tf.flags.DEFINE_integer("batch_size", 1024,
"Mini-batch size for the training. Note that this "
"is the global batch size and not the per-shard batch.")
tf.flags.DEFINE_integer("train_steps", 1000, "Total number of training steps.")
tf.flags.DEFINE_integer("eval_steps", 0,
"Total number of evaluation steps. If `0`, evaluation "
"after training is skipped.")
tf.flags.DEFINE_float("learning_rate", 0.05, "Learning rate.")

tf.flags.DEFINE_bool("use_tpu", True, "Use TPUs rather than plain CPUs")
tf.flags.DEFINE_string("master", "local", "GRPC URL of the Cloud TPU instance.")
tf.flags.DEFINE_integer("iterations", 50,
"Number of iterations per TPU training loop.")
tf.flags.DEFINE_integer("num_shards", 8, "Number of shards (TPU chips).")

FLAGS = tf.flags.FLAGS


def metric_fn(labels, logits):
  accuracy = tf.metrics.accuracy(
      labels=tf.argmax(labels, axis=1), predictions=tf.argmax(logits, axis=1))
  return {"accuracy": accuracy}


def model_fn(features, labels, mode, params):
  del params
  if mode == tf.estimator.ModeKeys.PREDICT:
    raise RuntimeError("mode {} is not supported yet".format(mode))
  image = features
  if isinstance(image, dict):
    image = features["image"]

  model = mnist.Model("channels_last")
  logits = model(image, training=(mode == tf.estimator.ModeKeys.TRAIN))
  loss = tf.losses.softmax_cross_entropy(onehot_labels=labels, logits=logits)

  if mode == tf.estimator.ModeKeys.TRAIN:
    learning_rate = tf.train.exponential_decay(
        FLAGS.learning_rate,
        tf.train.get_global_step(),
        decay_steps=100000,
        decay_rate=0.96)
    optimizer = tf.train.GradientDescentOptimizer(learning_rate=learning_rate)
    if FLAGS.use_tpu:
      optimizer = tf.contrib.tpu.CrossShardOptimizer(optimizer)
    return tf.contrib.tpu.TPUEstimatorSpec(
        mode=mode,
        loss=loss,
        train_op=optimizer.minimize(loss, tf.train.get_global_step()))

  if mode == tf.estimator.ModeKeys.EVAL:
    return tf.contrib.tpu.TPUEstimatorSpec(
        mode=mode, loss=loss, eval_metrics=(metric_fn, [labels, logits]))


def train_input_fn(params):
  batch_size = params["batch_size"]
  data_dir = params["data_dir"]
  # Retrieves the batch size for the current shard. The # of shards is
  # computed according to the input pipeline deployment. See
  # `tf.contrib.tpu.RunConfig` for details.
  ds = dataset.train(data_dir).cache().repeat().shuffle(
      buffer_size=50000).apply(
          tf.contrib.data.batch_and_drop_remainder(batch_size))
  images, labels = ds.make_one_shot_iterator().get_next()
  return images, labels


def eval_input_fn(params):
  batch_size = params["batch_size"]
  data_dir = params["data_dir"]
  ds = dataset.test(data_dir).apply(
      tf.contrib.data.batch_and_drop_remainder(batch_size))
  images, labels = ds.make_one_shot_iterator().get_next()
  return images, labels


def main(argv):
  del argv  # Unused.
  tf.logging.set_verbosity(tf.logging.INFO)

  run_config = tf.contrib.tpu.RunConfig(
      master=FLAGS.master,
      evaluation_master=FLAGS.master,
      model_dir=FLAGS.model_dir,
      session_config=tf.ConfigProto(
          allow_soft_placement=True, log_device_placement=True),
      tpu_config=tf.contrib.tpu.TPUConfig(FLAGS.iterations, FLAGS.num_shards),
  )

  estimator = tf.contrib.tpu.TPUEstimator(
      model_fn=model_fn,
      use_tpu=FLAGS.use_tpu,
      train_batch_size=FLAGS.batch_size,
      eval_batch_size=FLAGS.batch_size,
      params={"data_dir": FLAGS.data_dir},
      config=run_config)
  # TPUEstimator.train *requires* a max_steps argument.
  estimator.train(input_fn=train_input_fn, max_steps=FLAGS.train_steps)
  # TPUEstimator.evaluate *requires* a steps argument.
  # Note that the number of examples used during evaluation is
  # --eval_steps * --batch_size.
  # So if you change --batch_size then change --eval_steps too.
  if FLAGS.eval_steps:
    estimator.evaluate(input_fn=eval_input_fn, steps=FLAGS.eval_steps)


if __name__ == "__main__":
  tf.app.run()
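Two notes on the batch-size arithmetic encoded in the flags and comments above. First, --batch_size is the global batch; TPUEstimator splits it across shards, so each input function receives the per-shard value in params["batch_size"]. Second, as the comment in main() says, evaluation consumes --eval_steps * --batch_size examples. A small sanity-check sketch using the default flag values (the eval_steps value is a hypothetical choice, not from the file):

GLOBAL_BATCH_SIZE = 1024  # --batch_size (global, not per-shard)
NUM_SHARDS = 8            # --num_shards (TPU chips)

# What each invocation of train_input_fn/eval_input_fn sees:
per_shard_batch = GLOBAL_BATCH_SIZE // NUM_SHARDS
assert per_shard_batch == 128

eval_steps = 10  # hypothetical --eval_steps
examples_evaluated = eval_steps * GLOBAL_BATCH_SIZE
assert examples_evaluated == 10240

Relatedly, the input functions batch with tf.contrib.data.batch_and_drop_remainder rather than a plain batch() so every batch has exactly batch_size elements; TPU execution requires static shapes, and a smaller final batch would break that.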
17 changes: 17 additions & 0 deletions research/astronet/README.md
@@ -0,0 +1,17 @@
# Coming Soon!

This directory will soon be populated with TensorFlow models and data
processing code for identifying exoplanets in astrophysical light curves.

For full details, see the following paper:

*Identifying Exoplanets With Deep Learning: A Five Planet Resonant Chain Around
Kepler-80 And An Eighth Planet Around Kepler-90*

Christopher J Shallue and Andrew Vanderburg

To appear in the Astronomical Journal

Preprint available at https://www.cfa.harvard.edu/~avanderb/kepler90i.pdf

Contact: Chris Shallue (@cshallue)
4 changes: 2 additions & 2 deletions research/brain_coder/README.md
@@ -1,8 +1,8 @@
# Brain Coder

-*Authors: Daniel Abolafia, Quoc Le, Mohammad Norouzi*
+*Authors: Daniel Abolafia, Mohammad Norouzi, Quoc Le*

-Brain coder is a code synthesis experimental environment. We provide code that reproduces the results from our recent paper [Code Synthesis with Priority Queue Training](https://openreview.net/forum?id=r1AoGNlC-). See single_task/README.md for details on how to build and reproduce those experiments.
+Brain coder is a code synthesis experimental environment. We provide code that reproduces the results from our recent paper [Neural Program Synthesis with Priority Queue Training](https://arxiv.org/abs/1801.03526). See single_task/README.md for details on how to build and reproduce those experiments.

## Installation

1 change: 1 addition & 0 deletions research/brain_coder/common/config_lib.py
@@ -10,6 +10,7 @@

import ast
import itertools
+from six.moves import xrange


class Config(dict):
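The same one-line import recurs in most of the brain_coder files below: Python 3 removed the xrange builtin, and six.moves.xrange resolves to the builtin xrange on Python 2 and to range on Python 3, keeping iteration lazy on both interpreters. A standalone illustration (not from config_lib.py):

from six.moves import xrange  # pylint: disable=redefined-builtin

# Yields values lazily on both interpreters instead of materializing a
# full list the way Python 2's range() does.
assert list(xrange(4)) == [0, 1, 2, 3]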
1 change: 1 addition & 0 deletions research/brain_coder/common/schedules_test.py
@@ -7,6 +7,7 @@
from math import exp
from math import sqrt
import numpy as np
+from six.moves import xrange
import tensorflow as tf

from common import config_lib # brain coder
4 changes: 3 additions & 1 deletion research/brain_coder/common/utils.py
@@ -12,6 +12,8 @@

from absl import logging
import numpy as np
+import six
+from six.moves import xrange
import tensorflow as tf


@@ -137,7 +139,7 @@ def stack_pad(tensors, pad_axes=None, pad_to_lengths=None, dtype=np.float32,
  same_axes = dict(enumerate(max_lengths))
  if pad_axes is None:
    pad_axes = []
-  if isinstance(pad_axes, (int, long)):
+  if isinstance(pad_axes, six.integer_types):
    if pad_to_lengths is not None:
      max_lengths[pad_axes] = pad_to_lengths
      del same_axes[pad_axes]
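The replaced check breaks on Python 3, where the long type no longer exists and (int, long) raises NameError. six.integer_types is (int, long) on Python 2 and (int,) on Python 3, so the new test is equivalent on both interpreters. A standalone illustration (hypothetical values, not from utils.py):

import six


def is_plain_integer(x):
  """True for built-in integer types under both Python 2 and Python 3."""
  return isinstance(x, six.integer_types)


assert is_plain_integer(2)
assert not is_plain_integer(2.0)
assert not is_plain_integer([2])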
2 changes: 1 addition & 1 deletion research/brain_coder/single_task/README.md
@@ -1,6 +1,6 @@
# Experiments for ICLR 2018 paper.

-[Code Synthesis with Priority Queue Training](https://openreview.net/forum?id=r1AoGNlC-).
+[Neural Program Synthesis with Priority Queue Training](https://arxiv.org/abs/1801.03526).

Runs policy gradient (REINFORCE), priority queue training, genetic algorithm,
and uniform random search.
1 change: 1 addition & 0 deletions research/brain_coder/single_task/code_tasks.py
@@ -11,6 +11,7 @@

from absl import logging
import numpy as np
+from six.moves import xrange

from common import bf # brain coder
from common import reward as r # brain coder
2 changes: 1 addition & 1 deletion research/brain_coder/single_task/ga_lib.py
@@ -14,6 +14,7 @@
from absl import flags
from absl import logging
import numpy as np
+from six.moves import xrange

from common import bf # brain coder
from common import utils # brain coder
@@ -469,4 +470,3 @@ def __init__(self, *args):

def random_individual(genome_size):
  return lambda: Individual(np.random.choice(GENES, genome_size).tolist())
-
2 changes: 1 addition & 1 deletion research/brain_coder/single_task/ga_train.py
@@ -18,6 +18,7 @@
from absl import flags
from absl import logging
import numpy as np
+from six.moves import xrange
import tensorflow as tf

from common import utils # brain coder
@@ -321,4 +322,3 @@ def run_random_search(max_num_programs, checkpoint_dir, task_eval_fn,
      solution_found=found_solution, generations=num_programs_seen,
      num_programs=num_programs_seen, max_generations=max_num_programs,
      max_num_programs=max_num_programs)
-
2 changes: 1 addition & 1 deletion research/brain_coder/single_task/pg_agent.py
@@ -15,6 +15,7 @@

from absl import logging
import numpy as np
+from six.moves import xrange
import tensorflow as tf

from common import rollout as rollout_lib # brain coder
@@ -1294,4 +1295,3 @@ def process_episodes(
    batch_targets = np.array([], dtype=np.float32)

  return (batch_targets, batch_returns)
-
1 change: 1 addition & 0 deletions research/brain_coder/single_task/pg_agent_test.py
@@ -8,6 +8,7 @@

from absl import logging
import numpy as np
+from six.moves import xrange
import tensorflow as tf

from common import utils # brain coder
2 changes: 1 addition & 1 deletion research/brain_coder/single_task/results_lib.py
@@ -8,6 +8,7 @@
from collections import namedtuple
import os
import re
+from six.moves import xrange
import tensorflow as tf


@@ -152,4 +153,3 @@ def read_all(self, num_shards=None):
        r for shard_results in results_per_shard for r in shard_results]

    return aggregate, shard_stats
-
1 change: 1 addition & 0 deletions research/brain_coder/single_task/results_lib_test.py
@@ -8,6 +8,7 @@
import os
import shutil
import tempfile
+from six.moves import xrange
import tensorflow as tf

from single_task import results_lib # brain coder
3 changes: 1 addition & 2 deletions research/brain_coder/single_task/test_tasks.py
@@ -4,6 +4,7 @@

"""Tasks that test correctness of algorithms."""

+from six.moves import xrange
from common import reward as reward_lib # brain coder
from single_task import misc # brain coder

@@ -124,5 +125,3 @@ def __call__(self, actions):
    # closest next element.
    # Maximum distance possible is num_actions * base / 2 = 3 * 8 / 2 = 12
    return (len(prefix) + (1 - min_dist / 12.0)), False
-
-
1 change: 1 addition & 0 deletions research/brain_coder/single_task/tune.py
@@ -39,6 +39,7 @@
from absl import flags
from absl import logging
import numpy as np
+from six.moves import xrange
import tensorflow as tf

from single_task import defaults # brain coder
