Merge pull request tensorflow#3853 from walkerlala/add-ade20k
add ADE20K dataset
aquariusjay authored Apr 9, 2018
2 parents 18e0643 + 13c9de3 commit 6741cfc
Showing 9 changed files with 357 additions and 18 deletions.
1 change: 1 addition & 0 deletions research/deeplab/README.md
@@ -90,6 +90,7 @@ Running:
* <a href='g3doc/installation.md'>Installation.</a><br>
* <a href='g3doc/pascal.md'>Running DeepLab on PASCAL VOC 2012 semantic segmentation dataset.</a><br>
* <a href='g3doc/cityscapes.md'>Running DeepLab on Cityscapes semantic segmentation dataset.</a><br>
* <a href='g3doc/ade20k.md'>Running DeepLab on ADE20K semantic segmentation dataset.</a><br>

Models:

113 changes: 113 additions & 0 deletions research/deeplab/datasets/build_ade20k_data.py
@@ -0,0 +1,113 @@
# Copyright 2018 The TensorFlow Authors All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
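"""Converts ADE20K data to TFRecord file format with Example protos."""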

import math
import os
import random
import sys

import build_data
import tensorflow as tf

FLAGS = tf.app.flags.FLAGS

tf.app.flags.DEFINE_string(
    'train_image_folder',
    './ADE20K/ADEChallengeData2016/images/training',
    'Folder containing training images.')

tf.app.flags.DEFINE_string(
    'train_image_label_folder',
    './ADE20K/ADEChallengeData2016/annotations/training',
    'Folder containing annotations for training images.')

tf.app.flags.DEFINE_string(
    'val_image_folder',
    './ADE20K/ADEChallengeData2016/images/validation',
    'Folder containing validation images.')

tf.app.flags.DEFINE_string(
    'val_image_label_folder',
    './ADE20K/ADEChallengeData2016/annotations/validation',
    'Folder containing annotations for validation images.')

tf.app.flags.DEFINE_string(
    'output_dir', './ADE20K/tfrecord',
    'Path to save converted TFRecord of TensorFlow examples.')

_NUM_SHARDS = 4

def _convert_dataset(dataset_split, dataset_dir, dataset_label_dir):
  """Converts the ADE20K dataset into TFRecord file format (SSTable).

  Args:
    dataset_split: Dataset split (e.g., train, val).
    dataset_dir: Directory containing the images.
    dataset_label_dir: Directory containing the annotations.

  Raises:
    RuntimeError: If loaded image and label have different shape.
  """
  img_names = tf.gfile.Glob(os.path.join(dataset_dir, '*.jpg'))
  random.shuffle(img_names)
  seg_names = []
  for f in img_names:
    # Get the filename without the extension.
    basename = os.path.basename(f).split('.')[0]
    # Derive the path of its corresponding annotation (*.png).
    seg = os.path.join(dataset_label_dir, basename + '.png')
    seg_names.append(seg)

  num_images = len(img_names)
  num_per_shard = int(math.ceil(num_images / float(_NUM_SHARDS)))

  image_reader = build_data.ImageReader('jpeg', channels=3)
  label_reader = build_data.ImageReader('png', channels=1)

  for shard_id in range(_NUM_SHARDS):
    output_filename = os.path.join(
        FLAGS.output_dir,
        '%s-%05d-of-%05d.tfrecord' % (dataset_split, shard_id, _NUM_SHARDS))
    with tf.python_io.TFRecordWriter(output_filename) as tfrecord_writer:
      start_idx = shard_id * num_per_shard
      end_idx = min((shard_id + 1) * num_per_shard, num_images)
      for i in range(start_idx, end_idx):
        sys.stdout.write('\r>> Converting image %d/%d shard %d' % (
            i + 1, num_images, shard_id))
        sys.stdout.flush()
        # Read the image in binary mode.
        image_filename = img_names[i]
        image_data = tf.gfile.FastGFile(image_filename, 'rb').read()
        height, width = image_reader.read_image_dims(image_data)
        # Read the semantic segmentation annotation.
        seg_filename = seg_names[i]
        seg_data = tf.gfile.FastGFile(seg_filename, 'rb').read()
        seg_height, seg_width = label_reader.read_image_dims(seg_data)
        if height != seg_height or width != seg_width:
          raise RuntimeError('Shape mismatch between image and label.')
        # Convert to tf example.
        example = build_data.image_seg_to_tfexample(
            image_data, img_names[i], height, width, seg_data)
        tfrecord_writer.write(example.SerializeToString())
    sys.stdout.write('\n')
    sys.stdout.flush()

def main(unused_argv):
  tf.gfile.MakeDirs(FLAGS.output_dir)
  _convert_dataset(
      'train', FLAGS.train_image_folder, FLAGS.train_image_label_folder)
  _convert_dataset(
      'val', FLAGS.val_image_folder, FLAGS.val_image_label_folder)


if __name__ == '__main__':
  tf.app.run()
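To spot-check the generated shards, here is a minimal sketch (not part of this commit) that decodes the first record of one shard. It assumes TF 1.x and the feature keys written by build_data.image_seg_to_tfexample in the sibling build_data.py; adjust the keys if that file differs.

import tensorflow as tf

# Read the first serialized Example from one shard and inspect a few fields.
record = next(tf.python_io.tf_record_iterator(
    './ADE20K/tfrecord/train-00000-of-00004.tfrecord'))
example = tf.train.Example.FromString(record)
features = example.features.feature
print(features['image/filename'].bytes_list.value[0])
print(features['image/height'].int64_list.value[0],
      features['image/width'].int64_list.value[0])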
3 changes: 1 addition & 2 deletions research/deeplab/datasets/build_voc2012_data.py
@@ -50,7 +50,6 @@
image/segmentation/class/encoded: encoded semantic segmentation content.
image/segmentation/class/format: semantic segmentation file format.
"""
-import glob
import math
import os.path
import sys
@@ -133,7 +132,7 @@ def _convert_dataset(dataset_split):


def main(unused_argv):
-  dataset_splits = glob.glob(os.path.join(FLAGS.list_folder, '*.txt'))
+  dataset_splits = tf.gfile.Glob(os.path.join(FLAGS.list_folder, '*.txt'))
  for dataset_split in dataset_splits:
    _convert_dataset(dataset_split)

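The swap from glob.glob to tf.gfile.Glob matters when list_folder lives on a non-local filesystem: tf.gfile resolves any path TensorFlow's file system layer understands. A minimal sketch of the difference (the GCS bucket below is hypothetical):

import tensorflow as tf

# Behaves like glob.glob for local paths...
splits = tf.gfile.Glob('./pascal_voc_seg/VOCdevkit/VOC2012/ImageSets/Segmentation/*.txt')
# ...but also handles remote filesystems such as GCS (hypothetical bucket).
splits = tf.gfile.Glob('gs://my-bucket/list_folder/*.txt')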
80 changes: 80 additions & 0 deletions research/deeplab/datasets/download_and_convert_ade20k.sh
@@ -0,0 +1,80 @@
#!/bin/bash
# Copyright 2018 The TensorFlow Authors All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
#
# Script to download and preprocess the ADE20K dataset.
#
# Usage:
# bash ./download_and_convert_ade20k.sh
#
# The folder structure is assumed to be:
# + datasets
# - build_data.py
# - build_ade20k_data.py
# - download_and_convert_ade20k.sh
# + ADE20K
# + tfrecord
# + ADEChallengeData2016
# + annotations
# + training
# + validation
# + images
# + training
# + validation

# Exit immediately if a command exits with a non-zero status.
set -e

CURRENT_DIR=$(pwd)
WORK_DIR="./ADE20K"
mkdir -p "${WORK_DIR}"
cd "${WORK_DIR}"

# Helper function to download and unpack the ADE20K dataset.
download_and_uncompress() {
  local BASE_URL=${1}
  local FILENAME=${2}

  if [ ! -f "${FILENAME}" ]; then
    echo "Downloading ${FILENAME} to ${WORK_DIR}"
    wget -nd -c "${BASE_URL}/${FILENAME}"
  fi
  echo "Uncompressing ${FILENAME}"
  unzip "${FILENAME}"
}

# Download the images.
BASE_URL="http://data.csail.mit.edu/places/ADEchallenge"
FILENAME="ADEChallengeData2016.zip"

download_and_uncompress "${BASE_URL}" "${FILENAME}"

cd "${CURRENT_DIR}"

# Root path for ADE20K dataset.
ADE20K_ROOT="${WORK_DIR}/ADEChallengeData2016"

# Build TFRecords of the dataset.
# First, create output directory for storing TFRecords.
OUTPUT_DIR="${WORK_DIR}/tfrecord"
mkdir -p "${OUTPUT_DIR}"

echo "Converting ADE20K dataset..."
python ./build_ade20k_data.py \
  --train_image_folder="${ADE20K_ROOT}/images/training/" \
  --train_image_label_folder="${ADE20K_ROOT}/annotations/training/" \
  --val_image_folder="${ADE20K_ROOT}/images/validation/" \
  --val_image_label_folder="${ADE20K_ROOT}/annotations/validation/" \
  --output_dir="${OUTPUT_DIR}"
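Once the script finishes, ${OUTPUT_DIR} should contain four train and four val shards. A quick sketch (not part of the commit) to count records per shard; the totals should match the 20210 training and 2000 validation images listed in segmentation_dataset.py below:

import tensorflow as tf

for shard in sorted(tf.gfile.Glob('./ADE20K/tfrecord/*.tfrecord')):
  count = sum(1 for _ in tf.python_io.tf_record_iterator(shard))
  print(shard, count)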
14 changes: 7 additions & 7 deletions research/deeplab/datasets/download_and_convert_voc2012.sh
@@ -17,13 +17,13 @@
# Script to download and preprocess the PASCAL VOC 2012 dataset.
#
# Usage:
-# bash ./download_and_preprocess_voc2012.sh
+# bash ./download_and_convert_voc2012.sh
#
# The folder structure is assumed to be:
# + datasets
# - build_data.py
# - build_voc2012_data.py
-# - download_and_preprocess_voc2012.sh
+# - download_and_convert_voc2012.sh
# - remove_gt_colormap.py
# + pascal_voc_seg
# + VOCdevkit
@@ -37,27 +37,27 @@ set -e

CURRENT_DIR=$(pwd)
WORK_DIR="./pascal_voc_seg"
-mkdir -p ${WORK_DIR}
-cd ${WORK_DIR}
+mkdir -p "${WORK_DIR}"
+cd "${WORK_DIR}"

# Helper function to download and unpack VOC 2012 dataset.
download_and_uncompress() {
  local BASE_URL=${1}
  local FILENAME=${2}

-  if [ ! -f ${FILENAME} ]; then
+  if [ ! -f "${FILENAME}" ]; then
    echo "Downloading ${FILENAME} to ${WORK_DIR}"
    wget -nd -c "${BASE_URL}/${FILENAME}"
  fi
  echo "Uncompressing ${FILENAME}"
-  tar -xf ${FILENAME}
+  tar -xf "${FILENAME}"
}

# Download the images.
BASE_URL="http://host.robots.ox.ac.uk/pascal/VOC/voc2012/"
FILENAME="VOCtrainval_11-May-2012.tar"

-download_and_uncompress ${BASE_URL} ${FILENAME}
+download_and_uncompress "${BASE_URL}" "${FILENAME}"

cd "${CURRENT_DIR}"

20 changes: 20 additions & 0 deletions research/deeplab/datasets/segmentation_dataset.py
@@ -31,6 +31,11 @@
The Cityscapes dataset contains 19 semantic labels (such as road, person, car,
and so on) for urban street scenes.
3. ADE20K dataset (http://groups.csail.mit.edu/vision/datasets/ADE20K)

The ADE20K dataset contains 150 semantic labels covering both urban street
scenes and indoor scenes.

References:
  M. Everingham, S. M. A. Eslami, L. V. Gool, C. K. I. Williams, J. Winn,
  and A. Zisserman, The PASCAL visual object classes challenge: a retrospective.
@@ -39,6 +44,9 @@
M. Cordts, M. Omran, S. Ramos, T. Rehfeld, M. Enzweiler, R. Benenson,
U. Franke, S. Roth, and B. Schiele, "The cityscapes dataset for semantic urban
scene understanding," In Proc. of CVPR, 2016.
  B. Zhou, H. Zhao, X. Puig, S. Fidler, A. Barriuso, and A. Torralba, "Scene
  parsing through ADE20K dataset," In Proc. of CVPR, 2017.
"""
import collections
import os.path
@@ -85,10 +93,22 @@
    ignore_label=255,
)

# The number of samples in each split has to be hard-coded here; determine
# the values from your own training/validation sets.
_ADE20K_INFORMATION = DatasetDescriptor(
    splits_to_sizes={
        'train': 20210,  # num of samples in images/training
        'val': 2000,  # num of samples in images/validation
    },
    num_classes=150,
    ignore_label=255,
)


_DATASETS_INFORMATION = {
    'cityscapes': _CITYSCAPES_INFORMATION,
    'pascal_voc_seg': _PASCAL_VOC_SEG_INFORMATION,
    'ade20k': _ADE20K_INFORMATION,
}

# Default file pattern of TFRecord of TensorFlow Example.
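With the descriptor registered, callers can refer to ADE20K by name. A sketch of the intended usage, assuming the get_dataset() helper this file defines below the truncated portion of the diff (the import path is illustrative):

from deeplab.datasets import segmentation_dataset

dataset = segmentation_dataset.get_dataset(
    'ade20k', 'train', dataset_dir='./ADE20K/tfrecord')
# DatasetDescriptor fields are forwarded to the returned slim Dataset.
print(dataset.num_classes, dataset.ignore_label)  # 150, 255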
