Merge pull request tensorflow#3853 from walkerlala/add-ade20k
add ADE20K dataset
aquariusjay authored Apr 9, 2018
2 parents 18e0643 + 13c9de3 commit 6741cfc
Showing 9 changed files with 357 additions and 18 deletions.
1 change: 1 addition & 0 deletions research/deeplab/README.md
@@ -90,6 +90,7 @@ Running:
* <a href='g3doc/installation.md'>Installation.</a><br>
* <a href='g3doc/pascal.md'>Running DeepLab on PASCAL VOC 2012 semantic segmentation dataset.</a><br>
* <a href='g3doc/cityscapes.md'>Running DeepLab on Cityscapes semantic segmentation dataset.</a><br>
* <a href='g3doc/ade20k.md'>Running DeepLab on ADE20K semantic segmentation dataset.</a><br>

Models:

113 changes: 113 additions & 0 deletions research/deeplab/datasets/build_ade20k_data.py
@@ -0,0 +1,113 @@
# Copyright 2018 The TensorFlow Authors All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
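"""Converts ADE20K data to TFRecord file format with Example protos."""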

import math
import os
import random
import sys

import build_data
import tensorflow as tf

FLAGS = tf.app.flags.FLAGS

tf.app.flags.DEFINE_string(
    'train_image_folder',
    './ADE20K/ADEChallengeData2016/images/training',
    'Folder containing training images.')

tf.app.flags.DEFINE_string(
    'train_image_label_folder',
    './ADE20K/ADEChallengeData2016/annotations/training',
    'Folder containing annotations for training images.')

tf.app.flags.DEFINE_string(
    'val_image_folder',
    './ADE20K/ADEChallengeData2016/images/validation',
    'Folder containing validation images.')

tf.app.flags.DEFINE_string(
    'val_image_label_folder',
    './ADE20K/ADEChallengeData2016/annotations/validation',
    'Folder containing annotations for validation images.')

tf.app.flags.DEFINE_string(
    'output_dir', './ADE20K/tfrecord',
    'Path to save converted TFRecord of TensorFlow examples.')

_NUM_SHARDS = 4

def _convert_dataset(dataset_split, dataset_dir, dataset_label_dir):
  """Converts the ADE20K dataset into TFRecord file format (SSTable).

  Args:
    dataset_split: Dataset split (e.g., train, val).
    dataset_dir: Directory containing the images.
    dataset_label_dir: Directory containing the annotations.

  Raises:
    RuntimeError: If loaded image and label have different shape.
  """
  img_names = tf.gfile.Glob(os.path.join(dataset_dir, '*.jpg'))
  random.shuffle(img_names)
  seg_names = []
  for f in img_names:
    # Get the filename without the extension.
    basename = os.path.basename(f).split('.')[0]
    # Derive the path of its corresponding annotation (*.png).
    seg = os.path.join(dataset_label_dir, basename + '.png')
    seg_names.append(seg)

  num_images = len(img_names)
  num_per_shard = int(math.ceil(num_images / float(_NUM_SHARDS)))

  image_reader = build_data.ImageReader('jpeg', channels=3)
  label_reader = build_data.ImageReader('png', channels=1)

  for shard_id in range(_NUM_SHARDS):
    output_filename = os.path.join(
        FLAGS.output_dir,
        '%s-%05d-of-%05d.tfrecord' % (dataset_split, shard_id, _NUM_SHARDS))
    with tf.python_io.TFRecordWriter(output_filename) as tfrecord_writer:
      start_idx = shard_id * num_per_shard
      end_idx = min((shard_id + 1) * num_per_shard, num_images)
      for i in range(start_idx, end_idx):
        sys.stdout.write('\r>> Converting image %d/%d shard %d' % (
            i + 1, num_images, shard_id))
        sys.stdout.flush()
        # Read the image in binary mode.
        image_filename = img_names[i]
        image_data = tf.gfile.FastGFile(image_filename, 'rb').read()
        height, width = image_reader.read_image_dims(image_data)
        # Read the semantic segmentation annotation.
        seg_filename = seg_names[i]
        seg_data = tf.gfile.FastGFile(seg_filename, 'rb').read()
        seg_height, seg_width = label_reader.read_image_dims(seg_data)
        if height != seg_height or width != seg_width:
          raise RuntimeError('Shape mismatch between image and label.')
        # Convert to tf example.
        example = build_data.image_seg_to_tfexample(
            image_data, img_names[i], height, width, seg_data)
        tfrecord_writer.write(example.SerializeToString())
    sys.stdout.write('\n')
    sys.stdout.flush()

def main(unused_argv):
  tf.gfile.MakeDirs(FLAGS.output_dir)
  _convert_dataset(
      'train', FLAGS.train_image_folder, FLAGS.train_image_label_folder)
  _convert_dataset(
      'val', FLAGS.val_image_folder, FLAGS.val_image_label_folder)


if __name__ == '__main__':
  tf.app.run()
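To spot-check the generated shards, here is a minimal sketch (not part of this commit) that decodes the first record of one shard. It assumes TF 1.x and the feature keys written by build_data.image_seg_to_tfexample in the sibling build_data.py; adjust the keys if that file differs.

import tensorflow as tf

# Read the first serialized Example from one shard and inspect a few fields.
record = next(tf.python_io.tf_record_iterator(
    './ADE20K/tfrecord/train-00000-of-00004.tfrecord'))
example = tf.train.Example.FromString(record)
features = example.features.feature
print(features['image/filename'].bytes_list.value[0])
print(features['image/height'].int64_list.value[0],
      features['image/width'].int64_list.value[0])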
3 changes: 1 addition & 2 deletions research/deeplab/datasets/build_voc2012_data.py
@@ -50,7 +50,6 @@
image/segmentation/class/encoded: encoded semantic segmentation content.
image/segmentation/class/format: semantic segmentation file format.
"""
-import glob
import math
import os.path
import sys
@@ -133,7 +132,7 @@ def _convert_dataset(dataset_split):


def main(unused_argv):
-  dataset_splits = glob.glob(os.path.join(FLAGS.list_folder, '*.txt'))
+  dataset_splits = tf.gfile.Glob(os.path.join(FLAGS.list_folder, '*.txt'))
  for dataset_split in dataset_splits:
    _convert_dataset(dataset_split)

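The swap from glob.glob to tf.gfile.Glob matters when list_folder lives on a non-local filesystem: tf.gfile resolves any path TensorFlow's file system layer understands. A minimal sketch of the difference (the GCS bucket below is hypothetical):

import tensorflow as tf

# Behaves like glob.glob for local paths...
splits = tf.gfile.Glob('./pascal_voc_seg/VOCdevkit/VOC2012/ImageSets/Segmentation/*.txt')
# ...but also handles remote filesystems such as GCS (hypothetical bucket).
splits = tf.gfile.Glob('gs://my-bucket/list_folder/*.txt')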
80 changes: 80 additions & 0 deletions research/deeplab/datasets/download_and_convert_ade20k.sh
@@ -0,0 +1,80 @@
#!/bin/bash
# Copyright 2018 The TensorFlow Authors All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
#
# Script to download and preprocess the ADE20K dataset.
#
# Usage:
# bash ./download_and_convert_ade20k.sh
#
# The folder structure is assumed to be:
# + datasets
# - build_data.py
# - build_ade20k_data.py
# - download_and_convert_ade20k.sh
# + ADE20K
# + tfrecord
# + ADEChallengeData2016
# + annotations
# + training
# + validation
# + images
# + training
# + validation

# Exit immediately if a command exits with a non-zero status.
set -e

CURRENT_DIR=$(pwd)
WORK_DIR="./ADE20K"
mkdir -p "${WORK_DIR}"
cd "${WORK_DIR}"

# Helper function to download and unpack the ADE20K dataset.
download_and_uncompress() {
  local BASE_URL=${1}
  local FILENAME=${2}

  if [ ! -f "${FILENAME}" ]; then
    echo "Downloading ${FILENAME} to ${WORK_DIR}"
    wget -nd -c "${BASE_URL}/${FILENAME}"
  fi
  echo "Uncompressing ${FILENAME}"
  unzip "${FILENAME}"
}

# Download the images.
BASE_URL="http://data.csail.mit.edu/places/ADEchallenge"
FILENAME="ADEChallengeData2016.zip"

download_and_uncompress "${BASE_URL}" "${FILENAME}"

cd "${CURRENT_DIR}"

# Root path for ADE20K dataset.
ADE20K_ROOT="${WORK_DIR}/ADEChallengeData2016"

# Build TFRecords of the dataset.
# First, create output directory for storing TFRecords.
OUTPUT_DIR="${WORK_DIR}/tfrecord"
mkdir -p "${OUTPUT_DIR}"

echo "Converting ADE20K dataset..."
python ./build_ade20k_data.py \
  --train_image_folder="${ADE20K_ROOT}/images/training/" \
  --train_image_label_folder="${ADE20K_ROOT}/annotations/training/" \
  --val_image_folder="${ADE20K_ROOT}/images/validation/" \
  --val_image_label_folder="${ADE20K_ROOT}/annotations/validation/" \
  --output_dir="${OUTPUT_DIR}"
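Once the script finishes, ${OUTPUT_DIR} should contain four train and four val shards. A quick sketch (not part of the commit) to count records per shard; the totals should match the 20210 training and 2000 validation images listed in segmentation_dataset.py below:

import tensorflow as tf

for shard in sorted(tf.gfile.Glob('./ADE20K/tfrecord/*.tfrecord')):
  count = sum(1 for _ in tf.python_io.tf_record_iterator(shard))
  print(shard, count)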
14 changes: 7 additions & 7 deletions research/deeplab/datasets/download_and_convert_voc2012.sh
@@ -17,13 +17,13 @@
# Script to download and preprocess the PASCAL VOC 2012 dataset.
#
# Usage:
-# bash ./download_and_preprocess_voc2012.sh
+# bash ./download_and_convert_voc2012.sh
#
# The folder structure is assumed to be:
# + datasets
# - build_data.py
# - build_voc2012_data.py
-# - download_and_preprocess_voc2012.sh
+# - download_and_convert_voc2012.sh
# - remove_gt_colormap.py
# + pascal_voc_seg
# + VOCdevkit
@@ -37,27 +37,27 @@ set -e

CURRENT_DIR=$(pwd)
WORK_DIR="./pascal_voc_seg"
-mkdir -p ${WORK_DIR}
-cd ${WORK_DIR}
+mkdir -p "${WORK_DIR}"
+cd "${WORK_DIR}"

# Helper function to download and unpack VOC 2012 dataset.
download_and_uncompress() {
  local BASE_URL=${1}
  local FILENAME=${2}

-  if [ ! -f ${FILENAME} ]; then
+  if [ ! -f "${FILENAME}" ]; then
    echo "Downloading ${FILENAME} to ${WORK_DIR}"
    wget -nd -c "${BASE_URL}/${FILENAME}"
  fi
  echo "Uncompressing ${FILENAME}"
-  tar -xf ${FILENAME}
+  tar -xf "${FILENAME}"
}

# Download the images.
BASE_URL="http://host.robots.ox.ac.uk/pascal/VOC/voc2012/"
FILENAME="VOCtrainval_11-May-2012.tar"

-download_and_uncompress ${BASE_URL} ${FILENAME}
+download_and_uncompress "${BASE_URL}" "${FILENAME}"

cd "${CURRENT_DIR}"

20 changes: 20 additions & 0 deletions research/deeplab/datasets/segmentation_dataset.py
@@ -31,6 +31,11 @@
The Cityscapes dataset contains 19 semantic labels (such as road, person, car,
and so on) for urban street scenes.
3. ADE20K dataset (http://groups.csail.mit.edu/vision/datasets/ADE20K)

The ADE20K dataset contains 150 semantic labels covering both urban street
scenes and indoor scenes.

References:
  M. Everingham, S. M. A. Eslami, L. V. Gool, C. K. I. Williams, J. Winn,
  and A. Zisserman, The PASCAL visual object classes challenge: a retrospective.
@@ -39,6 +44,9 @@
M. Cordts, M. Omran, S. Ramos, T. Rehfeld, M. Enzweiler, R. Benenson,
U. Franke, S. Roth, and B. Schiele, "The cityscapes dataset for semantic urban
scene understanding," In Proc. of CVPR, 2016.
  B. Zhou, H. Zhao, X. Puig, S. Fidler, A. Barriuso, and A. Torralba, "Scene
  parsing through ADE20K dataset," In Proc. of CVPR, 2017.
"""
import collections
import os.path
@@ -85,10 +93,22 @@
    ignore_label=255,
)

# The number of samples in each split has to be hard-coded here; determine
# the values from your own training/validation sets.
_ADE20K_INFORMATION = DatasetDescriptor(
    splits_to_sizes={
        'train': 20210,  # num of samples in images/training
        'val': 2000,  # num of samples in images/validation
    },
    num_classes=150,
    ignore_label=255,
)


_DATASETS_INFORMATION = {
    'cityscapes': _CITYSCAPES_INFORMATION,
    'pascal_voc_seg': _PASCAL_VOC_SEG_INFORMATION,
    'ade20k': _ADE20K_INFORMATION,
}

# Default file pattern of TFRecord of TensorFlow Example.
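With the descriptor registered, callers can refer to ADE20K by name. A sketch of the intended usage, assuming the get_dataset() helper this file defines below the truncated portion of the diff (the import path is illustrative):

from deeplab.datasets import segmentation_dataset

dataset = segmentation_dataset.get_dataset(
    'ade20k', 'train', dataset_dir='./ADE20K/tfrecord')
# DatasetDescriptor fields are forwarded to the returned slim Dataset.
print(dataset.num_classes, dataset.ignore_label)  # 150, 255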
