
Commit

refactor code
isikdogan committed Sep 9, 2019
1 parent b40fdd4 commit 6a5b07e
Showing 12 changed files with 129 additions and 555 deletions.
82 changes: 0 additions & 82 deletions custom_metrics.py

This file was deleted.

131 changes: 0 additions & 131 deletions custom_optimizers.py

This file was deleted.

6 changes: 5 additions & 1 deletion create_tfrecords.py → data_tools/create_tfrecords.py
@@ -1,3 +1,8 @@
''' Creates tfrecords given GeoTIFF files.
We provide a copy of the dataset in tfrecords format.
You should not need this script unless you modify the dataset.
'''

import os, glob
import argparse
import random
@@ -42,7 +47,6 @@ def preprocess_and_encode_sample(data_tensor):
return [B2, B3, B4, B5, B6, B7, L]

def create_tfrecords(save_dir, dataset_name, filenames, images_per_shard):

data_placeholder = tf.placeholder(tf.uint16)
processed_bands = preprocess_and_encode_sample(data_placeholder)

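For context, the body of create_tfrecords is collapsed in this diff; it feeds uint16 GeoTIFF data through preprocess_and_encode_sample and writes the resulting encoded bands into sharded tfrecords files. The sketch below illustrates the general TFRecord-writing pattern such a function typically follows; the helper names write_shard and _bytes_feature and the per-band feature keys are illustrative assumptions, not the repository's actual code, and on older TensorFlow 1.x releases tf.io.TFRecordWriter may instead be tf.python_io.TFRecordWriter.

# Illustrative sketch only; the actual create_tfrecords body is not shown in this diff.
# Assumes each sample is a list of encoded byte strings, as suggested by
# preprocess_and_encode_sample returning [B2, B3, B4, B5, B6, B7, L].
import tensorflow as tf

def _bytes_feature(value):
    # Wrap an encoded byte string in a tf.train.Feature.
    return tf.train.Feature(bytes_list=tf.train.BytesList(value=[value]))

def write_shard(shard_path, encoded_samples):
    # encoded_samples: iterable of [B2, B3, B4, B5, B6, B7, L] byte strings per sample.
    band_names = ['B2', 'B3', 'B4', 'B5', 'B6', 'B7', 'L']
    with tf.io.TFRecordWriter(shard_path) as writer:
        for bands in encoded_samples:
            features = {name: _bytes_feature(data) for name, data in zip(band_names, bands)}
            example = tf.train.Example(features=tf.train.Features(feature=features))
            writer.write(example.SerializeToString())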
28 changes: 17 additions & 11 deletions download_dataset.py → data_tools/download_dataset.py
@@ -1,49 +1,55 @@
''' Script to generate and download the dataset using the Google Earth Engine.
You should never need to use this script since we provide a copy of the dataset.
It takes over a month to finish processing the entire dataset using this script.
The script is included in the repository for archival purposes.
'''

import ee
import time

ee.Initialize()

# Select tiles
valid_tiles = ee.FeatureCollection("users/isikdogan/valid_tiles_filtered")
#valid_tiles = tiles.filter(ee.Filter.gt('occurrence', 1.0))
valid_tiles = tiles.filter(ee.Filter.gt('occurrence', 1.0))
tile_list = valid_tiles.toList(valid_tiles.size())

# Create the dataset by matching inputs and outputs
date_start = '2015-01-01' #'2015-01-01' #'2015-06-01'
date_end = '2015-12-31' #'2015-02-01' #'2015-07-01'
# TODO: try 1-month composites for cloudier samples
date_start = '2015-01-01'
date_end = '2015-12-31'
input_bands = ee.ImageCollection('LANDSAT/LC08/C01/T1') \
.filterDate(date_start, date_end).median() \
.select(['B2', 'B3', 'B4', 'B5', 'B6', 'B7']) \
.uint16()
labels = ee.ImageCollection('JRC/GSW1_0/YearlyHistory') \
.filter(ee.Filter.date('2015-01-01', '2015-12-31')) \
.filter(ee.Filter.date(date_start, date_end)) \
.select('waterClass').first().uint16()
dataset = input_bands.addBands(labels)

def download_tile(i, tile_list):
def download_tile(i, tile_list, save_folder):
current_tile = tile_list.get(i)
tile_geometry = ee.Feature(current_tile).geometry().getInfo()["coordinates"]
task = ee.batch.Export.image.toDrive(
image=dataset,
description=savepath,
folder='tiles_data_cloudy_1',
folder=save_folder,
fileNamePrefix=savepath,
region=tile_geometry,
scale=30)
task.start()

# Iterate and download
num_tiles = valid_tiles.size().getInfo()
subsample_ratio = 12
for i in range(108918, num_tiles, subsample_ratio):
subsample_ratio = 1
for i in range(0, num_tiles, subsample_ratio):
savepath = "tile_{}".format(i)
save_folder = 'tiles_data_{}'.format((i//10000) * 10000)
try:
download_tile(i, tile_list)
download_tile(i, tile_list, save_folder)
except Exception as e:
print(e)
print("Capacity reached, waiting...")
time.sleep(1200)
download_tile(i, tile_list)
download_tile(i, tile_list, save_folder)
print("Exporting {} ({} / {})".format(savepath, i, num_tiles))
time.sleep(10)
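The retry loop above backs off for a fixed 20 minutes whenever Earth Engine refuses to accept a new export. A minimal alternative sketch, assuming the documented ee.batch.Task.list() and Task.status() calls in the Earth Engine Python API, would poll the export queue instead of sleeping for a fixed interval. The helper wait_for_free_slot and the max_active threshold below are illustrative and not part of this commit.

import time
import ee

def wait_for_free_slot(max_active=250, poll_seconds=60):
    # Illustrative helper (not in the repository): block until the number of
    # queued or running Earth Engine export tasks drops below max_active.
    while True:
        states = [t.status().get('state') for t in ee.batch.Task.list()]
        active = sum(s in ('READY', 'RUNNING') for s in states)
        if active < max_active:
            return
        time.sleep(poll_seconds)

Calling wait_for_free_slot() before each download_tile(i, tile_list, save_folder) would replace the fixed time.sleep(1200) back-off with queue-aware throttling.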
