forked from renmengye/rec-attend-public
-
Notifications
You must be signed in to change notification settings - Fork 0
/
image_ops.py
180 lines (157 loc) · 6.92 KB
/
image_ops.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
from tensorflow.python.framework import ops
from tensorflow.python.ops import clip_ops
from tensorflow.python.ops import gen_image_ops
from tensorflow.python.ops import math_ops
from tensorflow.python.ops import random_ops
import tensorflow as tf
def random_transformation(x,
                          padding,
                          phase_train,
                          rnd_vflip=True,
                          rnd_hflip=True,
                          rnd_transpose=True,
                          rnd_colour=False,
                          y=None,
                          d=None,
                          c=None):
    """
    Perform random crop, flip, transpose, hue, saturation, brightness,
    contrast on an image batch and keep its label maps aligned with it.

    Every input is zero-padded by `padding` pixels on each spatial side. Two
    crops of the original spatial size are then taken from the padded
    tensor: a randomly shifted one (further flipped / transposed /
    colour-jittered) and a centered one, which reproduces the un-augmented
    input. `phase_train` blends between the two.

    Args:
        x: [B, H, W, 3] Input image.
        padding: int. Pixels of zero padding per spatial side; the random
            crop offset is drawn uniformly from [0, 2 * padding].
        phase_train: bool. Converted with `tf.to_float`; 1 selects the
            randomly transformed tensors, 0 selects the center crops.
        rnd_vflip: bool. Enable the random flip along axis H.
        rnd_hflip: bool. Enable the random flip along axis W.
        rnd_transpose: bool. Enable the random swap of axes H and W.
        rnd_colour: bool. Apply random hue, saturation, brightness and
            contrast to the image (never to the label maps).
        y: [B, T, H, W] Instance segmentation.
        d: [B, H, W, 8] Instance orientation. When given, all flips and
            the transpose must be disabled.
        c: [B, H, W, 1] Semantic segmentation.

    Returns:
        dict with key 'x' plus 'y', 'd', 'c' for each optional input that
        was provided. Each value is
        `(1 - phase_train) * center_crop + phase_train * random_version`.

    Raises:
        AssertionError: if `d` is given while a flip or the transpose is
            still enabled.
    """
    # NOTE(review): the transpose swaps H and W on the random branch only,
    # while the center crop keeps [H, W]; the final blend therefore looks
    # like it needs H == W whenever the transpose fires -- confirm.
    if d is not None:
        assert not rnd_vflip, "Orientation mode is on, no random flips"
        assert not rnd_hflip, "Orientation mode is on, no random flips"
        assert not rnd_transpose, "Orientation mode is on, no random transpose"

    phase_train_f = tf.to_float(phase_train)
    x_shape = tf.shape(x)
    inp_height = x_shape[1]
    inp_width = x_shape[2]

    # Two layouts are in play: [B, H, W, C] for x / d / c and [B, T, H, W]
    # for y. Each needs its own paddings, slice begin and slice size.
    hwc_paddings = [[0, 0], [padding, padding], [padding, padding], [0, 0]]
    thw_paddings = [[0, 0], [0, 0], [padding, padding], [padding, padding]]
    hwc_size = tf.pack([-1, inp_height, inp_width, -1])
    thw_size = tf.pack([-1, -1, inp_height, inp_width])

    # Random crop offset, shared by every tensor so they stay aligned.
    # `maxval` is exclusive, hence the +1: offsets span the full valid range
    # [0, 2 * padding] and padding == 0 no longer requests an empty range.
    offset = tf.random_uniform([2], dtype='int32', maxval=padding * 2 + 1)
    hwc_rand_begin = tf.pack([0, offset[0], offset[1], 0])
    thw_rand_begin = tf.pack([0, 0, offset[0], offset[1]])
    # Center slices (for inference) start exactly `padding` pixels in.
    hwc_ctr_begin = [0, padding, padding, 0]
    thw_ctr_begin = [0, 0, padding, padding]

    rand = {}
    ctr = {}
    for key, tensor in (('x', x), ('d', d), ('c', c)):
        if tensor is None:
            continue
        padded = tf.pad(tensor, hwc_paddings)
        rand[key] = tf.slice(padded, hwc_rand_begin, hwc_size)
        ctr[key] = tf.slice(padded, hwc_ctr_begin, hwc_size)
    if y is not None:
        y_pad = tf.pad(y, thw_paddings)
        rand['y'] = tf.slice(y_pad, thw_rand_begin, thw_size)
        ctr['y'] = tf.slice(y_pad, thw_ctr_begin, thw_size)

    if d is None:
        # Random horizontal & vertical flip & transpose.
        # A disabled flag collapses the sampling range to [1.0, 1.0), so the
        # draw can never fall below 0.5; an enabled flag samples [0.0, 1.0)
        # and therefore fires half of the time.
        rand_h = tf.random_uniform([1], 1.0 - float(rnd_hflip), 1.0)
        rand_v = tf.random_uniform([1], 1.0 - float(rnd_vflip), 1.0)
        hwc_mirror = tf.pack([1.0, rand_v[0], rand_h[0], 1.0]) < 0.5
        thw_mirror = tf.pack([1.0, 1.0, rand_v[0], rand_h[0]]) < 0.5

        rand_t = tf.random_uniform([1], 1.0 - float(rnd_transpose), 1.0)
        # do_tr is 0 or 1; adding / subtracting it swaps the H and W
        # indices in the permutation when it is 1.
        do_tr = tf.cast(rand_t[0] < 0.5, 'int32')
        hwc_perm = tf.pack([0, 1 + do_tr, 2 - do_tr, 3])
        thw_perm = tf.pack([0, 1, 2 + do_tr, 3 - do_tr])

        # `c` shares x's layout and crop, so it must receive the same
        # geometric transform; otherwise its labels no longer line up with
        # the flipped / transposed image.
        for key in ('x', 'c'):
            if key in rand:
                flipped = tf.reverse(rand[key], hwc_mirror)
                rand[key] = tf.transpose(flipped, hwc_perm)
        if 'y' in rand:
            flipped = tf.reverse(rand['y'], thw_mirror)
            rand['y'] = tf.transpose(flipped, thw_perm)

    # Random hue, saturation, brightness, contrast (image only).
    if rnd_colour:
        x_rand = random_hue(rand['x'], 0.1)
        x_rand = random_saturation(x_rand, 0.9, 1.1)
        x_rand = tf.image.random_brightness(x_rand, 0.1)
        rand['x'] = tf.image.random_contrast(x_rand, 0.9, 1.1)

    results = {}
    for key in ('x', 'y', 'd', 'c'):
        if key in rand:
            results[key] = ((1.0 - phase_train_f) * ctr[key] +
                            phase_train_f * rand[key])
    return results
def random_flip_left_right(image, seed=None):
    """Reverse axis 2 of a rank-4 tensor with probability 0.5.

    Args:
        image: Tensor to flip. The reversal mask has four entries, so a
            rank-4 input is expected (presumably [B, H, W, C], making
            axis 2 the width -- confirm against callers).
        seed: Operation-specific seed forwarded to the random draw.

    Returns:
        `image`, either unchanged or reversed along axis 2.
    """
    coin = random_ops.random_uniform([], 0, 1.0, seed=seed)
    # Only the axis-2 entry can drop below 0.5; the constant 1.0 entries
    # keep every other axis unreversed.
    per_axis = tf.pack([1.0, 1.0, coin, 1.0])
    flip_mask = math_ops.less(per_axis, 0.5)
    return tf.reverse(image, flip_mask)
def random_flip_up_down(image, seed=None):
    """Reverse axis 1 of a rank-4 tensor with probability 0.5.

    Args:
        image: Tensor to flip. The reversal mask has four entries, so a
            rank-4 input is expected (presumably [B, H, W, C], making
            axis 1 the height -- confirm against callers).
        seed: Operation-specific seed forwarded to the random draw.

    Returns:
        `image`, either unchanged or reversed along axis 1.
    """
    coin = random_ops.random_uniform([], 0, 1.0, seed=seed)
    # Only the axis-1 entry can drop below 0.5; the constant 1.0 entries
    # keep every other axis unreversed.
    per_axis = tf.pack([1.0, coin, 1.0, 1.0])
    flip_mask = math_ops.less(per_axis, 0.5)
    return tf.reverse(image, flip_mask)
def random_hue(image, max_delta, seed=None):
    """Shift the hue of an RGB image by a randomly drawn amount.

    A single scalar `delta` is sampled uniformly between `-max_delta` and
    `max_delta` and handed to `tf.image.adjust_hue`.

    Args:
        image: RGB image or images. Size of the last dimension must be 3.
        max_delta: float in `[0, 0.5]`. Largest magnitude of the hue shift.
        seed: An operation-specific seed. It is combined with the
            graph-level seed to determine the actual seeds used by this
            operation; see the documentation of set_random_seed.

    Returns:
        The result of `tf.image.adjust_hue(image, delta)`.

    Raises:
        ValueError: if `max_delta` is greater than 0.5 or negative.
    """
    # Reject out-of-range bounds before any graph op is created.
    if max_delta > 0.5:
        raise ValueError('max_delta must be <= 0.5.')
    elif max_delta < 0:
        raise ValueError('max_delta must be non-negative.')

    hue_shift = random_ops.random_uniform(
        [], -max_delta, max_delta, seed=seed)
    return tf.image.adjust_hue(image, hue_shift)
def random_saturation(image, lower, upper, seed=None):
    """Scale the saturation of an RGB image by a randomly drawn factor.

    A single scalar factor is sampled uniformly between `lower` and
    `upper` and handed to `tf.image.adjust_saturation`.

    Args:
        image: RGB image or images. Size of the last dimension must be 3.
        lower: float. Lower bound for the random saturation factor.
        upper: float. Upper bound for the random saturation factor.
        seed: An operation-specific seed. It is combined with the
            graph-level seed to determine the actual seeds used by this
            operation; see the documentation of set_random_seed.

    Returns:
        The result of `tf.image.adjust_saturation(image, factor)`.

    Raises:
        ValueError: if `upper <= lower` or if `lower < 0`.
    """
    # The bounds are validated in this order, so an inverted range is
    # reported even when `lower` is also negative.
    if upper <= lower:
        raise ValueError('upper must be > lower.')
    elif lower < 0:
        raise ValueError('lower must be non-negative.')

    factor = random_ops.random_uniform([], lower, upper, seed=seed)
    return tf.image.adjust_saturation(image, factor)