# data.py
import tensorflow as tf
from glob import glob

from utils import Utils_functions


class Data_functions:
    def __init__(self, args):
        self.args = args
        self.U = Utils_functions(args)

        # Note: these options are local to __init__ and are recreated in create_dataset().
        options = tf.data.Options()
        options.experimental_deterministic = False

    @tf.function
    def read_npy(self, p):
        # Read the file as raw bytes, keep only the trailing
        # max_lat_len * latdepth * 2 float32 values (discarding the .npy header
        # bytes) and reshape them to [max_lat_len, latdepth * 2].
        x = tf.reshape(
            tf.io.decode_raw(tf.io.read_file(p), tf.float32)[-(self.args.max_lat_len * self.args.latdepth * 2) :],
            [self.args.max_lat_len, self.args.latdepth * 2],
        )
        # Randomly crop a contiguous window of latlen timesteps.
        randnum = tf.random.uniform((), 0, self.args.max_lat_len - self.args.latlen, dtype=tf.int64)
        x = x[randnum : randnum + self.args.latlen, :]
        return x

    def create_dataset(self):
        print("Calculating total number of samples in data folder...")
        datalen = len(glob(self.args.train_path + "/*.npy"))
        print(f"Found {datalen} total samples")

        # Allow non-deterministic element ordering for faster parallel reads.
        options = tf.data.Options()
        options.experimental_deterministic = False

        if datalen > self.args.totsamples:
            # More files than needed: shuffle and keep totsamples of them.
            ds = tf.data.Dataset.list_files(self.args.train_path + "/*.npy").shuffle(datalen).take(self.args.totsamples)
        else:
            # Fewer files than needed: repeat the file list enough times,
            # shuffle, then keep exactly totsamples elements.
            ds = (
                tf.data.Dataset.list_files(self.args.train_path + "/*.npy")
                .repeat((self.args.totsamples // datalen) + 1)
                .shuffle(datalen * ((self.args.totsamples // datalen) + 1))
                .take(self.args.totsamples)
            )

        # Decode and crop each file in parallel, then batch and prefetch.
        ds = (
            ds.map(self.read_npy, num_parallel_calls=tf.data.experimental.AUTOTUNE)
            .batch(self.args.bs, drop_remainder=True)
            .prefetch(tf.data.experimental.AUTOTUNE)
            .with_options(options)
        )  # .apply(tf.data.experimental.ignore_errors())
        print("Dataset is ready!")
        return ds
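

if __name__ == "__main__":
    # Minimal usage sketch (not part of the original file). The values below are
    # placeholders and only cover the fields this module itself reads from
    # `args`; Utils_functions(args) may expect additional fields that the real
    # training script supplies through its own argument parser.
    from types import SimpleNamespace

    args = SimpleNamespace(
        train_path="path/to/latents",  # folder containing .npy latent files
        totsamples=100000,             # number of samples to draw for the dataset
        max_lat_len=512,               # latent length stored in each file
        latlen=256,                    # cropped window length per sample
        latdepth=64,                   # latent channels (files store 2 * latdepth)
        bs=32,                         # batch size
    )
    ds = Data_functions(args).create_dataset()
    for batch in ds.take(1):
        print(batch.shape)  # expected: (bs, latlen, latdepth * 2)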