forked from gwang-kim/DiffusionCLIP
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathCelebA_HQ_dataset.py
63 lines (45 loc) · 1.94 KB
/
CelebA_HQ_dataset.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
from torch.utils.data import Dataset
import lmdb
from io import BytesIO
from PIL import Image
import torchvision.transforms as tfs
import os
class MultiResolutionDataset(Dataset):
    """LMDB-backed image dataset keyed by ``f"{resolution}-{index:05d}"``.

    The LMDB file is expected to contain a ``"length"`` entry holding the
    number of images, plus one encoded image per ``{resolution}-{index}``
    key. Each item is decoded with PIL and passed through ``transform``.
    """

    def __init__(self, path, transform, resolution=256):
        """Open the LMDB environment at ``path`` and read its length.

        Args:
            path: directory containing the LMDB database.
            transform: callable applied to each decoded PIL image.
            resolution: image resolution used as the key prefix.

        Raises:
            IOError: the environment could not be opened.
            KeyError: the database has no ``"length"`` entry.
        """
        # NOTE(review): the env is opened in the constructing process;
        # lmdb environments generally must not be shared across fork
        # (DataLoader num_workers > 0) — confirm workers reopen the env
        # or that this is used single-worker.
        self.env = lmdb.open(
            path,
            max_readers=32,
            readonly=True,
            lock=False,
            readahead=False,
            meminit=False,
        )

        if not self.env:
            raise IOError("Cannot open lmdb dataset", path)

        with self.env.begin(write=False) as txn:
            length_bytes = txn.get("length".encode("utf-8"))
            # txn.get returns None for a missing key; fail loudly rather
            # than raising an opaque AttributeError on .decode below.
            if length_bytes is None:
                raise KeyError(
                    f"'length' entry missing from lmdb dataset at {path}")
            self.length = int(length_bytes.decode("utf-8"))

        self.resolution = resolution
        self.transform = transform

    def __len__(self):
        return self.length

    def __getitem__(self, index):
        with self.env.begin(write=False) as txn:
            key = f"{self.resolution}-{str(index).zfill(5)}".encode("utf-8")
            img_bytes = txn.get(key)

        # A missing key would otherwise surface as TypeError from
        # BytesIO(None); raise a clear, indexed error instead.
        if img_bytes is None:
            raise KeyError(f"No image stored under key {key!r}")

        buffer = BytesIO(img_bytes)
        img = Image.open(buffer)
        img = self.transform(img)

        return img
################################################################################
def get_celeba_dataset(data_root, config):
    """Build the CelebA-HQ train/test datasets from their LMDB folders.

    Both splits share the same preprocessing: convert the PIL image to a
    tensor and normalize each channel from [0, 1] to [-1, 1].

    Args:
        data_root: directory containing 'LMDB_train' and 'LMDB_test'.
        config: config object providing ``config.data.image_size``.

    Returns:
        (train_dataset, test_dataset) tuple of MultiResolutionDataset.
    """
    def build_transform():
        # One fresh Compose per split, as in the original construction.
        return tfs.Compose([
            tfs.ToTensor(),
            tfs.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5), inplace=True),
        ])

    image_size = config.data.image_size
    train_dataset = MultiResolutionDataset(
        os.path.join(data_root, 'LMDB_train'), build_transform(), image_size)
    test_dataset = MultiResolutionDataset(
        os.path.join(data_root, 'LMDB_test'), build_transform(), image_size)

    return train_dataset, test_dataset