forked from mcahny/Deep-Video-Inpainting
-
Notifications
You must be signed in to change notification settings - Fork 0
/
davis.py
104 lines (86 loc) · 4.13 KB
/
davis.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
from __future__ import division
import torch
from torch.utils import data
# general libs
import cv2
import matplotlib.pyplot as plt
from PIL import Image
import numpy as np
import math
import time
import tqdm
import os
import random
import argparse
import glob
import json
from scipy import ndimage, signal
import pdb
def temporal_transform(frame_indices, sample_range):
    """Pick a random contiguous window of ``sample_range`` entries.

    Args:
        frame_indices: Sequence of frame indices to sample from.
        sample_range: Number of consecutive entries to return.

    Returns:
        ``frame_indices[start:start + sample_range]`` for a start offset drawn
        uniformly from every valid position, including the last one.

    Raises:
        ValueError: If ``sample_range`` is larger than ``len(frame_indices)``.
    """
    num_starts = len(frame_indices) - sample_range + 1
    if num_starts < 1:
        raise ValueError(
            'sample_range ({}) exceeds the number of frames ({})'.format(
                sample_range, len(frame_indices)))
    # np.random.randint excludes its upper bound, so the bound must be one past
    # the last valid start; otherwise the final window is never chosen and an
    # exact-length input (len == sample_range) fails with "low >= high".
    start = np.random.randint(0, num_starts)
    return frame_indices[start:start + sample_range]
# Video names that DAVIS.__getitem__ treats as single-object sequences: for
# these, every non-zero mask label counts as foreground.
DAVIS_2016 = [
    'bear', 'bmx-bumps', 'boat', 'breakdance-flare', 'bus',
    'car-turn', 'dance-jump', 'dog-agility', 'drift-turn', 'elephant',
    'flamingo', 'hike', 'hockey', 'horsejump-low', 'kite-walk',
    'lucia', 'mallard-fly', 'mallard-water', 'motocross-bumps', 'motorbike',
    'paragliding', 'rhino', 'rollerblade', 'scooter-gray', 'soccerball',
    'stroller', 'surf', 'swing', 'tennis', 'train',
    'blackswan', 'bmx-trees', 'breakdance', 'camel', 'car-roundabout',
    'car-shadow', 'cows', 'dance-twirl', 'dog', 'drift-chicane',
    'drift-straight', 'goat', 'horsejump-high', 'kite-surf', 'libby',
    'motocross-jump', 'paragliding-launch', 'parkour', 'scooter-black',
    'soapbox',
]
class DAVIS(data.Dataset):
    """DAVIS video dataset yielding resized frames and dilated object masks.

    Each item is one video (or, when ``sample_duration > 0``, a random
    contiguous clip of that many frames) returned as ``(images, masks, info)``.

    Args:
        root: Dataset root containing the ``JPEGImages``, ``Annotations`` and
            ``ImageSets`` sub-directories.
        imset: Path, relative to ``root/ImageSets``, of a text file listing one
            video name per line.
        resolution: Sub-directory of ``JPEGImages``/``Annotations`` to read.
        size: ``dsize`` handed to ``cv2.resize`` for both frames and masks.
        sample_duration: Clip length in frames; ``0`` returns every frame.
    """

    def __init__(self, root, imset='2016/train.txt', resolution='480p',
                 size=(256, 256), sample_duration=0):
        self.sample_duration = sample_duration
        self.root = root
        self.mask_dir = os.path.join(root, 'Annotations', resolution)
        self.image_dir = os.path.join(root, 'JPEGImages', resolution)
        _imset_dir = os.path.join(root, 'ImageSets')
        _imset_f = os.path.join(_imset_dir, imset)
        self.size = size

        self.videos = []
        self.num_frames = {}
        self.num_objects = {}
        self.shape = {}
        with open(_imset_f, "r") as lines:
            for line in lines:
                _video = line.strip()
                if not _video:
                    # Skip blank lines so they do not become bogus video names.
                    continue
                self.videos.append(_video)
                self.num_frames[_video] = len(
                    glob.glob(os.path.join(self.image_dir, _video, '*.jpg')))
                first_mask_file = os.path.join(
                    self.mask_dir, _video, '00000.png')
                with Image.open(first_mask_file) as first_mask:
                    _mask = np.array(first_mask.convert("P"))
                self.num_objects[_video] = np.max(_mask)
                self.shape[_video] = np.shape(_mask)

    def __len__(self):
        """Return the number of videos listed in the image-set file."""
        return len(self.videos)

    def __getitem__(self, index):
        """Load the frames and masks of the video at ``index``.

        Returns:
            A tuple ``(images, masks, info)``:

            * ``images`` - float tensor permuted to (channel, frame, row,
              column), with pixel values scaled to [0, 1]. Channel order is
              whatever ``cv2.imread`` returns.
            * ``masks`` - float tensor of shape (1, frame, row, column) holding
              0/1 values of the dilated foreground mask.
            * ``info`` - dict with ``name``, ``num_frames`` (frame count of the
              whole video, even when a shorter clip is sampled) and
              ``num_objects`` (always 1).

        Raises:
            FileNotFoundError: If a frame image cannot be read.
        """
        video = self.videos[index]
        info = {
            'name': video,
            'num_frames': self.num_frames[video],
            # Masks are always reduced to a single foreground region below.
            'num_objects': 1,
        }

        f_list = list(range(self.num_frames[video]))
        if self.sample_duration > 0:
            f_list = temporal_transform(f_list, self.sample_duration)

        binary = video in DAVIS_2016
        images = []
        masks = []
        for f in f_list:
            images.append(torch.from_numpy(self._load_image(video, f)))
            labels = self._load_labels(video, f)
            masks.append(torch.from_numpy(self._dilate_mask(labels, binary)))

        masks = torch.stack(masks)
        masks = (masks == 1).type(torch.FloatTensor).unsqueeze(0)
        images = torch.stack(images).permute(3, 0, 1, 2)
        return images, masks, info

    def _load_image(self, video, frame):
        """Read one frame, resize it to ``self.size`` and scale it to [0, 1]."""
        img_file = os.path.join(
            self.image_dir, video, '{:05d}.jpg'.format(frame))
        image_ = cv2.imread(img_file)
        if image_ is None:
            # cv2.imread signals failure by returning None instead of raising.
            raise FileNotFoundError(
                'Could not read frame image: {}'.format(img_file))
        # The third positional parameter of cv2.resize is ``dst``, so the
        # interpolation flag has to be passed by keyword to take effect.
        image_ = cv2.resize(image_, self.size, interpolation=cv2.INTER_CUBIC)
        return np.float32(image_) / 255.0

    def _load_labels(self, video, frame):
        """Read the label mask of one frame, resized to ``self.size``.

        Falls back to the first-frame annotation (``00000.png``) when the
        per-frame mask file cannot be opened.
        """
        mask_file = os.path.join(
            self.mask_dir, video, '{:05d}.png'.format(frame))
        try:
            with Image.open(mask_file) as mask_img:
                mask_ = np.array(mask_img.convert('P'), np.uint8)
        except OSError:
            mask_file = os.path.join(self.mask_dir, video, '00000.png')
            with Image.open(mask_file) as mask_img:
                mask_ = np.array(mask_img.convert('P'), np.uint8)
        # Nearest-neighbour keeps label values intact (no blended labels).
        return cv2.resize(mask_, self.size, interpolation=cv2.INTER_NEAREST)

    @staticmethod
    def _dilate_mask(labels, binary):
        """Turn a label map into a dilated 0/1 ``float32`` foreground mask.

        Args:
            labels: 2-D integer label array.
            binary: If true, every non-zero label is foreground; otherwise
                only the label equal to ``min(1, labels.max())`` is.
        """
        if binary:
            foreground = labels != 0
        else:
            # NOTE(review): for a mask with no labels max() is 0, so the
            # selected label is 0 and the whole frame becomes foreground.
            # Behaviour kept unchanged - confirm this is intended.
            select_mask = min(1, labels.max())
            foreground = labels == select_mask
        # Convolving with an all-ones 10x6 kernel grows the region: any output
        # pixel whose neighbourhood touches the foreground becomes non-zero.
        w_k = np.ones((10, 6))
        hits = signal.convolve2d(foreground.astype(np.float64), w_k, 'same')
        return np.float32(hits != 0)