Skip to content


Browse files Browse the repository at this point in the history
  • Loading branch information
ZaiqiangWu committed Apr 21, 2024
1 parent eb22946 commit 3fa73c0
Show file tree
Hide file tree
Showing 3 changed files with 289 additions and 0 deletions.
49 changes: 49 additions & 0 deletions util/
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
import os
import numpy as np

class Image2VideoWriter():
    """Collect frames in memory and encode them into a single video file.

    Frames are queued with :meth:`append` and written out in one pass by
    :meth:`make_video`.
    """

    def __init__(self):
        # Frames in insertion order.
        self.image_list = []

    def append(self, image):
        """Queue ``image`` as the next frame of the video."""
        self.image_list.append(image)

    def make_video(self, outvid=None, fps=5, size=None,
                   is_color=True, format="MP4V", isRGB=False):
        """Create a video from the queued images.

        The frames are first written with OpenCV's ``VideoWriter`` and the
        result is then re-encoded in place with ``ffmpeg -vcodec libx264``.

        By default the video has the size of the first image (halved when
        width + height exceeds 3000). Every frame whose size differs from the
        video size is resized before being written.

        @param outvid      path of the output video (required)
        @param fps         frames per second
        @param size        ``(width, height)`` of each frame, or None to derive
                           it from the first image
        @param is_color    forwarded to ``VideoWriter``
        @param format      four-character codec code passed to
                           ``VideoWriter_fourcc``
        @param isRGB       set when the queued frames are RGB rather than BGR
        @return            the (released) ``VideoWriter``, or None when no
                           frame was queued
        @raise TypeError   if ``outvid`` is None
        """
        if outvid is None:
            raise TypeError("make_video() requires an output path (outvid)")
        if not self.image_list:
            # Nothing to encode; do not create or post-process any file.
            return None

        # Imported lazily so that merely queuing frames does not need OpenCV.
        from cv2 import VideoWriter, VideoWriter_fourcc, resize

        fourcc = VideoWriter_fourcc(*format)
        vid = None
        try:
            for image in self.image_list:
                img = image
                if isRGB:
                    # NOTE(review): assumes channel-last frames; the channel
                    # axis is reversed so OpenCV receives BGR data.
                    img = np.ascontiguousarray(img[:, :, ::-1])
                if vid is None:
                    if size is None:
                        size = img.shape[1], img.shape[0]
                        if size[0] + size[1] > 3000:
                            # Very large frames are written at half size.
                            size = img.shape[1] // 2, img.shape[0] // 2
                    size = tuple(int(v) for v in size)
                    vid = VideoWriter(outvid, fourcc, float(fps), size,
                                      is_color)
                # Resize when EITHER dimension differs from the video size.
                if size[0] != img.shape[1] or size[1] != img.shape[0]:
                    img = resize(img, size)
                vid.write(img)
        finally:
            if vid is not None:
                # Flush and close the container before ffmpeg reads it.
                vid.release()

        self._reencode_h264(outvid)
        return vid

    @staticmethod
    def _reencode_h264(outvid):
        """Re-encode ``outvid`` in place with libx264; return True on success.

        The original file is only replaced when ffmpeg ran successfully, so a
        missing or failing ffmpeg leaves the OpenCV-written video untouched.
        """
        import subprocess

        path, name = os.path.split(outvid)
        temp = os.path.join(path, name.split('.')[0] + 'temp.mp4')
        try:
            # Argument list (no shell) so paths with spaces or shell
            # metacharacters are passed through verbatim; -y avoids blocking
            # on an overwrite prompt if a stale temp file exists.
            done = subprocess.run(
                ["ffmpeg", "-y", "-i", outvid, "-vcodec", "libx264", temp],
                check=False,
            )
        except OSError:
            # ffmpeg is not installed / not executable.
            return False
        if done.returncode != 0 or not os.path.exists(temp):
            if os.path.exists(temp):
                os.remove(temp)
            return False
        os.replace(temp, outvid)
        return True
76 changes: 76 additions & 0 deletions util/
Original file line number Diff line number Diff line change
@@ -0,0 +1,76 @@
from PIL import Image
import cv2
import numpy as np

def crop2_43(img: Image.Image):
    """Return ``img`` as reshaped by a freshly built ``ImageReshaper``.

    Convenience wrapper: constructs an ``ImageReshaper`` for ``img`` and
    returns the result of its ``get_reshaped()`` call.
    """
    return ImageReshaper(img).get_reshaped()

class ImageReshaper:
    """Warp a PIL image onto a fixed-size canvas and map results back.

    The forward and inverse affine transforms come from ``crop2_43_trans``.
    ``get_reshaped`` applies the forward transform; ``back2rawSahpe`` warps a
    processed canvas back with the inverse transform and pastes it over the
    original image wherever the canvas covers it.
    """

    # Size in pixels of the canvas produced by get_reshaped().
    ROI_HEIGHT = 1024
    ROI_WIDTH = 768

    def __init__(self, img: Image.Image):
        """Store ``img`` and precompute its transforms and coverage mask."""
        self.img = img
        self.trans, self.inv_trans = crop2_43_trans(self.img)
        w, h = self.img.size
        # Boolean (h, w) mask of raw-image pixels covered by the canvas.
        self.trans_mask = self.get_trans_mask(self.inv_trans, [h, w])

    def get_reshaped(self):
        """Return the stored image warped onto the canvas as a PIL image."""
        img = np.array(self.img)
        trans_img = cv2.warpAffine(img, self.trans,
                                   (self.ROI_WIDTH, self.ROI_HEIGHT),
                                   borderValue=(0, 0, 0))
        return Image.fromarray(trans_img)

    def back2rawSahpe(self, img):
        """Paste a processed canvas ``img`` back onto the original image.

        ``img`` is warped to the raw image size with the inverse transform;
        pixels inside ``self.trans_mask`` are taken from the warped result and
        all others from the original image. Returns a numpy array.
        """
        raw_img = np.array(self.img)
        # Accept anything array-like (e.g. a PIL image) as well as ndarrays.
        new_img = np.asarray(img)
        w, h = self.img.size
        raw_new_img = self.roi2raw(new_img, self.inv_trans, [h, w])
        composed = raw_img.copy()
        composed[self.trans_mask] = raw_new_img[self.trans_mask]
        return composed

    # Correctly spelled alias; the original (misspelled) name is kept so
    # existing callers continue to work.
    back2rawShape = back2rawSahpe

    def roi2raw(self, img, trans, raw_shape):
        """Warp ``img`` with ``trans`` to ``raw_shape`` given as (rows, cols).

        OpenCV's default interpolation and border handling are used.
        """
        trans_img = cv2.warpAffine(img, trans, (raw_shape[1], raw_shape[0]))
        return trans_img

    def get_trans_mask(self, inv_trans, raw_shape):
        """Return a boolean mask of the raw pixels covered by the canvas.

        A canvas-sized array of ones is warped with ``inv_trans`` to
        ``raw_shape`` (rows, cols); non-zero pixels are inside the canvas.
        """
        mask = np.ones([self.ROI_HEIGHT, self.ROI_WIDTH]).astype(np.uint8)
        # NOTE(review): interpolation/border arguments are left at OpenCV's
        # defaults here -- confirm whether nearest-neighbour was intended.
        roi_mask = cv2.warpAffine(mask, inv_trans,
                                  (raw_shape[1], raw_shape[0]))
        roi_mask = roi_mask.astype(bool)
        return roi_mask

def crop2_43_trans(img: Image.Image):
    """Build the affine transform for ``img`` and its inverse.

    Three source points are derived from ``img.size`` and paired with the
    fixed destination points (0, 0), (1024, 0) and (1024, 768).

    Returns a ``(trans, inv_trans)`` tuple of matrices produced by
    ``cv2.getAffineTransform`` (source->destination and destination->source).
    """
    # NOTE(review): PIL's ``Image.size`` is ``(width, height)``, so the first
    # value unpacked here is the width. The original code called it ``raw_h``
    # and labelled the two branches "too tall" / "too wide" -- confirm that
    # the axis order below is what is intended.
    dim_a, dim_b = img.size

    if 3 * dim_a > 4 * dim_b:
        # First dimension exceeds 4/3 of the second: trim it symmetrically.
        margin = (dim_a - dim_b * (4 / 3)) / 2
        corners = [
            (0 + margin, 0),
            (dim_a - margin, 0),
            (dim_a - margin, dim_b),
        ]
    else:
        # Otherwise trim the second dimension symmetrically.
        margin = (dim_b - dim_a * (3 / 4)) / 2
        corners = [
            (0, 0 + margin),
            (dim_a, 0 + margin),
            (dim_a, dim_b - margin),
        ]

    src = np.array(corners, np.float32)
    dst = np.array([(0, 0), (1024, 0), (1024, 768)], np.float32)

    forward = cv2.getAffineTransform(src, dst)
    backward = cv2.getAffineTransform(dst, src)
    return forward, backward
164 changes: 164 additions & 0 deletions util/
Original file line number Diff line number Diff line change
@@ -0,0 +1,164 @@
import cv2
import numpy as np
# from util.garment_heatmap import HeatmapGenerator
import torch
import torchvision.transforms as transforms
import ffmpeg
from OpticalFlow.optical_flow import OpticalFlow

class VideoLoader:
    """Load every frame of a video into memory and serve processed frames.

    Frames are decoded once in the constructor. ``get_numpy_image`` crops a
    frame to the configured bounding box, zero-pads it and resizes it to
    512 x 512; ``get_image`` returns the same frame as a normalised tensor.
    """

    def __init__(self, path):
        """Decode the video at ``path`` and set default crop and padding."""
        self.path = path
        # load_video() also sets frameCount / frameWidth / frameHeight.
        self.frames = self.load_video()
        # Crop box, defaulting to the full frame.
        self.min_h = 0
        self.min_w = 0
        self.max_h = self.frameHeight
        self.max_w = self.frameWidth
        # Zero padding (left, right, up, down) in pixels.
        self.l = 0
        self.r = 0
        self.u = 0
        self.d = 0
        if self.frameHeight > self.frameWidth:
            # Portrait video: pad left and right equally towards a square.
            self.l = (self.frameHeight - self.frameWidth) // 2
            self.r = self.l
        self.post_transform = transforms.Resize((512, 512))
        # Filled by compute_opt_flow().
        self.opt_flow = None
        self.optical_flow = OpticalFlow()

    def compute_opt_flow(self):
        """Compute optical flow between consecutive frames.

        The per-pair results are moved to the CPU and stored, in frame order,
        as a list in ``self.opt_flow`` (``len(self) - 1`` entries).
        """
        print("Start computing optical flow")
        opt_flow_list = []
        for i in range(len(self) - 1):
            with torch.no_grad():
                opt_flow = self.optical_flow(self.frames[i],
                                             self.frames[i + 1]).cpu()
            opt_flow_list.append(opt_flow)
        # NOTE(review): results are kept as a plain list -- confirm whether
        # downstream code expects a stacked tensor instead.
        self.opt_flow = opt_flow_list
        print("Finish computing optical flow:", len(self.opt_flow))

    def crop2square(self):
        """Centre the crop box horizontally when the video is landscape."""
        if self.frameWidth > self.frameHeight:
            offset = (self.frameWidth - self.frameHeight) // 2
            self.min_w = offset
            self.max_w = offset + self.frameHeight

    def __getitem__(self, idx):
        """Return frame ``idx`` as produced by :meth:`get_image`."""
        # Heatmap generation (normalise + resize to 384 x 288, then a heatmap
        # model) is disabled in this file, so only the image is returned.
        return self.get_image(idx)

    def __len__(self):
        """Return the number of decoded frames."""
        return self.frames.shape[0]

    def set_bbox(self, min_h, min_w, max_h, max_w):
        """Set the crop box used by :meth:`get_numpy_image`."""
        self.min_h = min_h
        self.min_w = min_w
        self.max_h = max_h
        self.max_w = max_w

    def set_padding(self, l, r, u, d):
        """Set the left/right/up/down zero padding in pixels."""
        self.l = l
        self.r = r
        self.u = u
        self.d = d

    def get_image(self, idx):
        """Return frame ``idx`` as a 1 x C x 512 x 512 tensor scaled by 1/255.

        The tensor is moved to the GPU when CUDA is available.
        """
        frame = self.get_numpy_image(idx)
        img = torch.from_numpy(frame) / 255.0
        img = img.permute(2, 0, 1)  # CHW, BGR
        if torch.cuda.is_available():
            img = img.cuda()
        img = img.unsqueeze(0)
        img = self.post_transform(img)
        return img

    def get_numpy_image(self, idx):
        """Return frame ``idx`` cropped, zero-padded and resized to 512 x 512."""
        frame = self.frames[idx]
        frame = frame[self.min_h:self.max_h, self.min_w:self.max_w, :]
        # Non-positive padding values mean "no padding" on that side.
        pad_rows = (max(self.u, 0), max(self.d, 0))
        pad_cols = (max(self.l, 0), max(self.r, 0))
        if any(pad_rows) or any(pad_cols):
            frame = np.pad(frame, (pad_rows, pad_cols, (0, 0)),
                           mode="constant", constant_values=0)
        frame = cv2.resize(frame, dsize=(512, 512),
                           interpolation=cv2.INTER_CUBIC)
        return frame

    def get_raw_numpy_image(self, idx):
        """Return frame ``idx`` exactly as decoded (no crop/pad/resize)."""
        return self.frames[idx]

    def get_heatmap(self, idx):
        """Heatmaps are unavailable while the heatmap generator is disabled.

        ``__getitem__`` only returns the image, so there is nothing to unpack
        a heatmap from; fail with an explicit error instead of an obscure
        unpacking failure.
        """
        raise NotImplementedError(
            "heatmap generation is disabled in VideoLoader")

    def get_motor(self, idx):
        """Return a zero vector of length 6 (on the GPU when available)."""
        return torch.zeros(6).cuda() if torch.cuda.is_available() else torch.zeros(6)

    def check_rotation(self, path_video_file):
        """Return the rotation tag of the first stream, rounded to 90 degrees.

        The value is read from ``streams[0]['tags']['rotate']`` of the
        ``ffmpeg.probe`` metadata and defaults to 0 when absent.
        """
        meta_dict = ffmpeg.probe(path_video_file)
        streams = meta_dict.get('streams') or [dict(tags=dict())]
        rotate = streams[0].get('tags', dict()).get('rotate', 0)
        return round(int(rotate) / 90.0) * 90

    def load_video(self):
        """Decode all frames of ``self.path`` into one uint8-style array.

        Also records ``frameCount``, ``frameWidth`` and ``frameHeight`` as
        reported by OpenCV. Reading stops at the reported frame count or at
        the first frame that cannot be decoded.

        Returns an array of shape (frames, height, width, channels).
        Raises AssertionError if the video cannot be opened and ValueError if
        no frame could be decoded.
        """
        cap = cv2.VideoCapture(self.path)
        try:
            if not cap.isOpened():
                # Explicit raise (same type as the former assert) so the
                # check still runs under ``python -O``.
                raise AssertionError(self.path + ":video load failed!")
            self.frameCount = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
            self.frameWidth = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
            self.frameHeight = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))

            frame_list = []
            while len(frame_list) < self.frameCount:
                ret, temp = cap.read()
                if not ret or temp is None:
                    break
                frame_list.append(temp)
        finally:
            # Always free the decoder handle, even on failure.
            cap.release()

        if not frame_list:
            raise ValueError(self.path + ":no frames could be decoded")
        return np.stack(frame_list, 0)

if __name__ == '__main__':
    # Manual smoke run: load the video and restrict the crop box to the
    # columns 180 .. 1280 - 180 of rows 0 .. 720.
    source = './videos/'
    loader = VideoLoader(source)
    import matplotlib.pyplot as plt

    loader.set_bbox(min_h=0, min_w=180, max_h=720, max_w=1280 - 180)

0 comments on commit 3fa73c0

Please sign in to comment.