# test_video.py
# Originally from a fork of xiaoyufenfei/Efficient-Segmentation-Networks.
import torch
import cv2
import numpy as np
from PIL import Image
import matplotlib.pyplot as plt
import random
from builders.model_builder import build_model
# --- Configuration ---------------------------------------------------------
model_name = "ENet"
num_classes_ = 11
# Alternative inference resolution: height, width = 360, 480
height, width = 480, 640
weights_path = 'checkpoint/camvid/ENetbs8gpu1_trainval/model_1000.pth'

# --- Model -----------------------------------------------------------------
# Prefer the GPU but fall back to the CPU so the script still starts on hosts
# without CUDA.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = build_model(model_name, num_classes=num_classes_)
# map_location lets a checkpoint saved on a GPU be loaded on a CPU-only host.
# NOTE(review): torch.load unpickles the file; only load trusted checkpoints.
checkpoint = torch.load(weights_path, map_location=device)
model.load_state_dict(checkpoint['model'])
model.eval()
model.to(device)

# Flat [c0, c1, c2, ...] palette with three random channel values per class.
# It is sized from num_classes_ so every class gets a colour if the class
# count changes. The colours are unseeded and therefore differ between runs.
camvid_palette = [random.randint(0, 255) for _ in range(num_classes_ * 3)]

# --- Video input / output --------------------------------------------------
video_path = 'test_video.mp4'
cap = cv2.VideoCapture(video_path)
if not cap.isOpened():
    # Fail early with a clear message instead of crashing on the first frame.
    raise IOError('Could not open video file: ' + video_path)

fourcc = cv2.VideoWriter_fourcc(*'mp4v')
out_path = model_name + '_output_video.mp4'
# Keep the frame rate as a float: truncating e.g. 29.97 to 29 would make the
# written video play slower than the source.
fps = cap.get(cv2.CAP_PROP_FPS)
# Native size of the source video. The writer below uses (width, height)
# instead, so these two values are informational only.
out_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
out_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
out_video_writer = cv2.VideoWriter(out_path, fourcc, fps, (width, height))
def camvid_colorize_mask(mask, palette):
    """Turn a 2-D class-index mask into a 3-channel uint8 colour image.

    Args:
        mask: array whose first two dimensions give the image size; each
            element is compared against the class indices.
        palette: flat sequence of channel values where entries
            ``[3 * k, 3 * k + 3)`` are the colour of class ``k``. Trailing
            entries that do not form a full triplet are ignored.

    Returns:
        A ``(mask.shape[0], mask.shape[1], 3)`` uint8 array. Pixels whose
        value has no palette entry stay zero.
    """
    rows, cols = mask.shape[0], mask.shape[1]
    canvas = np.zeros((rows, cols, 3), dtype=np.uint8)
    class_count = len(palette) // 3
    for class_id in range(class_count):
        start = class_id * 3
        # The boolean mask selects every pixel of this class in one go.
        canvas[mask == class_id] = palette[start:start + 3]
    return canvas
# Per-frame preprocessing constants, hoisted because they never change.
f_scale = 1
mean = [105.65775, 103.329834, 99.625404]  # camvid

# Run inference on whichever device the model's weights live on, so the loop
# works on both GPU and CPU-only setups.
inference_device = next(model.parameters()).device

try:
    while True:
        ret, frame = cap.read()
        # Check the read result first: at end of stream `frame` is None and
        # cv2.resize would raise instead of letting the loop end cleanly.
        if not ret:
            break
        frame = cv2.resize(frame, (width, height))

        # Preprocess the image. With f_scale == 1 the rescale is an identity,
        # so it is skipped.
        if f_scale != 1:
            image = cv2.resize(frame, None, fx=f_scale, fy=f_scale,
                               interpolation=cv2.INTER_LINEAR)
        else:
            image = frame
        # The dtype conversion allocates a new array, so `frame` stays intact
        # for the overlay below.
        image = np.asarray(image, np.float32)
        image -= mean
        # Reverse the channel axis (revert to RGB). The reversed view has a
        # negative stride, which torch.from_numpy rejects, hence the
        # contiguous copy.
        image_copy = np.ascontiguousarray(image[:, :, ::-1])

        # Convert to a (1, C, H, W) tensor
        input_tensor = torch.from_numpy(image_copy).permute(2, 0, 1).float()
        input_batch = input_tensor.unsqueeze(0)

        # Make prediction
        with torch.no_grad():
            output = model(input_batch.to(inference_device))

        # Post-process the prediction. The copy to host memory waits for the
        # device to finish, so no explicit torch.cuda.synchronize() is needed.
        output = output[0].cpu().numpy()
        predictions = np.argmax(output, axis=0)  # class index per pixel

        # Combine original and segmented image
        colored_semantic_map = camvid_colorize_mask(predictions, camvid_palette)
        if colored_semantic_map.shape[:2] != frame.shape[:2]:
            # addWeighted needs equal sizes. Nearest-neighbour keeps the
            # colours exact (no blending between classes).
            colored_semantic_map = cv2.resize(
                colored_semantic_map,
                (frame.shape[1], frame.shape[0]),
                interpolation=cv2.INTER_NEAREST,
            )
        final_img = cv2.addWeighted(frame, 0.7, colored_semantic_map, 0.3, 0)

        # Display the original frame and the overlay
        cv2.imshow('Original Frame', frame)
        cv2.imshow('Prediction', final_img)

        # Write the overlay to the output video
        out_video_writer.write(final_img)

        # Press 'q' in a display window to stop early.
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
finally:
    # Always release the capture, finalize the output file and close the
    # windows, even if a frame fails mid-stream.
    cap.release()
    out_video_writer.release()
    cv2.destroyAllWindows()