-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathcamera.py
79 lines (65 loc) · 2.37 KB
/
camera.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
import numpy as np
import cv2
class CameraBase:
    """Common interface for cameras that react to a tracked bounding box.

    Subclasses override ``move`` to turn an image and a box into a camera
    motion.
    """

    def __init__(self):
        """Create the camera; the base class stores no state."""

    def move(self, img, bbox):
        """Generate the camera movement for one frame.

        Args:
            img (numpy array): documented as CxHxW.
                NOTE(review): subclasses in this file unpack ``img.shape``
                as ``(h, w, _)`` -- confirm the intended layout.
            bbox (numpy array): (4,), documented as [cx, cy, w, h].
                NOTE(review): ``CameraDiscrete.move`` adds half the box size
                to ``bbox[0]``/``bbox[1]`` -- confirm the intended convention.

        Returns:
            (array): (2,), delta (yaw, pitch)

        Raises:
            NotImplementedError: always; this method must be overridden.
        """
        raise NotImplementedError
class CameraDiscrete(CameraBase):
    """A camera that performs one of five fixed, discrete actions per frame.

    The action is picked from where the box lies relative to the image
    centre: inside a central dead zone nothing happens, otherwise the image
    is split into four sectors by its two diagonals and the sector holding
    the box selects up / down / left / right.
    """

    def __init__(self, center_ratio=0.25):
        """Set up the dead zone and the action table.

        Args:
            center_ratio (float): radius of the central no-op zone, as a
                fraction of ``min(image_width, image_height)``.
        """
        super().__init__()
        self.center_ratio = center_ratio
        # no-op, up, down, left, right, in form of delta (yaw, pitch)
        self.discrete_action = [[0, 0], [0, 10], [0, -10], [-10, 0], [10, 0]]

    def move(self, img, bbox):
        """Pick the discrete action that steers towards the box.

        Args:
            img (numpy array): image whose ``shape`` unpacks as ``(h, w, _)``.
            bbox (sequence): 4 values; the point tracked is
                ``(bbox[0] + bbox[2] / 2, bbox[1] + bbox[3] / 2)``.
                NOTE(review): this treats ``bbox[0:2]`` as the box corner,
                while ``CameraBase.move`` documents [cx, cy, w, h] -- confirm.

        Returns:
            list: one entry of ``self.discrete_action``, i.e. a 2-element
            delta (yaw, pitch). The returned list is the stored one, not a
            copy.
        """
        h, w, _ = img.shape
        r = self.center_ratio * min(w, h)

        # treat center of image as origin and invert y-axis
        cx = bbox[0] + bbox[2] / 2 - w / 2
        cy = -(bbox[1] + bbox[3] / 2 - h / 2)

        if np.sqrt(cx * cx + cy * cy) < r:
            # Close enough to the centre: stay put.
            act_id = 0
        else:
            # The image diagonals are the lines |cy| = (h / w) * |cx|.
            diag = abs(h / w * cx)
            if cy > diag:  # up
                act_id = 1
            elif cy < -diag:  # down
                act_id = 2
            # From here on |cy| <= diag. Deciding by the sign of cx also
            # covers points lying exactly on a diagonal, which previously
            # matched no branch and fell through to the no-op action.
            elif cx < 0:  # left
                act_id = 3
            elif cx > 0:  # right
                act_id = 4
            else:
                # cx == 0 and cy == 0 (only reachable when r <= 0): centred.
                act_id = 0
        return self.discrete_action[act_id]
class CameraOpt(CameraBase):
    """A camera that can accurately point towards a specified direction.

    NOTE(review): ``move`` was left unfinished in the original code (it ended
    in a dangling ``cv2.point`` expression), so it now fails explicitly.
    """

    def __init__(self, fov):
        """Store the field of view.

        Args:
            fov: field of view of the camera. The unfinished ``move`` fed
                ``fov / 2`` straight into ``np.tan`` -- presumably radians;
                TODO confirm.
        """
        super().__init__()
        self.fov = fov

    def move(self, img, bbox, distance):
        """Compute the camera movement that points at the box (unimplemented).

        The original draft only derived the focal length from the image
        width ``w`` (``img.shape`` unpacked as ``(h, w, _)``):

            focal length = image_width / 2 / tan(FOV/2)

        and used the same value for both axes (``fx = fy``).

        Args:
            img (numpy array): current image.
            bbox: target bounding box.
            distance: not used by the original draft; meaning unconfirmed.

        Raises:
            NotImplementedError: always, until the computation is written.
        """
        # Fail loudly and consistently with CameraBase.move instead of
        # crashing with AttributeError on a non-existent cv2 attribute.
        raise NotImplementedError("CameraOpt.move is not implemented yet")
def _depth_conversion(point_depth, f):
"""convert depth to camera center to depth to camera plane
"""
h, w = point_depth.shape
i_c = np.float(h) / 2 - 1
j_c = np.float(w) / 2 - 1
cols, rows = np.meshgrid(np.linspace(0, w - 1, num=w), np.linspace(0, h - 1, num=h))
dist_c = ((rows - i_c)**2 + (cols - j_c)**2)**0.5
plane_depth = point_depth / (1 + (dist_c / f)**2)**0.5
return plane_depth