-
Notifications
You must be signed in to change notification settings - Fork 15
/
Copy pathdatadescriptor.py
136 lines (123 loc) · 6.32 KB
/
datadescriptor.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
"""
#Values    Name         Description
----------------------------------------------------------------------------
   1       type         Describes the type of object: 'Car', 'Van', 'Truck',
                        'Pedestrian', 'Person_sitting', 'Cyclist', 'Tram',
                        'Misc' or 'DontCare'
   1       truncated    Float from 0 (non-truncated) to 1 (truncated), where
                        truncated refers to the object leaving image boundaries
   1       occluded     Integer (0,1,2,3) indicating occlusion state:
                        0 = fully visible, 1 = partly occluded
                        2 = largely occluded, 3 = unknown
   1       alpha        Observation angle of object, ranging [-pi..pi]
   4       bbox         2D bounding box of object in the image (0-based index):
                        contains left, top, right, bottom pixel coordinates
   3       dimensions   3D object dimensions: height, width, length (in meters)
   3       location     3D object location x,y,z in camera coordinates (in meters)
   1       rotation_y   Rotation ry around Y-axis in camera coordinates [-pi..pi]
   1       score        Only for results: Float, indicating confidence in
                        detection, needed for p/r curves, higher is better.
"""
from typing import List
from math import pi
class KittiDescriptor:
    """Store a single datapoint for the KITTI 3D object detection task.

    The fields are filled in through the ``set_*`` methods and serialised to
    one space-separated KITTI label line by ``str(descriptor)``.
    """

    def __init__(self, type=None, bbox=None, dimensions=None, location=None, rotation_y=None, extent=None):
        """Create a descriptor; every field may be filled in later via setters.

        ``truncated`` and ``occluded`` start at 0 and ``alpha`` at -10; the
        remaining fields take the values passed in (``None`` by default).
        """
        self.type = type
        self.truncated = 0
        self.occluded = 0
        self.alpha = -10
        self.bbox = bbox
        self.dimensions = dimensions
        self.location = location
        self.rotation_y = rotation_y
        self.extent = extent
        self._valid_classes = ['Car', 'Van', 'Truck',
                               'Pedestrian', 'Person_sitting', 'Cyclist', 'Tram',
                               'Misc', 'DontCare']

    def set_type(self, obj_type: str):
        """Set the object class; must be one of ``self._valid_classes``.

        Raises:
            AssertionError: if ``obj_type`` is not a valid class name.
        """
        assert obj_type in self._valid_classes, "Object must be of types {}".format(
            self._valid_classes)
        self.type = obj_type

    def set_truncated(self, truncated: float):
        """Set the truncation value, a number in the closed range [0, 1].

        Raises:
            AssertionError: if ``truncated`` is outside [0, 1].
        """
        assert 0 <= truncated <= 1, (
            "Truncated must be Float from 0 (non-truncated) to 1 (truncated), where\n"
            "truncated refers to the object leaving image boundaries "
        )
        self.truncated = truncated

    def set_occlusion(self, occlusion: int):
        """Set the occlusion state (0, 1, 2 or 3).

        Raises:
            AssertionError: if ``occlusion`` is not in ``range(0, 4)``.
        """
        assert occlusion in range(0, 4), (
            "Occlusion must be Integer (0,1,2,3) indicating occlusion state:\n"
            "0 = fully visible, 1 = partly occluded\n"
            "2 = largely occluded, 3 = unknown"
        )
        # Store on the public attribute: that is the one __init__ initialises
        # and __str__ serialises. Previously only ``_occluded`` was written,
        # so the emitted label always reported occlusion 0.
        self.occluded = occlusion
        # Mirrored for any external code that read the old private attribute.
        self._occluded = occlusion

    def set_alpha(self, alpha: float):
        """Set the observation angle, which must lie in [-pi, pi].

        Raises:
            AssertionError: if ``alpha`` is outside [-pi, pi].
        """
        assert -pi <= alpha <= pi, "Alpha must be in range [-pi..pi]"
        self.alpha = alpha

    def set_bbox(self, bbox: List[int]):
        """Set the 2D bounding box; ``bbox`` must contain exactly four values.

        Raises:
            AssertionError: if ``len(bbox) != 4``.
        """
        assert len(bbox) == 4, (
            " Bbox must be 2D bounding box of object in the image (0-based index):\n"
            "contains left, top, right, bottom pixel coordinates (two points)"
        )
        self.bbox = bbox

    def set_3d_object_dimensions(self, bbox_extent):
        """Derive ``extent`` and ``dimensions`` from a bounding-box extent.

        ``bbox_extent`` must expose ``x``, ``y`` and ``z`` attributes. The
        half-extents are stored as ``self.extent`` and the doubled values are
        stored, space-separated, in ``self.dimensions``.
        """
        # Bbox extent consists of x, y and z.
        # The bbox extent is by Carla set as
        #   x: length of vehicle (driving direction)
        #   y: to the right of the vehicle
        #   z: up (direction of car roof)
        # Kitti expects height, width and length.
        # NOTE(review): the original comment states the mapping as (z, y, x),
        # but the code assigns width <- x and length <- y. The assignment is
        # kept as-is because emitted labels depend on it -- confirm which
        # ordering is intended.
        height, width, length = bbox_extent.z, bbox_extent.x, bbox_extent.y
        # Since Carla gives us bbox extent, which is a half-box, multiply all by two
        self.extent = (height, width, length)
        self.dimensions = "{} {} {}".format(2*height, 2*width, 2*length)

    def set_3d_object_location(self, obj_location):
        """Convert an object location to KITTI coordinates and store it.

        In Unreal, the coordinate system of the engine (which is the same as
        the lidar points) is defined as

                z
                ^   ^ x
                |  /
                | /
                |/____> y

        This is a left-handed coordinate system, with x being forward, y to
        the right and z up.
        See also https://github.com/carla-simulator/carla/issues/498

        The camera coordinate system for KITTI is defined as

                  ^ z
                 /
                /
               /____> x
               |
               |
               v
               y

        This is a right-handed coordinate system with z being forward, x to
        the right and y down.

        The conversion applied here is
            Carla:  X  Y  Z
            KITTI: -X -Y  Z

        ``obj_location`` is iterated and its first three items are used as
        x, y, z. The result is stored in ``self.location`` as a
        space-separated string.

        Raises:
            AssertionError: if ``extent`` or ``type`` has not been set yet.
        """
        # Object location is four values (x, y, z, w). We only care about three of them (xyz)
        x, y, z = [float(component) for component in obj_location][0:3]
        assert self.extent is not None and self.type is not None, \
            "Extent and type must be set before location!"
        if self.type == "Pedestrian":
            # Since the midpoint/location of the pedestrian is in the middle of the agent,
            # while for car it is at the bottom, we need to subtract the bbox extent in the
            # height direction when adding location of pedestrian.
            y -= self.extent[0]
        # Convert from Carla coordinate system to KITTI.
        # Alternative layouts noted by the original author (kept for reference):
        #   AVOD (image):   negate x and y, then emit [y, -z, x]
        #   SECOND (lidar): [z, x, y] or [z, x, -y]
        self.location = " ".join(map(str, [-x, -y, z]))

    def set_rotation_y(self, rotation_y: float):
        """Set the rotation around the Y-axis, which must lie in [-pi, pi].

        Raises:
            AssertionError: if ``rotation_y`` is outside [-pi, pi].
        """
        assert -pi <= rotation_y <= pi, "Rotation y must be in range [-pi..pi] - found {}".format(
            rotation_y)
        self.rotation_y = rotation_y

    def __str__(self):
        """Return the KITTI-formatted label line for this datapoint.

        No validation is performed: unset fields are rendered with their
        current value (e.g. ``None``), and a missing bbox is rendered as a
        single space.
        """
        if self.bbox is None:
            bbox_format = " "
        else:
            bbox_format = " ".join([str(x) for x in self.bbox])
        return "{} {} {} {} {} {} {} {}".format(
            self.type, self.truncated, self.occluded, self.alpha,
            bbox_format, self.dimensions, self.location, self.rotation_y)