forked from stereolabs/zed-sdk
Commit: adding custom detector python sample
Showing 6 changed files with 4,109 additions and 0 deletions.
48 changes: 48 additions & 0 deletions
object detection/custom detector/python/pytorch_yolov5/README.md
# ZED SDK - Object Detection

This sample shows how to detect custom objects using the official PyTorch implementation of YOLOv5 on a ZED camera feed, and how to ingest the detections into the ZED SDK to extract 3D information and tracking for each object.

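The ingestion itself goes through the ZED SDK custom object detection module. The snippet below is only a minimal sketch of that flow, not the sample's actual code: it assumes an already opened `sl.Camera` named `zed`, and YOLOv5 results flattened into `[x_min, y_min, x_max, y_max, conf, cls]` rows held in a placeholder variable `yolo_detections`.

```python
import numpy as np
import pyzed.sl as sl

def detections_to_custom_boxes(detections):
    # Convert YOLOv5 detections into ZED SDK custom box objects
    output = []
    for x_min, y_min, x_max, y_max, conf, cls in detections:
        obj = sl.CustomBoxObjectData()
        # 2D box given as its four corners, in image coordinates
        obj.bounding_box_2d = np.array([[x_min, y_min], [x_max, y_min],
                                        [x_max, y_max], [x_min, y_max]])
        obj.label = int(cls)
        obj.probability = float(conf)
        obj.is_grounded = False
        output.append(obj)
    return output

# Enable the custom object detection module once
# (object tracking also requires positional tracking to be enabled)
obj_param = sl.ObjectDetectionParameters()
obj_param.detection_model = sl.OBJECT_DETECTION_MODEL.CUSTOM_BOX_OBJECTS
obj_param.enable_tracking = True
zed.enable_object_detection(obj_param)

# Each frame: run YOLOv5 on the left image, ingest the 2D boxes,
# then read back the tracked 3D objects
zed.ingest_custom_box_objects(detections_to_custom_boxes(yolo_detections))
objects = sl.Objects()
zed.retrieve_objects(objects, sl.ObjectDetectionRuntimeParameters())
```

The `cv_viewer` helpers added in this commit then draw the returned `sl.Objects` (2D overlay and top-down tracking view).
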
## Getting Started

- Get the latest [ZED SDK](https://www.stereolabs.com/developers/release/) and [pyZED Package](https://www.stereolabs.com/docs/app-development/python/install/)
- Check the [Documentation](https://www.stereolabs.com/docs/object-detection/custom-od/)

## Setting up

- Clone YOLOv5 into the current folder

```sh
git clone https://github.com/ultralytics/yolov5
# Install the dependencies if needed
cd yolov5
pip install -r requirements.txt
```

- Download a model file (or prepare your own) from https://github.com/ultralytics/yolov5/releases

```sh
# Download via command line
wget https://github.com/ultralytics/yolov5/releases/download/v6.0/yolov5m.pt
```

## Run the program

*NOTE: The ZED v1 is not compatible with this module*

```sh
python detector.py --weights yolov5m.pt # [--img_size 512 --conf_thres 0.1 --svo path/to/file.svo]
```

### Features

- The camera point cloud is displayed in a 3D OpenGL view
- 3D bounding boxes are drawn around detected objects
- Object classes and confidence thresholds can be changed

## Training your own model

This sample can use any model trained with YOLOv5, including custom-trained ones. To get started with training a model on a custom dataset, see the YOLOv5 guide at https://docs.ultralytics.com/tutorials/train-custom-datasets/

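Once trained, pass your own weights to the sample exactly as with the pretrained ones; the path below is only an example of YOLOv5's default output location:

```sh
python detector.py --weights runs/train/exp/weights/best.pt
```
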

## Support

If you need assistance, go to our Community site at https://community.stereolabs.com/
247 changes: 247 additions & 0 deletions
object detection/custom detector/python/pytorch_yolov5/cv_viewer/tracking_viewer.py
import cv2
import numpy as np

from cv_viewer.utils import *
import pyzed.sl as sl
import math
from collections import deque


# ----------------------------------------------------------------------
#       2D LEFT VIEW
# ----------------------------------------------------------------------


def cvt(pt, scale):
    """
    Function that scales point coordinates
    """
    out = [pt[0] * scale[0], pt[1] * scale[1]]
    return out


def get_image_position(bounding_box_image, img_scale):
    out_position = np.zeros(2)
    out_position[0] = (bounding_box_image[0][0] + (bounding_box_image[2][0] - bounding_box_image[0][0]) * 0.5) * \
                      img_scale[0]
    out_position[1] = (bounding_box_image[0][1] + (bounding_box_image[2][1] - bounding_box_image[0][1]) * 0.5) * \
                      img_scale[1]
    return out_position

def render_2D(left_display, img_scale, objects, is_tracking_on):
    overlay = left_display.copy()

    line_thickness = 2
    for obj in objects.object_list:
        if render_object(obj, is_tracking_on):
            base_color = generate_color_id_u(obj.id)
            # Display image-scaled 2D bounding box
            top_left_corner = cvt(obj.bounding_box_2d[0], img_scale)
            top_right_corner = cvt(obj.bounding_box_2d[1], img_scale)
            bottom_right_corner = cvt(obj.bounding_box_2d[2], img_scale)
            bottom_left_corner = cvt(obj.bounding_box_2d[3], img_scale)

            # Creation of the 2 horizontal lines
            cv2.line(left_display, (int(top_left_corner[0]), int(top_left_corner[1])),
                     (int(top_right_corner[0]), int(top_right_corner[1])), base_color, line_thickness)
            cv2.line(left_display, (int(bottom_left_corner[0]), int(bottom_left_corner[1])),
                     (int(bottom_right_corner[0]), int(bottom_right_corner[1])), base_color, line_thickness)
            # Creation of the 2 vertical lines
            draw_vertical_line(left_display, bottom_left_corner, top_left_corner, base_color, line_thickness)
            draw_vertical_line(left_display, bottom_right_corner, top_right_corner, base_color, line_thickness)

            # Scaled ROI (roi_width spans the x axis, roi_height the y axis)
            roi_width = int(top_right_corner[0] - top_left_corner[0])
            roi_height = int(bottom_left_corner[1] - top_left_corner[1])
            overlay_roi = overlay[int(top_left_corner[1]):int(top_left_corner[1] + roi_height),
                                  int(top_left_corner[0]):int(top_left_corner[0] + roi_width)]

            overlay_roi[:, :, :] = base_color

            # Display object label as text
            position_image = get_image_position(obj.bounding_box_2d, img_scale)
            text_position = (int(position_image[0] - 20), int(position_image[1] - 12))
            text = "class " + str(obj.raw_label)
            text_color = (255, 255, 255, 255)
            cv2.putText(left_display, text, text_position, cv2.FONT_HERSHEY_COMPLEX_SMALL, 0.5, text_color, 1)

            # Display object distance to camera as text
            if np.isfinite(obj.position[2]):
                text = str(round(abs(obj.position[2]), 1)) + "M"
                text_position = (int(position_image[0] - 20), int(position_image[1]))
                cv2.putText(left_display, text, text_position, cv2.FONT_HERSHEY_COMPLEX_SMALL, 0.5, text_color, 1)

    # Here, overlay is the same as the left image, but with an opaque mask on each detected object
    cv2.addWeighted(left_display, 0.7, overlay, 0.3, 0.0, left_display)

# ----------------------------------------------------------------------
#       2D TRACKING VIEW
# ----------------------------------------------------------------------

class TrackingViewer:
    def __init__(self, res, fps, D_max):
        # Window size
        self.window_width = res.width
        self.window_height = res.height

        # Visualisation settings
        self.has_background_ready = False
        self.background = np.full((self.window_height, self.window_width, 4), [245, 239, 239, 255], np.uint8)

        # Invert Z due to the Y axis of the OpenCV window
        # Show objects between [z_min, 0] (z_min < 0)
        self.z_min = -D_max
        # Show objects between [x_min, x_max]
        self.x_min = self.z_min
        self.x_max = -self.x_min

        # Conversion from world position to pixel coordinates
        self.x_step = (self.x_max - self.x_min) / self.window_width
        self.z_step = abs(self.z_min) / self.window_height

        self.camera_calibration = sl.CalibrationParameters()

        # List of alive tracks
        self.tracklets = []

    def set_camera_calibration(self, calib):
        self.camera_calibration = calib
        self.has_background_ready = False

    def generate_view(self, objects, current_camera_pose, tracking_view, tracking_enabled):
        # Transform object positions into the WORLD reference frame
        for obj in objects.object_list:
            pos = obj.position
            tmp_pos = sl.Translation()
            tmp_pos.init_vector(pos[0], pos[1], pos[2])
            new_pos = (tmp_pos * current_camera_pose.get_orientation()).get() + \
                      current_camera_pose.get_translation().get()
            obj.position = np.array([new_pos[0], new_pos[1], new_pos[2]])

        # Initialize visualisation
        if not self.has_background_ready:
            self.generate_background()

        np.copyto(tracking_view, self.background, 'no')

        if tracking_enabled:
            # First add new points and remove the ones that are too old
            current_timestamp = objects.timestamp.get_seconds()
            self.add_to_tracklets(objects, current_timestamp)
            self.prune_old_points(current_timestamp)

            # Draw all tracklets
            self.draw_tracklets(tracking_view, current_camera_pose)
        else:
            self.draw_points(objects.object_list, tracking_view, current_camera_pose)

    def add_to_tracklets(self, objects, current_timestamp):
        for obj in objects.object_list:
            if (obj.tracking_state != sl.OBJECT_TRACKING_STATE.OK) or (not np.isfinite(obj.position[0])) or (
                    obj.id < 0):
                continue

            new_object = True
            for i in range(len(self.tracklets)):
                if self.tracklets[i].id == obj.id:
                    new_object = False
                    self.tracklets[i].add_point(obj, current_timestamp)

            # In case this object does not belong to an existing track
            if new_object:
                self.tracklets.append(Tracklet(obj, obj.label, current_timestamp))

    def prune_old_points(self, ts):
        track_to_delete = []
        for it in self.tracklets:
            # Drop tracks that have not been updated for more than 3 seconds
            if (ts - it.last_timestamp) > 3:
                track_to_delete.append(it)

        for it in track_to_delete:
            self.tracklets.remove(it)

    # ----------------------------------------------------------------------
    #       Drawing functions
    # ----------------------------------------------------------------------

    def draw_points(self, objects, tracking_view, current_camera_pose):
        for obj in objects:
            if not np.isfinite(obj.position[0]):
                continue
            clr = generate_color_id_u(obj.id)
            pt = TrackPoint(obj.position)
            cv_start_point = self.to_cv_point(pt.get_xyz(), current_camera_pose)
            cv2.circle(tracking_view, (int(cv_start_point[0]), int(cv_start_point[1])), 6, clr, 2)

    def draw_tracklets(self, tracking_view, current_camera_pose):
        for track in self.tracklets:
            clr = generate_color_id_u(track.id)
            cv_start_point = self.to_cv_point(track.positions[0].get_xyz(), current_camera_pose)
            for point_index in range(1, len(track.positions)):
                cv_end_point = self.to_cv_point(track.positions[point_index].get_xyz(), current_camera_pose)
                cv2.line(tracking_view, (int(cv_start_point[0]), int(cv_start_point[1])),
                         (int(cv_end_point[0]), int(cv_end_point[1])), clr, 3)
                cv_start_point = cv_end_point
            cv2.circle(tracking_view, (int(cv_start_point[0]), int(cv_start_point[1])), 6, clr, -1)

    def generate_background(self):
        camera_color = [255, 230, 204, 255]

        # Get FOV intersection with window borders
        fov = 2.0 * math.atan(
            self.camera_calibration.left_cam.image_size.width / (2.0 * self.camera_calibration.left_cam.fx))

        z_at_x_max = self.x_max / math.tan(fov / 2.0)
        left_intersection_pt = self.to_cv_point(self.x_min, -z_at_x_max)
        right_intersection_pt = self.to_cv_point(self.x_max, -z_at_x_max)

        # Drawing camera
        camera_pts = np.array([left_intersection_pt,
                               right_intersection_pt,
                               [int(self.window_width / 2), self.window_height]],
                              dtype=np.int32)
        cv2.fillConvexPoly(self.background, camera_pts, camera_color)

    def to_cv_point(self, x, z):
        # Convert a metric position into pixel coordinates in the top-down tracking view.
        # Accepts either (x, z) floats, or a position list / TrackPoint together with the camera pose.
        out = []
        if isinstance(x, float) and isinstance(z, float):
            out = [int((x - self.x_min) / self.x_step), int((z - self.z_min) / self.z_step)]
        elif isinstance(x, list) and isinstance(z, sl.Pose):
            # Go back to the camera's current pose
            rotation = z.get_rotation_matrix()
            rotation.inverse()
            tmp = x - (z.get_translation() * rotation.get_orientation()).get()
            new_position = sl.Translation()
            new_position.init_vector(tmp[0], tmp[1], tmp[2])
            out = [int(((new_position.get()[0] - self.x_min) / self.x_step) + 0.5),
                   int(((new_position.get()[2] - self.z_min) / self.z_step) + 0.5)]
        elif isinstance(x, TrackPoint) and isinstance(z, sl.Pose):
            pos = x.get_xyz()
            out = self.to_cv_point(pos, z)
        else:
            print("Unhandled argument type")
        return out

class TrackPoint:
    def __init__(self, pos_):
        self.x = pos_[0]
        self.y = pos_[1]
        self.z = pos_[2]

    def get_xyz(self):
        return [self.x, self.y, self.z]


class Tracklet:
    def __init__(self, obj_, type_, timestamp_):
        self.id = obj_.id
        self.object_type = type_
        self.positions = deque()
        self.add_point(obj_, timestamp_)

    def add_point(self, obj_, timestamp_):
        self.positions.append(TrackPoint(obj_.position))
        self.last_timestamp = timestamp_
38 changes: 38 additions & 0 deletions
object detection/custom detector/python/pytorch_yolov5/cv_viewer/utils.py
import cv2
import numpy as np
import pyzed.sl as sl

id_colors = [(232, 176, 59),
             (175, 208, 25),
             (102, 205, 105),
             (185, 0, 255),
             (99, 107, 252)]


def render_object(object_data, is_tracking_on):
    if is_tracking_on:
        return object_data.tracking_state == sl.OBJECT_TRACKING_STATE.OK
    else:
        return (object_data.tracking_state == sl.OBJECT_TRACKING_STATE.OK) or (
                object_data.tracking_state == sl.OBJECT_TRACKING_STATE.OFF)


def generate_color_id_u(idx):
    # Pick a display color (RGBA) for an object id; negative ids get a default color
    arr = []
    if idx < 0:
        arr = [236, 184, 36, 255]
    else:
        color_idx = idx % 5
        arr = [id_colors[color_idx][0], id_colors[color_idx][1], id_colors[color_idx][2], 255]
    return arr


def draw_vertical_line(left_display, start_pt, end_pt, clr, thickness):
    # Draw only the two end segments (1/7th of the length each) of the vertical edge
    n_steps = 7
    pt1 = [((n_steps - 1) * start_pt[0] + end_pt[0]) / n_steps,
           ((n_steps - 1) * start_pt[1] + end_pt[1]) / n_steps]
    pt4 = [(start_pt[0] + (n_steps - 1) * end_pt[0]) / n_steps,
           (start_pt[1] + (n_steps - 1) * end_pt[1]) / n_steps]

    cv2.line(left_display, (int(start_pt[0]), int(start_pt[1])), (int(pt1[0]), int(pt1[1])), clr, thickness)
    cv2.line(left_display, (int(pt4[0]), int(pt4[1])), (int(end_pt[0]), int(end_pt[1])), clr, thickness)