Skip to content

Commit

Permalink
add new tools to generate the tusimple training samples
Browse files Browse the repository at this point in the history
  • Loading branch information
MaybeShewill-CV committed Dec 13, 2018
1 parent 6d2a963 commit fb7c3f5
Show file tree
Hide file tree
Showing 2 changed files with 202 additions and 2 deletions.
27 changes: 25 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -113,7 +113,7 @@ The accuracy during training process rises as follows:

Please cite my repo [lanenet-lane-detection](https://github.com/MaybeShewill-CV/lanenet-lane-detection) if you use it.

## Recently updates
## Recently updates 2018.11.10
Adjust some basic cnn op according to the new tensorflow api. Use the
traditional SGD optimizer to optimize the whole model instead of the
origin Adam optimizer used in the origin paper. I have found that the
Expand All @@ -132,8 +132,31 @@ git pull origin master
The rest are just the same as which mentioned above. And recently I will
release a new model trained on culane dataset.

## Recently updates 2018.12.13
Since many users want an automatic tool to generate the training samples
from the Tusimple dataset, I have uploaded the tool I use to generate the
training samples. You first need to download the Tusimple dataset and unzip
the file to your local disk. Then run the following command to generate the
training samples and the train.txt file.

```bash
python tools/generate_tusimple_dataset.py --src_dir path/to/your/unzipped/file
```

The script will create the train folder and the test folder. The training
samples — the origin rgb image, the binary label image and the instance
label image — will be automatically generated in the training/gt_image,
training/gt_binary_image and training/gt_instance_image folders. You may
check them yourself before starting the training process.

Note that the script only processes the training samples, and you need to
select several lines from the train.txt to generate your own val.txt file.
In order to obtain the test images you can modify the script on your own.

## TODO
- [x] Add a embedding visualization tools to visualize the embedding feature map
- [x] Add detailed explanation of training the components of lanenet separately.
- [x] Training the model on different dataset
- [ ] Adjust the lanenet hnet model and merge the hnet model to the main lanenet model
- [x] Adjust the lanenet hnet model and merge the hnet model to the main lanenet model
- [ ] Change the normalization function from BN to GN
177 changes: 177 additions & 0 deletions tools/generate_tusimple_dataset.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,177 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
# @Time : 18-5-18 下午7:31
# @Author : Luo Yao
# @Site : http://icode.baidu.com/repos/baidu/personal-code/Luoyao
# @File : generate_tusimple_dataset.py
# @IDE: PyCharm Community Edition
"""
处理tusimple数据集脚本
"""
import argparse
import glob
import json
import os
import os.path as ops
import shutil

import cv2
import numpy as np


def init_args():
    """
    Parse the command line arguments.

    :return: argparse.Namespace with a single ``src_dir`` attribute pointing
        at the root of the unzipped tusimple dataset.
    """
    parser = argparse.ArgumentParser()
    # required=True: without it a missing --src_dir silently yields None and
    # the script later crashes with a confusing os.path error instead of a
    # clear argparse usage message.
    parser.add_argument('--src_dir', type=str, required=True,
                        help='The origin path of unzipped tusimple dataset')

    return parser.parse_args()


def process_json_file(json_file_path, src_dir, ori_dst_dir, binary_dst_dir, instance_dst_dir):
    """
    Convert one tusimple label json file into training sample images.

    Each line of the json file describes one frame: the source image path
    ('raw_file'), the sampled row coordinates ('h_samples') and, per lane,
    the column coordinate at each sampled row ('lanes', -2 marks a row where
    the lane is absent). For every frame this writes three images sharing a
    sequential zero-padded name: the origin rgb image, a binary lane mask
    and an instance lane mask.

    :param json_file_path: path of the tusimple label json file
    :param src_dir: root directory holding the origin clips folders
    :param ori_dst_dir: output directory for the rgb training samples
    :param binary_dst_dir: output directory for the binary lane masks
    :param instance_dst_dir: output directory for the instance lane masks
    :return:
    """
    assert ops.exists(json_file_path), '{:s} not exist'.format(json_file_path)

    # Continue numbering from the images already present so that processing
    # several json files (or re-running) does not overwrite earlier samples.
    image_nums = len(os.listdir(ori_dst_dir))

    with open(json_file_path, 'r') as file:
        for line_index, line in enumerate(file):
            info_dict = json.loads(line)

            image_dir = ops.split(info_dict['raw_file'])[0]
            image_dir_split = image_dir.split('/')[1:]
            image_dir_split.append(ops.split(info_dict['raw_file'])[1])
            image_name = '_'.join(image_dir_split)
            image_path = ops.join(src_dir, info_dict['raw_file'])
            assert ops.exists(image_path), '{:s} not exist'.format(image_path)

            h_samples = info_dict['h_samples']
            lanes = info_dict['lanes']

            image_name_new = '{:s}.png'.format('{:d}'.format(line_index + image_nums).zfill(4))

            src_image = cv2.imread(image_path, cv2.IMREAD_COLOR)
            # BUGFIX: cv2.imread returns None (it does not raise) for corrupt
            # or unreadable files; the original code crashed on .shape below.
            # Skip such frames so one bad file does not abort the whole run.
            if src_image is None:
                print('Skip unreadable image {:s}'.format(image_path))
                continue
            dst_binary_image = np.zeros([src_image.shape[0], src_image.shape[1]], np.uint8)
            dst_instance_image = np.zeros([src_image.shape[0], src_image.shape[1]], np.uint8)

            for lane_index, lane in enumerate(lanes):
                assert len(h_samples) == len(lane)
                # Collect only the rows where this lane is actually present.
                lane_x = []
                lane_y = []
                for index in range(len(lane)):
                    if lane[index] == -2:
                        continue
                    else:
                        ptx = lane[index]
                        pty = h_samples[index]
                        lane_x.append(ptx)
                        lane_y.append(pty)
                if not lane_x:
                    continue
                lane_pts = np.vstack((lane_x, lane_y)).transpose()
                lane_pts = np.array([lane_pts], np.int64)

                # Binary mask: every lane is drawn white. Instance mask: each
                # lane gets a distinct gray level so lanes remain separable.
                cv2.polylines(dst_binary_image, lane_pts, isClosed=False,
                              color=255, thickness=5)
                cv2.polylines(dst_instance_image, lane_pts, isClosed=False,
                              color=lane_index * 50 + 20, thickness=5)

            dst_binary_image_path = ops.join(binary_dst_dir, image_name_new)
            dst_instance_image_path = ops.join(instance_dst_dir, image_name_new)
            dst_rgb_image_path = ops.join(ori_dst_dir, image_name_new)

            cv2.imwrite(dst_binary_image_path, dst_binary_image)
            cv2.imwrite(dst_instance_image_path, dst_instance_image)
            cv2.imwrite(dst_rgb_image_path, src_image)

            print('Process {:s} success'.format(image_name))


def gen_train_sample(src_dir, b_gt_image_dir, i_gt_image_dir, image_dir):
    """
    Generate the image training list file (train.txt).

    Each written line holds three space-separated paths:
    origin image, binary ground truth image, instance ground truth image.

    :param src_dir: dataset root directory
    :param b_gt_image_dir: directory of binary ground truth images
    :param i_gt_image_dir: directory of instance segmentation ground truth images
    :param image_dir: directory of origin rgb images
    :return:
    """

    with open('{:s}/training/train.txt'.format(src_dir), 'w') as list_file:

        for image_name in os.listdir(b_gt_image_dir):
            # Only the generated png samples belong in the list.
            if not image_name.endswith('.png'):
                continue

            binary_gt_image_path = ops.join(b_gt_image_dir, image_name)
            instance_gt_image_path = ops.join(i_gt_image_dir, image_name)
            image_path = ops.join(image_dir, image_name)

            assert ops.exists(image_path), '{:s} not exist'.format(image_path)
            assert ops.exists(instance_gt_image_path), '{:s} not exist'.format(instance_gt_image_path)

            # Re-read all three images to verify none of them is corrupt
            # before the triple is added to the training list.
            loaded_images = [
                cv2.imread(binary_gt_image_path, cv2.IMREAD_COLOR),
                cv2.imread(instance_gt_image_path, cv2.IMREAD_COLOR),
                cv2.imread(image_path, cv2.IMREAD_COLOR),
            ]

            if any(image is None for image in loaded_images):
                print('图像对: {:s}损坏'.format(image_name))
                continue

            info = '{:s} {:s} {:s}'.format(image_path, binary_gt_image_path, instance_gt_image_path)
            list_file.write(info + '\n')
    return


def process_tusimple_dataset(src_dir):
    """
    Build the tusimple training/testing folder layout and generate all samples.

    :param src_dir: root directory of the unzipped tusimple dataset
    :return:
    """
    training_folder_path = ops.join(src_dir, 'training')
    testing_folder_path = ops.join(src_dir, 'testing')

    for folder_path in (training_folder_path, testing_folder_path):
        os.makedirs(folder_path, exist_ok=True)

    # Copy the label json files next to the samples they describe:
    # label*.json belongs to training, test*.json belongs to testing.
    copy_plan = (
        ('label*.json', training_folder_path),
        ('test*.json', testing_folder_path),
    )
    for pattern, dst_folder in copy_plan:
        for json_label_path in glob.glob('{:s}/{:s}'.format(src_dir, pattern)):
            json_label_name = ops.split(json_label_path)[1]
            shutil.copyfile(json_label_path, ops.join(dst_folder, json_label_name))

    gt_image_dir = ops.join(training_folder_path, 'gt_image')
    gt_binary_dir = ops.join(training_folder_path, 'gt_binary_image')
    gt_instance_dir = ops.join(training_folder_path, 'gt_instance_image')

    for folder_path in (gt_image_dir, gt_binary_dir, gt_instance_dir):
        os.makedirs(folder_path, exist_ok=True)

    # Every json file copied into the training folder is rendered into
    # rgb / binary / instance sample images.
    for json_label_path in glob.glob('{:s}/*.json'.format(training_folder_path)):
        process_json_file(json_label_path, src_dir, gt_image_dir, gt_binary_dir, gt_instance_dir)

    gen_train_sample(src_dir, gt_binary_dir, gt_instance_dir, gt_image_dir)

    return


if __name__ == '__main__':
    # Parse the CLI arguments and run the whole dataset conversion pipeline.
    cli_args = init_args()

    process_tusimple_dataset(cli_args.src_dir)

0 comments on commit fb7c3f5

Please sign in to comment.