diff --git a/point-cloud b/point-cloud deleted file mode 160000 index e4c429e..0000000 --- a/point-cloud +++ /dev/null @@ -1 +0,0 @@ -Subproject commit e4c429e813608acbcf487656abe2eb87dcc4636c diff --git a/point-cloud/.DS_Store b/point-cloud/.DS_Store new file mode 100644 index 0000000..2ab131c Binary files /dev/null and b/point-cloud/.DS_Store differ diff --git a/point-cloud/.gitignore b/point-cloud/.gitignore new file mode 100644 index 0000000..5a80274 --- /dev/null +++ b/point-cloud/.gitignore @@ -0,0 +1,9 @@ +dataset +# cache +__pycache__ + +# results +results + +# logs +logs \ No newline at end of file diff --git a/point-cloud/.idea/deployment.xml b/point-cloud/.idea/deployment.xml new file mode 100644 index 0000000..a28531d --- /dev/null +++ b/point-cloud/.idea/deployment.xml @@ -0,0 +1,14 @@ + + + + + + + + + + + + + + \ No newline at end of file diff --git a/point-cloud/.idea/inspectionProfiles/profiles_settings.xml b/point-cloud/.idea/inspectionProfiles/profiles_settings.xml new file mode 100644 index 0000000..20fc29e --- /dev/null +++ b/point-cloud/.idea/inspectionProfiles/profiles_settings.xml @@ -0,0 +1,6 @@ + + + + \ No newline at end of file diff --git a/point-cloud/.idea/misc.xml b/point-cloud/.idea/misc.xml new file mode 100644 index 0000000..cbb5b0e --- /dev/null +++ b/point-cloud/.idea/misc.xml @@ -0,0 +1,7 @@ + + + + + + \ No newline at end of file diff --git a/point-cloud/.idea/modules.xml b/point-cloud/.idea/modules.xml new file mode 100644 index 0000000..34b8176 --- /dev/null +++ b/point-cloud/.idea/modules.xml @@ -0,0 +1,8 @@ + + + + + + + + \ No newline at end of file diff --git a/point-cloud/.idea/sfa3d.iml b/point-cloud/.idea/sfa3d.iml new file mode 100644 index 0000000..2946dc0 --- /dev/null +++ b/point-cloud/.idea/sfa3d.iml @@ -0,0 +1,12 @@ + + + + + + + + + + \ No newline at end of file diff --git a/point-cloud/.idea/vcs.xml b/point-cloud/.idea/vcs.xml new file mode 100644 index 0000000..9661ac7 --- /dev/null +++ b/point-cloud/.idea/vcs.xml @@ -0,0 +1,6 @@ + + + + + + \ No newline at end of file diff --git a/point-cloud/.idea/workspace.xml b/point-cloud/.idea/workspace.xml new file mode 100644 index 0000000..80ba82a --- /dev/null +++ b/point-cloud/.idea/workspace.xml @@ -0,0 +1,49 @@ + + + + + + + + + + + + + + + + + + + + 1661844398596 + + + + + + + \ No newline at end of file diff --git a/point-cloud/LICENSE b/point-cloud/LICENSE new file mode 100644 index 0000000..f35fbf5 --- /dev/null +++ b/point-cloud/LICENSE @@ -0,0 +1,21 @@ +MIT License + +Copyright (c) 2020 Nguyen Mau Dung + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. diff --git a/point-cloud/README.md b/point-cloud/README.md new file mode 100644 index 0000000..1bd2ab2 --- /dev/null +++ b/point-cloud/README.md @@ -0,0 +1,116 @@ +# Super Fast and Accurate 3D Object Detection based on 3D LiDAR Point Clouds + +[![python-image]][python-url] +[![pytorch-image]][pytorch-url] + +--- + +## 1. Getting Started +### 1.1 Requirement + +The instructions for setting up a virtual environment are [here](https://github.com/maudzung/virtual_environment_python3). + +```shell script +cd SFA3D/ +pip install -r requirements.txt +``` + +### 1.2 Data Preparation +Download the 3D KITTI detection dataset from [here](http://www.cvlibs.net/datasets/kitti/eval_object.php?obj_benchmark=3d). + +The downloaded data includes: + +- Velodyne point clouds _**(29 GB)**_ +- Training labels of object data set _**(5 MB)**_ + + + +Please make sure that you construct the source code & dataset directory structure as shown below. + +## 2. How to run + + +### 2.1 Inference + +The pre-trained model is included in this repo. +- **CPU** +``` +python inference.py --no_cuda=True +``` +- **GPU** +``` +python inference.py +``` +Inference labels: + +- Pedestrian +- Car +- Cyclist + +### 2.2 Training +#### 2.2.1 CPU +``` +python train.py --no_cuda=True +``` + +#### 2.2.2 Single machine, single GPU + +```shell script +python train.py --gpu_idx 0 +``` + +#### 2.2.3 Distributed Data Parallel Training +- **Single machine (node), multiple GPUs** + +``` +python train.py --multiprocessing-distributed --world-size 1 --rank 0 --batch_size 64 --num_workers 8 +``` + +- **Two machines (two nodes), multiple GPUs** + + - _**First machine**_ + ``` + python train.py --dist-url 'tcp://IP_OF_NODE1:FREEPORT' --multiprocessing-distributed --world-size 2 --rank 0 --batch_size 64 --num_workers 8 + ``` + + - _**Second machine**_ + ``` + python train.py --dist-url 'tcp://IP_OF_NODE2:FREEPORT' --multiprocessing-distributed --world-size 2 --rank 1 --batch_size 64 --num_workers 8 + ``` + +## References +[1] SFA3D: [PyTorch Implementation](https://github.com/maudzung/SFA3D) + +## Folder structure +### Dataset +``` +└── kitti/ + ├── image_2/ (left color camera, optional) + ├── calib/ (optional) + ├── label_2/ (annotation results/labels, optional) + └── velodyne/ (point cloud files, required) +``` +### Checkpoints & Algorithm +``` +${ROOT} +└── checkpoints/ + ├── fpn_resnet_18/ + ├── fpn_resnet_18_epoch_300.pth (point cloud object detection annotation model) +└── sfa/ (point cloud annotation algorithm) + ├── config/ + ├── data_process/ + ├── models/ + ├── utils/ + ├── inference.py + └── train.py +├── README.md +├── LICENSE +└── requirements.txt +``` + + + +[python-image]: https://img.shields.io/badge/Python-3.6-ff69b4.svg +[python-url]: https://www.python.org/ +[pytorch-image]: https://img.shields.io/badge/PyTorch-1.5-2BAF2B.svg +[pytorch-url]: https://pytorch.org/ diff --git a/point-cloud/Technical_details.md b/point-cloud/Technical_details.md new file mode 100644 index 0000000..759ae4a --- /dev/null +++ b/point-cloud/Technical_details.md @@ -0,0 +1,55 @@ +# Super Fast and Accurate 3D Object Detection based on 3D LiDAR Point Clouds + +--- + +Technical details of the implementation + + +## 1. Network architecture + +- The **ResNet-based Keypoint Feature Pyramid Network** (KFPN) that was proposed in the [RTM3D paper](https://arxiv.org/pdf/2001.03343.pdf) is used.
+An unofficial PyTorch implementation of the RTM3D paper is available [here](https://github.com/maudzung/RTM3D). +- **Input**: + - The model takes a birds-eye-view (BEV) map as input. + - The BEV map is encoded by the height, intensity, and density of the 3D LiDAR point cloud. Assume that the size of the BEV input is `(H, W, 3)`. + +- **Outputs**: + - Heatmap for the main center with a size of `(H/S, W/S, C)`, where `S=4` _(the down-sample ratio)_ and `C=3` _(the number of classes)_ + - Center offset: `(H/S, W/S, 2)` + - The heading angle _(yaw)_: `(H/S, W/S, 2)`. The model estimates the **im**aginary and the **re**al fraction (`sin(yaw)` and `cos(yaw)` values). + - Dimension _(h, w, l)_: `(H/S, W/S, 3)` + - `z` coordinate: `(H/S, W/S, 1)` + +- **Targets**: **7 degrees of freedom** _(7-DOF)_ of objects: `(cx, cy, cz, l, w, h, θ)` + - `cx, cy, cz`: The center coordinates. + - `l, w, h`: The length, width, and height of the bounding box. + - `θ`: The heading angle of the bounding box in radians. + +- **Objects**: Cars, Pedestrians, Cyclists. + +## 2. Loss functions + +- For the main center heatmap: `focal loss` is used. + +- For the heading angle _(yaw)_: the `im` and `re` fractions are directly regressed by using `l1_loss`. + +- For the `z` coordinate and the `3 dimensions` (height, width, length), I used the `balanced l1 loss` that was proposed in the paper + [Libra R-CNN: Towards Balanced Learning for Object Detection](https://arxiv.org/pdf/1904.02701.pdf). + +## 3. Training details + +- Uniform weights are set for all of the above loss components (`=1.0` for each). +- Number of epochs: 300. +- Learning rate scheduler: [`cosine`](https://arxiv.org/pdf/1812.01187.pdf), initial learning rate: 0.001. +- Batch size: `16` (on a single GTX 1080Ti). + +## 4. Inference + +- A `3 × 3` max-pooling operation was applied on the center heatmap, then only the `50` predictions whose center confidences are larger than 0.2 were kept (a sketch of this decoding step is given at the end of this document). +- The heading angle _(yaw)_ = `arctan`(_imaginary fraction_ / _real fraction_) + +## 5. How to expand the work + +- The model could be trained with more classes and with a larger detection area by modifying the configurations in the [config/kitti_config.py](https://github.com/maudzung/Super-Fast-Accurate-3D-Object-Detection/blob/master/src/config/kitti_config.py) file.
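A minimal PyTorch sketch of the decoding step described in Section 4 (3 × 3 max-pool peak picking, top-`K` selection, and yaw recovery from the `im`/`re` pair) is given below. It is only an illustration, not the repository's own decoder: the function name `decode_center_heatmap` and the assumed channel order of the `direction` head (`im` first, `re` second) are hypothetical.

```python
import torch
import torch.nn.functional as F


def decode_center_heatmap(hm_cen, direction, K=50, conf_thresh=0.2):
    """Keep local peaks of the center heatmap, take the top-K of them,
    and recover yaw = arctan(im / re) from the direction head.

    hm_cen:    (B, C, H/S, W/S) class heatmap after sigmoid
    direction: (B, 2, H/S, W/S) regressed (im, re) = (sin(yaw), cos(yaw))
    """
    batch, num_classes, height, width = hm_cen.size()

    # 3x3 max-pooling: a cell survives only if it is a local maximum
    hmax = F.max_pool2d(hm_cen, kernel_size=3, stride=1, padding=1)
    peaks = hm_cen * (hmax == hm_cen).float()

    # Top-K peaks over all classes and spatial locations
    scores, indices = torch.topk(peaks.view(batch, -1), K)
    classes = indices // (height * width)
    ys = (indices % (height * width)) // width
    xs = indices % width

    # Heading angle from the (im, re) pair (assumed channel order: 0 = im, 1 = re)
    batch_idx = torch.arange(batch).unsqueeze(1)
    im = direction[:, 0][batch_idx, ys, xs]
    re = direction[:, 1][batch_idx, ys, xs]
    yaw = torch.atan2(im, re)

    # Only detections above the confidence threshold (0.2) are kept
    keep = scores > conf_thresh
    return scores, classes, xs, ys, yaw, keep
```

With `K=50` and `conf_thresh=0.2` this mirrors the numbers quoted in Section 4; the actual decoding code shipped under `sfa/` may differ in detail.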
\ No newline at end of file diff --git a/point-cloud/checkpoints/fpn_resnet_18/Model_fpn_resnet_18_epoch_300.pth b/point-cloud/checkpoints/fpn_resnet_18/Model_fpn_resnet_18_epoch_300.pth new file mode 100644 index 0000000..15ed4e2 Binary files /dev/null and b/point-cloud/checkpoints/fpn_resnet_18/Model_fpn_resnet_18_epoch_300.pth differ diff --git a/point-cloud/requirements.txt b/point-cloud/requirements.txt new file mode 100644 index 0000000..ac7fae2 --- /dev/null +++ b/point-cloud/requirements.txt @@ -0,0 +1,41 @@ +absl-py==1.1.0 +cachetools==4.2.4 +certifi==2022.6.15 +charset-normalizer==2.0.12 +cycler==0.11.0 +easydict==1.9 +future==0.18.2 +google-auth==1.35.0 +google-auth-oauthlib==0.4.6 +grpcio==1.46.3 +idna==3.3 +importlib-metadata==4.11.4 +joblib==1.1.0 +kiwisolver==1.4.3 +Markdown==3.3.7 +matplotlib==3.3.3 +numpy==1.18.3 +oauthlib==3.2.0 +opencv-python==4.2.0.34 +Pillow==8.4.0 +protobuf==3.19.1 +pyasn1==0.4.8 +pyasn1-modules==0.2.8 +pyparsing==3.0.9 +python-dateutil==2.8.2 +requests==2.28.0 +requests-oauthlib==1.3.1 +rsa==4.8 +scikit-learn==0.22.2 +scipy==1.8.1 +six==1.16.0 +tensorboard==2.2.1 +tensorboard-plugin-wit==1.8.1 +torch==1.5.0 +torchsummary==1.5.1 +torchvision==0.6.0 +tqdm==4.54.0 +urllib3==1.26.9 +Werkzeug==2.1.2 +wget==3.2 +zipp==3.8.0 diff --git a/point-cloud/sfa/config/__init__.py b/point-cloud/sfa/config/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/point-cloud/sfa/config/kitti_config.py b/point-cloud/sfa/config/kitti_config.py new file mode 100644 index 0000000..436616a --- /dev/null +++ b/point-cloud/sfa/config/kitti_config.py @@ -0,0 +1,99 @@ +import math + +import numpy as np + +# Car and Van ==> Car class +# Pedestrian and Person_Sitting ==> Pedestrian Class +# for train +CLASS_NAME_TO_ID = { + 'Pedestrian': 0, + 'Car': 1, + 'Cyclist': 2, + 'Van': 1, + 'Truck': -3, + 'Person_sitting': 0, + 'Tram': -99, + 'Misc': -99, + 'TraffiCone': -1, + 'DontCare': -1 +} + +# for test +CLASS_ID_TO_NAME = { + 0: 'Pedestrian', # Person_sitting in the same class + 1: 'Car', # Van in the same class + 2: 'Cyclist' +} + + +colors = [[0, 255, 255], [0, 0, 255], [255, 0, 0], [255, 120, 0], + [255, 120, 120], [0, 120, 0], [120, 255, 255], [120, 0, 255]] + +##################################################################################### +boundary = { + "minX": -50, + "maxX": 50, + "minY": -25, + "maxY": 25, + "minZ": -2.73, + "maxZ": 1.27 +} + +bound_size_x = boundary['maxX'] - boundary['minX'] +bound_size_y = boundary['maxY'] - boundary['minY'] +bound_size_z = boundary['maxZ'] - boundary['minZ'] + +boundary_back = { + "minX": -50, + "maxX": 0, + "minY": -25, + "maxY": 25, + "minZ": -2.73, + "maxZ": 1.27 +} + +BEV_WIDTH = 608 # across y axis -25m ~ 25m +BEV_HEIGHT = 1216 # across x axis 0m ~ 50m +DISCRETIZATION = (boundary["maxX"] - boundary["minX"]) / BEV_HEIGHT +DISCRETIZATION_Y = (boundary["maxX"] - boundary["minX"]) / BEV_HEIGHT +DISCRETIZATION_X = (boundary["maxY"] - boundary["minY"]) / BEV_WIDTH +# maximum number of points per voxel +T = 35 + +# voxel size +vd = 0.1 # z +vh = 0.05 # y +vw = 0.05 # x + +# voxel grid +W = math.ceil(bound_size_x / vw) +H = math.ceil(bound_size_y / vh) +D = math.ceil(bound_size_z / vd) + +# Following parameters are calculated as an average from KITTI dataset for simplicity +##################################################################################### +Tr_velo_to_cam = np.array([ + [7.49916597e-03, -9.99971248e-01, -8.65110297e-04, -6.71807577e-03], + [1.18652889e-02, 9.54520517e-04, -9.99910318e-01, 
-7.33152811e-02], + [9.99882833e-01, 7.49141178e-03, 1.18719929e-02, -2.78557062e-01], + [0, 0, 0, 1] +]) + +# cal mean from train set +R0 = np.array([ + [0.99992475, 0.00975976, -0.00734152, 0], + [-0.0097913, 0.99994262, -0.00430371, 0], + [0.00729911, 0.0043753, 0.99996319, 0], + [0, 0, 0, 1] +]) + +P2 = np.array([[719.787081, 0., 608.463003, 44.9538775], + [0., 719.787081, 174.545111, 0.1066855], + [0., 0., 1., 3.0106472e-03], + [0., 0., 0., 0] + ]) + +R0_inv = np.linalg.inv(R0) +Tr_velo_to_cam_inv = np.linalg.inv(Tr_velo_to_cam) +P2_inv = np.linalg.pinv(P2) +##################################################################################### diff --git a/point-cloud/sfa/config/train_config.py b/point-cloud/sfa/config/train_config.py new file mode 100644 index 0000000..4e4a971 --- /dev/null +++ b/point-cloud/sfa/config/train_config.py @@ -0,0 +1,172 @@ +""" +# -*- coding: utf-8 -*- +----------------------------------------------------------------------------------- +# Author: Nguyen Mau Dung +# DoC: 2020.08.17 +# email: nguyenmaudung93.kstn@gmail.com +----------------------------------------------------------------------------------- +# Description: The configurations of the project will be defined here +""" + +import os +import argparse + +import torch +from easydict import EasyDict as edict + + +def parse_train_configs(): + parser = argparse.ArgumentParser(description='The Implementation using PyTorch') + parser.add_argument('--seed', type=int, default=2020, + help='re-produce the results with seed random') + parser.add_argument('--saved_fn', type=str, default='fpn_resnet_18', metavar='FN', + help='The name using for saving logs, models,...') + + parser.add_argument('--root_dir', type=str, default='../', metavar='PATH', + help='The ROOT working directory') + #################################################################### + ############## Model configs ######################## + #################################################################### + parser.add_argument('--arch', type=str, default='fpn_resnet_18', metavar='ARCH', + help='The name of the model architecture') + parser.add_argument('--model_load_dir', type=str, default=None, metavar='PATH', + help='the path of the pretrained checkpoint') + + #################################################################### + ############## Dataloader and Running configs ####### + #################################################################### + parser.add_argument('--data_url', type=str, default='../dataset/apollo/training', metavar='PATH', + help='the path of the dataset') + parser.add_argument('--val_data_url', type=str, default='../dataset/apollo/val', metavar='PATH', + help='the path of the dataset') + parser.add_argument('--train_model_out', type=str, default='../checkpoints', metavar='PATH', + help='the path of the model output') + parser.add_argument('--train_out', type=str, default='../logs', metavar='PATH', + help='the path of the logs output') + parser.add_argument('--hflip_prob', type=float, default=0.5, + help='The probability of horizontal flip') + parser.add_argument('--no-val', action='store_true', + help='If true, dont evaluate the model on the val set') + parser.add_argument('--num_samples', type=int, default=None, + help='Take a subset of the dataset to run and debug') + parser.add_argument('--num_workers', type=int, default=4, + help='Number of threads for loading data') + parser.add_argument('--batch_size', type=int, default=8, + help='mini-batch size (default: 16), this is the total' + 'batch size of all 
GPUs on the current node when using' + 'Data Parallel or Distributed Data Parallel') + parser.add_argument('--print_freq', type=int, default=50, metavar='N', + help='print frequency (default: 50)') + parser.add_argument('--tensorboard_freq', type=int, default=50, metavar='N', + help='frequency of saving tensorboard (default: 50)') + parser.add_argument('--checkpoint_freq', type=int, default=2, metavar='N', + help='frequency of saving checkpoints (default: 5)') + parser.add_argument('--gpu_num_per_node', type=int, default=1, + help='Number of GPU') + #################################################################### + ############## Training strategy #################### + #################################################################### + + parser.add_argument('--start_epoch', type=int, default=1, metavar='N', + help='the starting epoch') + parser.add_argument('--num_epochs', type=int, default=300, metavar='N', + help='number of total epochs to run') + parser.add_argument('--lr_type', type=str, default='cosin', + help='the type of learning rate scheduler (cosin or multi_step or one_cycle)') + parser.add_argument('--lr', type=float, default=0.001, metavar='LR', + help='initial learning rate') + parser.add_argument('--minimum_lr', type=float, default=1e-7, metavar='MIN_LR', + help='minimum learning rate during training') + parser.add_argument('--momentum', type=float, default=0.949, metavar='M', + help='momentum') + parser.add_argument('-wd', '--weight_decay', type=float, default=0., metavar='WD', + help='weight decay (default: 0.)') + parser.add_argument('--optimizer_type', type=str, default='adam', metavar='OPTIMIZER', + help='the type of optimizer, it can be sgd or adam') + parser.add_argument('--steps', nargs='*', default=[150, 180], + help='number of burn in step') + + #################################################################### + ############## Loss weight ########################## + #################################################################### + + #################################################################### + ############## Distributed Data Parallel ############ + #################################################################### + parser.add_argument('--world-size', default=-1, type=int, metavar='N', + help='number of nodes for distributed training') + parser.add_argument('--rank', default=-1, type=int, metavar='N', + help='node rank for distributed training') + parser.add_argument('--dist-url', default='tcp://127.0.0.1:29500', type=str, + help='url used to set up distributed training') + parser.add_argument('--dist-backend', default='nccl', type=str, + help='distributed backend') + parser.add_argument('--gpu_idx', default=0, type=int, + help='GPU index to use.') + parser.add_argument('--no_cuda', default= False, + help='If true, cuda is not used.') + parser.add_argument('--multiprocessing-distributed', action='store_true', + help='Use multi-processing distributed training to launch ' + 'N processes per node, which has N GPUs. 
This is the ' + 'fastest way to use PyTorch for either single node or ' + 'multi node data parallel training') + #################################################################### + ############## Evaluation configurations ################### + #################################################################### + parser.add_argument('--evaluate', action='store_true', + help='only evaluate the model, not training') + parser.add_argument('--resume_path', type=str, default=None, metavar='PATH', + help='the path of the resumed checkpoint') + parser.add_argument('--K', type=int, default=50, + help='the number of top K') + + configs = edict(vars(parser.parse_args())) + + #################################################################### + ############## Hardware configurations ############################# + #################################################################### + # configs.device = torch.device('cpu' if configs.no_cuda else 'cuda') + configs.device = torch.device('cpu' if configs.no_cuda else 'cuda:{}'.format(configs.gpu_idx)) + configs.ngpus_per_node = torch.cuda.device_count() + + configs.pin_memory = True + configs.input_size = (1216, 608) + configs.hm_size = (304, 152) + configs.down_ratio = 4 + configs.max_objects = 50 + + configs.imagenet_pretrained = True + configs.head_conv = 64 + configs.num_classes = 3 + configs.num_center_offset = 2 + configs.num_z = 1 + configs.num_dim = 3 + configs.num_direction = 2 # sin, cos + + configs.heads = { + 'hm_cen': configs.num_classes, + 'cen_offset': configs.num_center_offset, + 'direction': configs.num_direction, + 'z_coor': configs.num_z, + 'dim': configs.num_dim + } + + configs.num_input_features = 4 + + #################################################################### + ############## Dataset, logs, Checkpoints dir ###################### + #################################################################### + configs.dataset = 'apollo' # or kitti + configs.dataset_dir = configs.data_url + # configs.checkpoints_dir = os.path.join(configs.train_model_out, configs.saved_fn) + configs.checkpoints_dir = configs.train_model_out + # configs.logs_dir = os.path.join(configs.train_out, configs.saved_fn) + configs.logs_dir = configs.train_out + configs.pretrained_path = configs.model_load_dir + + if not os.path.isdir(configs.checkpoints_dir): + os.makedirs(configs.checkpoints_dir) + if not os.path.isdir(configs.logs_dir): + os.makedirs(configs.logs_dir) + + return configs diff --git a/point-cloud/sfa/data_process/__init__.py b/point-cloud/sfa/data_process/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/point-cloud/sfa/data_process/demo_dataset.py b/point-cloud/sfa/data_process/demo_dataset.py new file mode 100644 index 0000000..0c78c84 --- /dev/null +++ b/point-cloud/sfa/data_process/demo_dataset.py @@ -0,0 +1,99 @@ +""" +# -*- coding: utf-8 -*- +----------------------------------------------------------------------------------- +# Author: Nguyen Mau Dung +# DoC: 2020.08.17 +# email: nguyenmaudung93.kstn@gmail.com +----------------------------------------------------------------------------------- +# Description: This script for the KITTI dataset +""" + +import sys +import os +from builtins import int +from glob import glob + +import numpy as np +from torch.utils.data import Dataset +import cv2 +import torch + +src_dir = os.path.dirname(os.path.realpath(__file__)) +# while not src_dir.endswith("sfa"): +# src_dir = os.path.dirname(src_dir) +if src_dir not in sys.path: + sys.path.append(src_dir) + +from 
data_process.kitti_data_utils import get_filtered_lidar +from data_process.kitti_bev_utils import makeBEVMap +import config.kitti_config as cnf + + +class Demo_KittiDataset(Dataset): + def __init__(self, configs): + self.dataset_dir = os.path.join(configs.dataset_dir, configs.foldername, configs.foldername[:10], + configs.foldername) + self.input_size = configs.input_size + self.hm_size = configs.hm_size + + self.num_classes = configs.num_classes + self.max_objects = configs.max_objects + + self.image_dir = os.path.join(self.dataset_dir, "image_02", "data") + self.lidar_dir = os.path.join(self.dataset_dir, "velodyne_points", "data") + self.label_dir = os.path.join(self.dataset_dir, "label_2", "data") + self.sample_id_list = sorted(glob(os.path.join(self.lidar_dir, '*.bin'))) + self.sample_id_list = [float(os.path.basename(fn)[:-4]) for fn in self.sample_id_list] + self.num_samples = len(self.sample_id_list) + + def __len__(self): + return len(self.sample_id_list) + + def __getitem__(self, index): + pass + + def load_bevmap_front(self, index): + """Load only image for the testing phase""" + sample_id = int(self.sample_id_list[index]) + img_path, img_rgb = self.get_image(sample_id) + lidarData = self.get_lidar(sample_id) + front_lidar = get_filtered_lidar(lidarData, cnf.boundary) + front_bevmap = makeBEVMap(front_lidar, cnf.boundary) + front_bevmap = torch.from_numpy(front_bevmap) + + metadatas = { + 'img_path': img_path, + } + + return metadatas, front_bevmap, img_rgb + + def load_bevmap_front_vs_back(self, index): + """Load only image for the testing phase""" + sample_id = int(self.sample_id_list[index]) + img_path, img_rgb = self.get_image(sample_id) + lidarData = self.get_lidar(sample_id) + + front_lidar = get_filtered_lidar(lidarData, cnf.boundary) + front_bevmap = makeBEVMap(front_lidar, cnf.boundary) + front_bevmap = torch.from_numpy(front_bevmap) + + back_lidar = get_filtered_lidar(lidarData, cnf.boundary_back) + back_bevmap = makeBEVMap(back_lidar, cnf.boundary_back) + back_bevmap = torch.from_numpy(back_bevmap) + + metadatas = { + 'img_path': img_path, + } + + return metadatas, front_bevmap, back_bevmap, img_rgb + + def get_image(self, idx): + img_path = os.path.join(self.image_dir, '{:010d}.png'.format(idx)) + img = cv2.cvtColor(cv2.imread(img_path), cv2.COLOR_BGR2RGB) + + return img_path, img + + def get_lidar(self, idx): + lidar_file = os.path.join(self.lidar_dir, '{:010d}.bin'.format(idx)) + # assert os.path.isfile(lidar_file) + return np.fromfile(lidar_file, dtype=np.float32).reshape(-1, 4) diff --git a/point-cloud/sfa/data_process/kitti_bev_utils.py b/point-cloud/sfa/data_process/kitti_bev_utils.py new file mode 100644 index 0000000..b25f79a --- /dev/null +++ b/point-cloud/sfa/data_process/kitti_bev_utils.py @@ -0,0 +1,98 @@ +""" +# -*- coding: utf-8 -*- +----------------------------------------------------------------------------------- +""" + +import math +import os +import sys + +import cv2 +import numpy as np +src_dir = os.path.dirname(os.path.realpath(__file__)) +# while not src_dir.endswith("sfa"): +# src_dir = os.path.dirname(src_dir) +if src_dir not in sys.path: + sys.path.append(src_dir) + +import config.kitti_config as cnf + + +def makeBEVMap(PointCloud_, boundary): + Height = cnf.BEV_HEIGHT + 1 + Width = cnf.BEV_WIDTH + 1 + + # Discretize Feature Map + PointCloud = np.copy(PointCloud_) + # PointCloud[:, 0] = np.int_(np.floor(PointCloud[:, 0] / cnf.DISCRETIZATION)) + # PointCloud[:, 1] = np.int_(np.floor(PointCloud[:, 1] / cnf.DISCRETIZATION) + Width / 2) + + # 
For the Apollo dataset, detection covers 360° + PointCloud[:, 0] = np.int_(np.floor(PointCloud[:, 0] / cnf.DISCRETIZATION_Y) + Height / 2) + PointCloud[:, 1] = np.int_(np.floor(PointCloud[:, 1] / cnf.DISCRETIZATION_X) + Width / 2) + + # sort-3times + indices = np.lexsort((-PointCloud[:, 2], PointCloud[:, 1], PointCloud[:, 0])) + PointCloud = PointCloud[indices] + + # Height Map + heightMap = np.zeros((Height, Width)) + + _, indices = np.unique(PointCloud[:, 0:2], axis=0, return_index=True) + PointCloud_frac = PointCloud[indices] + + # note: the image coordinate order is (y, x), not (x, y) + max_height = float(np.abs(boundary['maxZ'] - boundary['minZ'])) + + heightMap[np.int_(PointCloud_frac[:, 0]), np.int_(PointCloud_frac[:, 1])] = PointCloud_frac[:, 2] / max_height #(1217,609) + # Intensity Map & DensityMap + intensityMap = np.zeros((Height, Width)) + densityMap = np.zeros((Height, Width)) + + _, indices, counts = np.unique(PointCloud[:, 0:2], axis=0, return_index=True, return_counts=True) + PointCloud_top = PointCloud[indices] + + normalizedCounts = np.minimum(1.0, np.log(counts + 1) / np.log(64)) + + intensityMap[np.int_(PointCloud_top[:, 0]), np.int_(PointCloud_top[:, 1])] = PointCloud_top[:, 3] / 255.0 # hesai40p reflection intensity is in the range 0~255 + densityMap[np.int_(PointCloud_top[:, 0]), np.int_(PointCloud_top[:, 1])] = normalizedCounts + RGB_Map = np.zeros((3, Height - 1, Width - 1)) + RGB_Map[2, :, :] = densityMap[:cnf.BEV_HEIGHT, :cnf.BEV_WIDTH] # r_map + RGB_Map[1, :, :] = heightMap[:cnf.BEV_HEIGHT, :cnf.BEV_WIDTH] # g_map + RGB_Map[0, :, :] = intensityMap[:cnf.BEV_HEIGHT, :cnf.BEV_WIDTH] # b_map + + return RGB_Map + + +# bev image coordinates format +def get_corners(x, y, w, l, yaw): + bev_corners = np.zeros((4, 2), dtype=np.float32) + cos_yaw = np.cos(yaw) + sin_yaw = np.sin(yaw) + # front left + bev_corners[0, 0] = x - w / 2 * cos_yaw - l / 2 * sin_yaw + bev_corners[0, 1] = y - w / 2 * sin_yaw + l / 2 * cos_yaw + + # rear left + bev_corners[1, 0] = x - w / 2 * cos_yaw + l / 2 * sin_yaw + bev_corners[1, 1] = y - w / 2 * sin_yaw - l / 2 * cos_yaw + + # rear right + bev_corners[2, 0] = x + w / 2 * cos_yaw + l / 2 * sin_yaw + bev_corners[2, 1] = y + w / 2 * sin_yaw - l / 2 * cos_yaw + + # front right + bev_corners[3, 0] = x + w / 2 * cos_yaw - l / 2 * sin_yaw + bev_corners[3, 1] = y + w / 2 * sin_yaw + l / 2 * cos_yaw + + return bev_corners + + +def drawRotatedBox(img, x, y, w, l, yaw, color): + img_cp = img.copy() + bev_corners = get_corners(x, y, w, l, yaw) + corners_int = bev_corners.reshape(-1, 1, 2).astype(int) + cv2.polylines(img, [corners_int], True, color, 2) + corners_int = bev_corners.reshape(-1, 2) + cv2.line(img, (int(corners_int[0, 0]), int(corners_int[0, 1])), (int(corners_int[3, 0]), int(corners_int[3, 1])), (255, 255, 0), 2) + # return img_cp diff --git a/point-cloud/sfa/data_process/kitti_data_utils.py b/point-cloud/sfa/data_process/kitti_data_utils.py new file mode 100644 index 0000000..12fdb3b --- /dev/null +++ b/point-cloud/sfa/data_process/kitti_data_utils.py @@ -0,0 +1,324 @@ +""" +# -*- coding: utf-8 -*- +----------------------------------------------------------------------------------- +# Author: Nguyen Mau Dung +# DoC: 2020.08.17 +# email: nguyenmaudung93.kstn@gmail.com +----------------------------------------------------------------------------------- +# Description: The utils of the kitti dataset +""" + +from __future__ import print_function +import os +import sys + +import numpy as np +import cv2 + +src_dir = os.path.dirname(os.path.realpath(__file__)) +# while not src_dir.endswith("sfa"): +# 
src_dir = os.path.dirname(src_dir) +if src_dir not in sys.path: + sys.path.append(src_dir) + +import config.kitti_config as cnf + + +class Object3d(object): + ''' 3d object label ''' + + def __init__(self, label_file_line): + data = label_file_line.split(' ') + data[1:] = [float(x) for x in data[1:]] + # extract label, truncation, occlusion + self.type = data[0] # 'Car', 'Pedestrian', ... + self.cls_id = self.cls_type_to_id(self.type) + self.truncation = data[1] # truncated pixel ratio [0..1] + self.occlusion = int(data[2]) # 0=visible, 1=partly occluded, 2=fully occluded, 3=unknown + self.alpha = data[3] # object observation angle [-pi..pi] + + # extract 2d bounding box in 0-based coordinates + self.xmin = data[4] # left + self.ymin = data[5] # top + self.xmax = data[6] # right + self.ymax = data[7] # bottom + self.box2d = np.array([self.xmin, self.ymin, self.xmax, self.ymax]) + + # extract 3d bounding box information + self.h = data[8] # box height + self.w = data[9] # box width + self.l = data[10] # box length (in meters) + self.t = (data[11], data[12], data[13]) # location (x,y,z) in camera coord. + self.dis_to_cam = np.linalg.norm(self.t) + self.ry = data[14] # yaw angle (around Y-axis in camera coordinates) [-pi..pi] + self.score = data[15] if data.__len__() == 16 else -1.0 + self.level_str = None + self.level = self.get_obj_level() + + def cls_type_to_id(self, cls_type): + if cls_type not in cnf.CLASS_NAME_TO_ID.keys(): + return -1 + + return cnf.CLASS_NAME_TO_ID[cls_type] + + def get_obj_level(self): + height = float(self.box2d[3]) - float(self.box2d[1]) + 1 + + if height >= 40 and self.truncation <= 0.15 and self.occlusion <= 0: + self.level_str = 'Easy' + return 1 # Easy + elif height >= 25 and self.truncation <= 0.3 and self.occlusion <= 1: + self.level_str = 'Moderate' + return 2 # Moderate + elif height >= 25 and self.truncation <= 0.5 and self.occlusion <= 2: + self.level_str = 'Hard' + return 3 # Hard + else: + self.level_str = 'UnKnown' + return 4 + + def print_object(self): + print('Type, truncation, occlusion, alpha: %s, %d, %d, %f' % \ + (self.type, self.truncation, self.occlusion, self.alpha)) + print('2d bbox (x0,y0,x1,y1): %f, %f, %f, %f' % \ + (self.xmin, self.ymin, self.xmax, self.ymax)) + print('3d bbox h,w,l: %f, %f, %f' % \ + (self.h, self.w, self.l)) + print('3d bbox location, ry: (%f, %f, %f), %f' % \ + (self.t[0], self.t[1], self.t[2], self.ry)) + + def to_kitti_format(self): + kitti_str = '%s %.2f %d %.2f %.2f %.2f %.2f %.2f %.2f %.2f %.2f %.2f %.2f %.2f %.2f %.2f' \ + % (self.type, self.truncation, int(self.occlusion), self.alpha, self.box2d[0], self.box2d[1], + self.box2d[2], self.box2d[3], self.h, self.w, self.l, self.t[0], self.t[1], self.t[2], + self.ry, self.score) + return kitti_str + + +def read_label(label_filename): + lines = [line.rstrip() for line in open(label_filename)] + objects = [Object3d(line) for line in lines] + return objects + + +class Calibration(object): + ''' Calibration matrices and utils + 3d XYZ in