
add point-cloud

pull/13/MERGE
enlin 2 years ago
parent
commit
e5dd57508b
42 changed files with 4556 additions and 1 deletion
  1. +0
    -1
      point-cloud
  2. BIN
      point-cloud/.DS_Store
  3. +9
    -0
      point-cloud/.gitignore
  4. +14
    -0
      point-cloud/.idea/deployment.xml
  5. +6
    -0
      point-cloud/.idea/inspectionProfiles/profiles_settings.xml
  6. +7
    -0
      point-cloud/.idea/misc.xml
  7. +8
    -0
      point-cloud/.idea/modules.xml
  8. +12
    -0
      point-cloud/.idea/sfa3d.iml
  9. +6
    -0
      point-cloud/.idea/vcs.xml
  10. +49
    -0
      point-cloud/.idea/workspace.xml
  11. +21
    -0
      point-cloud/LICENSE
  12. +116
    -0
      point-cloud/README.md
  13. +55
    -0
      point-cloud/Technical_details.md
  14. BIN
      point-cloud/checkpoints/fpn_resnet_18/Model_fpn_resnet_18_epoch_300.pth
  15. +41
    -0
      point-cloud/requirements.txt
  16. +0
    -0
      point-cloud/sfa/config/__init__.py
  17. +99
    -0
      point-cloud/sfa/config/kitti_config.py
  18. +172
    -0
      point-cloud/sfa/config/train_config.py
  19. +0
    -0
      point-cloud/sfa/data_process/__init__.py
  20. +99
    -0
      point-cloud/sfa/data_process/demo_dataset.py
  21. +98
    -0
      point-cloud/sfa/data_process/kitti_bev_utils.py
  22. +324
    -0
      point-cloud/sfa/data_process/kitti_data_utils.py
  23. +67
    -0
      point-cloud/sfa/data_process/kitti_dataloader.py
  24. +335
    -0
      point-cloud/sfa/data_process/kitti_dataset.py
  25. +426
    -0
      point-cloud/sfa/data_process/transformation.py
  26. +378
    -0
      point-cloud/sfa/inference.py
  27. +0
    -0
      point-cloud/sfa/losses/__init__.py
  28. +163
    -0
      point-cloud/sfa/losses/losses.py
  29. +0
    -0
      point-cloud/sfa/models/__init__.py
  30. +252
    -0
      point-cloud/sfa/models/fpn_resnet.py
  31. +134
    -0
      point-cloud/sfa/models/model_utils.py
  32. +284
    -0
      point-cloud/sfa/models/resnet.py
  33. +290
    -0
      point-cloud/sfa/train.py
  34. +0
    -0
      point-cloud/sfa/utils/__init__.py
  35. +137
    -0
      point-cloud/sfa/utils/demo_utils.py
  36. +183
    -0
      point-cloud/sfa/utils/evaluation_utils.py
  37. +49
    -0
      point-cloud/sfa/utils/logger.py
  38. +312
    -0
      point-cloud/sfa/utils/lr_scheduler.py
  39. +71
    -0
      point-cloud/sfa/utils/misc.py
  40. +45
    -0
      point-cloud/sfa/utils/torch_utils.py
  41. +140
    -0
      point-cloud/sfa/utils/train_utils.py
  42. +154
    -0
      point-cloud/sfa/utils/visualization_utils.py

+ 0
- 1
point-cloud

@@ -1 +0,0 @@
Subproject commit e4c429e813608acbcf487656abe2eb87dcc4636c

BIN
point-cloud/.DS_Store


+ 9
- 0
point-cloud/.gitignore

@@ -0,0 +1,9 @@
dataset
# cache
__pycache__
# results
results
# logs
logs

+ 14
- 0
point-cloud/.idea/deployment.xml

@@ -0,0 +1,14 @@
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="PublishConfigData">
<serverData>
<paths name="root@10.5.24.134:10000">
<serverdata>
<mappings>
<mapping local="$PROJECT_DIR$" web="/" />
</mappings>
</serverdata>
</paths>
</serverData>
</component>
</project>

+ 6
- 0
point-cloud/.idea/inspectionProfiles/profiles_settings.xml

@@ -0,0 +1,6 @@
<component name="InspectionProjectProfileManager">
<settings>
<option name="USE_PROJECT_PROFILE" value="false" />
<version value="1.0" />
</settings>
</component>

+ 7
- 0
point-cloud/.idea/misc.xml

@@ -0,0 +1,7 @@
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="JavaScriptSettings">
<option name="languageLevel" value="ES6" />
</component>
<component name="ProjectRootManager" version="2" project-jdk-name="Python 3.7" project-jdk-type="Python SDK" />
</project>

+ 8
- 0
point-cloud/.idea/modules.xml

@@ -0,0 +1,8 @@
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="ProjectModuleManager">
<modules>
<module fileurl="file://$PROJECT_DIR$/.idea/sfa3d.iml" filepath="$PROJECT_DIR$/.idea/sfa3d.iml" />
</modules>
</component>
</project>

+ 12
- 0
point-cloud/.idea/sfa3d.iml

@@ -0,0 +1,12 @@
<?xml version="1.0" encoding="UTF-8"?>
<module type="PYTHON_MODULE" version="4">
<component name="NewModuleRootManager">
<content url="file://$MODULE_DIR$" />
<orderEntry type="inheritedJdk" />
<orderEntry type="sourceFolder" forTests="false" />
</component>
<component name="PyDocumentationSettings">
<option name="format" value="PLAIN" />
<option name="myDocStringFormat" value="Plain" />
</component>
</module>

+ 6
- 0
point-cloud/.idea/vcs.xml

@@ -0,0 +1,6 @@
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="VcsDirectoryMappings">
<mapping directory="$PROJECT_DIR$" vcs="Git" />
</component>
</project>

+ 49
- 0
point-cloud/.idea/workspace.xml

@@ -0,0 +1,49 @@
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="ChangeListManager">
<list default="true" id="ba6cd492-6d49-41a8-a764-504006f2eb9a" name="Changes" comment="" />
<option name="SHOW_DIALOG" value="false" />
<option name="HIGHLIGHT_CONFLICTS" value="true" />
<option name="HIGHLIGHT_NON_ACTIVE_CHANGELIST" value="false" />
<option name="LAST_RESOLUTION" value="IGNORE" />
</component>
<component name="Git.Settings">
<option name="RECENT_BRANCH_BY_REPOSITORY">
<map>
<entry key="$PROJECT_DIR$" value="master" />
</map>
</option>
<option name="RECENT_GIT_ROOT_PATH" value="$PROJECT_DIR$" />
</component>
<component name="ProjectId" id="2E4AHz6idZOBGHdHApv98dU5PkK" />
<component name="ProjectViewState">
<option name="hideEmptyMiddlePackages" value="true" />
<option name="showLibraryContents" value="true" />
</component>
<component name="PropertiesComponent">
<property name="RunOnceActivity.OpenProjectViewOnStart" value="true" />
<property name="RunOnceActivity.ShowReadmeOnStart" value="true" />
</component>
<component name="SpellCheckerSettings" RuntimeDictionaries="0" Folders="0" CustomDictionaries="0" DefaultDictionary="application-level" UseSingleDictionary="true" transferred="true" />
<component name="TaskManager">
<task active="true" id="Default" summary="Default task">
<changelist id="ba6cd492-6d49-41a8-a764-504006f2eb9a" name="Changes" comment="" />
<created>1661844398596</created>
<option name="number" value="Default" />
<option name="presentableId" value="Default" />
<updated>1661844398596</updated>
</task>
<servers />
</component>
<component name="Vcs.Log.Tabs.Properties">
<option name="TAB_STATES">
<map>
<entry key="MAIN">
<value>
<State />
</value>
</entry>
</map>
</option>
</component>
</project>

+ 21
- 0
point-cloud/LICENSE

@@ -0,0 +1,21 @@
MIT License
Copyright (c) 2020 Nguyen Mau Dung
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.

+ 116
- 0
point-cloud/README.md

@@ -0,0 +1,116 @@
# Super Fast and Accurate 3D Object Detection based on 3D LiDAR Point Clouds
[![python-image]][python-url]
[![pytorch-image]][pytorch-url]
---
## 1. Getting Started
### 1.1 Requirement
The instructions for setting up a virtual environment are [here](https://github.com/maudzung/virtual_environment_python3).
```shell script
cd SFA3D/
pip install -r requirements.txt
```
### 1.2 Data Preparation
Download the 3D KITTI detection dataset from [here](http://www.cvlibs.net/datasets/kitti/eval_object.php?obj_benchmark=3d).
The downloaded data includes:
- Velodyne point clouds _**(29 GB)**_
- Training labels of object data set _**(5 MB)**_
Please make sure the source code and dataset directory structure matches the layout described in the Folder structure section below.
## 2. How to run
### 2.1 Inference
The pre-trained model was pushed to this repo.
- **CPU**
```
python inference.py --no_cuda=True
```
- **GPU**
```
python inference.py
```
Classes detected at inference:
- Pedestrian
- Car
- Cyclist
### 2.2 Training
#### 2.2.1 CPU
```
python train.py --no_cuda=True
```
#### 2.2.2 Single machine, single gpu
```shell script
python train.py --gpu_idx 0
```
#### 2.2.3 Distributed Data Parallel Training
- **Single machine (node), multiple GPUs**
```
python train.py --multiprocessing-distributed --world-size 1 --rank 0 --batch_size 64 --num_workers 8
```
- **Two machines (two nodes), multiple GPUs**
- _**First machine**_
```
python train.py --dist-url 'tcp://IP_OF_NODE1:FREEPORT' --multiprocessing-distributed --world-size 2 --rank 0 --batch_size 64 --num_workers 8
```
- _**Second machine**_
```
python train.py --dist-url 'tcp://IP_OF_NODE2:FREEPORT' --multiprocessing-distributed --world-size 2 --rank 1 --batch_size 64 --num_workers 8
```
## References
[1] SFA3D: [PyTorch Implementation](https://github.com/maudzung/SFA3D)
## Folder structure
### Dataset
```
└── kitti/
    ├── image_2/    (left color camera images, optional)
    ├── calib/      (calibration files, optional)
    ├── label_2/    (annotation results / labels, optional)
    └── velodyne/   (point cloud files, required)
```
### Checkpoints & Algorithm
```
${ROOT}
├── checkpoints/
│   └── fpn_resnet_18/
│       └── fpn_resnet_18_epoch_300.pth    (point cloud object detection / annotation model)
├── sfa/    (point cloud annotation algorithm)
│   ├── config/
│   ├── data_process/
│   ├── models/
│   ├── utils/
│   ├── inference.py
│   └── train.py
├── README.md
├── LICENSE
└── requirements.txt
```
[python-image]: https://img.shields.io/badge/Python-3.6-ff69b4.svg
[python-url]: https://www.python.org/
[pytorch-image]: https://img.shields.io/badge/PyTorch-1.5-2BAF2B.svg
[pytorch-url]: https://pytorch.org/

+ 55
- 0
point-cloud/Technical_details.md

@@ -0,0 +1,55 @@
# Super Fast and Accurate 3D Object Detection based on 3D LiDAR Point Clouds
---
Technical details of the implementation
## 1. Network architecture
- The **ResNet-based Keypoint Feature Pyramid Network** (KFPN) that was proposed in [RTM3D paper](https://arxiv.org/pdf/2001.03343.pdf).
An unofficial PyTorch implementation of the RTM3D paper is available [here](https://github.com/maudzung/RTM3D).
- **Input**:
- The model takes a birds-eye-view (BEV) map as input.
- The BEV map is encoded by height, intensity, and density of 3D LiDAR point clouds. Assume that the size of the BEV input is `(H, W, 3)`.
- **Outputs**:
- Heatmap for main center with a size of `(H/S, W/S, C)` where `S=4` _(the down-sample ratio)_, and `C=3` _(the number of classes)_
- Center offset: `(H/S, W/S, 2)`
- The heading angle _(yaw)_: `(H/S, W/S, 2)`. The model estimates the **im**aginary and the **re**al fraction (`sin(yaw)` and `cos(yaw)` values).
- Dimension _(h, w, l)_: `(H/S, W/S, 3)`
- `z` coordinate: `(H/S, W/S, 1)`
- **Targets**: **7 degrees of freedom** _(7-DOF)_ of objects: `(cx, cy, cz, l, w, h, θ)`
- `cx, cy, cz`: The center coordinates.
- `l, w, h`: length, width, height of the bounding box.
- `θ`: The heading angle in radians of the bounding box.
- **Objects**: Cars, Pedestrians, Cyclists.
## 2. Loss functions
- For the main center heatmap: `focal loss`
- For the heading angle _(yaw)_: the `im` and `re` fractions are directly regressed with `l1_loss`
- For the `z coordinate` and the `3 dimensions` (height, width, length), I used the `balanced l1 loss` proposed in the paper
[Libra R-CNN: Towards Balanced Learning for Object Detection](https://arxiv.org/pdf/1904.02701.pdf); a sketch of how these terms combine follows.
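
A minimal sketch of how these terms might be combined, assuming the uniform weights of `1.0` described in section 3; the `focal_loss` and `balanced_l1_loss` helpers below are illustrative re-implementations, not the repo's actual `losses.py` API:
```python
import math
import torch
import torch.nn.functional as F

def focal_loss(pred, gt, alpha=2, beta=4):
    # CenterNet-style penalty-reduced focal loss on the (sigmoid-activated) center heatmap
    pred = pred.clamp(1e-4, 1 - 1e-4)
    pos = gt.eq(1).float()
    neg = gt.lt(1).float()
    pos_loss = torch.log(pred) * (1 - pred) ** alpha * pos
    neg_loss = torch.log(1 - pred) * pred ** alpha * (1 - gt) ** beta * neg
    return -(pos_loss.sum() + neg_loss.sum()) / pos.sum().clamp(min=1)

def balanced_l1_loss(pred, target, alpha=0.5, gamma=1.5, beta=1.0):
    # balanced L1 loss from Libra R-CNN
    diff = torch.abs(pred - target)
    b = math.exp(gamma / alpha) - 1
    return torch.where(
        diff < beta,
        alpha / b * (b * diff + 1) * torch.log(b * diff / beta + 1) - alpha * diff,
        gamma * diff + gamma / b - alpha * beta,
    ).mean()

def total_loss(outputs, targets):
    # uniform weights (= 1.0) for every component
    return (focal_loss(outputs['hm_cen'], targets['hm_cen'])
            + F.l1_loss(outputs['direction'], targets['direction'])  # sin/cos of yaw
            + balanced_l1_loss(outputs['z_coor'], targets['z_coor'])
            + balanced_l1_loss(outputs['dim'], targets['dim']))
```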
## 3. Training in details
- Set uniform weights to the above components of losses. (`=1.0` for all)
- Number of epochs: 300.
- Learning rate scheduler: [`cosine`](https://arxiv.org/pdf/1812.01187.pdf), initial learning rate: 0.001.
- Batch size: `16` (on a single GTX 1080Ti).
## 4. Inference
- A `3 × 3` max-pooling operation is applied on the center heatmap, then only the `50` predictions whose center confidence is larger than `0.2` are kept.
- The heading angle _(yaw)_ = `arctan`(_imaginary fraction_ / _real fraction_)
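
A minimal sketch of that decoding step, assuming a sigmoid-activated `heatmap` of shape `(B, C, H/S, W/S)` and a `direction` map of shape `(B, 2, H/S, W/S)`; the names here are illustrative:
```python
import torch
import torch.nn.functional as F

def decode_centers(heatmap, direction, K=50, conf_thresh=0.2):
    # keep only local peaks of the center heatmap via 3x3 max-pooling
    peaks = F.max_pool2d(heatmap, kernel_size=3, stride=1, padding=1)
    heatmap = heatmap * (peaks == heatmap).float()
    B, C, H, W = heatmap.shape
    scores, indices = torch.topk(heatmap.view(B, -1), K)  # top-K candidate centers
    keep = scores > conf_thresh                           # confidence threshold of 0.2
    spatial = indices % (H * W)                           # position within the H x W grid
    ys, xs = spatial // W, spatial % W
    # yaw = arctan(imaginary / real) = atan2(sin(yaw), cos(yaw))
    im = direction[:, 0].reshape(B, -1).gather(1, spatial)
    re = direction[:, 1].reshape(B, -1).gather(1, spatial)
    yaw = torch.atan2(im, re)
    return scores, xs, ys, yaw, keep
```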
## 5. How to expand the work
- The model could be trained with more classes and with a larger detection area by modifying the configurations in
the [config/kitti_config.py](https://github.com/maudzung/Super-Fast-Accurate-3D-Object-Detection/blob/master/src/config/kitti_config.py) file.

BIN
point-cloud/checkpoints/fpn_resnet_18/Model_fpn_resnet_18_epoch_300.pth


+ 41
- 0
point-cloud/requirements.txt

@@ -0,0 +1,41 @@
absl-py==1.1.0
cachetools==4.2.4
certifi==2022.6.15
charset-normalizer==2.0.12
cycler==0.11.0
easydict==1.9
future==0.18.2
google-auth==1.35.0
google-auth-oauthlib==0.4.6
grpcio==1.46.3
idna==3.3
importlib-metadata==4.11.4
joblib==1.1.0
kiwisolver==1.4.3
Markdown==3.3.7
matplotlib==3.3.3
numpy==1.18.3
oauthlib==3.2.0
opencv-python==4.2.0.34
Pillow==8.4.0
protobuf==3.19.1
pyasn1==0.4.8
pyasn1-modules==0.2.8
pyparsing==3.0.9
python-dateutil==2.8.2
requests==2.28.0
requests-oauthlib==1.3.1
rsa==4.8
scikit-learn==0.22.2
scipy==1.8.1
six==1.16.0
tensorboard==2.2.1
tensorboard-plugin-wit==1.8.1
torch==1.5.0
torchsummary==1.5.1
torchvision==0.6.0
tqdm==4.54.0
urllib3==1.26.9
Werkzeug==2.1.2
wget==3.2
zipp==3.8.0

+ 0
- 0
point-cloud/sfa/config/__init__.py


+ 99
- 0
point-cloud/sfa/config/kitti_config.py

@@ -0,0 +1,99 @@
import math
import numpy as np
# Car and Van ==> Car class
# Pedestrian and Person_Sitting ==> Pedestrian Class
# for train
CLASS_NAME_TO_ID = {
'Pedestrian': 0,
'Car': 1,
'Cyclist': 2,
'Van': 1,
'Truck': -3,
'Person_sitting': 0,
'Tram': -99,
'Misc': -99,
'TraffiCone': -1,
'DontCare': -1
}
# for test
CLASS_ID_TO_NAME = {
0: 'Pedestrian', # Person_sitting in the same class
1: 'Car', # Van in the same class
2: 'Cyclist'
}
colors = [[0, 255, 255], [0, 0, 255], [255, 0, 0], [255, 120, 0],
[255, 120, 120], [0, 120, 0], [120, 255, 255], [120, 0, 255]]
#####################################################################################
boundary = {
"minX": -50,
"maxX": 50,
"minY": -25,
"maxY": 25,
"minZ": -2.73,
"maxZ": 1.27
}
bound_size_x = boundary['maxX'] - boundary['minX']
bound_size_y = boundary['maxY'] - boundary['minY']
bound_size_z = boundary['maxZ'] - boundary['minZ']
boundary_back = {
"minX": -50,
"maxX": 0,
"minY": -25,
"maxY": 25,
"minZ": -2.73,
"maxZ": 1.27
}
BEV_WIDTH = 608 # across y axis: -25m ~ 25m
BEV_HEIGHT = 1216 # across x axis: -50m ~ 50m
DISCRETIZATION = (boundary["maxX"] - boundary["minX"]) / BEV_HEIGHT
DISCRETIZATION_Y = (boundary["maxX"] - boundary["minX"]) / BEV_HEIGHT
DISCRETIZATION_X = (boundary["maxY"] - boundary["minY"]) / BEV_WIDTH
# maximum number of points per voxel
T = 35
# voxel size
vd = 0.1 # z
vh = 0.05 # y
vw = 0.05 # x
# voxel grid
W = math.ceil(bound_size_x / vw)
H = math.ceil(bound_size_y / vh)
D = math.ceil(bound_size_z / vd)
# Following parameters are calculated as an average from KITTI dataset for simplicity
#####################################################################################
Tr_velo_to_cam = np.array([
[7.49916597e-03, -9.99971248e-01, -8.65110297e-04, -6.71807577e-03],
[1.18652889e-02, 9.54520517e-04, -9.99910318e-01, -7.33152811e-02],
[9.99882833e-01, 7.49141178e-03, 1.18719929e-02, -2.78557062e-01],
[0, 0, 0, 1]
])
# cal mean from train set
R0 = np.array([
[0.99992475, 0.00975976, -0.00734152, 0],
[-0.0097913, 0.99994262, -0.00430371, 0],
[0.00729911, 0.0043753, 0.99996319, 0],
[0, 0, 0, 1]
])
P2 = np.array([[719.787081, 0., 608.463003, 44.9538775],
[0., 719.787081, 174.545111, 0.1066855],
[0., 0., 1., 3.0106472e-03],
[0., 0., 0., 0]
])
R0_inv = np.linalg.inv(R0)
Tr_velo_to_cam_inv = np.linalg.inv(Tr_velo_to_cam)
P2_inv = np.linalg.pinv(P2)
#####################################################################################
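
The constants above fix the BEV discretization. As an illustrative sketch (not part of the file), a LiDAR point maps to a BEV pixel the same way `makeBEVMap` in `kitti_bev_utils.py` discretizes the cloud (the real code also handles a one-pixel border):
```python
import numpy as np
import config.kitti_config as cnf

def lidar_point_to_bev_pixel(x, y):
    # x in [-50, 50] m -> row in [0, BEV_HEIGHT); y in [-25, 25] m -> col in [0, BEV_WIDTH)
    row = int(np.floor(x / cnf.DISCRETIZATION_Y) + cnf.BEV_HEIGHT / 2)
    col = int(np.floor(y / cnf.DISCRETIZATION_X) + cnf.BEV_WIDTH / 2)
    return row, col

print(lidar_point_to_bev_pixel(0.0, 0.0))  # (608, 304): the sensor origin sits at the image center
```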

+ 172
- 0
point-cloud/sfa/config/train_config.py

@@ -0,0 +1,172 @@
"""
# -*- coding: utf-8 -*-
-----------------------------------------------------------------------------------
# Author: Nguyen Mau Dung
# DoC: 2020.08.17
# email: nguyenmaudung93.kstn@gmail.com
-----------------------------------------------------------------------------------
# Description: The configurations of the project will be defined here
"""
import os
import argparse
import torch
from easydict import EasyDict as edict
def parse_train_configs():
parser = argparse.ArgumentParser(description='The Implementation using PyTorch')
parser.add_argument('--seed', type=int, default=2020,
help='random seed for reproducible results')
parser.add_argument('--saved_fn', type=str, default='fpn_resnet_18', metavar='FN',
help='The name using for saving logs, models,...')
parser.add_argument('--root_dir', type=str, default='../', metavar='PATH',
help='The ROOT working directory')
####################################################################
############## Model configs ########################
####################################################################
parser.add_argument('--arch', type=str, default='fpn_resnet_18', metavar='ARCH',
help='The name of the model architecture')
parser.add_argument('--model_load_dir', type=str, default=None, metavar='PATH',
help='the path of the pretrained checkpoint')
####################################################################
############## Dataloader and Running configs #######
####################################################################
parser.add_argument('--data_url', type=str, default='../dataset/apollo/training', metavar='PATH',
help='the path of the dataset')
parser.add_argument('--val_data_url', type=str, default='../dataset/apollo/val', metavar='PATH',
help='the path of the dataset')
parser.add_argument('--train_model_out', type=str, default='../checkpoints', metavar='PATH',
help='the path of the model output')
parser.add_argument('--train_out', type=str, default='../logs', metavar='PATH',
help='the path of the logs output')
parser.add_argument('--hflip_prob', type=float, default=0.5,
help='The probability of horizontal flip')
parser.add_argument('--no-val', action='store_true',
help='If true, dont evaluate the model on the val set')
parser.add_argument('--num_samples', type=int, default=None,
help='Take a subset of the dataset to run and debug')
parser.add_argument('--num_workers', type=int, default=4,
help='Number of threads for loading data')
parser.add_argument('--batch_size', type=int, default=8,
help='mini-batch size (default: 8), this is the total '
'batch size of all GPUs on the current node when using '
'Data Parallel or Distributed Data Parallel')
parser.add_argument('--print_freq', type=int, default=50, metavar='N',
help='print frequency (default: 50)')
parser.add_argument('--tensorboard_freq', type=int, default=50, metavar='N',
help='frequency of saving tensorboard (default: 50)')
parser.add_argument('--checkpoint_freq', type=int, default=2, metavar='N',
help='frequency of saving checkpoints (default: 2)')
parser.add_argument('--gpu_num_per_node', type=int, default=1,
help='Number of GPU')
####################################################################
############## Training strategy ####################
####################################################################
parser.add_argument('--start_epoch', type=int, default=1, metavar='N',
help='the starting epoch')
parser.add_argument('--num_epochs', type=int, default=300, metavar='N',
help='number of total epochs to run')
parser.add_argument('--lr_type', type=str, default='cosin',
help='the type of learning rate scheduler (cosin or multi_step or one_cycle)')
parser.add_argument('--lr', type=float, default=0.001, metavar='LR',
help='initial learning rate')
parser.add_argument('--minimum_lr', type=float, default=1e-7, metavar='MIN_LR',
help='minimum learning rate during training')
parser.add_argument('--momentum', type=float, default=0.949, metavar='M',
help='momentum')
parser.add_argument('-wd', '--weight_decay', type=float, default=0., metavar='WD',
help='weight decay (default: 0.)')
parser.add_argument('--optimizer_type', type=str, default='adam', metavar='OPTIMIZER',
help='the type of optimizer, it can be sgd or adam')
parser.add_argument('--steps', nargs='*', default=[150, 180],
help='number of burn in step')
####################################################################
############## Loss weight ##########################
####################################################################
####################################################################
############## Distributed Data Parallel ############
####################################################################
parser.add_argument('--world-size', default=-1, type=int, metavar='N',
help='number of nodes for distributed training')
parser.add_argument('--rank', default=-1, type=int, metavar='N',
help='node rank for distributed training')
parser.add_argument('--dist-url', default='tcp://127.0.0.1:29500', type=str,
help='url used to set up distributed training')
parser.add_argument('--dist-backend', default='nccl', type=str,
help='distributed backend')
parser.add_argument('--gpu_idx', default=0, type=int,
help='GPU index to use.')
parser.add_argument('--no_cuda', default=False,
help='If true, cuda is not used.')
parser.add_argument('--multiprocessing-distributed', action='store_true',
help='Use multi-processing distributed training to launch '
'N processes per node, which has N GPUs. This is the '
'fastest way to use PyTorch for either single node or '
'multi node data parallel training')
####################################################################
############## Evaluation configurations ###################
####################################################################
parser.add_argument('--evaluate', action='store_true',
help='only evaluate the model, not training')
parser.add_argument('--resume_path', type=str, default=None, metavar='PATH',
help='the path of the resumed checkpoint')
parser.add_argument('--K', type=int, default=50,
help='the number of top K')
configs = edict(vars(parser.parse_args()))
####################################################################
############## Hardware configurations #############################
####################################################################
# configs.device = torch.device('cpu' if configs.no_cuda else 'cuda')
configs.device = torch.device('cpu' if configs.no_cuda else 'cuda:{}'.format(configs.gpu_idx))
configs.ngpus_per_node = torch.cuda.device_count()
configs.pin_memory = True
configs.input_size = (1216, 608)
configs.hm_size = (304, 152)
configs.down_ratio = 4
configs.max_objects = 50
configs.imagenet_pretrained = True
configs.head_conv = 64
configs.num_classes = 3
configs.num_center_offset = 2
configs.num_z = 1
configs.num_dim = 3
configs.num_direction = 2 # sin, cos
configs.heads = {
'hm_cen': configs.num_classes,
'cen_offset': configs.num_center_offset,
'direction': configs.num_direction,
'z_coor': configs.num_z,
'dim': configs.num_dim
}
configs.num_input_features = 4
####################################################################
############## Dataset, logs, Checkpoints dir ######################
####################################################################
configs.dataset = 'apollo' # or kitti
configs.dataset_dir = configs.data_url
# configs.checkpoints_dir = os.path.join(configs.train_model_out, configs.saved_fn)
configs.checkpoints_dir = configs.train_model_out
# configs.logs_dir = os.path.join(configs.train_out, configs.saved_fn)
configs.logs_dir = configs.train_out
configs.pretrained_path = configs.model_load_dir
if not os.path.isdir(configs.checkpoints_dir):
os.makedirs(configs.checkpoints_dir)
if not os.path.isdir(configs.logs_dir):
os.makedirs(configs.logs_dir)
return configs
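
A minimal usage sketch of the parser (run from inside `sfa/`; the printed values follow from the defaults above):
```python
from config.train_config import parse_train_configs

configs = parse_train_configs()
print(configs.device)  # cpu, or cuda:<gpu_idx>
print(configs.heads)   # {'hm_cen': 3, 'cen_offset': 2, 'direction': 2, 'z_coor': 1, 'dim': 3}
```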

+ 0
- 0
point-cloud/sfa/data_process/__init__.py


+ 99
- 0
point-cloud/sfa/data_process/demo_dataset.py

@@ -0,0 +1,99 @@
"""
# -*- coding: utf-8 -*-
-----------------------------------------------------------------------------------
# Author: Nguyen Mau Dung
# DoC: 2020.08.17
# email: nguyenmaudung93.kstn@gmail.com
-----------------------------------------------------------------------------------
# Description: This script is for the KITTI dataset
"""
import sys
import os
from builtins import int
from glob import glob
import numpy as np
from torch.utils.data import Dataset
import cv2
import torch
src_dir = os.path.dirname(os.path.realpath(__file__))
# while not src_dir.endswith("sfa"):
# src_dir = os.path.dirname(src_dir)
if src_dir not in sys.path:
sys.path.append(src_dir)
from data_process.kitti_data_utils import get_filtered_lidar
from data_process.kitti_bev_utils import makeBEVMap
import config.kitti_config as cnf
class Demo_KittiDataset(Dataset):
def __init__(self, configs):
self.dataset_dir = os.path.join(configs.dataset_dir, configs.foldername, configs.foldername[:10],
configs.foldername)
self.input_size = configs.input_size
self.hm_size = configs.hm_size
self.num_classes = configs.num_classes
self.max_objects = configs.max_objects
self.image_dir = os.path.join(self.dataset_dir, "image_02", "data")
self.lidar_dir = os.path.join(self.dataset_dir, "velodyne_points", "data")
self.label_dir = os.path.join(self.dataset_dir, "label_2", "data")
self.sample_id_list = sorted(glob(os.path.join(self.lidar_dir, '*.bin')))
self.sample_id_list = [float(os.path.basename(fn)[:-4]) for fn in self.sample_id_list]
self.num_samples = len(self.sample_id_list)
def __len__(self):
return len(self.sample_id_list)
def __getitem__(self, index):
pass
def load_bevmap_front(self, index):
"""Load only image for the testing phase"""
sample_id = int(self.sample_id_list[index])
img_path, img_rgb = self.get_image(sample_id)
lidarData = self.get_lidar(sample_id)
front_lidar = get_filtered_lidar(lidarData, cnf.boundary)
front_bevmap = makeBEVMap(front_lidar, cnf.boundary)
front_bevmap = torch.from_numpy(front_bevmap)
metadatas = {
'img_path': img_path,
}
return metadatas, front_bevmap, img_rgb
def load_bevmap_front_vs_back(self, index):
"""Load only image for the testing phase"""
sample_id = int(self.sample_id_list[index])
img_path, img_rgb = self.get_image(sample_id)
lidarData = self.get_lidar(sample_id)
front_lidar = get_filtered_lidar(lidarData, cnf.boundary)
front_bevmap = makeBEVMap(front_lidar, cnf.boundary)
front_bevmap = torch.from_numpy(front_bevmap)
back_lidar = get_filtered_lidar(lidarData, cnf.boundary_back)
back_bevmap = makeBEVMap(back_lidar, cnf.boundary_back)
back_bevmap = torch.from_numpy(back_bevmap)
metadatas = {
'img_path': img_path,
}
return metadatas, front_bevmap, back_bevmap, img_rgb
def get_image(self, idx):
img_path = os.path.join(self.image_dir, '{:010d}.png'.format(idx))
img = cv2.cvtColor(cv2.imread(img_path), cv2.COLOR_BGR2RGB)
return img_path, img
def get_lidar(self, idx):
lidar_file = os.path.join(self.lidar_dir, '{:010d}.bin'.format(idx))
# assert os.path.isfile(lidar_file)
return np.fromfile(lidar_file, dtype=np.float32).reshape(-1, 4)
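
A minimal usage sketch of `Demo_KittiDataset`; the `configs` fields are the ones the class reads, and the dataset path and folder name are illustrative, following the KITTI raw-data naming that `foldername[:10]` assumes:
```python
from easydict import EasyDict as edict
from data_process.demo_dataset import Demo_KittiDataset

configs = edict()
configs.dataset_dir = '../dataset/kitti_raw'        # illustrative path
configs.foldername = '2011_09_26_drive_0014_sync'   # illustrative KITTI raw sequence
configs.input_size = (1216, 608)
configs.hm_size = (304, 152)
configs.num_classes = 3
configs.max_objects = 50

dataset = Demo_KittiDataset(configs)
metadatas, front_bevmap, img_rgb = dataset.load_bevmap_front(0)
print(front_bevmap.shape)  # torch.Size([3, 1216, 608])
```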

+ 98
- 0
point-cloud/sfa/data_process/kitti_bev_utils.py

@@ -0,0 +1,98 @@
"""
# -*- coding: utf-8 -*-
-----------------------------------------------------------------------------------
"""
import math
import os
import sys
import cv2
import numpy as np
src_dir = os.path.dirname(os.path.realpath(__file__))
# while not src_dir.endswith("sfa"):
# src_dir = os.path.dirname(src_dir)
if src_dir not in sys.path:
sys.path.append(src_dir)
import config.kitti_config as cnf
def makeBEVMap(PointCloud_, boundary):
Height = cnf.BEV_HEIGHT + 1
Width = cnf.BEV_WIDTH + 1
# Discretize Feature Map
PointCloud = np.copy(PointCloud_)
# PointCloud[:, 0] = np.int_(np.floor(PointCloud[:, 0] / cnf.DISCRETIZATION))
# PointCloud[:, 1] = np.int_(np.floor(PointCloud[:, 1] / cnf.DISCRETIZATION) + Width / 2)
# For the Apollo dataset the full 360° is detected, so the x index is also shifted by Height / 2
PointCloud[:, 0] = np.int_(np.floor(PointCloud[:, 0] / cnf.DISCRETIZATION_Y) + Height / 2)
PointCloud[:, 1] = np.int_(np.floor(PointCloud[:, 1] / cnf.DISCRETIZATION_X) + Width / 2)
# sort by x, then y, then descending z so the highest point in each BEV cell comes first
indices = np.lexsort((-PointCloud[:, 2], PointCloud[:, 1], PointCloud[:, 0]))
PointCloud = PointCloud[indices]
# Height Map
heightMap = np.zeros((Height, Width))
_, indices = np.unique(PointCloud[:, 0:2], axis=0, return_index=True)
PointCloud_frac = PointCloud[indices]
# note: image coordinates are ordered (y, x), not (x, y)
max_height = float(np.abs(boundary['maxZ'] - boundary['minZ']))
heightMap[np.int_(PointCloud_frac[:, 0]), np.int_(PointCloud_frac[:, 1])] = PointCloud_frac[:, 2] / max_height #(1217,609)
# Intensity Map & DensityMap
intensityMap = np.zeros((Height, Width))
densityMap = np.zeros((Height, Width))
_, indices, counts = np.unique(PointCloud[:, 0:2], axis=0, return_index=True, return_counts=True)
PointCloud_top = PointCloud[indices]
normalizedCounts = np.minimum(1.0, np.log(counts + 1) / np.log(64))
intensityMap[np.int_(PointCloud_top[:, 0]), np.int_(PointCloud_top[:, 1])] = PointCloud_top[:, 3] / 255.0 # the Hesai 40P reflectance intensity is in 0~255
densityMap[np.int_(PointCloud_top[:, 0]), np.int_(PointCloud_top[:, 1])] = normalizedCounts
RGB_Map = np.zeros((3, Height - 1, Width - 1))
RGB_Map[2, :, :] = densityMap[:cnf.BEV_HEIGHT, :cnf.BEV_WIDTH] # r_map
RGB_Map[1, :, :] = heightMap[:cnf.BEV_HEIGHT, :cnf.BEV_WIDTH] # g_map
RGB_Map[0, :, :] = intensityMap[:cnf.BEV_HEIGHT, :cnf.BEV_WIDTH] # b_map
return RGB_Map
# bev image coordinates format
def get_corners(x, y, w, l, yaw):
bev_corners = np.zeros((4, 2), dtype=np.float32)
cos_yaw = np.cos(yaw)
sin_yaw = np.sin(yaw)
# front left
bev_corners[0, 0] = x - w / 2 * cos_yaw - l / 2 * sin_yaw
bev_corners[0, 1] = y - w / 2 * sin_yaw + l / 2 * cos_yaw
# rear left
bev_corners[1, 0] = x - w / 2 * cos_yaw + l / 2 * sin_yaw
bev_corners[1, 1] = y - w / 2 * sin_yaw - l / 2 * cos_yaw
# rear right
bev_corners[2, 0] = x + w / 2 * cos_yaw + l / 2 * sin_yaw
bev_corners[2, 1] = y + w / 2 * sin_yaw - l / 2 * cos_yaw
# front right
bev_corners[3, 0] = x + w / 2 * cos_yaw - l / 2 * sin_yaw
bev_corners[3, 1] = y + w / 2 * sin_yaw + l / 2 * cos_yaw
return bev_corners
def drawRotatedBox(img, x, y, w, l, yaw, color):
bev_corners = get_corners(x, y, w, l, yaw)
corners_int = bev_corners.reshape(-1, 1, 2).astype(int)
cv2.polylines(img, [corners_int], True, color, 2)
corners_int = bev_corners.reshape(-1, 2)
# draw the front edge (front-left to front-right corner) in yellow to mark the heading
cv2.line(img, (int(corners_int[0, 0]), int(corners_int[0, 1])), (int(corners_int[3, 0]), int(corners_int[3, 1])), (255, 255, 0), 2)
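
A minimal end-to-end sketch of building a BEV map from one `.bin` file (the sample path is illustrative):
```python
import numpy as np
import config.kitti_config as cnf
from data_process.kitti_bev_utils import makeBEVMap
from data_process.kitti_data_utils import get_filtered_lidar

lidar = np.fromfile('../dataset/apollo/training/velodyne/000000.bin',  # illustrative path
                    dtype=np.float32).reshape(-1, 4)
lidar = get_filtered_lidar(lidar, cnf.boundary)
bev = makeBEVMap(lidar, cnf.boundary)  # (3, 1216, 608): intensity, height, density channels
print(bev.shape)
```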

+ 324
- 0
point-cloud/sfa/data_process/kitti_data_utils.py

@@ -0,0 +1,324 @@
"""
# -*- coding: utf-8 -*-
-----------------------------------------------------------------------------------
# Author: Nguyen Mau Dung
# DoC: 2020.08.17
# email: nguyenmaudung93.kstn@gmail.com
-----------------------------------------------------------------------------------
# Description: The utils of the kitti dataset
"""
from __future__ import print_function
import os
import sys
import numpy as np
import cv2
src_dir = os.path.dirname(os.path.realpath(__file__))
# while not src_dir.endswith("sfa"):
# src_dir = os.path.dirname(src_dir)
if src_dir not in sys.path:
sys.path.append(src_dir)
import config.kitti_config as cnf
class Object3d(object):
''' 3d object label '''
def __init__(self, label_file_line):
data = label_file_line.split(' ')
data[1:] = [float(x) for x in data[1:]]
# extract label, truncation, occlusion
self.type = data[0] # 'Car', 'Pedestrian', ...
self.cls_id = self.cls_type_to_id(self.type)
self.truncation = data[1] # truncated pixel ratio [0..1]
self.occlusion = int(data[2]) # 0=visible, 1=partly occluded, 2=fully occluded, 3=unknown
self.alpha = data[3] # object observation angle [-pi..pi]
# extract 2d bounding box in 0-based coordinates
self.xmin = data[4] # left
self.ymin = data[5] # top
self.xmax = data[6] # right
self.ymax = data[7] # bottom
self.box2d = np.array([self.xmin, self.ymin, self.xmax, self.ymax])
# extract 3d bounding box information
self.h = data[8] # box height
self.w = data[9] # box width
self.l = data[10] # box length (in meters)
self.t = (data[11], data[12], data[13]) # location (x,y,z) in camera coord.
self.dis_to_cam = np.linalg.norm(self.t)
self.ry = data[14] # yaw angle (around Y-axis in camera coordinates) [-pi..pi]
self.score = data[15] if data.__len__() == 16 else -1.0
self.level_str = None
self.level = self.get_obj_level()
def cls_type_to_id(self, cls_type):
if cls_type not in cnf.CLASS_NAME_TO_ID.keys():
return -1
return cnf.CLASS_NAME_TO_ID[cls_type]
def get_obj_level(self):
height = float(self.box2d[3]) - float(self.box2d[1]) + 1
if height >= 40 and self.truncation <= 0.15 and self.occlusion <= 0:
self.level_str = 'Easy'
return 1 # Easy
elif height >= 25 and self.truncation <= 0.3 and self.occlusion <= 1:
self.level_str = 'Moderate'
return 2 # Moderate
elif height >= 25 and self.truncation <= 0.5 and self.occlusion <= 2:
self.level_str = 'Hard'
return 3 # Hard
else:
self.level_str = 'UnKnown'
return 4
def print_object(self):
print('Type, truncation, occlusion, alpha: %s, %d, %d, %f' % \
(self.type, self.truncation, self.occlusion, self.alpha))
print('2d bbox (x0,y0,x1,y1): %f, %f, %f, %f' % \
(self.xmin, self.ymin, self.xmax, self.ymax))
print('3d bbox h,w,l: %f, %f, %f' % \
(self.h, self.w, self.l))
print('3d bbox location, ry: (%f, %f, %f), %f' % \
(self.t[0], self.t[1], self.t[2], self.ry))
def to_kitti_format(self):
kitti_str = '%s %.2f %d %.2f %.2f %.2f %.2f %.2f %.2f %.2f %.2f %.2f %.2f %.2f %.2f %.2f' \
% (self.type, self.truncation, int(self.occlusion), self.alpha, self.box2d[0], self.box2d[1],
self.box2d[2], self.box2d[3], self.h, self.w, self.l, self.t[0], self.t[1], self.t[2],
self.ry, self.score)
return kitti_str
def read_label(label_filename):
lines = [line.rstrip() for line in open(label_filename)]
objects = [Object3d(line) for line in lines]
return objects
class Calibration(object):
''' Calibration matrices and utils
3d XYZ in <label>.txt are in rect camera coord.
2d box xy are in image2 coord
Points in <lidar>.bin are in Velodyne coord.
y_image2 = P^2_rect * x_rect
y_image2 = P^2_rect * R0_rect * Tr_velo_to_cam * x_velo
x_ref = Tr_velo_to_cam * x_velo
x_rect = R0_rect * x_ref
P^2_rect = [f^2_u, 0, c^2_u, -f^2_u b^2_x;
0, f^2_v, c^2_v, -f^2_v b^2_y;
0, 0, 1, 0]
= K * [1|t]
image2 coord:
----> x-axis (u)
|
|
v y-axis (v)
velodyne coord:
front x, left y, up z
rect/ref camera coord:
right x, down y, front z
Ref (KITTI paper): http://www.cvlibs.net/publications/Geiger2013IJRR.pdf
TODO(rqi): do matrix multiplication only once for each projection.
'''
def __init__(self, calib_filepath):
calibs = self.read_calib_file(calib_filepath)
# Projection matrix from rect camera coord to image2 coord
self.P2 = calibs['P2']
self.P2 = np.reshape(self.P2, [3, 4])
self.P3 = calibs['P3']
self.P3 = np.reshape(self.P3, [3, 4])
# Rigid transform from Velodyne coord to reference camera coord
self.V2C = calibs['Tr_velo2cam']
self.V2C = np.reshape(self.V2C, [3, 4])
# Rotation from reference camera coord to rect camera coord
self.R0 = calibs['R_rect']
self.R0 = np.reshape(self.R0, [3, 3])
# Camera intrinsics and extrinsics
self.c_u = self.P2[0, 2]
self.c_v = self.P2[1, 2]
self.f_u = self.P2[0, 0]
self.f_v = self.P2[1, 1]
self.b_x = self.P2[0, 3] / (-self.f_u) # relative
self.b_y = self.P2[1, 3] / (-self.f_v)
def read_calib_file(self, filepath):
with open(filepath) as f:
lines = f.readlines()
obj = lines[2].strip().split(' ')[1:]
P2 = np.array(obj, dtype=np.float32)
obj = lines[3].strip().split(' ')[1:]
P3 = np.array(obj, dtype=np.float32)
obj = lines[4].strip().split(' ')[1:]
R0 = np.array(obj, dtype=np.float32)
obj = lines[5].strip().split(' ')[1:]
Tr_velo_to_cam = np.array(obj, dtype=np.float32)
return {'P2': P2.reshape(3, 4),
'P3': P3.reshape(3, 4),
'R_rect': R0.reshape(3, 3),
'Tr_velo2cam': Tr_velo_to_cam.reshape(3, 4)}
def cart2hom(self, pts_3d):
"""
:param pts: (N, 3 or 2)
:return pts_hom: (N, 4 or 3)
"""
pts_hom = np.hstack((pts_3d, np.ones((pts_3d.shape[0], 1), dtype=np.float32)))
return pts_hom
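# CenterNet-style Gaussian radius: the smallest of three candidate radii such that a box
# whose corners shift by that radius still overlaps the ground-truth box with IoU >= min_overlap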
def compute_radius(det_size, min_overlap=0.7):
height, width = det_size
a1 = 1
b1 = (height + width)
c1 = width * height * (1 - min_overlap) / (1 + min_overlap)
sq1 = np.sqrt(b1 ** 2 - 4 * a1 * c1)
r1 = (b1 + sq1) / 2
a2 = 4
b2 = 2 * (height + width)
c2 = (1 - min_overlap) * width * height
sq2 = np.sqrt(b2 ** 2 - 4 * a2 * c2)
r2 = (b2 + sq2) / 2
a3 = 4 * min_overlap
b3 = -2 * min_overlap * (height + width)
c3 = (min_overlap - 1) * width * height
sq3 = np.sqrt(b3 ** 2 - 4 * a3 * c3)
r3 = (b3 + sq3) / 2
return min(r1, r2, r3)
def gaussian2D(shape, sigma=1):
m, n = [(ss - 1.) / 2. for ss in shape]
y, x = np.ogrid[-m:m + 1, -n:n + 1]
h = np.exp(-(x * x + y * y) / (2 * sigma * sigma))
h[h < np.finfo(h.dtype).eps * h.max()] = 0
return h
def gen_hm_radius(heatmap, center, radius, k=1):
diameter = 2 * radius + 1
gaussian = gaussian2D((diameter, diameter), sigma=diameter / 6)
x, y = int(center[0]), int(center[1])
height, width = heatmap.shape[0:2]
left, right = min(x, radius), min(width - x, radius + 1)
top, bottom = min(y, radius), min(height - y, radius + 1)
masked_heatmap = heatmap[y - top:y + bottom, x - left:x + right]
masked_gaussian = gaussian[radius - top:radius + bottom, radius - left:radius + right]
if min(masked_gaussian.shape) > 0 and min(masked_heatmap.shape) > 0: # TODO debug
np.maximum(masked_heatmap, masked_gaussian * k, out=masked_heatmap)
return heatmap
def get_filtered_lidar(lidar, boundary, labels=None):
minX = boundary['minX']
maxX = boundary['maxX']
minY = boundary['minY']
maxY = boundary['maxY']
minZ = boundary['minZ']
maxZ = boundary['maxZ']
# Remove the point out of range x,y,z
mask = np.where((lidar[:, 0] >= minX) & (lidar[:, 0] <= maxX) &
(lidar[:, 1] >= minY) & (lidar[:, 1] <= maxY) &
(lidar[:, 2] >= minZ) & (lidar[:, 2] <= maxZ))
lidar = lidar[mask]
lidar[:, 2] = lidar[:, 2] - minZ
if labels is not None:
label_x = (labels[:, 1] >= minX) & (labels[:, 1] < maxX)
label_y = (labels[:, 2] >= minY) & (labels[:, 2] < maxY)
label_z = (labels[:, 3] >= minZ) & (labels[:, 3] < maxZ)
mask_label = label_x & label_y & label_z
labels = labels[mask_label]
return lidar, labels
else:
return lidar
def box3d_corners_to_center(box3d_corner):
# (N, 8, 3) -> (N, 7)
assert box3d_corner.ndim == 3
xyz = np.mean(box3d_corner, axis=1)
h = abs(np.mean(box3d_corner[:, 4:, 2] - box3d_corner[:, :4, 2], axis=1, keepdims=True))
w = (np.sqrt(np.sum((box3d_corner[:, 0, [0, 1]] - box3d_corner[:, 1, [0, 1]]) ** 2, axis=1, keepdims=True)) +
np.sqrt(np.sum((box3d_corner[:, 2, [0, 1]] - box3d_corner[:, 3, [0, 1]]) ** 2, axis=1, keepdims=True)) +
np.sqrt(np.sum((box3d_corner[:, 4, [0, 1]] - box3d_corner[:, 5, [0, 1]]) ** 2, axis=1, keepdims=True)) +
np.sqrt(np.sum((box3d_corner[:, 6, [0, 1]] - box3d_corner[:, 7, [0, 1]]) ** 2, axis=1, keepdims=True))) / 4
l = (np.sqrt(np.sum((box3d_corner[:, 0, [0, 1]] - box3d_corner[:, 3, [0, 1]]) ** 2, axis=1, keepdims=True)) +
np.sqrt(np.sum((box3d_corner[:, 1, [0, 1]] - box3d_corner[:, 2, [0, 1]]) ** 2, axis=1, keepdims=True)) +
np.sqrt(np.sum((box3d_corner[:, 4, [0, 1]] - box3d_corner[:, 7, [0, 1]]) ** 2, axis=1, keepdims=True)) +
np.sqrt(np.sum((box3d_corner[:, 5, [0, 1]] - box3d_corner[:, 6, [0, 1]]) ** 2, axis=1, keepdims=True))) / 4
yaw = (np.arctan2(box3d_corner[:, 2, 1] - box3d_corner[:, 1, 1],
box3d_corner[:, 2, 0] - box3d_corner[:, 1, 0]) +
np.arctan2(box3d_corner[:, 3, 1] - box3d_corner[:, 0, 1],
box3d_corner[:, 3, 0] - box3d_corner[:, 0, 0]) +
np.arctan2(box3d_corner[:, 2, 0] - box3d_corner[:, 3, 0],
box3d_corner[:, 3, 1] - box3d_corner[:, 2, 1]) +
np.arctan2(box3d_corner[:, 1, 0] - box3d_corner[:, 0, 0],
box3d_corner[:, 0, 1] - box3d_corner[:, 1, 1]))[:, np.newaxis] / 4
return np.concatenate([h, w, l, xyz, yaw], axis=1).reshape(-1, 7)
def box3d_center_to_conners(box3d_center):
h, w, l, x, y, z, yaw = box3d_center
Box = np.array([[-l / 2, -l / 2, l / 2, l / 2, -l / 2, -l / 2, l / 2, l / 2],
[w / 2, -w / 2, -w / 2, w / 2, w / 2, -w / 2, -w / 2, w / 2],
[0, 0, 0, 0, h, h, h, h]])
rotMat = np.array([
[np.cos(yaw), -np.sin(yaw), 0.0],
[np.sin(yaw), np.cos(yaw), 0.0],
[0.0, 0.0, 1.0]])
velo_box = np.dot(rotMat, Box)
cornerPosInVelo = velo_box + np.tile(np.array([x, y, z]), (8, 1)).T
box3d_corner = cornerPosInVelo.transpose()
return box3d_corner.astype(np.float32)
if __name__ == '__main__':
heatmap = np.zeros((96, 320))
h, w = 40, 50
radius = compute_radius((h, w))
radius = max(0, int(radius))
print('h: {}, w: {}, radius: {}, sigma: {}'.format(h, w, radius, (2 * radius + 1) / 6.))
gen_hm_radius(heatmap, center=(200, 50), radius=radius)
while True:
cv2.imshow('heatmap', heatmap)
if cv2.waitKey(0) & 0xff == 27:
break
max_pos = np.unravel_index(heatmap.argmax(), shape=heatmap.shape)
print('max_pos: {}'.format(max_pos))

+ 67
- 0
point-cloud/sfa/data_process/kitti_dataloader.py

@@ -0,0 +1,67 @@
"""
# -*- coding: utf-8 -*-
-----------------------------------------------------------------------------------
# Author: Nguyen Mau Dung
# DoC: 2020.08.17
# email: nguyenmaudung93.kstn@gmail.com
-----------------------------------------------------------------------------------
# Description: This script creates the dataloaders for the training/validation/test phases
"""
import os
import sys
import torch
from torch.utils.data import DataLoader
import numpy as np
src_dir = os.path.dirname(os.path.realpath(__file__))
# while not src_dir.endswith("sfa"):
# src_dir = os.path.dirname(src_dir)
if src_dir not in sys.path:
sys.path.append(src_dir)
from data_process.kitti_dataset import KittiDataset
from data_process.transformation import OneOf, Random_Rotation, Random_Scaling
def create_train_dataloader(configs):
"""Create dataloader for training"""
train_lidar_aug = OneOf([
Random_Rotation(limit_angle=np.pi / 4, p=1.0),
Random_Scaling(scaling_range=(0.95, 1.05), p=1.0),
], p=0.66)
train_dataset = KittiDataset(configs, mode='train', lidar_aug=train_lidar_aug, hflip_prob=configs.hflip_prob,
num_samples=configs.num_samples)
train_sampler = None
if configs.distributed:
train_sampler = torch.utils.data.distributed.DistributedSampler(train_dataset)
train_dataloader = DataLoader(train_dataset, batch_size=configs.batch_size, shuffle=(train_sampler is None),
pin_memory=configs.pin_memory, num_workers=configs.num_workers, sampler=train_sampler)
return train_dataloader, train_sampler
def create_val_dataloader(configs):
"""Create dataloader for validation"""
val_sampler = None
val_dataset = KittiDataset(configs, mode='val', lidar_aug=None, hflip_prob=0., num_samples=configs.num_samples)
if configs.distributed:
val_sampler = torch.utils.data.distributed.DistributedSampler(val_dataset, shuffle=False)
val_dataloader = DataLoader(val_dataset, batch_size=configs.batch_size, shuffle=False,
pin_memory=configs.pin_memory, num_workers=configs.num_workers, sampler=val_sampler)
return val_dataloader
def create_test_dataloader(configs):
"""Create dataloader for testing phase"""
test_dataset = KittiDataset(configs, mode='test', lidar_aug=None, hflip_prob=0., num_samples=configs.num_samples)
test_sampler = None
if configs.distributed:
test_sampler = torch.utils.data.distributed.DistributedSampler(test_dataset)
test_dataloader = DataLoader(test_dataset, batch_size=configs.batch_size, shuffle=False,
pin_memory=configs.pin_memory, num_workers=configs.num_workers, sampler=test_sampler)
return test_dataloader
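
A minimal sketch of driving the train loader; the `configs` fields are the ones `KittiDataset` and `create_train_dataloader` actually read, and the dataset path is illustrative:
```python
from easydict import EasyDict as edict
from data_process.kitti_dataloader import create_train_dataloader

configs = edict(dict(
    dataset_dir='../dataset/apollo/training',  # illustrative path
    input_size=(1216, 608), hm_size=(304, 152),
    num_classes=3, max_objects=50, hflip_prob=0.5, num_samples=None,
    distributed=False, batch_size=8, num_workers=4, pin_memory=True,
))
train_loader, _ = create_train_dataloader(configs)
for bev_maps, targets in train_loader:
    print(bev_maps.shape)  # (B, 3, 1216, 608)
    break
```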

+ 335
- 0
point-cloud/sfa/data_process/kitti_dataset.py

@@ -0,0 +1,335 @@
"""
# -*- coding: utf-8 -*-
-----------------------------------------------------------------------------------
# Author: Nguyen Mau Dung
# DoC: 2020.08.17
# email: nguyenmaudung93.kstn@gmail.com
-----------------------------------------------------------------------------------
# Description: This script is for the KITTI dataset
"""
import sys
import os
import math
from builtins import int
import numpy as np
from torch.utils.data import Dataset
import cv2
import torch
src_dir = os.path.dirname(os.path.realpath(__file__))
# while not src_dir.endswith("sfa"):
# src_dir = os.path.dirname(src_dir)
if src_dir not in sys.path:
sys.path.append(src_dir)
from data_process.kitti_data_utils import gen_hm_radius, compute_radius, Calibration, get_filtered_lidar
from data_process.kitti_bev_utils import makeBEVMap, drawRotatedBox, get_corners
from data_process import transformation
import config.kitti_config as cnf
class KittiDataset(Dataset):
def __init__(self, configs, mode='train', lidar_aug=None, hflip_prob=None, num_samples=None):
self.dataset_dir = configs.dataset_dir
self.input_size = configs.input_size
self.hm_size = configs.hm_size
self.num_classes = configs.num_classes
self.max_objects = configs.max_objects
assert mode in ['train', 'val', 'test'], 'Invalid mode: {}'.format(mode)
self.mode = mode
self.is_test = (self.mode == 'test')
# sub_folder = 'testing' if self.is_test else 'training'
self.lidar_aug = lidar_aug
self.hflip_prob = hflip_prob
if mode == 'val':
self.val_data_url = configs.val_data_url
self.lidar_dir = os.path.join(self.val_data_url, "velodyne")
self.calib_dir = os.path.join(self.val_data_url, "calib")
self.label_dir = os.path.join(self.val_data_url, "label_2")
# self.image_dir = os.path.join(self.dataset_dir, sub_folder, "image_2")
else:
self.lidar_dir = os.path.join(self.dataset_dir, "velodyne")
self.calib_dir = os.path.join(self.dataset_dir, "calib")
self.label_dir = os.path.join(self.dataset_dir, "label_2")
# split_txt_path = os.path.join('../dataset/apollo/', 'ImageSets', '{}.txt'.format(mode))
sample_list = []
sample_files = os.listdir(self.lidar_dir)
for bin_file in sample_files:
bin_name = bin_file.split('.')[0]
sample_list.append(bin_name)
self.sample_id_list = sample_list
if num_samples is not None:
self.sample_id_list = self.sample_id_list[:num_samples]
self.num_samples = len(self.sample_id_list)
def __len__(self):
return len(self.sample_id_list)
def __getitem__(self, index):
if self.is_test:
return self.load_img_only(index)
else:
return self.load_img_with_targets(index)
def load_img_only(self, index):
"""Load only image for the testing phase"""
sample_id = self.sample_id_list[index]
# print(sample_id)
# img_path, img_rgb = self.get_image(sample_id)
lidarData = self.get_lidar(sample_id)
lidarData = get_filtered_lidar(lidarData, cnf.boundary)
bev_map = makeBEVMap(lidarData, cnf.boundary)
bev_map = torch.from_numpy(bev_map)
bev_path = os.path.join(self.lidar_dir, '{}.png'.format(sample_id))
metadatas = {
'bev_path': bev_path,
}
# return metadatas, bev_map, img_rgb
return bev_map, metadatas
def load_img_with_targets(self, index):
"""Load images and targets for the training and validation phase"""
sample_id = self.sample_id_list[index]
# img_path = os.path.join(self.image_dir, '{}.png'.format(sample_id))
lidarData = self.get_lidar(sample_id)
# calib = self.get_calib(sample_id)
labels, has_labels = self.get_label(sample_id)
# if has_labels:
# labels[:, 1:] = transformation.camera_to_lidar_box(labels[:, 1:], calib.V2C, calib.R0, calib.P2)
if self.lidar_aug:
lidarData, labels[:, 1:] = self.lidar_aug(lidarData, labels[:, 1:])
lidarData, labels = get_filtered_lidar(lidarData, cnf.boundary, labels)
bev_map = makeBEVMap(lidarData, cnf.boundary)
bev_map = torch.from_numpy(bev_map)
hflipped = False
if np.random.random() < self.hflip_prob:
hflipped = True
# C, H, W
bev_map = torch.flip(bev_map, [-1])
targets = self.build_targets(labels, hflipped)
# metadatas = {
# 'img_path': img_path,
# 'hflipped': hflipped
# }
# return metadatas, bev_map, targets
return bev_map, targets
def get_image(self, idx):
img_path = os.path.join(self.image_dir, '{}.png'.format(idx))
img = cv2.cvtColor(cv2.imread(img_path), cv2.COLOR_BGR2RGB)
return img_path, img
def get_calib(self, idx):
calib_file = os.path.join(self.calib_dir, '{}.txt'.format(idx))
# assert os.path.isfile(calib_file)
return Calibration(calib_file)
def get_lidar(self, idx):
lidar_file = os.path.join(self.lidar_dir, '{}.bin'.format(idx))
# assert os.path.isfile(lidar_file)
return np.fromfile(lidar_file, dtype=np.float32).reshape(-1, 4)
def get_label(self, idx):
labels = []
label_path = os.path.join(self.label_dir, '{}.txt'.format(idx))
for line in open(label_path, 'r'):
line = line.rstrip()
line_parts = line.split(' ')
obj_name = line_parts[0] # 'Car', 'Pedestrian', ...
cat_id = int(cnf.CLASS_NAME_TO_ID[obj_name])
if cat_id <= -99: # ignore Tram and Misc
continue
truncated = int(float(line_parts[1])) # truncated pixel ratio [0..1]
occluded = int(line_parts[2]) # 0=visible, 1=partly occluded, 2=fully occluded, 3=unknown
alpha = float(line_parts[3]) # object observation angle [-pi..pi]
# xmin, ymin, xmax, ymax
# bbox = np.array([float(line_parts[4]), float(line_parts[5]), float(line_parts[6]), float(line_parts[7])])
# height, width, length (h, w, l)
h, w, l = float(line_parts[8]), float(line_parts[9]), float(line_parts[10])
# location (x,y,z) in camera coord.
x, y, z = float(line_parts[11]), float(line_parts[12]), float(line_parts[13])
ry = float(line_parts[14]) # yaw angle (around Y-axis in camera coordinates) [-pi..pi]
object_label = [cat_id, x, y, z, h, w, l, ry]
labels.append(object_label)
if len(labels) == 0:
labels = np.zeros((1, 8), dtype=np.float32)
has_labels = False
else:
labels = np.array(labels, dtype=np.float32)
has_labels = True
return labels, has_labels
def build_targets(self, labels, hflipped):
minX = cnf.boundary['minX']
maxX = cnf.boundary['maxX']
minY = cnf.boundary['minY']
maxY = cnf.boundary['maxY']
minZ = cnf.boundary['minZ']
maxZ = cnf.boundary['maxZ']
num_objects = min(len(labels), self.max_objects)
hm_l, hm_w = self.hm_size
hm_main_center = np.zeros((self.num_classes, hm_l, hm_w), dtype=np.float32)
cen_offset = np.zeros((self.max_objects, 2), dtype=np.float32)
direction = np.zeros((self.max_objects, 2), dtype=np.float32)
z_coor = np.zeros((self.max_objects, 1), dtype=np.float32)
dimension = np.zeros((self.max_objects, 3), dtype=np.float32)
indices_center = np.zeros((self.max_objects), dtype=np.int64)
obj_mask = np.zeros((self.max_objects), dtype=np.uint8)
for k in range(num_objects):
cls_id, x, y, z, h, w, l, yaw = labels[k]
cls_id = int(cls_id)
# Invert yaw angle
yaw = -yaw
if not ((minX <= x <= maxX) and (minY <= y <= maxY) and (minZ <= z <= maxZ)):
continue
if (h <= 0) or (w <= 0) or (l <= 0):
continue
bbox_l = l / cnf.bound_size_x * hm_l
bbox_w = w / cnf.bound_size_y * hm_w
radius = compute_radius((math.ceil(bbox_l), math.ceil(bbox_w)))
radius = max(0, int(radius))
center_y = (x - minX) / cnf.bound_size_x * hm_l # x --> y (invert to 2D image space)
center_x = (y - minY) / cnf.bound_size_y * hm_w # y --> x
center = np.array([center_x, center_y], dtype=np.float32)
if hflipped:
center[0] = hm_w - center[0] - 1
center_int = center.astype(np.int32)
if cls_id < 0:
ignore_ids = [_ for _ in range(self.num_classes)] if cls_id == - 1 else [- cls_id - 2]
# Consider to make mask ignore
for cls_ig in ignore_ids:
gen_hm_radius(hm_main_center[cls_ig], center_int, radius)
hm_main_center[ignore_ids, center_int[1], center_int[0]] = 0.9999
continue
# Generate heatmaps for main center
gen_hm_radius(hm_main_center[cls_id], center, radius)
# Index of the center
indices_center[k] = center_int[1] * hm_w + center_int[0]
# targets for center offset
cen_offset[k] = center - center_int
# targets for dimension
dimension[k, 0] = h
dimension[k, 1] = w
dimension[k, 2] = l
# targets for direction
direction[k, 0] = math.sin(float(yaw)) # im
direction[k, 1] = math.cos(float(yaw)) # re
# im -->> -im
if hflipped:
direction[k, 0] = - direction[k, 0]
# targets for depth
z_coor[k] = z - minZ
# Generate object masks
obj_mask[k] = 1
targets = {
'hm_cen': hm_main_center,
'cen_offset': cen_offset,
'direction': direction,
'z_coor': z_coor,
'dim': dimension,
'indices_center': indices_center,
'obj_mask': obj_mask,
}
return targets
def draw_img_with_label(self, index):
sample_id = self.sample_id_list[index]
lidar_path = os.path.join(self.lidar_dir, '{}.bin'.format(sample_id))
lidarData = self.get_lidar(sample_id)
calib = self.get_calib(sample_id)
labels, has_labels = self.get_label(sample_id)
print(lidar_path)
if has_labels:
labels[:, 1:] = transformation.camera_to_lidar_box(labels[:, 1:], calib.V2C, calib.R0, calib.P2)
if self.lidar_aug:
lidarData, labels[:, 1:] = self.lidar_aug(lidarData, labels[:, 1:])
lidarData, labels = get_filtered_lidar(lidarData, cnf.boundary, labels)
bev_map = makeBEVMap(lidarData, cnf.boundary)
print(labels)
return bev_map, labels, lidar_path
if __name__ == '__main__':
from easydict import EasyDict as edict
from data_process.transformation import OneOf, Random_Scaling, Random_Rotation, lidar_to_camera_box
from utils.visualization_utils import merge_rgb_to_bev, show_rgb_image_with_boxes
configs = edict()
configs.distributed = False # For testing
configs.pin_memory = False
configs.num_samples = None
configs.input_size = (1216, 608)
configs.hm_size = (304, 152)
configs.max_objects = 50
configs.num_classes = 3
configs.output_width = 608
# configs.dataset_dir = os.path.join('../../', 'dataset', 'kitti')
# lidar_aug = OneOf([
# Random_Rotation(limit_angle=np.pi / 4, p=1.),
# Random_Scaling(scaling_range=(0.95, 1.05), p=1.),
# ], p=1.)
lidar_aug = None
dataset = KittiDataset(configs, mode='val', lidar_aug=lidar_aug, hflip_prob=0., num_samples=configs.num_samples)
print('\n\nPress n to see the next sample >>> Press Esc to quit...')
for idx in range(len(dataset)):
bev_map, labels, lidar_path = dataset.draw_img_with_label(idx)
calib = Calibration(lidar_path.replace(".bin", ".txt").replace("velodyne", "calib"))
bev_map = (bev_map.transpose(1, 2, 0) * 255).astype(np.uint8)
# bev_map = cv2.resize(bev_map, (cnf.BEV_HEIGHT, cnf.BEV_WIDTH))
print(bev_map.shape)
for box_idx, (cls_id, x, y, z, h, w, l, yaw) in enumerate(labels):
# Draw rotated box
yaw = -yaw
y1 = int((x - cnf.boundary['minX']) / cnf.DISCRETIZATION)
x1 = int((y - cnf.boundary['minY']) / cnf.DISCRETIZATION)
w1 = int(w / cnf.DISCRETIZATION)
l1 = int(l / cnf.DISCRETIZATION)
drawRotatedBox(bev_map, x1, y1, w1, l1, yaw, cnf.colors[int(cls_id)])
# Rotate the bev_map
bev_map = cv2.rotate(bev_map, cv2.ROTATE_180)
# labels[:, 1:] = lidar_to_camera_box(labels[:, 1:], calib.V2C, calib.R0, calib.P2)
cv2.imshow('bev_map', bev_map)
if cv2.waitKey(0) & 0xff == 27:
break

+ 426
- 0
point-cloud/sfa/data_process/transformation.py

@@ -0,0 +1,426 @@
"""
# -*- coding: utf-8 -*-
-----------------------------------------------------------------------------------
# Refer: https://github.com/ghimiredhikura/Complex-YOLOv3
# Source : https://github.com/jeasinema/VoxelNet-tensorflow/blob/master/utils/utils.py
"""
import os
import sys
import math
import numpy as np
import torch
src_dir = os.path.dirname(os.path.realpath(__file__))
# while not src_dir.endswith("sfa"):
# src_dir = os.path.dirname(src_dir)
if src_dir not in sys.path:
sys.path.append(src_dir)
from config import kitti_config as cnf
def angle_in_limit(angle):
# Limit the angle to the range [-pi/2, pi/2)
limit_degree = 5
while angle >= np.pi / 2:
angle -= np.pi
while angle < -np.pi / 2:
angle += np.pi
if abs(angle + np.pi / 2) < limit_degree / 180 * np.pi:
angle = np.pi / 2
return angle
def camera_to_lidar(x, y, z, V2C=None, R0=None, P2=None):
p = np.array([x, y, z, 1])
if V2C is None or R0 is None:
p = np.matmul(cnf.R0_inv, p)
p = np.matmul(cnf.Tr_velo_to_cam_inv, p)
else:
R0_i = np.zeros((4, 4))
R0_i[:3, :3] = R0
R0_i[3, 3] = 1
p = np.matmul(np.linalg.inv(R0_i), p)
p = np.matmul(inverse_rigid_trans(V2C), p)
p = p[0:3]
return tuple(p)
def lidar_to_camera(x, y, z, V2C=None, R0=None, P2=None):
p = np.array([x, y, z, 1])
if V2C is None or R0 is None:
p = np.matmul(cnf.Tr_velo_to_cam, p)
p = np.matmul(cnf.R0, p)
else:
p = np.matmul(V2C, p)
p = np.matmul(R0, p)
p = p[0:3]
return tuple(p)
def camera_to_lidar_point(points):
# (N, 3) -> (N, 3)
N = points.shape[0]
points = np.hstack([points, np.ones((N, 1))]).T # (N,4) -> (4,N)
points = np.matmul(cnf.R0_inv, points)
points = np.matmul(cnf.Tr_velo_to_cam_inv, points).T # (4, N) -> (N, 4)
points = points[:, 0:3]
return points.reshape(-1, 3)
def lidar_to_camera_point(points, V2C=None, R0=None):
# (N, 3) -> (N, 3)
N = points.shape[0]
points = np.hstack([points, np.ones((N, 1))]).T
if V2C is None or R0 is None:
points = np.matmul(cnf.Tr_velo_to_cam, points)
points = np.matmul(cnf.R0, points).T
else:
points = np.matmul(V2C, points)
points = np.matmul(R0, points).T
points = points[:, 0:3]
return points.reshape(-1, 3)
def camera_to_lidar_box(boxes, V2C=None, R0=None, P2=None):
# (N, 7) -> (N, 7) x,y,z,h,w,l,r
ret = []
for box in boxes:
x, y, z, h, w, l, ry = box
# print(x, y, z, h, w, l, ry)
(x, y, z), h, w, l, rz = camera_to_lidar(x, y, z, V2C=V2C, R0=R0, P2=P2), h, w, l, -ry - np.pi / 2
# print(x, y, z, h, w, l, ry)
# print("camera_to_lidar")
# rz = angle_in_limit(rz)
ret.append([x, y, z, h, w, l, rz])
return np.array(ret).reshape(-1, 7)
def lidar_to_camera_box(boxes, V2C=None, R0=None, P2=None):
# (N, 7) -> (N, 7) x,y,z,h,w,l,r
ret = []
for box in boxes:
x, y, z, h, w, l, rz = box
# (x, y, z), h, w, l, ry = lidar_to_camera(x, y, z, V2C=V2C, R0=R0, P2=P2), h, w, l, -rz - np.pi / 2
# ry = angle_in_limit(ry)
ry = -rz - np.pi / 2
ret.append([x, y, z, h, w, l, ry])
return np.array(ret).reshape(-1, 7)
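# The two box conversions above use mutually inverse yaw conventions
# (rz = -ry - pi/2 and ry = -rz - pi/2), so a lidar -> camera -> lidar
# round trip preserves the heading. A one-line check:
import numpy as np
ry = 0.3
assert np.isclose(-(-ry - np.pi / 2) - np.pi / 2, ry)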
def center_to_corner_box2d(boxes_center, coordinate='lidar'):
# (N, 5) -> (N, 4, 2)
N = boxes_center.shape[0]
boxes3d_center = np.zeros((N, 7))
boxes3d_center[:, [0, 1, 4, 5, 6]] = boxes_center
boxes3d_corner = center_to_corner_box3d(boxes3d_center, coordinate=coordinate)
return boxes3d_corner[:, 0:4, 0:2]
def center_to_corner_box3d(boxes_center, coordinate='lidar'):
# (N, 7) -> (N, 8, 3)
N = boxes_center.shape[0]
ret = np.zeros((N, 8, 3), dtype=np.float32)
if coordinate == 'camera':
boxes_center = camera_to_lidar_box(boxes_center)
for i in range(N):
box = boxes_center[i]
translation = box[0:3]
size = box[3:6]
rotation = [0, 0, box[-1]]
h, w, l = size[0], size[1], size[2]
trackletBox = np.array([ # in velodyne coordinates around zero point and without orientation yet
[-l / 2, -l / 2, l / 2, l / 2, -l / 2, -l / 2, l / 2, l / 2], \
[w / 2, -w / 2, -w / 2, w / 2, w / 2, -w / 2, -w / 2, w / 2], \
[0, 0, 0, 0, h, h, h, h]])
# re-create 3D bounding box in velodyne coordinate system
yaw = rotation[2]
rotMat = np.array([
[np.cos(yaw), -np.sin(yaw), 0.0],
[np.sin(yaw), np.cos(yaw), 0.0],
[0.0, 0.0, 1.0]])
cornerPosInVelo = np.dot(rotMat, trackletBox) + np.tile(translation, (8, 1)).T
box3d = cornerPosInVelo.transpose()
ret[i] = box3d
if coordinate == 'camera':
for idx in range(len(ret)):
ret[idx] = lidar_to_camera_point(ret[idx])
return ret
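# A corner-layout sketch: an axis-aligned box (yaw = 0) centred at the
# origin in lidar coordinates has bottom corners at z = 0 and top
# corners at z = h, spanning +-l/2 in x and +-w/2 in y.
import numpy as np
box = np.array([[0.0, 0.0, 0.0, 1.5, 2.0, 4.0, 0.0]])  # x, y, z, h, w, l, yaw
corners = center_to_corner_box3d(box, coordinate='lidar')  # -> (1, 8, 3)
assert np.allclose(corners[0, 0], [-2.0, 1.0, 0.0])  # first bottom corner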
CORNER2CENTER_AVG = True
def corner_to_center_box3d(boxes_corner, coordinate='camera'):
# (N, 8, 3) -> (N, 7) x,y,z,h,w,l,ry/z
if coordinate == 'lidar':
for idx in range(len(boxes_corner)):
boxes_corner[idx] = lidar_to_camera_point(boxes_corner[idx])
ret = []
for roi in boxes_corner:
if CORNER2CENTER_AVG: # average version
roi = np.array(roi)
h = abs(np.sum(roi[:4, 1] - roi[4:, 1]) / 4)
w = np.sum(
np.sqrt(np.sum((roi[0, [0, 2]] - roi[3, [0, 2]]) ** 2)) +
np.sqrt(np.sum((roi[1, [0, 2]] - roi[2, [0, 2]]) ** 2)) +
np.sqrt(np.sum((roi[4, [0, 2]] - roi[7, [0, 2]]) ** 2)) +
np.sqrt(np.sum((roi[5, [0, 2]] - roi[6, [0, 2]]) ** 2))
) / 4
l = np.sum(
np.sqrt(np.sum((roi[0, [0, 2]] - roi[1, [0, 2]]) ** 2)) +
np.sqrt(np.sum((roi[2, [0, 2]] - roi[3, [0, 2]]) ** 2)) +
np.sqrt(np.sum((roi[4, [0, 2]] - roi[5, [0, 2]]) ** 2)) +
np.sqrt(np.sum((roi[6, [0, 2]] - roi[7, [0, 2]]) ** 2))
) / 4
x = np.sum(roi[:, 0], axis=0) / 8
y = np.sum(roi[0:4, 1], axis=0) / 4
z = np.sum(roi[:, 2], axis=0) / 8
ry = np.sum(
math.atan2(roi[2, 0] - roi[1, 0], roi[2, 2] - roi[1, 2]) +
math.atan2(roi[6, 0] - roi[5, 0], roi[6, 2] - roi[5, 2]) +
math.atan2(roi[3, 0] - roi[0, 0], roi[3, 2] - roi[0, 2]) +
math.atan2(roi[7, 0] - roi[4, 0], roi[7, 2] - roi[4, 2]) +
math.atan2(roi[0, 2] - roi[1, 2], roi[1, 0] - roi[0, 0]) +
math.atan2(roi[4, 2] - roi[5, 2], roi[5, 0] - roi[4, 0]) +
math.atan2(roi[3, 2] - roi[2, 2], roi[2, 0] - roi[3, 0]) +
math.atan2(roi[7, 2] - roi[6, 2], roi[6, 0] - roi[7, 0])
) / 8
if w > l:
w, l = l, w
ry = ry - np.pi / 2
elif l > w:
l, w = w, l
ry = ry - np.pi / 2
ret.append([x, y, z, h, w, l, ry])
else: # max version
h = max(abs(roi[:4, 1] - roi[4:, 1]))
w = np.max(
np.sqrt(np.sum((roi[0, [0, 2]] - roi[3, [0, 2]]) ** 2)) +
np.sqrt(np.sum((roi[1, [0, 2]] - roi[2, [0, 2]]) ** 2)) +
np.sqrt(np.sum((roi[4, [0, 2]] - roi[7, [0, 2]]) ** 2)) +
np.sqrt(np.sum((roi[5, [0, 2]] - roi[6, [0, 2]]) ** 2))
)
l = np.max(
np.sqrt(np.sum((roi[0, [0, 2]] - roi[1, [0, 2]]) ** 2)) +
np.sqrt(np.sum((roi[2, [0, 2]] - roi[3, [0, 2]]) ** 2)) +
np.sqrt(np.sum((roi[4, [0, 2]] - roi[5, [0, 2]]) ** 2)) +
np.sqrt(np.sum((roi[6, [0, 2]] - roi[7, [0, 2]]) ** 2))
)
x = np.sum(roi[:, 0], axis=0) / 8
y = np.sum(roi[0:4, 1], axis=0) / 4
z = np.sum(roi[:, 2], axis=0) / 8
ry = np.sum(
math.atan2(roi[2, 0] - roi[1, 0], roi[2, 2] - roi[1, 2]) +
math.atan2(roi[6, 0] - roi[5, 0], roi[6, 2] - roi[5, 2]) +
math.atan2(roi[3, 0] - roi[0, 0], roi[3, 2] - roi[0, 2]) +
math.atan2(roi[7, 0] - roi[4, 0], roi[7, 2] - roi[4, 2]) +
math.atan2(roi[0, 2] - roi[1, 2], roi[1, 0] - roi[0, 0]) +
math.atan2(roi[4, 2] - roi[5, 2], roi[5, 0] - roi[4, 0]) +
math.atan2(roi[3, 2] - roi[2, 2], roi[2, 0] - roi[3, 0]) +
math.atan2(roi[7, 2] - roi[6, 2], roi[6, 0] - roi[7, 0])
) / 8
if w > l:
w, l = l, w
ry = angle_in_limit(ry + np.pi / 2)
ret.append([x, y, z, h, w, l, ry])
if coordinate == 'lidar':
ret = camera_to_lidar_box(np.array(ret))
return np.array(ret)
def point_transform(points, tx, ty, tz, rx=0, ry=0, rz=0):
# Input:
# points: (N, 3)
# rx/y/z: in radians
# Output:
# points: (N, 3)
N = points.shape[0]
points = np.hstack([points, np.ones((N, 1))])
mat1 = np.eye(4)
mat1[3, 0:3] = tx, ty, tz
points = np.matmul(points, mat1)
if rx != 0:
mat = np.zeros((4, 4))
mat[0, 0] = 1
mat[3, 3] = 1
mat[1, 1] = np.cos(rx)
mat[1, 2] = -np.sin(rx)
mat[2, 1] = np.sin(rx)
mat[2, 2] = np.cos(rx)
points = np.matmul(points, mat)
if ry != 0:
mat = np.zeros((4, 4))
mat[1, 1] = 1
mat[3, 3] = 1
mat[0, 0] = np.cos(ry)
mat[0, 2] = np.sin(ry)
mat[2, 0] = -np.sin(ry)
mat[2, 2] = np.cos(ry)
points = np.matmul(points, mat)
if rz != 0:
mat = np.zeros((4, 4))
mat[2, 2] = 1
mat[3, 3] = 1
mat[0, 0] = np.cos(rz)
mat[0, 1] = -np.sin(rz)
mat[1, 0] = np.sin(rz)
mat[1, 1] = np.cos(rz)
points = np.matmul(points, mat)
return points[:, 0:3]
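# Note the row-vector convention above: points multiply the matrices
# from the left, so a positive rz here corresponds to a clockwise
# rotation in the usual column-vector convention. A quick check:
import numpy as np
out = point_transform(np.array([[1.0, 0.0, 0.0]]), 0, 0, 0, rz=np.pi / 2)
assert np.allclose(out, [[0.0, -1.0, 0.0]])  # (1, 0, 0) -> (0, -1, 0)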
def box_transform(boxes, tx, ty, tz, r=0, coordinate='lidar'):
# Input:
# boxes: (N, 7) x y z h w l rz/y
# Output:
# boxes: (N, 7) x y z h w l rz/y
boxes_corner = center_to_corner_box3d(boxes, coordinate=coordinate) # (N, 8, 3)
for idx in range(len(boxes_corner)):
if coordinate == 'lidar':
boxes_corner[idx] = point_transform(boxes_corner[idx], tx, ty, tz, rz=r)
else:
boxes_corner[idx] = point_transform(boxes_corner[idx], tx, ty, tz, ry=r)
return corner_to_center_box3d(boxes_corner, coordinate=coordinate)
def inverse_rigid_trans(Tr):
''' Invert a rigid body transform matrix (3x4 as [R|t])
[R'|-R't; 0|1]
'''
inv_Tr = np.zeros_like(Tr) # 3x4
inv_Tr[0:3, 0:3] = np.transpose(Tr[0:3, 0:3])
inv_Tr[0:3, 3] = np.dot(-np.transpose(Tr[0:3, 0:3]), Tr[0:3, 3])
return inv_Tr
class Compose(object):
def __init__(self, transforms, p=1.0):
self.transforms = transforms
self.p = p
def __call__(self, lidar, labels):
if np.random.random() <= self.p:
for t in self.transforms:
lidar, labels = t(lidar, labels)
return lidar, labels
class OneOf(object):
def __init__(self, transforms, p=1.0):
self.transforms = transforms
self.p = p
def __call__(self, lidar, labels):
if np.random.random() <= self.p:
choice = np.random.randint(low=0, high=len(self.transforms))
lidar, labels = self.transforms[choice](lidar, labels)
return lidar, labels
class Random_Rotation(object):
def __init__(self, limit_angle=np.pi / 4, p=0.5):
self.limit_angle = limit_angle
self.p = p
def __call__(self, lidar, labels):
"""
:param labels: # (N', 7) x, y, z, h, w, l, r
:return:
"""
if np.random.random() <= self.p:
angle = np.random.uniform(-self.limit_angle, self.limit_angle)
lidar[:, 0:3] = point_transform(lidar[:, 0:3], 0, 0, 0, rz=angle)
labels = box_transform(labels, 0, 0, 0, r=angle, coordinate='lidar')
return lidar, labels
class Random_Scaling(object):
def __init__(self, scaling_range=(0.95, 1.05), p=0.5):
self.scaling_range = scaling_range
self.p = p
def __call__(self, lidar, labels):
"""
:param labels: # (N', 7) x, y, z, h, w, l, r
:return:
"""
if np.random.random() <= self.p:
factor = np.random.uniform(self.scaling_range[0], self.scaling_range[1])
lidar[:, 0:3] = lidar[:, 0:3] * factor
labels[:, 0:6] = labels[:, 0:6] * factor
return lidar, labels
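# A sketch of composing these augmentations for training (mirrors the
# commented-out lidar_aug in the dataset demo; values are illustrative):
import numpy as np
lidar_aug = OneOf([
    Random_Rotation(limit_angle=np.pi / 4, p=1.0),
    Random_Scaling(scaling_range=(0.95, 1.05), p=1.0),
], p=0.66)
lidar = np.random.rand(100, 4).astype(np.float32)  # x, y, z, intensity
labels = np.array([[10.0, 2.0, -1.0, 1.5, 1.8, 4.2, 0.1]])  # x,y,z,h,w,l,yaw
lidar, labels = lidar_aug(lidar, labels)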
class Cutout(object):
"""Randomly mask out one or more patches from an image.
Args:
n_holes (int): Number of patches to cut out of each image.
ratio (float): Side length of each square patch, as a fraction of the image side.
Refer from: https://github.com/uoguelph-mlrg/Cutout/blob/master/util/cutout.py
"""
def __init__(self, n_holes, ratio, fill_value=0., p=1.0):
self.n_holes = n_holes
self.ratio = ratio
assert 0. <= fill_value <= 1., "the fill value must be in the range [0, 1]"
self.fill_value = fill_value
self.p = p
def __call__(self, img, targets):
"""
Args:
img (Tensor): Tensor image of size (C, H, W).
Returns:
Tensor: Image with n_holes square patches cut out of it.
"""
if np.random.random() <= self.p:
h = img.size(1)
w = img.size(2)
h_cutout = int(self.ratio * h)
w_cutout = int(self.ratio * w)
for n in range(self.n_holes):
y = np.random.randint(h)
x = np.random.randint(w)
y1 = np.clip(y - h_cutout // 2, 0, h)
y2 = np.clip(y + h_cutout // 2, 0, h)
x1 = np.clip(x - w_cutout // 2, 0, w)
x2 = np.clip(x + w_cutout // 2, 0, w)
img[:, y1: y2, x1: x2] = self.fill_value # Zero out the selected area
# Remove targets that are in the selected area
keep_target = []
for target_idx, target in enumerate(targets):
_, _, target_x, target_y, target_w, target_l, _, _ = target
if (x1 <= target_x * w <= x2) and (y1 <= target_y * h <= y2):
continue
keep_target.append(target_idx)
targets = targets[keep_target]
return img, targets
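# Cutout sketch: mask one square patch of a (C, H, W) BEV tensor and
# drop any target whose normalised centre falls inside it. The 8-field
# target layout is assumed from the unpacking in __call__ above.
import numpy as np
import torch
cutout = Cutout(n_holes=1, ratio=0.3, fill_value=0.0, p=1.0)
bev = torch.rand(3, 152, 152)
targets = np.array([[0.0, 0.0, 0.5, 0.5, 0.05, 0.1, 0.0, 1.0]])
bev, targets = cutout(bev, targets)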

+ 378
- 0
point-cloud/sfa/inference.py View File

@@ -0,0 +1,378 @@
"""
# -*- coding: utf-8 -*-
-----------------------------------------------------------------------------------
# Author: Nguyen Mau Dung
# DoC: 2020.08.17
# email: nguyenmaudung93.kstn@gmail.com
-----------------------------------------------------------------------------------
# Description: Testing script
"""
import argparse
import sys
import os
import time
import warnings
warnings.filterwarnings("ignore", category=UserWarning)
from easydict import EasyDict as edict
import cv2
import torch
import numpy as np
import torch.nn.functional as F
src_dir = os.path.dirname(os.path.realpath(__file__))
# while not src_dir.endswith("sfa"):
# src_dir = os.path.dirname(src_dir)
if src_dir not in sys.path:
sys.path.append(src_dir)
from data_process.kitti_dataloader import create_test_dataloader
from models.model_utils import create_model
import config.kitti_config as cnf
def parse_test_configs():
parser = argparse.ArgumentParser(description='Testing config for the Implementation')
parser.add_argument('--saved_fn', type=str, default='fpn_resnet_18', metavar='FN',
help='The name used for saving logs, models, ...')
parser.add_argument('-a', '--arch', type=str, default='fpn_resnet_18', metavar='ARCH',
help='The name of the model architecture')
parser.add_argument('--model_dir', type=str,
default='/train_out_model/', metavar='PATH',
help='the path of the pretrained checkpoint')
parser.add_argument('--K', type=int, default=50,
help='the number of top K')
parser.add_argument('--no_cuda', action='store_true',
help='If true, cuda is not used.')
parser.add_argument('--gpu_idx', default=0, type=int,
help='GPU index to use.')
parser.add_argument('--num_samples', type=int, default=None,
help='Take a subset of the dataset to run and debug')
parser.add_argument('--num_workers', type=int, default=1,
help='Number of threads for loading data')
parser.add_argument('--batch_size', type=int, default=1,
help='mini-batch size (default: 1)')
parser.add_argument('--peak_thresh', type=float, default=0.2)
parser.add_argument('--dataset_dir', type=str, default='/dataset_dir/',
help='the path of the dataset directory')
parser.add_argument('--results_dir', type=str, default='/results_dir/',
help='the directory where the test outputs are saved')
parser.add_argument('--save_test_output', type=bool, default=True,
help='save the test output or not')
parser.add_argument('--output_format', type=str, default='txt', metavar='PATH',
help='the type of the test output (supports txt, image, or video)')
parser.add_argument('--output_video_fn', type=str, default='out_fpn_resnet_18', metavar='PATH',
help='the video filename if the output format is video')
parser.add_argument('--output-width', type=int, default=608,
help='the width of the displayed output; the height may vary')
configs = edict(vars(parser.parse_args()))
configs.pin_memory = True
configs.distributed = False # For testing on 1 GPU only
configs.input_size = (1216, 608)
configs.hm_size = (304, 152)
configs.down_ratio = 4
configs.max_objects = 50
configs.imagenet_pretrained = False
configs.head_conv = 64
configs.num_classes = 3
configs.num_center_offset = 2
configs.num_z = 1
configs.num_dim = 3
configs.num_direction = 2 # sin, cos
configs.heads = {
'hm_cen': configs.num_classes,
'cen_offset': configs.num_center_offset,
'direction': configs.num_direction,
'z_coor': configs.num_z,
'dim': configs.num_dim
}
configs.num_input_features = 4
####################################################################
##############Dataset, Checkpoints, and results dir configs#########
####################################################################
configs.root_dir = '../'
# configs.dataset_dir = os.path.join(configs.root_dir, 'dataset', 'apollo')
# configs.results_dir_img = os.path.join(configs.results_dir, configs.saved_fn, 'image')
# configs.results_dir_txt = os.path.join(configs.results_dir, configs.saved_fn, 'txt')
# make_folder(configs.results_dir_img)
# make_folder(configs.results_dir_txt)
make_folder(configs.results_dir)
return configs
def _sigmoid(x):
return torch.clamp(x.sigmoid_(), min=1e-4, max=1 - 1e-4)
def time_synchronized():
if torch.cuda.is_available():
torch.cuda.synchronize()
return time.time()
def make_folder(folder_name):
if not os.path.exists(folder_name):
os.makedirs(folder_name)
def drawRotatedBox(img, x, y, w, l, yaw, color):
bev_corners = get_corners(x, y, w, l, yaw)
corners_int = bev_corners.reshape(-1, 1, 2).astype(int)
cv2.polylines(img, [corners_int], True, color, 2)
corners_int = bev_corners.reshape(-1, 2)
cv2.line(img, (int(corners_int[0, 0]), int(corners_int[0, 1])), (int(corners_int[3, 0]), int(corners_int[3, 1])), (255, 255, 0), 2)
# bev image coordinates format
def get_corners(x, y, w, l, yaw):
bev_corners = np.zeros((4, 2), dtype=np.float32)
cos_yaw = np.cos(yaw)
sin_yaw = np.sin(yaw)
# front left
bev_corners[0, 0] = x - w / 2 * cos_yaw - l / 2 * sin_yaw
bev_corners[0, 1] = y - w / 2 * sin_yaw + l / 2 * cos_yaw
# rear left
bev_corners[1, 0] = x - w / 2 * cos_yaw + l / 2 * sin_yaw
bev_corners[1, 1] = y - w / 2 * sin_yaw - l / 2 * cos_yaw
# rear right
bev_corners[2, 0] = x + w / 2 * cos_yaw + l / 2 * sin_yaw
bev_corners[2, 1] = y + w / 2 * sin_yaw - l / 2 * cos_yaw
# front right
bev_corners[3, 0] = x + w / 2 * cos_yaw - l / 2 * sin_yaw
bev_corners[3, 1] = y + w / 2 * sin_yaw + l / 2 * cos_yaw
return bev_corners
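# get_corners sketch: with yaw = 0 the box is axis-aligned in BEV
# pixel space, so the corners sit at x +- w/2 and y +- l/2.
import numpy as np
corners = get_corners(x=100.0, y=200.0, w=20.0, l=40.0, yaw=0.0)
assert np.allclose(corners[0], [90.0, 220.0])  # front-left corner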
def _nms(heat, kernel=3):
pad = (kernel - 1) // 2
hmax = F.max_pool2d(heat, (kernel, kernel), stride=1, padding=pad)
keep = (hmax == heat).float()
return heat * keep
def _gather_feat(feat, ind, mask=None):
dim = feat.size(2)
ind = ind.unsqueeze(2).expand(ind.size(0), ind.size(1), dim)
feat = feat.gather(1, ind)
if mask is not None:
mask = mask.unsqueeze(2).expand_as(feat)
feat = feat[mask]
feat = feat.view(-1, dim)
return feat
def _transpose_and_gather_feat(feat, ind):
feat = feat.permute(0, 2, 3, 1).contiguous()
feat = feat.view(feat.size(0), -1, feat.size(3))
feat = _gather_feat(feat, ind)
return feat
def _topk(scores, K=40):
batch, cat, height, width = scores.size()
topk_scores, topk_inds = torch.topk(scores.view(batch, cat, -1), K)
topk_inds = topk_inds % (height * width)
topk_ys = (torch.floor_divide(topk_inds, width)).float()
topk_xs = (topk_inds % width).int().float()
topk_score, topk_ind = torch.topk(topk_scores.view(batch, -1), K)
topk_clses = (torch.floor_divide(topk_ind, K)).int()
topk_inds = _gather_feat(topk_inds.view(batch, -1, 1), topk_ind).view(batch, K)
topk_ys = _gather_feat(topk_ys.view(batch, -1, 1), topk_ind).view(batch, K)
topk_xs = _gather_feat(topk_xs.view(batch, -1, 1), topk_ind).view(batch, K)
return topk_score, topk_inds, topk_clses, topk_ys, topk_xs
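# Peak-extraction sketch: _nms keeps only local maxima of the centre
# heatmap, and _topk returns their scores, classes, and (y, x) cells.
import torch
heat = torch.zeros(1, 3, 8, 8)
heat[0, 1, 2, 5] = 0.9  # one confident peak for class 1
heat[0, 1, 2, 6] = 0.4  # weaker neighbour, suppressed by _nms
scores, inds, clses, ys, xs = _topk(_nms(heat), K=1)
# expected: scores ~ 0.9, clses == 1, ys == 2, xs == 5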
def decode(hm_cen, cen_offset, direction, z_coor, dim, K=40):
batch_size, num_classes, height, width = hm_cen.size()
hm_cen = _nms(hm_cen)
scores, inds, clses, ys, xs = _topk(hm_cen, K=K)
if cen_offset is not None:
cen_offset = _transpose_and_gather_feat(cen_offset, inds)
cen_offset = cen_offset.view(batch_size, K, 2)
xs = xs.view(batch_size, K, 1) + cen_offset[:, :, 0:1]
ys = ys.view(batch_size, K, 1) + cen_offset[:, :, 1:2]
else:
xs = xs.view(batch_size, K, 1) + 0.5
ys = ys.view(batch_size, K, 1) + 0.5
direction = _transpose_and_gather_feat(direction, inds)
direction = direction.view(batch_size, K, 2)
z_coor = _transpose_and_gather_feat(z_coor, inds)
z_coor = z_coor.view(batch_size, K, 1)
dim = _transpose_and_gather_feat(dim, inds)
dim = dim.view(batch_size, K, 3)
clses = clses.view(batch_size, K, 1).float()
scores = scores.view(batch_size, K, 1)
# (scores x 1, xs x 1, ys x 1, z_coor x 1, dim x 3, direction x 2, clses x 1)
# (scores-0:1, xs-1:2, ys-2:3, z_coor-3:4, dim-4:7, direction-7:9, clses-9:10)
# detections: [batch_size, K, 10]
detections = torch.cat([scores, xs, ys, z_coor, dim, direction, clses], dim=2)
return detections
def get_yaw(direction):
return np.arctan2(direction[:, 0:1], direction[:, 1:2])
def post_processing(detections, num_classes=3, down_ratio=4, peak_thresh=0.2):
"""
:param detections: [batch_size, K, 10]
# (scores x 1, xs x 1, ys x 1, z_coor x 1, dim x 3, direction x 2, clses x 1)
# (scores-0:1, xs-1:2, ys-2:3, z_coor-3:4, dim-4:7, direction-7:9, clses-9:10)
:return:
"""
# TODO: Need to consider rescale to the original scale: x, y
ret = []
for i in range(detections.shape[0]):
top_preds = {}
classes = detections[i, :, -1]
for j in range(num_classes):
inds = (classes == j)
# score, x, y, z, h, w, l, yaw
top_preds[j] = np.concatenate([
detections[i, inds, 0:1],
detections[i, inds, 1:2] * down_ratio,
detections[i, inds, 2:3] * down_ratio,
detections[i, inds, 3:4],
detections[i, inds, 4:5],
detections[i, inds, 5:6] / cnf.bound_size_y * cnf.BEV_WIDTH,
detections[i, inds, 6:7] / cnf.bound_size_x * cnf.BEV_HEIGHT,
get_yaw(detections[i, inds, 7:9]).astype(np.float32)], axis=1)
# Filter by peak_thresh
if len(top_preds[j]) > 0:
keep_inds = (top_preds[j][:, 0] > peak_thresh)
top_preds[j] = top_preds[j][keep_inds]
ret.append(top_preds)
return ret
def draw_predictions(img, detections, num_classes=3):
for j in range(num_classes):
if len(detections[j]) > 0:
for det in detections[j]:
# (scores-0:1, x-1:2, y-2:3, z-3:4, dim-4:7, yaw-7:8)
_score, _x, _y, _z, _h, _w, _l, _yaw = det
drawRotatedBox(img, _x, _y, _w, _l, _yaw, cnf.colors[int(j)])
return img
def convert_det_to_real_values(detections, num_classes=3):
kitti_dets = []
for cls_id in range(num_classes):
if len(detections[cls_id]) > 0:
for det in detections[cls_id]:
# (scores-0:1, x-1:2, y-2:3, z-3:4, dim-4:7, yaw-7:8)
_score, _x, _y, _z, _h, _w, _l, _yaw = det
_yaw = round(-_yaw, 2)
x = round(_y / cnf.BEV_HEIGHT * cnf.bound_size_x + cnf.boundary['minX'], 2)
y = round(_x / cnf.BEV_WIDTH * cnf.bound_size_y + cnf.boundary['minY'], 2)
z = round(_z + cnf.boundary['minZ'], 2)
w = round(_w / cnf.BEV_WIDTH * cnf.bound_size_y, 2)
l = round(_l / cnf.BEV_HEIGHT * cnf.bound_size_x, 2)
h = round(_h, 2)
kitti_dets.append([cls_id, h, w, l, x, y, z, _yaw])
return np.array(kitti_dets)
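# convert_det_to_real_values inverts the BEV discretisation applied in
# post_processing: BEV rows map back to lidar x and columns to lidar y.
# With an assumed config (BEV_WIDTH = BEV_HEIGHT = 608, bound_size_x =
# bound_size_y = 50, minX = 0, minY = -25), a detection at BEV pixel
# (x=304, y=304) is ~25 m ahead and 0 m to the side:
# x = 304 / 608 * 50 + 0 = 25.0    y = 304 / 608 * 50 - 25 = 0.0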
if __name__ == '__main__':
print("=".ljust(66, "="))
configs = parse_test_configs()
model = create_model(configs)
print('\n\n' + '-*=' * 30 + '\n\n')
# assert os.path.isfile(configs.model_dir), "No file at {}".format(configs.model_dir)
if os.path.isfile(configs.model_dir):
model_path = configs.model_dir
else:
# for file in os.listdir(configs.model_dir):
# model_path = os.path.join(configs.model_dir, file)
# pick the most recent checkpoint in the directory (sorted by filename)
model_path = os.path.join(configs.model_dir, sorted(os.listdir(configs.model_dir))[-1])
print('Loaded weights from {}\n'.format(model_path))
# model.load_state_dict(torch.load(model_path))
configs.device = torch.device('cpu' if configs.no_cuda else 'cuda:{}'.format(configs.gpu_idx))
model.load_state_dict(torch.load(model_path, map_location=configs.device))
model = model.to(device=configs.device)
out_cap = None
model.eval()
test_dataloader = create_test_dataloader(configs)
with torch.no_grad():
for batch_idx, batch_data in enumerate(test_dataloader):
bev_maps, metadatas = batch_data
input_bev_maps = bev_maps.to(configs.device, non_blocking=True).float()
t1 = time_synchronized()
outputs = model(input_bev_maps)
outputs['hm_cen'] = _sigmoid(outputs['hm_cen'])
outputs['cen_offset'] = _sigmoid(outputs['cen_offset'])
# detections size (batch_size, K, 10)
detections = decode(outputs['hm_cen'], outputs['cen_offset'], outputs['direction'], outputs['z_coor'],
outputs['dim'], K=configs.K)
detections = detections.cpu().numpy().astype(np.float32)
detections = post_processing(detections, configs.num_classes, configs.down_ratio, configs.peak_thresh)
t2 = time_synchronized()
detections = detections[0] # only first batch
# Draw prediction in the image
bev_map = (bev_maps.squeeze().permute(1, 2, 0).numpy() * 255).astype(np.uint8)
bev_map = cv2.resize(bev_map, (cnf.BEV_WIDTH, cnf.BEV_HEIGHT))
bev_map = draw_predictions(bev_map, detections.copy(), configs.num_classes)
# Rotate the bev_map
bev_map = cv2.rotate(bev_map, cv2.ROTATE_180)
kitti_dets = convert_det_to_real_values(detections)
print('\tDone testing the {}th sample, time: {:.1f}ms, speed {:.2f}FPS'.format(batch_idx, (t2 - t1) * 1000,
1 / (t2 - t1)))
if configs.save_test_output:
img_fn = os.path.basename(metadatas['bev_path'][0])[:-4]
if configs.output_format == 'image':
cv2.imwrite(os.path.join(configs.results_dir, '{}.jpg'.format(img_fn)), bev_map)
elif configs.output_format == 'video':
if out_cap is None:
out_cap_h, out_cap_w = bev_map.shape[:2]
fourcc = cv2.VideoWriter_fourcc(*'MJPG')
out_cap = cv2.VideoWriter(
os.path.join(configs.results_dir, '{}.avi'.format(configs.output_video_fn)),
fourcc, 30, (out_cap_w, out_cap_h))
out_cap.write(bev_map)
else:
pass
txt_path = os.path.join(configs.results_dir,'{}.txt'.format(img_fn))
txt_file = open(txt_path, 'w')
for det in kitti_dets:
write_line = cnf.CLASS_ID_TO_NAME[det[0]] + ' 0 0 0 0 0 0 0 ' + str(det[1]) + ' ' + str(det[2]) +\
' ' + str(det[3]) + ' ' + str(det[4]) + ' ' + str(det[5]) + ' ' + str(det[6]) + ' ' + str(det[7]) +'\n'
txt_file.write(write_line)
txt_file.close()
if out_cap:
out_cap.release()
cv2.destroyAllWindows()

+ 0
- 0
point-cloud/sfa/losses/__init__.py View File


+ 163
- 0
point-cloud/sfa/losses/losses.py View File

@@ -0,0 +1,163 @@
# ------------------------------------------------------------------------------
# Portions of this code are from
# CornerNet (https://github.com/princeton-vl/CornerNet)
# Copyright (c) 2018, University of Michigan
# Licensed under the BSD 3-Clause License
# Modified by Nguyen Mau Dung (2020.08.09)
# ------------------------------------------------------------------------------
import os
import sys
import math
import torch.nn as nn
import torch
import torch.nn.functional as F
src_dir = os.path.dirname(os.path.realpath(__file__))
# while not src_dir.endswith("sfa"):
# src_dir = os.path.dirname(src_dir)
if src_dir not in sys.path:
sys.path.append(src_dir)
from utils.torch_utils import to_cpu, _sigmoid
def _gather_feat(feat, ind, mask=None):
dim = feat.size(2)
ind = ind.unsqueeze(2).expand(ind.size(0), ind.size(1), dim)
feat = feat.gather(1, ind)
if mask is not None:
mask = mask.unsqueeze(2).expand_as(feat)
feat = feat[mask]
feat = feat.view(-1, dim)
return feat
def _transpose_and_gather_feat(feat, ind):
feat = feat.permute(0, 2, 3, 1).contiguous()
feat = feat.view(feat.size(0), -1, feat.size(3))
feat = _gather_feat(feat, ind)
return feat
def _neg_loss(pred, gt, alpha=2, beta=4):
''' Modified focal loss. Exactly the same as CornerNet.
Runs faster and costs a little bit more memory
Arguments:
pred (batch x c x h x w)
gt_regr (batch x c x h x w)
'''
pos_inds = gt.eq(1).float()
neg_inds = gt.lt(1).float()
neg_weights = torch.pow(1 - gt, beta)
loss = 0
pos_loss = torch.log(pred) * torch.pow(1 - pred, alpha) * pos_inds
neg_loss = torch.log(1 - pred) * torch.pow(pred, alpha) * neg_weights * neg_inds
num_pos = pos_inds.float().sum()
pos_loss = pos_loss.sum()
neg_loss = neg_loss.sum()
if num_pos == 0:
loss = loss - neg_loss
else:
loss = loss - (pos_loss + neg_loss) / num_pos
return loss
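# Focal-loss sketch: a confident, correct heatmap peak contributes
# almost nothing to the loss, while a missed peak is penalised heavily.
import torch
gt = torch.zeros(1, 1, 4, 4)
gt[0, 0, 1, 1] = 1.0
pred = torch.full((1, 1, 4, 4), 0.01)
pred[0, 0, 1, 1] = 0.99
small = _neg_loss(pred, gt)   # near-perfect prediction -> tiny loss
pred[0, 0, 1, 1] = 0.10
large = _neg_loss(pred, gt)   # missed peak -> much larger loss
assert large > small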
class FocalLoss(nn.Module):
'''nn.Module wrapper for focal loss'''
def __init__(self):
super(FocalLoss, self).__init__()
self.neg_loss = _neg_loss
def forward(self, out, target):
return self.neg_loss(out, target)
class L1Loss(nn.Module):
def __init__(self):
super(L1Loss, self).__init__()
def forward(self, output, mask, ind, target):
pred = _transpose_and_gather_feat(output, ind)
mask = mask.unsqueeze(2).expand_as(pred).float()
loss = F.l1_loss(pred * mask, target * mask, reduction='sum')
loss = loss / (mask.sum() + 1e-4)
return loss
class L1Loss_Balanced(nn.Module):
"""Balanced L1 Loss
paper: https://arxiv.org/pdf/1904.02701.pdf (CVPR 2019)
Code refer from: https://github.com/OceanPang/Libra_R-CNN
"""
def __init__(self, alpha=0.5, gamma=1.5, beta=1.0):
super(L1Loss_Balanced, self).__init__()
self.alpha = alpha
self.gamma = gamma
assert beta > 0
self.beta = beta
def forward(self, output, mask, ind, target):
pred = _transpose_and_gather_feat(output, ind)
mask = mask.unsqueeze(2).expand_as(pred).float()
loss = self.balanced_l1_loss(pred * mask, target * mask)
loss = loss.sum() / (mask.sum() + 1e-4)
return loss
def balanced_l1_loss(self, pred, target):
assert pred.size() == target.size() and target.numel() > 0
diff = torch.abs(pred - target)
b = math.exp(self.gamma / self.alpha) - 1
loss = torch.where(diff < self.beta,
self.alpha / b * (b * diff + 1) * torch.log(b * diff / self.beta + 1) - self.alpha * diff,
self.gamma * diff + self.gamma / b - self.alpha * self.beta)
return loss
class Compute_Loss(nn.Module):
def __init__(self, device):
super(Compute_Loss, self).__init__()
self.device = device
self.focal_loss = FocalLoss()
self.l1_loss = L1Loss()
self.l1_loss_balanced = L1Loss_Balanced(alpha=0.5, gamma=1.5, beta=1.0)
self.weight_hm_cen = 1.
self.weight_z_coor, self.weight_cenoff, self.weight_dim, self.weight_direction = 1., 1., 1., 1.
def forward(self, outputs, tg):
# tg: targets
outputs['hm_cen'] = _sigmoid(outputs['hm_cen'])
outputs['cen_offset'] = _sigmoid(outputs['cen_offset'])
l_hm_cen = self.focal_loss(outputs['hm_cen'], tg['hm_cen'])
l_cen_offset = self.l1_loss(outputs['cen_offset'], tg['obj_mask'], tg['indices_center'], tg['cen_offset'])
l_direction = self.l1_loss(outputs['direction'], tg['obj_mask'], tg['indices_center'], tg['direction'])
# Apply the L1_loss balanced for z coor and dimension regression
l_z_coor = self.l1_loss_balanced(outputs['z_coor'], tg['obj_mask'], tg['indices_center'], tg['z_coor'])
l_dim = self.l1_loss_balanced(outputs['dim'], tg['obj_mask'], tg['indices_center'], tg['dim'])
total_loss = l_hm_cen * self.weight_hm_cen + l_cen_offset * self.weight_cenoff + \
l_dim * self.weight_dim + l_direction * self.weight_direction + \
l_z_coor * self.weight_z_coor
loss_stats = {
'total_loss': to_cpu(total_loss).item(),
'hm_cen_loss': to_cpu(l_hm_cen).item(),
'cen_offset_loss': to_cpu(l_cen_offset).item(),
'dim_loss': to_cpu(l_dim).item(),
'direction_loss': to_cpu(l_direction).item(),
'z_coor_loss': to_cpu(l_z_coor).item(),
}
return total_loss, loss_stats
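# A shape sketch for Compute_Loss (shapes inferred from how the losses
# index their targets; max_objects M and the head sizes are illustrative):
import torch
B, H, W, M = 1, 152, 152, 50
outputs = {'hm_cen': torch.randn(B, 3, H, W), 'cen_offset': torch.randn(B, 2, H, W),
           'direction': torch.randn(B, 2, H, W), 'z_coor': torch.randn(B, 1, H, W),
           'dim': torch.randn(B, 3, H, W)}
targets = {'hm_cen': torch.rand(B, 3, H, W) * 0.99, 'obj_mask': torch.ones(B, M),
           'indices_center': torch.randint(0, H * W, (B, M)),
           'cen_offset': torch.rand(B, M, 2), 'direction': torch.rand(B, M, 2),
           'z_coor': torch.rand(B, M, 1), 'dim': torch.rand(B, M, 3)}
criterion = Compute_Loss(device='cpu')
total_loss, loss_stats = criterion(outputs, targets)  # scalar + per-head dict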

+ 0
- 0
point-cloud/sfa/models/__init__.py View File


+ 252
- 0
point-cloud/sfa/models/fpn_resnet.py View File

@@ -0,0 +1,252 @@
"""
# ---------------------------------------------------------------------------------
# -*- coding: utf-8 -*-
-----------------------------------------------------------------------------------
# Copyright (c) Microsoft
# Licensed under the MIT License.
# Written by Bin Xiao (Bin.Xiao@microsoft.com)
# Modified by Xingyi Zhou
# Refer from: https://github.com/xingyizhou/CenterNet
# Modifier: Nguyen Mau Dung (2020.08.09)
# ------------------------------------------------------------------------------
"""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import os
import torch
import torch.nn as nn
import torch.utils.model_zoo as model_zoo
import torch.nn.functional as F
BN_MOMENTUM = 0.1
model_urls = {
'resnet18': 'https://download.pytorch.org/models/resnet18-5c106cde.pth',
'resnet34': 'https://download.pytorch.org/models/resnet34-333f7ec4.pth',
'resnet50': 'https://download.pytorch.org/models/resnet50-19c8e357.pth',
'resnet101': 'https://download.pytorch.org/models/resnet101-5d3b4d8f.pth',
'resnet152': 'https://download.pytorch.org/models/resnet152-b121ed2d.pth',
}
def conv3x3(in_planes, out_planes, stride=1):
"""3x3 convolution with padding"""
return nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=stride, padding=1, bias=False)
class BasicBlock(nn.Module):
expansion = 1
def __init__(self, inplanes, planes, stride=1, downsample=None):
super(BasicBlock, self).__init__()
self.conv1 = conv3x3(inplanes, planes, stride)
self.bn1 = nn.BatchNorm2d(planes, momentum=BN_MOMENTUM)
self.relu = nn.ReLU(inplace=True)
self.conv2 = conv3x3(planes, planes)
self.bn2 = nn.BatchNorm2d(planes, momentum=BN_MOMENTUM)
self.downsample = downsample
self.stride = stride
def forward(self, x):
residual = x
out = self.conv1(x)
out = self.bn1(out)
out = self.relu(out)
out = self.conv2(out)
out = self.bn2(out)
if self.downsample is not None:
residual = self.downsample(x)
out += residual
out = self.relu(out)
return out
class Bottleneck(nn.Module):
expansion = 4
def __init__(self, inplanes, planes, stride=1, downsample=None):
super(Bottleneck, self).__init__()
self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=1, bias=False)
self.bn1 = nn.BatchNorm2d(planes, momentum=BN_MOMENTUM)
self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride, padding=1, bias=False)
self.bn2 = nn.BatchNorm2d(planes, momentum=BN_MOMENTUM)
self.conv3 = nn.Conv2d(planes, planes * self.expansion, kernel_size=1, bias=False)
self.bn3 = nn.BatchNorm2d(planes * self.expansion, momentum=BN_MOMENTUM)
self.relu = nn.ReLU(inplace=True)
self.downsample = downsample
self.stride = stride
def forward(self, x):
residual = x
out = self.conv1(x)
out = self.bn1(out)
out = self.relu(out)
out = self.conv2(out)
out = self.bn2(out)
out = self.relu(out)
out = self.conv3(out)
out = self.bn3(out)
if self.downsample is not None:
residual = self.downsample(x)
out += residual
out = self.relu(out)
return out
class PoseResNet(nn.Module):
def __init__(self, block, layers, heads, head_conv, **kwargs):
self.inplanes = 64
self.deconv_with_bias = False
self.heads = heads
super(PoseResNet, self).__init__()
self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3, bias=False)
self.bn1 = nn.BatchNorm2d(64, momentum=BN_MOMENTUM)
self.relu = nn.ReLU(inplace=True)
self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
self.layer1 = self._make_layer(block, 64, layers[0])
self.layer2 = self._make_layer(block, 128, layers[1], stride=2)
self.layer3 = self._make_layer(block, 256, layers[2], stride=2)
self.layer4 = self._make_layer(block, 512, layers[3], stride=2)
self.conv_up_level1 = nn.Conv2d(768, 256, kernel_size=1, stride=1, padding=0)
self.conv_up_level2 = nn.Conv2d(384, 128, kernel_size=1, stride=1, padding=0)
self.conv_up_level3 = nn.Conv2d(192, 64, kernel_size=1, stride=1, padding=0)
fpn_channels = [256, 128, 64]
for fpn_idx, fpn_c in enumerate(fpn_channels):
for head in sorted(self.heads):
num_output = self.heads[head]
if head_conv > 0:
fc = nn.Sequential(
nn.Conv2d(fpn_c, head_conv, kernel_size=3, padding=1, bias=True),
nn.ReLU(inplace=True),
nn.Conv2d(head_conv, num_output, kernel_size=1, stride=1, padding=0))
else:
fc = nn.Conv2d(in_channels=fpn_c, out_channels=num_output, kernel_size=1, stride=1, padding=0)
self.__setattr__('fpn{}_{}'.format(fpn_idx, head), fc)
def _make_layer(self, block, planes, blocks, stride=1):
downsample = None
if stride != 1 or self.inplanes != planes * block.expansion:
downsample = nn.Sequential(
nn.Conv2d(self.inplanes, planes * block.expansion, kernel_size=1, stride=stride, bias=False),
nn.BatchNorm2d(planes * block.expansion, momentum=BN_MOMENTUM),
)
layers = []
layers.append(block(self.inplanes, planes, stride, downsample))
self.inplanes = planes * block.expansion
for i in range(1, blocks):
layers.append(block(self.inplanes, planes))
return nn.Sequential(*layers)
def forward(self, x):
_, _, input_h, input_w = x.size()
hm_h, hm_w = input_h // 4, input_w // 4
x = self.conv1(x)
x = self.bn1(x)
x = self.relu(x)
x = self.maxpool(x)
out_layer1 = self.layer1(x)
out_layer2 = self.layer2(out_layer1)
out_layer3 = self.layer3(out_layer2)
out_layer4 = self.layer4(out_layer3)
# up_level1: torch.Size([b, 512, 14, 14])
up_level1 = F.interpolate(out_layer4, scale_factor=2, mode='bilinear', align_corners=True)
concat_level1 = torch.cat((up_level1, out_layer3), dim=1)
# up_level2: torch.Size([b, 256, 28, 28])
up_level2 = F.interpolate(self.conv_up_level1(concat_level1), scale_factor=2, mode='bilinear',
align_corners=True)
concat_level2 = torch.cat((up_level2, out_layer2), dim=1)
# up_level3: torch.Size([b, 128, 56, 56]),
up_level3 = F.interpolate(self.conv_up_level2(concat_level2), scale_factor=2, mode='bilinear',
align_corners=True)
# up_level4: torch.Size([b, 64, 56, 56])
up_level4 = self.conv_up_level3(torch.cat((up_level3, out_layer1), dim=1))
ret = {}
for head in self.heads:
temp_outs = []
for fpn_idx, fdn_input in enumerate([up_level2, up_level3, up_level4]):
fpn_out = self.__getattr__('fpn{}_{}'.format(fpn_idx, head))(fdn_input)
_, _, fpn_out_h, fpn_out_w = fpn_out.size()
# Make sure the added features have the same size as the heatmap output
if (fpn_out_w != hm_w) or (fpn_out_h != hm_h):
fpn_out = F.interpolate(fpn_out, size=(hm_h, hm_w))
temp_outs.append(fpn_out)
# Take the softmax in the keypoint feature pyramid network
final_out = self.apply_kfpn(temp_outs)
ret[head] = final_out
return ret
def apply_kfpn(self, outs):
outs = torch.cat([out.unsqueeze(-1) for out in outs], dim=-1)
softmax_outs = F.softmax(outs, dim=-1)
ret_outs = (outs * softmax_outs).sum(dim=-1)
return ret_outs
def init_weights(self, num_layers, pretrained=True):
if pretrained:
# TODO: Check initial weights for head later
for fpn_idx in [0, 1, 2]: # 3 FPN layers
for head in self.heads:
final_layer = self.__getattr__('fpn{}_{}'.format(fpn_idx, head))
for i, m in enumerate(final_layer.modules()):
if isinstance(m, nn.Conv2d):
# nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')
# print('=> init {}.weight as normal(0, 0.001)'.format(name))
# print('=> init {}.bias as 0'.format(name))
if m.weight.shape[0] == self.heads[head]:
if 'hm' in head:
nn.init.constant_(m.bias, -2.19)
else:
nn.init.normal_(m.weight, std=0.001)
nn.init.constant_(m.bias, 0)
# pretrained_state_dict = torch.load(pretrained)
url = model_urls['resnet{}'.format(num_layers)]
pretrained_state_dict = model_zoo.load_url(url)
print('=> loading pretrained model {}'.format(url))
self.load_state_dict(pretrained_state_dict, strict=False)
resnet_spec = {18: (BasicBlock, [2, 2, 2, 2]),
34: (BasicBlock, [3, 4, 6, 3]),
50: (Bottleneck, [3, 4, 6, 3]),
101: (Bottleneck, [3, 4, 23, 3]),
152: (Bottleneck, [3, 8, 36, 3])}
def get_pose_net(num_layers, heads, head_conv, imagenet_pretrained):
block_class, layers = resnet_spec[num_layers]
model = PoseResNet(block_class, layers, heads, head_conv=head_conv)
model.init_weights(num_layers, pretrained=imagenet_pretrained)
return model
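# A sketch of building the SFA3D detector from this file (the heads
# match the ones configured in inference.py; input is a 3-channel BEV map):
import torch
heads = {'hm_cen': 3, 'cen_offset': 2, 'direction': 2, 'z_coor': 1, 'dim': 3}
model = get_pose_net(num_layers=18, heads=heads, head_conv=64,
                     imagenet_pretrained=False)
out = model(torch.randn(1, 3, 608, 608))
assert out['hm_cen'].shape == (1, 3, 152, 152)  # every head at 1/4 resolution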

+ 134
- 0
point-cloud/sfa/models/model_utils.py View File

@@ -0,0 +1,134 @@
"""
# -*- coding: utf-8 -*-
-----------------------------------------------------------------------------------
# Author: Nguyen Mau Dung
# DoC: 2020.08.09
# email: nguyenmaudung93.kstn@gmail.com
-----------------------------------------------------------------------------------
# Description: utility functions used by the models
"""
import os
import sys
import torch
src_dir = os.path.dirname(os.path.realpath(__file__))
# while not src_dir.endswith("sfa"):
# src_dir = os.path.dirname(src_dir)
if src_dir not in sys.path:
sys.path.append(src_dir)
from models import resnet, fpn_resnet
def create_model(configs):
"""Create model based on architecture name"""
try:
arch_parts = configs.arch.split('_')
num_layers = int(arch_parts[-1])
except (IndexError, ValueError):
raise ValueError('Invalid model architecture name: {}'.format(configs.arch))
if 'fpn_resnet' in configs.arch:
print('using ResNet architecture with feature pyramid')
model = fpn_resnet.get_pose_net(num_layers=num_layers, heads=configs.heads, head_conv=configs.head_conv,
imagenet_pretrained=configs.imagenet_pretrained)
elif 'resnet' in configs.arch:
print('using ResNet architecture')
model = resnet.get_pose_net(num_layers=num_layers, heads=configs.heads, head_conv=configs.head_conv,
imagenet_pretrained=configs.imagenet_pretrained)
else:
raise ValueError('Undefined model backbone: {}'.format(configs.arch))
return model
def get_num_parameters(model):
"""Count number of trained parameters of the model"""
if hasattr(model, 'module'):
num_parameters = sum(p.numel() for p in model.module.parameters() if p.requires_grad)
else:
num_parameters = sum(p.numel() for p in model.parameters() if p.requires_grad)
return num_parameters
def make_data_parallel(model, configs):
if configs.distributed:
# For multiprocessing distributed, DistributedDataParallel constructor
# should always set the single device scope, otherwise,
# DistributedDataParallel will use all available devices.
if configs.gpu_idx is not None:
torch.cuda.set_device(configs.gpu_idx)
model.cuda(configs.gpu_idx)
# When using a single GPU per process and per
# DistributedDataParallel, we need to divide the batch size
# ourselves based on the total number of GPUs we have
configs.batch_size = int(configs.batch_size / configs.ngpus_per_node)
configs.num_workers = int((configs.num_workers + configs.ngpus_per_node - 1) / configs.ngpus_per_node)
model = torch.nn.parallel.DistributedDataParallel(model, device_ids=[configs.gpu_idx])
else:
model.cuda()
# DistributedDataParallel will divide and allocate batch_size to all
# available GPUs if device_ids are not set
model = torch.nn.parallel.DistributedDataParallel(model)
elif configs.gpu_idx is not None:
torch.cuda.set_device(configs.gpu_idx)
model = model.cuda(configs.gpu_idx)
else:
# DataParallel will divide and allocate batch_size to all available GPUs
model = torch.nn.DataParallel(model).cuda()
return model
if __name__ == '__main__':
import argparse
from torchsummary import summary
from easydict import EasyDict as edict
parser = argparse.ArgumentParser(description='RTM3D Implementation')
parser.add_argument('-a', '--arch', type=str, default='resnet_18', metavar='ARCH',
help='The name of the model architecture')
parser.add_argument('--head_conv', type=int, default=-1,
help='conv layer channels for the output head: '
'0 for no conv layer, '
'-1 for the default setting '
'(64 for resnets and 256 for dla).')
configs = edict(vars(parser.parse_args()))
if configs.head_conv == -1: # init default head_conv
configs.head_conv = 256 if 'dla' in configs.arch else 64
configs.num_classes = 3
configs.num_vertexes = 8
configs.num_center_offset = 2
configs.num_vertexes_offset = 2
configs.num_dimension = 3
configs.num_rot = 8
configs.num_depth = 1
configs.num_wh = 2
configs.heads = {
'hm_mc': configs.num_classes,
'hm_ver': configs.num_vertexes,
'vercoor': configs.num_vertexes * 2,
'cenoff': configs.num_center_offset,
'veroff': configs.num_vertexes_offset,
'dim': configs.num_dimension,
'rot': configs.num_rot,
'depth': configs.num_depth,
'wh': configs.num_wh
}
configs.device = torch.device('cuda:1')
# configs.device = torch.device('cpu')
model = create_model(configs).to(device=configs.device)
sample_input = torch.randn((1, 3, 224, 224)).to(device=configs.device)
# summary(model.cuda(1), (3, 224, 224))
output = model(sample_input)
for hm_name, hm_out in output.items():
print('hm_name: {}, hm_out size: {}'.format(hm_name, hm_out.size()))
print('number of parameters: {}'.format(get_num_parameters(model)))

+ 284
- 0
point-cloud/sfa/models/resnet.py View File

@@ -0,0 +1,284 @@
"""
# ---------------------------------------------------------------------------------
# -*- coding: utf-8 -*-
-----------------------------------------------------------------------------------
# Copyright (c) Microsoft
# Licensed under the MIT License.
# Written by Bin Xiao (Bin.Xiao@microsoft.com)
# Modified by Xingyi Zhou
# Refer from: https://github.com/xingyizhou/CenterNet
# Modifier: Nguyen Mau Dung (2020.08.09)
# ------------------------------------------------------------------------------
"""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import os
import torch
import torch.nn as nn
import torch.utils.model_zoo as model_zoo
BN_MOMENTUM = 0.1
model_urls = {
'resnet18': 'https://download.pytorch.org/models/resnet18-5c106cde.pth',
'resnet34': 'https://download.pytorch.org/models/resnet34-333f7ec4.pth',
'resnet50': 'https://download.pytorch.org/models/resnet50-19c8e357.pth',
'resnet101': 'https://download.pytorch.org/models/resnet101-5d3b4d8f.pth',
'resnet152': 'https://download.pytorch.org/models/resnet152-b121ed2d.pth',
}
def conv3x3(in_planes, out_planes, stride=1):
"""3x3 convolution with padding"""
return nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=stride,
padding=1, bias=False)
class BasicBlock(nn.Module):
expansion = 1
def __init__(self, inplanes, planes, stride=1, downsample=None):
super(BasicBlock, self).__init__()
self.conv1 = conv3x3(inplanes, planes, stride)
self.bn1 = nn.BatchNorm2d(planes, momentum=BN_MOMENTUM)
self.relu = nn.ReLU(inplace=True)
self.conv2 = conv3x3(planes, planes)
self.bn2 = nn.BatchNorm2d(planes, momentum=BN_MOMENTUM)
self.downsample = downsample
self.stride = stride
def forward(self, x):
residual = x
out = self.conv1(x)
out = self.bn1(out)
out = self.relu(out)
out = self.conv2(out)
out = self.bn2(out)
if self.downsample is not None:
residual = self.downsample(x)
out += residual
out = self.relu(out)
return out
class Bottleneck(nn.Module):
expansion = 4
def __init__(self, inplanes, planes, stride=1, downsample=None):
super(Bottleneck, self).__init__()
self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=1, bias=False)
self.bn1 = nn.BatchNorm2d(planes, momentum=BN_MOMENTUM)
self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride,
padding=1, bias=False)
self.bn2 = nn.BatchNorm2d(planes, momentum=BN_MOMENTUM)
self.conv3 = nn.Conv2d(planes, planes * self.expansion, kernel_size=1,
bias=False)
self.bn3 = nn.BatchNorm2d(planes * self.expansion,
momentum=BN_MOMENTUM)
self.relu = nn.ReLU(inplace=True)
self.downsample = downsample
self.stride = stride
def forward(self, x):
residual = x
out = self.conv1(x)
out = self.bn1(out)
out = self.relu(out)
out = self.conv2(out)
out = self.bn2(out)
out = self.relu(out)
out = self.conv3(out)
out = self.bn3(out)
if self.downsample is not None:
residual = self.downsample(x)
out += residual
out = self.relu(out)
return out
class PoseResNet(nn.Module):
def __init__(self, block, layers, heads, head_conv, **kwargs):
self.inplanes = 64
self.deconv_with_bias = False
self.heads = heads
super(PoseResNet, self).__init__()
self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3,
bias=False)
self.bn1 = nn.BatchNorm2d(64, momentum=BN_MOMENTUM)
self.relu = nn.ReLU(inplace=True)
self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
self.layer1 = self._make_layer(block, 64, layers[0])
self.layer2 = self._make_layer(block, 128, layers[1], stride=2)
self.layer3 = self._make_layer(block, 256, layers[2], stride=2)
self.layer4 = self._make_layer(block, 512, layers[3], stride=2)
# used for deconv layers
self.deconv_layers = self._make_deconv_layer(
3,
[256, 256, 256],
[4, 4, 4],
)
# self.final_layer = []
for head in sorted(self.heads):
num_output = self.heads[head]
if head_conv > 0:
fc = nn.Sequential(
nn.Conv2d(256, head_conv,
kernel_size=3, padding=1, bias=True),
nn.ReLU(inplace=True),
nn.Conv2d(head_conv, num_output,
kernel_size=1, stride=1, padding=0))
else:
fc = nn.Conv2d(
in_channels=256,
out_channels=num_output,
kernel_size=1,
stride=1,
padding=0
)
self.__setattr__(head, fc)
# self.final_layer = nn.ModuleList(self.final_layer)
def _make_layer(self, block, planes, blocks, stride=1):
downsample = None
if stride != 1 or self.inplanes != planes * block.expansion:
downsample = nn.Sequential(
nn.Conv2d(self.inplanes, planes * block.expansion,
kernel_size=1, stride=stride, bias=False),
nn.BatchNorm2d(planes * block.expansion, momentum=BN_MOMENTUM),
)
layers = []
layers.append(block(self.inplanes, planes, stride, downsample))
self.inplanes = planes * block.expansion
for i in range(1, blocks):
layers.append(block(self.inplanes, planes))
return nn.Sequential(*layers)
def _get_deconv_cfg(self, deconv_kernel, index):
if deconv_kernel == 4:
padding = 1
output_padding = 0
elif deconv_kernel == 3:
padding = 1
output_padding = 1
elif deconv_kernel == 2:
padding = 0
output_padding = 0
else:
raise ValueError('Unsupported deconv kernel size: {}'.format(deconv_kernel))
return deconv_kernel, padding, output_padding
def _make_deconv_layer(self, num_layers, num_filters, num_kernels):
assert num_layers == len(num_filters), \
'ERROR: num_deconv_layers is different from len(num_deconv_filters)'
assert num_layers == len(num_kernels), \
'ERROR: num_deconv_layers is different from len(num_deconv_kernels)'
layers = []
for i in range(num_layers):
kernel, padding, output_padding = \
self._get_deconv_cfg(num_kernels[i], i)
planes = num_filters[i]
layers.append(
nn.ConvTranspose2d(
in_channels=self.inplanes,
out_channels=planes,
kernel_size=kernel,
stride=2,
padding=padding,
output_padding=output_padding,
bias=self.deconv_with_bias))
layers.append(nn.BatchNorm2d(planes, momentum=BN_MOMENTUM))
layers.append(nn.ReLU(inplace=True))
self.inplanes = planes
return nn.Sequential(*layers)
def forward(self, x):
x = self.conv1(x)
x = self.bn1(x)
x = self.relu(x)
x = self.maxpool(x)
x = self.layer1(x)
x = self.layer2(x)
x = self.layer3(x)
x = self.layer4(x)
x = self.deconv_layers(x)
ret = {}
for head in self.heads:
ret[head] = self.__getattr__(head)(x)
return ret
def init_weights(self, num_layers, pretrained=True):
if pretrained:
# print('=> init resnet deconv weights from normal distribution')
for _, m in self.deconv_layers.named_modules():
if isinstance(m, nn.ConvTranspose2d):
# print('=> init {}.weight as normal(0, 0.001)'.format(name))
# print('=> init {}.bias as 0'.format(name))
nn.init.normal_(m.weight, std=0.001)
if self.deconv_with_bias:
nn.init.constant_(m.bias, 0)
elif isinstance(m, nn.BatchNorm2d):
# print('=> init {}.weight as 1'.format(name))
# print('=> init {}.bias as 0'.format(name))
nn.init.constant_(m.weight, 1)
nn.init.constant_(m.bias, 0)
# print('=> init final conv weights from normal distribution')
for head in self.heads:
final_layer = self.__getattr__(head)
for i, m in enumerate(final_layer.modules()):
if isinstance(m, nn.Conv2d):
# nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')
# print('=> init {}.weight as normal(0, 0.001)'.format(name))
# print('=> init {}.bias as 0'.format(name))
if m.weight.shape[0] == self.heads[head]:
if 'hm' in head:
nn.init.constant_(m.bias, -2.19)
else:
nn.init.normal_(m.weight, std=0.001)
nn.init.constant_(m.bias, 0)
# pretrained_state_dict = torch.load(pretrained)
url = model_urls['resnet{}'.format(num_layers)]
pretrained_state_dict = model_zoo.load_url(url)
print('=> loading pretrained model {}'.format(url))
self.load_state_dict(pretrained_state_dict, strict=False)
resnet_spec = {18: (BasicBlock, [2, 2, 2, 2]),
34: (BasicBlock, [3, 4, 6, 3]),
50: (Bottleneck, [3, 4, 6, 3]),
101: (Bottleneck, [3, 4, 23, 3]),
152: (Bottleneck, [3, 8, 36, 3])}
def get_pose_net(num_layers, heads, head_conv, imagenet_pretrained):
block_class, layers = resnet_spec[num_layers]
model = PoseResNet(block_class, layers, heads, head_conv=head_conv)
model.init_weights(num_layers, pretrained=imagenet_pretrained)
return model

+ 290
- 0
point-cloud/sfa/train.py View File

@@ -0,0 +1,290 @@
"""
# -*- coding: utf-8 -*-
-----------------------------------------------------------------------------------
# Author: Nguyen Mau Dung
# DoC: 2020.08.17
# email: nguyenmaudung93.kstn@gmail.com
-----------------------------------------------------------------------------------
# Description: Training script
"""
import time
import numpy as np
import sys
import random
import os
import warnings
warnings.filterwarnings("ignore", category=UserWarning)
import torch
from torch.utils.tensorboard import SummaryWriter
import torch.distributed as dist
import torch.multiprocessing as mp
import torch.utils.data.distributed
from tqdm import tqdm
src_dir = os.path.dirname(os.path.realpath(__file__))
# while not src_dir.endswith("sfa"):
# src_dir = os.path.dirname(src_dir)
if src_dir not in sys.path:
sys.path.append(src_dir)
from data_process.kitti_dataloader import create_train_dataloader, create_val_dataloader
from models.model_utils import create_model, make_data_parallel, get_num_parameters
from utils.train_utils import create_optimizer, create_lr_scheduler, get_saved_state, save_checkpoint
from utils.torch_utils import reduce_tensor, to_python_float
from utils.misc import AverageMeter, ProgressMeter
from utils.logger import Logger
from config.train_config import parse_train_configs
from losses.losses import Compute_Loss
def main():
configs = parse_train_configs()
# Re-produce results
if configs.seed is not None:
random.seed(configs.seed)
np.random.seed(configs.seed)
torch.manual_seed(configs.seed)
torch.backends.cudnn.deterministic = True
torch.backends.cudnn.benchmark = False
if configs.gpu_idx is not None:
print('You have chosen a specific GPU. This will completely disable data parallelism.')
if configs.dist_url == "env://" and configs.world_size == -1:
configs.world_size = int(os.environ["WORLD_SIZE"])
configs.distributed = configs.world_size > 1 or configs.multiprocessing_distributed
if configs.multiprocessing_distributed:
configs.world_size = configs.ngpus_per_node * configs.world_size
mp.spawn(main_worker, nprocs=configs.ngpus_per_node, args=(configs,))
else:
main_worker(configs.gpu_idx, configs)
def main_worker(gpu_idx, configs):
configs.gpu_idx = gpu_idx
# configs.device = torch.device('cpu' if configs.gpu_idx is None else 'cuda:{}'.format(configs.gpu_idx))
if configs.distributed:
if configs.dist_url == "env://" and configs.rank == -1:
configs.rank = int(os.environ["RANK"])
if configs.multiprocessing_distributed:
# For multiprocessing distributed training, rank needs to be the
# global rank among all the processes
configs.rank = configs.rank * configs.ngpus_per_node + gpu_idx
dist.init_process_group(backend=configs.dist_backend, init_method=configs.dist_url,
world_size=configs.world_size, rank=configs.rank)
configs.subdivisions = int(64 / configs.batch_size / configs.ngpus_per_node)
else:
configs.subdivisions = int(64 / configs.batch_size)
configs.is_master_node = (not configs.distributed) or (
configs.distributed and (configs.rank % configs.ngpus_per_node == 0))
if configs.is_master_node:
logger = Logger(configs.logs_dir, configs.saved_fn)
logger.info('>>> Created a new logger')
logger.info('>>> configs: {}'.format(configs))
tb_writer = SummaryWriter(log_dir=os.path.join(configs.logs_dir, 'tensorboard'))
else:
logger = None
tb_writer = None
# model
model = create_model(configs)
# load weight from a checkpoint
if configs.pretrained_path is not None:
# assert os.path.isfile(configs.pretrained_path), "=> no checkpoint found at '{}'".format(configs.pretrained_path)
if os.path.isfile(configs.pretrained_path):
model_path = configs.pretrained_path
else:
# pick the most recent checkpoint in the directory (sorted by filename)
model_path = os.path.join(configs.pretrained_path, sorted(os.listdir(configs.pretrained_path))[-1])
model.load_state_dict(torch.load(model_path, map_location=configs.device))
if logger is not None:
logger.info('loaded pretrained model at {}'.format(configs.pretrained_path))
# resume weights of model from a checkpoint
if configs.resume_path is not None:
assert os.path.isfile(configs.resume_path), "=> no checkpoint found at '{}'".format(configs.resume_path)
model.load_state_dict(torch.load(configs.resume_path, map_location='cpu'))
if logger is not None:
logger.info('resume training model from checkpoint {}'.format(configs.resume_path))
# Data Parallel
model = make_data_parallel(model, configs)
# Make sure to create optimizer after moving the model to cuda
optimizer = create_optimizer(configs, model)
lr_scheduler = create_lr_scheduler(optimizer, configs)
configs.step_lr_in_epoch = False if configs.lr_type in ['multi_step', 'cosin', 'one_cycle'] else True
# resume optimizer, lr_scheduler from a checkpoint
if configs.resume_path is not None:
utils_path = configs.resume_path.replace('Model_', 'Utils_')
assert os.path.isfile(utils_path), "=> no checkpoint found at '{}'".format(utils_path)
utils_state_dict = torch.load(utils_path, map_location='cuda:{}'.format(configs.gpu_idx))
optimizer.load_state_dict(utils_state_dict['optimizer'])
lr_scheduler.load_state_dict(utils_state_dict['lr_scheduler'])
configs.start_epoch = utils_state_dict['epoch'] + 1
if configs.is_master_node:
num_parameters = get_num_parameters(model)
logger.info('number of trained parameters of the model: {}'.format(num_parameters))
if logger is not None:
logger.info(">>> Loading dataset & getting dataloader...")
# Create dataloader
train_dataloader, train_sampler = create_train_dataloader(configs)
if logger is not None:
logger.info('number of batches in training set: {}'.format(len(train_dataloader)))
if configs.evaluate:
val_dataloader = create_val_dataloader(configs)
val_loss = validate(val_dataloader, model, configs)
print('val_loss: {:.4e}'.format(val_loss))
return
for epoch in range(configs.start_epoch, configs.num_epochs + 1):
if logger is not None:
logger.info('{}'.format('*-' * 40))
logger.info('{} {}/{} {}'.format('=' * 35, epoch, configs.num_epochs, '=' * 35))
logger.info('{}'.format('*-' * 40))
logger.info('>>> Epoch: [{}/{}]'.format(epoch, configs.num_epochs))
if configs.distributed:
train_sampler.set_epoch(epoch)
# train for one epoch
train_one_epoch(train_dataloader, model, optimizer, lr_scheduler, epoch, configs, logger, tb_writer)
if (not configs.no_val) and (epoch % configs.checkpoint_freq == 0):
val_dataloader = create_val_dataloader(configs)
print('number of batches in val_dataloader: {}'.format(len(val_dataloader)))
val_loss = validate(val_dataloader, model, configs)
print('val_loss: {:.4e}'.format(val_loss))
if tb_writer is not None:
tb_writer.add_scalar('Val_loss', val_loss, epoch)
# Save checkpoint
if configs.is_master_node and ((epoch % configs.checkpoint_freq) == 0):
model_state_dict, utils_state_dict = get_saved_state(model, optimizer, lr_scheduler, epoch, configs)
save_checkpoint(configs.checkpoints_dir, configs.saved_fn, model_state_dict, utils_state_dict, epoch)
if not configs.step_lr_in_epoch:
lr_scheduler.step()
if tb_writer is not None:
tb_writer.add_scalar('LR', lr_scheduler.get_lr()[0], epoch)
if tb_writer is not None:
tb_writer.close()
if configs.distributed:
cleanup()
def cleanup():
dist.destroy_process_group()
def train_one_epoch(train_dataloader, model, optimizer, lr_scheduler, epoch, configs, logger, tb_writer):
batch_time = AverageMeter('Time', ':6.3f')
data_time = AverageMeter('Data', ':6.3f')
losses = AverageMeter('Loss', ':.4e')
progress = ProgressMeter(len(train_dataloader), [batch_time, data_time, losses],
prefix="Train - Epoch: [{}/{}]".format(epoch, configs.num_epochs))
criterion = Compute_Loss(device=configs.device)
num_iters_per_epoch = len(train_dataloader)
# switch to train mode
model.train()
start_time = time.time()
for batch_idx, batch_data in enumerate(tqdm(train_dataloader)):
data_time.update(time.time() - start_time)
imgs, targets = batch_data
batch_size = imgs.size(0)
global_step = num_iters_per_epoch * (epoch - 1) + batch_idx + 1
for k in targets.keys():
targets[k] = targets[k].to(configs.device, non_blocking=True)
imgs = imgs.to(configs.device, non_blocking=True).float()
outputs = model(imgs)
total_loss, loss_stats = criterion(outputs, targets)
# For torch.nn.DataParallel case
if (not configs.distributed) and (configs.gpu_idx is None):
total_loss = torch.mean(total_loss)
# compute gradient and perform backpropagation
total_loss.backward()
if global_step % configs.subdivisions == 0:
optimizer.step()
# zero the parameter gradients
optimizer.zero_grad()
# Adjust learning rate
if configs.step_lr_in_epoch:
lr_scheduler.step()
if tb_writer is not None:
tb_writer.add_scalar('LR', lr_scheduler.get_lr()[0], global_step)
if configs.distributed:
reduced_loss = reduce_tensor(total_loss.data, configs.world_size)
else:
reduced_loss = total_loss.data
losses.update(to_python_float(reduced_loss), batch_size)
# measure elapsed time
# torch.cuda.synchronize()
batch_time.update(time.time() - start_time)
if tb_writer is not None:
if (global_step % configs.tensorboard_freq) == 0:
loss_stats['avg_loss'] = losses.avg
tb_writer.add_scalars('Train', loss_stats, global_step)
# Log message
if logger is not None:
if (global_step % configs.print_freq) == 0:
logger.info(progress.get_message(batch_idx))
start_time = time.time()
def validate(val_dataloader, model, configs):
losses = AverageMeter('Loss', ':.4e')
criterion = Compute_Loss(device=configs.device)
# switch to evaluate mode
model.eval()
with torch.no_grad():
for batch_idx, batch_data in enumerate(tqdm(val_dataloader)):
imgs, targets = batch_data
batch_size = imgs.size(0)
for k in targets.keys():
targets[k] = targets[k].to(configs.device, non_blocking=True)
imgs = imgs.to(configs.device, non_blocking=True).float()
outputs = model(imgs)
total_loss, loss_stats = criterion(outputs, targets)
# For torch.nn.DataParallel case
if (not configs.distributed) and (configs.gpu_idx is None):
total_loss = torch.mean(total_loss)
if configs.distributed:
reduced_loss = reduce_tensor(total_loss.data, configs.world_size)
else:
reduced_loss = total_loss.data
losses.update(to_python_float(reduced_loss), batch_size)
return losses.avg
if __name__ == '__main__':
try:
main()
except KeyboardInterrupt:
try:
cleanup()
sys.exit(0)
except SystemExit:
os._exit(0)
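The loop in train_one_epoch above only steps the optimizer every configs.subdivisions iterations, letting gradients accumulate in between to simulate a larger effective batch size. A minimal standalone sketch of that accumulation pattern (the linear model, random data, and subdivisions=4 are illustrative stand-ins, not values from this repo):

import torch

model = torch.nn.Linear(10, 1)
optimizer = torch.optim.SGD(model.parameters(), lr=0.01)
subdivisions = 4  # step the optimizer once per 4 mini-batches

for step in range(1, 17):
    x, y = torch.randn(8, 10), torch.randn(8, 1)
    loss = torch.nn.functional.mse_loss(model(x), y)
    loss.backward()  # gradients accumulate across iterations
    if step % subdivisions == 0:
        optimizer.step()  # apply the accumulated gradient
        optimizer.zero_grad()  # reset for the next accumulation window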

+ 0
- 0
point-cloud/sfa/utils/__init__.py View File


+ 137
- 0
point-cloud/sfa/utils/demo_utils.py View File

@@ -0,0 +1,137 @@
"""
# -*- coding: utf-8 -*-
-----------------------------------------------------------------------------------
# Author: Nguyen Mau Dung
# DoC: 2020.08.17
# email: nguyenmaudung93.kstn@gmail.com
-----------------------------------------------------------------------------------
# Description: Demonstration utils script
"""
import argparse
import sys
import os
import warnings
import zipfile
warnings.filterwarnings("ignore", category=UserWarning)
from easydict import EasyDict as edict
import numpy as np
import wget
import torch
import cv2
src_dir = os.path.dirname(os.path.realpath(__file__))
# while not src_dir.endswith("sfa"):
# src_dir = os.path.dirname(src_dir)
if src_dir not in sys.path:
sys.path.append(src_dir)
from utils.misc import make_folder, time_synchronized
from utils.evaluation_utils import decode, post_processing
from utils.torch_utils import _sigmoid
def parse_demo_configs():
parser = argparse.ArgumentParser(description='Demonstration config for the implementation')
parser.add_argument('--saved_fn', type=str, default='fpn_resnet_18', metavar='FN',
help='The name used for saving logs, models, ...')
parser.add_argument('-a', '--arch', type=str, default='fpn_resnet_18', metavar='ARCH',
help='The name of the model architecture')
parser.add_argument('--pretrained_path', type=str,
default='../checkpoints/fpn_resnet_18/Model_fpn_resnet_18_epoch_300.pth', metavar='PATH',
help='the path of the pretrained checkpoint')
parser.add_argument('--foldername', type=str, default='2011_09_26_drive_0014_sync', metavar='FN',
help='Folder name for demonstration dataset')
parser.add_argument('--K', type=int, default=50,
help='the number of top-K detections to keep')
parser.add_argument('--no_cuda', action='store_true',
help='If true, cuda is not used.')
parser.add_argument('--gpu_idx', default=0, type=int,
help='GPU index to use.')
parser.add_argument('--peak_thresh', type=float, default=0.2)
parser.add_argument('--output_format', type=str, default='image', metavar='PATH',
help='the type of the demo output (image or video are supported)')
parser.add_argument('--output-width', type=int, default=608,
help='the width of the output; the height varies to preserve the aspect ratio')
configs = edict(vars(parser.parse_args()))
configs.pin_memory = True
configs.distributed = False # For testing on 1 GPU only
configs.input_size = (608, 608)
configs.hm_size = (152, 152)
configs.down_ratio = 4
configs.max_objects = 50
configs.imagenet_pretrained = False
configs.head_conv = 64
configs.num_classes = 3
configs.num_center_offset = 2
configs.num_z = 1
configs.num_dim = 3
configs.num_direction = 2 # sin, cos
configs.heads = {
'hm_cen': configs.num_classes,
'cen_offset': configs.num_center_offset,
'direction': configs.num_direction,
'z_coor': configs.num_z,
'dim': configs.num_dim
}
####################################################################
##############Dataset, Checkpoints, and results dir configs#########
####################################################################
configs.root_dir = '../'
configs.dataset_dir = os.path.join(configs.root_dir, 'dataset', 'kitti', 'demo')
configs.calib_path = os.path.join(configs.root_dir, 'dataset', 'kitti', 'demo', 'calib.txt')
configs.results_dir = os.path.join(configs.root_dir, 'results', configs.saved_fn)
make_folder(configs.results_dir)
return configs
def download_and_unzip(demo_dataset_dir, download_url):
filename = download_url.split('/')[-1]
filepath = os.path.join(demo_dataset_dir, filename)
if os.path.isfile(filepath):
print('The dataset has already been downloaded')
return
print('\nDownloading data for demonstration...')
wget.download(download_url, filepath)
print('\nUnzipping the downloaded data...')
with zipfile.ZipFile(filepath, "r") as zip_ref:
zip_ref.extractall(os.path.join(demo_dataset_dir, filename[:-4]))
def do_detect(configs, model, bevmap, is_front):
if not is_front:
bevmap = torch.flip(bevmap, [1, 2])
input_bev_maps = bevmap.unsqueeze(0).to(configs.device, non_blocking=True).float()
t1 = time_synchronized()
outputs = model(input_bev_maps)
outputs['hm_cen'] = _sigmoid(outputs['hm_cen'])
outputs['cen_offset'] = _sigmoid(outputs['cen_offset'])
# detections size (batch_size, K, 10)
detections = decode(outputs['hm_cen'], outputs['cen_offset'], outputs['direction'], outputs['z_coor'],
outputs['dim'], K=configs.K)
detections = detections.cpu().numpy().astype(np.float32)
detections = post_processing(detections, configs.num_classes, configs.down_ratio, configs.peak_thresh)
t2 = time_synchronized()
# Inference speed
fps = 1 / (t2 - t1)
return detections[0], bevmap, fps
def write_credit(img, org_author=(500, 400), text_author='github.com/maudzung', org_fps=(50, 1000), fps=None):
font = cv2.FONT_HERSHEY_SIMPLEX
fontScale = 1
color = (255, 255, 255)
thickness = 2
cv2.putText(img, text_author, org_author, font, fontScale, color, thickness, cv2.LINE_AA)
if fps is not None:  # fps defaults to None, so only draw the speed overlay when it is given
cv2.putText(img, 'Speed: {:.1f} FPS'.format(fps), org_fps, font, fontScale, color, thickness, cv2.LINE_AA)
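A hedged sketch of how the helpers above fit together, run inside this module's namespace. The dummy network and the manual device assignment are stand-ins for the real fpn_resnet model and the demo script's own setup, not code from this file:

configs = parse_demo_configs()
configs.device = torch.device('cpu' if configs.no_cuda else 'cuda:{}'.format(configs.gpu_idx))

def dummy_model(x):
    # Returns random head outputs with the shapes decode() expects;
    # a real run would build and load the detector from configs.pretrained_path.
    h, w = configs.hm_size
    return {name: torch.rand(x.size(0), num, h, w) for name, num in configs.heads.items()}

bevmap = torch.rand(3, *configs.input_size)  # stand-in for a rasterized point-cloud BEV map
detections, bevmap, fps = do_detect(configs, dummy_model, bevmap, is_front=True)
print('classes with predictions:', list(detections.keys()), '- {:.1f} FPS'.format(fps))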

+ 183
- 0
point-cloud/sfa/utils/evaluation_utils.py View File

@@ -0,0 +1,183 @@
"""
# -*- coding: utf-8 -*-
-----------------------------------------------------------------------------------
# Author: Nguyen Mau Dung
# DoC: 2020.08.17
# email: nguyenmaudung93.kstn@gmail.com
-----------------------------------------------------------------------------------
# Description: The utils for evaluation
# Adapted from: https://github.com/xingyizhou/CenterNet
"""
from __future__ import division
import os
import sys
import torch
import numpy as np
import torch.nn.functional as F
import cv2
src_dir = os.path.dirname(os.path.realpath(__file__))
# while not src_dir.endswith("sfa"):
# src_dir = os.path.dirname(src_dir)
if src_dir not in sys.path:
sys.path.append(src_dir)
import config.kitti_config as cnf
from data_process.kitti_bev_utils import drawRotatedBox
def _nms(heat, kernel=3):
pad = (kernel - 1) // 2
hmax = F.max_pool2d(heat, (kernel, kernel), stride=1, padding=pad)
keep = (hmax == heat).float()
return heat * keep
def _gather_feat(feat, ind, mask=None):
dim = feat.size(2)
ind = ind.unsqueeze(2).expand(ind.size(0), ind.size(1), dim)
feat = feat.gather(1, ind)
if mask is not None:
mask = mask.unsqueeze(2).expand_as(feat)
feat = feat[mask]
feat = feat.view(-1, dim)
return feat
def _transpose_and_gather_feat(feat, ind):
feat = feat.permute(0, 2, 3, 1).contiguous()
feat = feat.view(feat.size(0), -1, feat.size(3))
feat = _gather_feat(feat, ind)
return feat
def _topk(scores, K=40):
batch, cat, height, width = scores.size()
topk_scores, topk_inds = torch.topk(scores.view(batch, cat, -1), K)
topk_inds = topk_inds % (height * width)
topk_ys = (torch.floor_divide(topk_inds, width)).float()
topk_xs = (topk_inds % width).int().float()
topk_score, topk_ind = torch.topk(topk_scores.view(batch, -1), K)
topk_clses = (torch.floor_divide(topk_ind, K)).int()
topk_inds = _gather_feat(topk_inds.view(batch, -1, 1), topk_ind).view(batch, K)
topk_ys = _gather_feat(topk_ys.view(batch, -1, 1), topk_ind).view(batch, K)
topk_xs = _gather_feat(topk_xs.view(batch, -1, 1), topk_ind).view(batch, K)
return topk_score, topk_inds, topk_clses, topk_ys, topk_xs
def _topk_channel(scores, K=40):
batch, cat, height, width = scores.size()
topk_scores, topk_inds = torch.topk(scores.view(batch, cat, -1), K)
topk_inds = topk_inds % (height * width)
topk_ys = (torch.floor_divide(topk_inds, width)).float()
topk_xs = (topk_inds % width).int().float()
return topk_scores, topk_inds, topk_ys, topk_xs
def decode(hm_cen, cen_offset, direction, z_coor, dim, K=40):
batch_size, num_classes, height, width = hm_cen.size()
hm_cen = _nms(hm_cen)
scores, inds, clses, ys, xs = _topk(hm_cen, K=K)
if cen_offset is not None:
cen_offset = _transpose_and_gather_feat(cen_offset, inds)
cen_offset = cen_offset.view(batch_size, K, 2)
xs = xs.view(batch_size, K, 1) + cen_offset[:, :, 0:1]
ys = ys.view(batch_size, K, 1) + cen_offset[:, :, 1:2]
else:
xs = xs.view(batch_size, K, 1) + 0.5
ys = ys.view(batch_size, K, 1) + 0.5
direction = _transpose_and_gather_feat(direction, inds)
direction = direction.view(batch_size, K, 2)
z_coor = _transpose_and_gather_feat(z_coor, inds)
z_coor = z_coor.view(batch_size, K, 1)
dim = _transpose_and_gather_feat(dim, inds)
dim = dim.view(batch_size, K, 3)
clses = clses.view(batch_size, K, 1).float()
scores = scores.view(batch_size, K, 1)
# (scores x 1, ys x 1, xs x 1, z_coor x 1, dim x 3, direction x 2, clses x 1)
# (scores-0:1, ys-1:2, xs-2:3, z_coor-3:4, dim-4:7, direction-7:9, clses-9:10)
# detections: [batch_size, K, 10]
detections = torch.cat([scores, xs, ys, z_coor, dim, direction, clses], dim=2)
return detections
def get_yaw(direction):
return np.arctan2(direction[:, 0:1], direction[:, 1:2])
def post_processing(detections, num_classes=3, down_ratio=4, peak_thresh=0.2):
"""
:param detections: [batch_size, K, 10]
# (scores x 1, xs x 1, ys x 1, z_coor x 1, dim x 3, direction x 2, clses x 1)
# (scores-0:1, xs-1:2, ys-2:3, z_coor-3:4, dim-4:7, direction-7:9, clses-9:10)
:return:
"""
# TODO: Need to consider rescale to the original scale: x, y
ret = []
for i in range(detections.shape[0]):
top_preds = {}
classes = detections[i, :, -1]
for j in range(num_classes):
inds = (classes == j)
# x, y, z, h, w, l, yaw
top_preds[j] = np.concatenate([
detections[i, inds, 0:1],
detections[i, inds, 1:2] * down_ratio,
detections[i, inds, 2:3] * down_ratio,
detections[i, inds, 3:4],
detections[i, inds, 4:5],
detections[i, inds, 5:6] / cnf.bound_size_y * cnf.BEV_WIDTH,
detections[i, inds, 6:7] / cnf.bound_size_x * cnf.BEV_HEIGHT,
get_yaw(detections[i, inds, 7:9]).astype(np.float32)], axis=1)
# Filter by peak_thresh
if len(top_preds[j]) > 0:
keep_inds = (top_preds[j][:, 0] > peak_thresh)
top_preds[j] = top_preds[j][keep_inds]
ret.append(top_preds)
return ret
def draw_predictions(img, detections, num_classes=3):
for j in range(num_classes):
if len(detections[j]) > 0:
for det in detections[j]:
# (scores-0:1, x-1:2, y-2:3, z-3:4, dim-4:7, yaw-7:8)
_score, _x, _y, _z, _h, _w, _l, _yaw = det
drawRotatedBox(img, _x, _y, _w, _l, _yaw, cnf.colors[int(j)])
return img
def convert_det_to_real_values(detections, num_classes=3):
kitti_dets = []
for cls_id in range(num_classes):
if len(detections[cls_id]) > 0:
for det in detections[cls_id]:
# (scores-0:1, x-1:2, y-2:3, z-3:4, dim-4:7, yaw-7:8)
_score, _x, _y, _z, _h, _w, _l, _yaw = det
_yaw = round(-_yaw, 2)
x = round(_y / cnf.BEV_HEIGHT * cnf.bound_size_x + cnf.boundary['minX'], 2)
y = round(_x / cnf.BEV_WIDTH * cnf.bound_size_y + cnf.boundary['minY'], 2)
z = round(_z + cnf.boundary['minZ'], 2)
w = round(_w / cnf.BEV_WIDTH * cnf.bound_size_y, 2)
l = round(_l / cnf.BEV_HEIGHT * cnf.bound_size_x, 2)
h = round(_h, 2)
kitti_dets.append([cls_id, h, w, l, x, y, z, _yaw])
return np.array(kitti_dets)
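As the layout comments in decode note, it returns a [batch_size, K, 10] tensor ordered (score, x, y, z, dim x 3, direction x 2, class). A quick shape check with random head outputs, run inside this module's namespace (the 152 x 152 head size and three classes mirror the configs used elsewhere in this commit):

batch, K = 1, 50
hm_cen = torch.rand(batch, 3, 152, 152)  # per-class center heatmap
cen_offset = torch.rand(batch, 2, 152, 152)  # sub-pixel center offset
direction = torch.rand(batch, 2, 152, 152)  # (sin, cos) of yaw
z_coor = torch.rand(batch, 1, 152, 152)
dim = torch.rand(batch, 3, 152, 152)  # h, w, l
dets = decode(hm_cen, cen_offset, direction, z_coor, dim, K=K)
assert dets.shape == (batch, K, 10)
per_class = post_processing(dets.numpy(), num_classes=3)[0]  # dict: class id -> (N, 8) array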

+ 49
- 0
point-cloud/sfa/utils/logger.py View File

@@ -0,0 +1,49 @@
"""
# -*- coding: utf-8 -*-
-----------------------------------------------------------------------------------
# Author: Nguyen Mau Dung
# DoC: 2020.07.31
# email: nguyenmaudung93.kstn@gmail.com
-----------------------------------------------------------------------------------
# Description: Logging utilities used during training
"""
import os
import logging
class Logger():
"""
Create logger to save logs during training
Args:
logs_dir:
saved_fn:
Returns:
"""
def __init__(self, logs_dir, saved_fn):
logger_fn = 'logger_{}.txt'.format(saved_fn)
logger_path = os.path.join(logs_dir, logger_fn)
self.logger = logging.getLogger(__name__)
self.logger.setLevel(logging.INFO)
# formatter = logging.Formatter('%(asctime)s:File %(module)s.py:Func %(funcName)s:Line %(lineno)d:%(levelname)s: %(message)s')
formatter = logging.Formatter(
'%(asctime)s: %(module)s.py - %(funcName)s(), at Line %(lineno)d:%(levelname)s:\n%(message)s')
file_handler = logging.FileHandler(logger_path)
file_handler.setLevel(logging.INFO)
file_handler.setFormatter(formatter)
stream_handler = logging.StreamHandler()
stream_handler.setFormatter(formatter)
self.logger.addHandler(file_handler)
self.logger.addHandler(stream_handler)
def info(self, message):
self.logger.info(message)
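A short usage sketch for Logger; the directory and filename suffix are arbitrary, and logging.FileHandler requires that logs_dir already exists:

import os

os.makedirs('logs', exist_ok=True)
logger = Logger(logs_dir='logs', saved_fn='fpn_resnet_18')
logger.info('messages go to logs/logger_fpn_resnet_18.txt and to the console')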

+ 312
- 0
point-cloud/sfa/utils/lr_scheduler.py View File

@@ -0,0 +1,312 @@
import types
import torch
from torch.optim import SGD, lr_scheduler
import numpy as np
class _LRMomentumScheduler(lr_scheduler._LRScheduler):
def __init__(self, optimizer, last_epoch=-1):
if last_epoch == -1:
for group in optimizer.param_groups:
group.setdefault('initial_momentum', group['momentum'])
else:
for i, group in enumerate(optimizer.param_groups):
if 'initial_momentum' not in group:
raise KeyError("param 'initial_momentum' is not specified "
"in param_groups[{}] when resuming an optimizer".format(i))
self.base_momentums = list(map(lambda group: group['initial_momentum'], optimizer.param_groups))
super().__init__(optimizer, last_epoch)
def get_lr(self):
raise NotImplementedError
def get_momentum(self):
raise NotImplementedError
def step(self, epoch=None):
if epoch is None:
epoch = self.last_epoch + 1
self.last_epoch = epoch
for param_group, lr, momentum in zip(self.optimizer.param_groups, self.get_lr(), self.get_momentum()):
param_group['lr'] = lr
param_group['momentum'] = momentum
class ParameterUpdate(object):
"""A callable class used to define an arbitrary schedule defined by a list.
This object is designed to be passed to the LambdaLR or LambdaScheduler scheduler to apply
the given schedule. If a base_param is zero, no updates are applied.
Arguments:
params {list or numpy.array} -- List or numpy array defining parameter schedule.
base_param {float} -- Parameter value used to initialize the optimizer.
"""
def __init__(self, params, base_param):
self.params = np.hstack([params, 0])
self.base_param = base_param
if base_param < 1e-12:
self.base_param = 1
self.params = self.params * 0.0 + 1.0
def __call__(self, epoch):
return self.params[epoch] / self.base_param
def apply_lambda(last_epoch, bases, lambdas):
return [base * lmbda(last_epoch) for lmbda, base in zip(lambdas, bases)]
class LambdaScheduler(_LRMomentumScheduler):
"""Sets the learning rate and momentum of each parameter group to the initial lr and momentum
times a given function. When last_epoch=-1, sets initial lr and momentum to the optimizer
values.
Args:
optimizer (Optimizer): Wrapped optimizer.
lr_lambda (function or list): A function which computes a multiplicative
factor given an integer parameter epoch, or a list of such
functions, one for each group in optimizer.param_groups.
Default: lambda x:x.
momentum_lambda (function or list): As for lr_lambda but applied to momentum.
Default: lambda x:x.
last_epoch (int): The index of last epoch. Default: -1.
Example:
>>> # Assuming optimizer has two groups.
>>> lr_lambda = [
... lambda epoch: epoch // 30,
... lambda epoch: 0.95 ** epoch
... ]
>>> mom_lambda = [
... lambda epoch: max(0, (50 - epoch) // 50),
... lambda epoch: 0.99 ** epoch
... ]
>>> scheduler = LambdaScheduler(optimizer, lr_lambda, mom_lambda)
>>> for epoch in range(100):
>>> train(...)
>>> validate(...)
>>> scheduler.step()
"""
def __init__(self, optimizer, lr_lambda=lambda x: x, momentum_lambda=lambda x: x, last_epoch=-1):
self.optimizer = optimizer
if not isinstance(lr_lambda, (list, tuple)):
self.lr_lambdas = [lr_lambda] * len(optimizer.param_groups)
else:
if len(lr_lambda) != len(optimizer.param_groups):
raise ValueError("Expected {} lr_lambdas, but got {}".format(
len(optimizer.param_groups), len(lr_lambda)))
self.lr_lambdas = list(lr_lambda)
if not isinstance(momentum_lambda, (list, tuple)):
self.momentum_lambdas = [momentum_lambda] * len(optimizer.param_groups)
else:
if len(momentum_lambda) != len(optimizer.param_groups):
raise ValueError("Expected {} momentum_lambdas, but got {}".format(
len(optimizer.param_groups), len(momentum_lambda)))
self.momentum_lambdas = list(momentum_lambda)
self.last_epoch = last_epoch
super().__init__(optimizer, last_epoch)
def state_dict(self):
"""Returns the state of the scheduler as a :class:`dict`.
It contains an entry for every variable in self.__dict__ which
is not the optimizer.
The learning rate and momentum lambda functions will only be saved if they are
callable objects and not if they are functions or lambdas.
"""
state_dict = {key: value for key, value in self.__dict__.items()
if key not in ('optimizer', 'lr_lambdas', 'momentum_lambdas')}
state_dict['lr_lambdas'] = [None] * len(self.lr_lambdas)
state_dict['momentum_lambdas'] = [None] * len(self.momentum_lambdas)
for idx, (lr_fn, mom_fn) in enumerate(zip(self.lr_lambdas, self.momentum_lambdas)):
if not isinstance(lr_fn, types.FunctionType):
state_dict['lr_lambdas'][idx] = lr_fn.__dict__.copy()
if not isinstance(mom_fn, types.FunctionType):
state_dict['momentum_lambdas'][idx] = mom_fn.__dict__.copy()
return state_dict
def load_state_dict(self, state_dict):
"""Loads the schedulers state.
Arguments:
state_dict (dict): scheduler state. Should be an object returned
from a call to :meth:`state_dict`.
"""
lr_lambdas = state_dict.pop('lr_lambdas')
momentum_lambdas = state_dict.pop('momentum_lambdas')
self.__dict__.update(state_dict)
for idx, fn in enumerate(lr_lambdas):
if fn is not None:
self.lr_lambdas[idx].__dict__.update(fn)
for idx, fn in enumerate(momentum_lambdas):
if fn is not None:
self.momentum_lambdas[idx].__dict__.update(fn)
def get_lr(self):
return apply_lambda(self.last_epoch, self.base_lrs, self.lr_lambdas)
def get_momentum(self):
return apply_lambda(self.last_epoch, self.base_momentums, self.momentum_lambdas)
class ListScheduler(LambdaScheduler):
"""Sets the learning rate and momentum of each parameter group to values defined by lists.
When last_epoch=-1, sets initial lr and momentum to the optimizer values. One of both of lr
and momentum schedules may be specified.
Note that the parameters used to initialize the optimizer are overriden by those defined by
this scheduler.
Args:
optimizer (Optimizer): Wrapped optimizer.
lrs (list or numpy.ndarray): A list of learning rates, or a list of lists, one for each
parameter group. One- or two-dimensional numpy arrays may also be passed.
momentum (list or numpy.ndarray): A list of momentums, or a list of lists, one for each
parameter group. One- or two-dimensional numpy arrays may also be passed.
last_epoch (int): The index of last epoch. Default: -1.
Example:
>>> # Assuming optimizer has two groups.
>>> lrs = [
... np.linspace(0.01, 0.1, 100),
... np.logspace(-2, 0, 100)
... ]
>>> momentums = [
... np.linspace(0.85, 0.95, 100),
... np.linspace(0.8, 0.99, 100)
... ]
>>> scheduler = ListScheduler(optimizer, lrs, momentums)
>>> for epoch in range(100):
>>> train(...)
>>> validate(...)
>>> scheduler.step()
"""
def __init__(self, optimizer, lrs=None, momentums=None, last_epoch=-1):
groups = optimizer.param_groups
if lrs is None:
lr_lambda = lambda x: x
else:
lrs = np.array(lrs) if isinstance(lrs, (list, tuple)) else lrs
if len(lrs.shape) == 1:
lr_lambda = [ParameterUpdate(lrs, g['lr']) for g in groups]
else:
lr_lambda = [ParameterUpdate(l, g['lr']) for l, g in zip(lrs, groups)]
if momentums is None:
momentum_lambda = lambda x: x
else:
momentums = np.array(momentums) if isinstance(momentums, (list, tuple)) else momentums
if len(momentums.shape) == 1:
momentum_lambda = [ParameterUpdate(momentums, g['momentum']) for g in groups]
else:
momentum_lambda = [ParameterUpdate(l, g['momentum']) for l, g in zip(momentums, groups)]
super().__init__(optimizer, lr_lambda, momentum_lambda)
class RangeFinder(ListScheduler):
"""Scheduler class that implements the LR range search specified in:
A disciplined approach to neural network hyper-parameters: Part 1 -- learning rate, batch
size, momentum, and weight decay. Leslie N. Smith, 2018, arXiv:1803.09820.
Logarithmically spaced learning rates from 1e-7 to 1 are searched. The number of increments in
that range is determined by 'epochs'.
Note that the parameters used to initialize the optimizer are overriden by those defined by
this scheduler.
Args:
optimizer (Optimizer): Wrapped optimizer.
epochs (int): Number of epochs over which to run test.
Example:
>>> scheduler = RangeFinder(optimizer, 100)
>>> for epoch in range(100):
>>> train(...)
>>> validate(...)
>>> scheduler.step()
"""
def __init__(self, optimizer, epochs):
lrs = np.logspace(-7, 0, epochs)
super().__init__(optimizer, lrs)
class OneCyclePolicy(ListScheduler):
"""Scheduler class that implements the 1cycle policy search specified in:
A disciplined approach to neural network hyper-parameters: Part 1 -- learning rate, batch
size, momentum, and weight decay. Leslie N. Smith, 2018, arXiv:1803.09820.
Args:
optimizer (Optimizer): Wrapped optimizer.
lr (float or list). Maximum learning rate in range. If a list of values is passed, they
should correspond to parameter groups.
epochs (int): The number of epochs to use during search.
momentum_rng (list). Optional upper and lower momentum values (may be both equal). Set to
None to run without momentum. Default: [0.85, 0.95]. If a list of lists is passed, they
should correspond to parameter groups.
phase_ratio (float): Fraction of epochs used for the increasing and decreasing phase of
the schedule. For example, if phase_ratio=0.45 and epochs=100, the learning rate will
increase from lr/10 to lr over 45 epochs, then decrease back to lr/10 over 45 epochs,
then decrease to lr/100 over the remaining 10 epochs. Default: 0.45.
"""
def __init__(self, optimizer, lr, epochs, momentum_rng=[0.85, 0.95], phase_ratio=0.45):
phase_epochs = int(phase_ratio * epochs)
if isinstance(lr, (list, tuple)):
lrs = [
np.hstack([
np.linspace(l * 1e-1, l, phase_epochs),
np.linspace(l, l * 1e-1, phase_epochs),
np.linspace(l * 1e-1, l * 1e-2, epochs - 2 * phase_epochs),
]) for l in lr
]
else:
lrs = np.hstack([
np.linspace(lr * 1e-1, lr, phase_epochs),
np.linspace(lr, lr * 1e-1, phase_epochs),
np.linspace(lr * 1e-1, lr * 1e-2, epochs - 2 * phase_epochs),
])
if momentum_rng is not None:
momentum_rng = np.array(momentum_rng)
if len(momentum_rng.shape) == 2:
for i, g in enumerate(optimizer.param_groups):
g['momentum'] = momentum_rng[i][1]
momentums = [
np.hstack([
np.linspace(m[1], m[0], phase_epochs),
np.linspace(m[0], m[1], phase_epochs),
np.linspace(m[1], m[1], epochs - 2 * phase_epochs),
]) for m in momentum_rng
]
else:
for i, g in enumerate(optimizer.param_groups):
g['momentum'] = momentum_rng[1]
momentums = np.hstack([
np.linspace(momentum_rng[1], momentum_rng[0], phase_epochs),
np.linspace(momentum_rng[0], momentum_rng[1], phase_epochs),
np.linspace(momentum_rng[1], momentum_rng[1], epochs - 2 * phase_epochs),
])
else:
momentums = None
super().__init__(optimizer, lrs, momentums)
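OneCyclePolicy is the only scheduler above without an Example block in its docstring; a sketch in the same spirit, with an illustrative toy model and hyper-parameters (momentum must be present in the optimizer's param groups):

import torch

net = torch.nn.Linear(4, 2)
optimizer = torch.optim.SGD(net.parameters(), lr=0.1, momentum=0.95)
scheduler = OneCyclePolicy(optimizer, lr=0.1, epochs=20)
for epoch in range(20):
    # train(...); validate(...)
    scheduler.step()
    print(epoch, optimizer.param_groups[0]['lr'], optimizer.param_groups[0]['momentum'])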

+ 71
- 0
point-cloud/sfa/utils/misc.py View File

@@ -0,0 +1,71 @@
"""
# -*- coding: utf-8 -*-
-----------------------------------------------------------------------------------
# Author: Nguyen Mau Dung
# DoC: 2020.07.31
# email: nguyenmaudung93.kstn@gmail.com
-----------------------------------------------------------------------------------
# Description: Miscellaneous utilities (folder creation, meters, timing)
"""
import os
import torch
import time
def make_folder(folder_name):
if not os.path.exists(folder_name):
os.makedirs(folder_name)
# or os.makedirs(folder_name, exist_ok=True)
class AverageMeter(object):
"""Computes and stores the average and current value"""
def __init__(self, name, fmt=':f'):
self.name = name
self.fmt = fmt
self.reset()
def reset(self):
self.val = 0
self.avg = 0
self.sum = 0
self.count = 0
def update(self, val, n=1):
self.val = val
self.sum += val * n
self.count += n
self.avg = self.sum / self.count
def __str__(self):
fmtstr = '{name} {val' + self.fmt + '} ({avg' + self.fmt + '})'
return fmtstr.format(**self.__dict__)
class ProgressMeter(object):
def __init__(self, num_batches, meters, prefix=""):
self.batch_fmtstr = self._get_batch_fmtstr(num_batches)
self.meters = meters
self.prefix = prefix
def display(self, batch):
entries = [self.prefix + self.batch_fmtstr.format(batch)]
entries += [str(meter) for meter in self.meters]
print('\t'.join(entries))
def get_message(self, batch):
entries = [self.prefix + self.batch_fmtstr.format(batch)]
entries += [str(meter) for meter in self.meters]
return '\t'.join(entries)
def _get_batch_fmtstr(self, num_batches):
num_digits = len(str(num_batches))
fmt = '{:' + str(num_digits) + 'd}'
return '[' + fmt + '/' + fmt.format(num_batches) + ']'
def time_synchronized():
if torch.cuda.is_available(): torch.cuda.synchronize()  # wait for pending CUDA kernels so the timestamp is accurate
return time.time()
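A brief sketch of how AverageMeter and ProgressMeter compose in a training loop (the loss values and batch size are arbitrary):

losses = AverageMeter('Loss', ':.4e')
progress = ProgressMeter(num_batches=100, meters=[losses], prefix='Train: ')
for batch_idx in range(100):
    losses.update(val=1.0 / (batch_idx + 1), n=16)  # per-batch loss, weighted by batch size
    if batch_idx % 25 == 0:
        progress.display(batch_idx)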

+ 45
- 0
point-cloud/sfa/utils/torch_utils.py View File

@@ -0,0 +1,45 @@
"""
# -*- coding: utf-8 -*-
-----------------------------------------------------------------------------------
# Author: Nguyen Mau Dung
# DoC: 2020.08.09
# email: nguyenmaudung93.kstn@gmail.com
-----------------------------------------------------------------------------------
# Description: some utilities of torch (conversion)
-----------------------------------------------------------------------------------
"""
import torch
import torch.distributed as dist
__all__ = ['convert2cpu', 'convert2cpu_long', 'to_cpu', 'reduce_tensor', 'to_python_float', '_sigmoid']
def convert2cpu(gpu_matrix):
return torch.FloatTensor(gpu_matrix.size()).copy_(gpu_matrix)
def convert2cpu_long(gpu_matrix):
return torch.LongTensor(gpu_matrix.size()).copy_(gpu_matrix)
def to_cpu(tensor):
return tensor.detach().cpu()
def reduce_tensor(tensor, world_size):
rt = tensor.clone()
dist.all_reduce(rt, op=dist.ReduceOp.SUM)
rt /= world_size
return rt
def to_python_float(t):
if hasattr(t, 'item'):
return t.item()
else:
return t[0]
def _sigmoid(x):
return torch.clamp(x.sigmoid_(), min=1e-4, max=1 - 1e-4)
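_sigmoid clamps the activation away from exactly 0 and 1 so the focal-loss logarithms computed downstream stay finite; a quick check:

x = torch.tensor([-100.0, 0.0, 100.0])
print(_sigmoid(x))  # approximately [1e-4, 0.5, 1 - 1e-4]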

+ 140
- 0
point-cloud/sfa/utils/train_utils.py View File

@@ -0,0 +1,140 @@
"""
# -*- coding: utf-8 -*-
-----------------------------------------------------------------------------------
# Author: Nguyen Mau Dung
# DoC: 2020.08.09
# email: nguyenmaudung93.kstn@gmail.com
-----------------------------------------------------------------------------------
# Description: utils functions that use for training process
"""
import copy
import os
import math
import sys
import torch
from torch.optim.lr_scheduler import LambdaLR
import matplotlib.pyplot as plt
src_dir = os.path.dirname(os.path.realpath(__file__))
# while not src_dir.endswith("sfa"):
# src_dir = os.path.dirname(src_dir)
if src_dir not in sys.path:
sys.path.append(src_dir)
from utils.lr_scheduler import OneCyclePolicy
def create_optimizer(configs, model):
"""Create optimizer for training process
"""
if hasattr(model, 'module'):
train_params = [param for param in model.module.parameters() if param.requires_grad]
else:
train_params = [param for param in model.parameters() if param.requires_grad]
if configs.optimizer_type == 'sgd':
optimizer = torch.optim.SGD(train_params, lr=configs.lr, momentum=configs.momentum, nesterov=True)
elif configs.optimizer_type == 'adam':
optimizer = torch.optim.Adam(train_params, lr=configs.lr, weight_decay=configs.weight_decay)
else:
assert False, "Unknown optimizer type"
return optimizer
def create_lr_scheduler(optimizer, configs):
"""Create learning rate scheduler for training process"""
if configs.lr_type == 'multi_step':
def multi_step_scheduler(i):
if i < configs.steps[0]:
factor = 1.
elif i < configs.steps[1]:
factor = 0.1
else:
factor = 0.01
return factor
lr_scheduler = LambdaLR(optimizer, multi_step_scheduler)
elif configs.lr_type == 'cosin':
# Scheduler https://arxiv.org/pdf/1812.01187.pdf
lf = lambda x: (((1 + math.cos(x * math.pi / configs.num_epochs)) / 2) ** 1.0) * 0.9 + 0.1 # cosine
lr_scheduler = LambdaLR(optimizer, lr_lambda=lf)
elif configs.lr_type == 'one_cycle':
lr_scheduler = OneCyclePolicy(optimizer, configs.lr, configs.num_epochs, momentum_rng=[0.85, 0.95],
phase_ratio=0.45)
else:
raise ValueError('Unknown lr_type: {}'.format(configs.lr_type))
plot_lr_scheduler(optimizer, lr_scheduler, configs.num_epochs, save_dir=configs.logs_dir, lr_type=configs.lr_type)
return lr_scheduler
def get_saved_state(model, optimizer, lr_scheduler, epoch, configs):
"""Get the information to save with checkpoints"""
if hasattr(model, 'module'):
model_state_dict = model.module.state_dict()
else:
model_state_dict = model.state_dict()
utils_state_dict = {
'epoch': epoch,
'configs': configs,
'optimizer': copy.deepcopy(optimizer.state_dict()),
'lr_scheduler': copy.deepcopy(lr_scheduler.state_dict())
}
return model_state_dict, utils_state_dict
def save_checkpoint(checkpoints_dir, saved_fn, model_state_dict, utils_state_dict, epoch):
"""Save checkpoint every epoch only is best model or after every checkpoint_freq epoch"""
model_save_path = os.path.join(checkpoints_dir, 'Model_{}_epoch_{}.pth'.format(saved_fn, epoch))
utils_save_path = os.path.join(checkpoints_dir, 'Utils_{}_epoch_{}.pth'.format(saved_fn, epoch))
torch.save(model_state_dict, model_save_path)
torch.save(utils_state_dict, utils_save_path)
print('save a checkpoint at {}'.format(model_save_path))
def plot_lr_scheduler(optimizer, scheduler, num_epochs=300, save_dir='', lr_type=''):
# Plot LR simulating training for full num_epochs
optimizer, scheduler = copy.copy(optimizer), copy.copy(scheduler) # do not modify originals
y = []
for _ in range(num_epochs):
scheduler.step()
y.append(optimizer.param_groups[0]['lr'])
plt.plot(y, '.-', label='LR')
plt.xlabel('epoch')
plt.ylabel('LR')
plt.grid()
plt.xlim(0, num_epochs)
plt.ylim(0)
plt.tight_layout()
plt.savefig(os.path.join(save_dir, 'LR_{}.png'.format(lr_type)), dpi=200)
if __name__ == '__main__':
from easydict import EasyDict as edict
from torchvision.models import resnet18
configs = edict()
configs.steps = [150, 180]
configs.lr_type = 'one_cycle' # multi_step, cosin, one_cycle
configs.logs_dir = '../../logs/'
configs.num_epochs = 50
configs.lr = 2.25e-3
net = resnet18()
optimizer = torch.optim.Adam(net.parameters(), 0.0002)
# scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer, milestones=[3, 6, 9], gamma=0.1)
# scheduler = torch.optim.lr_scheduler.StepLR(optimizer, 3, gamma=0.1)
scheduler = create_lr_scheduler(optimizer, configs)
for i in range(configs.num_epochs):
print(i, scheduler.get_lr())
scheduler.step()

+ 154
- 0
point-cloud/sfa/utils/visualization_utils.py View File

@@ -0,0 +1,154 @@
"""
# -*- coding: utf-8 -*-
-----------------------------------------------------------------------------------
# Author: Nguyen Mau Dung
# DoC: 2020.08.09
# email: nguyenmaudung93.kstn@gmail.com
-----------------------------------------------------------------------------------
# Description: Visualization utilities for 3D boxes on RGB and BEV images
"""
from __future__ import print_function
import os
import sys
import numpy as np
import cv2
src_dir = os.path.dirname(os.path.realpath(__file__))
# while not src_dir.endswith("sfa"):
# src_dir = os.path.dirname(src_dir)
if src_dir not in sys.path:
sys.path.append(src_dir)
import config.kitti_config as cnf
def roty(angle):
# Rotation about the y-axis.
c = np.cos(angle)
s = np.sin(angle)
return np.array([[c, 0, s],
[0, 1, 0],
[-s, 0, c]])
def compute_box_3d(dim, location, ry):
# dim: 3
# location: 3
# ry: 1
# return: 8 x 3
R = roty(ry)
h, w, l = dim
x_corners = [l / 2, l / 2, -l / 2, -l / 2, l / 2, l / 2, -l / 2, -l / 2]
y_corners = [0, 0, 0, 0, -h, -h, -h, -h]
z_corners = [w / 2, -w / 2, -w / 2, w / 2, w / 2, -w / 2, -w / 2, w / 2]
corners = np.array([x_corners, y_corners, z_corners], dtype=np.float32)
corners_3d = np.dot(R, corners)
corners_3d = corners_3d + np.array(location, dtype=np.float32).reshape(3, 1)
return corners_3d.transpose(1, 0)
def project_to_image(pts_3d, P):
# pts_3d: n x 3
# P: 3 x 4
# return: n x 2
pts_3d_homo = np.concatenate([pts_3d, np.ones((pts_3d.shape[0], 1), dtype=np.float32)], axis=1)
pts_2d = np.dot(P, pts_3d_homo.transpose(1, 0)).transpose(1, 0)
pts_2d = pts_2d[:, :2] / pts_2d[:, 2:]
return pts_2d.astype(np.int32)
def draw_box_3d_v2(image, qs, color=(255, 0, 255), thickness=2):
''' Draw 3d bounding box in image
qs: (8,2) array of the projected vertices of the 3d box, in the following order:
1 -------- 0
/| /|
2 -------- 3 .
| | | |
. 5 -------- 4
|/ |/
6 -------- 7
'''
qs = qs.astype(np.int32)
for k in range(0, 4):
# Ref: http://docs.enthought.com/mayavi/mayavi/auto/mlab_helper_functions.html
i, j = k, (k + 1) % 4
# use LINE_AA for opencv3
cv2.line(image, (qs[i, 0], qs[i, 1]), (qs[j, 0], qs[j, 1]), color, thickness)
i, j = k + 4, (k + 1) % 4 + 4
cv2.line(image, (qs[i, 0], qs[i, 1]), (qs[j, 0], qs[j, 1]), color, thickness)
i, j = k, k + 4
cv2.line(image, (qs[i, 0], qs[i, 1]), (qs[j, 0], qs[j, 1]), color, thickness)
return image
def draw_box_3d(image, corners, color=(0, 0, 255)):
''' Draw 3d bounding box in image
corners: (8,2) array of the projected vertices of the 3d box, in the following order:
1 -------- 0
/| /|
2 -------- 3 .
| | | |
. 5 -------- 4
|/ |/
6 -------- 7
'''
face_idx = [[0, 1, 5, 4],
[1, 2, 6, 5],
[2, 3, 7, 6],
[3, 0, 4, 7]]
for ind_f in range(3, -1, -1):
f = face_idx[ind_f]
for j in range(4):
cv2.line(image, (corners[f[j], 0], corners[f[j], 1]),
(corners[f[(j + 1) % 4], 0], corners[f[(j + 1) % 4], 1]), color, 2, lineType=cv2.LINE_AA)
if ind_f == 0:
cv2.line(image, (corners[f[0], 0], corners[f[0], 1]),
(corners[f[2], 0], corners[f[2], 1]), color, 1, lineType=cv2.LINE_AA)
cv2.line(image, (corners[f[1], 0], corners[f[1], 1]),
(corners[f[3], 0], corners[f[3], 1]), color, 1, lineType=cv2.LINE_AA)
return image
def show_rgb_image_with_boxes(img, labels, calib):
for box_idx, label in enumerate(labels):
cls_id, location, dim, ry = label[0], label[1:4], label[4:7], label[7]
if location[2] < 2.0: # The object is too close to the camera, ignore it during visualization
continue
if cls_id < 0:
continue
corners_3d = compute_box_3d(dim, location, ry)
corners_2d = project_to_image(corners_3d, calib.P2)
img = draw_box_3d(img, corners_2d, color=cnf.colors[int(cls_id)])
return img
def merge_rgb_to_bev(img_rgb, img_bev, output_width):
img_rgb_h, img_rgb_w = img_rgb.shape[:2]
ratio_rgb = output_width / img_rgb_w
output_rgb_h = int(ratio_rgb * img_rgb_h)
ret_img_rgb = cv2.resize(img_rgb, (output_width, output_rgb_h))
img_bev_h, img_bev_w = img_bev.shape[:2]
ratio_bev = output_width / img_bev_w
output_bev_h = int(ratio_bev * img_bev_h)
ret_img_bev = cv2.resize(img_bev, (output_width, output_bev_h))
out_img = np.zeros((output_rgb_h + output_bev_h, output_width, 3), dtype=np.uint8)
# Upper: RGB --> BEV
out_img[:output_rgb_h, ...] = ret_img_rgb
out_img[output_rgb_h:, ...] = ret_img_bev
return out_img
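A hedged end-to-end sketch of the projection pipeline above, run inside this module's namespace, with a hypothetical 3x4 camera matrix standing in for a real KITTI calib.P2:

dim = (1.5, 1.6, 3.9)  # h, w, l of a typical car, in meters
location = (0.0, 1.5, 10.0)  # x, y, z in camera coordinates (10 m ahead)
ry = 0.3  # yaw around the camera y-axis, in radians

P = np.array([[700., 0., 600., 0.],
              [0., 700., 180., 0.],
              [0., 0., 1., 0.]])  # hypothetical projection matrix

corners_3d = compute_box_3d(dim, location, ry)  # (8, 3) box corners in 3D
corners_2d = project_to_image(corners_3d, P)  # (8, 2) pixel coordinates
canvas = np.zeros((375, 1242, 3), dtype=np.uint8)  # blank KITTI-sized image
canvas = draw_box_3d(canvas, corners_2d)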
