@@ -1 +0,0 @@ | |||
Subproject commit e4c429e813608acbcf487656abe2eb87dcc4636c |
@@ -0,0 +1,9 @@ | |||
dataset | |||
# cache | |||
__pycache__ | |||
# results | |||
results | |||
# logs | |||
logs |
@@ -0,0 +1,14 @@ | |||
<?xml version="1.0" encoding="UTF-8"?> | |||
<project version="4"> | |||
<component name="PublishConfigData"> | |||
<serverData> | |||
<paths name="root@10.5.24.134:10000"> | |||
<serverdata> | |||
<mappings> | |||
<mapping local="$PROJECT_DIR$" web="/" /> | |||
</mappings> | |||
</serverdata> | |||
</paths> | |||
</serverData> | |||
</component> | |||
</project> |
@@ -0,0 +1,6 @@ | |||
<component name="InspectionProjectProfileManager"> | |||
<settings> | |||
<option name="USE_PROJECT_PROFILE" value="false" /> | |||
<version value="1.0" /> | |||
</settings> | |||
</component> |
@@ -0,0 +1,7 @@ | |||
<?xml version="1.0" encoding="UTF-8"?> | |||
<project version="4"> | |||
<component name="JavaScriptSettings"> | |||
<option name="languageLevel" value="ES6" /> | |||
</component> | |||
<component name="ProjectRootManager" version="2" project-jdk-name="Python 3.7" project-jdk-type="Python SDK" /> | |||
</project> |
@@ -0,0 +1,8 @@ | |||
<?xml version="1.0" encoding="UTF-8"?> | |||
<project version="4"> | |||
<component name="ProjectModuleManager"> | |||
<modules> | |||
<module fileurl="file://$PROJECT_DIR$/.idea/sfa3d.iml" filepath="$PROJECT_DIR$/.idea/sfa3d.iml" /> | |||
</modules> | |||
</component> | |||
</project> |
@@ -0,0 +1,12 @@ | |||
<?xml version="1.0" encoding="UTF-8"?> | |||
<module type="PYTHON_MODULE" version="4"> | |||
<component name="NewModuleRootManager"> | |||
<content url="file://$MODULE_DIR$" /> | |||
<orderEntry type="inheritedJdk" /> | |||
<orderEntry type="sourceFolder" forTests="false" /> | |||
</component> | |||
<component name="PyDocumentationSettings"> | |||
<option name="format" value="PLAIN" /> | |||
<option name="myDocStringFormat" value="Plain" /> | |||
</component> | |||
</module> |
@@ -0,0 +1,6 @@ | |||
<?xml version="1.0" encoding="UTF-8"?> | |||
<project version="4"> | |||
<component name="VcsDirectoryMappings"> | |||
<mapping directory="$PROJECT_DIR$" vcs="Git" /> | |||
</component> | |||
</project> |
@@ -0,0 +1,49 @@ | |||
<?xml version="1.0" encoding="UTF-8"?> | |||
<project version="4"> | |||
<component name="ChangeListManager"> | |||
<list default="true" id="ba6cd492-6d49-41a8-a764-504006f2eb9a" name="Changes" comment="" /> | |||
<option name="SHOW_DIALOG" value="false" /> | |||
<option name="HIGHLIGHT_CONFLICTS" value="true" /> | |||
<option name="HIGHLIGHT_NON_ACTIVE_CHANGELIST" value="false" /> | |||
<option name="LAST_RESOLUTION" value="IGNORE" /> | |||
</component> | |||
<component name="Git.Settings"> | |||
<option name="RECENT_BRANCH_BY_REPOSITORY"> | |||
<map> | |||
<entry key="$PROJECT_DIR$" value="master" /> | |||
</map> | |||
</option> | |||
<option name="RECENT_GIT_ROOT_PATH" value="$PROJECT_DIR$" /> | |||
</component> | |||
<component name="ProjectId" id="2E4AHz6idZOBGHdHApv98dU5PkK" /> | |||
<component name="ProjectViewState"> | |||
<option name="hideEmptyMiddlePackages" value="true" /> | |||
<option name="showLibraryContents" value="true" /> | |||
</component> | |||
<component name="PropertiesComponent"> | |||
<property name="RunOnceActivity.OpenProjectViewOnStart" value="true" /> | |||
<property name="RunOnceActivity.ShowReadmeOnStart" value="true" /> | |||
</component> | |||
<component name="SpellCheckerSettings" RuntimeDictionaries="0" Folders="0" CustomDictionaries="0" DefaultDictionary="application-level" UseSingleDictionary="true" transferred="true" /> | |||
<component name="TaskManager"> | |||
<task active="true" id="Default" summary="Default task"> | |||
<changelist id="ba6cd492-6d49-41a8-a764-504006f2eb9a" name="Changes" comment="" /> | |||
<created>1661844398596</created> | |||
<option name="number" value="Default" /> | |||
<option name="presentableId" value="Default" /> | |||
<updated>1661844398596</updated> | |||
</task> | |||
<servers /> | |||
</component> | |||
<component name="Vcs.Log.Tabs.Properties"> | |||
<option name="TAB_STATES"> | |||
<map> | |||
<entry key="MAIN"> | |||
<value> | |||
<State /> | |||
</value> | |||
</entry> | |||
</map> | |||
</option> | |||
</component> | |||
</project> |
@@ -0,0 +1,21 @@ | |||
MIT License | |||
Copyright (c) 2020 Nguyen Mau Dung | |||
Permission is hereby granted, free of charge, to any person obtaining a copy | |||
of this software and associated documentation files (the "Software"), to deal | |||
in the Software without restriction, including without limitation the rights | |||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell | |||
copies of the Software, and to permit persons to whom the Software is | |||
furnished to do so, subject to the following conditions: | |||
The above copyright notice and this permission notice shall be included in all | |||
copies or substantial portions of the Software. | |||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | |||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | |||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | |||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | |||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, | |||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | |||
SOFTWARE. |
@@ -0,0 +1,116 @@ | |||
# Super Fast and Accurate 3D Object Detection based on 3D LiDAR Point Clouds | |||
[![python-image]][python-url] | |||
[![pytorch-image]][pytorch-url] | |||
--- | |||
## 1. Getting Started | |||
### 1.1 Requirement | |||
The instructions for setting up a virtual environment are [here](https://github.com/maudzung/virtual_environment_python3).
```shell script | |||
cd SFA3D/ | |||
pip install -r requirements.txt | |||
``` | |||
### 1.2 Data Preparation | |||
Download the 3D KITTI detection dataset from [here](http://www.cvlibs.net/datasets/kitti/eval_object.php?obj_benchmark=3d). | |||
The downloaded data includes: | |||
- Velodyne point clouds _**(29 GB)**_ | |||
- Training labels of object data set _**(5 MB)**_ | |||
Please make sure that the source code and dataset directories are structured as shown in the "Folder structure" section below.
## 2. How to run | |||
### 2.1 Inference | |||
The pre-trained model was pushed to this repo. | |||
- **CPU** | |||
``` | |||
python inference.py --no_cuda=True | |||
``` | |||
- **GPU** | |||
``` | |||
python inference.py | |||
``` | |||
Inference labels:
- Pedestrian | |||
- Car | |||
- Cyclist | |||
### 2.2 Training | |||
#### 2.2.1 CPU | |||
``` | |||
python train.py --no_cuda=True | |||
``` | |||
#### 2.2.2 Single machine, single gpu | |||
```shell script | |||
python train.py --gpu_idx 0 | |||
``` | |||
#### 2.2.3 Distributed Data Parallel Training | |||
- **Single machine (node), multiple GPUs** | |||
``` | |||
python train.py --multiprocessing-distributed --world-size 1 --rank 0 --batch_size 64 --num_workers 8 | |||
``` | |||
- **Two machines (two nodes), multiple GPUs** | |||
- _**First machine**_ | |||
``` | |||
python train.py --dist-url 'tcp://IP_OF_NODE1:FREEPORT' --multiprocessing-distributed --world-size 2 --rank 0 --batch_size 64 --num_workers 8 | |||
``` | |||
- _**Second machine**_ | |||
``` | |||
python train.py --dist-url 'tcp://IP_OF_NODE2:FREEPORT' --multiprocessing-distributed --world-size 2 --rank 1 --batch_size 64 --num_workers 8 | |||
``` | |||
## References | |||
[1] SFA3D: [PyTorch Implementation](https://github.com/maudzung/SFA3D) | |||
## Folder structure | |||
### Dataset | |||
``` | |||
└── kitti/ | |||
├── image_2/ (left color camera,非必须) | |||
├── calib/ (非必须) | |||
├── label_2/ (标注结果/标签,非必须) | |||
└── velodyne/ (点云文件,必须) | |||
``` | |||
### Checkpoints & Algorithm | |||
``` | |||
${ROOT} | |||
└── checkpoints/ | |||
├── fpn_resnet_18/ | |||
├── fpn_resnet_18_epoch_300.pth (点云目标检测标注模型) | |||
└── sfa/ (点云标注算法) | |||
├── config/ | |||
├── data_process/ | |||
├── models/ | |||
├── utils/ | |||
├── inference.py | |||
└── train.py | |||
├── README.md | |||
├── LICENSE | |||
└── requirements.txt | |||
``` | |||
[python-image]: https://img.shields.io/badge/Python-3.6-ff69b4.svg | |||
[python-url]: https://www.python.org/ | |||
[pytorch-image]: https://img.shields.io/badge/PyTorch-1.5-2BAF2B.svg | |||
[pytorch-url]: https://pytorch.org/ |
@@ -0,0 +1,55 @@ | |||
# Super Fast and Accurate 3D Object Detection based on 3D LiDAR Point Clouds | |||
--- | |||
Technical details of the implementation | |||
## 1. Network architecture | |||
- The **ResNet-based Keypoint Feature Pyramid Network** (KFPN) that was proposed in [RTM3D paper](https://arxiv.org/pdf/2001.03343.pdf). | |||
The unofficial implementation of the RTM3D paper by using PyTorch is [here](https://github.com/maudzung/RTM3D) | |||
- **Input**: | |||
- The model takes a birds-eye-view (BEV) map as input. | |||
- The BEV map is encoded by height, intensity, and density of 3D LiDAR point clouds. Assume that the size of the BEV input is `(H, W, 3)`. | |||
- **Outputs**: | |||
- Heatmap for main center with a size of `(H/S, W/S, C)` where `S=4` _(the down-sample ratio)_, and `C=3` _(the number of classes)_ | |||
- Center offset: `(H/S, W/S, 2)` | |||
- The heading angle _(yaw)_: `(H/S, W/S, 2)`. The model estimates the **im**aginary and the **re**al fraction (`sin(yaw)` and `cos(yaw)` values). | |||
- Dimension _(h, w, l)_: `(H/S, W/S, 3)` | |||
- `z` coordinate: `(H/S, W/S, 1)` | |||
- **Targets**: **7 degrees of freedom** _(7-DOF)_ of objects: `(cx, cy, cz, l, w, h, θ)` | |||
- `cx, cy, cz`: The center coordinates. | |||
- `l, w, h`: length, width, height of the bounding box. | |||
- `θ`: The heading angle in radians of the bounding box. | |||
- **Objects**: Cars, Pedestrians, Cyclists. | |||
## 2. Loss functions
- For main center heatmap: Used `focal loss` | |||
- For heading angle _(yaw)_: The `im` and `re` fractions are directly regressed by using `l1_loss` | |||
- For `z coordinate` and `3 dimensions` (height, width, length), I used `balanced l1 loss` that was proposed by the paper | |||
[Libra R-CNN: Towards Balanced Learning for Object Detection](https://arxiv.org/pdf/1904.02701.pdf) | |||
## 3. Training in details | |||
- Set uniform weights to the above components of losses. (`=1.0` for all) | |||
- Number of epochs: 300. | |||
- Learning rate scheduler: [`cosine`](https://arxiv.org/pdf/1812.01187.pdf), initial learning rate: 0.001. | |||
- Batch size: `16` (on a single GTX 1080Ti). | |||
## 4. Inference | |||
- A `3 × 3` max-pooling operation was applied on the center heat map, then only `50` predictions whose | |||
center confidences are larger than 0.2 were kept. | |||
- The heading angle _(yaw)_ = `arctan`(_imaginary fraction_ / _real fraction_) | |||
## 5. How to expand the work | |||
- The model could be trained with more classes and with a larger detected area by modifying configurations in
the [config/kitti_config.py](https://github.com/maudzung/Super-Fast-Accurate-3D-Object-Detection/blob/master/src/config/kitti_config.py) file.
@@ -0,0 +1,41 @@ | |||
absl-py==1.1.0 | |||
cachetools==4.2.4 | |||
certifi==2022.6.15 | |||
charset-normalizer==2.0.12 | |||
cycler==0.11.0 | |||
easydict==1.9 | |||
future==0.18.2 | |||
google-auth==1.35.0 | |||
google-auth-oauthlib==0.4.6 | |||
grpcio==1.46.3 | |||
idna==3.3 | |||
importlib-metadata==4.11.4 | |||
joblib==1.1.0 | |||
kiwisolver==1.4.3 | |||
Markdown==3.3.7 | |||
matplotlib==3.3.3 | |||
numpy==1.18.3 | |||
oauthlib==3.2.0 | |||
opencv-python==4.2.0.34 | |||
Pillow==8.4.0 | |||
protobuf==3.19.1 | |||
pyasn1==0.4.8 | |||
pyasn1-modules==0.2.8 | |||
pyparsing==3.0.9 | |||
python-dateutil==2.8.2 | |||
requests==2.28.0 | |||
requests-oauthlib==1.3.1 | |||
rsa==4.8 | |||
scikit-learn==0.22.2 | |||
scipy==1.8.1 | |||
six==1.16.0 | |||
tensorboard==2.2.1 | |||
tensorboard-plugin-wit==1.8.1 | |||
torch==1.5.0 | |||
torchsummary==1.5.1 | |||
torchvision==0.6.0 | |||
tqdm==4.54.0 | |||
urllib3==1.26.9 | |||
Werkzeug==2.1.2 | |||
wget==3.2 | |||
zipp==3.8.0 |
@@ -0,0 +1,99 @@ | |||
import math | |||
import numpy as np | |||
# Label class name -> training class id.
# Car and Van share id 1; Pedestrian and Person_sitting share id 0.
# The remaining names map to negative ids (presumably so they can be
# filtered out by the label loader -- TODO confirm against the caller).
CLASS_NAME_TO_ID = {
    'Pedestrian': 0,
    'Car': 1,
    'Cyclist': 2,
    'Van': 1,
    'Truck': -3,
    'Person_sitting': 0,
    'Tram': -99,
    'Misc': -99,
    'TraffiCone': -1,
    'DontCare': -1,
}

# Predicted class id -> display name (test time).
CLASS_ID_TO_NAME = {
    0: 'Pedestrian',  # Person_sitting is merged into this class
    1: 'Car',  # Van is merged into this class
    2: 'Cyclist',
}

# Drawing palette: one 3-channel colour per entry.
colors = [
    [0, 255, 255], [0, 0, 255], [255, 0, 0], [255, 120, 0],
    [255, 120, 120], [0, 120, 0], [120, 255, 255], [120, 0, 255],
]

#####################################################################################
# Region of interest of the point cloud: the full area around the sensor.
boundary = dict(minX=-50, maxX=50, minY=-25, maxY=25, minZ=-2.73, maxZ=1.27)

bound_size_x = boundary['maxX'] - boundary['minX']
bound_size_y = boundary['maxY'] - boundary['minY']
bound_size_z = boundary['maxZ'] - boundary['minZ']

# Same region restricted to the half behind the sensor (x <= 0).
boundary_back = dict(boundary, maxX=0)

BEV_WIDTH = 608  # BEV columns, spanning the y range minY..maxY
BEV_HEIGHT = 1216  # BEV rows, spanning the x range minX..maxX

# Metres per BEV cell.
# NOTE: despite the names, DISCRETIZATION_Y is the x range divided by the
# number of rows, and DISCRETIZATION_X is the y range divided by the number
# of columns.
DISCRETIZATION = bound_size_x / BEV_HEIGHT
DISCRETIZATION_Y = bound_size_x / BEV_HEIGHT
DISCRETIZATION_X = bound_size_y / BEV_WIDTH

# maximum number of points per voxel
T = 35

# voxel size
vd = 0.1  # z
vh = 0.05  # y
vw = 0.05  # x

# voxel grid dimensions
W = math.ceil(bound_size_x / vw)
H = math.ceil(bound_size_y / vh)
D = math.ceil(bound_size_z / vd)

# Following parameters are calculated as an average from KITTI dataset for simplicity
#####################################################################################
Tr_velo_to_cam = np.array([
    [7.49916597e-03, -9.99971248e-01, -8.65110297e-04, -6.71807577e-03],
    [1.18652889e-02, 9.54520517e-04, -9.99910318e-01, -7.33152811e-02],
    [9.99882833e-01, 7.49141178e-03, 1.18719929e-02, -2.78557062e-01],
    [0, 0, 0, 1],
])

# mean computed over the training set
R0 = np.array([
    [0.99992475, 0.00975976, -0.00734152, 0],
    [-0.0097913, 0.99994262, -0.00430371, 0],
    [0.00729911, 0.0043753, 0.99996319, 0],
    [0, 0, 0, 1],
])

P2 = np.array([
    [719.787081, 0., 608.463003, 44.9538775],
    [0., 719.787081, 174.545111, 0.1066855],
    [0., 0., 1., 3.0106472e-03],
    [0., 0., 0., 0],
])

# R0 and Tr_velo_to_cam are inverted directly; P2 has an all-zero last row,
# so the pseudo-inverse is used for it.
R0_inv = np.linalg.inv(R0)
Tr_velo_to_cam_inv = np.linalg.inv(Tr_velo_to_cam)
P2_inv = np.linalg.pinv(P2)
#####################################################################################
@@ -0,0 +1,172 @@ | |||
""" | |||
# -*- coding: utf-8 -*- | |||
----------------------------------------------------------------------------------- | |||
# Author: Nguyen Mau Dung | |||
# DoC: 2020.08.17 | |||
# email: nguyenmaudung93.kstn@gmail.com | |||
----------------------------------------------------------------------------------- | |||
# Description: The configurations of the project will be defined here | |||
""" | |||
import os | |||
import argparse | |||
import torch | |||
from easydict import EasyDict as edict | |||
def _str2bool(value):
    """Convert a command-line token such as ``True``/``false``/``1``/``no`` to a bool."""
    if isinstance(value, bool):
        return value
    text = str(value).strip().lower()
    if text in ('true', 't', 'yes', 'y', '1'):
        return True
    # An empty value (``--no_cuda=``) keeps its historical meaning of "false".
    if text in ('false', 'f', 'no', 'n', '0', ''):
        return False
    raise argparse.ArgumentTypeError('Boolean value expected, got {!r}'.format(value))


def parse_train_configs():
    """Parse the training command line and build the full run configuration.

    Reads ``sys.argv`` through ``argparse``, wraps the result in an
    ``EasyDict`` and adds the derived hardware, model and directory settings.
    The checkpoint and log directories are created if they do not exist.

    Returns:
        EasyDict: parsed arguments plus the derived settings (``device``,
        ``heads``, ``checkpoints_dir``, ``logs_dir``, ...).
    """
    parser = argparse.ArgumentParser(description='The Implementation using PyTorch')
    parser.add_argument('--seed', type=int, default=2020,
                        help='re-produce the results with seed random')
    parser.add_argument('--saved_fn', type=str, default='fpn_resnet_18', metavar='FN',
                        help='The name using for saving logs, models,...')
    parser.add_argument('--root_dir', type=str, default='../', metavar='PATH',
                        help='The ROOT working directory')
    ####################################################################
    ##############     Model configs            ########################
    ####################################################################
    parser.add_argument('--arch', type=str, default='fpn_resnet_18', metavar='ARCH',
                        help='The name of the model architecture')
    parser.add_argument('--model_load_dir', type=str, default=None, metavar='PATH',
                        help='the path of the pretrained checkpoint')
    ####################################################################
    ##############     Dataloader and Running configs            #######
    ####################################################################
    parser.add_argument('--data_url', type=str, default='../dataset/apollo/training', metavar='PATH',
                        help='the path of the dataset')
    parser.add_argument('--val_data_url', type=str, default='../dataset/apollo/val', metavar='PATH',
                        help='the path of the dataset')
    parser.add_argument('--train_model_out', type=str, default='../checkpoints', metavar='PATH',
                        help='the path of the model output')
    parser.add_argument('--train_out', type=str, default='../logs', metavar='PATH',
                        help='the path of the logs output')
    parser.add_argument('--hflip_prob', type=float, default=0.5,
                        help='The probability of horizontal flip')
    parser.add_argument('--no-val', action='store_true',
                        help='If true, dont evaluate the model on the val set')
    parser.add_argument('--num_samples', type=int, default=None,
                        help='Take a subset of the dataset to run and debug')
    parser.add_argument('--num_workers', type=int, default=4,
                        help='Number of threads for loading data')
    # The help text previously advertised the wrong default and lost the
    # spaces between its concatenated fragments.
    parser.add_argument('--batch_size', type=int, default=8,
                        help='mini-batch size (default: 8), this is the total '
                             'batch size of all GPUs on the current node when using '
                             'Data Parallel or Distributed Data Parallel')
    parser.add_argument('--print_freq', type=int, default=50, metavar='N',
                        help='print frequency (default: 50)')
    parser.add_argument('--tensorboard_freq', type=int, default=50, metavar='N',
                        help='frequency of saving tensorboard (default: 50)')
    parser.add_argument('--checkpoint_freq', type=int, default=2, metavar='N',
                        help='frequency of saving checkpoints (default: 2)')
    parser.add_argument('--gpu_num_per_node', type=int, default=1,
                        help='Number of GPU')
    ####################################################################
    ##############     Training strategy            ####################
    ####################################################################
    parser.add_argument('--start_epoch', type=int, default=1, metavar='N',
                        help='the starting epoch')
    parser.add_argument('--num_epochs', type=int, default=300, metavar='N',
                        help='number of total epochs to run')
    parser.add_argument('--lr_type', type=str, default='cosin',
                        help='the type of learning rate scheduler (cosin or multi_step or one_cycle)')
    parser.add_argument('--lr', type=float, default=0.001, metavar='LR',
                        help='initial learning rate')
    parser.add_argument('--minimum_lr', type=float, default=1e-7, metavar='MIN_LR',
                        help='minimum learning rate during training')
    parser.add_argument('--momentum', type=float, default=0.949, metavar='M',
                        help='momentum')
    parser.add_argument('-wd', '--weight_decay', type=float, default=0., metavar='WD',
                        help='weight decay (default: 0.)')
    parser.add_argument('--optimizer_type', type=str, default='adam', metavar='OPTIMIZER',
                        help='the type of optimizer, it can be sgd or adam')
    # type=int so that values given on the command line match the integer
    # defaults instead of arriving as strings.
    parser.add_argument('--steps', nargs='*', type=int, default=[150, 180],
                        help='number of burn in step')
    ####################################################################
    ##############     Distributed Data Parallel            ############
    ####################################################################
    parser.add_argument('--world-size', default=-1, type=int, metavar='N',
                        help='number of nodes for distributed training')
    parser.add_argument('--rank', default=-1, type=int, metavar='N',
                        help='node rank for distributed training')
    parser.add_argument('--dist-url', default='tcp://127.0.0.1:29500', type=str,
                        help='url used to set up distributed training')
    parser.add_argument('--dist-backend', default='nccl', type=str,
                        help='distributed backend')
    parser.add_argument('--gpu_idx', default=0, type=int,
                        help='GPU index to use.')
    # Without a type, ``--no_cuda=False`` produced the truthy string 'False'
    # and silently forced CPU mode. ``--no_cuda=True`` (as documented in the
    # README) and a bare ``--no_cuda`` both still enable CPU mode.
    parser.add_argument('--no_cuda', type=_str2bool, nargs='?', const=True, default=False,
                        help='If true, cuda is not used.')
    parser.add_argument('--multiprocessing-distributed', action='store_true',
                        help='Use multi-processing distributed training to launch '
                             'N processes per node, which has N GPUs. This is the '
                             'fastest way to use PyTorch for either single node or '
                             'multi node data parallel training')
    ####################################################################
    ##############     Evaluation configurations     ###################
    ####################################################################
    parser.add_argument('--evaluate', action='store_true',
                        help='only evaluate the model, not training')
    parser.add_argument('--resume_path', type=str, default=None, metavar='PATH',
                        help='the path of the resumed checkpoint')
    parser.add_argument('--K', type=int, default=50,
                        help='the number of top K')

    configs = edict(vars(parser.parse_args()))

    ####################################################################
    ############## Hardware configurations #############################
    ####################################################################
    configs.device = torch.device('cpu' if configs.no_cuda else 'cuda:{}'.format(configs.gpu_idx))
    configs.ngpus_per_node = torch.cuda.device_count()

    configs.pin_memory = True
    configs.input_size = (1216, 608)
    configs.hm_size = (304, 152)
    configs.down_ratio = 4
    configs.max_objects = 50

    configs.imagenet_pretrained = True
    configs.head_conv = 64
    configs.num_classes = 3
    configs.num_center_offset = 2
    configs.num_z = 1
    configs.num_dim = 3
    configs.num_direction = 2  # sin, cos

    # Output channels of each prediction head.
    configs.heads = {
        'hm_cen': configs.num_classes,
        'cen_offset': configs.num_center_offset,
        'direction': configs.num_direction,
        'z_coor': configs.num_z,
        'dim': configs.num_dim
    }
    configs.num_input_features = 4

    ####################################################################
    ############## Dataset, logs, Checkpoints dir ######################
    ####################################################################
    configs.dataset = 'apollo'  # or kitti
    configs.dataset_dir = configs.data_url
    # Outputs go straight into the given directories; ``saved_fn`` is not
    # appended as a sub-folder.
    configs.checkpoints_dir = configs.train_model_out
    configs.logs_dir = configs.train_out
    configs.pretrained_path = configs.model_load_dir

    # exist_ok avoids the check-then-create race when several processes
    # start at the same time.
    os.makedirs(configs.checkpoints_dir, exist_ok=True)
    os.makedirs(configs.logs_dir, exist_ok=True)

    return configs
@@ -0,0 +1,99 @@ | |||
""" | |||
# -*- coding: utf-8 -*- | |||
----------------------------------------------------------------------------------- | |||
# Author: Nguyen Mau Dung | |||
# DoC: 2020.08.17 | |||
# email: nguyenmaudung93.kstn@gmail.com | |||
----------------------------------------------------------------------------------- | |||
# Description: This script for the KITTI dataset | |||
""" | |||
import sys | |||
import os | |||
from builtins import int | |||
from glob import glob | |||
import numpy as np | |||
from torch.utils.data import Dataset | |||
import cv2 | |||
import torch | |||
src_dir = os.path.dirname(os.path.realpath(__file__)) | |||
# while not src_dir.endswith("sfa"): | |||
# src_dir = os.path.dirname(src_dir) | |||
if src_dir not in sys.path: | |||
sys.path.append(src_dir) | |||
from data_process.kitti_data_utils import get_filtered_lidar | |||
from data_process.kitti_bev_utils import makeBEVMap | |||
import config.kitti_config as cnf | |||
class Demo_KittiDataset(Dataset):
    """Reader for one drive folder, used by the demo scripts.

    The drive is looked up at
    ``<dataset_dir>/<foldername>/<foldername[:10]>/<foldername>`` and is
    expected to contain ``image_02/data/*.png`` and
    ``velodyne_points/data/*.bin`` files named with 10-digit sample ids.
    """

    def __init__(self, configs):
        """Index the LiDAR scans of the drive described by ``configs``.

        Args:
            configs: object exposing ``dataset_dir``, ``foldername``,
                ``input_size``, ``hm_size``, ``num_classes`` and
                ``max_objects``.
        """
        self.dataset_dir = os.path.join(configs.dataset_dir, configs.foldername, configs.foldername[:10],
                                        configs.foldername)
        self.input_size = configs.input_size
        self.hm_size = configs.hm_size
        self.num_classes = configs.num_classes
        self.max_objects = configs.max_objects

        self.image_dir = os.path.join(self.dataset_dir, "image_02", "data")
        self.lidar_dir = os.path.join(self.dataset_dir, "velodyne_points", "data")
        self.label_dir = os.path.join(self.dataset_dir, "label_2", "data")

        lidar_files = sorted(glob(os.path.join(self.lidar_dir, '*.bin')))
        # Ids are stored as floats (historical behaviour); the loaders cast
        # them back to int before building file names.
        self.sample_id_list = [float(os.path.basename(fn)[:-4]) for fn in lidar_files]
        self.num_samples = len(self.sample_id_list)

    def __len__(self):
        return len(self.sample_id_list)

    def __getitem__(self, index):
        # Not implemented: samples are fetched with load_bevmap_front() or
        # load_bevmap_front_vs_back() instead, so indexing returns None.
        pass

    @staticmethod
    def _make_bev_tensor(lidarData, boundary):
        """Filter ``lidarData`` to ``boundary`` and return its BEV map as a tensor."""
        filtered = get_filtered_lidar(lidarData, boundary)
        return torch.from_numpy(makeBEVMap(filtered, boundary))

    def load_bevmap_front(self, index):
        """Load the camera image and the BEV map of ``cnf.boundary`` for one sample.

        Returns:
            tuple: ``(metadatas, front_bevmap, img_rgb)`` where ``metadatas``
            holds the image path under ``'img_path'``.
        """
        sample_id = int(self.sample_id_list[index])
        img_path, img_rgb = self.get_image(sample_id)
        lidarData = self.get_lidar(sample_id)
        front_bevmap = self._make_bev_tensor(lidarData, cnf.boundary)

        metadatas = {
            'img_path': img_path,
        }

        return metadatas, front_bevmap, img_rgb

    def load_bevmap_front_vs_back(self, index):
        """Load the camera image plus the BEV maps of both boundaries for one sample.

        Returns:
            tuple: ``(metadatas, front_bevmap, back_bevmap, img_rgb)``; the
            maps are built from ``cnf.boundary`` and ``cnf.boundary_back``.
        """
        sample_id = int(self.sample_id_list[index])
        img_path, img_rgb = self.get_image(sample_id)
        lidarData = self.get_lidar(sample_id)

        front_bevmap = self._make_bev_tensor(lidarData, cnf.boundary)
        back_bevmap = self._make_bev_tensor(lidarData, cnf.boundary_back)

        metadatas = {
            'img_path': img_path,
        }

        return metadatas, front_bevmap, back_bevmap, img_rgb

    def get_image(self, idx):
        """Read image ``idx`` and return ``(path, RGB image)``.

        Raises:
            FileNotFoundError: if the image is missing or cannot be decoded.
        """
        img_path = os.path.join(self.image_dir, '{:010d}.png'.format(idx))
        img_bgr = cv2.imread(img_path)
        # cv2.imread returns None instead of raising; fail with the path
        # rather than with an opaque cvtColor assertion.
        if img_bgr is None:
            raise FileNotFoundError('Cannot read image: {}'.format(img_path))
        img = cv2.cvtColor(img_bgr, cv2.COLOR_BGR2RGB)

        return img_path, img

    def get_lidar(self, idx):
        """Read scan ``idx`` as a float32 array with 4 values per point."""
        lidar_file = os.path.join(self.lidar_dir, '{:010d}.bin'.format(idx))
        return np.fromfile(lidar_file, dtype=np.float32).reshape(-1, 4)
@@ -0,0 +1,98 @@ | |||
""" | |||
# -*- coding: utf-8 -*- | |||
----------------------------------------------------------------------------------- | |||
""" | |||
import math | |||
import os | |||
import sys | |||
import cv2 | |||
import numpy as np | |||
src_dir = os.path.dirname(os.path.realpath(__file__)) | |||
# while not src_dir.endswith("sfa"): | |||
# src_dir = os.path.dirname(src_dir) | |||
if src_dir not in sys.path: | |||
sys.path.append(src_dir) | |||
import config.kitti_config as cnf | |||
def makeBEVMap(PointCloud_, boundary):
    """Rasterize a point cloud into a 3-channel bird's-eye-view map.

    Args:
        PointCloud_: 2-D array with at least 4 columns; columns 0 and 1 are
            discretized into grid rows and columns, column 2 feeds the height
            channel and column 3 the intensity channel. The input is not
            modified.
        boundary: dict with ``'minZ'`` and ``'maxZ'``; only the z range is
            read here, to normalize the height channel.

    Returns:
        Array of shape ``(3, cnf.BEV_HEIGHT, cnf.BEV_WIDTH)``: channel 0 is
        intensity / 255, channel 1 is height / z range, channel 2 is the
        log-normalized point count per cell.
    """
    # One extra row/column so that points on the far edge still index validly;
    # it is cropped off again when the output is assembled.
    Height = cnf.BEV_HEIGHT + 1
    Width = cnf.BEV_WIDTH + 1

    # Discretize the two planar coordinates into grid indices. The offsets
    # put the origin at the grid centre (Apollo data: full 360-degree view).
    PointCloud = np.copy(PointCloud_)
    PointCloud[:, 0] = np.int_(np.floor(PointCloud[:, 0] / cnf.DISCRETIZATION_Y) + Height / 2)
    PointCloud[:, 1] = np.int_(np.floor(PointCloud[:, 1] / cnf.DISCRETIZATION_X) + Width / 2)

    # Sort by row, then column, then descending z, so the first point of each
    # cell is its highest one.
    order = np.lexsort((-PointCloud[:, 2], PointCloud[:, 1], PointCloud[:, 0]))
    PointCloud = PointCloud[order]

    # A single unique() pass yields both the top point of every occupied cell
    # and the number of points in it (this used to be computed twice).
    _, first_idx, counts = np.unique(PointCloud[:, 0:2], axis=0, return_index=True, return_counts=True)
    top_points = PointCloud[first_idx]
    # Image arrays are indexed (row, column), not (x, y).
    rows = np.int_(top_points[:, 0])
    cols = np.int_(top_points[:, 1])

    max_height = float(np.abs(boundary['maxZ'] - boundary['minZ']))

    heightMap = np.zeros((Height, Width))
    heightMap[rows, cols] = top_points[:, 2] / max_height

    intensityMap = np.zeros((Height, Width))
    # Reflectance intensity of the Hesai 40P sensor ranges over 0~255.
    intensityMap[rows, cols] = top_points[:, 3] / 255.0

    densityMap = np.zeros((Height, Width))
    densityMap[rows, cols] = np.minimum(1.0, np.log(counts + 1) / np.log(64))

    RGB_Map = np.zeros((3, Height - 1, Width - 1))
    RGB_Map[2, :, :] = densityMap[:cnf.BEV_HEIGHT, :cnf.BEV_WIDTH]  # r_map
    RGB_Map[1, :, :] = heightMap[:cnf.BEV_HEIGHT, :cnf.BEV_WIDTH]  # g_map
    RGB_Map[0, :, :] = intensityMap[:cnf.BEV_HEIGHT, :cnf.BEV_WIDTH]  # b_map

    return RGB_Map
# bev image coordinates format | |||
def get_corners(x, y, w, l, yaw):
    """Return the four corners of a rotated box as a ``(4, 2)`` float32 array.

    ``(x, y)`` is the box centre, ``w`` and ``l`` its width and length and
    ``yaw`` its rotation in radians. Rows are ordered front left, rear left,
    rear right, front right.
    """
    cos_yaw = np.cos(yaw)
    sin_yaw = np.sin(yaw)

    # Offsets contributed by the half-width and the half-length along each axis.
    half_w_x = w / 2 * cos_yaw
    half_w_y = w / 2 * sin_yaw
    half_l_x = l / 2 * sin_yaw
    half_l_y = l / 2 * cos_yaw

    corners = np.zeros((4, 2), dtype=np.float32)
    corners[0] = (x - half_w_x - half_l_x, y - half_w_y + half_l_y)  # front left
    corners[1] = (x - half_w_x + half_l_x, y - half_w_y - half_l_y)  # rear left
    corners[2] = (x + half_w_x + half_l_x, y + half_w_y - half_l_y)  # rear right
    corners[3] = (x + half_w_x - half_l_x, y + half_w_y + half_l_y)  # front right

    return corners
def drawRotatedBox(img, x, y, w, l, yaw, color):
    """Draw a rotated box on ``img`` in place.

    The outline is drawn in ``color``; the edge between corner 0 (front left)
    and corner 3 (front right) is drawn again in ``(255, 255, 0)`` to mark the
    front of the box. Nothing is returned.

    Args:
        img: image to draw on; it is modified.
        x, y: box centre.
        w, l: box width and length.
        yaw: box rotation in radians.
        color: outline colour passed to ``cv2.polylines``.
    """
    # The unused full-image copy that was made on every call has been removed;
    # drawing always happened on ``img`` itself.
    bev_corners = get_corners(x, y, w, l, yaw)

    outline = bev_corners.reshape(-1, 1, 2).astype(int)
    cv2.polylines(img, [outline], True, color, 2)

    corners = bev_corners.reshape(-1, 2)
    front_left = (int(corners[0, 0]), int(corners[0, 1]))
    front_right = (int(corners[3, 0]), int(corners[3, 1]))
    cv2.line(img, front_left, front_right, (255, 255, 0), 2)
@@ -0,0 +1,324 @@ | |||
""" | |||
# -*- coding: utf-8 -*- | |||
----------------------------------------------------------------------------------- | |||
# Author: Nguyen Mau Dung | |||
# DoC: 2020.08.17 | |||
# email: nguyenmaudung93.kstn@gmail.com | |||
----------------------------------------------------------------------------------- | |||
# Description: The utils of the kitti dataset | |||
""" | |||
from __future__ import print_function | |||
import os | |||
import sys | |||
import numpy as np | |||
import cv2 | |||
src_dir = os.path.dirname(os.path.realpath(__file__)) | |||
# while not src_dir.endswith("sfa"): | |||
# src_dir = os.path.dirname(src_dir) | |||
if src_dir not in sys.path: | |||
sys.path.append(src_dir) | |||
import config.kitti_config as cnf | |||
class Object3d(object):
    """One 3D object parsed from a single line of a KITTI-format label file.

    The line holds whitespace-separated fields: type, truncation, occlusion,
    alpha, 2D box (xmin, ymin, xmax, ymax), 3D size (h, w, l), 3D location
    (x, y, z), rotation ``ry`` and an optional trailing score.
    """

    def __init__(self, label_file_line):
        """Parse ``label_file_line`` into attributes.

        :param label_file_line: one label line (15 or 16 fields)
        :raises ValueError: if a numeric field cannot be converted to float
        :raises IndexError: if the line has too few fields
        """
        # split() without arguments tolerates repeated spaces, tabs and a trailing newline.
        data = label_file_line.split()
        data[1:] = [float(x) for x in data[1:]]

        # extract label, truncation, occlusion
        self.type = data[0]  # 'Car', 'Pedestrian', ...
        self.cls_id = self.cls_type_to_id(self.type)
        self.truncation = data[1]  # truncated pixel ratio [0..1]
        self.occlusion = int(data[2])  # 0=visible, 1=partly occluded, 2=fully occluded, 3=unknown
        self.alpha = data[3]  # object observation angle [-pi..pi]

        # extract 2d bounding box in 0-based coordinates
        self.xmin = data[4]  # left
        self.ymin = data[5]  # top
        self.xmax = data[6]  # right
        self.ymax = data[7]  # bottom
        self.box2d = np.array([self.xmin, self.ymin, self.xmax, self.ymax])

        # extract 3d bounding box information
        self.h = data[8]  # box height
        self.w = data[9]  # box width
        self.l = data[10]  # box length (in meters)
        self.t = (data[11], data[12], data[13])  # location (x,y,z) in camera coord.
        self.dis_to_cam = np.linalg.norm(self.t)
        self.ry = data[14]  # yaw angle (around Y-axis in camera coordinates) [-pi..pi]
        # The score is only present in detection results (16 fields).
        self.score = data[15] if len(data) == 16 else -1.0
        self.level_str = None
        self.level = self.get_obj_level()

    def cls_type_to_id(self, cls_type):
        """Return the class id configured for ``cls_type``, or -1 when it is unknown."""
        if cls_type not in cnf.CLASS_NAME_TO_ID:
            return -1
        return cnf.CLASS_NAME_TO_ID[cls_type]

    def get_obj_level(self):
        """Classify the object difficulty and store its name in ``self.level_str``.

        :return: 1 (Easy), 2 (Moderate), 3 (Hard) or 4 (UnKnown), decided from
                 the 2D box height, the truncation and the occlusion
        """
        height = float(self.box2d[3]) - float(self.box2d[1]) + 1

        if height >= 40 and self.truncation <= 0.15 and self.occlusion <= 0:
            self.level_str = 'Easy'
            return 1  # Easy
        elif height >= 25 and self.truncation <= 0.3 and self.occlusion <= 1:
            self.level_str = 'Moderate'
            return 2  # Moderate
        elif height >= 25 and self.truncation <= 0.5 and self.occlusion <= 2:
            self.level_str = 'Hard'
            return 3  # Hard
        else:
            self.level_str = 'UnKnown'
            return 4

    def print_object(self):
        """Print a human-readable summary of the object to stdout."""
        # truncation is a ratio in [0..1]; '%d' would always print it as 0.
        print('Type, truncation, occlusion, alpha: %s, %.2f, %d, %f' %
              (self.type, self.truncation, self.occlusion, self.alpha))
        print('2d bbox (x0,y0,x1,y1): %f, %f, %f, %f' %
              (self.xmin, self.ymin, self.xmax, self.ymax))
        print('3d bbox h,w,l: %f, %f, %f' %
              (self.h, self.w, self.l))
        print('3d bbox location, ry: (%f, %f, %f), %f' %
              (self.t[0], self.t[1], self.t[2], self.ry))

    def to_kitti_format(self):
        """Serialise the object back into a 16-field KITTI label line (score included)."""
        kitti_str = '%s %.2f %d %.2f %.2f %.2f %.2f %.2f %.2f %.2f %.2f %.2f %.2f %.2f %.2f %.2f' \
                    % (self.type, self.truncation, int(self.occlusion), self.alpha, self.box2d[0], self.box2d[1],
                       self.box2d[2], self.box2d[3], self.h, self.w, self.l, self.t[0], self.t[1], self.t[2],
                       self.ry, self.score)
        return kitti_str
def read_label(label_filename):
    """Read a KITTI-format label file into a list of ``Object3d``.

    :param label_filename: path of the label text file
    :return: list with one ``Object3d`` per non-empty line
    """
    # The context manager closes the file even if parsing fails later on.
    with open(label_filename) as label_file:
        lines = [line.rstrip() for line in label_file]
    # Blank lines (e.g. a trailing newline at the end of the file) carry no object.
    return [Object3d(line) for line in lines if line]
class Calibration(object):
    ''' Calibration matrices and utils

        3d XYZ in <label>.txt are in rect camera coord.
        2d box xy are in image2 coord
        Points in <lidar>.bin are in Velodyne coord.

        y_image2 = P^2_rect * x_rect
        y_image2 = P^2_rect * R0_rect * Tr_velo_to_cam * x_velo
        x_ref = Tr_velo_to_cam * x_velo
        x_rect = R0_rect * x_ref

        P^2_rect = [f^2_u,  0,      c^2_u,  -f^2_u b^2_x;
                    0,      f^2_v,  c^2_v,  -f^2_v b^2_y;
                    0,      0,      1,      0]
                 = K * [1|t]

        image2 coord:
         ----> x-axis (u)
        |
        |
        v y-axis (v)

        velodyne coord:
        front x, left y, up z

        rect/ref camera coord:
        right x, down y, front z

        Ref (KITTI paper): http://www.cvlibs.net/publications/Geiger2013IJRR.pdf
    '''

    def __init__(self, calib_filepath):
        """Load the matrices from ``calib_filepath`` and derive the camera parameters."""
        matrices = self.read_calib_file(calib_filepath)

        # Projection matrices from rect camera coord to image coord
        self.P2 = np.reshape(matrices['P2'], [3, 4])
        self.P3 = np.reshape(matrices['P3'], [3, 4])
        # Rigid transform from Velodyne coord to reference camera coord
        self.V2C = np.reshape(matrices['Tr_velo2cam'], [3, 4])
        # Rotation from reference camera coord to rect camera coord
        self.R0 = np.reshape(matrices['R_rect'], [3, 3])

        # Principal point, focal lengths and baseline terms read from P2
        self.c_u, self.c_v = self.P2[0, 2], self.P2[1, 2]
        self.f_u, self.f_v = self.P2[0, 0], self.P2[1, 1]
        self.b_x = self.P2[0, 3] / (-self.f_u)  # relative
        self.b_y = self.P2[1, 3] / (-self.f_v)

    def read_calib_file(self, filepath):
        """Parse the calibration text file.

        Rows 2, 3, 4 and 5 (0-based) of the file are read as P2, P3, the
        rectification rotation and the velodyne-to-camera transform; the
        first space-separated token of each row (its name) is discarded.

        :param filepath: path of the calibration file
        :return: dict with keys 'P2', 'P3', 'R_rect', 'Tr_velo2cam'
        """
        with open(filepath) as calib_file:
            rows = calib_file.readlines()

        def _values(row_idx):
            tokens = rows[row_idx].strip().split(' ')[1:]
            return np.array(tokens, dtype=np.float32)

        proj2 = _values(2)
        proj3 = _values(3)
        rect = _values(4)
        velo_to_cam = _values(5)

        return {'P2': proj2.reshape(3, 4),
                'P3': proj3.reshape(3, 4),
                'R_rect': rect.reshape(3, 3),
                'Tr_velo2cam': velo_to_cam.reshape(3, 4)}

    def cart2hom(self, pts_3d):
        """
        :param pts: (N, 3 or 2)
        :return pts_hom: (N, 4 or 3)
        """
        ones_col = np.ones((pts_3d.shape[0], 1), dtype=np.float32)
        return np.hstack((pts_3d, ones_col))
def compute_radius(det_size, min_overlap=0.7):
    """Return the gaussian radius used to splat an object onto the heatmap.

    Three quadratic cases are evaluated from the box size and
    ``min_overlap``; the smallest resulting radius is returned.

    :param det_size: (height, width) of the box on the heatmap
    :param min_overlap: overlap threshold used in the three cases
    :return: the minimum of the three candidate radii
    """
    height, width = det_size
    side_sum = height + width

    def _candidate(a, b, c):
        # Same expression as the original for every case: (b + sqrt(b^2 - 4ac)) / 2
        return (b + np.sqrt(b ** 2 - 4 * a * c)) / 2

    candidates = (
        _candidate(1, side_sum, width * height * (1 - min_overlap) / (1 + min_overlap)),
        _candidate(4, 2 * side_sum, (1 - min_overlap) * width * height),
        _candidate(4 * min_overlap, -2 * min_overlap * side_sum, (min_overlap - 1) * width * height),
    )
    return min(candidates)
def gaussian2D(shape, sigma=1):
    """Build an un-normalised 2D gaussian kernel whose centre value is 1.

    :param shape: (rows, cols) of the kernel
    :param sigma: standard deviation of the gaussian
    :return: array of the given shape; entries below machine epsilon
             relative to the maximum are set to 0
    """
    half_rows, half_cols = [(side - 1.) / 2. for side in shape]
    rows, cols = np.ogrid[-half_rows:half_rows + 1, -half_cols:half_cols + 1]

    squared_dist = cols * cols + rows * rows
    kernel = np.exp(-squared_dist / (2 * sigma * sigma))

    # Drop numerically negligible tails.
    cutoff = np.finfo(kernel.dtype).eps * kernel.max()
    kernel[kernel < cutoff] = 0
    return kernel
def gen_hm_radius(heatmap, center, radius, k=1):
    """Splat a gaussian of the given radius onto ``heatmap`` in place.

    The gaussian is clipped at the heatmap borders and merged with the
    existing values by element-wise maximum.

    :param heatmap: 2D array that is updated in place
    :param center: (x, y) centre of the gaussian; truncated to integers
    :param radius: integer radius of the gaussian
    :param k: scale factor applied to the gaussian
    :return: the same ``heatmap`` object
    """
    diameter = 2 * radius + 1
    kernel = gaussian2D((diameter, diameter), sigma=diameter / 6)

    cx = int(center[0])
    cy = int(center[1])
    hm_height, hm_width = heatmap.shape[0:2]

    # Extents of the kernel that actually fit inside the heatmap.
    left = min(cx, radius)
    right = min(hm_width - cx, radius + 1)
    top = min(cy, radius)
    bottom = min(hm_height - cy, radius + 1)

    hm_patch = heatmap[cy - top:cy + bottom, cx - left:cx + right]
    kernel_patch = kernel[radius - top:radius + bottom, radius - left:radius + right]

    # Nothing to draw when either window is empty.
    if min(kernel_patch.shape) <= 0 or min(hm_patch.shape) <= 0:  # TODO debug
        return heatmap

    np.maximum(hm_patch, kernel_patch * k, out=hm_patch)
    return heatmap
def get_filtered_lidar(lidar, boundary, labels=None):
    """Keep only the points (and labels) inside ``boundary``.

    Points are kept when x, y, z lie inside the closed ranges of the
    boundary; the z column of the returned points is shifted by ``-minZ``.
    Labels are kept when columns 1..3 lie inside the half-open ranges
    ``[min, max)``.

    :param lidar: point array whose columns 0..2 are x, y, z
    :param boundary: dict with keys minX, maxX, minY, maxY, minZ, maxZ
    :param labels: optional label array whose columns 1..3 are x, y, z
    :return: the filtered points, or ``(points, labels)`` when labels are given
    """
    minX, maxX = boundary['minX'], boundary['maxX']
    minY, maxY = boundary['minY'], boundary['maxY']
    minZ, maxZ = boundary['minZ'], boundary['maxZ']

    # Remove the points out of range x, y, z
    px, py, pz = lidar[:, 0], lidar[:, 1], lidar[:, 2]
    inside = ((px >= minX) & (px <= maxX) &
              (py >= minY) & (py <= maxY) &
              (pz >= minZ) & (pz <= maxZ))
    lidar = lidar[inside]
    lidar[:, 2] = lidar[:, 2] - minZ

    if labels is None:
        return lidar

    keep = ((labels[:, 1] >= minX) & (labels[:, 1] < maxX) &
            (labels[:, 2] >= minY) & (labels[:, 2] < maxY) &
            (labels[:, 3] >= minZ) & (labels[:, 3] < maxZ))
    return lidar, labels[keep]
def box3d_corners_to_center(box3d_corner):
    """Convert corner boxes to centre representation: (N, 8, 3) -> (N, 7).

    :param box3d_corner: array of shape (N, 8, 3); corners 0..3 and 4..7
                         form the two faces separated along z
    :return: array of shape (N, 7) with columns h, w, l, x, y, z, yaw
    """
    assert box3d_corner.ndim == 3
    corners = box3d_corner

    def _planar_edge(i, j):
        # Length of the edge between corners i and j, measured in the x-y plane.
        diff = corners[:, i, [0, 1]] - corners[:, j, [0, 1]]
        return np.sqrt(np.sum(diff ** 2, axis=1, keepdims=True))

    xyz = np.mean(corners, axis=1)
    h = abs(np.mean(corners[:, 4:, 2] - corners[:, :4, 2], axis=1, keepdims=True))

    # Average the four parallel edges for each horizontal dimension.
    w = (_planar_edge(0, 1) + _planar_edge(2, 3) + _planar_edge(4, 5) + _planar_edge(6, 7)) / 4
    l = (_planar_edge(0, 3) + _planar_edge(1, 2) + _planar_edge(4, 7) + _planar_edge(5, 6)) / 4

    # Average of four heading estimates taken from the edges of the first face.
    px, py = corners[:, :, 0], corners[:, :, 1]
    yaw_sum = (np.arctan2(py[:, 2] - py[:, 1], px[:, 2] - px[:, 1]) +
               np.arctan2(py[:, 3] - py[:, 0], px[:, 3] - px[:, 0]) +
               np.arctan2(px[:, 2] - px[:, 3], py[:, 3] - py[:, 2]) +
               np.arctan2(px[:, 1] - px[:, 0], py[:, 0] - py[:, 1]))
    yaw = yaw_sum[:, np.newaxis] / 4

    return np.concatenate([h, w, l, xyz, yaw], axis=1).reshape(-1, 7)
def box3d_center_to_conners(box3d_center):
    """Convert one centre-format box to its eight corners.

    :param box3d_center: sequence (h, w, l, x, y, z, yaw); z is the height
                         of the bottom face and yaw rotates around z
    :return: float32 array of shape (8, 3): four bottom corners, then four top corners
    """
    h, w, l, x, y, z, yaw = box3d_center
    half_l, half_w = l / 2, w / 2

    # Axis-aligned corner template around the origin (rows are x, y, z).
    template = np.array([
        [-half_l, -half_l, half_l, half_l, -half_l, -half_l, half_l, half_l],
        [half_w, -half_w, -half_w, half_w, half_w, -half_w, -half_w, half_w],
        [0, 0, 0, 0, h, h, h, h]])

    cos_yaw, sin_yaw = np.cos(yaw), np.sin(yaw)
    rotation = np.array([
        [cos_yaw, -sin_yaw, 0.0],
        [sin_yaw, cos_yaw, 0.0],
        [0.0, 0.0, 1.0]])

    # Rotate around z, then translate every corner by the box position.
    offset = np.array([x, y, z]).reshape(3, 1)
    corners = np.dot(rotation, template) + offset
    return corners.transpose().astype(np.float32)
if __name__ == '__main__':
    # Small visual demo: draw one gaussian on an empty heatmap and show it.
    demo_heatmap = np.zeros((96, 320))
    box_h, box_w = 40, 50
    demo_radius = max(0, int(compute_radius((box_h, box_w))))
    print('h: {}, w: {}, radius: {}, sigma: {}'.format(box_h, box_w, demo_radius, (2 * demo_radius + 1) / 6.))
    gen_hm_radius(demo_heatmap, center=(200, 50), radius=demo_radius)

    # Keep showing the window until Esc (key code 27) is pressed.
    esc_pressed = False
    while not esc_pressed:
        cv2.imshow('heatmap', demo_heatmap)
        esc_pressed = (cv2.waitKey(0) & 0xff == 27)

    max_pos = np.unravel_index(demo_heatmap.argmax(), shape=demo_heatmap.shape)
    print('max_pos: {}'.format(max_pos))
@@ -0,0 +1,67 @@ | |||
""" | |||
# -*- coding: utf-8 -*- | |||
----------------------------------------------------------------------------------- | |||
# Author: Nguyen Mau Dung | |||
# DoC: 2020.08.17 | |||
# email: nguyenmaudung93.kstn@gmail.com | |||
----------------------------------------------------------------------------------- | |||
# Description: This script for creating the dataloader for training/validation/test phase | |||
""" | |||
import os | |||
import sys | |||
import torch | |||
from torch.utils.data import DataLoader | |||
import numpy as np | |||
src_dir = os.path.dirname(os.path.realpath(__file__)) | |||
# while not src_dir.endswith("sfa"): | |||
# src_dir = os.path.dirname(src_dir) | |||
if src_dir not in sys.path: | |||
sys.path.append(src_dir) | |||
from data_process.kitti_dataset import KittiDataset | |||
from data_process.transformation import OneOf, Random_Rotation, Random_Scaling | |||
def create_train_dataloader(configs):
    """Create dataloader for training

    One of random rotation / random scaling is configured as LiDAR
    augmentation (OneOf with p=0.66).

    :param configs: object providing hflip_prob, num_samples, distributed,
                    batch_size, pin_memory and num_workers
    :return: (train_dataloader, train_sampler); the sampler is None unless
             ``configs.distributed`` is truthy
    """
    augmentations = [
        Random_Rotation(limit_angle=np.pi / 4, p=1.0),
        Random_Scaling(scaling_range=(0.95, 1.05), p=1.0),
    ]
    train_lidar_aug = OneOf(augmentations, p=0.66)
    train_dataset = KittiDataset(configs, mode='train', lidar_aug=train_lidar_aug,
                                 hflip_prob=configs.hflip_prob, num_samples=configs.num_samples)

    if configs.distributed:
        train_sampler = torch.utils.data.distributed.DistributedSampler(train_dataset)
    else:
        train_sampler = None

    # Shuffling is delegated to the sampler when one is used.
    loader_kwargs = dict(batch_size=configs.batch_size, shuffle=(train_sampler is None),
                         pin_memory=configs.pin_memory, num_workers=configs.num_workers,
                         sampler=train_sampler)
    train_dataloader = DataLoader(train_dataset, **loader_kwargs)

    return train_dataloader, train_sampler
def create_val_dataloader(configs):
    """Create dataloader for validation

    No augmentation and no horizontal flip are applied; the data is never shuffled.

    :param configs: object providing num_samples, distributed, batch_size,
                    pin_memory and num_workers
    :return: the validation dataloader
    """
    val_dataset = KittiDataset(configs, mode='val', lidar_aug=None, hflip_prob=0.,
                               num_samples=configs.num_samples)

    if configs.distributed:
        val_sampler = torch.utils.data.distributed.DistributedSampler(val_dataset, shuffle=False)
    else:
        val_sampler = None

    loader_kwargs = dict(batch_size=configs.batch_size, shuffle=False,
                         pin_memory=configs.pin_memory, num_workers=configs.num_workers,
                         sampler=val_sampler)
    return DataLoader(val_dataset, **loader_kwargs)
def create_test_dataloader(configs):
    """Create dataloader for testing phase

    No augmentation and no horizontal flip are applied; the data is never shuffled.

    :param configs: object providing num_samples, distributed, batch_size,
                    pin_memory and num_workers
    :return: the test dataloader
    """
    test_dataset = KittiDataset(configs, mode='test', lidar_aug=None, hflip_prob=0.,
                                num_samples=configs.num_samples)

    if configs.distributed:
        test_sampler = torch.utils.data.distributed.DistributedSampler(test_dataset)
    else:
        test_sampler = None

    loader_kwargs = dict(batch_size=configs.batch_size, shuffle=False,
                         pin_memory=configs.pin_memory, num_workers=configs.num_workers,
                         sampler=test_sampler)
    return DataLoader(test_dataset, **loader_kwargs)
@@ -0,0 +1,335 @@ | |||
""" | |||
# -*- coding: utf-8 -*- | |||
----------------------------------------------------------------------------------- | |||
# Author: Nguyen Mau Dung | |||
# DoC: 2020.08.17 | |||
# email: nguyenmaudung93.kstn@gmail.com | |||
----------------------------------------------------------------------------------- | |||
# Description: This script for the KITTI dataset | |||
""" | |||
import sys | |||
import os | |||
import math | |||
from builtins import int | |||
import numpy as np | |||
from torch.utils.data import Dataset | |||
import cv2 | |||
import torch | |||
src_dir = os.path.dirname(os.path.realpath(__file__)) | |||
# while not src_dir.endswith("sfa"): | |||
# src_dir = os.path.dirname(src_dir) | |||
if src_dir not in sys.path: | |||
sys.path.append(src_dir) | |||
from data_process.kitti_data_utils import gen_hm_radius, compute_radius, Calibration, get_filtered_lidar | |||
from data_process.kitti_bev_utils import makeBEVMap, drawRotatedBox, get_corners | |||
from data_process import transformation | |||
import config.kitti_config as cnf | |||
class KittiDataset(Dataset):
    """Dataset of LiDAR scans with KITTI-format labels, served as BEV maps.

    Sample ids are the base names of the ``.bin`` files found in the
    ``velodyne`` directory. In 'train' and 'val' mode an item is
    ``(bev_map, targets)``; in 'test' mode it is ``(bev_map, metadatas)``.
    """

    def __init__(self, configs, mode='train', lidar_aug=None, hflip_prob=None, num_samples=None):
        """
        :param configs: object providing dataset_dir, input_size, hm_size,
                        num_classes, max_objects (and val_data_url for mode 'val')
        :param mode: one of 'train', 'val', 'test'
        :param lidar_aug: optional callable ``(lidar, boxes) -> (lidar, boxes)``
        :param hflip_prob: probability of flipping the BEV map; None means never flip
        :param num_samples: optional cap on the number of samples
        """
        self.dataset_dir = configs.dataset_dir
        self.input_size = configs.input_size
        self.hm_size = configs.hm_size

        self.num_classes = configs.num_classes
        self.max_objects = configs.max_objects

        assert mode in ['train', 'val', 'test'], 'Invalid mode: {}'.format(mode)
        self.mode = mode
        self.is_test = (self.mode == 'test')

        self.lidar_aug = lidar_aug
        self.hflip_prob = hflip_prob

        # Validation data lives under its own root; train and test share dataset_dir.
        if mode == 'val':
            self.val_data_url = configs.val_data_url
            data_root = self.val_data_url
        else:
            data_root = self.dataset_dir
        self.lidar_dir = os.path.join(data_root, "velodyne")
        self.calib_dir = os.path.join(data_root, "calib")
        self.label_dir = os.path.join(data_root, "label_2")

        # os.listdir returns entries in arbitrary order and may contain other files
        # (load_img_only even builds '.png' paths inside this directory). Keeping only
        # '.bin' files and sorting makes the sample list, and therefore the
        # ``num_samples`` subset, deterministic.
        self.sample_id_list = sorted(
            os.path.splitext(file_name)[0]
            for file_name in os.listdir(self.lidar_dir)
            if file_name.endswith('.bin')
        )

        if num_samples is not None:
            self.sample_id_list = self.sample_id_list[:num_samples]
        self.num_samples = len(self.sample_id_list)

    def __len__(self):
        return len(self.sample_id_list)

    def __getitem__(self, index):
        if self.is_test:
            return self.load_img_only(index)
        else:
            return self.load_img_with_targets(index)

    def load_img_only(self, index):
        """Load only the BEV map for the testing phase.

        :return: ``(bev_map, metadatas)`` where ``metadatas['bev_path']`` is
                 ``<lidar_dir>/<sample_id>.png``
        """
        sample_id = self.sample_id_list[index]
        lidarData = self.get_lidar(sample_id)
        lidarData = get_filtered_lidar(lidarData, cnf.boundary)
        bev_map = makeBEVMap(lidarData, cnf.boundary)
        bev_map = torch.from_numpy(bev_map)
        bev_path = os.path.join(self.lidar_dir, '{}.png'.format(sample_id))
        metadatas = {
            'bev_path': bev_path,
        }

        return bev_map, metadatas

    def load_img_with_targets(self, index):
        """Load the BEV map and the targets for the training and validation phase.

        Labels are used as stored in the label file (no camera-to-lidar
        conversion is applied here).

        :return: ``(bev_map, targets)``
        """
        sample_id = self.sample_id_list[index]
        lidarData = self.get_lidar(sample_id)
        labels, has_labels = self.get_label(sample_id)

        if self.lidar_aug:
            lidarData, labels[:, 1:] = self.lidar_aug(lidarData, labels[:, 1:])

        lidarData, labels = get_filtered_lidar(lidarData, cnf.boundary, labels)

        bev_map = makeBEVMap(lidarData, cnf.boundary)
        bev_map = torch.from_numpy(bev_map)

        # The constructor default is None, which cannot be compared with a float;
        # treat it as "never flip". The random draw is kept so the RNG stream is unchanged.
        hflip_prob = 0. if self.hflip_prob is None else self.hflip_prob
        hflipped = False
        if np.random.random() < hflip_prob:
            hflipped = True
            # C, H, W
            bev_map = torch.flip(bev_map, [-1])

        targets = self.build_targets(labels, hflipped)

        return bev_map, targets

    def get_image(self, idx):
        """Load the RGB image of sample ``idx``.

        NOTE(review): ``self.image_dir`` is not assigned anywhere in this class,
        so this method raises AttributeError unless the caller sets it first.
        """
        img_path = os.path.join(self.image_dir, '{}.png'.format(idx))
        img = cv2.cvtColor(cv2.imread(img_path), cv2.COLOR_BGR2RGB)

        return img_path, img

    def get_calib(self, idx):
        """Return the ``Calibration`` read from ``<calib_dir>/<idx>.txt``."""
        calib_file = os.path.join(self.calib_dir, '{}.txt'.format(idx))
        return Calibration(calib_file)

    def get_lidar(self, idx):
        """Return the points of ``<lidar_dir>/<idx>.bin`` as a float32 array of shape (N, 4)."""
        lidar_file = os.path.join(self.lidar_dir, '{}.bin'.format(idx))
        return np.fromfile(lidar_file, dtype=np.float32).reshape(-1, 4)

    def get_label(self, idx):
        """Read the labels of sample ``idx``.

        Each non-empty line holds space-separated fields; field 0 is the class
        name, fields 8..10 are h, w, l, fields 11..13 are x, y, z and field 14
        is the yaw angle.

        :return: ``(labels, has_labels)``; ``labels`` is a float32 array of shape
                 (M, 8) with columns cat_id, x, y, z, h, w, l, ry. When no object
                 is kept, a single all-zero row is returned and ``has_labels`` is False.
        :raises KeyError: if a class name is missing from ``cnf.CLASS_NAME_TO_ID``
        """
        labels = []
        label_path = os.path.join(self.label_dir, '{}.txt'.format(idx))
        # The context manager closes the label file once it has been read.
        with open(label_path, 'r') as label_file:
            for line in label_file:
                line = line.rstrip()
                if not line:
                    # Blank lines (e.g. at the end of the file) carry no object.
                    continue
                line_parts = line.split(' ')
                obj_name = line_parts[0]  # 'Car', 'Pedestrian', ...
                cat_id = int(cnf.CLASS_NAME_TO_ID[obj_name])
                if cat_id <= -99:  # ignore Tram and Misc
                    continue
                # height, width, length (h, w, l)
                h, w, l = float(line_parts[8]), float(line_parts[9]), float(line_parts[10])
                # location (x, y, z)
                x, y, z = float(line_parts[11]), float(line_parts[12]), float(line_parts[13])
                ry = float(line_parts[14])  # yaw angle [-pi..pi]

                labels.append([cat_id, x, y, z, h, w, l, ry])

        if len(labels) == 0:
            labels = np.zeros((1, 8), dtype=np.float32)
            has_labels = False
        else:
            labels = np.array(labels, dtype=np.float32)
            has_labels = True

        return labels, has_labels

    def build_targets(self, labels, hflipped):
        """Build the training targets for one sample.

        :param labels: array with rows (cls_id, x, y, z, h, w, l, yaw)
        :param hflipped: whether the BEV map was flipped along its last axis
        :return: dict with
                 'hm_cen' (num_classes, hm_l, hm_w) class heatmaps,
                 'cen_offset' (max_objects, 2) sub-pixel centre offsets,
                 'direction' (max_objects, 2) sin/cos of the yaw,
                 'z_coor' (max_objects, 1) z relative to minZ,
                 'dim' (max_objects, 3) h, w, l,
                 'indices_center' (max_objects,) flattened centre indices,
                 'obj_mask' (max_objects,) 1 for slots holding an object
        """
        minX = cnf.boundary['minX']
        maxX = cnf.boundary['maxX']
        minY = cnf.boundary['minY']
        maxY = cnf.boundary['maxY']
        minZ = cnf.boundary['minZ']
        maxZ = cnf.boundary['maxZ']

        num_objects = min(len(labels), self.max_objects)
        hm_l, hm_w = self.hm_size

        hm_main_center = np.zeros((self.num_classes, hm_l, hm_w), dtype=np.float32)
        cen_offset = np.zeros((self.max_objects, 2), dtype=np.float32)
        direction = np.zeros((self.max_objects, 2), dtype=np.float32)
        z_coor = np.zeros((self.max_objects, 1), dtype=np.float32)
        dimension = np.zeros((self.max_objects, 3), dtype=np.float32)

        indices_center = np.zeros((self.max_objects), dtype=np.int64)
        obj_mask = np.zeros((self.max_objects), dtype=np.uint8)

        for k in range(num_objects):
            cls_id, x, y, z, h, w, l, yaw = labels[k]
            cls_id = int(cls_id)
            # Invert yaw angle
            yaw = -yaw
            # Skip objects outside the boundary or with a degenerate size.
            if not ((minX <= x <= maxX) and (minY <= y <= maxY) and (minZ <= z <= maxZ)):
                continue
            if (h <= 0) or (w <= 0) or (l <= 0):
                continue

            bbox_l = l / cnf.bound_size_x * hm_l
            bbox_w = w / cnf.bound_size_y * hm_w
            radius = compute_radius((math.ceil(bbox_l), math.ceil(bbox_w)))
            radius = max(0, int(radius))

            center_y = (x - minX) / cnf.bound_size_x * hm_l  # x --> y (invert to 2D image space)
            center_x = (y - minY) / cnf.bound_size_y * hm_w  # y --> x
            center = np.array([center_x, center_y], dtype=np.float32)

            if hflipped:
                center[0] = hm_w - center[0] - 1

            center_int = center.astype(np.int32)
            if cls_id < 0:
                # Negative ids mark regions to ignore: -1 covers every class,
                # any other negative id maps to the single class (-cls_id - 2).
                ignore_ids = list(range(self.num_classes)) if cls_id == -1 else [-cls_id - 2]
                # Consider to make mask ignore
                for cls_ig in ignore_ids:
                    gen_hm_radius(hm_main_center[cls_ig], center_int, radius)
                hm_main_center[ignore_ids, center_int[1], center_int[0]] = 0.9999
                continue

            # Generate heatmaps for main center
            gen_hm_radius(hm_main_center[cls_id], center, radius)
            # Index of the center
            indices_center[k] = center_int[1] * hm_w + center_int[0]

            # targets for center offset
            cen_offset[k] = center - center_int

            # targets for dimension
            dimension[k, 0] = h
            dimension[k, 1] = w
            dimension[k, 2] = l

            # targets for direction
            direction[k, 0] = math.sin(float(yaw))  # im
            direction[k, 1] = math.cos(float(yaw))  # re
            # im -->> -im
            if hflipped:
                direction[k, 0] = - direction[k, 0]

            # targets for depth
            z_coor[k] = z - minZ

            # Generate object masks
            obj_mask[k] = 1

        targets = {
            'hm_cen': hm_main_center,
            'cen_offset': cen_offset,
            'direction': direction,
            'z_coor': z_coor,
            'dim': dimension,
            'indices_center': indices_center,
            'obj_mask': obj_mask,
        }

        return targets

    def draw_img_with_label(self, index):
        """Return the BEV map and lidar-frame labels of a sample for visualisation.

        Unlike ``load_img_with_targets``, the labels are converted with
        ``transformation.camera_to_lidar_box`` using the sample calibration.
        The lidar path and the filtered labels are printed.

        :return: ``(bev_map, labels, lidar_path)``; ``bev_map`` is a numpy array
        """
        sample_id = self.sample_id_list[index]
        lidar_path = os.path.join(self.lidar_dir, '{}.bin'.format(sample_id))
        lidarData = self.get_lidar(sample_id)
        calib = self.get_calib(sample_id)
        labels, has_labels = self.get_label(sample_id)
        print(lidar_path)
        if has_labels:
            labels[:, 1:] = transformation.camera_to_lidar_box(labels[:, 1:], calib.V2C, calib.R0, calib.P2)

        if self.lidar_aug:
            lidarData, labels[:, 1:] = self.lidar_aug(lidarData, labels[:, 1:])

        lidarData, labels = get_filtered_lidar(lidarData, cnf.boundary, labels)
        bev_map = makeBEVMap(lidarData, cnf.boundary)
        print(labels)

        return bev_map, labels, lidar_path
if __name__ == '__main__':
    # Interactive viewer: shows the BEV map of every sample with its labelled boxes drawn on top.
    from easydict import EasyDict as edict
    from data_process.transformation import OneOf, Random_Scaling, Random_Rotation, lidar_to_camera_box
    from utils.visualization_utils import merge_rgb_to_bev, show_rgb_image_with_boxes

    configs = edict()
    configs.distributed = False  # For testing
    configs.pin_memory = False
    configs.num_samples = None
    configs.input_size = (1216, 608)
    configs.hm_size = (304, 152)
    configs.max_objects = 50
    configs.num_classes = 3
    configs.output_width = 608

    # NOTE(review): KittiDataset.__init__ reads configs.dataset_dir, and configs.val_data_url
    # in 'val' mode; neither is assigned here, so this demo presumably fails at
    # construction until they are set - confirm.
    # configs.dataset_dir = os.path.join('../../', 'dataset', 'kitti')
    # lidar_aug = OneOf([
    #     Random_Rotation(limit_angle=np.pi / 4, p=1.),
    #     Random_Scaling(scaling_range=(0.95, 1.05), p=1.),
    # ], p=1.)
    lidar_aug = None

    dataset = KittiDataset(configs, mode='val', lidar_aug=lidar_aug, hflip_prob=0., num_samples=configs.num_samples)

    print('\n\nPress n to see the next sample >>> Press Esc to quit...')
    for idx in range(len(dataset)):
        bev_map, labels, lidar_path = dataset.draw_img_with_label(idx)
        # The calibration path is derived from the lidar path by swapping the extension and the folder name.
        calib = Calibration(lidar_path.replace(".bin", ".txt").replace("velodyne", "calib"))
        # Channel-first map -> channel-last uint8 image for OpenCV.
        bev_map = (bev_map.transpose(1, 2, 0) * 255).astype(np.uint8)
        # bev_map = cv2.resize(bev_map, (cnf.BEV_HEIGHT, cnf.BEV_WIDTH))
        print(bev_map.shape)
        for box_idx, (cls_id, x, y, z, h, w, l, yaw) in enumerate(labels):
            # Draw rotated box
            yaw = -yaw
            # Metric coordinates -> pixel coordinates; x maps to the image row, y to the column.
            y1 = int((x - cnf.boundary['minX']) / cnf.DISCRETIZATION)
            x1 = int((y - cnf.boundary['minY']) / cnf.DISCRETIZATION)
            w1 = int(w / cnf.DISCRETIZATION)
            l1 = int(l / cnf.DISCRETIZATION)

            drawRotatedBox(bev_map, x1, y1, w1, l1, yaw, cnf.colors[int(cls_id)])
        # Rotate the bev_map
        bev_map = cv2.rotate(bev_map, cv2.ROTATE_180)

        # labels[:, 1:] = lidar_to_camera_box(labels[:, 1:], calib.V2C, calib.R0, calib.P2)
        cv2.imshow('bev_map', bev_map)

        # Any key advances to the next sample; Esc (key code 27) stops the viewer.
        if cv2.waitKey(0) & 0xff == 27:
            break
@@ -0,0 +1,426 @@ | |||
""" | |||
# -*- coding: utf-8 -*- | |||
----------------------------------------------------------------------------------- | |||
# Refer: https://github.com/ghimiredhikura/Complex-YOLOv3 | |||
# Source : https://github.com/jeasinema/VoxelNet-tensorflow/blob/master/utils/utils.py | |||
""" | |||
import os | |||
import sys | |||
import math | |||
import numpy as np | |||
import torch | |||
src_dir = os.path.dirname(os.path.realpath(__file__)) | |||
# while not src_dir.endswith("sfa"): | |||
# src_dir = os.path.dirname(src_dir) | |||
if src_dir not in sys.path: | |||
sys.path.append(src_dir) | |||
from config import kitti_config as cnf | |||
def angle_in_limit(angle):
    """Wrap ``angle`` into [-pi/2, pi/2).

    Results closer than 5 degrees to -pi/2 are snapped to +pi/2.

    :param angle: angle in radians
    :return: the wrapped angle in radians
    """
    limit_degree = 5
    half_pi = np.pi / 2
    snap_tolerance = limit_degree / 180 * np.pi

    # Shift by whole half-turns until the angle is inside the range.
    while angle >= half_pi:
        angle -= np.pi
    while angle < -half_pi:
        angle += np.pi

    return half_pi if abs(angle + half_pi) < snap_tolerance else angle
def camera_to_lidar(x, y, z, V2C=None, R0=None, P2=None):
    """Transform one point from camera coordinates to lidar coordinates.

    When ``V2C`` or ``R0`` is missing, the inverse matrices from the config
    module are used; otherwise the inverses are computed from the arguments.

    :param x, y, z: point in camera coordinates
    :param V2C: optional velodyne-to-camera transform
    :param R0: optional 3x3 rectification rotation
    :param P2: unused
    :return: tuple (x, y, z) in lidar coordinates
    """
    point_h = np.array([x, y, z, 1])

    if V2C is not None and R0 is not None:
        # Embed the 3x3 rotation in a 4x4 homogeneous matrix before inverting it.
        rect_h = np.zeros((4, 4))
        rect_h[:3, :3] = R0
        rect_h[3, 3] = 1
        point_h = np.matmul(np.linalg.inv(rect_h), point_h)
        point_h = np.matmul(inverse_rigid_trans(V2C), point_h)
    else:
        point_h = np.matmul(cnf.R0_inv, point_h)
        point_h = np.matmul(cnf.Tr_velo_to_cam_inv, point_h)

    return tuple(point_h[0:3])
def lidar_to_camera(x, y, z, V2C=None, R0=None, P2=None):
    """Transform one point from lidar coordinates to camera coordinates.

    When ``V2C`` or ``R0`` is missing, the matrices from the config module
    are used instead of the arguments.

    :param x, y, z: point in lidar coordinates
    :param V2C: optional velodyne-to-camera transform
    :param R0: optional rectification rotation
    :param P2: unused
    :return: tuple with the first three components of the transformed point
    """
    use_config = V2C is None or R0 is None
    velo_to_cam = cnf.Tr_velo_to_cam if use_config else V2C
    rectification = cnf.R0 if use_config else R0

    point_h = np.array([x, y, z, 1])
    in_reference = np.matmul(velo_to_cam, point_h)
    in_rect = np.matmul(rectification, in_reference)
    return tuple(in_rect[0:3])
def camera_to_lidar_point(points):
    """Transform points from camera to lidar coordinates: (N, 3) -> (N, 3).

    The inverse matrices from the config module are always used.

    :param points: array of shape (N, 3)
    :return: array of shape (N, 3)
    """
    ones_col = np.ones((points.shape[0], 1))
    homogeneous = np.hstack([points, ones_col]).T  # (N, 4) -> (4, N)

    unrectified = np.matmul(cnf.R0_inv, homogeneous)
    in_lidar = np.matmul(cnf.Tr_velo_to_cam_inv, unrectified).T  # (4, N) -> (N, 4)

    return in_lidar[:, 0:3].reshape(-1, 3)
def lidar_to_camera_point(points, V2C=None, R0=None):
    """Transform points from lidar to camera coordinates: (N, 3) -> (N, 3).

    When ``V2C`` or ``R0`` is missing, the matrices from the config module
    are used instead of the arguments.

    :param points: array of shape (N, 3)
    :param V2C: optional velodyne-to-camera transform
    :param R0: optional rectification rotation
    :return: array of shape (N, 3)
    """
    ones_col = np.ones((points.shape[0], 1))
    homogeneous = np.hstack([points, ones_col]).T

    use_config = V2C is None or R0 is None
    velo_to_cam = cnf.Tr_velo_to_cam if use_config else V2C
    rectification = cnf.R0 if use_config else R0

    in_reference = np.matmul(velo_to_cam, homogeneous)
    in_rect = np.matmul(rectification, in_reference).T

    return in_rect[:, 0:3].reshape(-1, 3)
def camera_to_lidar_box(boxes, V2C=None, R0=None, P2=None):
    """Convert boxes from camera to lidar coordinates: (N, 7) -> (N, 7).

    The centre goes through ``camera_to_lidar``; the size is kept and the
    rotation becomes ``rz = -ry - pi/2``.

    :param boxes: iterable of (x, y, z, h, w, l, ry)
    :param V2C, R0, P2: forwarded to ``camera_to_lidar``
    :return: array of shape (N, 7) with rows (x, y, z, h, w, l, rz)
    """
    converted = []
    for cam_x, cam_y, cam_z, h, w, l, ry in boxes:
        lidar_x, lidar_y, lidar_z = camera_to_lidar(cam_x, cam_y, cam_z, V2C=V2C, R0=R0, P2=P2)
        rz = -ry - np.pi / 2
        converted.append([lidar_x, lidar_y, lidar_z, h, w, l, rz])

    return np.array(converted).reshape(-1, 7)
def lidar_to_camera_box(boxes, V2C=None, R0=None, P2=None):
    """Convert the rotation of boxes from lidar to camera convention: (N, 7) -> (N, 7).

    Only the angle is changed (``ry = -rz - pi/2``); the centre and the size
    are returned as given and the calibration arguments are not used.

    :param boxes: iterable of (x, y, z, h, w, l, rz)
    :param V2C, R0, P2: unused
    :return: array of shape (N, 7) with rows (x, y, z, h, w, l, ry)
    """
    converted = [
        [x, y, z, h, w, l, -rz - np.pi / 2]
        for x, y, z, h, w, l, rz in boxes
    ]
    return np.array(converted).reshape(-1, 7)
def center_to_corner_box2d(boxes_center, coordinate='lidar'):
    """Convert 2D centre boxes to their four corners: (N, 5) -> (N, 4, 2).

    The five input columns are written to columns 0, 1, 4, 5 and 6 of a
    zero-filled 7-column box, which is expanded with
    ``center_to_corner_box3d``; the first four corners and their first two
    coordinates are returned.

    :param boxes_center: array of shape (N, 5)
    :param coordinate: forwarded to ``center_to_corner_box3d``
    :return: array of shape (N, 4, 2)
    """
    num_boxes = boxes_center.shape[0]
    padded = np.zeros((num_boxes, 7))
    padded[:, [0, 1, 4, 5, 6]] = boxes_center

    corners_3d = center_to_corner_box3d(padded, coordinate=coordinate)
    return corners_3d[:, 0:4, 0:2]
def center_to_corner_box3d(boxes_center, coordinate='lidar'):
    """Convert centre boxes to their eight corners: (N, 7) -> (N, 8, 3).

    :param boxes_center: array with rows (x, y, z, h, w, l, r)
    :param coordinate: 'lidar' or 'camera'; camera boxes are converted to
                       lidar first and the corners are converted back
    :return: float32 array of shape (N, 8, 3)
    """
    num_boxes = boxes_center.shape[0]
    corners_out = np.zeros((num_boxes, 8, 3), dtype=np.float32)
    in_camera = coordinate == 'camera'

    if in_camera:
        boxes_center = camera_to_lidar_box(boxes_center)

    for box_idx, box in enumerate(boxes_center):
        h, w, l = box[3], box[4], box[5]
        yaw = box[-1]

        # Corner template in velodyne coordinates around zero point and without orientation yet
        template = np.array([
            [-l / 2, -l / 2, l / 2, l / 2, -l / 2, -l / 2, l / 2, l / 2],
            [w / 2, -w / 2, -w / 2, w / 2, w / 2, -w / 2, -w / 2, w / 2],
            [0, 0, 0, 0, h, h, h, h]])

        # Rotate around z, then move the corners to the box position.
        cos_yaw, sin_yaw = np.cos(yaw), np.sin(yaw)
        rotation = np.array([
            [cos_yaw, -sin_yaw, 0.0],
            [sin_yaw, cos_yaw, 0.0],
            [0.0, 0.0, 1.0]])
        offset = np.asarray(box[0:3]).reshape(3, 1)
        corners_out[box_idx] = (np.dot(rotation, template) + offset).transpose()

    if in_camera:
        for box_idx in range(len(corners_out)):
            corners_out[box_idx] = lidar_to_camera_point(corners_out[box_idx])

    return corners_out
# Switch read by corner_to_center_box3d: True selects its averaging branch,
# False its max-based branch.
CORNER2CENTER_AVG = True
def corner_to_center_box3d(boxes_corner, coordinate='camera'): | |||
# (N, 8, 3) -> (N, 7) x,y,z,h,w,l,ry/z | |||
if coordinate == 'lidar': | |||
for idx in range(len(boxes_corner)): | |||
boxes_corner[idx] = lidar_to_camera_point(boxes_corner[idx]) | |||
ret = [] | |||
for roi in boxes_corner: | |||
if CORNER2CENTER_AVG: # average version | |||
roi = np.array(roi) | |||
h = abs(np.sum(roi[:4, 1] - roi[4:, 1]) / 4) | |||
w = np.sum( | |||
np.sqrt(np.sum((roi[0, [0, 2]] - roi[3, [0, 2]]) ** 2)) + | |||
np.sqrt(np.sum((roi[1, [0, 2]] - roi[2, [0, 2]]) ** 2)) + | |||
np.sqrt(np.sum((roi[4, [0, 2]] - roi[7, [0, 2]]) ** 2)) + | |||
np.sqrt(np.sum((roi[5, [0, 2]] - roi[6, [0, 2]]) ** 2)) | |||
) / 4 | |||
l = np.sum( | |||
np.sqrt(np.sum((roi[0, [0, 2]] - roi[1, [0, 2]]) ** 2)) + | |||
np.sqrt(np.sum((roi[2, [0, 2]] - roi[3, [0, 2]]) ** 2)) + | |||
np.sqrt(np.sum((roi[4, [0, 2]] - roi[5, [0, 2]]) ** 2)) + | |||
np.sqrt(np.sum((roi[6, [0, 2]] - roi[7, [0, 2]]) ** 2)) | |||
) / 4 | |||
x = np.sum(roi[:, 0], axis=0) / 8 | |||
y = np.sum(roi[0:4, 1], axis=0) / 4 | |||
z = np.sum(roi[:, 2], axis=0) / 8 | |||
ry = np.sum( | |||
math.atan2(roi[2, 0] - roi[1, 0], roi[2, 2] - roi[1, 2]) + | |||
math.atan2(roi[6, 0] - roi[5, 0], roi[6, 2] - roi[5, 2]) + | |||
math.atan2(roi[3, 0] - roi[0, 0], roi[3, 2] - roi[0, 2]) + | |||
math.atan2(roi[7, 0] - roi[4, 0], roi[7, 2] - roi[4, 2]) + | |||
math.atan2(roi[0, 2] - roi[1, 2], roi[1, 0] - roi[0, 0]) + | |||
math.atan2(roi[4, 2] - roi[5, 2], roi[5, 0] - roi[4, 0]) + | |||
math.atan2(roi[3, 2] - roi[2, 2], roi[2, 0] - roi[3, 0]) + | |||
math.atan2(roi[7, 2] - roi[6, 2], roi[6, 0] - roi[7, 0]) | |||
) / 8 | |||
if w > l: | |||
w, l = l, w | |||
ry = ry - np.pi / 2 | |||
elif l > w: | |||
l, w = w, l | |||
ry = ry - np.pi / 2 | |||
ret.append([x, y, z, h, w, l, ry]) | |||
else: # max version | |||
h = max(abs(roi[:4, 1] - roi[4:, 1])) | |||
w = np.max( | |||
np.sqrt(np.sum((roi[0, [0, 2]] - roi[3, [0, 2]]) ** 2)) + | |||
np.sqrt(np.sum((roi[1, [0, 2]] - roi[2, [0, 2]]) ** 2)) + | |||
np.sqrt(np.sum((roi[4, [0, 2]] - roi[7, [0, 2]]) ** 2)) + | |||
np.sqrt(np.sum((roi[5, [0, 2]] - roi[6, [0, 2]]) ** 2)) | |||
) | |||
l = np.max( | |||
np.sqrt(np.sum((roi[0, [0, 2]] - roi[1, [0, 2]]) ** 2)) + | |||
np.sqrt(np.sum((roi[2, [0, 2]] - roi[3, [0, 2]]) ** 2)) + | |||
np.sqrt(np.sum((roi[4, [0, 2]] - roi[5, [0, 2]]) ** 2)) + | |||
np.sqrt(np.sum((roi[6, [0, 2]] - roi[7, [0, 2]]) ** 2)) | |||
) | |||
x = np.sum(roi[:, 0], axis=0) / 8 | |||
y = np.sum(roi[0:4, 1], axis=0) / 4 | |||
z = np.sum(roi[:, 2], axis=0) / 8 | |||
ry = np.sum( | |||
math.atan2(roi[2, 0] - roi[1, 0], roi[2, 2] - roi[1, 2]) + | |||
math.atan2(roi[6, 0] - roi[5, 0], roi[6, 2] - roi[5, 2]) + | |||
math.atan2(roi[3, 0] - roi[0, 0], roi[3, 2] - roi[0, 2]) + | |||
math.atan2(roi[7, 0] - roi[4, 0], roi[7, 2] - roi[4, 2]) + | |||
math.atan2(roi[0, 2] - roi[1, 2], roi[1, 0] - roi[0, 0]) + | |||
math.atan2(roi[4, 2] - roi[5, 2], roi[5, 0] - roi[4, 0]) + | |||
math.atan2(roi[3, 2] - roi[2, 2], roi[2, 0] - roi[3, 0]) + | |||
math.atan2(roi[7, 2] - roi[6, 2], roi[6, 0] - roi[7, 0]) | |||
) / 8 | |||
if w > l: | |||
w, l = l, w | |||
ry = angle_in_limit(ry + np.pi / 2) | |||
ret.append([x, y, z, h, w, l, ry]) | |||
if coordinate == 'lidar': | |||
ret = camera_to_lidar_box(np.array(ret)) | |||
return np.array(ret) | |||
def point_transform(points, tx, ty, tz, rx=0, ry=0, rz=0):
    """Translate and then rotate an (N, 3) point array.

    Points are treated as homogeneous row vectors and right-multiplied by
    each matrix in turn: translation first, then the x, y and z rotations
    (each skipped when its angle equals 0).

    Args:
        points: (N, 3) array.
        tx, ty, tz: translation added to every point.
        rx, ry, rz: rotation angles in radians.

    Returns:
        (N, 3) array of transformed points.
    """
    def _about_x(angle):
        c, s = np.cos(angle), np.sin(angle)
        return np.array([[1, 0, 0, 0],
                         [0, c, -s, 0],
                         [0, s, c, 0],
                         [0, 0, 0, 1]], dtype=np.float64)

    def _about_y(angle):
        c, s = np.cos(angle), np.sin(angle)
        return np.array([[c, 0, s, 0],
                         [0, 1, 0, 0],
                         [-s, 0, c, 0],
                         [0, 0, 0, 1]], dtype=np.float64)

    def _about_z(angle):
        c, s = np.cos(angle), np.sin(angle)
        return np.array([[c, -s, 0, 0],
                         [s, c, 0, 0],
                         [0, 0, 1, 0],
                         [0, 0, 0, 1]], dtype=np.float64)

    count = points.shape[0]
    homogeneous = np.hstack([points, np.ones((count, 1))])

    # Row-vector convention: the translation lives in the last row.
    shift = np.eye(4)
    shift[3, 0:3] = tx, ty, tz
    homogeneous = np.matmul(homogeneous, shift)

    for angle, build in ((rx, _about_x), (ry, _about_y), (rz, _about_z)):
        if angle != 0:
            homogeneous = np.matmul(homogeneous, build(angle))

    return homogeneous[:, 0:3]
def box_transform(boxes, tx, ty, tz, r=0, coordinate='lidar'):
    """Translate and rotate (N, 7) boxes by transforming their corners.

    Boxes are expanded to corners, every corner set goes through
    ``point_transform`` (rotating about z for ``'lidar'``, about y
    otherwise), and the corners are collapsed back to centre format.

    Args:
        boxes: (N, 7) rows ``x, y, z, h, w, l, rz/ry``.
        tx, ty, tz: translation.
        r: rotation angle in radians.
        coordinate: ``'lidar'`` or any other value for the camera frame.

    Returns:
        (N, 7) boxes in the same format as the input.
    """
    corners = center_to_corner_box3d(boxes, coordinate=coordinate)  # (N, 8, 3)
    axis_kwarg = 'rz' if coordinate == 'lidar' else 'ry'
    for idx, box_corners in enumerate(corners):
        corners[idx] = point_transform(box_corners, tx, ty, tz, **{axis_kwarg: r})
    return corner_to_center_box3d(corners, coordinate=coordinate)
def inverse_rigid_trans(Tr):
    """Invert a rigid-body transform given as a 3x4 matrix ``[R | t]``.

    Returns an array shaped like ``Tr`` holding ``[R^T | -R^T t]``.
    """
    rotation_t = np.transpose(Tr[0:3, 0:3])
    inverse = np.zeros_like(Tr)  # 3x4
    inverse[0:3, 0:3] = rotation_t
    inverse[0:3, 3] = np.dot(-rotation_t, Tr[0:3, 3])
    return inverse
class Compose(object):
    """Apply a sequence of ``(lidar, labels)`` transforms in order.

    With probability ``p`` every transform is applied, each receiving the
    output of the previous one; otherwise the inputs are returned as-is.
    """

    def __init__(self, transforms, p=1.0):
        self.p = p
        self.transforms = transforms

    def __call__(self, lidar, labels):
        if not (np.random.random() <= self.p):
            return lidar, labels
        for transform in self.transforms:
            lidar, labels = transform(lidar, labels)
        return lidar, labels
class OneOf(object):
    """Apply exactly one transform, picked uniformly at random.

    With probability ``p`` a single entry of ``transforms`` is chosen and
    applied; otherwise the inputs are returned unchanged.
    """

    def __init__(self, transforms, p=1.0):
        self.p = p
        self.transforms = transforms

    def __call__(self, lidar, labels):
        if not (np.random.random() <= self.p):
            return lidar, labels
        choice = np.random.randint(low=0, high=len(self.transforms))
        chosen = self.transforms[choice]
        return chosen(lidar, labels)
class Random_Rotation(object):
    """Randomly rotate a point cloud and its boxes about the lidar z axis.

    With probability ``p`` one angle is drawn uniformly from
    ``[-limit_angle, limit_angle)`` and applied to both inputs.
    """

    def __init__(self, limit_angle=np.pi / 4, p=0.5):
        self.p = p
        self.limit_angle = limit_angle

    def __call__(self, lidar, labels):
        """
        :param lidar: point array; columns 0-2 are rotated in place
        :param labels: # (N', 7) x, y, z, h, w, l, r
        :return: (lidar, labels) -- ``labels`` is a new array when rotated
        """
        if not (np.random.random() <= self.p):
            return lidar, labels
        bound = self.limit_angle
        angle = np.random.uniform(-bound, bound)
        lidar[:, 0:3] = point_transform(lidar[:, 0:3], 0, 0, 0, rz=angle)
        labels = box_transform(labels, 0, 0, 0, r=angle, coordinate='lidar')
        return lidar, labels
class Random_Scaling(object):
    """Randomly scale a point cloud and its boxes by one global factor.

    With probability ``p`` a factor is drawn uniformly from
    ``[scaling_range[0], scaling_range[1])`` and applied to the point
    coordinates and to the box centres and sizes. The yaw column of
    ``labels`` and any lidar column past the first three are left alone.

    The previous implementation passed ``scaling_range[0]`` as both bounds,
    so the factor was always the lower bound.
    """

    def __init__(self, scaling_range=(0.95, 1.05), p=0.5):
        self.scaling_range = scaling_range
        self.p = p

    def __call__(self, lidar, labels):
        """
        :param lidar: point array; columns 0-2 are scaled in place
        :param labels: # (N', 7) x, y, z, h, w, l, r -- columns 0-5 scaled in place
        :return: (lidar, labels)
        """
        if np.random.random() <= self.p:
            low, high = self.scaling_range[0], self.scaling_range[1]
            factor = np.random.uniform(low, high)
            lidar[:, 0:3] = lidar[:, 0:3] * factor
            labels[:, 0:6] = labels[:, 0:6] * factor
        return lidar, labels
class Cutout(object):
    """Randomly mask out one or more rectangular patches from an image.

    Args:
        n_holes (int): Number of patches to cut out of each image.
        ratio (float): Patch height/width as a fraction of the image
            height/width.
        fill_value (float): Value written into each patch, within [0, 1].
        p (float): Probability of applying the augmentation.

    Refer from: https://github.com/uoguelph-mlrg/Cutout/blob/master/util/cutout.py
    """

    def __init__(self, n_holes, ratio, fill_value=0., p=1.0):
        assert 0. <= fill_value <= 1., "the fill value is in a range of 0 to 1"
        self.n_holes = n_holes
        self.ratio = ratio
        self.fill_value = fill_value
        self.p = p

    def __call__(self, img, targets):
        """
        Args:
            img (Tensor): Tensor image of size (C, H, W); patched in place.
            targets: rows of 8 values; index 2 and 3 are read as x and y
                fractions of the image width and height.
        Returns:
            (img, targets): the patched image and the targets whose centre
            is outside every patch.
        """
        if not (np.random.random() <= self.p):
            return img, targets

        h = img.size(1)
        w = img.size(2)
        h_cutout = int(self.ratio * h)
        w_cutout = int(self.ratio * w)

        for _ in range(self.n_holes):
            # Patch centre; the patch is clipped to the image bounds.
            y = np.random.randint(h)
            x = np.random.randint(w)
            y1 = np.clip(y - h_cutout // 2, 0, h)
            y2 = np.clip(y + h_cutout // 2, 0, h)
            x1 = np.clip(x - w_cutout // 2, 0, w)
            x2 = np.clip(x + w_cutout // 2, 0, w)

            img[:, y1: y2, x1: x2] = self.fill_value

            # Keep only targets whose centre lies outside this patch.
            keep_target = [
                target_idx
                for target_idx, (_, _, target_x, target_y, _, _, _, _) in enumerate(targets)
                if not ((x1 <= target_x * w <= x2) and (y1 <= target_y * h <= y2))
            ]
            targets = targets[keep_target]

        return img, targets
@@ -0,0 +1,378 @@ | |||
""" | |||
# -*- coding: utf-8 -*- | |||
----------------------------------------------------------------------------------- | |||
# Author: Nguyen Mau Dung | |||
# DoC: 2020.08.17 | |||
# email: nguyenmaudung93.kstn@gmail.com | |||
----------------------------------------------------------------------------------- | |||
# Description: Testing script | |||
""" | |||
import argparse | |||
import sys | |||
import os | |||
import time | |||
import warnings | |||
warnings.filterwarnings("ignore", category=UserWarning) | |||
from easydict import EasyDict as edict | |||
import cv2 | |||
import torch | |||
import numpy as np | |||
import torch.nn.functional as F | |||
src_dir = os.path.dirname(os.path.realpath(__file__)) | |||
# while not src_dir.endswith("sfa"): | |||
# src_dir = os.path.dirname(src_dir) | |||
if src_dir not in sys.path: | |||
sys.path.append(src_dir) | |||
from data_process.kitti_dataloader import create_test_dataloader | |||
from models.model_utils import create_model | |||
import config.kitti_config as cnf | |||
def _str2bool(value):
    """Parse a command-line boolean such as ``true``/``false``/``1``/``0``."""
    if isinstance(value, bool):
        return value
    lowered = str(value).strip().lower()
    if lowered in ('true', 't', 'yes', 'y', '1'):
        return True
    if lowered in ('false', 'f', 'no', 'n', '0', ''):
        return False
    raise argparse.ArgumentTypeError('Boolean value expected, got {!r}'.format(value))


def parse_test_configs():
    """Parse the testing command line and return the full configuration.

    Command-line options are merged with the fixed model settings (input
    size, heads, ...) into an ``EasyDict``. The results directory is created
    as a side effect.

    ``--no_cuda`` and ``--save_test_output`` are parsed as real booleans:
    previously any non-empty value, including the string ``False``, was
    treated as true. Both flags may also be given without a value, which
    means ``True``.

    Returns:
        EasyDict holding every option and derived setting.
    """
    parser = argparse.ArgumentParser(description='Testing config for the Implementation')
    parser.add_argument('--saved_fn', type=str, default='fpn_resnet_18', metavar='FN',
                        help='The name using for saving logs, models,...')
    parser.add_argument('-a', '--arch', type=str, default='fpn_resnet_18', metavar='ARCH',
                        help='The name of the model architecture')
    parser.add_argument('--model_dir', type=str,
                        default='/train_out_model/', metavar='PATH',
                        help='the path of the pretrained checkpoint')
    parser.add_argument('--K', type=int, default=50,
                        help='the number of top K')
    parser.add_argument('--no_cuda', type=_str2bool, nargs='?', const=True, default=False,
                        help='If true, cuda is not used.')
    parser.add_argument('--gpu_idx', default=0, type=int,
                        help='GPU index to use.')
    parser.add_argument('--num_samples', type=int, default=None,
                        help='Take a subset of the dataset to run and debug')
    parser.add_argument('--num_workers', type=int, default=1,
                        help='Number of threads for loading data')
    parser.add_argument('--batch_size', type=int, default=1,
                        help='mini-batch size (default: 1)')
    parser.add_argument('--peak_thresh', type=float, default=0.2)
    parser.add_argument('--dataset_dir', type=str, default='/dataset_dir/',
                        help='the directory of the dataset to test on')
    parser.add_argument('--results_dir', type=str, default='/results_dir/',
                        help='the directory where the test outputs are written')
    parser.add_argument('--save_test_output', type=_str2bool, nargs='?', const=True, default=True,
                        help='save the test output or not')
    parser.add_argument('--output_format', type=str, default='txt', metavar='PATH',
                        help="the type of the test output ('image' or 'video' additionally "
                             "save the BEV rendering; label txt files are always written)")
    parser.add_argument('--output_video_fn', type=str, default='out_fpn_resnet_18', metavar='PATH',
                        help='the video filename if the output format is video')
    parser.add_argument('--output-width', type=int, default=608,
                        help='the width of showing output, the height maybe vary')

    configs = edict(vars(parser.parse_args()))
    configs.pin_memory = True
    configs.distributed = False  # For testing on 1 GPU only

    configs.input_size = (1216, 608)
    configs.hm_size = (304, 152)
    configs.down_ratio = 4
    configs.max_objects = 50

    configs.imagenet_pretrained = False
    configs.head_conv = 64
    configs.num_classes = 3
    configs.num_center_offset = 2
    configs.num_z = 1
    configs.num_dim = 3
    configs.num_direction = 2  # sin, cos

    # Output channels of each prediction head.
    configs.heads = {
        'hm_cen': configs.num_classes,
        'cen_offset': configs.num_center_offset,
        'direction': configs.num_direction,
        'z_coor': configs.num_z,
        'dim': configs.num_dim
    }
    configs.num_input_features = 4

    ####################################################################
    ##############Dataset, Checkpoints, and results dir configs#########
    ####################################################################
    configs.root_dir = '../'
    # dataset_dir and results_dir come straight from the command line.
    make_folder(configs.results_dir)

    return configs
def _sigmoid(x):
    """Apply sigmoid to ``x`` in place and return it clamped to [1e-4, 1 - 1e-4].

    ``x`` itself is overwritten with the un-clamped sigmoid values; the
    clamped result is a separate tensor.
    """
    activated = x.sigmoid_()
    return torch.clamp(activated, min=1e-4, max=1 - 1e-4)
def time_synchronized():
    """Return ``time.time()``, first synchronizing CUDA when it is available."""
    if torch.cuda.is_available():
        torch.cuda.synchronize()
    return time.time()
def make_folder(folder_name):
    """Create ``folder_name`` (and missing parents) if it does not exist yet.

    Uses ``exist_ok=True`` rather than a separate existence check, so a
    directory created concurrently between check and creation no longer
    raises.
    """
    os.makedirs(folder_name, exist_ok=True)
def drawRotatedBox(img, x, y, w, l, yaw, color):
    """Draw a rotated box outline on ``img`` and highlight one of its edges.

    The four corners come from ``get_corners``. The closed outline is drawn
    in ``color`` and the edge between corner 0 and corner 3 is drawn again
    in (255, 255, 0), both with thickness 2. ``img`` is modified in place.
    """
    corners = get_corners(x, y, w, l, yaw)

    outline = corners.reshape(-1, 1, 2).astype(int)
    cv2.polylines(img, [outline], True, color, 2)

    flat = corners.reshape(-1, 2)
    start = (int(flat[0, 0]), int(flat[0, 1]))
    end = (int(flat[3, 0]), int(flat[3, 1]))
    cv2.line(img, start, end, (255, 255, 0), 2)
# bev image coordinates format | |||
def get_corners(x, y, w, l, yaw):
    """Return the four corners of a rotated rectangle as a (4, 2) float32 array.

    Args:
        x, y: rectangle centre.
        w, l: rectangle width and length.
        yaw: rotation angle in radians.

    Returns:
        Corners ordered front-left, rear-left, rear-right, front-right.
    """
    cos_yaw = np.cos(yaw)
    sin_yaw = np.sin(yaw)

    # Half-extent projections, shared by all four corners.
    w_cos = w / 2 * cos_yaw
    w_sin = w / 2 * sin_yaw
    l_cos = l / 2 * cos_yaw
    l_sin = l / 2 * sin_yaw

    return np.array([
        [x - w_cos - l_sin, y - w_sin + l_cos],  # front left
        [x - w_cos + l_sin, y - w_sin - l_cos],  # rear left
        [x + w_cos + l_sin, y + w_sin - l_cos],  # rear right
        [x + w_cos - l_sin, y + w_sin + l_cos],  # front right
    ], dtype=np.float32)
def _nms(heat, kernel=3):
    """Keep only the local maxima of a heatmap.

    A value survives when it equals the maximum of its ``kernel`` x
    ``kernel`` neighbourhood; every other value is set to zero.
    """
    pad = (kernel - 1) // 2
    local_max = F.max_pool2d(heat, (kernel, kernel), stride=1, padding=pad)
    is_peak = local_max.eq(heat).float()
    return heat * is_peak
def _gather_feat(feat, ind, mask=None):
    """Select rows of ``feat`` along dim 1 by index.

    Args:
        feat: (batch, M, C) tensor.
        ind: (batch, K) integer indices into dim 1 of ``feat``.
        mask: optional (batch, K) boolean mask; when given, only the
            selected rows are kept and flattened to (-1, C).

    Returns:
        (batch, K, C) tensor, or (-1, C) when ``mask`` is given.
    """
    channels = feat.size(2)
    index = ind.unsqueeze(2).expand(ind.size(0), ind.size(1), channels)
    gathered = feat.gather(1, index)
    if mask is None:
        return gathered
    selector = mask.unsqueeze(2).expand_as(gathered)
    return gathered[selector].view(-1, channels)
def _transpose_and_gather_feat(feat, ind):
    """Gather per-location feature vectors from a (batch, C, H, W) map.

    The map is rearranged to (batch, H*W, C) and the rows named by ``ind``
    (indices into the flattened H*W axis) are picked with ``_gather_feat``.
    """
    channels_last = feat.permute(0, 2, 3, 1).contiguous()
    flat = channels_last.view(channels_last.size(0), -1, channels_last.size(3))
    return _gather_feat(flat, ind)
def _topk(scores, K=40):
    """Pick the K best-scoring heatmap locations across all classes.

    The top K locations are found per class first, then the overall top K
    are taken from those ``cat * K`` candidates.

    Args:
        scores: (batch, cat, height, width) heatmap.
        K: number of locations to keep.

    Returns:
        Tuple ``(score, inds, clses, ys, xs)``, each of shape (batch, K):
        score, flattened spatial index, class id, row and column.
    """
    batch, cat, height, width = scores.size()

    # Stage 1: top K per class over the flattened spatial axis.
    class_scores, class_inds = torch.topk(scores.view(batch, cat, -1), K)
    class_inds = class_inds % (height * width)
    class_ys = torch.floor_divide(class_inds, width).float()
    class_xs = (class_inds % width).int().float()

    # Stage 2: top K over all classes; the class id is recovered from the
    # position inside the (cat * K) candidate list.
    topk_score, topk_ind = torch.topk(class_scores.view(batch, -1), K)
    topk_clses = torch.floor_divide(topk_ind, K).int()

    def _select(candidates):
        return _gather_feat(candidates.view(batch, -1, 1), topk_ind).view(batch, K)

    topk_inds = _select(class_inds)
    topk_ys = _select(class_ys)
    topk_xs = _select(class_xs)

    return topk_score, topk_inds, topk_clses, topk_ys, topk_xs
def decode(hm_cen, cen_offset, direction, z_coor, dim, K=40):
    """Turn the network heads into a (batch_size, K, 10) detection tensor.

    Peaks of ``hm_cen`` are found with ``_nms`` and ``_topk``; the other
    heads are sampled at those peak locations.

    Column layout of the result:
        scores 0:1, xs 1:2, ys 2:3, z_coor 3:4, dim 4:7, direction 7:9,
        clses 9:10

    Args:
        hm_cen: (batch, num_classes, H, W) centre heatmap.
        cen_offset: (batch, 2, H, W) centre offsets, or ``None`` to add a
            fixed 0.5 to both coordinates instead.
        direction: (batch, 2, H, W) heading head.
        z_coor: (batch, 1, H, W) height head.
        dim: (batch, 3, H, W) size head.
        K: number of detections kept per sample.
    """
    batch_size, num_classes, height, width = hm_cen.size()

    hm_cen = _nms(hm_cen)
    scores, inds, clses, ys, xs = _topk(hm_cen, K=K)

    xs = xs.view(batch_size, K, 1)
    ys = ys.view(batch_size, K, 1)
    if cen_offset is None:
        xs = xs + 0.5
        ys = ys + 0.5
    else:
        cen_offset = _transpose_and_gather_feat(cen_offset, inds)
        cen_offset = cen_offset.view(batch_size, K, 2)
        xs = xs + cen_offset[:, :, 0:1]
        ys = ys + cen_offset[:, :, 1:2]

    direction = _transpose_and_gather_feat(direction, inds).view(batch_size, K, 2)
    z_coor = _transpose_and_gather_feat(z_coor, inds).view(batch_size, K, 1)
    dim = _transpose_and_gather_feat(dim, inds).view(batch_size, K, 3)
    clses = clses.view(batch_size, K, 1).float()
    scores = scores.view(batch_size, K, 1)

    # detections: [batch_size, K, 10]
    return torch.cat([scores, xs, ys, z_coor, dim, direction, clses], dim=2)
def get_yaw(direction):
    """Return ``arctan2(column 0, column 1)`` of an (N, 2) array as (N, 1).

    NOTE(review): the two columns are presumably (sin, cos) of the heading
    -- confirm against the training targets.
    """
    first = direction[:, 0:1]
    second = direction[:, 1:2]
    return np.arctan2(first, second)
def post_processing(detections, num_classes=3, down_ratio=4, peak_thresh=0.2):
    """Group decoded detections by class, rescale them and drop weak ones.

    :param detections: [batch_size, K, 10]
    # (scores x 1, xs x 1, ys x 1, z_coor x 1, dim x 3, direction x 2, clses x 1)
    # (scores-0:1, xs-1:2, ys-2:3, z_coor-3:4, dim-4:7, direction-7:9, clses-9:10)
    :param num_classes: number of class ids to collect
    :param down_ratio: factor applied to the x and y columns
    :param peak_thresh: detections with a score not above this are removed
    :return: one dict per sample mapping class id -> (n, 8) array with
        columns score, x, y, z, h, w, l, yaw
    """
    # TODO: Need to consider rescale to the original scale: x, y
    ret = []
    for sample in detections:
        class_ids = sample[:, -1]
        top_preds = {}
        for cls_id in range(num_classes):
            rows = sample[class_ids == cls_id]
            # score, x, y, z, h, w, l, yaw
            preds = np.concatenate([
                rows[:, 0:1],
                rows[:, 1:2] * down_ratio,
                rows[:, 2:3] * down_ratio,
                rows[:, 3:4],
                rows[:, 4:5],
                rows[:, 5:6] / cnf.bound_size_y * cnf.BEV_WIDTH,
                rows[:, 6:7] / cnf.bound_size_x * cnf.BEV_HEIGHT,
                get_yaw(rows[:, 7:9]).astype(np.float32)], axis=1)
            # Filter by peak_thresh
            if len(preds) > 0:
                preds = preds[preds[:, 0] > peak_thresh]
            top_preds[cls_id] = preds
        ret.append(top_preds)
    return ret
def draw_predictions(img, detections, num_classes=3):
    """Draw every detection as a rotated box on ``img`` and return ``img``.

    ``detections`` maps class id -> rows of
    ``score, x, y, z, h, w, l, yaw``; each class uses its colour from
    ``cnf.colors``.
    """
    for cls_id in range(num_classes):
        class_dets = detections[cls_id]
        if len(class_dets) == 0:
            continue
        box_color = cnf.colors[int(cls_id)]
        for det in class_dets:
            # (scores-0:1, x-1:2, y-2:3, z-3:4, dim-4:7, yaw-7:8)
            _score, _x, _y, _z, _h, _w, _l, _yaw = det
            drawRotatedBox(img, _x, _y, _w, _l, _yaw, box_color)
    return img
def convert_det_to_real_values(detections, num_classes=3):
    """Map BEV-image detections back to metric values.

    ``detections`` maps class id -> rows of
    ``score, x, y, z, h, w, l, yaw`` in BEV pixels. The image x axis maps
    to the metric y axis and vice versa, the yaw sign is flipped, and every
    value is rounded to two decimals.

    Returns:
        ``np.ndarray`` with one row ``cls_id, h, w, l, x, y, z, yaw`` per
        detection (an empty array when there are none).
    """
    kitti_dets = []
    for cls_id in range(num_classes):
        class_dets = detections[cls_id]
        if len(class_dets) == 0:
            continue
        for det in class_dets:
            # (scores-0:1, x-1:2, y-2:3, z-3:4, dim-4:7, yaw-7:8)
            _score, _x, _y, _z, _h, _w, _l, _yaw = det
            # The "/ 1" divisions are kept verbatim: with NumPy scalars they
            # can change the result dtype before rounding.
            yaw = round(-_yaw / 1, 2)
            x = round(_y / cnf.BEV_HEIGHT * cnf.bound_size_x + cnf.boundary['minX'], 2)
            y = round(_x / cnf.BEV_WIDTH * cnf.bound_size_y + cnf.boundary['minY'], 2)
            z = round(_z + cnf.boundary['minZ'], 2)
            w = round(_w / cnf.BEV_WIDTH * cnf.bound_size_y, 2)
            l = round(_l / cnf.BEV_HEIGHT * cnf.bound_size_x, 2)
            h = round(_h / 1, 2)
            kitti_dets.append([cls_id, h, w, l, x, y, z, yaw])
    return np.array(kitti_dets)
if __name__ == '__main__':
    # Testing entry point: load a checkpoint, run the model over the test
    # set and write one label txt file (plus optional image/video) per sample.
    print("=".ljust(66, "="))
    configs = parse_test_configs()

    model = create_model(configs)
    print('\n\n' + '-*=' * 30 + '\n\n')

    # --model_dir may name a checkpoint file or a directory of checkpoints.
    if os.path.isfile(configs.model_dir):
        model_path = configs.model_dir
    else:
        # Take the last checkpoint by sorted file name; os.listdir() returns
        # entries in arbitrary order, so the raw last entry is not stable.
        checkpoint_names = sorted(os.listdir(configs.model_dir))
        if not checkpoint_names:
            raise FileNotFoundError('No checkpoint file found in {}'.format(configs.model_dir))
        model_path = os.path.join(configs.model_dir, checkpoint_names[-1])
    print('Loaded weights from {}\n'.format(model_path))

    configs.device = torch.device('cpu' if configs.no_cuda else 'cuda:{}'.format(configs.gpu_idx))
    model.load_state_dict(torch.load(model_path, map_location=configs.device))
    model = model.to(device=configs.device)

    # results_dir_img is not set by parse_test_configs; fall back to
    # results_dir so the image/video outputs do not raise AttributeError.
    results_dir_img = configs.get('results_dir_img', configs.results_dir)

    out_cap = None
    model.eval()

    test_dataloader = create_test_dataloader(configs)
    try:
        with torch.no_grad():
            for batch_idx, batch_data in enumerate(test_dataloader):
                bev_maps, metadatas = batch_data
                input_bev_maps = bev_maps.to(configs.device, non_blocking=True).float()

                t1 = time_synchronized()
                outputs = model(input_bev_maps)
                outputs['hm_cen'] = _sigmoid(outputs['hm_cen'])
                outputs['cen_offset'] = _sigmoid(outputs['cen_offset'])
                # detections size (batch_size, K, 10)
                detections = decode(outputs['hm_cen'], outputs['cen_offset'], outputs['direction'],
                                    outputs['z_coor'], outputs['dim'], K=configs.K)
                detections = detections.cpu().numpy().astype(np.float32)
                detections = post_processing(detections, configs.num_classes, configs.down_ratio,
                                             configs.peak_thresh)
                t2 = time_synchronized()

                detections = detections[0]  # only first batch

                # Draw prediction in the image
                bev_map = (bev_maps.squeeze().permute(1, 2, 0).numpy() * 255).astype(np.uint8)
                bev_map = cv2.resize(bev_map, (cnf.BEV_WIDTH, cnf.BEV_HEIGHT))
                bev_map = draw_predictions(bev_map, detections.copy(), configs.num_classes)

                # Rotate the bev_map
                bev_map = cv2.rotate(bev_map, cv2.ROTATE_180)

                kitti_dets = convert_det_to_real_values(detections)

                elapsed = t2 - t1
                fps = 1 / elapsed if elapsed > 0 else float('inf')
                print('\tDone testing the {}th sample, time: {:.1f}ms, speed {:.2f}FPS'.format(
                    batch_idx, elapsed * 1000, fps))

                if configs.save_test_output:
                    img_fn = os.path.basename(metadatas['bev_path'][0])[:-4]
                    if configs.output_format == 'image':
                        cv2.imwrite(os.path.join(results_dir_img, '{}.jpg'.format(img_fn)), bev_map)
                    elif configs.output_format == 'video':
                        if out_cap is None:
                            out_cap_h, out_cap_w = bev_map.shape[:2]
                            fourcc = cv2.VideoWriter_fourcc(*'MJPG')
                            out_cap = cv2.VideoWriter(
                                os.path.join(results_dir_img, '{}.avi'.format(configs.output_video_fn)),
                                fourcc, 30, (out_cap_w, out_cap_h))
                        out_cap.write(bev_map)

                    # One label line per detection:
                    # "<class> 0 0 0 0 0 0 0 h w l x y z yaw"
                    txt_path = os.path.join(configs.results_dir, '{}.txt'.format(img_fn))
                    with open(txt_path, 'w') as txt_file:
                        for det in kitti_dets:
                            fields = ' '.join(str(value) for value in det[1:8])
                            txt_file.write(
                                cnf.CLASS_ID_TO_NAME[det[0]] + ' 0 0 0 0 0 0 0 ' + fields + '\n')
    finally:
        # Release the video writer even when a sample fails midway.
        if out_cap:
            out_cap.release()
        cv2.destroyAllWindows()
@@ -0,0 +1,163 @@ | |||
# ------------------------------------------------------------------------------ | |||
# Portions of this code are from | |||
# CornerNet (https://github.com/princeton-vl/CornerNet) | |||
# Copyright (c) 2018, University of Michigan | |||
# Licensed under the BSD 3-Clause License | |||
# Modified by Nguyen Mau Dung (2020.08.09) | |||
# ------------------------------------------------------------------------------ | |||
import os | |||
import sys | |||
import math | |||
import torch.nn as nn | |||
import torch | |||
import torch.nn.functional as F | |||
src_dir = os.path.dirname(os.path.realpath(__file__)) | |||
# while not src_dir.endswith("sfa"): | |||
# src_dir = os.path.dirname(src_dir) | |||
if src_dir not in sys.path: | |||
sys.path.append(src_dir) | |||
from utils.torch_utils import to_cpu, _sigmoid | |||
def _gather_feat(feat, ind, mask=None):
    """Select rows of ``feat`` along dim 1 by index.

    Args:
        feat: (batch, M, C) tensor.
        ind: (batch, K) integer indices into dim 1 of ``feat``.
        mask: optional (batch, K) boolean mask; when given, only the
            selected rows are kept and flattened to (-1, C).

    Returns:
        (batch, K, C) tensor, or (-1, C) when ``mask`` is given.
    """
    channels = feat.size(2)
    index = ind.unsqueeze(2).expand(ind.size(0), ind.size(1), channels)
    gathered = feat.gather(1, index)
    if mask is None:
        return gathered
    selector = mask.unsqueeze(2).expand_as(gathered)
    return gathered[selector].view(-1, channels)
def _transpose_and_gather_feat(feat, ind):
    """Gather per-location feature vectors from a (batch, C, H, W) map.

    The map is rearranged to (batch, H*W, C) and the rows named by ``ind``
    (indices into the flattened H*W axis) are picked with ``_gather_feat``.
    """
    channels_last = feat.permute(0, 2, 3, 1).contiguous()
    flat = channels_last.view(channels_last.size(0), -1, channels_last.size(3))
    return _gather_feat(flat, ind)
def _neg_loss(pred, gt, alpha=2, beta=4):
    ''' Modified focal loss. Exactly the same as CornerNet.
    Runs faster and costs a little bit more memory

    Locations where ``gt`` equals 1 are positives; locations below 1 are
    negatives, down-weighted by ``(1 - gt) ** beta``. The summed loss is
    divided by the number of positives, unless there are none.

    Arguments:
        pred (batch x c x h x w)
        gt (batch x c x h x w)
    '''
    positive = gt.eq(1).float()
    negative = gt.lt(1).float()
    negative_weight = torch.pow(1 - gt, beta)

    pos_term = (torch.log(pred) * torch.pow(1 - pred, alpha) * positive).sum()
    neg_term = (torch.log(1 - pred) * torch.pow(pred, alpha) * negative_weight * negative).sum()
    num_pos = positive.float().sum()

    if num_pos == 0:
        return 0 - neg_term
    return 0 - (pos_term + neg_term) / num_pos
class FocalLoss(nn.Module):
    '''nn.Module wrapper for focal loss'''

    def __init__(self):
        super().__init__()
        # Stored as an attribute so forward() delegates to the module-level function.
        self.neg_loss = _neg_loss

    def forward(self, out, target):
        loss = self.neg_loss(out, target)
        return loss
class L1Loss(nn.Module):
    """Masked L1 regression loss sampled at given locations.

    Predictions are gathered from the output map at ``ind``, both sides are
    multiplied by the mask, and the summed absolute error is divided by the
    number of active mask elements (plus 1e-4 to avoid division by zero).

    Uses ``reduction='sum'`` in place of the deprecated
    ``size_average=False``; the two are equivalent.
    """

    def __init__(self):
        super(L1Loss, self).__init__()

    def forward(self, output, mask, ind, target):
        """
        Args:
            output: (batch, C, H, W) prediction map.
            mask: (batch, K) mask of valid entries.
            ind: (batch, K) indices into the flattened H*W axis.
            target: (batch, K, C) regression targets.
        """
        pred = _transpose_and_gather_feat(output, ind)
        mask = mask.unsqueeze(2).expand_as(pred).float()
        loss = F.l1_loss(pred * mask, target * mask, reduction='sum')
        loss = loss / (mask.sum() + 1e-4)
        return loss
class L1Loss_Balanced(nn.Module):
    """Balanced L1 Loss
    paper: https://arxiv.org/pdf/1904.02701.pdf (CVPR 2019)
    Code refer from: https://github.com/OceanPang/Libra_R-CNN
    """

    def __init__(self, alpha=0.5, gamma=1.5, beta=1.0):
        super().__init__()
        assert beta > 0
        self.alpha = alpha
        self.gamma = gamma
        self.beta = beta

    def forward(self, output, mask, ind, target):
        """Masked balanced-L1 loss at the locations ``ind``.

        The per-element losses are summed and divided by the number of
        active mask elements (plus 1e-4).
        """
        pred = _transpose_and_gather_feat(output, ind)
        mask = mask.unsqueeze(2).expand_as(pred).float()
        per_element = self.balanced_l1_loss(pred * mask, target * mask)
        return per_element.sum() / (mask.sum() + 1e-4)

    def balanced_l1_loss(self, pred, target):
        """Element-wise balanced L1 loss between two same-sized tensors."""
        assert pred.size() == target.size() and target.numel() > 0

        diff = torch.abs(pred - target)
        b = math.exp(self.gamma / self.alpha) - 1
        # Errors below beta use the log-shaped branch, the rest the linear one.
        small_error = self.alpha / b * (b * diff + 1) * torch.log(b * diff / self.beta + 1) - self.alpha * diff
        large_error = self.gamma * diff + self.gamma / b - self.alpha * self.beta
        return torch.where(diff < self.beta, small_error, large_error)
class Compute_Loss(nn.Module):
    """Weighted sum of the five head losses.

    Focal loss for the centre heatmap, plain L1 for centre offset and
    direction, balanced L1 for z coordinate and dimensions. Every weight
    is 1.0.
    """

    def __init__(self, device):
        super().__init__()
        self.device = device
        self.focal_loss = FocalLoss()
        self.l1_loss = L1Loss()
        self.l1_loss_balanced = L1Loss_Balanced(alpha=0.5, gamma=1.5, beta=1.0)
        self.weight_hm_cen = 1.
        self.weight_z_coor = 1.
        self.weight_cenoff = 1.
        self.weight_dim = 1.
        self.weight_direction = 1.

    def forward(self, outputs, tg):
        """Return ``(total_loss, loss_stats)`` for one batch.

        Args:
            outputs: dict of raw head outputs; ``hm_cen`` and ``cen_offset``
                are replaced in this dict by their clamped sigmoid.
            tg: dict of targets (``hm_cen``, ``obj_mask``, ``indices_center``
                and one entry per regression head).
        """
        outputs['hm_cen'] = _sigmoid(outputs['hm_cen'])
        outputs['cen_offset'] = _sigmoid(outputs['cen_offset'])

        obj_mask = tg['obj_mask']
        centers = tg['indices_center']

        l_hm_cen = self.focal_loss(outputs['hm_cen'], tg['hm_cen'])
        l_cen_offset = self.l1_loss(outputs['cen_offset'], obj_mask, centers, tg['cen_offset'])
        l_direction = self.l1_loss(outputs['direction'], obj_mask, centers, tg['direction'])
        # Apply the L1_loss balanced for z coor and dimension regression
        l_z_coor = self.l1_loss_balanced(outputs['z_coor'], obj_mask, centers, tg['z_coor'])
        l_dim = self.l1_loss_balanced(outputs['dim'], obj_mask, centers, tg['dim'])

        total_loss = l_hm_cen * self.weight_hm_cen + l_cen_offset * self.weight_cenoff + \
                     l_dim * self.weight_dim + l_direction * self.weight_direction + \
                     l_z_coor * self.weight_z_coor

        named_losses = [
            ('total_loss', total_loss),
            ('hm_cen_loss', l_hm_cen),
            ('cen_offset_loss', l_cen_offset),
            ('dim_loss', l_dim),
            ('direction_loss', l_direction),
            ('z_coor_loss', l_z_coor),
        ]
        loss_stats = {name: to_cpu(value).item() for name, value in named_losses}

        return total_loss, loss_stats
@@ -0,0 +1,252 @@ | |||
""" | |||
# --------------------------------------------------------------------------------- | |||
# -*- coding: utf-8 -*- | |||
----------------------------------------------------------------------------------- | |||
# Copyright (c) Microsoft | |||
# Licensed under the MIT License. | |||
# Written by Bin Xiao (Bin.Xiao@microsoft.com) | |||
# Modified by Xingyi Zhou | |||
# Refer from: https://github.com/xingyizhou/CenterNet | |||
# Modifier: Nguyen Mau Dung (2020.08.09) | |||
# ------------------------------------------------------------------------------ | |||
""" | |||
from __future__ import absolute_import | |||
from __future__ import division | |||
from __future__ import print_function | |||
import os | |||
import torch | |||
import torch.nn as nn | |||
import torch.utils.model_zoo as model_zoo | |||
import torch.nn.functional as F | |||
# Momentum handed to the BatchNorm2d layers built below.
BN_MOMENTUM = 0.1

# Pretrained ResNet checkpoints hosted on download.pytorch.org, keyed by
# architecture name.
model_urls = dict(
    resnet18='https://download.pytorch.org/models/resnet18-5c106cde.pth',
    resnet34='https://download.pytorch.org/models/resnet34-333f7ec4.pth',
    resnet50='https://download.pytorch.org/models/resnet50-19c8e357.pth',
    resnet101='https://download.pytorch.org/models/resnet101-5d3b4d8f.pth',
    resnet152='https://download.pytorch.org/models/resnet152-b121ed2d.pth',
)
def conv3x3(in_planes, out_planes, stride=1):
    """3x3 convolution with padding 1 and no bias term."""
    conv_kwargs = dict(kernel_size=3, stride=stride, padding=1, bias=False)
    return nn.Conv2d(in_planes, out_planes, **conv_kwargs)
class BasicBlock(nn.Module):
    """Residual block of two 3x3 convolutions, each followed by batch norm.

    The block input (passed through ``downsample`` when one is given) is
    added to the second batch-norm output before the final ReLU.
    """

    # Ratio of output channels to ``planes``.
    expansion = 1

    def __init__(self, inplanes, planes, stride=1, downsample=None):
        super().__init__()
        self.conv1 = conv3x3(inplanes, planes, stride)
        self.bn1 = nn.BatchNorm2d(planes, momentum=BN_MOMENTUM)
        self.relu = nn.ReLU(inplace=True)
        self.conv2 = conv3x3(planes, planes)
        self.bn2 = nn.BatchNorm2d(planes, momentum=BN_MOMENTUM)
        self.downsample = downsample
        self.stride = stride

    def forward(self, x):
        out = self.relu(self.bn1(self.conv1(x)))
        out = self.bn2(self.conv2(out))

        # Shortcut branch: identity unless a projection module was supplied.
        residual = x if self.downsample is None else self.downsample(x)

        out += residual
        return self.relu(out)
class Bottleneck(nn.Module):
    """Residual block built from 1x1 -> 3x3 -> 1x1 convolutions.

    The last convolution outputs ``planes * expansion`` channels. The block
    input (passed through ``downsample`` when one is given) is added before
    the final ReLU. Only the 3x3 convolution uses ``stride``.
    """

    # Ratio of output channels to ``planes``.
    expansion = 4

    def __init__(self, inplanes, planes, stride=1, downsample=None):
        super().__init__()
        self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=1, bias=False)
        self.bn1 = nn.BatchNorm2d(planes, momentum=BN_MOMENTUM)
        self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride, padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(planes, momentum=BN_MOMENTUM)
        self.conv3 = nn.Conv2d(planes, planes * self.expansion, kernel_size=1, bias=False)
        self.bn3 = nn.BatchNorm2d(planes * self.expansion, momentum=BN_MOMENTUM)
        self.relu = nn.ReLU(inplace=True)
        self.downsample = downsample
        self.stride = stride

    def forward(self, x):
        out = self.relu(self.bn1(self.conv1(x)))
        out = self.relu(self.bn2(self.conv2(out)))
        out = self.bn3(self.conv3(out))

        # Shortcut branch: identity unless a projection module was supplied.
        residual = x if self.downsample is None else self.downsample(x)

        out += residual
        return self.relu(out)
class PoseResNet(nn.Module):
    """ResNet encoder with a three-level keypoint feature-pyramid decoder.

    The encoder is a ResNet stem followed by four residual stages.  The
    decoder upsamples by 2x, concatenates the matching encoder stage and fuses
    the result with a 1x1 convolution, three times.  Every entry of ``heads``
    gets one prediction branch per decoder level (registered as
    ``fpn<level>_<head>``); ``forward`` blends the three per-level predictions
    with a softmax weighting (see ``apply_kfpn``).

    Args:
        block: residual block class; its ``expansion`` attribute determines
            the channel widths of the encoder stages.
        layers: number of blocks in each of the four residual stages.
        heads: mapping ``head name -> number of output channels``.
        head_conv: width of the hidden 3x3 conv of every head branch; a value
            ``<= 0`` makes each branch a single 1x1 conv.
        **kwargs: accepted and ignored.
    """

    def __init__(self, block, layers, heads, head_conv, **kwargs):
        super(PoseResNet, self).__init__()
        self.inplanes = 64
        self.deconv_with_bias = False
        self.heads = heads

        # Encoder: stem (overall stride 4) + four residual stages.
        self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3, bias=False)
        self.bn1 = nn.BatchNorm2d(64, momentum=BN_MOMENTUM)
        self.relu = nn.ReLU(inplace=True)
        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
        self.layer1 = self._make_layer(block, 64, layers[0])
        self.layer2 = self._make_layer(block, 128, layers[1], stride=2)
        self.layer3 = self._make_layer(block, 256, layers[2], stride=2)
        self.layer4 = self._make_layer(block, 512, layers[3], stride=2)

        # Decoder fusion convs.  Their input widths depend on the block
        # expansion; hard-coding 768/384/192 only works for expansion == 1.
        level1_in, level2_in, level3_in = self._fusion_in_channels(block.expansion)
        self.conv_up_level1 = nn.Conv2d(level1_in, 256, kernel_size=1, stride=1, padding=0)
        self.conv_up_level2 = nn.Conv2d(level2_in, 128, kernel_size=1, stride=1, padding=0)
        self.conv_up_level3 = nn.Conv2d(level3_in, 64, kernel_size=1, stride=1, padding=0)

        # One branch per (decoder level, head).  Heads are visited in sorted
        # order so the module registration order is deterministic.
        fpn_channels = [256, 128, 64]
        for fpn_idx, fpn_c in enumerate(fpn_channels):
            for head in sorted(self.heads):
                num_output = self.heads[head]
                if head_conv > 0:
                    fc = nn.Sequential(
                        nn.Conv2d(fpn_c, head_conv, kernel_size=3, padding=1, bias=True),
                        nn.ReLU(inplace=True),
                        nn.Conv2d(head_conv, num_output, kernel_size=1, stride=1, padding=0))
                else:
                    fc = nn.Conv2d(in_channels=fpn_c, out_channels=num_output, kernel_size=1, stride=1, padding=0)
                setattr(self, 'fpn{}_{}'.format(fpn_idx, head), fc)

    @staticmethod
    def _fusion_in_channels(expansion):
        """Return the input widths of the three 1x1 fusion convolutions.

        Each width is the channel count of the upsampled decoder tensor plus
        the channel count of the encoder stage concatenated with it.
        """
        return (
            512 * expansion + 256 * expansion,  # up(layer4) ++ layer3
            256 + 128 * expansion,              # up(conv_up_level1) ++ layer2
            128 + 64 * expansion,               # up(conv_up_level2) ++ layer1
        )

    def _make_layer(self, block, planes, blocks, stride=1):
        """Build one residual stage of ``blocks`` blocks and update ``self.inplanes``."""
        downsample = None
        # A projection shortcut is needed whenever the first block changes the
        # spatial size or the channel count.
        if stride != 1 or self.inplanes != planes * block.expansion:
            downsample = nn.Sequential(
                nn.Conv2d(self.inplanes, planes * block.expansion, kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(planes * block.expansion, momentum=BN_MOMENTUM),
            )

        layers = [block(self.inplanes, planes, stride, downsample)]
        self.inplanes = planes * block.expansion
        for _ in range(1, blocks):
            layers.append(block(self.inplanes, planes))

        return nn.Sequential(*layers)

    def forward(self, x):
        """Return ``{head name: tensor}`` with every tensor at 1/4 of the input size."""
        _, _, input_h, input_w = x.size()
        hm_h, hm_w = input_h // 4, input_w // 4

        x = self.conv1(x)
        x = self.bn1(x)
        x = self.relu(x)
        x = self.maxpool(x)

        out_layer1 = self.layer1(x)
        out_layer2 = self.layer2(out_layer1)
        out_layer3 = self.layer3(out_layer2)
        out_layer4 = self.layer4(out_layer3)

        # Shape comments below are for a 224x224 input with expansion == 1.
        # up_level1: torch.Size([b, 512, 14, 14])
        up_level1 = F.interpolate(out_layer4, scale_factor=2, mode='bilinear', align_corners=True)
        concat_level1 = torch.cat((up_level1, out_layer3), dim=1)
        # up_level2: torch.Size([b, 256, 28, 28])
        up_level2 = F.interpolate(self.conv_up_level1(concat_level1), scale_factor=2, mode='bilinear',
                                  align_corners=True)
        concat_level2 = torch.cat((up_level2, out_layer2), dim=1)
        # up_level3: torch.Size([b, 128, 56, 56])
        up_level3 = F.interpolate(self.conv_up_level2(concat_level2), scale_factor=2, mode='bilinear',
                                  align_corners=True)
        # up_level4: torch.Size([b, 64, 56, 56])
        up_level4 = self.conv_up_level3(torch.cat((up_level3, out_layer1), dim=1))

        ret = {}
        for head in self.heads:
            temp_outs = []
            for fpn_idx, fpn_input in enumerate([up_level2, up_level3, up_level4]):
                fpn_out = getattr(self, 'fpn{}_{}'.format(fpn_idx, head))(fpn_input)
                _, _, fpn_out_h, fpn_out_w = fpn_out.size()
                # Resize so every level matches the heat-map resolution.
                if (fpn_out_w != hm_w) or (fpn_out_h != hm_h):
                    fpn_out = F.interpolate(fpn_out, size=(hm_h, hm_w))
                temp_outs.append(fpn_out)
            # Blend the three levels with the softmax weighting.
            ret[head] = self.apply_kfpn(temp_outs)

        return ret

    def apply_kfpn(self, outs):
        """Blend same-shaped tensors using their own softmax as weights.

        The tensors are stacked on a new last axis, a softmax is taken over
        that axis and the softmax-weighted sum is returned.
        """
        stacked = torch.stack(outs, dim=-1)
        weights = F.softmax(stacked, dim=-1)
        return (stacked * weights).sum(dim=-1)

    def init_weights(self, num_layers, pretrained=True):
        """Initialise the head output convs and load the backbone checkpoint.

        Nothing happens when ``pretrained`` is falsy.  Otherwise the output
        conv of every head branch is initialised, then the checkpoint from
        ``model_urls['resnet<num_layers>']`` is loaded with ``strict=False``.
        """
        if pretrained:
            # TODO: Check initial weights for head later
            for fpn_idx in [0, 1, 2]:  # 3 FPN layers
                for head in self.heads:
                    final_layer = getattr(self, 'fpn{}_{}'.format(fpn_idx, head))
                    for m in final_layer.modules():
                        if isinstance(m, nn.Conv2d):
                            # Only the conv whose output width equals the head
                            # width (the branch output) is touched.
                            if m.weight.shape[0] == self.heads[head]:
                                if 'hm' in head:
                                    nn.init.constant_(m.bias, -2.19)
                                else:
                                    nn.init.normal_(m.weight, std=0.001)
                                    nn.init.constant_(m.bias, 0)

            url = model_urls['resnet{}'.format(num_layers)]
            pretrained_state_dict = model_zoo.load_url(url)
            print('=> loading pretrained model {}'.format(url))
            self.load_state_dict(pretrained_state_dict, strict=False)
# Supported depths -> (residual block class, number of blocks per stage).
resnet_spec = {
    18: (BasicBlock, [2, 2, 2, 2]),
    34: (BasicBlock, [3, 4, 6, 3]),
    50: (Bottleneck, [3, 4, 6, 3]),
    101: (Bottleneck, [3, 4, 23, 3]),
    152: (Bottleneck, [3, 8, 36, 3]),
}
def get_pose_net(num_layers, heads, head_conv, imagenet_pretrained):
    """Build a ``PoseResNet`` of the requested depth and initialise it.

    Args:
        num_layers: key of ``resnet_spec`` selecting block type and stage sizes.
        heads: mapping ``head name -> number of output channels``.
        head_conv: hidden width of the head branches.
        imagenet_pretrained: forwarded to ``init_weights`` as ``pretrained``.

    Returns:
        The constructed model.

    Raises:
        KeyError: if ``num_layers`` is not a key of ``resnet_spec``.
    """
    spec = resnet_spec[num_layers]
    block_class, stage_sizes = spec
    net = PoseResNet(block_class, stage_sizes, heads, head_conv=head_conv)
    net.init_weights(num_layers, pretrained=imagenet_pretrained)
    return net
@@ -0,0 +1,134 @@ | |||
""" | |||
# -*- coding: utf-8 -*- | |||
----------------------------------------------------------------------------------- | |||
# Author: Nguyen Mau Dung | |||
# DoC: 2020.08.09 | |||
# email: nguyenmaudung93.kstn@gmail.com | |||
----------------------------------------------------------------------------------- | |||
# Description: utility functions used to create and wrap the model
""" | |||
import os | |||
import sys | |||
import torch | |||
src_dir = os.path.dirname(os.path.realpath(__file__)) | |||
# while not src_dir.endswith("sfa"): | |||
# src_dir = os.path.dirname(src_dir) | |||
if src_dir not in sys.path: | |||
sys.path.append(src_dir) | |||
from models import resnet, fpn_resnet | |||
def create_model(configs):
    """Create the model named by ``configs.arch``.

    ``configs.arch`` must end with ``_<num_layers>``.  An architecture whose
    name contains ``'fpn_resnet'`` is built by ``fpn_resnet.get_pose_net``;
    any other name containing ``'resnet'`` is built by ``resnet.get_pose_net``.

    Args:
        configs: object providing ``arch``, ``heads``, ``head_conv`` and
            ``imagenet_pretrained``.

    Returns:
        The constructed model.

    Raises:
        ValueError: if the depth cannot be parsed from ``configs.arch`` or the
            backbone name is not recognised.
    """
    try:
        num_layers = int(configs.arch.split('_')[-1])
    except (AttributeError, TypeError, ValueError) as err:
        # Catch only parsing failures; a bare except would also intercept
        # KeyboardInterrupt/SystemExit.
        raise ValueError(
            "configs.arch must end with '_<num_layers>', got {!r}".format(getattr(configs, 'arch', None))
        ) from err

    # 'fpn_resnet' must be tested first because it also contains 'resnet'.
    if 'fpn_resnet' in configs.arch:
        print('using ResNet architecture with feature pyramid')
        model = fpn_resnet.get_pose_net(num_layers=num_layers, heads=configs.heads, head_conv=configs.head_conv,
                                        imagenet_pretrained=configs.imagenet_pretrained)
    elif 'resnet' in configs.arch:
        print('using ResNet architecture')
        model = resnet.get_pose_net(num_layers=num_layers, heads=configs.heads, head_conv=configs.head_conv,
                                    imagenet_pretrained=configs.imagenet_pretrained)
    else:
        # Raise explicitly: an assert would be stripped under ``python -O``.
        raise ValueError('Undefined model backbone: {!r}'.format(configs.arch))

    return model
def get_num_parameters(model):
    """Return the number of elements in the parameters that require gradients.

    If ``model`` has a ``module`` attribute, the parameters of
    ``model.module`` are counted instead of those of ``model``.
    """
    target = model.module if hasattr(model, 'module') else model
    total = 0
    for param in target.parameters():
        if param.requires_grad:
            total += param.numel()
    return total
def make_data_parallel(model, configs):
    """Move ``model`` to GPU and wrap it according to the run configuration.

    * distributed with a GPU index: pin the process to that GPU, divide
      ``configs.batch_size`` and ``configs.num_workers`` by
      ``configs.ngpus_per_node`` and wrap in ``DistributedDataParallel``
      restricted to that device;
    * distributed without a GPU index: wrap in ``DistributedDataParallel``
      without ``device_ids``;
    * not distributed, with a GPU index: move the bare model to that GPU;
    * otherwise: wrap in ``DataParallel``.

    Returns:
        The (possibly wrapped) model.  ``configs`` is modified in place in
        the first case.
    """
    gpu = configs.gpu_idx

    if not configs.distributed:
        if gpu is None:
            # DataParallel will divide and allocate batch_size to all available GPUs
            return torch.nn.DataParallel(model).cuda()
        torch.cuda.set_device(gpu)
        return model.cuda(gpu)

    if gpu is None:
        model.cuda()
        # DistributedDataParallel will divide and allocate batch_size to all
        # available GPUs if device_ids are not set
        return torch.nn.parallel.DistributedDataParallel(model)

    # For multiprocessing distributed, DistributedDataParallel constructor
    # should always set the single device scope, otherwise,
    # DistributedDataParallel will use all available devices.
    torch.cuda.set_device(gpu)
    model.cuda(gpu)
    # When using a single GPU per process and per DistributedDataParallel, the
    # batch size has to be divided by the number of GPUs per node here.
    gpus_per_node = configs.ngpus_per_node
    configs.batch_size = int(configs.batch_size / gpus_per_node)
    configs.num_workers = int((configs.num_workers + gpus_per_node - 1) / gpus_per_node)
    return torch.nn.parallel.DistributedDataParallel(model, device_ids=[gpu])
if __name__ == '__main__':
    # Smoke test: build the requested architecture, run one random batch
    # through it and print every head's output size and the parameter count.
    import argparse

    from easydict import EasyDict as edict

    parser = argparse.ArgumentParser(description='RTM3D Implementation')
    parser.add_argument('-a', '--arch', type=str, default='resnet_18', metavar='ARCH',
                        help='The name of the model architecture')
    parser.add_argument('--head_conv', type=int, default=-1,
                        help='conv layer channels for output head; '
                             '0 for no conv layer; '
                             '-1 for default setting: '
                             '64 for resnets and 256 for dla.')
    # create_model reads configs.imagenet_pretrained, so it has to be defined.
    parser.add_argument('--imagenet_pretrained', action='store_true',
                        help='Load the pretrained backbone weights (downloads the checkpoint)')

    configs = edict(vars(parser.parse_args()))
    if configs.head_conv == -1:  # init default head_conv
        configs.head_conv = 256 if 'dla' in configs.arch else 64

    configs.num_classes = 3
    configs.num_vertexes = 8
    configs.num_center_offset = 2
    configs.num_vertexes_offset = 2
    configs.num_dimension = 3
    configs.num_rot = 8
    configs.num_depth = 1
    configs.num_wh = 2
    configs.heads = {
        'hm_mc': configs.num_classes,
        'hm_ver': configs.num_vertexes,
        'vercoor': configs.num_vertexes * 2,
        'cenoff': configs.num_center_offset,
        'veroff': configs.num_vertexes_offset,
        'dim': configs.num_dimension,
        'rot': configs.num_rot,
        'depth': configs.num_depth,
        'wh': configs.num_wh
    }

    # Use a GPU when one is present instead of requiring a second GPU ('cuda:1').
    configs.device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    model = create_model(configs).to(device=configs.device)
    sample_input = torch.randn((1, 3, 224, 224)).to(device=configs.device)
    output = model(sample_input)
    for hm_name, hm_out in output.items():
        print('hm_name: {}, hm_out size: {}'.format(hm_name, hm_out.size()))

    print('number of parameters: {}'.format(get_num_parameters(model)))
@@ -0,0 +1,284 @@ | |||
""" | |||
# --------------------------------------------------------------------------------- | |||
# -*- coding: utf-8 -*- | |||
----------------------------------------------------------------------------------- | |||
# Copyright (c) Microsoft | |||
# Licensed under the MIT License. | |||
# Written by Bin Xiao (Bin.Xiao@microsoft.com) | |||
# Modified by Xingyi Zhou | |||
# Refer from: https://github.com/xingyizhou/CenterNet | |||
# Modifier: Nguyen Mau Dung (2020.08.09) | |||
# ------------------------------------------------------------------------------ | |||
""" | |||
from __future__ import absolute_import | |||
from __future__ import division | |||
from __future__ import print_function | |||
import os | |||
import torch | |||
import torch.nn as nn | |||
import torch.utils.model_zoo as model_zoo | |||
# Momentum passed to every BatchNorm2d layer created in this module.
BN_MOMENTUM = 0.1

# Checkpoint URLs used by PoseResNet.init_weights, keyed by 'resnet<depth>'.
model_urls = dict(
    resnet18='https://download.pytorch.org/models/resnet18-5c106cde.pth',
    resnet34='https://download.pytorch.org/models/resnet34-333f7ec4.pth',
    resnet50='https://download.pytorch.org/models/resnet50-19c8e357.pth',
    resnet101='https://download.pytorch.org/models/resnet101-5d3b4d8f.pth',
    resnet152='https://download.pytorch.org/models/resnet152-b121ed2d.pth',
)
def conv3x3(in_planes, out_planes, stride=1):
    """Build a bias-free 3x3 convolution with padding 1.

    Args:
        in_planes: number of input channels.
        out_planes: number of output channels.
        stride: convolution stride (default 1).

    Returns:
        The configured ``nn.Conv2d`` layer.
    """
    conv_kwargs = dict(kernel_size=3, stride=stride, padding=1, bias=False)
    return nn.Conv2d(in_planes, out_planes, **conv_kwargs)
class BasicBlock(nn.Module):
    """Residual block made of two 3x3 conv + batch-norm layers.

    The block output has ``planes * expansion`` channels (``expansion`` is 1).
    When ``downsample`` is given it is applied to the input before the
    residual addition; otherwise the input itself is added.
    """

    expansion = 1

    def __init__(self, inplanes, planes, stride=1, downsample=None):
        super(BasicBlock, self).__init__()
        # Only the first convolution carries the stride.
        self.conv1 = conv3x3(inplanes, planes, stride)
        self.bn1 = nn.BatchNorm2d(planes, momentum=BN_MOMENTUM)
        self.relu = nn.ReLU(inplace=True)
        self.conv2 = conv3x3(planes, planes)
        self.bn2 = nn.BatchNorm2d(planes, momentum=BN_MOMENTUM)
        self.downsample = downsample
        self.stride = stride

    def forward(self, x):
        """Return ``relu(bn2(conv2(relu(bn1(conv1(x))))) + shortcut(x))``."""
        y = self.relu(self.bn1(self.conv1(x)))
        y = self.bn2(self.conv2(y))

        shortcut = x if self.downsample is None else self.downsample(x)
        y += shortcut
        return self.relu(y)
class Bottleneck(nn.Module):
    """Residual block with a 1x1 -> 3x3 -> 1x1 convolution stack.

    The 3x3 convolution carries the stride and the final 1x1 convolution
    widens the output to ``planes * expansion`` channels (``expansion`` is 4).
    When ``downsample`` is given it is applied to the input before the
    residual addition; otherwise the input itself is added.
    """

    expansion = 4

    def __init__(self, inplanes, planes, stride=1, downsample=None):
        super(Bottleneck, self).__init__()
        out_planes = planes * self.expansion
        self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=1, bias=False)
        self.bn1 = nn.BatchNorm2d(planes, momentum=BN_MOMENTUM)
        self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride, padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(planes, momentum=BN_MOMENTUM)
        self.conv3 = nn.Conv2d(planes, out_planes, kernel_size=1, bias=False)
        self.bn3 = nn.BatchNorm2d(out_planes, momentum=BN_MOMENTUM)
        self.relu = nn.ReLU(inplace=True)
        self.downsample = downsample
        self.stride = stride

    def forward(self, x):
        """Run the three conv/BN stages, add the shortcut and apply ReLU."""
        y = self.relu(self.bn1(self.conv1(x)))
        y = self.relu(self.bn2(self.conv2(y)))
        y = self.bn3(self.conv3(y))

        shortcut = x if self.downsample is None else self.downsample(x)
        y += shortcut
        return self.relu(y)
class PoseResNet(nn.Module):
    """ResNet encoder followed by three stride-2 deconvolutions and output heads.

    Every entry of ``heads`` becomes a prediction branch registered under the
    head's name and applied to the 256-channel output of the deconv stack.

    Args:
        block: residual block class; its ``expansion`` attribute determines
            the channel widths of the encoder stages.
        layers: number of blocks in each of the four residual stages.
        heads: mapping ``head name -> number of output channels``.
        head_conv: width of the hidden 3x3 conv of every head branch; a value
            ``<= 0`` makes each branch a single 1x1 conv.
        **kwargs: accepted and ignored.
    """

    def __init__(self, block, layers, heads, head_conv, **kwargs):
        super(PoseResNet, self).__init__()
        self.inplanes = 64
        self.deconv_with_bias = False
        self.heads = heads

        self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3,
                               bias=False)
        self.bn1 = nn.BatchNorm2d(64, momentum=BN_MOMENTUM)
        self.relu = nn.ReLU(inplace=True)
        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
        self.layer1 = self._make_layer(block, 64, layers[0])
        self.layer2 = self._make_layer(block, 128, layers[1], stride=2)
        self.layer3 = self._make_layer(block, 256, layers[2], stride=2)
        self.layer4 = self._make_layer(block, 512, layers[3], stride=2)

        # used for deconv layers
        self.deconv_layers = self._make_deconv_layer(
            3,
            [256, 256, 256],
            [4, 4, 4],
        )

        # Heads are visited in sorted order so the module registration order
        # is deterministic.
        for head in sorted(self.heads):
            num_output = self.heads[head]
            if head_conv > 0:
                fc = nn.Sequential(
                    nn.Conv2d(256, head_conv,
                              kernel_size=3, padding=1, bias=True),
                    nn.ReLU(inplace=True),
                    nn.Conv2d(head_conv, num_output,
                              kernel_size=1, stride=1, padding=0))
            else:
                fc = nn.Conv2d(
                    in_channels=256,
                    out_channels=num_output,
                    kernel_size=1,
                    stride=1,
                    padding=0
                )
            setattr(self, head, fc)

    def _make_layer(self, block, planes, blocks, stride=1):
        """Build one residual stage of ``blocks`` blocks and update ``self.inplanes``."""
        downsample = None
        # A projection shortcut is needed whenever the first block changes the
        # spatial size or the channel count.
        if stride != 1 or self.inplanes != planes * block.expansion:
            downsample = nn.Sequential(
                nn.Conv2d(self.inplanes, planes * block.expansion,
                          kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(planes * block.expansion, momentum=BN_MOMENTUM),
            )

        layers = [block(self.inplanes, planes, stride, downsample)]
        self.inplanes = planes * block.expansion
        for _ in range(1, blocks):
            layers.append(block(self.inplanes, planes))

        return nn.Sequential(*layers)

    def _get_deconv_cfg(self, deconv_kernel, index):
        """Return ``(kernel, padding, output_padding)`` for a stride-2 deconv.

        Each supported setting makes a stride-2 ``ConvTranspose2d`` double the
        spatial size exactly.  ``index`` is unused and kept for signature
        compatibility.

        Raises:
            ValueError: if ``deconv_kernel`` is not 2, 3 or 4.
        """
        supported = {4: (1, 0), 3: (1, 1), 2: (0, 0)}
        if deconv_kernel not in supported:
            # Without this check the original fell through and failed with
            # an UnboundLocalError on ``padding``.
            raise ValueError('Unsupported deconv kernel size: {!r} (expected 2, 3 or 4)'.format(deconv_kernel))
        padding, output_padding = supported[deconv_kernel]
        return deconv_kernel, padding, output_padding

    def _make_deconv_layer(self, num_layers, num_filters, num_kernels):
        """Build ``num_layers`` ConvTranspose2d + BN + ReLU stages.

        ``num_filters[i]`` and ``num_kernels[i]`` give the output width and
        kernel size of stage ``i``.  ``self.inplanes`` is updated to the
        output width of the last stage.
        """
        assert num_layers == len(num_filters), \
            'ERROR: num_deconv_layers is different len(num_deconv_filters)'
        assert num_layers == len(num_kernels), \
            'ERROR: num_deconv_layers is different len(num_deconv_kernels)'

        layers = []
        for i in range(num_layers):
            kernel, padding, output_padding = \
                self._get_deconv_cfg(num_kernels[i], i)

            planes = num_filters[i]
            layers.append(
                nn.ConvTranspose2d(
                    in_channels=self.inplanes,
                    out_channels=planes,
                    kernel_size=kernel,
                    stride=2,
                    padding=padding,
                    output_padding=output_padding,
                    bias=self.deconv_with_bias))
            layers.append(nn.BatchNorm2d(planes, momentum=BN_MOMENTUM))
            layers.append(nn.ReLU(inplace=True))
            self.inplanes = planes

        return nn.Sequential(*layers)

    def forward(self, x):
        """Return ``{head name: tensor}`` computed from the deconv features."""
        x = self.conv1(x)
        x = self.bn1(x)
        x = self.relu(x)
        x = self.maxpool(x)

        x = self.layer1(x)
        x = self.layer2(x)
        x = self.layer3(x)
        x = self.layer4(x)

        x = self.deconv_layers(x)
        ret = {}
        for head in self.heads:
            ret[head] = getattr(self, head)(x)
        return ret

    def init_weights(self, num_layers, pretrained=True):
        """Initialise deconv and head layers, then load the backbone checkpoint.

        Nothing happens when ``pretrained`` is falsy.  Otherwise the deconv
        stack and the output conv of every head are initialised, then the
        checkpoint from ``model_urls['resnet<num_layers>']`` is loaded with
        ``strict=False``.
        """
        if pretrained:
            for _, m in self.deconv_layers.named_modules():
                if isinstance(m, nn.ConvTranspose2d):
                    nn.init.normal_(m.weight, std=0.001)
                    if self.deconv_with_bias:
                        nn.init.constant_(m.bias, 0)
                elif isinstance(m, nn.BatchNorm2d):
                    nn.init.constant_(m.weight, 1)
                    nn.init.constant_(m.bias, 0)

            for head in self.heads:
                final_layer = getattr(self, head)
                for m in final_layer.modules():
                    if isinstance(m, nn.Conv2d):
                        # Only the conv whose output width equals the head
                        # width (the branch output) is touched.
                        if m.weight.shape[0] == self.heads[head]:
                            if 'hm' in head:
                                nn.init.constant_(m.bias, -2.19)
                            else:
                                nn.init.normal_(m.weight, std=0.001)
                                nn.init.constant_(m.bias, 0)

            url = model_urls['resnet{}'.format(num_layers)]
            pretrained_state_dict = model_zoo.load_url(url)
            print('=> loading pretrained model {}'.format(url))
            self.load_state_dict(pretrained_state_dict, strict=False)
# Supported depths -> (residual block class, number of blocks per stage).
resnet_spec = {
    18: (BasicBlock, [2, 2, 2, 2]),
    34: (BasicBlock, [3, 4, 6, 3]),
    50: (Bottleneck, [3, 4, 6, 3]),
    101: (Bottleneck, [3, 4, 23, 3]),
    152: (Bottleneck, [3, 8, 36, 3]),
}
def get_pose_net(num_layers, heads, head_conv, imagenet_pretrained):
    """Build a ``PoseResNet`` of the requested depth and initialise it.

    Args:
        num_layers: key of ``resnet_spec`` selecting block type and stage sizes.
        heads: mapping ``head name -> number of output channels``.
        head_conv: hidden width of the head branches.
        imagenet_pretrained: forwarded to ``init_weights`` as ``pretrained``.

    Returns:
        The constructed model.

    Raises:
        KeyError: if ``num_layers`` is not a key of ``resnet_spec``.
    """
    spec = resnet_spec[num_layers]
    block_class, stage_sizes = spec
    net = PoseResNet(block_class, stage_sizes, heads, head_conv=head_conv)
    net.init_weights(num_layers, pretrained=imagenet_pretrained)
    return net
@@ -0,0 +1,290 @@ | |||
""" | |||
# -*- coding: utf-8 -*- | |||
----------------------------------------------------------------------------------- | |||
# Author: Nguyen Mau Dung | |||
# DoC: 2020.08.17 | |||
# email: nguyenmaudung93.kstn@gmail.com | |||
----------------------------------------------------------------------------------- | |||
# Description: This script is for training
""" | |||
import time | |||
import numpy as np | |||
import sys | |||
import random | |||
import os | |||
import warnings | |||
warnings.filterwarnings("ignore", category=UserWarning) | |||
import torch | |||
from torch.utils.tensorboard import SummaryWriter | |||
import torch.distributed as dist | |||
import torch.multiprocessing as mp | |||
import torch.utils.data.distributed | |||
from tqdm import tqdm | |||
src_dir = os.path.dirname(os.path.realpath(__file__)) | |||
# while not src_dir.endswith("sfa"): | |||
# src_dir = os.path.dirname(src_dir) | |||
if src_dir not in sys.path: | |||
sys.path.append(src_dir) | |||
from data_process.kitti_dataloader import create_train_dataloader, create_val_dataloader | |||
from models.model_utils import create_model, make_data_parallel, get_num_parameters | |||
from utils.train_utils import create_optimizer, create_lr_scheduler, get_saved_state, save_checkpoint | |||
from utils.torch_utils import reduce_tensor, to_python_float | |||
from utils.misc import AverageMeter, ProgressMeter | |||
from utils.logger import Logger | |||
from config.train_config import parse_train_configs | |||
from losses.losses import Compute_Loss | |||
def main():
    """Parse the training configuration and start the worker process(es).

    With ``configs.multiprocessing_distributed`` set, ``main_worker`` is
    spawned ``configs.ngpus_per_node`` times; otherwise it is called once in
    this process with ``configs.gpu_idx``.
    """
    configs = parse_train_configs()

    seed = configs.seed
    if seed is not None:
        # Seed every RNG in use and force deterministic cuDNN kernels so
        # results can be reproduced.
        for seed_fn in (random.seed, np.random.seed, torch.manual_seed):
            seed_fn(seed)
        torch.backends.cudnn.deterministic = True
        torch.backends.cudnn.benchmark = False

    if configs.gpu_idx is not None:
        print('You have chosen a specific GPU. This will completely disable data parallelism.')

    # With the env:// rendezvous the world size comes from the environment.
    if configs.dist_url == "env://" and configs.world_size == -1:
        configs.world_size = int(os.environ["WORLD_SIZE"])

    configs.distributed = configs.world_size > 1 or configs.multiprocessing_distributed

    if not configs.multiprocessing_distributed:
        main_worker(configs.gpu_idx, configs)
        return

    # world_size counted nodes so far; from here on it counts processes.
    configs.world_size = configs.ngpus_per_node * configs.world_size
    mp.spawn(main_worker, nprocs=configs.ngpus_per_node, args=(configs,))
def main_worker(gpu_idx, configs):
    """Train (or only evaluate) the model inside one worker process.

    Args:
        gpu_idx: GPU index assigned to this process, or ``None``; stored in
            ``configs.gpu_idx``.
        configs: run configuration.  Derived fields are written back to it:
            ``rank``, ``subdivisions``, ``is_master_node``,
            ``step_lr_in_epoch`` and, when resuming, ``start_epoch``.

    Raises:
        FileNotFoundError: if ``configs.pretrained_path`` is a directory that
            contains no files.
    """
    configs.gpu_idx = gpu_idx

    if configs.distributed:
        if configs.dist_url == "env://" and configs.rank == -1:
            configs.rank = int(os.environ["RANK"])
        if configs.multiprocessing_distributed:
            # For multiprocessing distributed training, rank needs to be the
            # global rank among all the processes
            configs.rank = configs.rank * configs.ngpus_per_node + gpu_idx

        dist.init_process_group(backend=configs.dist_backend, init_method=configs.dist_url,
                                world_size=configs.world_size, rank=configs.rank)
        subdivisions = int(64 / configs.batch_size / configs.ngpus_per_node)
    else:
        subdivisions = int(64 / configs.batch_size)
    # train_one_epoch steps the optimizer every `subdivisions` iterations via
    # a modulo; clamp to 1 so a large batch size cannot make it zero.
    configs.subdivisions = max(1, subdivisions)

    configs.is_master_node = (not configs.distributed) or (
            configs.distributed and (configs.rank % configs.ngpus_per_node == 0))

    # Only the master node logs and writes tensorboard events.
    if configs.is_master_node:
        logger = Logger(configs.logs_dir, configs.saved_fn)
        logger.info('>>> Created a new logger')
        logger.info('>>> configs: {}'.format(configs))
        tb_writer = SummaryWriter(log_dir=os.path.join(configs.logs_dir, 'tensorboard'))
    else:
        logger = None
        tb_writer = None

    # model
    model = create_model(configs)

    # load weight from a checkpoint
    if configs.pretrained_path is not None:
        if os.path.isfile(configs.pretrained_path):
            model_path = configs.pretrained_path
        else:
            # Take the last model in the directory.  os.listdir returns names
            # in arbitrary order, so sort to make the choice deterministic.
            # NOTE(review): "last" is taken as last by file name - confirm
            # this matches the checkpoint naming used for this directory.
            candidates = sorted(
                name for name in os.listdir(configs.pretrained_path)
                if os.path.isfile(os.path.join(configs.pretrained_path, name)))
            if not candidates:
                raise FileNotFoundError("=> no checkpoint found at '{}'".format(configs.pretrained_path))
            model_path = os.path.join(configs.pretrained_path, candidates[-1])
        model.load_state_dict(torch.load(model_path, map_location=configs.device))
        if logger is not None:
            logger.info('loaded pretrained model at {}'.format(model_path))

    # resume weights of model from a checkpoint
    if configs.resume_path is not None:
        assert os.path.isfile(configs.resume_path), "=> no checkpoint found at '{}'".format(configs.resume_path)
        model.load_state_dict(torch.load(configs.resume_path, map_location='cpu'))
        if logger is not None:
            logger.info('resume training model from checkpoint {}'.format(configs.resume_path))

    # Data Parallel
    model = make_data_parallel(model, configs)

    # Make sure to create optimizer after moving the model to cuda
    optimizer = create_optimizer(configs, model)
    lr_scheduler = create_lr_scheduler(optimizer, configs)
    # These schedule types are stepped once per epoch, all others per iteration.
    configs.step_lr_in_epoch = configs.lr_type not in ['multi_step', 'cosin', 'one_cycle']

    # resume optimizer, lr_scheduler from a checkpoint
    if configs.resume_path is not None:
        utils_path = configs.resume_path.replace('Model_', 'Utils_')
        assert os.path.isfile(utils_path), "=> no checkpoint found at '{}'".format(utils_path)
        # gpu_idx is None in the DataParallel case; 'cuda:None' is not a
        # valid device string, so fall back to CPU there.
        utils_map_location = 'cpu' if configs.gpu_idx is None else 'cuda:{}'.format(configs.gpu_idx)
        utils_state_dict = torch.load(utils_path, map_location=utils_map_location)
        optimizer.load_state_dict(utils_state_dict['optimizer'])
        lr_scheduler.load_state_dict(utils_state_dict['lr_scheduler'])
        configs.start_epoch = utils_state_dict['epoch'] + 1

    if configs.is_master_node:
        num_parameters = get_num_parameters(model)
        logger.info('number of trained parameters of the model: {}'.format(num_parameters))

    if logger is not None:
        logger.info(">>> Loading dataset & getting dataloader...")
    # Create dataloader
    train_dataloader, train_sampler = create_train_dataloader(configs)
    if logger is not None:
        logger.info('number of batches in training set: {}'.format(len(train_dataloader)))

    if configs.evaluate:
        val_dataloader = create_val_dataloader(configs)
        val_loss = validate(val_dataloader, model, configs)
        print('val_loss: {:.4e}'.format(val_loss))
        # Release the same resources as the training path before returning.
        if tb_writer is not None:
            tb_writer.close()
        if configs.distributed:
            cleanup()
        return

    for epoch in range(configs.start_epoch, configs.num_epochs + 1):
        if logger is not None:
            logger.info('{}'.format('*-' * 40))
            logger.info('{} {}/{} {}'.format('=' * 35, epoch, configs.num_epochs, '=' * 35))
            logger.info('{}'.format('*-' * 40))
            logger.info('>>> Epoch: [{}/{}]'.format(epoch, configs.num_epochs))

        if configs.distributed:
            train_sampler.set_epoch(epoch)
        # train for one epoch
        train_one_epoch(train_dataloader, model, optimizer, lr_scheduler, epoch, configs, logger, tb_writer)
        if (not configs.no_val) and (epoch % configs.checkpoint_freq == 0):
            val_dataloader = create_val_dataloader(configs)
            print('number of batches in val_dataloader: {}'.format(len(val_dataloader)))
            val_loss = validate(val_dataloader, model, configs)
            print('val_loss: {:.4e}'.format(val_loss))
            if tb_writer is not None:
                tb_writer.add_scalar('Val_loss', val_loss, epoch)

        # Save checkpoint
        if configs.is_master_node and ((epoch % configs.checkpoint_freq) == 0):
            model_state_dict, utils_state_dict = get_saved_state(model, optimizer, lr_scheduler, epoch, configs)
            save_checkpoint(configs.checkpoints_dir, configs.saved_fn, model_state_dict, utils_state_dict, epoch)

        if not configs.step_lr_in_epoch:
            lr_scheduler.step()
            if tb_writer is not None:
                # Prefer get_last_lr(): calling get_lr() outside step() can
                # report a wrong value for some torch schedulers.  Fall back
                # to get_lr() for schedulers that lack get_last_lr().
                current_lr = getattr(lr_scheduler, 'get_last_lr', lr_scheduler.get_lr)()[0]
                tb_writer.add_scalar('LR', current_lr, epoch)

    if tb_writer is not None:
        tb_writer.close()
    if configs.distributed:
        cleanup()
def cleanup():
    """Destroy the default ``torch.distributed`` process group if one exists.

    The function is also called from the Ctrl-C handler of this script, where
    no process group may have been created; in that case it does nothing
    instead of raising.
    """
    if dist.is_available() and dist.is_initialized():
        dist.destroy_process_group()
def train_one_epoch(train_dataloader, model, optimizer, lr_scheduler, epoch, configs, logger, tb_writer):
    """Run one training epoch.

    Gradients are accumulated and the optimizer is stepped whenever the
    global step is a multiple of ``configs.subdivisions``.  When
    ``configs.step_lr_in_epoch`` is set, the LR scheduler is stepped right
    after each optimizer step.

    Args:
        train_dataloader: iterable yielding ``(imgs, targets)`` batches, where
            ``targets`` is a dict of tensors.
        model: network to train.
        optimizer: optimizer updating ``model``.
        lr_scheduler: learning-rate scheduler.
        epoch: current epoch number, used for the global step and log prefix.
        configs: run configuration.
        logger: logger for progress messages, or ``None``.
        tb_writer: tensorboard writer, or ``None``.
    """
    batch_time = AverageMeter('Time', ':6.3f')
    data_time = AverageMeter('Data', ':6.3f')
    losses = AverageMeter('Loss', ':.4e')

    progress = ProgressMeter(len(train_dataloader), [batch_time, data_time, losses],
                             prefix="Train - Epoch: [{}/{}]".format(epoch, configs.num_epochs))

    criterion = Compute_Loss(device=configs.device)
    num_iters_per_epoch = len(train_dataloader)
    # switch to train mode
    model.train()
    start_time = time.time()
    for batch_idx, batch_data in enumerate(tqdm(train_dataloader)):
        data_time.update(time.time() - start_time)
        imgs, targets = batch_data
        batch_size = imgs.size(0)
        # 1-based step counter that keeps increasing across epochs.
        global_step = num_iters_per_epoch * (epoch - 1) + batch_idx + 1
        for k in targets.keys():
            targets[k] = targets[k].to(configs.device, non_blocking=True)
        imgs = imgs.to(configs.device, non_blocking=True).float()
        outputs = model(imgs)
        total_loss, loss_stats = criterion(outputs, targets)
        # For torch.nn.DataParallel case
        if (not configs.distributed) and (configs.gpu_idx is None):
            total_loss = torch.mean(total_loss)

        # compute gradient and perform backpropagation
        total_loss.backward()
        if global_step % configs.subdivisions == 0:
            optimizer.step()
            # zero the parameter gradients
            optimizer.zero_grad()
            # Adjust learning rate
            if configs.step_lr_in_epoch:
                lr_scheduler.step()
                if tb_writer is not None:
                    # Prefer get_last_lr(): calling get_lr() outside step()
                    # can report a wrong value for some torch schedulers.
                    # Fall back to get_lr() when get_last_lr() is missing.
                    current_lr = getattr(lr_scheduler, 'get_last_lr', lr_scheduler.get_lr)()[0]
                    tb_writer.add_scalar('LR', current_lr, global_step)

        if configs.distributed:
            reduced_loss = reduce_tensor(total_loss.data, configs.world_size)
        else:
            reduced_loss = total_loss.data
        losses.update(to_python_float(reduced_loss), batch_size)
        # measure elapsed time
        batch_time.update(time.time() - start_time)

        if tb_writer is not None:
            if (global_step % configs.tensorboard_freq) == 0:
                loss_stats['avg_loss'] = losses.avg
                tb_writer.add_scalars('Train', loss_stats, global_step)
        # Log message
        if logger is not None:
            if (global_step % configs.print_freq) == 0:
                logger.info(progress.get_message(batch_idx))

        start_time = time.time()
def validate(val_dataloader, model, configs):
    """Evaluate the model on a validation loader and return the average loss.

    Args:
        val_dataloader: Iterable yielding ``(imgs, targets)`` batches, where
            ``targets`` exposes ``keys()`` and holds objects with ``.to()``.
        model: Network to evaluate; it is switched to eval mode here.
        configs: Settings object. ``device``, ``distributed`` and ``gpu_idx``
            are read, plus ``world_size`` when ``distributed`` is true.

    Returns:
        The ``avg`` attribute of the loss meter after the last batch; every
        batch is passed to the meter together with its batch size.
    """
    loss_meter = AverageMeter('Loss', ':.4e')
    criterion = Compute_Loss(device=configs.device)
    # Switch to evaluation mode.
    model.eval()
    with torch.no_grad():
        for imgs, targets in tqdm(val_dataloader):
            batch_size = imgs.size(0)
            # Move every target entry to the configured device, in place.
            for key in targets.keys():
                targets[key] = targets[key].to(configs.device, non_blocking=True)
            imgs = imgs.to(configs.device, non_blocking=True).float()
            total_loss, _ = criterion(model(imgs), targets)
            # For torch.nn.DataParallel case: collapse the loss to a scalar.
            if (not configs.distributed) and (configs.gpu_idx is None):
                total_loss = torch.mean(total_loss)
            loss_value = total_loss.data
            if configs.distributed:
                loss_value = reduce_tensor(loss_value, configs.world_size)
            loss_meter.update(to_python_float(loss_value), batch_size)
    return loss_meter.avg
# Script entry point: run main() and exit with status 0 on Ctrl+C.
if __name__ == '__main__':
    try:
        main()
    except KeyboardInterrupt:
        try:
            # On interrupt, run cleanup() and then request a normal exit.
            cleanup()
            sys.exit(0)
        except SystemExit:
            # sys.exit() raises SystemExit; end the process immediately instead.
            os._exit(0)
@@ -0,0 +1,137 @@ | |||
""" | |||
# -*- coding: utf-8 -*- | |||
----------------------------------------------------------------------------------- | |||
# Author: Nguyen Mau Dung | |||
# DoC: 2020.08.17 | |||
# email: nguyenmaudung93.kstn@gmail.com | |||
----------------------------------------------------------------------------------- | |||
# Description: Demonstration utils script | |||
""" | |||
import argparse | |||
import sys | |||
import os | |||
import warnings | |||
import zipfile | |||
warnings.filterwarnings("ignore", category=UserWarning) | |||
from easydict import EasyDict as edict | |||
import numpy as np | |||
import wget | |||
import torch | |||
import cv2 | |||
src_dir = os.path.dirname(os.path.realpath(__file__)) | |||
# while not src_dir.endswith("sfa"): | |||
# src_dir = os.path.dirname(src_dir) | |||
if src_dir not in sys.path: | |||
sys.path.append(src_dir) | |||
from utils.misc import make_folder, time_synchronized | |||
from utils.evaluation_utils import decode, post_processing | |||
from utils.torch_utils import _sigmoid | |||
def parse_demo_configs():
    """Build the demo configuration from CLI arguments plus fixed settings.

    Command-line options are parsed with argparse and wrapped in an EasyDict.
    Fixed model/input settings and dataset/result paths are then attached, and
    the results directory is created through ``make_folder``.

    Returns:
        The populated EasyDict of settings.
    """
    arg_parser = argparse.ArgumentParser(description='Demonstration config for the implementation')
    add_arg = arg_parser.add_argument
    add_arg('--saved_fn', type=str, default='fpn_resnet_18', metavar='FN',
            help='The name using for saving logs, models,...')
    add_arg('-a', '--arch', type=str, default='fpn_resnet_18', metavar='ARCH',
            help='The name of the model architecture')
    add_arg('--pretrained_path', type=str,
            default='../checkpoints/fpn_resnet_18/fpn_resnet_18_epoch_300.pth', metavar='PATH',
            help='the path of the pretrained checkpoint')
    add_arg('--foldername', type=str, default='2011_09_26_drive_0014_sync', metavar='FN',
            help='Folder name for demostration dataset')
    add_arg('--K', type=int, default=50,
            help='the number of top K')
    add_arg('--no_cuda', action='store_true',
            help='If true, cuda is not used.')
    add_arg('--gpu_idx', default=0, type=int,
            help='GPU index to use.')
    add_arg('--peak_thresh', type=float, default=0.2)
    add_arg('--output_format', type=str, default='image', metavar='PATH',
            help='the type of the test output (support image or video)')
    add_arg('--output-width', type=int, default=608,
            help='the width of showing output, the height maybe vary')

    parsed_args = arg_parser.parse_args()
    configs = edict(vars(parsed_args))

    # Fixed runtime settings.
    configs.pin_memory = True
    configs.distributed = False  # For testing on 1 GPU only

    # Fixed input / heatmap geometry.
    configs.input_size = (608, 608)
    configs.hm_size = (152, 152)
    configs.down_ratio = 4
    configs.max_objects = 50

    # Fixed model settings and per-head output channel counts.
    configs.imagenet_pretrained = False
    configs.head_conv = 64
    configs.num_classes = 3
    configs.num_center_offset = 2
    configs.num_z = 1
    configs.num_dim = 3
    configs.num_direction = 2  # sin, cos
    configs.heads = {
        'hm_cen': configs.num_classes,
        'cen_offset': configs.num_center_offset,
        'direction': configs.num_direction,
        'z_coor': configs.num_z,
        'dim': configs.num_dim
    }

    # Dataset and results directories, relative to the parent directory.
    configs.root_dir = '../'
    demo_dir = os.path.join(configs.root_dir, 'dataset', 'kitti', 'demo')
    configs.dataset_dir = demo_dir
    configs.calib_path = os.path.join(demo_dir, 'calib.txt')
    configs.results_dir = os.path.join(configs.root_dir, 'results', configs.saved_fn)
    make_folder(configs.results_dir)

    return configs
def download_and_unzip(demo_dataset_dir, download_url):
    """Download an archive into ``demo_dataset_dir`` and extract it.

    The archive file name is the last ``/``-separated component of
    ``download_url``. If that file already exists in ``demo_dataset_dir``
    nothing is downloaded or extracted. Otherwise the archive is fetched with
    ``wget`` and extracted into a sub-folder named after the file name with
    its last four characters removed.

    Args:
        demo_dataset_dir: Directory that receives the archive and its contents.
        download_url: URL of the zip archive.
    """
    archive_name = download_url.split('/')[-1]
    archive_path = os.path.join(demo_dataset_dir, archive_name)

    if not os.path.isfile(archive_path):
        print('\nDownloading data for demonstration...')
        wget.download(download_url, archive_path)

        print('\nUnzipping the downloaded data...')
        extract_dir = os.path.join(demo_dataset_dir, archive_name[:-4])
        with zipfile.ZipFile(archive_path, "r") as archive:
            archive.extractall(extract_dir)
    else:
        print('The dataset have been downloaded')
def do_detect(configs, model, bevmap, is_front):
    """Run the detector on one BEV map and return detections with timing.

    Args:
        configs: Settings object; ``device``, ``K``, ``num_classes``,
            ``down_ratio`` and ``peak_thresh`` are read.
        model: Callable network returning a dict with the keys ``hm_cen``,
            ``cen_offset``, ``direction``, ``z_coor`` and ``dim``.
        bevmap: Input tensor; it is flipped along dims 1 and 2 when
            ``is_front`` is false, then given a leading batch dimension.
        is_front: Whether ``bevmap`` is used as-is (true) or flipped (false).

    Returns:
        ``(detections, bevmap, fps)``: the post-processed detections of the
        single batch item, the (possibly flipped) BEV map, and the reciprocal
        of the time spent on the forward pass, decoding and post-processing.
    """
    if not is_front:
        bevmap = torch.flip(bevmap, [1, 2])

    batch = bevmap.unsqueeze(0).to(configs.device, non_blocking=True).float()
    started = time_synchronized()
    outputs = model(batch)
    for head in ('hm_cen', 'cen_offset'):
        outputs[head] = _sigmoid(outputs[head])
    # detections size (batch_size, K, 10)
    raw = decode(outputs['hm_cen'], outputs['cen_offset'], outputs['direction'], outputs['z_coor'],
                 outputs['dim'], K=configs.K)
    raw = raw.cpu().numpy().astype(np.float32)
    detections = post_processing(raw, configs.num_classes, configs.down_ratio, configs.peak_thresh)
    finished = time_synchronized()
    # Inference speed
    fps = 1 / (finished - started)

    return detections[0], bevmap, fps
def write_credit(img, org_author=(500, 400), text_author='github.com/maudzung', org_fps=(50, 1000), fps=None):
    """Draw the author credit and, when available, the inference speed on ``img``.

    Args:
        img: Image passed to ``cv2.putText`` as the drawing target.
        org_author: Origin passed to ``cv2.putText`` for the credit text.
        text_author: Credit text to draw.
        org_fps: Origin passed to ``cv2.putText`` for the speed text.
        fps: Frames per second to display. When ``None`` (the default) the
            speed text is skipped; formatting ``None`` with ``{:.1f}`` would
            otherwise raise ``TypeError``.
    """
    font = cv2.FONT_HERSHEY_SIMPLEX
    fontScale = 1
    color = (255, 255, 255)
    thickness = 2

    cv2.putText(img, text_author, org_author, font, fontScale, color, thickness, cv2.LINE_AA)
    if fps is not None:
        cv2.putText(img, 'Speed: {:.1f} FPS'.format(fps), org_fps, font, fontScale, color, thickness, cv2.LINE_AA)
@@ -0,0 +1,183 @@ | |||
""" | |||
# -*- coding: utf-8 -*- | |||
----------------------------------------------------------------------------------- | |||
# Author: Nguyen Mau Dung | |||
# DoC: 2020.08.17 | |||
# email: nguyenmaudung93.kstn@gmail.com | |||
----------------------------------------------------------------------------------- | |||
# Description: The utils for evaluation | |||
# Refer from: https://github.com/xingyizhou/CenterNet | |||
""" | |||
from __future__ import division | |||
import os | |||
import sys | |||
import torch | |||
import numpy as np | |||
import torch.nn.functional as F | |||
import cv2 | |||
src_dir = os.path.dirname(os.path.realpath(__file__)) | |||
# while not src_dir.endswith("sfa"): | |||
# src_dir = os.path.dirname(src_dir) | |||
if src_dir not in sys.path: | |||
sys.path.append(src_dir) | |||
import config.kitti_config as cnf | |||
from data_process.kitti_bev_utils import drawRotatedBox | |||
def _nms(heat, kernel=3):
    """Keep only the local maxima of ``heat``, zeroing every other value.

    A value survives when it equals the maximum of the ``kernel`` x ``kernel``
    window centred on it (stride 1, padding ``(kernel - 1) // 2``).
    """
    padding = (kernel - 1) // 2
    window_max = F.max_pool2d(heat, (kernel, kernel), stride=1, padding=padding)
    is_peak = window_max == heat
    return heat * is_peak.float()
def _gather_feat(feat, ind, mask=None):
    """Select rows of ``feat`` along dim 1 using the indices in ``ind``.

    Args:
        feat: 3-D tensor; dim 2 is kept whole for every selected row.
        ind: 2-D index tensor, expanded over dim 2 of ``feat`` before gathering.
        mask: Optional boolean tensor shaped like ``ind``. When given, only
            the selected rows are kept and the result is flattened to
            ``(-1, feat.size(2))``.

    Returns:
        The gathered tensor, or its masked and flattened form.
    """
    channels = feat.size(2)
    expanded_ind = ind.unsqueeze(2).expand(ind.size(0), ind.size(1), channels)
    gathered = feat.gather(1, expanded_ind)
    if mask is None:
        return gathered
    selector = mask.unsqueeze(2).expand_as(gathered)
    return gathered[selector].view(-1, channels)
def _transpose_and_gather_feat(feat, ind):
    """Move dim 1 of a 4-D ``feat`` last, flatten dims 1-2, then gather by ``ind``.

    ``feat`` is permuted with ``(0, 2, 3, 1)``, reshaped to
    ``(feat.size(0), -1, channels)`` and passed to ``_gather_feat``.
    """
    channels_last = feat.permute(0, 2, 3, 1).contiguous()
    batch, channels = channels_last.size(0), channels_last.size(3)
    flattened = channels_last.view(batch, -1, channels)
    return _gather_feat(flattened, ind)
def _topk(scores, K=40):
    """Pick the ``K`` best scores per batch item across all categories.

    The top ``K`` entries are first taken per category over the flattened
    spatial dims, then the top ``K`` of those are taken across categories.

    Args:
        scores: Tensor of size ``(batch, cat, height, width)``.
        K: Number of entries to keep.

    Returns:
        ``(topk_score, topk_inds, topk_clses, topk_ys, topk_xs)``, each viewed
        as ``(batch, K)`` where applicable: the scores, the flat spatial
        indices, the category ids, and the row / column coordinates as floats.
    """
    batch, cat, height, width = scores.size()

    # Stage 1: best K locations inside every category map.
    per_cat_scores, per_cat_inds = torch.topk(scores.view(batch, cat, -1), K)
    per_cat_inds = per_cat_inds % (height * width)
    per_cat_ys = (torch.floor_divide(per_cat_inds, width)).float()
    per_cat_xs = (per_cat_inds % width).int().float()

    # Stage 2: best K among the cat * K candidates of each batch item.
    topk_score, flat_pos = torch.topk(per_cat_scores.view(batch, -1), K)
    topk_clses = (torch.floor_divide(flat_pos, K)).int()

    def _pick(candidates):
        return _gather_feat(candidates.view(batch, -1, 1), flat_pos).view(batch, K)

    topk_inds = _pick(per_cat_inds)
    topk_ys = _pick(per_cat_ys)
    topk_xs = _pick(per_cat_xs)

    return topk_score, topk_inds, topk_clses, topk_ys, topk_xs
def _topk_channel(scores, K=40):
    """Pick the ``K`` best scores independently inside every category map.

    Args:
        scores: Tensor of size ``(batch, cat, height, width)``.
        K: Number of entries to keep per category.

    Returns:
        ``(topk_scores, topk_inds, topk_ys, topk_xs)``, each of size
        ``(batch, cat, K)``: the scores, the flat spatial indices, and the
        row / column coordinates as floats.
    """
    batch, cat, height, width = scores.size()

    topk_scores, topk_inds = torch.topk(scores.view(batch, cat, -1), K)

    topk_inds = topk_inds % (height * width)
    # Integer division keeps the row index exact (no float round trip) and
    # matches how _topk computes it.
    topk_ys = torch.floor_divide(topk_inds, width).float()
    topk_xs = (topk_inds % width).int().float()

    return topk_scores, topk_inds, topk_ys, topk_xs
def decode(hm_cen, cen_offset, direction, z_coor, dim, K=40):
    """Turn the network head outputs into a ``(batch_size, K, 10)`` tensor.

    The centre heatmap is reduced to local peaks, the ``K`` best peaks are
    selected, and the other heads are sampled at those peak locations.

    Args:
        hm_cen: Centre heatmap of size ``(batch, classes, height, width)``.
        cen_offset: Centre offset head, or ``None`` to add 0.5 to both
            coordinates instead.
        direction: Direction head (2 values per location).
        z_coor: Z-coordinate head (1 value per location).
        dim: Dimension head (3 values per location).
        K: Number of detections to keep per batch item.

    Returns:
        Tensor whose last axis is laid out as
        ``scores-0:1, xs-1:2, ys-2:3, z_coor-3:4, dim-4:7, direction-7:9,
        clses-9:10``.
    """
    batch_size, _, _, _ = hm_cen.size()

    scores, inds, clses, ys, xs = _topk(_nms(hm_cen), K=K)

    xs = xs.view(batch_size, K, 1)
    ys = ys.view(batch_size, K, 1)
    if cen_offset is None:
        xs = xs + 0.5
        ys = ys + 0.5
    else:
        cen_offset = _transpose_and_gather_feat(cen_offset, inds).view(batch_size, K, 2)
        xs = xs + cen_offset[:, :, 0:1]
        ys = ys + cen_offset[:, :, 1:2]

    direction = _transpose_and_gather_feat(direction, inds).view(batch_size, K, 2)
    z_coor = _transpose_and_gather_feat(z_coor, inds).view(batch_size, K, 1)
    dim = _transpose_and_gather_feat(dim, inds).view(batch_size, K, 3)
    clses = clses.view(batch_size, K, 1).float()
    scores = scores.view(batch_size, K, 1)

    # (scores x 1, xs x 1, ys x 1, z_coor x 1, dim x 3, direction x 2, clses x 1)
    # detections: [batch_size, K, 10]
    return torch.cat([scores, xs, ys, z_coor, dim, direction, clses], dim=2)
def get_yaw(direction):
    """Return ``arctan2(direction[:, 0:1], direction[:, 1:2])`` as a column array."""
    first_component = direction[:, 0:1]
    second_component = direction[:, 1:2]
    return np.arctan2(first_component, second_component)
def post_processing(detections, num_classes=3, down_ratio=4, peak_thresh=0.2):
    """Group decoded detections by class, rescale them and drop weak ones.

    :param detections: [batch_size, K, 10]
    # (scores x 1, xs x 1, ys x 1, z_coor x 1, dim x 3, direction x 2, clses x 1)
    # (scores-0:1, xs-1:2, ys-2:3, z_coor-3:4, dim-4:7, direction-7:9, clses-9:10)
    :param num_classes: number of class ids (0 .. num_classes - 1) to collect
    :param down_ratio: factor applied to the x and y columns
    :param peak_thresh: rows whose score is not above this value are removed
    :return: one dict per batch item mapping class id to an array with the
        columns score, x, y, z, h, w, l, yaw
    """
    # TODO: Need to consider rescale to the original scale: x, y
    ret = []
    for i in range(detections.shape[0]):
        sample = detections[i]
        class_ids = sample[:, -1]
        per_class = {}
        for cls_id in range(num_classes):
            rows = sample[class_ids == cls_id]
            # score, x, y, z, h, w, l, yaw
            preds = np.concatenate([
                rows[:, 0:1],
                rows[:, 1:2] * down_ratio,
                rows[:, 2:3] * down_ratio,
                rows[:, 3:4],
                rows[:, 4:5],
                rows[:, 5:6] / cnf.bound_size_y * cnf.BEV_WIDTH,
                rows[:, 6:7] / cnf.bound_size_x * cnf.BEV_HEIGHT,
                get_yaw(rows[:, 7:9]).astype(np.float32)], axis=1)
            # Filter by peak_thresh
            if len(preds) > 0:
                preds = preds[preds[:, 0] > peak_thresh]
            per_class[cls_id] = preds
        ret.append(per_class)

    return ret
def draw_predictions(img, detections, num_classes=3):
    """Draw a rotated box on ``img`` for every detection of every class.

    Args:
        img: Image handed to ``drawRotatedBox``.
        detections: Mapping from class id to rows of
            ``(score, x, y, z, h, w, l, yaw)``.
        num_classes: Number of class ids (0 .. num_classes - 1) to draw.

    Returns:
        The same ``img`` object that was passed in.
    """
    for cls_id in range(num_classes):
        cls_dets = detections[cls_id]
        if len(cls_dets) == 0:
            continue
        box_color = cnf.colors[int(cls_id)]
        for det in cls_dets:
            # (scores-0:1, x-1:2, y-2:3, z-3:4, dim-4:7, yaw-7:8)
            _score, _x, _y, _z, _h, _w, _l, _yaw = det
            drawRotatedBox(img, _x, _y, _w, _l, _yaw, box_color)

    return img
def convert_det_to_real_values(detections, num_classes=3):
    """Convert BEV-pixel detections into rows scaled by the ``cnf`` boundary.

    Each input row ``(score, x, y, z, h, w, l, yaw)`` becomes
    ``[cls_id, h, w, l, x, y, z, yaw]``: the image x/y axes are swapped, sizes
    and positions are rescaled with the ``cnf`` bound sizes and offsets, the
    yaw sign is flipped, and every value is rounded to two decimals.

    Args:
        detections: Mapping from class id to detection rows.
        num_classes: Number of class ids (0 .. num_classes - 1) to convert.

    Returns:
        ``np.array`` built from the list of converted rows.
    """
    kitti_dets = []
    for cls_id in range(num_classes):
        cls_dets = detections[cls_id]
        if len(cls_dets) == 0:
            continue
        for det in cls_dets:
            # (scores-0:1, x-1:2, y-2:3, z-3:4, dim-4:7, yaw-7:8)
            _score, _x, _y, _z, _h, _w, _l, _yaw = det
            yaw = round(-_yaw, 2)
            # Position: image row (_y) maps to real x, image column (_x) to real y.
            x = round(_y / cnf.BEV_HEIGHT * cnf.bound_size_x + cnf.boundary['minX'], 2)
            y = round(_x / cnf.BEV_WIDTH * cnf.bound_size_y + cnf.boundary['minY'], 2)
            z = round(_z + cnf.boundary['minZ'], 2)
            # Size.
            w = round(_w / cnf.BEV_WIDTH * cnf.bound_size_y, 2)
            l = round(_l / cnf.BEV_HEIGHT * cnf.bound_size_x, 2)
            h = round(_h/1, 2)
            kitti_dets.append([cls_id, h, w, l, x, y, z, yaw])

    return np.array(kitti_dets)
@@ -0,0 +1,49 @@ | |||
""" | |||
# -*- coding: utf-8 -*- | |||
----------------------------------------------------------------------------------- | |||
# Author: Nguyen Mau Dung | |||
# DoC: 2020.07.31 | |||
# email: nguyenmaudung93.kstn@gmail.com | |||
----------------------------------------------------------------------------------- | |||
# Description: Logging helper used to save logs during training | |||
""" | |||
import os | |||
import logging | |||
class Logger():
    """
    Thin wrapper around a module-level ``logging`` logger that writes INFO
    messages both to a text file and to the default stream handler.
    Args:
        logs_dir: directory in which the log file is created
        saved_fn: name inserted into the log file name ``logger_<saved_fn>.txt``
    """

    def __init__(self, logs_dir, saved_fn):
        self.logger = logging.getLogger(__name__)
        self.logger.setLevel(logging.INFO)

        log_path = os.path.join(logs_dir, 'logger_{}.txt'.format(saved_fn))
        log_format = logging.Formatter(
            '%(asctime)s: %(module)s.py - %(funcName)s(), at Line %(lineno)d:%(levelname)s:\n%(message)s')

        to_file = logging.FileHandler(log_path)
        to_file.setLevel(logging.INFO)
        to_console = logging.StreamHandler()

        # Both handlers share one format; the file handler is attached first.
        for handler in (to_file, to_console):
            handler.setFormatter(log_format)
            self.logger.addHandler(handler)

    def info(self, message):
        """Log ``message`` at INFO level."""
        self.logger.info(message)
@@ -0,0 +1,312 @@ | |||
import torch | |||
from torch.optim import SGD, lr_scheduler | |||
import numpy as np | |||
class _LRMomentumScheduler(lr_scheduler._LRScheduler):
    """Base scheduler that updates both ``lr`` and ``momentum`` of each group.

    Subclasses implement ``get_lr`` and ``get_momentum``; ``step`` writes the
    returned values into the optimizer's parameter groups.
    """

    def __init__(self, optimizer, last_epoch=-1):
        fresh_start = last_epoch == -1
        for idx, group in enumerate(optimizer.param_groups):
            if fresh_start:
                # Remember the starting momentum unless one is already stored.
                group.setdefault('initial_momentum', group['momentum'])
            elif 'initial_momentum' not in group:
                raise KeyError("param 'initial_momentum' is not specified "
                               "in param_groups[{}] when resuming an optimizer".format(idx))
        self.base_momentums = [group['initial_momentum'] for group in optimizer.param_groups]
        super().__init__(optimizer, last_epoch)

    def get_lr(self):
        """Return one learning rate per parameter group (subclass hook)."""
        raise NotImplementedError

    def get_momentum(self):
        """Return one momentum per parameter group (subclass hook)."""
        raise NotImplementedError

    def step(self, epoch=None):
        """Advance to ``epoch`` (or the next epoch) and apply lr and momentum."""
        self.last_epoch = self.last_epoch + 1 if epoch is None else epoch
        updates = zip(self.optimizer.param_groups, self.get_lr(), self.get_momentum())
        for group, new_lr, new_momentum in updates:
            group['lr'] = new_lr
            group['momentum'] = new_momentum
class ParameterUpdate(object):
    """Callable that turns a list-defined schedule into multiplicative factors.

    Intended to be passed to the LambdaLR or LambdaScheduler scheduler. Calling
    the object with an epoch returns the scheduled value for that epoch divided
    by ``base_param``. A trailing 0 is appended to the schedule.

    Arguments:
        params {list or numpy.array} -- List or numpy array defining parameter schedule.
        base_param {float} -- Parameter value used to initialize the optimizer.
    """

    def __init__(self, params, base_param):
        self.base_param = base_param
        self.params = np.hstack([params, 0])

    def __call__(self, epoch):
        scheduled_value = self.params[epoch]
        return scheduled_value / self.base_param
def apply_lambda(last_epoch, bases, lambdas):
    """Scale each base value by its lambda evaluated at ``last_epoch``.

    ``lambdas`` and ``bases`` are paired positionally; the result has one
    entry per pair.
    """
    scaled = []
    for factor_fn, base_value in zip(lambdas, bases):
        scaled.append(base_value * factor_fn(last_epoch))
    return scaled
class LambdaScheduler(_LRMomentumScheduler):
    """Sets the learning rate and momentum of each parameter group to the initial lr and momentum
    times a given function. When last_epoch=-1, sets initial lr and momentum to the optimizer
    values.

    Args:
        optimizer (Optimizer): Wrapped optimizer.
        lr_lambda (function or list): A function which computes a multiplicative
            factor given an integer parameter epoch, or a list of such
            functions, one for each group in optimizer.param_groups.
            Default: lambda x:x.
        momentum_lambda (function or list): As for lr_lambda but applied to momentum.
            Default: lambda x:x.
        last_epoch (int): The index of last epoch. Default: -1.

    Raises:
        ValueError: If a list of lambdas is given whose length differs from the
            number of parameter groups.

    Example:
        >>> # Assuming optimizer has two groups.
        >>> lr_lambda = [
        ...     lambda epoch: epoch // 30,
        ...     lambda epoch: 0.95 ** epoch
        ... ]
        >>> mom_lambda = [
        ...     lambda epoch: max(0, (50 - epoch) // 50),
        ...     lambda epoch: 0.99 ** epoch
        ... ]
        >>> scheduler = LambdaScheduler(optimizer, lr_lambda, mom_lambda)
        >>> for epoch in range(100):
        >>>     train(...)
        >>>     validate(...)
        >>>     scheduler.step()
    """

    def __init__(self, optimizer, lr_lambda=lambda x: x, momentum_lambda=lambda x: x, last_epoch=-1):
        self.optimizer = optimizer

        # A single callable is shared by every parameter group.
        if not isinstance(lr_lambda, (list, tuple)):
            self.lr_lambdas = [lr_lambda] * len(optimizer.param_groups)
        else:
            if len(lr_lambda) != len(optimizer.param_groups):
                raise ValueError("Expected {} lr_lambdas, but got {}".format(
                    len(optimizer.param_groups), len(lr_lambda)))
            self.lr_lambdas = list(lr_lambda)

        if not isinstance(momentum_lambda, (list, tuple)):
            self.momentum_lambdas = [momentum_lambda] * len(optimizer.param_groups)
        else:
            if len(momentum_lambda) != len(optimizer.param_groups):
                raise ValueError("Expected {} momentum_lambdas, but got {}".format(
                    len(optimizer.param_groups), len(momentum_lambda)))
            self.momentum_lambdas = list(momentum_lambda)

        self.last_epoch = last_epoch
        super().__init__(optimizer, last_epoch)

    def state_dict(self):
        """Returns the state of the scheduler as a :class:`dict`.

        It contains an entry for every variable in self.__dict__ which
        is not the optimizer.
        The learning rate and momentum lambda functions will only be saved if they are
        callable objects and not if they are functions or lambdas.
        """
        # Local import: `types` is needed for the FunctionType checks below.
        import types

        state_dict = {key: value for key, value in self.__dict__.items()
                      if key not in ('optimizer', 'lr_lambdas', 'momentum_lambdas')}
        state_dict['lr_lambdas'] = [None] * len(self.lr_lambdas)
        state_dict['momentum_lambdas'] = [None] * len(self.momentum_lambdas)

        for idx, (lr_fn, mom_fn) in enumerate(zip(self.lr_lambdas, self.momentum_lambdas)):
            # Plain functions/lambdas are left as None; callable objects store their attributes.
            if not isinstance(lr_fn, types.FunctionType):
                state_dict['lr_lambdas'][idx] = lr_fn.__dict__.copy()
            if not isinstance(mom_fn, types.FunctionType):
                state_dict['momentum_lambdas'][idx] = mom_fn.__dict__.copy()

        return state_dict

    def load_state_dict(self, state_dict):
        """Loads the schedulers state.

        The passed dictionary is read but not modified.

        Arguments:
            state_dict (dict): scheduler state. Should be an object returned
                from a call to :meth:`state_dict`.

        Raises:
            KeyError: If ``'lr_lambdas'`` or ``'momentum_lambdas'`` is missing.
        """
        lambda_keys = ('lr_lambdas', 'momentum_lambdas')
        lr_lambdas = state_dict['lr_lambdas']
        momentum_lambdas = state_dict['momentum_lambdas']
        self.__dict__.update({key: value for key, value in state_dict.items()
                              if key not in lambda_keys})

        # Restore the attributes of callable-object lambdas; None entries are skipped.
        for idx, fn in enumerate(lr_lambdas):
            if fn is not None:
                self.lr_lambdas[idx].__dict__.update(fn)

        for idx, fn in enumerate(momentum_lambdas):
            if fn is not None:
                self.momentum_lambdas[idx].__dict__.update(fn)

    def get_lr(self):
        """Return each base lr multiplied by its lambda at ``last_epoch``."""
        return apply_lambda(self.last_epoch, self.base_lrs, self.lr_lambdas)

    def get_momentum(self):
        """Return each base momentum multiplied by its lambda at ``last_epoch``."""
        return apply_lambda(self.last_epoch, self.base_momentums, self.momentum_lambdas)
class ParameterUpdate(object):
    """Callable that turns a list-defined schedule into multiplicative factors.

    Intended to be passed to the LambdaLR or LambdaScheduler scheduler. Calling
    the object with an epoch returns the scheduled value for that epoch divided
    by ``base_param``. A trailing 0 is appended to the schedule. When
    ``base_param`` is below 1e-12 the schedule is replaced by ones and the
    divisor by 1, so every factor is 1 and no update is applied.

    Arguments:
        params {list or numpy.array} -- List or numpy array defining parameter schedule.
        base_param {float} -- Parameter value used to initialize the optimizer.
    """

    def __init__(self, params, base_param):
        schedule = np.hstack([params, 0])
        if base_param < 1e-12:
            # Avoid dividing by a (near-)zero base: use a neutral schedule.
            base_param = 1
            schedule = schedule * 0.0 + 1.0
        self.params = schedule
        self.base_param = base_param

    def __call__(self, epoch):
        scheduled_value = self.params[epoch]
        return scheduled_value / self.base_param
class ListScheduler(LambdaScheduler):
    """Sets the learning rate and momentum of each parameter group to values defined by lists.
    When last_epoch=-1, sets initial lr and momentum to the optimizer values. One or both of lr
    and momentum schedules may be specified.

    Note that the parameters used to initialize the optimizer are overridden by those defined by
    this scheduler.

    Args:
        optimizer (Optimizer): Wrapped optimizer.
        lrs (list or numpy.ndarray): A list of learning rates, or a list of lists, one for each
            parameter group. One- or two-dimensional numpy arrays may also be passed.
        momentums (list or numpy.ndarray): A list of momentums, or a list of lists, one for each
            parameter group. One- or two-dimensional numpy arrays may also be passed.
        last_epoch (int): The index of last epoch, forwarded to the parent scheduler.
            Default: -1.

    Example:
        >>> # Assuming optimizer has two groups.
        >>> lrs = [
        ...     np.linspace(0.01, 0.1, 100),
        ...     np.logspace(-2, 0, 100)
        ... ]
        >>> momentums = [
        ...     np.linspace(0.85, 0.95, 100),
        ...     np.linspace(0.8, 0.99, 100)
        ... ]
        >>> scheduler = ListScheduler(optimizer, lrs, momentums)
        >>> for epoch in range(100):
        >>>     train(...)
        >>>     validate(...)
        >>>     scheduler.step()
    """

    def __init__(self, optimizer, lrs=None, momentums=None, last_epoch=-1):
        groups = optimizer.param_groups

        if lrs is None:
            lr_lambda = lambda x: x
        else:
            lrs = np.array(lrs) if isinstance(lrs, (list, tuple)) else lrs
            if len(lrs.shape) == 1:
                # One schedule shared by every parameter group.
                lr_lambda = [ParameterUpdate(lrs, g['lr']) for g in groups]
            else:
                # One schedule per parameter group.
                lr_lambda = [ParameterUpdate(l, g['lr']) for l, g in zip(lrs, groups)]

        if momentums is None:
            momentum_lambda = lambda x: x
        else:
            momentums = np.array(momentums) if isinstance(momentums, (list, tuple)) else momentums
            if len(momentums.shape) == 1:
                momentum_lambda = [ParameterUpdate(momentums, g['momentum']) for g in groups]
            else:
                momentum_lambda = [ParameterUpdate(l, g['momentum']) for l, g in zip(momentums, groups)]

        # Forward last_epoch so a non-default value is honoured instead of silently dropped.
        super().__init__(optimizer, lr_lambda, momentum_lambda, last_epoch)
class RangeFinder(ListScheduler):
    """Scheduler class that implements the LR range search specified in:

        A disciplined approach to neural network hyper-parameters: Part 1 -- learning rate, batch
        size, momentum, and weight decay. Leslie N. Smith, 2018, arXiv:1803.09820.

    Learning rates are spaced logarithmically from 1e-7 to 1, with 'epochs' values in total.

    Note that the parameters used to initialize the optimizer are overridden by those defined by
    this scheduler.

    Args:
        optimizer (Optimizer): Wrapped optimizer.
        epochs (int): Number of epochs over which to run test.

    Example:
        >>> scheduler = RangeFinder(optimizer, 100)
        >>> for epoch in range(100):
        >>>     train(...)
        >>>     validate(...)
        >>>     scheduler.step()
    """

    def __init__(self, optimizer, epochs):
        search_lrs = np.logspace(-7, 0, epochs)
        super().__init__(optimizer, lrs=search_lrs)
class OneCyclePolicy(ListScheduler):
    """Scheduler class that implements the 1cycle policy search specified in:

        A disciplined approach to neural network hyper-parameters: Part 1 -- learning rate, batch
        size, momentum, and weight decay. Leslie N. Smith, 2018, arXiv:1803.09820.

    Args:
        optimizer (Optimizer): Wrapped optimizer. When momentum_rng is given, the 'momentum'
            entry of each parameter group is set to the upper momentum value.
        lr (float or list). Maximum learning rate in range. If a list of values is passed, they
            should correspond to parameter groups.
        epochs (int): The number of epochs to use during search.
        momentum_rng (list). Optional upper and lower momentum values (may be both equal). Set to
            None to run without momentum. Default: [0.85, 0.95]. If a list of lists is passed, they
            should correspond to parameter groups.
        phase_ratio (float): Fraction of epochs used for the increasing and decreasing phase of
            the schedule. For example, if phase_ratio=0.45 and epochs=100, the learning rate will
            increase from lr/10 to lr over 45 epochs, then decrease back to lr/10 over 45 epochs,
            then decrease to lr/100 over the remaining 10 epochs. Default: 0.45.
    """

    def __init__(self, optimizer, lr, epochs, momentum_rng=[0.85, 0.95], phase_ratio=0.45):
        phase_epochs = int(phase_ratio * epochs)
        tail_epochs = epochs - 2 * phase_epochs

        def lr_curve(peak):
            # Ramp up to the peak, back down, then anneal a further 10x.
            return np.hstack([
                np.linspace(peak * 1e-1, peak, phase_epochs),
                np.linspace(peak, peak * 1e-1, phase_epochs),
                np.linspace(peak * 1e-1, peak * 1e-2, tail_epochs),
            ])

        def momentum_curve(low, high):
            # Mirror of the lr curve: momentum drops while lr rises, then holds at `high`.
            return np.hstack([
                np.linspace(high, low, phase_epochs),
                np.linspace(low, high, phase_epochs),
                np.linspace(high, high, tail_epochs),
            ])

        if isinstance(lr, (list, tuple)):
            lrs = [lr_curve(l) for l in lr]
        else:
            lrs = lr_curve(lr)

        momentums = None
        if momentum_rng is not None:
            momentum_rng = np.array(momentum_rng)
            per_group = len(momentum_rng.shape) == 2
            # Start every group at its upper momentum value.
            for i, g in enumerate(optimizer.param_groups):
                g['momentum'] = momentum_rng[i][1] if per_group else momentum_rng[1]
            if per_group:
                momentums = [momentum_curve(m[0], m[1]) for m in momentum_rng]
            else:
                momentums = momentum_curve(momentum_rng[0], momentum_rng[1])

        super().__init__(optimizer, lrs, momentums)
@@ -0,0 +1,71 @@ | |||
""" | |||
# -*- coding: utf-8 -*- | |||
----------------------------------------------------------------------------------- | |||
# Author: Nguyen Mau Dung | |||
# DoC: 2020.07.31 | |||
# email: nguyenmaudung93.kstn@gmail.com | |||
----------------------------------------------------------------------------------- | |||
# Description: Miscellaneous helpers (folder creation, average/progress meters, timing) | |||
""" | |||
import os | |||
import torch | |||
import time | |||
def make_folder(folder_name):
    """Create ``folder_name`` (including parents) unless the path already exists.

    Args:
        folder_name: Path of the directory to create. Nothing happens when the
            path already exists.
    """
    if not os.path.exists(folder_name):
        # exist_ok=True tolerates another process creating the folder between
        # the existence check and this call.
        os.makedirs(folder_name, exist_ok=True)
class AverageMeter(object):
    """Track the latest value and the weighted running average of a metric.

    Args:
        name: Label used when the meter is rendered as a string.
        fmt: Format spec suffix (for example ``':.4e'``) applied to both the
            latest value and the average in ``__str__``.
    """

    def __init__(self, name, fmt=':f'):
        self.name = name
        self.fmt = fmt
        self.reset()

    def reset(self):
        """Clear the latest value, average, weighted sum and count."""
        self.val = self.avg = self.sum = self.count = 0

    def update(self, val, n=1):
        """Record ``val`` with weight ``n`` and refresh the average."""
        self.val = val
        self.sum += val * n
        self.count += n
        self.avg = self.sum / self.count

    def __str__(self):
        template = ''.join(['{name} {val', self.fmt, '} ({avg', self.fmt, '})'])
        return template.format(**vars(self))
class ProgressMeter(object):
    """Format a progress line made of a batch counter and several meters.

    Args:
        num_batches: Total number of batches, shown after the slash.
        meters: Iterable of objects rendered with ``str()``.
        prefix: Text placed in front of the batch counter.
    """

    def __init__(self, num_batches, meters, prefix=""):
        self.batch_fmtstr = self._get_batch_fmtstr(num_batches)
        self.meters = meters
        self.prefix = prefix

    def _compose(self, batch):
        # Counter first, then every meter, joined with tabs.
        parts = [self.prefix + self.batch_fmtstr.format(batch)]
        parts.extend(str(meter) for meter in self.meters)
        return '\t'.join(parts)

    def display(self, batch):
        """Print the progress line for ``batch``."""
        print(self._compose(batch))

    def get_message(self, batch):
        """Return the progress line for ``batch`` without printing it."""
        return self._compose(batch)

    def _get_batch_fmtstr(self, num_batches):
        # Pad the current batch index to the width of the total batch count.
        width = len(str(num_batches // 1))
        spec = '{:' + str(width) + 'd}'
        return ''.join(['[', spec, '/', spec.format(num_batches), ']'])
def time_synchronized():
    """Return ``time.time()`` after synchronizing CUDA when it is available."""
    if torch.cuda.is_available():
        torch.cuda.synchronize()
    return time.time()
@@ -0,0 +1,45 @@ | |||
""" | |||
# -*- coding: utf-8 -*- | |||
----------------------------------------------------------------------------------- | |||
# Author: Nguyen Mau Dung | |||
# DoC: 2020.08.09 | |||
# email: nguyenmaudung93.kstn@gmail.com | |||
----------------------------------------------------------------------------------- | |||
# Description: some utilities of torch (conversion) | |||
----------------------------------------------------------------------------------- | |||
""" | |||
import torch | |||
import torch.distributed as dist | |||
__all__ = ['convert2cpu', 'convert2cpu_long', 'to_cpu', 'reduce_tensor', 'to_python_float', '_sigmoid'] | |||
def convert2cpu(gpu_matrix):
    """Copy ``gpu_matrix`` into a new ``torch.FloatTensor`` of the same size."""
    cpu_copy = torch.FloatTensor(gpu_matrix.size())
    cpu_copy.copy_(gpu_matrix)
    return cpu_copy
def convert2cpu_long(gpu_matrix):
    """Copy ``gpu_matrix`` into a new ``torch.LongTensor`` of the same size."""
    cpu_copy = torch.LongTensor(gpu_matrix.size())
    cpu_copy.copy_(gpu_matrix)
    return cpu_copy
def to_cpu(tensor):
    """Return ``tensor`` detached from the autograd graph and moved to the CPU."""
    detached = tensor.detach()
    return detached.cpu()
def reduce_tensor(tensor, world_size):
    """Return ``tensor`` summed across processes and divided by ``world_size``.

    The input is cloned first, so ``tensor`` itself is left untouched.

    Args:
        tensor: Tensor to reduce; passed (as a clone) to ``dist.all_reduce``.
        world_size: Divisor applied after the sum.

    Returns:
        The reduced clone.
    """
    rt = tensor.clone()
    # dist.ReduceOp replaces the deprecated dist.reduce_op alias.
    dist.all_reduce(rt, op=dist.ReduceOp.SUM)
    rt /= world_size
    return rt
def to_python_float(t):
    """Return ``t.item()`` when ``t`` has an ``item`` attribute, else ``t[0]``."""
    return t.item() if hasattr(t, 'item') else t[0]
def _sigmoid(x):
    """Apply sigmoid to ``x`` in place and return it clamped to [1e-4, 1 - 1e-4].

    Note that ``x`` itself is overwritten with its sigmoid; the clamped result
    is a separate tensor.
    """
    activated = x.sigmoid_()
    return torch.clamp(activated, min=1e-4, max=1 - 1e-4)
@@ -0,0 +1,140 @@ | |||
""" | |||
# -*- coding: utf-8 -*- | |||
----------------------------------------------------------------------------------- | |||
# Author: Nguyen Mau Dung | |||
# DoC: 2020.08.09 | |||
# email: nguyenmaudung93.kstn@gmail.com | |||
----------------------------------------------------------------------------------- | |||
# Description: utils functions that use for training process | |||
""" | |||
import copy | |||
import os | |||
import math | |||
import sys | |||
import torch | |||
from torch.optim.lr_scheduler import LambdaLR | |||
import matplotlib.pyplot as plt | |||
src_dir = os.path.dirname(os.path.realpath(__file__)) | |||
# while not src_dir.endswith("sfa"): | |||
# src_dir = os.path.dirname(src_dir) | |||
if src_dir not in sys.path: | |||
sys.path.append(src_dir) | |||
from utils.lr_scheduler import OneCyclePolicy | |||
def create_optimizer(configs, model):
    """Create the optimizer for the training process.

    Only parameters with ``requires_grad`` set are optimized. When ``model``
    has a ``module`` attribute, the parameters are taken from
    ``model.module`` instead of ``model``.

    Args:
        configs: Settings object; ``optimizer_type`` and ``lr`` are read, plus
            ``momentum`` for ``'sgd'`` and ``weight_decay`` for ``'adam'``.
        model: Network (or wrapper exposing ``module``) whose parameters are
            optimized.

    Returns:
        A ``torch.optim.SGD`` (Nesterov momentum) or ``torch.optim.Adam``.

    Raises:
        AssertionError: If ``configs.optimizer_type`` is neither ``'sgd'`` nor
            ``'adam'``.
    """
    network = model.module if hasattr(model, 'module') else model
    train_params = [param for param in network.parameters() if param.requires_grad]

    if configs.optimizer_type == 'sgd':
        return torch.optim.SGD(train_params, lr=configs.lr, momentum=configs.momentum, nesterov=True)
    if configs.optimizer_type == 'adam':
        return torch.optim.Adam(train_params, lr=configs.lr, weight_decay=configs.weight_decay)

    # Raised explicitly (rather than `assert False`) so the check survives `python -O`.
    raise AssertionError("Unknown optimizer type")
def create_lr_scheduler(optimizer, configs):
    """Create the learning rate scheduler for the training process.

    Supported values of `configs.lr_type`:
        - 'multi_step': LR factor 1.0 before `configs.steps[0]`, 0.1 before `configs.steps[1]`,
          and 0.01 afterwards
        - 'cosin': cosine decay of the LR factor from 1.0 down to 0.1 over `configs.num_epochs`
        - 'one_cycle': an `OneCyclePolicy` built from `configs.lr` and `configs.num_epochs`

    Once the scheduler is built, its curve is plotted into `configs.logs_dir` through
    `plot_lr_scheduler`.

    :param optimizer: the optimizer whose learning rate will be scheduled
    :param configs: object providing `lr_type`, `num_epochs`, `logs_dir` and the fields listed above
    :return: the scheduler instance
    :raises ValueError: if `configs.lr_type` is not one of the supported values
    """
    if configs.lr_type == 'multi_step':
        def multi_step_scheduler(i):
            if i < configs.steps[0]:
                factor = 1.
            elif i < configs.steps[1]:
                factor = 0.1
            else:
                factor = 0.01

            return factor

        lr_scheduler = LambdaLR(optimizer, multi_step_scheduler)

    elif configs.lr_type == 'cosin':
        # Scheduler https://arxiv.org/pdf/1812.01187.pdf
        lf = lambda x: (((1 + math.cos(x * math.pi / configs.num_epochs)) / 2) ** 1.0) * 0.9 + 0.1  # cosine
        lr_scheduler = LambdaLR(optimizer, lr_lambda=lf)
    elif configs.lr_type == 'one_cycle':
        lr_scheduler = OneCyclePolicy(optimizer, configs.lr, configs.num_epochs, momentum_rng=[0.85, 0.95],
                                      phase_ratio=0.45)
    else:
        # Name the offending value so a misconfiguration can be diagnosed from the traceback alone
        raise ValueError("Unknown lr_type: {!r} (expected 'multi_step', 'cosin' or 'one_cycle')".format(
            configs.lr_type))

    plot_lr_scheduler(optimizer, lr_scheduler, configs.num_epochs, save_dir=configs.logs_dir, lr_type=configs.lr_type)

    return lr_scheduler
def get_saved_state(model, optimizer, lr_scheduler, epoch, configs):
    """Collect the information that is stored together with a checkpoint.

    :param model: the network; if it exposes a `module` attribute, the state of that inner
        module is taken instead
    :param optimizer: optimizer whose state dict is deep-copied into the result
    :param lr_scheduler: scheduler whose state dict is deep-copied into the result
    :param epoch: epoch number recorded under the 'epoch' key
    :param configs: configuration object recorded as-is under the 'configs' key
    :return: tuple (model_state_dict, utils_state_dict)
    """
    net = model.module if hasattr(model, 'module') else model
    model_state_dict = net.state_dict()

    # Deep copies decouple the snapshot from any later optimizer / scheduler updates
    utils_state_dict = {'epoch': epoch, 'configs': configs}
    utils_state_dict['optimizer'] = copy.deepcopy(optimizer.state_dict())
    utils_state_dict['lr_scheduler'] = copy.deepcopy(lr_scheduler.state_dict())

    return model_state_dict, utils_state_dict
def save_checkpoint(checkpoints_dir, saved_fn, model_state_dict, utils_state_dict, epoch):
    """Write the model state and the training-utils state of one epoch to `checkpoints_dir`.

    Two files are produced: 'Model_<saved_fn>_epoch_<epoch>.pth' for `model_state_dict` and
    'Utils_<saved_fn>_epoch_<epoch>.pth' for `utils_state_dict`. Deciding when to save
    (best model, every N epochs, ...) is left to the caller.
    """
    suffix = '{}_epoch_{}.pth'.format(saved_fn, epoch)
    model_save_path = os.path.join(checkpoints_dir, 'Model_' + suffix)
    utils_save_path = os.path.join(checkpoints_dir, 'Utils_' + suffix)

    for state, path in ((model_state_dict, model_save_path), (utils_state_dict, utils_save_path)):
        torch.save(state, path)

    print('save a checkpoint at {}'.format(model_save_path))
def plot_lr_scheduler(optimizer, scheduler, num_epochs=300, save_dir='', lr_type=''):
    """Simulate `num_epochs` scheduler steps and save the resulting learning-rate curve.

    The curve is drawn on the current pyplot figure and written to
    `<save_dir>/LR_<lr_type>.png`.

    :param optimizer: the optimizer driven by `scheduler`; its hyper-parameters are restored
        once the simulation is finished
    :param scheduler: the scheduler to simulate; the simulation runs on a shallow copy of it
    :param num_epochs: number of `step()` calls to simulate
    :param save_dir: directory the image is written to
    :param lr_type: label used in the image file name
    """
    # A shallow copy gives the simulation its own scheduler attributes, but a shallow copy
    # still refers to the caller's optimizer, so every simulated step rewrites the real
    # `param_groups`. Snapshot the per-group hyper-parameters (lr, momentum, ...) and put
    # them back afterwards; the parameter tensors under 'params' are not part of the snapshot.
    sim_scheduler = copy.copy(scheduler)
    snapshot = [{key: copy.deepcopy(value) for key, value in group.items() if key != 'params'}
                for group in optimizer.param_groups]

    y = []
    try:
        for _ in range(num_epochs):
            sim_scheduler.step()
            y.append(optimizer.param_groups[0]['lr'])
    finally:
        # Without this, training would start from the LR reached at the end of the simulation
        for group, saved in zip(optimizer.param_groups, snapshot):
            group.update(saved)

    plt.plot(y, '.-', label='LR')
    plt.xlabel('epoch')
    plt.ylabel('LR')
    plt.grid()
    plt.xlim(0, num_epochs)
    plt.ylim(0)
    plt.tight_layout()
    plt.savefig(os.path.join(save_dir, 'LR_{}.png'.format(lr_type)), dpi=200)
if __name__ == '__main__':
    # Manual check: build a scheduler for a small network and print the LR of every epoch
    from easydict import EasyDict as edict
    from torchvision.models import resnet18

    configs = edict({
        'steps': [150, 180],
        'lr_type': 'one_cycle',  # multi_step, cosin, one_cycle
        'logs_dir': '../../logs/',
        'num_epochs': 50,
        'lr': 2.25e-3,
    })

    net = resnet18()
    optimizer = torch.optim.Adam(net.parameters(), lr=0.0002)
    # Built-in alternatives that can be swapped in for comparison:
    # scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer, milestones=[3, 6, 9], gamma=0.1)
    # scheduler = torch.optim.lr_scheduler.StepLR(optimizer, 3, gamma=0.1)
    scheduler = create_lr_scheduler(optimizer, configs)

    for epoch in range(configs.num_epochs):
        print(epoch, scheduler.get_lr())
        scheduler.step()
@@ -0,0 +1,154 @@ | |||
""" | |||
# -*- coding: utf-8 -*- | |||
----------------------------------------------------------------------------------- | |||
# Author: Nguyen Mau Dung | |||
# DoC: 2020.08.09 | |||
# email: nguyenmaudung93.kstn@gmail.com | |||
----------------------------------------------------------------------------------- | |||
# Description: The utils of the kitti dataset | |||
""" | |||
from __future__ import print_function | |||
import os | |||
import sys | |||
import numpy as np | |||
import cv2 | |||
src_dir = os.path.dirname(os.path.realpath(__file__)) | |||
# while not src_dir.endswith("sfa"): | |||
# src_dir = os.path.dirname(src_dir) | |||
if src_dir not in sys.path: | |||
sys.path.append(src_dir) | |||
import config.kitti_config as cnf | |||
def roty(angle):
    """Build the 3x3 matrix of a rotation by `angle` (in radians) about the y-axis."""
    cos_a, sin_a = np.cos(angle), np.sin(angle)
    top_row = [cos_a, 0, sin_a]
    middle_row = [0, 1, 0]
    bottom_row = [-sin_a, 0, cos_a]
    return np.array([top_row, middle_row, bottom_row])
def compute_box_3d(dim, location, ry):
    """Compute the 8 corners of a 3D box.

    Before rotation the box spans +/- l/2 along x, +/- w/2 along z and reaches from 0 to -h
    along y; it is then rotated by `ry` about the y-axis and shifted by `location`.

    :param dim: 3 values, unpacked as (h, w, l)
    :param location: 3 values, the translation added to every corner
    :param ry: rotation angle about the y-axis
    :return: array of shape 8 x 3, one corner per row
    """
    h, w, l = dim
    half_l, half_w = l / 2, w / 2

    xs = [half_l, half_l, -half_l, -half_l, half_l, half_l, -half_l, -half_l]
    ys = [0] * 4 + [-h] * 4
    zs = [half_w, -half_w, -half_w, half_w, half_w, -half_w, -half_w, half_w]
    local_corners = np.array([xs, ys, zs], dtype=np.float32)

    offset = np.array(location, dtype=np.float32).reshape(3, 1)
    rotated = np.dot(roty(ry), local_corners)
    return (rotated + offset).T
def project_to_image(pts_3d, P):
    """Project 3D points into the image with the projection matrix `P`.

    Each point is extended to homogeneous coordinates, multiplied by `P`, and its first two
    components are divided by the third one. No guard is applied for points whose third
    projected component is zero.

    :param pts_3d: array of shape n x 3
    :param P: projection matrix of shape 3 x 4
    :return: integer array of shape n x 2 (fractional parts are dropped by the integer cast)
    """
    num_pts = pts_3d.shape[0]
    pts_3d_homo = np.concatenate([pts_3d, np.ones((num_pts, 1), dtype=np.float32)], axis=1)
    pts_2d = np.dot(P, pts_3d_homo.transpose(1, 0)).transpose(1, 0)
    pts_2d = pts_2d[:, :2] / pts_2d[:, 2:]

    # `np.int` was only an alias of the builtin `int`; it was deprecated in NumPy 1.20 and
    # removed in 1.24, so the builtin is used directly.
    return pts_2d.astype(int)
def draw_box_3d_v2(image, qs, color=(255, 0, 255), thickness=2):
    ''' Draw the 12 edges of a projected 3d bounding box on image
        qs: array with one row per box vertex (8 rows); columns 0 and 1 are used as the
            pixel coordinates after a cast to int32. Vertices are in the following order:
            1 -------- 0
           /|         /|
          2 -------- 3 .
          | |        | |
          . 5 -------- 4
          |/         |/
          6 -------- 7
        Returns the image that was drawn on.
    '''
    pixels = qs.astype(np.int32)

    def vertex(idx):
        return pixels[idx, 0], pixels[idx, 1]

    # Ref: http://docs.enthought.com/mayavi/mayavi/auto/mlab_helper_functions.html
    for k in range(4):
        nxt = (k + 1) % 4
        # an edge of face 0-3, the matching edge of face 4-7, then the edge joining both faces
        for start, end in ((k, nxt), (k + 4, nxt + 4), (k, k + 4)):
            # use LINE_AA for opencv3
            cv2.line(image, vertex(start), vertex(end), color, thickness)

    return image
def draw_box_3d(image, corners, color=(0, 0, 255)):
    ''' Draw a projected 3d bounding box on image, face by face
        corners: array with one row per box vertex (8 rows); columns 0 and 1 are used as the
            pixel coordinates. Vertices are in the following order:
            1 -------- 0
           /|         /|
          2 -------- 3 .
          | |        | |
          . 5 -------- 4
          |/         |/
          6 -------- 7
        The four faces listed below are outlined with anti-aliased lines of thickness 2;
        face [0, 1, 5, 4] is additionally crossed by its two diagonals (thickness 1).
        Returns the image that was drawn on.
    '''
    side_faces = ([0, 1, 5, 4],
                  [1, 2, 6, 5],
                  [2, 3, 7, 6],
                  [3, 0, 4, 7])

    def pixel(vertex_id):
        return corners[vertex_id, 0], corners[vertex_id, 1]

    # Faces are visited from the last one to the first one
    for face_no in reversed(range(len(side_faces))):
        face = side_faces[face_no]
        for pos, vertex_id in enumerate(face):
            following = face[(pos + 1) % 4]
            cv2.line(image, pixel(vertex_id), pixel(following), color, 2, lineType=cv2.LINE_AA)
        if face_no == 0:
            # thin cross along both diagonals of face [0, 1, 5, 4]
            cv2.line(image, pixel(face[0]), pixel(face[2]), color, 1, lineType=cv2.LINE_AA)
            cv2.line(image, pixel(face[1]), pixel(face[3]), color, 1, lineType=cv2.LINE_AA)

    return image
def show_rgb_image_with_boxes(img, labels, calib):
    """Draw the projected 3D box of every usable label on `img` and return the image.

    Each label is read as [cls_id, location (3 values), dim (3 values), ry]. Labels whose
    third location component is below 2.0, or whose class id is negative, are skipped.

    :param img: image to draw on
    :param labels: iterable of labels in the layout described above
    :param calib: calibration object; its `P2` matrix is used for the projection
    :return: the image with the boxes drawn, colored per class via `cnf.colors`
    """
    for label in labels:
        cls_id = label[0]
        location, dim, ry = label[1:4], label[4:7], label[7]

        # Objects too close to the camera are ignored during visualization, as are
        # entries carrying a negative class id
        if location[2] < 2.0 or cls_id < 0:
            continue

        box_corners_3d = compute_box_3d(dim, location, ry)
        box_corners_2d = project_to_image(box_corners_3d, calib.P2)
        img = draw_box_3d(img, box_corners_2d, color=cnf.colors[int(cls_id)])

    return img
def merge_rgb_to_bev(img_rgb, img_bev, output_width):
    """Stack the RGB image on top of the BEV image in one uint8 canvas of width `output_width`.

    Both images are resized to `output_width`, each keeping its own aspect ratio (the new
    height is truncated to an int).

    :param img_rgb: image placed in the upper part of the output
    :param img_bev: image placed in the lower part of the output
    :param output_width: width of the merged image in pixels
    :return: array of shape (rgb_height + bev_height, output_width, 3), dtype uint8
    """
    def fit_to_width(img):
        src_h, src_w = img.shape[:2]
        scale = output_width / src_w
        new_h = int(scale * src_h)
        return cv2.resize(img, (output_width, new_h)), new_h

    ret_img_rgb, output_rgb_h = fit_to_width(img_rgb)
    ret_img_bev, output_bev_h = fit_to_width(img_bev)

    out_img = np.zeros((output_rgb_h + output_bev_h, output_width, 3), dtype=np.uint8)
    # Upper: RGB --> BEV
    out_img[:output_rgb_h, ...] = ret_img_rgb
    out_img[output_rgb_h:, ...] = ret_img_bev

    return out_img