@@ -1 +0,0 @@ | |||||
Subproject commit e4c429e813608acbcf487656abe2eb87dcc4636c |
@@ -0,0 +1,9 @@ | |||||
dataset | |||||
# cache | |||||
__pycache__ | |||||
# results | |||||
results | |||||
# logs | |||||
logs |
@@ -0,0 +1,14 @@ | |||||
<?xml version="1.0" encoding="UTF-8"?> | |||||
<project version="4"> | |||||
<component name="PublishConfigData"> | |||||
<serverData> | |||||
<paths name="root@10.5.24.134:10000"> | |||||
<serverdata> | |||||
<mappings> | |||||
<mapping local="$PROJECT_DIR$" web="/" /> | |||||
</mappings> | |||||
</serverdata> | |||||
</paths> | |||||
</serverData> | |||||
</component> | |||||
</project> |
@@ -0,0 +1,6 @@ | |||||
<component name="InspectionProjectProfileManager"> | |||||
<settings> | |||||
<option name="USE_PROJECT_PROFILE" value="false" /> | |||||
<version value="1.0" /> | |||||
</settings> | |||||
</component> |
@@ -0,0 +1,7 @@ | |||||
<?xml version="1.0" encoding="UTF-8"?> | |||||
<project version="4"> | |||||
<component name="JavaScriptSettings"> | |||||
<option name="languageLevel" value="ES6" /> | |||||
</component> | |||||
<component name="ProjectRootManager" version="2" project-jdk-name="Python 3.7" project-jdk-type="Python SDK" /> | |||||
</project> |
@@ -0,0 +1,8 @@ | |||||
<?xml version="1.0" encoding="UTF-8"?> | |||||
<project version="4"> | |||||
<component name="ProjectModuleManager"> | |||||
<modules> | |||||
<module fileurl="file://$PROJECT_DIR$/.idea/sfa3d.iml" filepath="$PROJECT_DIR$/.idea/sfa3d.iml" /> | |||||
</modules> | |||||
</component> | |||||
</project> |
@@ -0,0 +1,12 @@ | |||||
<?xml version="1.0" encoding="UTF-8"?> | |||||
<module type="PYTHON_MODULE" version="4"> | |||||
<component name="NewModuleRootManager"> | |||||
<content url="file://$MODULE_DIR$" /> | |||||
<orderEntry type="inheritedJdk" /> | |||||
<orderEntry type="sourceFolder" forTests="false" /> | |||||
</component> | |||||
<component name="PyDocumentationSettings"> | |||||
<option name="format" value="PLAIN" /> | |||||
<option name="myDocStringFormat" value="Plain" /> | |||||
</component> | |||||
</module> |
@@ -0,0 +1,6 @@ | |||||
<?xml version="1.0" encoding="UTF-8"?> | |||||
<project version="4"> | |||||
<component name="VcsDirectoryMappings"> | |||||
<mapping directory="$PROJECT_DIR$" vcs="Git" /> | |||||
</component> | |||||
</project> |
@@ -0,0 +1,49 @@ | |||||
<?xml version="1.0" encoding="UTF-8"?> | |||||
<project version="4"> | |||||
<component name="ChangeListManager"> | |||||
<list default="true" id="ba6cd492-6d49-41a8-a764-504006f2eb9a" name="Changes" comment="" /> | |||||
<option name="SHOW_DIALOG" value="false" /> | |||||
<option name="HIGHLIGHT_CONFLICTS" value="true" /> | |||||
<option name="HIGHLIGHT_NON_ACTIVE_CHANGELIST" value="false" /> | |||||
<option name="LAST_RESOLUTION" value="IGNORE" /> | |||||
</component> | |||||
<component name="Git.Settings"> | |||||
<option name="RECENT_BRANCH_BY_REPOSITORY"> | |||||
<map> | |||||
<entry key="$PROJECT_DIR$" value="master" /> | |||||
</map> | |||||
</option> | |||||
<option name="RECENT_GIT_ROOT_PATH" value="$PROJECT_DIR$" /> | |||||
</component> | |||||
<component name="ProjectId" id="2E4AHz6idZOBGHdHApv98dU5PkK" /> | |||||
<component name="ProjectViewState"> | |||||
<option name="hideEmptyMiddlePackages" value="true" /> | |||||
<option name="showLibraryContents" value="true" /> | |||||
</component> | |||||
<component name="PropertiesComponent"> | |||||
<property name="RunOnceActivity.OpenProjectViewOnStart" value="true" /> | |||||
<property name="RunOnceActivity.ShowReadmeOnStart" value="true" /> | |||||
</component> | |||||
<component name="SpellCheckerSettings" RuntimeDictionaries="0" Folders="0" CustomDictionaries="0" DefaultDictionary="application-level" UseSingleDictionary="true" transferred="true" /> | |||||
<component name="TaskManager"> | |||||
<task active="true" id="Default" summary="Default task"> | |||||
<changelist id="ba6cd492-6d49-41a8-a764-504006f2eb9a" name="Changes" comment="" /> | |||||
<created>1661844398596</created> | |||||
<option name="number" value="Default" /> | |||||
<option name="presentableId" value="Default" /> | |||||
<updated>1661844398596</updated> | |||||
</task> | |||||
<servers /> | |||||
</component> | |||||
<component name="Vcs.Log.Tabs.Properties"> | |||||
<option name="TAB_STATES"> | |||||
<map> | |||||
<entry key="MAIN"> | |||||
<value> | |||||
<State /> | |||||
</value> | |||||
</entry> | |||||
</map> | |||||
</option> | |||||
</component> | |||||
</project> |
@@ -0,0 +1,21 @@ | |||||
MIT License | |||||
Copyright (c) 2020 Nguyen Mau Dung | |||||
Permission is hereby granted, free of charge, to any person obtaining a copy | |||||
of this software and associated documentation files (the "Software"), to deal | |||||
in the Software without restriction, including without limitation the rights | |||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell | |||||
copies of the Software, and to permit persons to whom the Software is | |||||
furnished to do so, subject to the following conditions: | |||||
The above copyright notice and this permission notice shall be included in all | |||||
copies or substantial portions of the Software. | |||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | |||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | |||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | |||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | |||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, | |||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | |||||
SOFTWARE. |
@@ -0,0 +1,116 @@ | |||||
# Super Fast and Accurate 3D Object Detection based on 3D LiDAR Point Clouds | |||||
[![python-image]][python-url] | |||||
[![pytorch-image]][pytorch-url] | |||||
--- | |||||
## 1. Getting Started | |||||
### 1.1 Requirement | |||||
The instructions for setting up a virtual environment are [here](https://github.com/maudzung/virtual_environment_python3).
```shell script | |||||
cd SFA3D/ | |||||
pip install -r requirements.txt | |||||
``` | |||||
### 1.2 Data Preparation | |||||
Download the 3D KITTI detection dataset from [here](http://www.cvlibs.net/datasets/kitti/eval_object.php?obj_benchmark=3d). | |||||
The downloaded data includes: | |||||
- Velodyne point clouds _**(29 GB)**_ | |||||
- Training labels of object data set _**(5 MB)**_ | |||||
Please make sure that you construct the source code & dataset directories structure as below. | |||||
## 2. How to run | |||||
### 2.1 Inference | |||||
The pre-trained model was pushed to this repo. | |||||
- **CPU** | |||||
``` | |||||
python inference.py --no_cuda=True | |||||
``` | |||||
- **GPU** | |||||
``` | |||||
python inference.py | |||||
``` | |||||
Label of inference | |||||
- Pedestrian | |||||
- Car | |||||
- Cyclist | |||||
### 2.2 Training | |||||
#### 2.2.1 CPU | |||||
``` | |||||
python train.py --no_cuda=True | |||||
``` | |||||
#### 2.2.2 Single machine, single gpu | |||||
```shell script | |||||
python train.py --gpu_idx 0 | |||||
``` | |||||
#### 2.2.3 Distributed Data Parallel Training | |||||
- **Single machine (node), multiple GPUs** | |||||
``` | |||||
python train.py --multiprocessing-distributed --world-size 1 --rank 0 --batch_size 64 --num_workers 8 | |||||
``` | |||||
- **Two machines (two nodes), multiple GPUs** | |||||
- _**First machine**_ | |||||
``` | |||||
python train.py --dist-url 'tcp://IP_OF_NODE1:FREEPORT' --multiprocessing-distributed --world-size 2 --rank 0 --batch_size 64 --num_workers 8 | |||||
``` | |||||
- _**Second machine**_ | |||||
``` | |||||
python train.py --dist-url 'tcp://IP_OF_NODE2:FREEPORT' --multiprocessing-distributed --world-size 2 --rank 1 --batch_size 64 --num_workers 8 | |||||
``` | |||||
## References | |||||
[1] SFA3D: [PyTorch Implementation](https://github.com/maudzung/SFA3D) | |||||
## Folder structure | |||||
### Dataset | |||||
``` | |||||
└── kitti/ | |||||
    ├── image_2/ (left color camera images, optional)
    ├── calib/ (calibration files, optional)
    ├── label_2/ (annotation labels, optional)
    └── velodyne/ (point cloud files, required)
``` | |||||
### Checkpoints & Algorithm | |||||
``` | |||||
${ROOT} | |||||
└── checkpoints/ | |||||
├── fpn_resnet_18/ | |||||
        ├── fpn_resnet_18_epoch_300.pth (point-cloud object-detection model checkpoint)
    └── sfa/ (point-cloud annotation algorithm)
├── config/ | |||||
├── data_process/ | |||||
├── models/ | |||||
├── utils/ | |||||
├── inference.py | |||||
└── train.py | |||||
├── README.md | |||||
├── LICENSE | |||||
└── requirements.txt | |||||
``` | |||||
[python-image]: https://img.shields.io/badge/Python-3.6-ff69b4.svg | |||||
[python-url]: https://www.python.org/ | |||||
[pytorch-image]: https://img.shields.io/badge/PyTorch-1.5-2BAF2B.svg | |||||
[pytorch-url]: https://pytorch.org/ |
@@ -0,0 +1,55 @@ | |||||
# Super Fast and Accurate 3D Object Detection based on 3D LiDAR Point Clouds | |||||
--- | |||||
Technical details of the implementation | |||||
## 1. Network architecture | |||||
- The **ResNet-based Keypoint Feature Pyramid Network** (KFPN) that was proposed in [RTM3D paper](https://arxiv.org/pdf/2001.03343.pdf). | |||||
The unofficial implementation of the RTM3D paper by using PyTorch is [here](https://github.com/maudzung/RTM3D) | |||||
- **Input**: | |||||
- The model takes a birds-eye-view (BEV) map as input. | |||||
- The BEV map is encoded by height, intensity, and density of 3D LiDAR point clouds. Assume that the size of the BEV input is `(H, W, 3)`. | |||||
- **Outputs**: | |||||
- Heatmap for main center with a size of `(H/S, W/S, C)` where `S=4` _(the down-sample ratio)_, and `C=3` _(the number of classes)_ | |||||
- Center offset: `(H/S, W/S, 2)` | |||||
- The heading angle _(yaw)_: `(H/S, W/S, 2)`. The model estimates the **im**aginary and the **re**al fraction (`sin(yaw)` and `cos(yaw)` values). | |||||
- Dimension _(h, w, l)_: `(H/S, W/S, 3)` | |||||
- `z` coordinate: `(H/S, W/S, 1)` | |||||
- **Targets**: **7 degrees of freedom** _(7-DOF)_ of objects: `(cx, cy, cz, l, w, h, θ)` | |||||
- `cx, cy, cz`: The center coordinates. | |||||
- `l, w, h`: length, width, height of the bounding box. | |||||
- `θ`: The heading angle in radians of the bounding box. | |||||
- **Objects**: Cars, Pedestrians, Cyclists. | |||||
## 2. Losses function | |||||
- For main center heatmap: Used `focal loss` | |||||
- For heading angle _(yaw)_: The `im` and `re` fractions are directly regressed by using `l1_loss` | |||||
- For `z coordinate` and `3 dimensions` (height, width, length), I used `balanced l1 loss` that was proposed by the paper | |||||
[Libra R-CNN: Towards Balanced Learning for Object Detection](https://arxiv.org/pdf/1904.02701.pdf) | |||||
## 3. Training in details | |||||
- Set uniform weights to the above components of losses. (`=1.0` for all) | |||||
- Number of epochs: 300. | |||||
- Learning rate scheduler: [`cosine`](https://arxiv.org/pdf/1812.01187.pdf), initial learning rate: 0.001. | |||||
- Batch size: `16` (on a single GTX 1080Ti). | |||||
## 4. Inference | |||||
- A `3 × 3` max-pooling operation was applied on the center heat map, then only `50` predictions whose | |||||
center confidences are larger than 0.2 were kept. | |||||
- The heading angle _(yaw)_ = `arctan`(_imaginary fraction_ / _real fraction_) | |||||
## 5. How to expand the work | |||||
- The model could be trained with more classes and with a larger detected area by modifying configurations in | |||||
the [config/kitti_dataset.py](https://github.com/maudzung/Super-Fast-Accurate-3D-Object-Detection/blob/master/src/config/kitti_config.py) file. |
@@ -0,0 +1,41 @@ | |||||
absl-py==1.1.0 | |||||
cachetools==4.2.4 | |||||
certifi==2022.6.15 | |||||
charset-normalizer==2.0.12 | |||||
cycler==0.11.0 | |||||
easydict==1.9 | |||||
future==0.18.2 | |||||
google-auth==1.35.0 | |||||
google-auth-oauthlib==0.4.6 | |||||
grpcio==1.46.3 | |||||
idna==3.3 | |||||
importlib-metadata==4.11.4 | |||||
joblib==1.1.0 | |||||
kiwisolver==1.4.3 | |||||
Markdown==3.3.7 | |||||
matplotlib==3.3.3 | |||||
numpy==1.18.3 | |||||
oauthlib==3.2.0 | |||||
opencv-python==4.2.0.34 | |||||
Pillow==8.4.0 | |||||
protobuf==3.19.1 | |||||
pyasn1==0.4.8 | |||||
pyasn1-modules==0.2.8 | |||||
pyparsing==3.0.9 | |||||
python-dateutil==2.8.2 | |||||
requests==2.28.0 | |||||
requests-oauthlib==1.3.1 | |||||
rsa==4.8 | |||||
scikit-learn==0.22.2 | |||||
scipy==1.8.1 | |||||
six==1.16.0 | |||||
tensorboard==2.2.1 | |||||
tensorboard-plugin-wit==1.8.1 | |||||
torch==1.5.0 | |||||
torchsummary==1.5.1 | |||||
torchvision==0.6.0 | |||||
tqdm==4.54.0 | |||||
urllib3==1.26.9 | |||||
Werkzeug==2.1.2 | |||||
wget==3.2 | |||||
zipp==3.8.0 |
@@ -0,0 +1,99 @@ | |||||
import math
import numpy as np
# Class-name -> training class-id mapping.
# Car and Van ==> Car class
# Pedestrian and Person_sitting ==> Pedestrian Class
# NOTE(review): negative ids presumably mark classes to ignore/filter during
# training (DontCare, Misc, ...) — confirm against the label-processing code.
# for train
CLASS_NAME_TO_ID = {
    'Pedestrian': 0,
    'Car': 1,
    'Cyclist': 2,
    'Van': 1,
    'Truck': -3,
    'Person_sitting': 0,
    'Tram': -99,
    'Misc': -99,
    'TraffiCone': -1,
    'DontCare': -1
}
# Inverse mapping used when decoding predictions at test time.
# for test
CLASS_ID_TO_NAME = {
    0: 'Pedestrian',  # Person_sitting in the same class
    1: 'Car',  # Van in the same class
    2: 'Cyclist'
}
# BGR drawing colors, indexed by class id when visualizing boxes.
colors = [[0, 255, 255], [0, 0, 255], [255, 0, 0], [255, 120, 0],
          [255, 120, 120], [0, 120, 0], [120, 255, 255], [120, 0, 255]]
#####################################################################################
# Detection area (meters, lidar frame): full 360-degree region around the sensor.
boundary = {
    "minX": -50,
    "maxX": 50,
    "minY": -25,
    "maxY": 25,
    "minZ": -2.73,
    "maxZ": 1.27
}
bound_size_x = boundary['maxX'] - boundary['minX']
bound_size_y = boundary['maxY'] - boundary['minY']
bound_size_z = boundary['maxZ'] - boundary['minZ']
# Rear half of the detection area only (x <= 0).
boundary_back = {
    "minX": -50,
    "maxX": 0,
    "minY": -25,
    "maxY": 25,
    "minZ": -2.73,
    "maxZ": 1.27
}
BEV_WIDTH = 608  # across y axis -25m ~ 25m
BEV_HEIGHT = 1216  # across x axis -50m ~ 50m (boundary minX..maxX; an older comment said 0m ~ 50m)
DISCRETIZATION = (boundary["maxX"] - boundary["minX"]) / BEV_HEIGHT
# NOTE(review): the _Y/_X suffixes refer to BEV image axes (rows/cols), not
# lidar axes: lidar x maps to image rows (DISCRETIZATION_Y), lidar y to image
# columns (DISCRETIZATION_X) — confirm against makeBEVMap usage.
DISCRETIZATION_Y = (boundary["maxX"] - boundary["minX"]) / BEV_HEIGHT
DISCRETIZATION_X = (boundary["maxY"] - boundary["minY"]) / BEV_WIDTH
# maximum number of points per voxel
T = 35
# voxel size (meters)
vd = 0.1  # z
vh = 0.05  # y
vw = 0.05  # x
# voxel grid dimensions derived from the boundary extents
W = math.ceil(bound_size_x / vw)
H = math.ceil(bound_size_y / vh)
D = math.ceil(bound_size_z / vd)
# Following parameters are calculated as an average from KITTI dataset for simplicity
#####################################################################################
# Rigid transform: lidar (velodyne) frame -> camera frame, homogeneous 4x4.
Tr_velo_to_cam = np.array([
    [7.49916597e-03, -9.99971248e-01, -8.65110297e-04, -6.71807577e-03],
    [1.18652889e-02, 9.54520517e-04, -9.99910318e-01, -7.33152811e-02],
    [9.99882833e-01, 7.49141178e-03, 1.18719929e-02, -2.78557062e-01],
    [0, 0, 0, 1]
])
# Rectifying rotation (cal mean from train set), homogeneous 4x4.
R0 = np.array([
    [0.99992475, 0.00975976, -0.00734152, 0],
    [-0.0097913, 0.99994262, -0.00430371, 0],
    [0.00729911, 0.0043753, 0.99996319, 0],
    [0, 0, 0, 1]
])
# Camera projection matrix (left color camera), 4x4 padded.
P2 = np.array([[719.787081, 0., 608.463003, 44.9538775],
               [0., 719.787081, 174.545111, 0.1066855],
               [0., 0., 1., 3.0106472e-03],
               [0., 0., 0., 0]
               ])
# Precomputed inverses for camera -> lidar back-projection.
# P2 is singular (last row is zero), hence the pseudo-inverse.
R0_inv = np.linalg.inv(R0)
Tr_velo_to_cam_inv = np.linalg.inv(Tr_velo_to_cam)
P2_inv = np.linalg.pinv(P2)
#####################################################################################
@@ -0,0 +1,172 @@ | |||||
""" | |||||
# -*- coding: utf-8 -*- | |||||
----------------------------------------------------------------------------------- | |||||
# Author: Nguyen Mau Dung | |||||
# DoC: 2020.08.17 | |||||
# email: nguyenmaudung93.kstn@gmail.com | |||||
----------------------------------------------------------------------------------- | |||||
# Description: The configurations of the project will be defined here | |||||
""" | |||||
import os | |||||
import argparse | |||||
import torch | |||||
from easydict import EasyDict as edict | |||||
def parse_train_configs():
    """Parse command-line options for training and derive runtime settings.

    Returns:
        easydict.EasyDict: all CLI options plus derived fields (torch device,
        detection-head channel map, BEV input/heatmap sizes, checkpoint and
        log directories, ...).

    Side effects:
        Creates the checkpoint and log directories if they do not exist.
    """

    def _str2bool(value):
        # argparse with a bare bool (or no type) treats ANY non-empty string
        # as truthy, so `--no_cuda=False` used to enable CPU mode. Parse the
        # common spellings explicitly instead.
        if isinstance(value, bool):
            return value
        return str(value).strip().lower() in ('true', '1', 'yes', 'y', 't')

    parser = argparse.ArgumentParser(description='The Implementation using PyTorch')
    parser.add_argument('--seed', type=int, default=2020,
                        help='re-produce the results with seed random')
    parser.add_argument('--saved_fn', type=str, default='fpn_resnet_18', metavar='FN',
                        help='The name using for saving logs, models,...')
    parser.add_argument('--root_dir', type=str, default='../', metavar='PATH',
                        help='The ROOT working directory')
    ####################################################################
    ##############     Model configs            ########################
    ####################################################################
    parser.add_argument('--arch', type=str, default='fpn_resnet_18', metavar='ARCH',
                        help='The name of the model architecture')
    parser.add_argument('--model_load_dir', type=str, default=None, metavar='PATH',
                        help='the path of the pretrained checkpoint')
    ####################################################################
    ##############     Dataloader and Running configs            #######
    ####################################################################
    parser.add_argument('--data_url', type=str, default='../dataset/apollo/training', metavar='PATH',
                        help='the path of the dataset')
    parser.add_argument('--val_data_url', type=str, default='../dataset/apollo/val', metavar='PATH',
                        help='the path of the dataset')
    parser.add_argument('--train_model_out', type=str, default='../checkpoints', metavar='PATH',
                        help='the path of the model output')
    parser.add_argument('--train_out', type=str, default='../logs', metavar='PATH',
                        help='the path of the logs output')
    parser.add_argument('--hflip_prob', type=float, default=0.5,
                        help='The probability of horizontal flip')
    parser.add_argument('--no-val', action='store_true',
                        help='If true, dont evaluate the model on the val set')
    parser.add_argument('--num_samples', type=int, default=None,
                        help='Take a subset of the dataset to run and debug')
    parser.add_argument('--num_workers', type=int, default=4,
                        help='Number of threads for loading data')
    parser.add_argument('--batch_size', type=int, default=8,
                        help='mini-batch size (default: 8), this is the total'
                             'batch size of all GPUs on the current node when using'
                             'Data Parallel or Distributed Data Parallel')
    parser.add_argument('--print_freq', type=int, default=50, metavar='N',
                        help='print frequency (default: 50)')
    parser.add_argument('--tensorboard_freq', type=int, default=50, metavar='N',
                        help='frequency of saving tensorboard (default: 50)')
    parser.add_argument('--checkpoint_freq', type=int, default=2, metavar='N',
                        help='frequency of saving checkpoints (default: 2)')
    parser.add_argument('--gpu_num_per_node', type=int, default=1,
                        help='Number of GPU')
    ####################################################################
    ##############     Training strategy            ####################
    ####################################################################
    parser.add_argument('--start_epoch', type=int, default=1, metavar='N',
                        help='the starting epoch')
    parser.add_argument('--num_epochs', type=int, default=300, metavar='N',
                        help='number of total epochs to run')
    # NOTE: the historical (misspelled) value 'cosin' is kept for
    # backward compatibility with the scheduler-selection code.
    parser.add_argument('--lr_type', type=str, default='cosin',
                        help='the type of learning rate scheduler (cosin or multi_step or one_cycle)')
    parser.add_argument('--lr', type=float, default=0.001, metavar='LR',
                        help='initial learning rate')
    parser.add_argument('--minimum_lr', type=float, default=1e-7, metavar='MIN_LR',
                        help='minimum learning rate during training')
    parser.add_argument('--momentum', type=float, default=0.949, metavar='M',
                        help='momentum')
    parser.add_argument('-wd', '--weight_decay', type=float, default=0., metavar='WD',
                        help='weight decay (default: 0.)')
    parser.add_argument('--optimizer_type', type=str, default='adam', metavar='OPTIMIZER',
                        help='the type of optimizer, it can be sgd or adam')
    # type=int so CLI-supplied milestones compare correctly against epoch
    # numbers (without it they would be parsed as strings).
    parser.add_argument('--steps', nargs='*', type=int, default=[150, 180],
                        help='number of burn in step')
    ####################################################################
    ##############     Distributed Data Parallel            ############
    ####################################################################
    parser.add_argument('--world-size', default=-1, type=int, metavar='N',
                        help='number of nodes for distributed training')
    parser.add_argument('--rank', default=-1, type=int, metavar='N',
                        help='node rank for distributed training')
    parser.add_argument('--dist-url', default='tcp://127.0.0.1:29500', type=str,
                        help='url used to set up distributed training')
    parser.add_argument('--dist-backend', default='nccl', type=str,
                        help='distributed backend')
    parser.add_argument('--gpu_idx', default=0, type=int,
                        help='GPU index to use.')
    # Accepts `--no_cuda`, `--no_cuda=True` and `--no_cuda=False`.
    parser.add_argument('--no_cuda', type=_str2bool, nargs='?', const=True, default=False,
                        help='If true, cuda is not used.')
    parser.add_argument('--multiprocessing-distributed', action='store_true',
                        help='Use multi-processing distributed training to launch '
                             'N processes per node, which has N GPUs. This is the '
                             'fastest way to use PyTorch for either single node or '
                             'multi node data parallel training')
    ####################################################################
    ##############     Evaluation configurations     ###################
    ####################################################################
    parser.add_argument('--evaluate', action='store_true',
                        help='only evaluate the model, not training')
    parser.add_argument('--resume_path', type=str, default=None, metavar='PATH',
                        help='the path of the resumed checkpoint')
    parser.add_argument('--K', type=int, default=50,
                        help='the number of top K')

    configs = edict(vars(parser.parse_args()))

    ####################################################################
    ############## Hardware configurations #############################
    ####################################################################
    configs.device = torch.device('cpu' if configs.no_cuda else 'cuda:{}'.format(configs.gpu_idx))
    configs.ngpus_per_node = torch.cuda.device_count()
    configs.pin_memory = True
    # BEV input resolution (rows, cols) and down-sampled heatmap resolution.
    configs.input_size = (1216, 608)
    configs.hm_size = (304, 152)
    configs.down_ratio = 4
    configs.max_objects = 50
    configs.imagenet_pretrained = True
    configs.head_conv = 64
    configs.num_classes = 3
    configs.num_center_offset = 2
    configs.num_z = 1
    configs.num_dim = 3
    configs.num_direction = 2  # sin, cos
    # Output channels of each detection head.
    configs.heads = {
        'hm_cen': configs.num_classes,
        'cen_offset': configs.num_center_offset,
        'direction': configs.num_direction,
        'z_coor': configs.num_z,
        'dim': configs.num_dim
    }
    configs.num_input_features = 4
    ####################################################################
    ############## Dataset, logs, Checkpoints dir ######################
    ####################################################################
    configs.dataset = 'apollo'  # or kitti
    configs.dataset_dir = configs.data_url
    # checkpoints/logs go directly under the CLI-supplied output dirs
    # (not under a per-run `saved_fn` subdirectory).
    configs.checkpoints_dir = configs.train_model_out
    configs.logs_dir = configs.train_out
    configs.pretrained_path = configs.model_load_dir

    # exist_ok avoids a race between the isdir check and makedirs.
    os.makedirs(configs.checkpoints_dir, exist_ok=True)
    os.makedirs(configs.logs_dir, exist_ok=True)

    return configs
@@ -0,0 +1,99 @@ | |||||
""" | |||||
# -*- coding: utf-8 -*- | |||||
----------------------------------------------------------------------------------- | |||||
# Author: Nguyen Mau Dung | |||||
# DoC: 2020.08.17 | |||||
# email: nguyenmaudung93.kstn@gmail.com | |||||
----------------------------------------------------------------------------------- | |||||
# Description: This script for the KITTI dataset | |||||
""" | |||||
import sys | |||||
import os | |||||
from builtins import int | |||||
from glob import glob | |||||
import numpy as np | |||||
from torch.utils.data import Dataset | |||||
import cv2 | |||||
import torch | |||||
src_dir = os.path.dirname(os.path.realpath(__file__)) | |||||
# while not src_dir.endswith("sfa"): | |||||
# src_dir = os.path.dirname(src_dir) | |||||
if src_dir not in sys.path: | |||||
sys.path.append(src_dir) | |||||
from data_process.kitti_data_utils import get_filtered_lidar | |||||
from data_process.kitti_bev_utils import makeBEVMap | |||||
import config.kitti_config as cnf | |||||
class Demo_KittiDataset(Dataset):
    """Minimal KITTI-raw sequence dataset for demo/inference (no labels read)."""

    def __init__(self, configs):
        seq = configs.foldername
        # KITTI raw layout: <root>/<seq>/<date>/<seq>/{image_02, velodyne_points, ...}
        # The first 10 characters of the sequence name are its date string.
        self.dataset_dir = os.path.join(configs.dataset_dir, seq, seq[:10], seq)
        self.input_size = configs.input_size
        self.hm_size = configs.hm_size
        self.num_classes = configs.num_classes
        self.max_objects = configs.max_objects
        self.image_dir = os.path.join(self.dataset_dir, "image_02", "data")
        self.lidar_dir = os.path.join(self.dataset_dir, "velodyne_points", "data")
        self.label_dir = os.path.join(self.dataset_dir, "label_2", "data")
        # Sample ids are the numeric basenames of the .bin lidar files.
        bin_paths = sorted(glob(os.path.join(self.lidar_dir, '*.bin')))
        self.sample_id_list = [float(os.path.basename(p)[:-4]) for p in bin_paths]
        self.num_samples = len(self.sample_id_list)

    def __len__(self):
        return len(self.sample_id_list)

    def __getitem__(self, index):
        # Not used in demo mode; samples are fetched via the load_bevmap_* helpers.
        pass

    def load_bevmap_front(self, index):
        """Load only image for the testing phase"""
        sample_id = int(self.sample_id_list[index])
        img_path, img_rgb = self.get_image(sample_id)
        points = self.get_lidar(sample_id)
        bevmap = torch.from_numpy(
            makeBEVMap(get_filtered_lidar(points, cnf.boundary), cnf.boundary))
        return {'img_path': img_path}, bevmap, img_rgb

    def load_bevmap_front_vs_back(self, index):
        """Load only image for the testing phase"""
        sample_id = int(self.sample_id_list[index])
        img_path, img_rgb = self.get_image(sample_id)
        points = self.get_lidar(sample_id)
        # Build one BEV map per half-space (front / rear boundary).
        front_bevmap = torch.from_numpy(
            makeBEVMap(get_filtered_lidar(points, cnf.boundary), cnf.boundary))
        back_bevmap = torch.from_numpy(
            makeBEVMap(get_filtered_lidar(points, cnf.boundary_back), cnf.boundary_back))
        return {'img_path': img_path}, front_bevmap, back_bevmap, img_rgb

    def get_image(self, idx):
        """Return (path, RGB image) for the given 10-digit frame index."""
        img_path = os.path.join(self.image_dir, '{:010d}.png'.format(idx))
        return img_path, cv2.cvtColor(cv2.imread(img_path), cv2.COLOR_BGR2RGB)

    def get_lidar(self, idx):
        """Return the (N, 4) [x, y, z, intensity] point cloud for a frame."""
        lidar_file = os.path.join(self.lidar_dir, '{:010d}.bin'.format(idx))
        return np.fromfile(lidar_file, dtype=np.float32).reshape(-1, 4)
@@ -0,0 +1,98 @@ | |||||
""" | |||||
# -*- coding: utf-8 -*- | |||||
----------------------------------------------------------------------------------- | |||||
""" | |||||
import math | |||||
import os | |||||
import sys | |||||
import cv2 | |||||
import numpy as np | |||||
src_dir = os.path.dirname(os.path.realpath(__file__)) | |||||
# while not src_dir.endswith("sfa"): | |||||
# src_dir = os.path.dirname(src_dir) | |||||
if src_dir not in sys.path: | |||||
sys.path.append(src_dir) | |||||
import config.kitti_config as cnf | |||||
def makeBEVMap(PointCloud_, boundary):
    """Encode a filtered lidar point cloud as a 3-channel BEV image.

    Args:
        PointCloud_: (N, 4) array of [x, y, z, intensity] points, assumed
            already clipped to `boundary` (the input is not modified; a copy
            is taken).
        boundary: dict with minX/maxX/minY/maxY/minZ/maxZ extents in meters;
            only the Z extent is read here (for height normalization).

    Returns:
        (3, cnf.BEV_HEIGHT, cnf.BEV_WIDTH) array with channels
        [intensity, height, density].
    """
    # One extra row/column so floor()+center-shift indices cannot overflow;
    # the extra border is cropped off when assembling RGB_Map below.
    Height = cnf.BEV_HEIGHT + 1
    Width = cnf.BEV_WIDTH + 1
    # Discretize Feature Map
    PointCloud = np.copy(PointCloud_)
    # Original KITTI (front-view-only) discretization:
    # PointCloud[:, 0] = np.int_(np.floor(PointCloud[:, 0] / cnf.DISCRETIZATION))
    # PointCloud[:, 1] = np.int_(np.floor(PointCloud[:, 1] / cnf.DISCRETIZATION) + Width / 2)
    # For the Apollo dataset: detect 360 degrees around the sensor, so the ego
    # vehicle is shifted to the center of the BEV map along BOTH axes.
    PointCloud[:, 0] = np.int_(np.floor(PointCloud[:, 0] / cnf.DISCRETIZATION_Y) + Height / 2)
    PointCloud[:, 1] = np.int_(np.floor(PointCloud[:, 1] / cnf.DISCRETIZATION_X) + Width / 2)
    # Sort by (row, col, -z): within each BEV cell the highest point comes
    # first, so np.unique's first-occurrence index picks the top point.
    indices = np.lexsort((-PointCloud[:, 2], PointCloud[:, 1], PointCloud[:, 0]))
    PointCloud = PointCloud[indices]
    # Height Map: z of the highest point per cell, normalized by the Z extent.
    heightMap = np.zeros((Height, Width))
    _, indices = np.unique(PointCloud[:, 0:2], axis=0, return_index=True)
    PointCloud_frac = PointCloud[indices]
    # some important problem is image coordinate is (y,x), not (x,y)
    max_height = float(np.abs(boundary['maxZ'] - boundary['minZ']))
    heightMap[np.int_(PointCloud_frac[:, 0]), np.int_(PointCloud_frac[:, 1])] = PointCloud_frac[:, 2] / max_height  # (1217, 609)
    # Intensity Map & DensityMap
    intensityMap = np.zeros((Height, Width))
    densityMap = np.zeros((Height, Width))
    _, indices, counts = np.unique(PointCloud[:, 0:2], axis=0, return_index=True, return_counts=True)
    PointCloud_top = PointCloud[indices]
    # Log-normalized point count per cell, saturating at 64 points.
    normalizedCounts = np.minimum(1.0, np.log(counts + 1) / np.log(64))
    intensityMap[np.int_(PointCloud_top[:, 0]), np.int_(PointCloud_top[:, 1])] = PointCloud_top[:, 3] / 255.0  # Hesai Pandar40P reflectivity is 0~255
    densityMap[np.int_(PointCloud_top[:, 0]), np.int_(PointCloud_top[:, 1])] = normalizedCounts
    # Crop the guard border and stack as channel-first (C, H, W).
    RGB_Map = np.zeros((3, Height - 1, Width - 1))
    RGB_Map[2, :, :] = densityMap[:cnf.BEV_HEIGHT, :cnf.BEV_WIDTH]  # r_map
    RGB_Map[1, :, :] = heightMap[:cnf.BEV_HEIGHT, :cnf.BEV_WIDTH]  # g_map
    RGB_Map[0, :, :] = intensityMap[:cnf.BEV_HEIGHT, :cnf.BEV_WIDTH]  # b_map
    return RGB_Map
# bev image coordinates format | |||||
# bev image coordinates format
def get_corners(x, y, w, l, yaw):
    """Return the four BEV corners of a rotated box.

    Args:
        x, y: box center in BEV coordinates.
        w, l: box width (local x) and length (local y).
        yaw: rotation angle in radians.

    Returns:
        (4, 2) float32 array of corners in the order
        front-left, rear-left, rear-right, front-right.
    """
    c = np.cos(yaw)
    s = np.sin(yaw)
    half_w = w / 2
    half_l = l / 2
    # Corner offsets in the box's local frame (dx along width, dy along length):
    # front-left, rear-left, rear-right, front-right.
    local = np.array([
        [-half_w, half_l],
        [-half_w, -half_l],
        [half_w, -half_l],
        [half_w, half_l],
    ], dtype=np.float32)
    # Rotate by yaw (R = [[c, -s], [s, c]]) and translate to the center.
    rot = np.array([[c, -s], [s, c]], dtype=np.float32)
    bev_corners = local @ rot.T + np.array([x, y], dtype=np.float32)
    return bev_corners.astype(np.float32)
def drawRotatedBox(img, x, y, w, l, yaw, color):
    """Draw a rotated BEV box on ``img`` in place.

    Args:
        img: BEV image (modified in place).
        x, y, w, l, yaw: box center, size and heading in BEV pixel coords.
        color: BGR tuple for the box outline.
    """
    # NOTE: the original made an unused full copy of the image here
    # (``img_cp = img.copy()`` for a commented-out return); dropped as dead work.
    bev_corners = get_corners(x, y, w, l, yaw)
    corners_int = bev_corners.reshape(-1, 1, 2).astype(int)
    cv2.polylines(img, [corners_int], True, color, 2)
    # Draw the front edge (front-left -> front-right) in a distinct color so
    # the heading of the box is visible.
    corners_int = bev_corners.reshape(-1, 2)
    cv2.line(img, (int(corners_int[0, 0]), int(corners_int[0, 1])), (int(corners_int[3, 0]), int(corners_int[3, 1])), (255, 255, 0), 2)
@@ -0,0 +1,324 @@ | |||||
""" | |||||
# -*- coding: utf-8 -*- | |||||
----------------------------------------------------------------------------------- | |||||
# Author: Nguyen Mau Dung | |||||
# DoC: 2020.08.17 | |||||
# email: nguyenmaudung93.kstn@gmail.com | |||||
----------------------------------------------------------------------------------- | |||||
# Description: The utils of the kitti dataset | |||||
""" | |||||
from __future__ import print_function | |||||
import os | |||||
import sys | |||||
import numpy as np | |||||
import cv2 | |||||
src_dir = os.path.dirname(os.path.realpath(__file__)) | |||||
# while not src_dir.endswith("sfa"): | |||||
# src_dir = os.path.dirname(src_dir) | |||||
if src_dir not in sys.path: | |||||
sys.path.append(src_dir) | |||||
import config.kitti_config as cnf | |||||
class Object3d(object):
    """A single 3D object label parsed from one KITTI label-file line."""

    def __init__(self, label_file_line):
        data = label_file_line.split(' ')
        data[1:] = [float(x) for x in data[1:]]
        # extract label, truncation, occlusion
        self.type = data[0]  # 'Car', 'Pedestrian', ...
        self.cls_id = self.cls_type_to_id(self.type)
        self.truncation = data[1]  # truncated pixel ratio [0..1]
        self.occlusion = int(data[2])  # 0=visible, 1=partly occluded, 2=fully occluded, 3=unknown
        self.alpha = data[3]  # object observation angle [-pi..pi]
        # extract 2d bounding box in 0-based coordinates
        self.xmin = data[4]  # left
        self.ymin = data[5]  # top
        self.xmax = data[6]  # right
        self.ymax = data[7]  # bottom
        self.box2d = np.array([self.xmin, self.ymin, self.xmax, self.ymax])
        # extract 3d bounding box information
        self.h = data[8]  # box height
        self.w = data[9]  # box width
        self.l = data[10]  # box length (in meters)
        self.t = (data[11], data[12], data[13])  # location (x,y,z) in camera coord.
        self.dis_to_cam = np.linalg.norm(self.t)
        self.ry = data[14]  # yaw angle (around Y-axis in camera coordinates) [-pi..pi]
        # A 16th field (detection score) is only present in result files.
        self.score = data[15] if len(data) == 16 else -1.0
        self.level_str = None
        self.level = self.get_obj_level()

    def cls_type_to_id(self, cls_type):
        """Map a class name to its numeric id; -1 for classes not in the map."""
        if cls_type not in cnf.CLASS_NAME_TO_ID:
            return -1
        return cnf.CLASS_NAME_TO_ID[cls_type]

    def get_obj_level(self):
        """Assign the KITTI difficulty level from 2D box height, truncation and
        occlusion; returns 1=Easy, 2=Moderate, 3=Hard, 4=Unknown and sets
        ``self.level_str`` accordingly."""
        height = float(self.box2d[3]) - float(self.box2d[1]) + 1
        if height >= 40 and self.truncation <= 0.15 and self.occlusion <= 0:
            self.level_str = 'Easy'
            return 1  # Easy
        elif height >= 25 and self.truncation <= 0.3 and self.occlusion <= 1:
            self.level_str = 'Moderate'
            return 2  # Moderate
        elif height >= 25 and self.truncation <= 0.5 and self.occlusion <= 2:
            self.level_str = 'Hard'
            return 3  # Hard
        else:
            self.level_str = 'UnKnown'
            return 4

    def print_object(self):
        """Print a human-readable dump of this label."""
        print('Type, truncation, occlusion, alpha: %s, %d, %d, %f' % \
              (self.type, self.truncation, self.occlusion, self.alpha))
        print('2d bbox (x0,y0,x1,y1): %f, %f, %f, %f' % \
              (self.xmin, self.ymin, self.xmax, self.ymax))
        print('3d bbox h,w,l: %f, %f, %f' % \
              (self.h, self.w, self.l))
        print('3d bbox location, ry: (%f, %f, %f), %f' % \
              (self.t[0], self.t[1], self.t[2], self.ry))

    def to_kitti_format(self):
        """Serialize this label back into a single KITTI label-file line."""
        kitti_str = '%s %.2f %d %.2f %.2f %.2f %.2f %.2f %.2f %.2f %.2f %.2f %.2f %.2f %.2f %.2f' \
                    % (self.type, self.truncation, int(self.occlusion), self.alpha, self.box2d[0], self.box2d[1],
                       self.box2d[2], self.box2d[3], self.h, self.w, self.l, self.t[0], self.t[1], self.t[2],
                       self.ry, self.score)
        return kitti_str
def read_label(label_filename):
    """Read a KITTI label file and return a list of Object3d, one per line.

    The file is opened with a context manager so the handle is always closed
    (the previous version leaked it via ``open()`` inside a comprehension).
    """
    with open(label_filename) as f:
        return [Object3d(line.rstrip()) for line in f]
class Calibration(object):
    """KITTI calibration matrices and coordinate-conversion helpers.

    Frames: velodyne (front x, left y, up z), reference/rect camera
    (right x, down y, front z), image2 pixels (u right, v down).
    A velodyne point projects to image2 via
    ``y_image2 = P2 * R0_rect * Tr_velo_to_cam * x_velo``.

    Attributes:
        P2, P3: (3, 4) projection matrices, rect camera -> image2/image3.
        V2C:    (3, 4) rigid transform, velodyne -> reference camera.
        R0:     (3, 3) rectification rotation, reference -> rect camera.
        c_u, c_v, f_u, f_v, b_x, b_y: intrinsics/baseline terms read off P2.
    """

    def __init__(self, calib_filepath):
        calibs = self.read_calib_file(calib_filepath)
        # Projection matrices from rect camera coords to image2/image3 pixels.
        self.P2 = np.reshape(calibs['P2'], [3, 4])
        self.P3 = np.reshape(calibs['P3'], [3, 4])
        # Rigid transform from velodyne coords to reference camera coords.
        self.V2C = np.reshape(calibs['Tr_velo2cam'], [3, 4])
        # Rotation from reference camera coords to rect camera coords.
        self.R0 = np.reshape(calibs['R_rect'], [3, 3])

        # Intrinsics (and relative stereo baseline terms) derived from P2.
        self.c_u = self.P2[0, 2]
        self.c_v = self.P2[1, 2]
        self.f_u = self.P2[0, 0]
        self.f_v = self.P2[1, 1]
        self.b_x = self.P2[0, 3] / (-self.f_u)  # relative
        self.b_y = self.P2[1, 3] / (-self.f_v)

    def read_calib_file(self, filepath):
        """Parse a KITTI calib file into float32 matrices.

        Relies on the standard fixed line order of KITTI object calib files:
        line 2 = P2, line 3 = P3, line 4 = R_rect, line 5 = Tr_velo_to_cam,
        each formatted as ``KEY: v1 v2 ...``.
        """
        with open(filepath) as f:
            lines = f.readlines()

        def parse(line):
            # Drop the leading "KEY:" token, keep the numbers.
            return np.array(line.strip().split(' ')[1:], dtype=np.float32)

        return {'P2': parse(lines[2]).reshape(3, 4),
                'P3': parse(lines[3]).reshape(3, 4),
                'R_rect': parse(lines[4]).reshape(3, 3),
                'Tr_velo2cam': parse(lines[5]).reshape(3, 4)}

    def cart2hom(self, pts_3d):
        """Append a column of ones: (N, 3) points -> (N, 4) homogeneous points."""
        ones = np.ones((pts_3d.shape[0], 1), dtype=np.float32)
        return np.hstack((pts_3d, ones))
def compute_radius(det_size, min_overlap=0.7):
    """Gaussian radius for a heatmap box of ``det_size`` = (height, width).

    Solves the three quadratic cases of the CenterNet/CornerNet
    gaussian-radius derivation (each a constraint that a box displaced by the
    radius keeps at least ``min_overlap`` IoU) and returns the smallest root.
    """
    height, width = det_size

    # Case 1 (a=1).
    b1 = height + width
    c1 = width * height * (1 - min_overlap) / (1 + min_overlap)
    r1 = (b1 + np.sqrt(b1 * b1 - 4 * c1)) / 2

    # Case 2 (a=4).
    b2 = 2 * (height + width)
    c2 = (1 - min_overlap) * width * height
    r2 = (b2 + np.sqrt(b2 * b2 - 16 * c2)) / 2

    # Case 3 (a=4*min_overlap).
    a3 = 4 * min_overlap
    b3 = -2 * min_overlap * (height + width)
    c3 = (min_overlap - 1) * width * height
    r3 = (b3 + np.sqrt(b3 * b3 - 4 * a3 * c3)) / 2

    return min(r1, r2, r3)
def gaussian2D(shape, sigma=1):
    """Unnormalized 2D Gaussian of the given (h, w) ``shape``, peak 1 at the
    center cell; values below machine-eps * max are zeroed."""
    half_h, half_w = [(s - 1.) / 2. for s in shape]
    ys, xs = np.ogrid[-half_h:half_h + 1, -half_w:half_w + 1]
    g = np.exp(-(xs * xs + ys * ys) / (2 * sigma * sigma))
    # Flush denormal-ish tail values to exactly zero.
    g[g < np.finfo(g.dtype).eps * g.max()] = 0
    return g
def gen_hm_radius(heatmap, center, radius, k=1):
    """Splat a Gaussian peak onto ``heatmap`` in place at ``center`` = (x, y).

    The Gaussian has diameter ``2 * radius + 1`` (sigma = diameter / 6) and is
    merged with the existing values via element-wise max, scaled by ``k``.
    Returns the (mutated) heatmap.
    """
    diameter = 2 * radius + 1
    gaussian = gaussian2D((diameter, diameter), sigma=diameter / 6)

    x, y = int(center[0]), int(center[1])
    height, width = heatmap.shape[0:2]

    # Clip the stamp so it never reaches outside the heatmap bounds.
    left, right = min(x, radius), min(width - x, radius + 1)
    top, bottom = min(y, radius), min(height - y, radius + 1)

    hm_patch = heatmap[y - top:y + bottom, x - left:x + right]
    g_patch = gaussian[radius - top:radius + bottom, radius - left:radius + right]
    if min(g_patch.shape) > 0 and min(hm_patch.shape) > 0:
        # Write through the view so the original heatmap is updated.
        np.maximum(hm_patch, g_patch * k, out=hm_patch)
    return heatmap
def get_filtered_lidar(lidar, boundary, labels=None):
    """Crop a point cloud (and optionally labels) to the BEV boundary box.

    Points outside [minX..maxX] x [minY..maxY] x [minZ..maxZ] are dropped and
    the surviving z values are shifted so that minZ maps to 0. When ``labels``
    (N, 8) with columns [cls, x, y, z, ...] is given, labels whose centers
    fall outside the boundary are dropped too and ``(lidar, labels)`` is
    returned; otherwise only the filtered lidar is returned.
    """
    min_x, max_x = boundary['minX'], boundary['maxX']
    min_y, max_y = boundary['minY'], boundary['maxY']
    min_z, max_z = boundary['minZ'], boundary['maxZ']

    # Keep only points inside the axis-aligned boundary box.
    inside = ((lidar[:, 0] >= min_x) & (lidar[:, 0] <= max_x) &
              (lidar[:, 1] >= min_y) & (lidar[:, 1] <= max_y) &
              (lidar[:, 2] >= min_z) & (lidar[:, 2] <= max_z))
    lidar = lidar[inside]
    # Shift heights so z starts at 0 inside the crop.
    lidar[:, 2] = lidar[:, 2] - min_z

    if labels is None:
        return lidar

    keep = ((labels[:, 1] >= min_x) & (labels[:, 1] < max_x) &
            (labels[:, 2] >= min_y) & (labels[:, 2] < max_y) &
            (labels[:, 3] >= min_z) & (labels[:, 3] < max_z))
    return lidar, labels[keep]
def box3d_corners_to_center(box3d_corner):
    """Convert (N, 8, 3) corner boxes back to (N, 7) [h, w, l, x, y, z, yaw].

    Center = mean of the 8 corners; h = mean vertical gap between corners 4-7
    and corners 0-3; w and l each average four corresponding xy edge lengths;
    yaw averages four edge-direction angles. Corner ordering is assumed to
    match box3d_center_to_conners (bottom face 0-3, top face 4-7).
    """
    # (N, 8, 3) -> (N, 7)
    assert box3d_corner.ndim == 3
    # Box center: mean over the 8 corners.
    xyz = np.mean(box3d_corner, axis=1)
    # Height: mean z gap between the top (4:) and bottom (:4) corners.
    h = abs(np.mean(box3d_corner[:, 4:, 2] - box3d_corner[:, :4, 2], axis=1, keepdims=True))
    # Width: average xy length of edges 0-1, 2-3, 4-5, 6-7.
    w = (np.sqrt(np.sum((box3d_corner[:, 0, [0, 1]] - box3d_corner[:, 1, [0, 1]]) ** 2, axis=1, keepdims=True)) +
         np.sqrt(np.sum((box3d_corner[:, 2, [0, 1]] - box3d_corner[:, 3, [0, 1]]) ** 2, axis=1, keepdims=True)) +
         np.sqrt(np.sum((box3d_corner[:, 4, [0, 1]] - box3d_corner[:, 5, [0, 1]]) ** 2, axis=1, keepdims=True)) +
         np.sqrt(np.sum((box3d_corner[:, 6, [0, 1]] - box3d_corner[:, 7, [0, 1]]) ** 2, axis=1, keepdims=True))) / 4
    # Length: average xy length of edges 0-3, 1-2, 4-7, 5-6.
    l = (np.sqrt(np.sum((box3d_corner[:, 0, [0, 1]] - box3d_corner[:, 3, [0, 1]]) ** 2, axis=1, keepdims=True)) +
         np.sqrt(np.sum((box3d_corner[:, 1, [0, 1]] - box3d_corner[:, 2, [0, 1]]) ** 2, axis=1, keepdims=True)) +
         np.sqrt(np.sum((box3d_corner[:, 4, [0, 1]] - box3d_corner[:, 7, [0, 1]]) ** 2, axis=1, keepdims=True)) +
         np.sqrt(np.sum((box3d_corner[:, 5, [0, 1]] - box3d_corner[:, 6, [0, 1]]) ** 2, axis=1, keepdims=True))) / 4
    # Yaw: average of four edge-direction angles.
    # NOTE(review): the last two arctan2 terms use swapped/negated arguments
    # relative to the first two -- presumably equivalent for the expected
    # corner layout; confirm before relying on exact yaw values.
    yaw = (np.arctan2(box3d_corner[:, 2, 1] - box3d_corner[:, 1, 1],
                      box3d_corner[:, 2, 0] - box3d_corner[:, 1, 0]) +
           np.arctan2(box3d_corner[:, 3, 1] - box3d_corner[:, 0, 1],
                      box3d_corner[:, 3, 0] - box3d_corner[:, 0, 0]) +
           np.arctan2(box3d_corner[:, 2, 0] - box3d_corner[:, 3, 0],
                      box3d_corner[:, 3, 1] - box3d_corner[:, 2, 1]) +
           np.arctan2(box3d_corner[:, 1, 0] - box3d_corner[:, 0, 0],
                      box3d_corner[:, 0, 1] - box3d_corner[:, 1, 1]))[:, np.newaxis] / 4
    return np.concatenate([h, w, l, xyz, yaw], axis=1).reshape(-1, 7)
def box3d_center_to_conners(box3d_center):
    """Convert one (h, w, l, x, y, z, yaw) box to its 8 corners as (8, 3).

    The 4 bottom corners (z offset 0) come first, then the 4 top corners
    (z offset h); offsets are rotated by yaw about the z-axis and translated
    to (x, y, z). Returns a float32 array.
    """
    h, w, l, x, y, z, yaw = box3d_center

    # Axis-aligned corner offsets in the box frame, rows = x / y / z.
    x_off = np.array([-l / 2, -l / 2, l / 2, l / 2] * 2)
    y_off = np.array([w / 2, -w / 2, -w / 2, w / 2] * 2)
    z_off = np.array([0.0] * 4 + [h] * 4)
    box = np.vstack([x_off, y_off, z_off])  # (3, 8)

    cos_y, sin_y = np.cos(yaw), np.sin(yaw)
    rot = np.array([[cos_y, -sin_y, 0.0],
                    [sin_y, cos_y, 0.0],
                    [0.0, 0.0, 1.0]])

    # Rotate, then broadcast-translate each corner to the box center.
    corners = rot.dot(box) + np.array([x, y, z]).reshape(3, 1)
    return corners.T.astype(np.float32)
if __name__ == '__main__':
    # Visual smoke test for the heatmap helpers: compute a radius for a 40x50
    # box, splat one Gaussian on an empty heatmap and display it with OpenCV
    # until Esc is pressed.
    heatmap = np.zeros((96, 320))
    h, w = 40, 50
    radius = compute_radius((h, w))
    radius = max(0, int(radius))
    print('h: {}, w: {}, radius: {}, sigma: {}'.format(h, w, radius, (2 * radius + 1) / 6.))
    # center is (x, y) in heatmap pixel coordinates.
    gen_hm_radius(heatmap, center=(200, 50), radius=radius)
    while True:
        cv2.imshow('heatmap', heatmap)
        if cv2.waitKey(0) & 0xff == 27:  # Esc exits the preview loop
            break
    # The argmax should land on the splatted center (row, col order).
    max_pos = np.unravel_index(heatmap.argmax(), shape=heatmap.shape)
    print('max_pos: {}'.format(max_pos))
@@ -0,0 +1,67 @@ | |||||
""" | |||||
# -*- coding: utf-8 -*- | |||||
----------------------------------------------------------------------------------- | |||||
# Author: Nguyen Mau Dung | |||||
# DoC: 2020.08.17 | |||||
# email: nguyenmaudung93.kstn@gmail.com | |||||
----------------------------------------------------------------------------------- | |||||
# Description: Creates the dataloaders for the training/validation/test phases
""" | |||||
import os | |||||
import sys | |||||
import torch | |||||
from torch.utils.data import DataLoader | |||||
import numpy as np | |||||
src_dir = os.path.dirname(os.path.realpath(__file__)) | |||||
# while not src_dir.endswith("sfa"): | |||||
# src_dir = os.path.dirname(src_dir) | |||||
if src_dir not in sys.path: | |||||
sys.path.append(src_dir) | |||||
from data_process.kitti_dataset import KittiDataset | |||||
from data_process.transformation import OneOf, Random_Rotation, Random_Scaling | |||||
def create_train_dataloader(configs):
    """Build the training DataLoader and its distributed sampler (or None)."""
    # Lidar augmentation: OneOf(rotation, scaling) gated by p=0.66.
    train_lidar_aug = OneOf([
        Random_Rotation(limit_angle=np.pi / 4, p=1.0),
        Random_Scaling(scaling_range=(0.95, 1.05), p=1.0),
    ], p=0.66)
    train_dataset = KittiDataset(configs, mode='train', lidar_aug=train_lidar_aug,
                                 hflip_prob=configs.hflip_prob,
                                 num_samples=configs.num_samples)
    train_sampler = (torch.utils.data.distributed.DistributedSampler(train_dataset)
                     if configs.distributed else None)
    # When a sampler is present it owns the shuffling, so shuffle is disabled.
    train_dataloader = DataLoader(train_dataset,
                                  batch_size=configs.batch_size,
                                  shuffle=(train_sampler is None),
                                  pin_memory=configs.pin_memory,
                                  num_workers=configs.num_workers,
                                  sampler=train_sampler)
    return train_dataloader, train_sampler
def create_val_dataloader(configs):
    """Build the validation DataLoader (no augmentation, deterministic order)."""
    val_dataset = KittiDataset(configs, mode='val', lidar_aug=None, hflip_prob=0.,
                               num_samples=configs.num_samples)
    val_sampler = (torch.utils.data.distributed.DistributedSampler(val_dataset, shuffle=False)
                   if configs.distributed else None)
    return DataLoader(val_dataset,
                      batch_size=configs.batch_size,
                      shuffle=False,
                      pin_memory=configs.pin_memory,
                      num_workers=configs.num_workers,
                      sampler=val_sampler)
def create_test_dataloader(configs):
    """Build the test DataLoader (no augmentation, no shuffling)."""
    test_dataset = KittiDataset(configs, mode='test', lidar_aug=None, hflip_prob=0.,
                                num_samples=configs.num_samples)
    test_sampler = (torch.utils.data.distributed.DistributedSampler(test_dataset)
                    if configs.distributed else None)
    return DataLoader(test_dataset,
                      batch_size=configs.batch_size,
                      shuffle=False,
                      pin_memory=configs.pin_memory,
                      num_workers=configs.num_workers,
                      sampler=test_sampler)
@@ -0,0 +1,335 @@ | |||||
""" | |||||
# -*- coding: utf-8 -*- | |||||
----------------------------------------------------------------------------------- | |||||
# Author: Nguyen Mau Dung | |||||
# DoC: 2020.08.17 | |||||
# email: nguyenmaudung93.kstn@gmail.com | |||||
----------------------------------------------------------------------------------- | |||||
# Description: Dataset and target-building utilities for the KITTI dataset
""" | |||||
import sys | |||||
import os | |||||
import math | |||||
from builtins import int | |||||
import numpy as np | |||||
from torch.utils.data import Dataset | |||||
import cv2 | |||||
import torch | |||||
src_dir = os.path.dirname(os.path.realpath(__file__)) | |||||
# while not src_dir.endswith("sfa"): | |||||
# src_dir = os.path.dirname(src_dir) | |||||
if src_dir not in sys.path: | |||||
sys.path.append(src_dir) | |||||
from data_process.kitti_data_utils import gen_hm_radius, compute_radius, Calibration, get_filtered_lidar | |||||
from data_process.kitti_bev_utils import makeBEVMap, drawRotatedBox, get_corners | |||||
from data_process import transformation | |||||
import config.kitti_config as cnf | |||||
class KittiDataset(Dataset):
    """KITTI-format dataset yielding BEV maps built from lidar scans.

    In 'train'/'val' mode ``__getitem__`` returns ``(bev_map, targets)``;
    in 'test' mode it returns ``(bev_map, metadatas)``.
    """

    def __init__(self, configs, mode='train', lidar_aug=None, hflip_prob=None, num_samples=None):
        """
        Args:
            configs: namespace providing dataset_dir, input_size, hm_size,
                num_classes, max_objects (and val_data_url in 'val' mode).
            mode: one of 'train', 'val', 'test'.
            lidar_aug: optional callable (lidar, labels) -> (lidar, labels).
            hflip_prob: probability of a horizontal flip; None or 0 disables it.
            num_samples: optional cap on the number of samples used.
        """
        self.dataset_dir = configs.dataset_dir
        self.input_size = configs.input_size
        self.hm_size = configs.hm_size
        self.num_classes = configs.num_classes
        self.max_objects = configs.max_objects

        assert mode in ['train', 'val', 'test'], 'Invalid mode: {}'.format(mode)
        self.mode = mode
        self.is_test = (self.mode == 'test')
        self.lidar_aug = lidar_aug
        self.hflip_prob = hflip_prob

        # Validation data may live under a separate root (configs.val_data_url).
        if mode == 'val':
            self.val_data_url = configs.val_data_url
            self.lidar_dir = os.path.join(self.val_data_url, "velodyne")
            self.calib_dir = os.path.join(self.val_data_url, "calib")
            self.label_dir = os.path.join(self.val_data_url, "label_2")
        else:
            self.lidar_dir = os.path.join(self.dataset_dir, "velodyne")
            self.calib_dir = os.path.join(self.dataset_dir, "calib")
            self.label_dir = os.path.join(self.dataset_dir, "label_2")

        # Sample ids come from the *.bin files in the velodyne folder
        # (no ImageSets split file is used).
        sample_list = []
        sample_files = os.listdir(self.lidar_dir)
        for bin_file in sample_files:
            bin_name = bin_file.split('.')[0]
            sample_list.append(bin_name)
        self.sample_id_list = sample_list
        if num_samples is not None:
            self.sample_id_list = self.sample_id_list[:num_samples]
        self.num_samples = len(self.sample_id_list)

    def __len__(self):
        return len(self.sample_id_list)

    def __getitem__(self, index):
        if self.is_test:
            return self.load_img_only(index)
        else:
            return self.load_img_with_targets(index)

    def load_img_only(self, index):
        """Load only the BEV map plus metadata, for the testing phase."""
        sample_id = self.sample_id_list[index]
        lidarData = self.get_lidar(sample_id)
        lidarData = get_filtered_lidar(lidarData, cnf.boundary)
        bev_map = makeBEVMap(lidarData, cnf.boundary)
        bev_map = torch.from_numpy(bev_map)

        bev_path = os.path.join(self.lidar_dir, '{}.png'.format(sample_id))
        metadatas = {
            'bev_path': bev_path,
        }

        return bev_map, metadatas

    def load_img_with_targets(self, index):
        """Load the BEV map and training targets for the train/val phases."""
        sample_id = self.sample_id_list[index]
        lidarData = self.get_lidar(sample_id)
        # NOTE(review): the camera->lidar label conversion is disabled here,
        # so labels are assumed to already be in lidar coordinates -- confirm
        # against the label files being used.
        labels, has_labels = self.get_label(sample_id)

        if self.lidar_aug:
            lidarData, labels[:, 1:] = self.lidar_aug(lidarData, labels[:, 1:])

        lidarData, labels = get_filtered_lidar(lidarData, cnf.boundary, labels)

        bev_map = makeBEVMap(lidarData, cnf.boundary)
        bev_map = torch.from_numpy(bev_map)

        hflipped = False
        # Guard against hflip_prob=None (the default): comparing None with a
        # float raises TypeError on Python 3.
        if self.hflip_prob is not None and np.random.random() < self.hflip_prob:
            hflipped = True
            # Flip along the last (width) axis of the (C, H, W) tensor.
            bev_map = torch.flip(bev_map, [-1])

        targets = self.build_targets(labels, hflipped)

        return bev_map, targets

    def get_image(self, idx):
        """Load the RGB image for sample ``idx``.

        NOTE(review): ``self.image_dir`` is never assigned in __init__ (the
        assignment is commented out upstream), so calling this currently
        raises AttributeError.
        """
        img_path = os.path.join(self.image_dir, '{}.png'.format(idx))
        img = cv2.cvtColor(cv2.imread(img_path), cv2.COLOR_BGR2RGB)
        return img_path, img

    def get_calib(self, idx):
        """Parse and return the Calibration object for sample ``idx``."""
        calib_file = os.path.join(self.calib_dir, '{}.txt'.format(idx))
        return Calibration(calib_file)

    def get_lidar(self, idx):
        """Read the raw (N, 4) [x, y, z, intensity] point cloud for ``idx``."""
        lidar_file = os.path.join(self.lidar_dir, '{}.bin'.format(idx))
        return np.fromfile(lidar_file, dtype=np.float32).reshape(-1, 4)

    def get_label(self, idx):
        """Parse the KITTI label file for sample ``idx``.

        Returns:
            (labels, has_labels): labels is an (N, 8) float32 array of rows
            [cat_id, x, y, z, h, w, l, ry]; when no usable object exists a
            single all-zero row is returned with has_labels=False.
        """
        labels = []
        label_path = os.path.join(self.label_dir, '{}.txt'.format(idx))
        with open(label_path, 'r') as f:
            for line in f:
                line = line.rstrip()
                line_parts = line.split(' ')
                obj_name = line_parts[0]  # 'Car', 'Pedestrian', ...
                # NOTE(review): unknown class names raise KeyError here; the
                # map is expected to cover every name in the label files.
                cat_id = int(cnf.CLASS_NAME_TO_ID[obj_name])
                if cat_id <= -99:  # ignore Tram and Misc
                    continue
                truncated = int(float(line_parts[1]))  # truncated pixel ratio [0..1] (parsed, unused)
                occluded = int(line_parts[2])  # 0=visible .. 3=unknown (parsed, unused)
                alpha = float(line_parts[3])  # observation angle [-pi..pi] (parsed, unused)
                # height, width, length (h, w, l)
                h, w, l = float(line_parts[8]), float(line_parts[9]), float(line_parts[10])
                # location (x, y, z)
                x, y, z = float(line_parts[11]), float(line_parts[12]), float(line_parts[13])
                ry = float(line_parts[14])  # yaw angle [-pi..pi]
                object_label = [cat_id, x, y, z, h, w, l, ry]
                labels.append(object_label)

        if len(labels) == 0:
            labels = np.zeros((1, 8), dtype=np.float32)
            has_labels = False
        else:
            labels = np.array(labels, dtype=np.float32)
            has_labels = True

        return labels, has_labels

    def build_targets(self, labels, hflipped):
        """Build CenterNet-style training targets from (N, 8) labels.

        Returns a dict with the class heatmap ('hm_cen'), sub-cell center
        offsets, yaw direction as (sin, cos), z coordinate, dimensions, flat
        center indices and the valid-object mask.
        """
        minX = cnf.boundary['minX']
        maxX = cnf.boundary['maxX']
        minY = cnf.boundary['minY']
        maxY = cnf.boundary['maxY']
        minZ = cnf.boundary['minZ']
        maxZ = cnf.boundary['maxZ']

        num_objects = min(len(labels), self.max_objects)
        hm_l, hm_w = self.hm_size

        hm_main_center = np.zeros((self.num_classes, hm_l, hm_w), dtype=np.float32)
        cen_offset = np.zeros((self.max_objects, 2), dtype=np.float32)
        direction = np.zeros((self.max_objects, 2), dtype=np.float32)
        z_coor = np.zeros((self.max_objects, 1), dtype=np.float32)
        dimension = np.zeros((self.max_objects, 3), dtype=np.float32)
        indices_center = np.zeros((self.max_objects), dtype=np.int64)
        obj_mask = np.zeros((self.max_objects), dtype=np.uint8)

        for k in range(num_objects):
            cls_id, x, y, z, h, w, l, yaw = labels[k]
            cls_id = int(cls_id)
            # Invert yaw angle
            yaw = -yaw
            if not ((minX <= x <= maxX) and (minY <= y <= maxY) and (minZ <= z <= maxZ)):
                continue
            if (h <= 0) or (w <= 0) or (l <= 0):
                continue

            # Box footprint on the heatmap grid drives the Gaussian radius.
            bbox_l = l / cnf.bound_size_x * hm_l
            bbox_w = w / cnf.bound_size_y * hm_w
            radius = compute_radius((math.ceil(bbox_l), math.ceil(bbox_w)))
            radius = max(0, int(radius))

            # Metric (x, y) -> heatmap (col, row); axes swap because the BEV
            # image is indexed (y, x).
            center_y = (x - minX) / cnf.bound_size_x * hm_l  # x --> y (invert to 2D image space)
            center_x = (y - minY) / cnf.bound_size_y * hm_w  # y --> x
            center = np.array([center_x, center_y], dtype=np.float32)

            if hflipped:
                center[0] = hm_w - center[0] - 1

            center_int = center.astype(np.int32)
            if cls_id < 0:
                # Negative ids mark "ignore" regions: stamp them into the
                # affected class channels with a near-1 peak.
                ignore_ids = [_ for _ in range(self.num_classes)] if cls_id == - 1 else [- cls_id - 2]
                # Consider to make mask ignore
                for cls_ig in ignore_ids:
                    gen_hm_radius(hm_main_center[cls_ig], center_int, radius)
                hm_main_center[ignore_ids, center_int[1], center_int[0]] = 0.9999
                continue

            # Generate heatmap for the main center.
            gen_hm_radius(hm_main_center[cls_id], center, radius)
            # Flat index of the center cell in the heatmap.
            indices_center[k] = center_int[1] * hm_w + center_int[0]

            # Sub-cell center offset.
            cen_offset[k] = center - center_int

            # Real-world dimensions.
            dimension[k, 0] = h
            dimension[k, 1] = w
            dimension[k, 2] = l

            # Heading encoded as (sin, cos).
            direction[k, 0] = math.sin(float(yaw))  # im
            direction[k, 1] = math.cos(float(yaw))  # re
            # A horizontal flip mirrors the heading: im -->> -im
            if hflipped:
                direction[k, 0] = - direction[k, 0]

            # Height above the crop floor (z shifted by minZ).
            z_coor[k] = z - minZ

            # Mark slot k as a real object.
            obj_mask[k] = 1

        targets = {
            'hm_cen': hm_main_center,
            'cen_offset': cen_offset,
            'direction': direction,
            'z_coor': z_coor,
            'dim': dimension,
            'indices_center': indices_center,
            'obj_mask': obj_mask,
        }

        return targets

    def draw_img_with_label(self, index):
        """Debug helper: return (bev_map, lidar-frame labels, lidar path) for
        sample ``index``; labels are converted camera -> lidar via the calib."""
        sample_id = self.sample_id_list[index]
        lidar_path = os.path.join(self.lidar_dir, '{}.bin'.format(sample_id))
        lidarData = self.get_lidar(sample_id)
        calib = self.get_calib(sample_id)
        labels, has_labels = self.get_label(sample_id)
        print(lidar_path)
        if has_labels:
            labels[:, 1:] = transformation.camera_to_lidar_box(labels[:, 1:], calib.V2C, calib.R0, calib.P2)

        if self.lidar_aug:
            lidarData, labels[:, 1:] = self.lidar_aug(lidarData, labels[:, 1:])

        lidarData, labels = get_filtered_lidar(lidarData, cnf.boundary, labels)
        bev_map = makeBEVMap(lidarData, cnf.boundary)
        print(labels)
        return bev_map, labels, lidar_path
if __name__ == '__main__':
    # Visual smoke test: iterate the validation split, draw the labeled boxes
    # on each BEV map and show them with OpenCV until Esc is pressed.
    from easydict import EasyDict as edict
    from data_process.transformation import OneOf, Random_Scaling, Random_Rotation, lidar_to_camera_box
    from utils.visualization_utils import merge_rgb_to_bev, show_rgb_image_with_boxes
    configs = edict()
    configs.distributed = False  # For testing
    configs.pin_memory = False
    configs.num_samples = None
    configs.input_size = (1216, 608)
    configs.hm_size = (304, 152)
    configs.max_objects = 50
    configs.num_classes = 3
    configs.output_width = 608
    # configs.dataset_dir = os.path.join('../../', 'dataset', 'kitti')
    # lidar_aug = OneOf([
    #     Random_Rotation(limit_angle=np.pi / 4, p=1.),
    #     Random_Scaling(scaling_range=(0.95, 1.05), p=1.),
    # ], p=1.)
    lidar_aug = None
    dataset = KittiDataset(configs, mode='val', lidar_aug=lidar_aug, hflip_prob=0., num_samples=configs.num_samples)
    print('\n\nPress n to see the next sample >>> Press Esc to quit...')
    for idx in range(len(dataset)):
        bev_map, labels, lidar_path = dataset.draw_img_with_label(idx)
        # Calib file lives next to the lidar file under 'calib' with .txt extension.
        calib = Calibration(lidar_path.replace(".bin", ".txt").replace("velodyne", "calib"))
        # (C, H, W) float map -> (H, W, C) uint8 image for OpenCV drawing.
        bev_map = (bev_map.transpose(1, 2, 0) * 255).astype(np.uint8)
        # bev_map = cv2.resize(bev_map, (cnf.BEV_HEIGHT, cnf.BEV_WIDTH))
        print(bev_map.shape)
        for box_idx, (cls_id, x, y, z, h, w, l, yaw) in enumerate(labels):
            # Draw rotated box
            yaw = -yaw
            # Metric lidar (x, y) -> BEV pixel coords; sizes scaled by the
            # BEV discretization (axes swap: lidar x maps to image y).
            y1 = int((x - cnf.boundary['minX']) / cnf.DISCRETIZATION)
            x1 = int((y - cnf.boundary['minY']) / cnf.DISCRETIZATION)
            w1 = int(w / cnf.DISCRETIZATION)
            l1 = int(l / cnf.DISCRETIZATION)
            drawRotatedBox(bev_map, x1, y1, w1, l1, yaw, cnf.colors[int(cls_id)])
        # Rotate the bev_map
        bev_map = cv2.rotate(bev_map, cv2.ROTATE_180)
        # labels[:, 1:] = lidar_to_camera_box(labels[:, 1:], calib.V2C, calib.R0, calib.P2)
        cv2.imshow('bev_map', bev_map)
        if cv2.waitKey(0) & 0xff == 27:  # Esc quits the preview loop
            break
@@ -0,0 +1,426 @@ | |||||
""" | |||||
# -*- coding: utf-8 -*- | |||||
----------------------------------------------------------------------------------- | |||||
# Refer: https://github.com/ghimiredhikura/Complex-YOLOv3 | |||||
# Source : https://github.com/jeasinema/VoxelNet-tensorflow/blob/master/utils/utils.py | |||||
""" | |||||
import os | |||||
import sys | |||||
import math | |||||
import numpy as np | |||||
import torch | |||||
src_dir = os.path.dirname(os.path.realpath(__file__)) | |||||
# while not src_dir.endswith("sfa"): | |||||
# src_dir = os.path.dirname(src_dir) | |||||
if src_dir not in sys.path: | |||||
sys.path.append(src_dir) | |||||
from config import kitti_config as cnf | |||||
def angle_in_limit(angle):
    """Wrap ``angle`` into [-pi/2, pi/2), then snap values within 5 degrees of
    -pi/2 up to +pi/2."""
    half_pi = np.pi / 2
    while angle >= half_pi:
        angle -= np.pi
    while angle < -half_pi:
        angle += np.pi
    # Snap angles just below -pi/2 to the +pi/2 boundary (5-degree tolerance).
    if abs(angle + half_pi) < 5 / 180 * np.pi:
        angle = half_pi
    return angle
def camera_to_lidar(x, y, z, V2C=None, R0=None, P2=None):
    """Transform one point from rect-camera coords to velodyne coords.

    Uses the hard-coded calibration from ``cnf`` unless both V2C and R0 are
    provided. ``P2`` is accepted for signature symmetry but unused.
    """
    point = np.array([x, y, z, 1])
    if V2C is None or R0 is None:
        point = np.matmul(cnf.R0_inv, point)
        point = np.matmul(cnf.Tr_velo_to_cam_inv, point)
    else:
        # Undo the rectification rotation (embedded into a homogeneous 4x4)...
        R0_hom = np.zeros((4, 4))
        R0_hom[:3, :3] = R0
        R0_hom[3, 3] = 1
        point = np.matmul(np.linalg.inv(R0_hom), point)
        # ...then undo the velodyne -> camera rigid transform.
        point = np.matmul(inverse_rigid_trans(V2C), point)
    return tuple(point[0:3])
def lidar_to_camera(x, y, z, V2C=None, R0=None, P2=None):
    """Transform one point from velodyne coords to rect-camera coords.

    Falls back to the hard-coded calibration in ``cnf`` when V2C/R0 are not
    given. ``P2`` is accepted for signature symmetry but unused.
    """
    point = np.array([x, y, z, 1])
    if V2C is None or R0 is None:
        point = np.matmul(cnf.Tr_velo_to_cam, point)
        point = np.matmul(cnf.R0, point)
    else:
        point = np.matmul(V2C, point)
        point = np.matmul(R0, point)
    return tuple(point[0:3])
def camera_to_lidar_point(points):
    """Transform (N, 3) rect-camera points to velodyne coords using the
    hard-coded calibration matrices in ``cnf``."""
    num_points = points.shape[0]
    # Lift to homogeneous coords and transpose: (N, 3) -> (4, N).
    hom = np.hstack([points, np.ones((num_points, 1))]).T
    hom = np.matmul(cnf.R0_inv, hom)
    hom = np.matmul(cnf.Tr_velo_to_cam_inv, hom).T  # back to (N, 4)
    return hom[:, 0:3].reshape(-1, 3)
def lidar_to_camera_point(points, V2C=None, R0=None):
    """Transform (N, 3) velodyne points to rect-camera coords; uses the
    hard-coded ``cnf`` calibration when V2C/R0 are not supplied."""
    num_points = points.shape[0]
    # Lift to homogeneous coords and transpose: (N, 3) -> (4, N).
    hom = np.hstack([points, np.ones((num_points, 1))]).T
    if V2C is None or R0 is None:
        hom = np.matmul(cnf.Tr_velo_to_cam, hom)
        hom = np.matmul(cnf.R0, hom).T
    else:
        hom = np.matmul(V2C, hom)
        hom = np.matmul(R0, hom).T
    return hom[:, 0:3].reshape(-1, 3)
def camera_to_lidar_box(boxes, V2C=None, R0=None, P2=None):
    """Convert (N, 7) boxes [x, y, z, h, w, l, ry] from camera to lidar coords.

    Positions go through ``camera_to_lidar``; the yaw is re-expressed as
    rz = -ry - pi/2. Returns an (N, 7) array [x, y, z, h, w, l, rz].
    """
    ret = []
    for x, y, z, h, w, l, ry in boxes:
        lx, ly, lz = camera_to_lidar(x, y, z, V2C=V2C, R0=R0, P2=P2)
        rz = -ry - np.pi / 2
        ret.append([lx, ly, lz, h, w, l, rz])
    return np.array(ret).reshape(-1, 7)
def lidar_to_camera_box(boxes, V2C=None, R0=None, P2=None):
    """Convert (N, 7) lidar boxes [x, y, z, h, w, l, rz] to camera-style yaw.

    NOTE(review): only the yaw is remapped (ry = -rz - pi/2); the center
    translation through lidar_to_camera is commented out in the original, so
    x, y, z pass through unchanged — confirm this is intentional for callers.
    V2C / R0 / P2 are accepted for signature symmetry but unused here.
    """
    converted = []
    for x, y, z, h, w, l, rz in boxes:
        # (x, y, z), h, w, l, ry = lidar_to_camera(x, y, z, V2C=V2C, R0=R0, P2=P2), h, w, l, -rz - np.pi / 2
        # ry = angle_in_limit(ry)
        converted.append([x, y, z, h, w, l, -rz - np.pi / 2])
    return np.array(converted).reshape(-1, 7)
def center_to_corner_box2d(boxes_center, coordinate='lidar'):
    """Convert (N, 5) BEV boxes [x, y, w, l, yaw] into (N, 4, 2) corner points.

    Embeds the 2-D boxes into 7-dof 3-D boxes (z and h left at zero), reuses
    the 3-D corner builder, and keeps only the 4 bottom corners' x/y.
    """
    # (N, 5) -> (N, 4, 2)
    N = boxes_center.shape[0]
    boxes3d_center = np.zeros((N, 7))
    # slots 0,1 = x,y; 4,5 = w,l; 6 = yaw (z at index 2 and h at index 3 stay 0)
    boxes3d_center[:, [0, 1, 4, 5, 6]] = boxes_center
    boxes3d_corner = center_to_corner_box3d(boxes3d_center, coordinate=coordinate)
    return boxes3d_corner[:, 0:4, 0:2]
def center_to_corner_box3d(boxes_center, coordinate='lidar'):
    """Convert (N, 7) center-form boxes [x, y, z, h, w, l, yaw] to (N, 8, 3) corners.

    Per box: 4 bottom corners first, then 4 top corners (z offsets 0 and h in
    the lidar frame).  If coordinate == 'camera', boxes are first mapped to
    lidar, corners are built there, then mapped back to camera coordinates.
    """
    # (N, 7) -> (N, 8, 3)
    N = boxes_center.shape[0]
    ret = np.zeros((N, 8, 3), dtype=np.float32)
    if coordinate == 'camera':
        boxes_center = camera_to_lidar_box(boxes_center)
    for i in range(N):
        box = boxes_center[i]
        translation = box[0:3]
        size = box[3:6]
        rotation = [0, 0, box[-1]]  # yaw about the lidar z axis only
        h, w, l = size[0], size[1], size[2]
        trackletBox = np.array([  # in velodyne coordinates around zero point and without orientation yet
            [-l / 2, -l / 2, l / 2, l / 2, -l / 2, -l / 2, l / 2, l / 2], \
            [w / 2, -w / 2, -w / 2, w / 2, w / 2, -w / 2, -w / 2, w / 2], \
            [0, 0, 0, 0, h, h, h, h]])
        # re-create 3D bounding box in velodyne coordinate system
        yaw = rotation[2]
        # standard counter-clockwise rotation about z
        rotMat = np.array([
            [np.cos(yaw), -np.sin(yaw), 0.0],
            [np.sin(yaw), np.cos(yaw), 0.0],
            [0.0, 0.0, 1.0]])
        cornerPosInVelo = np.dot(rotMat, trackletBox) + np.tile(translation, (8, 1)).T
        box3d = cornerPosInVelo.transpose()
        ret[i] = box3d
    if coordinate == 'camera':
        # map the lidar-frame corners back into camera coordinates
        for idx in range(len(ret)):
            ret[idx] = lidar_to_camera_point(ret[idx])
    return ret
# Selects the averaging strategy inside corner_to_center_box3d below.
CORNER2CENTER_AVG = True
def corner_to_center_box3d(boxes_corner, coordinate='camera'):
    """Recover (N, 7) center-form boxes [x, y, z, h, w, l, ry/rz] from (N, 8, 3) corners.

    All estimation is done in camera coordinates: if coordinate == 'lidar',
    corners are converted to camera first and the resulting boxes converted
    back at the end.  With CORNER2CENTER_AVG, each dimension/angle is the
    average over the redundant corner pairs; otherwise the "max version" is
    used.

    NOTE(review): boxes_corner is modified in place when coordinate == 'lidar'.
    """
    # (N, 8, 3) -> (N, 7) x,y,z,h,w,l,ry/z
    if coordinate == 'lidar':
        for idx in range(len(boxes_corner)):
            boxes_corner[idx] = lidar_to_camera_point(boxes_corner[idx])
    ret = []
    for roi in boxes_corner:
        if CORNER2CENTER_AVG:  # average version
            roi = np.array(roi)
            # height: mean vertical gap between bottom (0:4) and top (4:8) corners
            h = abs(np.sum(roi[:4, 1] - roi[4:, 1]) / 4)
            # width: mean of the 4 edges joining corner pairs (0,3),(1,2),(4,7),(5,6)
            w = np.sum(
                np.sqrt(np.sum((roi[0, [0, 2]] - roi[3, [0, 2]]) ** 2)) +
                np.sqrt(np.sum((roi[1, [0, 2]] - roi[2, [0, 2]]) ** 2)) +
                np.sqrt(np.sum((roi[4, [0, 2]] - roi[7, [0, 2]]) ** 2)) +
                np.sqrt(np.sum((roi[5, [0, 2]] - roi[6, [0, 2]]) ** 2))
            ) / 4
            # length: mean of the 4 edges joining corner pairs (0,1),(2,3),(4,5),(6,7)
            l = np.sum(
                np.sqrt(np.sum((roi[0, [0, 2]] - roi[1, [0, 2]]) ** 2)) +
                np.sqrt(np.sum((roi[2, [0, 2]] - roi[3, [0, 2]]) ** 2)) +
                np.sqrt(np.sum((roi[4, [0, 2]] - roi[5, [0, 2]]) ** 2)) +
                np.sqrt(np.sum((roi[6, [0, 2]] - roi[7, [0, 2]]) ** 2))
            ) / 4
            # center: x/z averaged over all 8 corners, y over the bottom 4
            x = np.sum(roi[:, 0], axis=0) / 8
            y = np.sum(roi[0:4, 1], axis=0) / 4
            z = np.sum(roi[:, 2], axis=0) / 8
            # yaw: averaged over 8 redundant edge-direction estimates
            ry = np.sum(
                math.atan2(roi[2, 0] - roi[1, 0], roi[2, 2] - roi[1, 2]) +
                math.atan2(roi[6, 0] - roi[5, 0], roi[6, 2] - roi[5, 2]) +
                math.atan2(roi[3, 0] - roi[0, 0], roi[3, 2] - roi[0, 2]) +
                math.atan2(roi[7, 0] - roi[4, 0], roi[7, 2] - roi[4, 2]) +
                math.atan2(roi[0, 2] - roi[1, 2], roi[1, 0] - roi[0, 0]) +
                math.atan2(roi[4, 2] - roi[5, 2], roi[5, 0] - roi[4, 0]) +
                math.atan2(roi[3, 2] - roi[2, 2], roi[2, 0] - roi[3, 0]) +
                math.atan2(roi[7, 2] - roi[6, 2], roi[6, 0] - roi[7, 0])
            ) / 8
            # NOTE(review): both branches swap w/l and subtract pi/2, so the
            # swap is effectively unconditional unless w == l — confirm
            # whether the elif branch was meant to leave the box unchanged.
            if w > l:
                w, l = l, w
                ry = ry - np.pi / 2
            elif l > w:
                l, w = w, l
                ry = ry - np.pi / 2
            ret.append([x, y, z, h, w, l, ry])
        else:  # max version
            h = max(abs(roi[:4, 1] - roi[4:, 1]))
            # NOTE(review): np.max over a scalar sum of the 4 edges is a
            # no-op; this "max version" actually returns the edge-length sum
            # (not divided by 4) — verify before enabling this branch.
            w = np.max(
                np.sqrt(np.sum((roi[0, [0, 2]] - roi[3, [0, 2]]) ** 2)) +
                np.sqrt(np.sum((roi[1, [0, 2]] - roi[2, [0, 2]]) ** 2)) +
                np.sqrt(np.sum((roi[4, [0, 2]] - roi[7, [0, 2]]) ** 2)) +
                np.sqrt(np.sum((roi[5, [0, 2]] - roi[6, [0, 2]]) ** 2))
            )
            l = np.max(
                np.sqrt(np.sum((roi[0, [0, 2]] - roi[1, [0, 2]]) ** 2)) +
                np.sqrt(np.sum((roi[2, [0, 2]] - roi[3, [0, 2]]) ** 2)) +
                np.sqrt(np.sum((roi[4, [0, 2]] - roi[5, [0, 2]]) ** 2)) +
                np.sqrt(np.sum((roi[6, [0, 2]] - roi[7, [0, 2]]) ** 2))
            )
            x = np.sum(roi[:, 0], axis=0) / 8
            y = np.sum(roi[0:4, 1], axis=0) / 4
            z = np.sum(roi[:, 2], axis=0) / 8
            ry = np.sum(
                math.atan2(roi[2, 0] - roi[1, 0], roi[2, 2] - roi[1, 2]) +
                math.atan2(roi[6, 0] - roi[5, 0], roi[6, 2] - roi[5, 2]) +
                math.atan2(roi[3, 0] - roi[0, 0], roi[3, 2] - roi[0, 2]) +
                math.atan2(roi[7, 0] - roi[4, 0], roi[7, 2] - roi[4, 2]) +
                math.atan2(roi[0, 2] - roi[1, 2], roi[1, 0] - roi[0, 0]) +
                math.atan2(roi[4, 2] - roi[5, 2], roi[5, 0] - roi[4, 0]) +
                math.atan2(roi[3, 2] - roi[2, 2], roi[2, 0] - roi[3, 0]) +
                math.atan2(roi[7, 2] - roi[6, 2], roi[6, 0] - roi[7, 0])
            ) / 8
            if w > l:
                w, l = l, w
                # enforce w <= l convention, compensate the yaw by +pi/2
                ry = angle_in_limit(ry + np.pi / 2)
            ret.append([x, y, z, h, w, l, ry])
    if coordinate == 'lidar':
        ret = camera_to_lidar_box(np.array(ret))
    return np.array(ret)
def point_transform(points, tx, ty, tz, rx=0, ry=0, rz=0):
    """Translate, then rotate an (N, 3) point cloud about x, y, z in that order.

    Points are treated as row vectors and multiplied on the right by 4x4
    matrices: the translation lives in the bottom matrix row, and each
    rotation is applied as points @ R (the transpose of the usual
    column-vector convention, kept for compatibility with existing callers).
    Angles are in radians.  Returns the transformed (N, 3) array.
    """
    num = points.shape[0]
    hom = np.hstack([points, np.ones((num, 1))])  # (N, 4) homogeneous rows

    # translation: identity with (tx, ty, tz) in the bottom row
    trans = np.eye(4)
    trans[3, 0:3] = tx, ty, tz
    hom = np.matmul(hom, trans)

    if rx != 0:
        c, s = np.cos(rx), np.sin(rx)
        rot_x = np.zeros((4, 4))
        rot_x[0, 0] = rot_x[3, 3] = 1
        rot_x[1, 1], rot_x[1, 2] = c, -s
        rot_x[2, 1], rot_x[2, 2] = s, c
        hom = np.matmul(hom, rot_x)
    if ry != 0:
        c, s = np.cos(ry), np.sin(ry)
        rot_y = np.zeros((4, 4))
        rot_y[1, 1] = rot_y[3, 3] = 1
        rot_y[0, 0], rot_y[0, 2] = c, s
        rot_y[2, 0], rot_y[2, 2] = -s, c
        hom = np.matmul(hom, rot_y)
    if rz != 0:
        c, s = np.cos(rz), np.sin(rz)
        rot_z = np.zeros((4, 4))
        rot_z[2, 2] = rot_z[3, 3] = 1
        rot_z[0, 0], rot_z[0, 1] = c, -s
        rot_z[1, 0], rot_z[1, 1] = s, c
        hom = np.matmul(hom, rot_z)

    return hom[:, 0:3]
def box_transform(boxes, tx, ty, tz, r=0, coordinate='lidar'):
    """Translate and rotate center-form boxes by round-tripping through corners.

    Converts to corner form, transforms every corner (rotation about z in
    lidar coordinates, about y in camera coordinates), then fits centers back.
    """
    # Input:
    #   boxes: (N, 7) x y z h w l rz/y
    # Output:
    #   boxes: (N, 7) x y z h w l rz/y
    boxes_corner = center_to_corner_box3d(boxes, coordinate=coordinate)  # (N, 8, 3)
    for idx in range(len(boxes_corner)):
        if coordinate == 'lidar':
            boxes_corner[idx] = point_transform(boxes_corner[idx], tx, ty, tz, rz=r)
        else:
            boxes_corner[idx] = point_transform(boxes_corner[idx], tx, ty, tz, ry=r)
    return corner_to_center_box3d(boxes_corner, coordinate=coordinate)
def inverse_rigid_trans(Tr):
    '''Invert a 3x4 rigid-body transform [R|t] -> [R^T | -R^T t].

    Exploits orthonormality of R, so no general matrix inverse is needed.
    Returns a 3x4 array with the same dtype as the input.
    '''
    rot_t = np.transpose(Tr[0:3, 0:3])
    inverse = np.zeros_like(Tr)  # keeps (3, 4) shape and dtype
    inverse[0:3, 0:3] = rot_t
    inverse[0:3, 3] = -np.dot(rot_t, Tr[0:3, 3])
    return inverse
class Compose(object):
    """Apply a sequence of (lidar, labels) transforms, all together with probability p."""

    def __init__(self, transforms, p=1.0):
        self.transforms = transforms
        self.p = p

    def __call__(self, lidar, labels):
        # Either run the full pipeline or return the sample untouched.
        if np.random.random() <= self.p:
            for transform in self.transforms:
                lidar, labels = transform(lidar, labels)
        return lidar, labels
class OneOf(object):
    """With probability p, apply exactly one uniformly chosen transform from the list."""

    def __init__(self, transforms, p=1.0):
        self.transforms = transforms
        self.p = p

    def __call__(self, lidar, labels):
        if np.random.random() <= self.p:
            # pick one transform index uniformly at random
            choice = np.random.randint(low=0, high=len(self.transforms))
            lidar, labels = self.transforms[choice](lidar, labels)
        return lidar, labels
class Random_Rotation(object):
    """Rotate the whole scene (points and boxes) about the z axis by a random angle.

    With probability p, draws an angle uniformly from
    [-limit_angle, limit_angle] and applies it to the point cloud via
    point_transform and to the labels via box_transform.
    """
    def __init__(self, limit_angle=np.pi / 4, p=0.5):
        self.limit_angle = limit_angle
        self.p = p
    def __call__(self, lidar, labels):
        """
        :param lidar: (N, >=3) point cloud; only columns 0:3 are rotated (in place)
        :param labels: # (N', 7) x, y, z, h, w, l, r
        :return: (lidar, labels), possibly rotated
        """
        if np.random.random() <= self.p:
            angle = np.random.uniform(-self.limit_angle, self.limit_angle)
            lidar[:, 0:3] = point_transform(lidar[:, 0:3], 0, 0, 0, rz=angle)
            labels = box_transform(labels, 0, 0, 0, r=angle, coordinate='lidar')
        return lidar, labels
class Random_Scaling(object):
    """Randomly scale the whole scene (points and boxes) by a single factor.

    With probability p, draws a factor uniformly from scaling_range and
    multiplies the point coordinates and the box centers/sizes by it.
    """
    def __init__(self, scaling_range=(0.95, 1.05), p=0.5):
        self.scaling_range = scaling_range
        self.p = p
    def __call__(self, lidar, labels):
        """
        :param lidar: (N, >=3) point cloud; only columns 0:3 are scaled (in place)
        :param labels: # (N', 7) x, y, z, h, w, l, r
        :return: (lidar, labels), possibly scaled
        """
        if np.random.random() <= self.p:
            # Bug fix: the upper bound previously reused scaling_range[0],
            # so the drawn factor was always the lower bound of the range.
            factor = np.random.uniform(self.scaling_range[0], self.scaling_range[1])
            lidar[:, 0:3] = lidar[:, 0:3] * factor
            labels[:, 0:6] = labels[:, 0:6] * factor
        return lidar, labels
class Cutout(object):
    """Randomly mask out one or more patches from an image.
    Args:
        n_holes (int): Number of patches to cut out of each image.
        ratio (float): patch size as a fraction of the image height/width.
        fill_value (float): value written into the cut-out area, in [0, 1].
        p (float): probability of applying the augmentation.
    Refer from: https://github.com/uoguelph-mlrg/Cutout/blob/master/util/cutout.py
    """
    def __init__(self, n_holes, ratio, fill_value=0., p=1.0):
        self.n_holes = n_holes
        self.ratio = ratio
        assert 0. <= fill_value <= 1., "the fill value is in a range of 0 to 1"
        self.fill_value = fill_value
        self.p = p
    def __call__(self, img, targets):
        """
        Args:
            img (Tensor): Tensor image of size (C, H, W); mutated in place.
            targets: (M, 8) per-object rows; fields 2 and 3 are the box
                center x/y (presumably normalized to [0, 1], since they are
                multiplied by the image size below — TODO confirm).
        Returns:
            Tensor: Image with n_holes of dimension length x length cut out of it,
            plus the targets whose centers fall outside every hole.
        """
        if np.random.random() <= self.p:
            h = img.size(1)
            w = img.size(2)
            h_cutout = int(self.ratio * h)
            w_cutout = int(self.ratio * w)
            for n in range(self.n_holes):
                # random hole center, clipped so the patch stays inside the image
                y = np.random.randint(h)
                x = np.random.randint(w)
                y1 = np.clip(y - h_cutout // 2, 0, h)
                y2 = np.clip(y + h_cutout // 2, 0, h)
                x1 = np.clip(x - w_cutout // 2, 0, w)
                x2 = np.clip(x + w_cutout // 2, 0, w)
                img[:, y1: y2, x1: x2] = self.fill_value  # Zero out the selected area
                # Remove targets that are in the selected area
                keep_target = []
                for target_idx, target in enumerate(targets):
                    _, _, target_x, target_y, target_w, target_l, _, _ = target
                    if (x1 <= target_x * w <= x2) and (y1 <= target_y * h <= y2):
                        continue
                    keep_target.append(target_idx)
                targets = targets[keep_target]
        return img, targets
@@ -0,0 +1,378 @@ | |||||
""" | |||||
# -*- coding: utf-8 -*- | |||||
----------------------------------------------------------------------------------- | |||||
# Author: Nguyen Mau Dung | |||||
# DoC: 2020.08.17 | |||||
# email: nguyenmaudung93.kstn@gmail.com | |||||
----------------------------------------------------------------------------------- | |||||
# Description: Testing script | |||||
""" | |||||
import argparse | |||||
import sys | |||||
import os | |||||
import time | |||||
import warnings | |||||
warnings.filterwarnings("ignore", category=UserWarning) | |||||
from easydict import EasyDict as edict | |||||
import cv2 | |||||
import torch | |||||
import numpy as np | |||||
import torch.nn.functional as F | |||||
src_dir = os.path.dirname(os.path.realpath(__file__)) | |||||
# while not src_dir.endswith("sfa"): | |||||
# src_dir = os.path.dirname(src_dir) | |||||
if src_dir not in sys.path: | |||||
sys.path.append(src_dir) | |||||
from data_process.kitti_dataloader import create_test_dataloader | |||||
from models.model_utils import create_model | |||||
import config.kitti_config as cnf | |||||
def parse_test_configs():
    """Build the test-time configuration as an EasyDict.

    Combines command-line arguments with hard-coded model/head settings,
    creates the results directory, and returns the merged config object.

    NOTE(review): the help strings of --dataset_dir/--results_dir look
    copy-pasted from a save-output flag — the flags are actually directory
    paths; confirm and fix the help text upstream.
    """
    parser = argparse.ArgumentParser(description='Testing config for the Implementation')
    parser.add_argument('--saved_fn', type=str, default='fpn_resnet_18', metavar='FN',
                        help='The name using for saving logs, models,...')
    parser.add_argument('-a', '--arch', type=str, default='fpn_resnet_18', metavar='ARCH',
                        help='The name of the model architecture')
    parser.add_argument('--model_dir', type=str,
                        default='/train_out_model/', metavar='PATH',
                        help='the path of the pretrained checkpoint')
    parser.add_argument('--K', type=int, default=50,
                        help='the number of top K')
    parser.add_argument('--no_cuda', default= False,
                        help='If true, cuda is not used.')
    parser.add_argument('--gpu_idx', default=0, type=int,
                        help='GPU index to use.')
    parser.add_argument('--num_samples', type=int, default=None,
                        help='Take a subset of the dataset to run and debug')
    parser.add_argument('--num_workers', type=int, default=1,
                        help='Number of threads for loading data')
    parser.add_argument('--batch_size', type=int, default=1,
                        help='mini-batch size (default: 4)')
    parser.add_argument('--peak_thresh', type=float, default=0.2)
    parser.add_argument('--dataset_dir', type=str,default='/dataset_dir/',
                        help='If true, the output image of the testing phase will be saved')
    parser.add_argument('--results_dir', type=str,default='/results_dir/',
                        help='If true, the output image of the testing phase will be saved')
    parser.add_argument('--save_test_output', type=bool, default=True,
                        help='save the test output or not')
    parser.add_argument('--output_format', type=str, default='txt', metavar='PATH',
                        help='the type of the test output (support image, video or none)')
    parser.add_argument('--output_video_fn', type=str, default='out_fpn_resnet_18', metavar='PATH',
                        help='the video filename if the output format is video')
    parser.add_argument('--output-width', type=int, default=608,
                        help='the width of showing output, the height maybe vary')
    configs = edict(vars(parser.parse_args()))
    configs.pin_memory = True
    configs.distributed = False  # For testing on 1 GPU only
    # fixed BEV input / heatmap geometry for this architecture
    configs.input_size = (1216, 608)
    configs.hm_size = (304, 152)
    configs.down_ratio = 4
    configs.max_objects = 50
    configs.imagenet_pretrained = False
    configs.head_conv = 64
    configs.num_classes = 3
    configs.num_center_offset = 2
    configs.num_z = 1
    configs.num_dim = 3
    configs.num_direction = 2  # sin, cos
    # output heads and their channel counts, consumed by create_model
    configs.heads = {
        'hm_cen': configs.num_classes,
        'cen_offset': configs.num_center_offset,
        'direction': configs.num_direction,
        'z_coor': configs.num_z,
        'dim': configs.num_dim
    }
    configs.num_input_features = 4
    ####################################################################
    ##############Dataset, Checkpoints, and results dir configs#########
    ####################################################################
    configs.root_dir = '../'
    # configs.dataset_dir = os.path.join(configs.root_dir, 'dataset', 'apollo')
    # configs.results_dir_img = os.path.join(configs.results_dir, configs.saved_fn, 'image')
    # configs.results_dir_txt = os.path.join(configs.results_dir, configs.saved_fn, 'txt')
    # make_folder(configs.results_dir_img)
    # make_folder(configs.results_dir_txt)
    # NOTE(review): results_dir_img is never assigned (only in the comments
    # above) but is referenced by the image/video output paths in __main__ —
    # those paths would raise AttributeError if enabled.
    make_folder(configs.results_dir)
    return configs
def _sigmoid(x): | |||||
return torch.clamp(x.sigmoid_(), min=1e-4, max=1 - 1e-4) | |||||
def time_synchronized():
    """Return time.time(), synchronizing CUDA first so GPU timings are accurate."""
    if torch.cuda.is_available():
        torch.cuda.synchronize()
    return time.time()
def make_folder(folder_name):
    """Create folder_name (including parents) if it does not already exist.

    Uses exist_ok=True instead of an exists() pre-check, which was racy when
    several processes created the same results directory concurrently.
    """
    os.makedirs(folder_name, exist_ok=True)
def drawRotatedBox(img, x, y, w, l, yaw, color):
    """Draw a rotated BEV box outline on img, highlighting one edge in cyan.

    (x, y) is the box center in pixels, w/l the box size in pixels, yaw in
    radians.  Mutates img in place via OpenCV drawing calls.
    """
    bev_corners = get_corners(x, y, w, l, yaw)
    corners_int = bev_corners.reshape(-1, 1, 2).astype(int)
    cv2.polylines(img, [corners_int], True, color, 2)
    corners_int = bev_corners.reshape(-1, 2)
    # extra line between corners 0 and 3 marks the box heading
    cv2.line(img, (int(corners_int[0, 0]), int(corners_int[0, 1])), (int(corners_int[3, 0]), int(corners_int[3, 1])), (255, 255, 0), 2)
# bev image coordinates format
def get_corners(x, y, w, l, yaw):
    """Return the 4 BEV corners of a rotated box, ordered FL, RL, RR, FR.

    (x, y): box center, w: width, l: length, yaw: rotation in radians.
    Output shape: (4, 2), float32.
    """
    cos_yaw = np.cos(yaw)
    sin_yaw = np.sin(yaw)
    half_w = w / 2
    half_l = l / 2
    corners = np.zeros((4, 2), dtype=np.float32)
    # front left
    corners[0] = (x - half_w * cos_yaw - half_l * sin_yaw,
                  y - half_w * sin_yaw + half_l * cos_yaw)
    # rear left
    corners[1] = (x - half_w * cos_yaw + half_l * sin_yaw,
                  y - half_w * sin_yaw - half_l * cos_yaw)
    # rear right
    corners[2] = (x + half_w * cos_yaw + half_l * sin_yaw,
                  y + half_w * sin_yaw - half_l * cos_yaw)
    # front right
    corners[3] = (x + half_w * cos_yaw - half_l * sin_yaw,
                  y + half_w * sin_yaw + half_l * cos_yaw)
    return corners
def _nms(heat, kernel=3): | |||||
pad = (kernel - 1) // 2 | |||||
hmax = F.max_pool2d(heat, (kernel, kernel), stride=1, padding=pad) | |||||
keep = (hmax == heat).float() | |||||
return heat * keep | |||||
def _gather_feat(feat, ind, mask=None): | |||||
dim = feat.size(2) | |||||
ind = ind.unsqueeze(2).expand(ind.size(0), ind.size(1), dim) | |||||
feat = feat.gather(1, ind) | |||||
if mask is not None: | |||||
mask = mask.unsqueeze(2).expand_as(feat) | |||||
feat = feat[mask] | |||||
feat = feat.view(-1, dim) | |||||
return feat | |||||
def _transpose_and_gather_feat(feat, ind):
    """Gather per-pixel features: (B, C, H, W) + ind (B, K) -> (B, K, C)."""
    batch = feat.size(0)
    channels = feat.size(1)
    # channels-last, then flatten the spatial dims so ind addresses H*W
    flat = feat.permute(0, 2, 3, 1).contiguous().view(batch, -1, channels)
    return _gather_feat(flat, ind)
def _topk(scores, K=40):
    """Select the overall top-K peaks from a (B, C, H, W) heatmap.

    First takes the top K per class, then the top K across classes, and
    gathers the surviving flat indices / coordinates accordingly.
    Returns (scores, flat_inds, class_ids, ys, xs), each of shape (B, K).
    """
    batch, cat, height, width = scores.size()
    # per-class top K over the flattened H*W plane
    topk_scores, topk_inds = torch.topk(scores.view(batch, cat, -1), K)
    topk_inds = topk_inds % (height * width)
    topk_ys = (torch.floor_divide(topk_inds, width)).float()
    topk_xs = (topk_inds % width).int().float()
    # global top K across the C*K per-class candidates
    topk_score, topk_ind = torch.topk(topk_scores.view(batch, -1), K)
    topk_clses = (torch.floor_divide(topk_ind, K)).int()  # candidate index // K == class id
    topk_inds = _gather_feat(topk_inds.view(batch, -1, 1), topk_ind).view(batch, K)
    topk_ys = _gather_feat(topk_ys.view(batch, -1, 1), topk_ind).view(batch, K)
    topk_xs = _gather_feat(topk_xs.view(batch, -1, 1), topk_ind).view(batch, K)
    return topk_score, topk_inds, topk_clses, topk_ys, topk_xs
def decode(hm_cen, cen_offset, direction, z_coor, dim, K=40):
    """Decode raw network heads into a (B, K, 10) detection tensor.

    Applies heatmap NMS, selects the top-K centers, refines them with the
    center-offset head, and gathers direction/z/dim at those locations.
    All coordinates stay in heatmap units (rescaling happens later in
    post_processing).
    """
    batch_size, num_classes, height, width = hm_cen.size()
    hm_cen = _nms(hm_cen)
    scores, inds, clses, ys, xs = _topk(hm_cen, K=K)
    if cen_offset is not None:
        # sub-cell refinement of the peak coordinates
        cen_offset = _transpose_and_gather_feat(cen_offset, inds)
        cen_offset = cen_offset.view(batch_size, K, 2)
        xs = xs.view(batch_size, K, 1) + cen_offset[:, :, 0:1]
        ys = ys.view(batch_size, K, 1) + cen_offset[:, :, 1:2]
    else:
        # no offset head: assume cell centers
        xs = xs.view(batch_size, K, 1) + 0.5
        ys = ys.view(batch_size, K, 1) + 0.5
    direction = _transpose_and_gather_feat(direction, inds)
    direction = direction.view(batch_size, K, 2)
    z_coor = _transpose_and_gather_feat(z_coor, inds)
    z_coor = z_coor.view(batch_size, K, 1)
    dim = _transpose_and_gather_feat(dim, inds)
    dim = dim.view(batch_size, K, 3)
    clses = clses.view(batch_size, K, 1).float()
    scores = scores.view(batch_size, K, 1)
    # (scores x 1, ys x 1, xs x 1, z_coor x 1, dim x 3, direction x 2, clses x 1)
    # (scores-0:1, ys-1:2, xs-2:3, z_coor-3:4, dim-4:7, direction-7:9, clses-9:10)
    # detections: [batch_size, K, 10]
    detections = torch.cat([scores, xs, ys, z_coor, dim, direction, clses], dim=2)
    return detections
def get_yaw(direction):
    """Recover yaw angles from (N, 2) [sin, cos] direction vectors via arctan2."""
    sin_part = direction[:, 0:1]
    cos_part = direction[:, 1:2]
    return np.arctan2(sin_part, cos_part)
def post_processing(detections, num_classes=3, down_ratio=4, peak_thresh=0.2):
    """
    :param detections: [batch_size, K, 10]
    # (scores x 1, xs x 1, ys x 1, z_coor x 1, dim x 3, direction x 2, clses x 1)
    # (scores-0:1, xs-1:2, ys-2:3, z_coor-3:4, dim-4:7, direction-7:9, clses-9:10)
    :return: list of length batch_size; each element maps class_id -> (M, 8)
        array of [score, x, y, z, h, w, l, yaw] kept above peak_thresh
    """
    # TODO: Need to consider rescale to the original scale: x, y
    ret = []
    for i in range(detections.shape[0]):
        top_preds = {}
        classes = detections[i, :, -1]
        for j in range(num_classes):
            inds = (classes == j)
            # x, y, z, h, w, l, yaw
            # NOTE(review): x/y are rescaled by down_ratio and w/l by the
            # BEV pixel ratios here, while z and h are left as-is — the
            # remaining rescale happens in convert_det_to_real_values; confirm.
            top_preds[j] = np.concatenate([
                detections[i, inds, 0:1],
                detections[i, inds, 1:2] * down_ratio,
                detections[i, inds, 2:3] * down_ratio,
                detections[i, inds, 3:4],
                detections[i, inds, 4:5],
                detections[i, inds, 5:6] / cnf.bound_size_y * cnf.BEV_WIDTH,
                detections[i, inds, 6:7] / cnf.bound_size_x * cnf.BEV_HEIGHT,
                get_yaw(detections[i, inds, 7:9]).astype(np.float32)], axis=1)
            # Filter by peak_thresh
            if len(top_preds[j]) > 0:
                keep_inds = (top_preds[j][:, 0] > peak_thresh)
                top_preds[j] = top_preds[j][keep_inds]
        ret.append(top_preds)
    return ret
def draw_predictions(img, detections, num_classes=3):
    """Draw every per-class detection as a rotated box on the BEV image.

    detections: class_id -> (M, 8) rows [score, x, y, z, h, w, l, yaw]
    (one batch element of post_processing's output).  Mutates and returns img.
    """
    for j in range(num_classes):
        if len(detections[j]) > 0:
            for det in detections[j]:
                # (scores-0:1, x-1:2, y-2:3, z-3:4, dim-4:7, yaw-7:8)
                _score, _x, _y, _z, _h, _w, _l, _yaw = det
                drawRotatedBox(img, _x, _y, _w, _l, _yaw, cnf.colors[int(j)])
    return img
def convert_det_to_real_values(detections, num_classes=3):
    """Map BEV-pixel detections back to metric lidar coordinates (KITTI-style).

    detections: class_id -> (M, 8) rows [score, x, y, z, h, w, l, yaw] in BEV
    pixels.  Returns an array of [cls_id, h, w, l, x, y, z, yaw] rows with
    positions/sizes in meters, rounded to 2 decimals.
    """
    kitti_dets = []
    for cls_id in range(num_classes):
        if len(detections[cls_id]) > 0:
            for det in detections[cls_id]:
                # (scores-0:1, x-1:2, y-2:3, z-3:4, dim-4:7, yaw-7:8)
                _score, _x, _y, _z, _h, _w, _l, _yaw = det
                # NOTE(review): the "/1" divisions look like leftovers; they
                # only coerce to float before rounding.
                _yaw = round(-_yaw/1, 2)
                # BEV image axes are swapped vs lidar: image y -> lidar x, image x -> lidar y
                x = round(_y / cnf.BEV_HEIGHT * cnf.bound_size_x + cnf.boundary['minX'], 2)
                y = round(_x / cnf.BEV_WIDTH * cnf.bound_size_y + cnf.boundary['minY'], 2)
                z = round(_z + cnf.boundary['minZ'], 2)
                w = round(_w / cnf.BEV_WIDTH * cnf.bound_size_y, 2)
                l = round(_l / cnf.BEV_HEIGHT * cnf.bound_size_x, 2)
                h = round(_h/1, 2)
                kitti_dets.append([cls_id, h, w, l, x, y, z, _yaw])
    return np.array(kitti_dets)
# Inference entry point: load a checkpoint, run the model over the test
# dataloader, and dump per-sample detections as KITTI-style txt files
# (optionally images/video).
if __name__ == '__main__':
    print("=".ljust(66, "="))
    configs = parse_test_configs()
    model = create_model(configs)
    print('\n\n' + '-*=' * 30 + '\n\n')
    # assert os.path.isfile(configs.model_dir), "No file at {}".format(configs.model_dir)
    if os.path.isfile(configs.model_dir):
        model_path = configs.model_dir
    else:
        # for file in os.listdir(configs.model_dir):
        #     model_path = os.path.join(configs.model_dir, file)
        # pick the last model in the checkpoint directory
        # NOTE(review): os.listdir order is arbitrary; sort if "last" should
        # mean the latest checkpoint.
        model_path = os.path.join(configs.model_dir, os.listdir(configs.model_dir)[-1])
    print('Loaded weights from {}\n'.format(model_path))
    # model.load_state_dict(torch.load(model_path))
    configs.device = torch.device('cpu' if configs.no_cuda else 'cuda:{}'.format(configs.gpu_idx))
    model.load_state_dict(torch.load(model_path, map_location=configs.device))
    model = model.to(device=configs.device)
    out_cap = None
    model.eval()
    test_dataloader = create_test_dataloader(configs)
    with torch.no_grad():
        for batch_idx, batch_data in enumerate(test_dataloader):
            bev_maps, metadatas = batch_data
            input_bev_maps = bev_maps.to(configs.device, non_blocking=True).float()
            t1 = time_synchronized()
            outputs = model(input_bev_maps)
            outputs['hm_cen'] = _sigmoid(outputs['hm_cen'])
            outputs['cen_offset'] = _sigmoid(outputs['cen_offset'])
            # detections size (batch_size, K, 10)
            detections = decode(outputs['hm_cen'], outputs['cen_offset'], outputs['direction'], outputs['z_coor'],
                                outputs['dim'], K=configs.K)
            detections = detections.cpu().numpy().astype(np.float32)
            detections = post_processing(detections, configs.num_classes, configs.down_ratio, configs.peak_thresh)
            t2 = time_synchronized()
            detections = detections[0]  # only first batch
            # Draw prediction in the image
            bev_map = (bev_maps.squeeze().permute(1, 2, 0).numpy() * 255).astype(np.uint8)
            bev_map = cv2.resize(bev_map, (cnf.BEV_WIDTH, cnf.BEV_HEIGHT))
            bev_map = draw_predictions(bev_map, detections.copy(), configs.num_classes)
            # Rotate the bev_map
            bev_map = cv2.rotate(bev_map, cv2.ROTATE_180)
            kitti_dets = convert_det_to_real_values(detections)
            print('\tDone testing the {}th sample, time: {:.1f}ms, speed {:.2f}FPS'.format(batch_idx, (t2 - t1) * 1000,
                                                                                           1 / (t2 - t1)))
            if configs.save_test_output:
                img_fn = os.path.basename(metadatas['bev_path'][0])[:-4]
                # NOTE(review): configs.results_dir_img is never set (its
                # assignment is commented out in parse_test_configs), so the
                # 'image' and 'video' branches would raise AttributeError.
                if configs.output_format == 'image':
                    cv2.imwrite(os.path.join(configs.results_dir_img, '{}.jpg'.format(img_fn)), bev_map)
                elif configs.output_format == 'video':
                    if out_cap is None:
                        out_cap_h, out_cap_w = bev_map.shape[:2]
                        fourcc = cv2.VideoWriter_fourcc(*'MJPG')
                        out_cap = cv2.VideoWriter(
                            os.path.join(configs.results_dir_img, '{}.avi'.format(configs.output_video_fn)),
                            fourcc, 30, (out_cap_w, out_cap_h))
                    out_cap.write(bev_map)
                else:
                    pass
                # KITTI label format with the unused 2D fields zeroed out
                txt_path = os.path.join(configs.results_dir,'{}.txt'.format(img_fn))
                txt_file = open(txt_path, 'w')
                for det in kitti_dets:
                    write_line = cnf.CLASS_ID_TO_NAME[det[0]] + ' 0 0 0 0 0 0 0 ' + str(det[1]) + ' ' + str(det[2]) +\
                    ' ' + str(det[3]) + ' ' + str(det[4]) + ' ' + str(det[5]) + ' ' + str(det[6]) + ' ' + str(det[7]) +'\n'
                    txt_file.writelines(write_line)
                txt_file.close()
    if out_cap:
        out_cap.release()
    cv2.destroyAllWindows()
@@ -0,0 +1,163 @@ | |||||
# ------------------------------------------------------------------------------ | |||||
# Portions of this code are from | |||||
# CornerNet (https://github.com/princeton-vl/CornerNet) | |||||
# Copyright (c) 2018, University of Michigan | |||||
# Licensed under the BSD 3-Clause License | |||||
# Modified by Nguyen Mau Dung (2020.08.09) | |||||
# ------------------------------------------------------------------------------ | |||||
import os | |||||
import sys | |||||
import math | |||||
import torch.nn as nn | |||||
import torch | |||||
import torch.nn.functional as F | |||||
src_dir = os.path.dirname(os.path.realpath(__file__)) | |||||
# while not src_dir.endswith("sfa"): | |||||
# src_dir = os.path.dirname(src_dir) | |||||
if src_dir not in sys.path: | |||||
sys.path.append(src_dir) | |||||
from utils.torch_utils import to_cpu, _sigmoid | |||||
def _gather_feat(feat, ind, mask=None): | |||||
dim = feat.size(2) | |||||
ind = ind.unsqueeze(2).expand(ind.size(0), ind.size(1), dim) | |||||
feat = feat.gather(1, ind) | |||||
if mask is not None: | |||||
mask = mask.unsqueeze(2).expand_as(feat) | |||||
feat = feat[mask] | |||||
feat = feat.view(-1, dim) | |||||
return feat | |||||
def _transpose_and_gather_feat(feat, ind):
    """Gather per-pixel features: (B, C, H, W) + ind (B, K) -> (B, K, C)."""
    num_batch = feat.size(0)
    num_ch = feat.size(1)
    # move channels last, flatten spatial dims so ind indexes into H*W
    reshaped = feat.permute(0, 2, 3, 1).contiguous().view(num_batch, -1, num_ch)
    return _gather_feat(reshaped, ind)
def _neg_loss(pred, gt, alpha=2, beta=4): | |||||
''' Modified focal loss. Exactly the same as CornerNet. | |||||
Runs faster and costs a little bit more memory | |||||
Arguments: | |||||
pred (batch x c x h x w) | |||||
gt_regr (batch x c x h x w) | |||||
''' | |||||
pos_inds = gt.eq(1).float() | |||||
neg_inds = gt.lt(1).float() | |||||
neg_weights = torch.pow(1 - gt, beta) | |||||
loss = 0 | |||||
pos_loss = torch.log(pred) * torch.pow(1 - pred, alpha) * pos_inds | |||||
neg_loss = torch.log(1 - pred) * torch.pow(pred, alpha) * neg_weights * neg_inds | |||||
num_pos = pos_inds.float().sum() | |||||
pos_loss = pos_loss.sum() | |||||
neg_loss = neg_loss.sum() | |||||
if num_pos == 0: | |||||
loss = loss - neg_loss | |||||
else: | |||||
loss = loss - (pos_loss + neg_loss) / num_pos | |||||
return loss | |||||
class FocalLoss(nn.Module):
    '''nn.Module wrapper for the modified CornerNet focal loss.'''

    def __init__(self):
        super(FocalLoss, self).__init__()
        # kept as an attribute so the functional implementation can be swapped
        self.neg_loss = _neg_loss

    def forward(self, out, target):
        # Delegate to the functional focal-loss implementation.
        loss = self.neg_loss(out, target)
        return loss
class L1Loss(nn.Module):
    """Masked L1 regression loss over gathered feature-map locations.

    forward gathers predictions at the object-center indices, masks out
    padded object slots, and averages the absolute error over valid entries.
    """
    def __init__(self):
        super(L1Loss, self).__init__()
    def forward(self, output, mask, ind, target):
        """
        :param output: (B, C, H, W) regression head output
        :param mask: (B, K) 1 for real objects, 0 for padding
        :param ind: (B, K) flattened center indices into H*W
        :param target: (B, K, C) regression targets
        """
        pred = _transpose_and_gather_feat(output, ind)
        mask = mask.unsqueeze(2).expand_as(pred).float()
        # reduction='sum' is the modern equivalent of the long-deprecated
        # size_average=False flag (same value, no deprecation warning).
        loss = F.l1_loss(pred * mask, target * mask, reduction='sum')
        loss = loss / (mask.sum() + 1e-4)
        return loss
class L1Loss_Balanced(nn.Module):
    """Balanced L1 Loss
    paper: https://arxiv.org/pdf/1904.02701.pdf (CVPR 2019)
    Code refer from: https://github.com/OceanPang/Libra_R-CNN
    """

    def __init__(self, alpha=0.5, gamma=1.5, beta=1.0):
        super(L1Loss_Balanced, self).__init__()
        assert beta > 0
        self.alpha = alpha
        self.gamma = gamma
        self.beta = beta

    def forward(self, output, mask, ind, target):
        # Gather predictions at object centers, zero the padded slots, and
        # normalize by the number of valid mask entries.
        pred = _transpose_and_gather_feat(output, ind)
        mask = mask.unsqueeze(2).expand_as(pred).float()
        raw = self.balanced_l1_loss(pred * mask, target * mask)
        return raw.sum() / (mask.sum() + 1e-4)

    def balanced_l1_loss(self, pred, target):
        """Element-wise balanced L1: log-shaped inside |diff| < beta, linear outside."""
        assert pred.size() == target.size() and target.numel() > 0
        diff = torch.abs(pred - target)
        b = math.exp(self.gamma / self.alpha) - 1
        inside = self.alpha / b * (b * diff + 1) * torch.log(b * diff / self.beta + 1) - self.alpha * diff
        outside = self.gamma * diff + self.gamma / b - self.alpha * self.beta
        return torch.where(diff < self.beta, inside, outside)
class Compute_Loss(nn.Module):
    """Combine the per-head losses for training: focal loss on the center
    heatmap, plain masked L1 for center offset and direction, balanced L1
    for z coordinate and dimensions.  All head weights default to 1.
    """
    def __init__(self, device):
        super(Compute_Loss, self).__init__()
        self.device = device
        self.focal_loss = FocalLoss()
        self.l1_loss = L1Loss()
        self.l1_loss_balanced = L1Loss_Balanced(alpha=0.5, gamma=1.5, beta=1.0)
        # per-head weights for the weighted sum below
        self.weight_hm_cen = 1.
        self.weight_z_coor, self.weight_cenoff, self.weight_dim, self.weight_direction = 1., 1., 1., 1.
    def forward(self, outputs, tg):
        """Return (total_loss tensor, dict of detached per-head loss floats).

        Note: outputs['hm_cen'] and outputs['cen_offset'] are overwritten in
        place with their clamped sigmoids.
        """
        # tg: targets
        outputs['hm_cen'] = _sigmoid(outputs['hm_cen'])
        outputs['cen_offset'] = _sigmoid(outputs['cen_offset'])
        l_hm_cen = self.focal_loss(outputs['hm_cen'], tg['hm_cen'])
        l_cen_offset = self.l1_loss(outputs['cen_offset'], tg['obj_mask'], tg['indices_center'], tg['cen_offset'])
        l_direction = self.l1_loss(outputs['direction'], tg['obj_mask'], tg['indices_center'], tg['direction'])
        # Apply the L1_loss balanced for z coor and dimension regression
        l_z_coor = self.l1_loss_balanced(outputs['z_coor'], tg['obj_mask'], tg['indices_center'], tg['z_coor'])
        l_dim = self.l1_loss_balanced(outputs['dim'], tg['obj_mask'], tg['indices_center'], tg['dim'])
        total_loss = l_hm_cen * self.weight_hm_cen + l_cen_offset * self.weight_cenoff + \
                     l_dim * self.weight_dim + l_direction * self.weight_direction + \
                     l_z_coor * self.weight_z_coor
        # detached scalar copies for logging
        loss_stats = {
            'total_loss': to_cpu(total_loss).item(),
            'hm_cen_loss': to_cpu(l_hm_cen).item(),
            'cen_offset_loss': to_cpu(l_cen_offset).item(),
            'dim_loss': to_cpu(l_dim).item(),
            'direction_loss': to_cpu(l_direction).item(),
            'z_coor_loss': to_cpu(l_z_coor).item(),
        }
        return total_loss, loss_stats
@@ -0,0 +1,252 @@ | |||||
""" | |||||
# --------------------------------------------------------------------------------- | |||||
# -*- coding: utf-8 -*- | |||||
----------------------------------------------------------------------------------- | |||||
# Copyright (c) Microsoft | |||||
# Licensed under the MIT License. | |||||
# Written by Bin Xiao (Bin.Xiao@microsoft.com) | |||||
# Modified by Xingyi Zhou | |||||
# Refer from: https://github.com/xingyizhou/CenterNet | |||||
# Modifier: Nguyen Mau Dung (2020.08.09) | |||||
# ------------------------------------------------------------------------------ | |||||
""" | |||||
from __future__ import absolute_import | |||||
from __future__ import division | |||||
from __future__ import print_function | |||||
import os | |||||
import torch | |||||
import torch.nn as nn | |||||
import torch.utils.model_zoo as model_zoo | |||||
import torch.nn.functional as F | |||||
BN_MOMENTUM = 0.1 | |||||
model_urls = { | |||||
'resnet18': 'https://download.pytorch.org/models/resnet18-5c106cde.pth', | |||||
'resnet34': 'https://download.pytorch.org/models/resnet34-333f7ec4.pth', | |||||
'resnet50': 'https://download.pytorch.org/models/resnet50-19c8e357.pth', | |||||
'resnet101': 'https://download.pytorch.org/models/resnet101-5d3b4d8f.pth', | |||||
'resnet152': 'https://download.pytorch.org/models/resnet152-b121ed2d.pth', | |||||
} | |||||
def conv3x3(in_planes, out_planes, stride=1):
    """Return a bias-free 3x3 convolution that preserves spatial size
    (padding=1) at stride 1."""
    return nn.Conv2d(
        in_planes,
        out_planes,
        kernel_size=3,
        stride=stride,
        padding=1,
        bias=False,
    )
class BasicBlock(nn.Module):
    """Two-conv residual block (ResNet-18/34 style); expansion factor 1."""

    expansion = 1

    def __init__(self, inplanes, planes, stride=1, downsample=None):
        super(BasicBlock, self).__init__()
        # The first conv carries the (optional) stride; the second keeps size.
        self.conv1 = conv3x3(inplanes, planes, stride)
        self.bn1 = nn.BatchNorm2d(planes, momentum=BN_MOMENTUM)
        self.relu = nn.ReLU(inplace=True)
        self.conv2 = conv3x3(planes, planes)
        self.bn2 = nn.BatchNorm2d(planes, momentum=BN_MOMENTUM)
        self.downsample = downsample
        self.stride = stride

    def forward(self, x):
        # Project the shortcut only when a downsample module was supplied.
        shortcut = x if self.downsample is None else self.downsample(x)
        out = self.relu(self.bn1(self.conv1(x)))
        out = self.bn2(self.conv2(out))
        out += shortcut
        return self.relu(out)
class Bottleneck(nn.Module):
    """1x1 -> 3x3 -> 1x1 residual bottleneck (ResNet-50+ style);
    output channels are `planes * expansion`."""

    expansion = 4

    def __init__(self, inplanes, planes, stride=1, downsample=None):
        super(Bottleneck, self).__init__()
        # Reduce, transform (stride lives on the 3x3), then expand.
        self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=1, bias=False)
        self.bn1 = nn.BatchNorm2d(planes, momentum=BN_MOMENTUM)
        self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride, padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(planes, momentum=BN_MOMENTUM)
        self.conv3 = nn.Conv2d(planes, planes * self.expansion, kernel_size=1, bias=False)
        self.bn3 = nn.BatchNorm2d(planes * self.expansion, momentum=BN_MOMENTUM)
        self.relu = nn.ReLU(inplace=True)
        self.downsample = downsample
        self.stride = stride

    def forward(self, x):
        shortcut = x if self.downsample is None else self.downsample(x)
        out = self.relu(self.bn1(self.conv1(x)))
        out = self.relu(self.bn2(self.conv2(out)))
        out = self.bn3(self.conv3(out))
        out += shortcut
        return self.relu(out)
class PoseResNet(nn.Module):
    """ResNet backbone with a Keypoint Feature Pyramid Network (KFPN) head.

    For every entry in `heads` (head name -> output channels) one small conv
    head is attached to each of three pyramid levels (module attributes named
    'fpn{level}_{head}'); the per-level outputs are fused by a softmax-weighted
    sum in `apply_kfpn`.

    NOTE(review): the 1x1 fusion convs hard-code BasicBlock channel widths
    (768 = 512+256, 384 = 256+128, 192 = 128+64), i.e. resnet18/34 backbones —
    confirm before instantiating with a Bottleneck depth (expansion 4).
    """

    def __init__(self, block, layers, heads, head_conv, **kwargs):
        # `block`: residual block class; `layers`: blocks per stage;
        # `head_conv` <= 0 means a single 1x1 conv per head.
        self.inplanes = 64
        self.deconv_with_bias = False
        self.heads = heads
        super(PoseResNet, self).__init__()
        # Stem: 7x7 stride-2 conv + stride-2 max-pool -> 1/4 input resolution.
        self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3, bias=False)
        self.bn1 = nn.BatchNorm2d(64, momentum=BN_MOMENTUM)
        self.relu = nn.ReLU(inplace=True)
        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
        self.layer1 = self._make_layer(block, 64, layers[0])
        self.layer2 = self._make_layer(block, 128, layers[1], stride=2)
        self.layer3 = self._make_layer(block, 256, layers[2], stride=2)
        self.layer4 = self._make_layer(block, 512, layers[3], stride=2)
        # 1x1 convs that fuse the concatenated (upsampled, skip) features.
        self.conv_up_level1 = nn.Conv2d(768, 256, kernel_size=1, stride=1, padding=0)
        self.conv_up_level2 = nn.Conv2d(384, 128, kernel_size=1, stride=1, padding=0)
        self.conv_up_level3 = nn.Conv2d(192, 64, kernel_size=1, stride=1, padding=0)
        # One head per (pyramid level, head name) pair; channels per level.
        fpn_channels = [256, 128, 64]
        for fpn_idx, fpn_c in enumerate(fpn_channels):
            for head in sorted(self.heads):
                num_output = self.heads[head]
                if head_conv > 0:
                    fc = nn.Sequential(
                        nn.Conv2d(fpn_c, head_conv, kernel_size=3, padding=1, bias=True),
                        nn.ReLU(inplace=True),
                        nn.Conv2d(head_conv, num_output, kernel_size=1, stride=1, padding=0))
                else:
                    fc = nn.Conv2d(in_channels=fpn_c, out_channels=num_output, kernel_size=1, stride=1, padding=0)
                self.__setattr__('fpn{}_{}'.format(fpn_idx, head), fc)

    def _make_layer(self, block, planes, blocks, stride=1):
        """Build one ResNet stage of `blocks` residual blocks."""
        downsample = None
        # Project the shortcut when spatial size or channel count changes.
        if stride != 1 or self.inplanes != planes * block.expansion:
            downsample = nn.Sequential(
                nn.Conv2d(self.inplanes, planes * block.expansion, kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(planes * block.expansion, momentum=BN_MOMENTUM),
            )
        layers = []
        layers.append(block(self.inplanes, planes, stride, downsample))
        self.inplanes = planes * block.expansion
        for i in range(1, blocks):
            layers.append(block(self.inplanes, planes))
        return nn.Sequential(*layers)

    def forward(self, x):
        """Return {head name: fused output map at 1/4 input resolution}."""
        _, _, input_h, input_w = x.size()
        # Target output (heatmap) resolution: 1/4 of the input.
        hm_h, hm_w = input_h // 4, input_w // 4
        x = self.conv1(x)
        x = self.bn1(x)
        x = self.relu(x)
        x = self.maxpool(x)
        out_layer1 = self.layer1(x)
        out_layer2 = self.layer2(out_layer1)
        out_layer3 = self.layer3(out_layer2)
        out_layer4 = self.layer4(out_layer3)
        # Top-down path: upsample x2 and concatenate with the skip feature.
        up_level1 = F.interpolate(out_layer4, scale_factor=2, mode='bilinear', align_corners=True)
        concat_level1 = torch.cat((up_level1, out_layer3), dim=1)
        up_level2 = F.interpolate(self.conv_up_level1(concat_level1), scale_factor=2, mode='bilinear',
                                  align_corners=True)
        concat_level2 = torch.cat((up_level2, out_layer2), dim=1)
        up_level3 = F.interpolate(self.conv_up_level2(concat_level2), scale_factor=2, mode='bilinear',
                                  align_corners=True)
        # Final fusion stays at the same resolution (no further upsampling).
        up_level4 = self.conv_up_level3(torch.cat((up_level3, out_layer1), dim=1))
        ret = {}
        for head in self.heads:
            temp_outs = []
            for fpn_idx, fdn_input in enumerate([up_level2, up_level3, up_level4]):
                fpn_out = self.__getattr__('fpn{}_{}'.format(fpn_idx, head))(fdn_input)
                _, _, fpn_out_h, fpn_out_w = fpn_out.size()
                # Resize every level to the heatmap resolution before fusing
                # (default interpolation mode, i.e. nearest-neighbor).
                if (fpn_out_w != hm_w) or (fpn_out_h != hm_h):
                    fpn_out = F.interpolate(fpn_out, size=(hm_h, hm_w))
                temp_outs.append(fpn_out)
            # Softmax-weighted fusion across pyramid levels (KFPN).
            final_out = self.apply_kfpn(temp_outs)
            ret[head] = final_out
        return ret

    def apply_kfpn(self, outs):
        """Fuse per-level maps with element-wise softmax weights over levels."""
        # Stack levels on a new trailing axis, weight each element by its
        # softmax score across that axis, and sum the axis away.
        outs = torch.cat([out.unsqueeze(-1) for out in outs], dim=-1)
        softmax_outs = F.softmax(outs, dim=-1)
        ret_outs = (outs * softmax_outs).sum(dim=-1)
        return ret_outs

    def init_weights(self, num_layers, pretrained=True):
        """Initialize the head convs, then load ImageNet ResNet weights
        (head/FPN keys not present in the checkpoint are skipped via
        strict=False)."""
        if pretrained:
            # TODO: Check initial weights for head later
            for fpn_idx in [0, 1, 2]:  # 3 FPN layers
                for head in self.heads:
                    final_layer = self.__getattr__('fpn{}_{}'.format(fpn_idx, head))
                    for i, m in enumerate(final_layer.modules()):
                        if isinstance(m, nn.Conv2d):
                            # Only the head's final conv matches the output
                            # channel count; leave intermediate convs alone.
                            if m.weight.shape[0] == self.heads[head]:
                                if 'hm' in head:
                                    # presumably the CenterNet focal-loss bias
                                    # init (start heatmap prob near 0.01) —
                                    # TODO confirm against upstream CenterNet.
                                    nn.init.constant_(m.bias, -2.19)
                                else:
                                    nn.init.normal_(m.weight, std=0.001)
                                    nn.init.constant_(m.bias, 0)
            url = model_urls['resnet{}'.format(num_layers)]
            pretrained_state_dict = model_zoo.load_url(url)
            print('=> loading pretrained model {}'.format(url))
            self.load_state_dict(pretrained_state_dict, strict=False)
# Supported ResNet depth -> (residual block class, blocks per stage).
resnet_spec = {18: (BasicBlock, [2, 2, 2, 2]),
               34: (BasicBlock, [3, 4, 6, 3]),
               50: (Bottleneck, [3, 4, 6, 3]),
               101: (Bottleneck, [3, 4, 23, 3]),
               152: (Bottleneck, [3, 8, 36, 3])}
def get_pose_net(num_layers, heads, head_conv, imagenet_pretrained):
    """Build a KFPN PoseResNet of the given depth and initialize its weights
    (optionally from ImageNet-pretrained ResNet)."""
    block_type, stage_blocks = resnet_spec[num_layers]
    net = PoseResNet(block_type, stage_blocks, heads, head_conv=head_conv)
    net.init_weights(num_layers, pretrained=imagenet_pretrained)
    return net
@@ -0,0 +1,134 @@ | |||||
""" | |||||
# -*- coding: utf-8 -*- | |||||
----------------------------------------------------------------------------------- | |||||
# Author: Nguyen Mau Dung | |||||
# DoC: 2020.08.09 | |||||
# email: nguyenmaudung93.kstn@gmail.com | |||||
----------------------------------------------------------------------------------- | |||||
# Description: utils functions that use for model | |||||
""" | |||||
import os | |||||
import sys | |||||
import torch | |||||
src_dir = os.path.dirname(os.path.realpath(__file__)) | |||||
# while not src_dir.endswith("sfa"): | |||||
# src_dir = os.path.dirname(src_dir) | |||||
if src_dir not in sys.path: | |||||
sys.path.append(src_dir) | |||||
from models import resnet, fpn_resnet | |||||
def create_model(configs):
    """Create model based on architecture name.

    `configs.arch` must name a backbone ('resnet' or 'fpn_resnet') and end
    with '_<num_layers>', e.g. 'fpn_resnet_18'.

    Raises:
        ValueError: if the trailing layer count cannot be parsed, or the
            backbone name is not recognized.
    """
    try:
        num_layers = int(configs.arch.split('_')[-1])
    except (ValueError, TypeError) as err:
        # Was a bare `except: raise ValueError` (message-less, catching
        # everything); keep the raised type but narrow the catch and explain.
        raise ValueError("invalid arch '{}': expected a trailing layer count, "
                         "e.g. 'fpn_resnet_18'".format(configs.arch)) from err
    if 'fpn_resnet' in configs.arch:
        print('using ResNet architecture with feature pyramid')
        model = fpn_resnet.get_pose_net(num_layers=num_layers, heads=configs.heads, head_conv=configs.head_conv,
                                        imagenet_pretrained=configs.imagenet_pretrained)
    elif 'resnet' in configs.arch:
        print('using ResNet architecture')
        model = resnet.get_pose_net(num_layers=num_layers, heads=configs.heads, head_conv=configs.head_conv,
                                    imagenet_pretrained=configs.imagenet_pretrained)
    else:
        # `assert False` disappears under `python -O`; raise explicitly.
        raise ValueError('Undefined model backbone: {}'.format(configs.arch))
    return model
def get_num_parameters(model):
    """Count the trainable (requires_grad) parameters of the model."""
    # Unwrap DataParallel/DistributedDataParallel-style wrappers, which
    # expose the real network under `.module`.
    network = model.module if hasattr(model, 'module') else model
    return sum(p.numel() for p in network.parameters() if p.requires_grad)
def make_data_parallel(model, configs):
    """Move `model` to GPU(s) and wrap it for (distributed) data parallelism.

    Side effects: in the single-GPU-per-process distributed case,
    `configs.batch_size` and `configs.num_workers` are divided across
    `configs.ngpus_per_node` in place.
    """
    if configs.distributed:
        # For multiprocessing distributed, DistributedDataParallel constructor
        # should always set the single device scope, otherwise,
        # DistributedDataParallel will use all available devices.
        if configs.gpu_idx is not None:
            # set_device must precede .cuda() so the model lands on this rank's GPU.
            torch.cuda.set_device(configs.gpu_idx)
            model.cuda(configs.gpu_idx)
            # When using a single GPU per process and per
            # DistributedDataParallel, we need to divide the batch size
            # ourselves based on the total number of GPUs we have
            configs.batch_size = int(configs.batch_size / configs.ngpus_per_node)
            configs.num_workers = int((configs.num_workers + configs.ngpus_per_node - 1) / configs.ngpus_per_node)
            model = torch.nn.parallel.DistributedDataParallel(model, device_ids=[configs.gpu_idx])
        else:
            model.cuda()
            # DistributedDataParallel will divide and allocate batch_size to all
            # available GPUs if device_ids are not set
            model = torch.nn.parallel.DistributedDataParallel(model)
    elif configs.gpu_idx is not None:
        # Non-distributed, pinned to one explicit GPU.
        torch.cuda.set_device(configs.gpu_idx)
        model = model.cuda(configs.gpu_idx)
    else:
        # DataParallel will divide and allocate batch_size to all available GPUs
        model = torch.nn.DataParallel(model).cuda()
    return model
if __name__ == '__main__':
    # Smoke test: build a model from CLI args, run one random input through
    # it, and print each head's output size plus the parameter count.
    import argparse

    from torchsummary import summary
    from easydict import EasyDict as edict

    parser = argparse.ArgumentParser(description='RTM3D Implementation')
    parser.add_argument('-a', '--arch', type=str, default='resnet_18', metavar='ARCH',
                        help='The name of the model architecture')
    parser.add_argument('--head_conv', type=int, default=-1,
                        help='conv layer channels for output head'
                             '0 for no conv layer'
                             '-1 for default setting: '
                             '64 for resnets and 256 for dla.')

    configs = edict(vars(parser.parse_args()))
    if configs.head_conv == -1:  # init default head_conv
        configs.head_conv = 256 if 'dla' in configs.arch else 64
    # RTM3D-style head configuration (class heatmap, vertexes, offsets, ...).
    configs.num_classes = 3
    configs.num_vertexes = 8
    configs.num_center_offset = 2
    configs.num_vertexes_offset = 2
    configs.num_dimension = 3
    configs.num_rot = 8
    configs.num_depth = 1
    configs.num_wh = 2
    # head name -> number of output channels
    configs.heads = {
        'hm_mc': configs.num_classes,
        'hm_ver': configs.num_vertexes,
        'vercoor': configs.num_vertexes * 2,
        'cenoff': configs.num_center_offset,
        'veroff': configs.num_vertexes_offset,
        'dim': configs.num_dimension,
        'rot': configs.num_rot,
        'depth': configs.num_depth,
        'wh': configs.num_wh
    }
    # NOTE(review): hard-coded to the second GPU — switch to the commented
    # CPU line (or parameterize) when running elsewhere.
    configs.device = torch.device('cuda:1')
    # configs.device = torch.device('cpu')

    model = create_model(configs).to(device=configs.device)
    sample_input = torch.randn((1, 3, 224, 224)).to(device=configs.device)
    # summary(model.cuda(1), (3, 224, 224))
    output = model(sample_input)
    for hm_name, hm_out in output.items():
        print('hm_name: {}, hm_out size: {}'.format(hm_name, hm_out.size()))
    print('number of parameters: {}'.format(get_num_parameters(model)))
@@ -0,0 +1,284 @@ | |||||
""" | |||||
# --------------------------------------------------------------------------------- | |||||
# -*- coding: utf-8 -*- | |||||
----------------------------------------------------------------------------------- | |||||
# Copyright (c) Microsoft | |||||
# Licensed under the MIT License. | |||||
# Written by Bin Xiao (Bin.Xiao@microsoft.com) | |||||
# Modified by Xingyi Zhou | |||||
# Refer from: https://github.com/xingyizhou/CenterNet | |||||
# Modifier: Nguyen Mau Dung (2020.08.09) | |||||
# ------------------------------------------------------------------------------ | |||||
""" | |||||
from __future__ import absolute_import | |||||
from __future__ import division | |||||
from __future__ import print_function | |||||
import os | |||||
import torch | |||||
import torch.nn as nn | |||||
import torch.utils.model_zoo as model_zoo | |||||
BN_MOMENTUM = 0.1 | |||||
model_urls = { | |||||
'resnet18': 'https://download.pytorch.org/models/resnet18-5c106cde.pth', | |||||
'resnet34': 'https://download.pytorch.org/models/resnet34-333f7ec4.pth', | |||||
'resnet50': 'https://download.pytorch.org/models/resnet50-19c8e357.pth', | |||||
'resnet101': 'https://download.pytorch.org/models/resnet101-5d3b4d8f.pth', | |||||
'resnet152': 'https://download.pytorch.org/models/resnet152-b121ed2d.pth', | |||||
} | |||||
def conv3x3(in_planes, out_planes, stride=1):
    """Bias-free 3x3 convolution; padding=1 preserves spatial size at stride 1."""
    conv_kwargs = dict(kernel_size=3, stride=stride, padding=1, bias=False)
    return nn.Conv2d(in_planes, out_planes, **conv_kwargs)
class BasicBlock(nn.Module):
    """Residual block with two 3x3 convolutions (ResNet-18/34); expansion 1."""

    expansion = 1

    def __init__(self, inplanes, planes, stride=1, downsample=None):
        super(BasicBlock, self).__init__()
        # Stride (if any) is applied by the first convolution.
        self.conv1 = conv3x3(inplanes, planes, stride)
        self.bn1 = nn.BatchNorm2d(planes, momentum=BN_MOMENTUM)
        self.relu = nn.ReLU(inplace=True)
        self.conv2 = conv3x3(planes, planes)
        self.bn2 = nn.BatchNorm2d(planes, momentum=BN_MOMENTUM)
        self.downsample = downsample
        self.stride = stride

    def forward(self, x):
        identity = self.downsample(x) if self.downsample is not None else x
        out = self.relu(self.bn1(self.conv1(x)))
        out = self.bn2(self.conv2(out))
        out += identity
        out = self.relu(out)
        return out
class Bottleneck(nn.Module):
    """Residual bottleneck (1x1 reduce, 3x3, 1x1 expand); output channels
    are `planes * expansion` (ResNet-50/101/152)."""

    expansion = 4

    def __init__(self, inplanes, planes, stride=1, downsample=None):
        super(Bottleneck, self).__init__()
        expanded = planes * self.expansion
        self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=1, bias=False)
        self.bn1 = nn.BatchNorm2d(planes, momentum=BN_MOMENTUM)
        # Any spatial stride lives on the middle 3x3 convolution.
        self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride, padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(planes, momentum=BN_MOMENTUM)
        self.conv3 = nn.Conv2d(planes, expanded, kernel_size=1, bias=False)
        self.bn3 = nn.BatchNorm2d(expanded, momentum=BN_MOMENTUM)
        self.relu = nn.ReLU(inplace=True)
        self.downsample = downsample
        self.stride = stride

    def forward(self, x):
        identity = self.downsample(x) if self.downsample is not None else x
        out = self.relu(self.bn1(self.conv1(x)))
        out = self.relu(self.bn2(self.conv2(out)))
        out = self.bn3(self.conv3(out))
        out += identity
        out = self.relu(out)
        return out
class PoseResNet(nn.Module):
    """CenterNet-style network: ResNet backbone, three deconvolution
    upsampling layers, and one small conv head per entry in `heads`.

    Args:
        block: residual block class (BasicBlock or Bottleneck).
        layers: number of blocks in each of the four ResNet stages.
        heads: dict mapping head name -> number of output channels; each head
            becomes a module attribute of the same name.
        head_conv: hidden channels of each head; <= 0 means a single 1x1 conv.
    """

    def __init__(self, block, layers, heads, head_conv, **kwargs):
        super(PoseResNet, self).__init__()
        self.inplanes = 64
        self.deconv_with_bias = False
        self.heads = heads

        # Stem: 7x7 stride-2 conv + stride-2 max-pool -> 1/4 input resolution.
        self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3,
                               bias=False)
        self.bn1 = nn.BatchNorm2d(64, momentum=BN_MOMENTUM)
        self.relu = nn.ReLU(inplace=True)
        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
        self.layer1 = self._make_layer(block, 64, layers[0])
        self.layer2 = self._make_layer(block, 128, layers[1], stride=2)
        self.layer3 = self._make_layer(block, 256, layers[2], stride=2)
        self.layer4 = self._make_layer(block, 512, layers[3], stride=2)

        # Three 4x4 stride-2 deconvs bring 1/32 features back up to 1/4.
        self.deconv_layers = self._make_deconv_layer(
            3,
            [256, 256, 256],
            [4, 4, 4],
        )

        # One output head per requested target map.
        for head in sorted(self.heads):
            num_output = self.heads[head]
            if head_conv > 0:
                fc = nn.Sequential(
                    nn.Conv2d(256, head_conv, kernel_size=3, padding=1, bias=True),
                    nn.ReLU(inplace=True),
                    nn.Conv2d(head_conv, num_output, kernel_size=1, stride=1, padding=0))
            else:
                fc = nn.Conv2d(in_channels=256, out_channels=num_output,
                               kernel_size=1, stride=1, padding=0)
            self.__setattr__(head, fc)

    def _make_layer(self, block, planes, blocks, stride=1):
        """Build one ResNet stage of `blocks` residual blocks."""
        downsample = None
        # Project the shortcut when spatial size or channel count changes.
        if stride != 1 or self.inplanes != planes * block.expansion:
            downsample = nn.Sequential(
                nn.Conv2d(self.inplanes, planes * block.expansion,
                          kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(planes * block.expansion, momentum=BN_MOMENTUM),
            )
        layers = [block(self.inplanes, planes, stride, downsample)]
        self.inplanes = planes * block.expansion
        for _ in range(1, blocks):
            layers.append(block(self.inplanes, planes))
        return nn.Sequential(*layers)

    def _get_deconv_cfg(self, deconv_kernel, index):
        """Return (kernel, padding, output_padding) so a stride-2
        ConvTranspose2d exactly doubles the spatial size.

        Raises:
            ValueError: for kernel sizes other than 2, 3 or 4. (The original
                code fell through and crashed later with a NameError on the
                undefined `padding`; fail loudly and clearly instead.)
        """
        if deconv_kernel == 4:
            padding, output_padding = 1, 0
        elif deconv_kernel == 3:
            padding, output_padding = 1, 1
        elif deconv_kernel == 2:
            padding, output_padding = 0, 0
        else:
            raise ValueError('unsupported deconv kernel size: {}'.format(deconv_kernel))
        return deconv_kernel, padding, output_padding

    def _make_deconv_layer(self, num_layers, num_filters, num_kernels):
        """Stack `num_layers` (ConvTranspose2d -> BN -> ReLU) upsampling units."""
        assert num_layers == len(num_filters), \
            'ERROR: num_deconv_layers is different len(num_deconv_filters)'
        assert num_layers == len(num_kernels), \
            'ERROR: num_deconv_layers is different len(num_deconv_filters)'
        layers = []
        for i in range(num_layers):
            kernel, padding, output_padding = self._get_deconv_cfg(num_kernels[i], i)
            planes = num_filters[i]
            layers.append(
                nn.ConvTranspose2d(
                    in_channels=self.inplanes,
                    out_channels=planes,
                    kernel_size=kernel,
                    stride=2,
                    padding=padding,
                    output_padding=output_padding,
                    bias=self.deconv_with_bias))
            layers.append(nn.BatchNorm2d(planes, momentum=BN_MOMENTUM))
            layers.append(nn.ReLU(inplace=True))
            self.inplanes = planes
        return nn.Sequential(*layers)

    def forward(self, x):
        """Run backbone + deconv stack; return {head name: output map}."""
        x = self.conv1(x)
        x = self.bn1(x)
        x = self.relu(x)
        x = self.maxpool(x)
        x = self.layer1(x)
        x = self.layer2(x)
        x = self.layer3(x)
        x = self.layer4(x)
        x = self.deconv_layers(x)
        ret = {}
        for head in self.heads:
            ret[head] = self.__getattr__(head)(x)
        return ret

    def init_weights(self, num_layers, pretrained=True):
        """Initialize deconv/head weights, then load ImageNet ResNet weights
        (keys absent from the checkpoint are skipped via strict=False)."""
        if pretrained:
            for _, m in self.deconv_layers.named_modules():
                if isinstance(m, nn.ConvTranspose2d):
                    nn.init.normal_(m.weight, std=0.001)
                    if self.deconv_with_bias:
                        nn.init.constant_(m.bias, 0)
                elif isinstance(m, nn.BatchNorm2d):
                    nn.init.constant_(m.weight, 1)
                    nn.init.constant_(m.bias, 0)
            for head in self.heads:
                final_layer = self.__getattr__(head)
                for i, m in enumerate(final_layer.modules()):
                    if isinstance(m, nn.Conv2d):
                        # Only the head's final conv matches the output
                        # channel count; leave intermediate convs alone.
                        if m.weight.shape[0] == self.heads[head]:
                            if 'hm' in head:
                                # presumably the CenterNet focal-loss bias
                                # init (heatmap prob ~0.01 at start) — TODO
                                # confirm against upstream CenterNet.
                                nn.init.constant_(m.bias, -2.19)
                            else:
                                nn.init.normal_(m.weight, std=0.001)
                                nn.init.constant_(m.bias, 0)
            url = model_urls['resnet{}'.format(num_layers)]
            pretrained_state_dict = model_zoo.load_url(url)
            print('=> loading pretrained model {}'.format(url))
            self.load_state_dict(pretrained_state_dict, strict=False)
# Supported ResNet depth -> (residual block class, blocks per stage).
resnet_spec = {18: (BasicBlock, [2, 2, 2, 2]),
               34: (BasicBlock, [3, 4, 6, 3]),
               50: (Bottleneck, [3, 4, 6, 3]),
               101: (Bottleneck, [3, 4, 23, 3]),
               152: (Bottleneck, [3, 8, 36, 3])}
def get_pose_net(num_layers, heads, head_conv, imagenet_pretrained):
    """Build a deconv-head PoseResNet of depth `num_layers` and initialize it
    (optionally from ImageNet-pretrained ResNet weights)."""
    block_cls, stage_blocks = resnet_spec[num_layers]
    net = PoseResNet(block_cls, stage_blocks, heads, head_conv=head_conv)
    net.init_weights(num_layers, pretrained=imagenet_pretrained)
    return net
@@ -0,0 +1,290 @@ | |||||
""" | |||||
# -*- coding: utf-8 -*- | |||||
----------------------------------------------------------------------------------- | |||||
# Author: Nguyen Mau Dung | |||||
# DoC: 2020.08.17 | |||||
# email: nguyenmaudung93.kstn@gmail.com | |||||
----------------------------------------------------------------------------------- | |||||
# Description: This script for training | |||||
""" | |||||
import time | |||||
import numpy as np | |||||
import sys | |||||
import random | |||||
import os | |||||
import warnings | |||||
warnings.filterwarnings("ignore", category=UserWarning) | |||||
import torch | |||||
from torch.utils.tensorboard import SummaryWriter | |||||
import torch.distributed as dist | |||||
import torch.multiprocessing as mp | |||||
import torch.utils.data.distributed | |||||
from tqdm import tqdm | |||||
src_dir = os.path.dirname(os.path.realpath(__file__)) | |||||
# while not src_dir.endswith("sfa"): | |||||
# src_dir = os.path.dirname(src_dir) | |||||
if src_dir not in sys.path: | |||||
sys.path.append(src_dir) | |||||
from data_process.kitti_dataloader import create_train_dataloader, create_val_dataloader | |||||
from models.model_utils import create_model, make_data_parallel, get_num_parameters | |||||
from utils.train_utils import create_optimizer, create_lr_scheduler, get_saved_state, save_checkpoint | |||||
from utils.torch_utils import reduce_tensor, to_python_float | |||||
from utils.misc import AverageMeter, ProgressMeter | |||||
from utils.logger import Logger | |||||
from config.train_config import parse_train_configs | |||||
from losses.losses import Compute_Loss | |||||
def main():
    """Entry point: parse configs, seed RNGs, and launch the worker(s)."""
    configs = parse_train_configs()

    if configs.seed is not None:
        # Reproducibility (at the cost of cudnn auto-tuning).
        random.seed(configs.seed)
        np.random.seed(configs.seed)
        torch.manual_seed(configs.seed)
        torch.backends.cudnn.deterministic = True
        torch.backends.cudnn.benchmark = False

    if configs.gpu_idx is not None:
        print('You have chosen a specific GPU. This will completely disable data parallelism.')

    if configs.dist_url == "env://" and configs.world_size == -1:
        configs.world_size = int(os.environ["WORLD_SIZE"])

    configs.distributed = configs.world_size > 1 or configs.multiprocessing_distributed

    if not configs.multiprocessing_distributed:
        main_worker(configs.gpu_idx, configs)
        return
    # One spawned process per GPU; the world size scales accordingly.
    configs.world_size = configs.ngpus_per_node * configs.world_size
    mp.spawn(main_worker, nprocs=configs.ngpus_per_node, args=(configs,))
def main_worker(gpu_idx, configs):
    """Per-process training worker: set up (distributed) environment, build
    model/optimizer/scheduler, optionally load/resume weights, then run the
    train/validate/checkpoint loop."""
    configs.gpu_idx = gpu_idx
    # configs.device = torch.device('cpu' if configs.gpu_idx is None else 'cuda:{}'.format(configs.gpu_idx))
    if configs.distributed:
        if configs.dist_url == "env://" and configs.rank == -1:
            configs.rank = int(os.environ["RANK"])
        if configs.multiprocessing_distributed:
            # For multiprocessing distributed training, rank needs to be the
            # global rank among all the processes
            configs.rank = configs.rank * configs.ngpus_per_node + gpu_idx
        dist.init_process_group(backend=configs.dist_backend, init_method=configs.dist_url,
                                world_size=configs.world_size, rank=configs.rank)
        # Gradient-accumulation steps targeting an effective batch of 64.
        configs.subdivisions = int(64 / configs.batch_size / configs.ngpus_per_node)
    else:
        configs.subdivisions = int(64 / configs.batch_size)
    # Only the master node logs, writes tensorboard, and saves checkpoints.
    configs.is_master_node = (not configs.distributed) or (
            configs.distributed and (configs.rank % configs.ngpus_per_node == 0))
    if configs.is_master_node:
        logger = Logger(configs.logs_dir, configs.saved_fn)
        logger.info('>>> Created a new logger')
        logger.info('>>> configs: {}'.format(configs))
        tb_writer = SummaryWriter(log_dir=os.path.join(configs.logs_dir, 'tensorboard'))
    else:
        logger = None
        tb_writer = None
    # model
    model = create_model(configs)
    # load weight from a checkpoint
    if configs.pretrained_path is not None:
        if os.path.isfile(configs.pretrained_path):
            model_path = configs.pretrained_path
        else:
            # Pick the last model file in the directory.
            # NOTE(review): os.listdir order is arbitrary (not sorted), so
            # "last" depends on the filesystem — verify this picks the
            # intended checkpoint.
            model_path = os.path.join(configs.pretrained_path, os.listdir(configs.pretrained_path)[-1])
        model.load_state_dict(torch.load(model_path, map_location=configs.device))
        if logger is not None:
            logger.info('loaded pretrained model at {}'.format(configs.pretrained_path))
    # resume weights of model from a checkpoint
    if configs.resume_path is not None:
        assert os.path.isfile(configs.resume_path), "=> no checkpoint found at '{}'".format(configs.resume_path)
        model.load_state_dict(torch.load(configs.resume_path, map_location='cpu'))
        if logger is not None:
            logger.info('resume training model from checkpoint {}'.format(configs.resume_path))
    # Data Parallel
    model = make_data_parallel(model, configs)
    # Make sure to create optimizer after moving the model to cuda
    optimizer = create_optimizer(configs, model)
    lr_scheduler = create_lr_scheduler(optimizer, configs)
    # Schedulers listed here step once per epoch; all others step per batch.
    configs.step_lr_in_epoch = False if configs.lr_type in ['multi_step', 'cosin', 'one_cycle'] else True
    # resume optimizer, lr_scheduler from a checkpoint
    if configs.resume_path is not None:
        utils_path = configs.resume_path.replace('Model_', 'Utils_')
        assert os.path.isfile(utils_path), "=> no checkpoint found at '{}'".format(utils_path)
        utils_state_dict = torch.load(utils_path, map_location='cuda:{}'.format(configs.gpu_idx))
        optimizer.load_state_dict(utils_state_dict['optimizer'])
        lr_scheduler.load_state_dict(utils_state_dict['lr_scheduler'])
        configs.start_epoch = utils_state_dict['epoch'] + 1
    if configs.is_master_node:
        num_parameters = get_num_parameters(model)
        logger.info('number of trained parameters of the model: {}'.format(num_parameters))
    if logger is not None:
        logger.info(">>> Loading dataset & getting dataloader...")
    # Create dataloader
    train_dataloader, train_sampler = create_train_dataloader(configs)
    if logger is not None:
        logger.info('number of batches in training set: {}'.format(len(train_dataloader)))
    # Evaluate-only mode: validate once and exit.
    # (`validate` is assumed to be defined later in this file.)
    if configs.evaluate:
        val_dataloader = create_val_dataloader(configs)
        val_loss = validate(val_dataloader, model, configs)
        print('val_loss: {:.4e}'.format(val_loss))
        return
    for epoch in range(configs.start_epoch, configs.num_epochs + 1):
        if logger is not None:
            logger.info('{}'.format('*-' * 40))
            logger.info('{} {}/{} {}'.format('=' * 35, epoch, configs.num_epochs, '=' * 35))
            logger.info('{}'.format('*-' * 40))
            logger.info('>>> Epoch: [{}/{}]'.format(epoch, configs.num_epochs))
        if configs.distributed:
            # Reshuffle the distributed sampler each epoch.
            train_sampler.set_epoch(epoch)
        # train for one epoch
        train_one_epoch(train_dataloader, model, optimizer, lr_scheduler, epoch, configs, logger, tb_writer)
        # Periodic validation (every checkpoint_freq epochs).
        if (not configs.no_val) and (epoch % configs.checkpoint_freq == 0):
            val_dataloader = create_val_dataloader(configs)
            print('number of batches in val_dataloader: {}'.format(len(val_dataloader)))
            val_loss = validate(val_dataloader, model, configs)
            print('val_loss: {:.4e}'.format(val_loss))
            if tb_writer is not None:
                tb_writer.add_scalar('Val_loss', val_loss, epoch)
        # Save checkpoint
        if configs.is_master_node and ((epoch % configs.checkpoint_freq) == 0):
            model_state_dict, utils_state_dict = get_saved_state(model, optimizer, lr_scheduler, epoch, configs)
            save_checkpoint(configs.checkpoints_dir, configs.saved_fn, model_state_dict, utils_state_dict, epoch)
        # Epoch-level LR schedulers step here; batch-level ones step inside
        # train_one_epoch.
        if not configs.step_lr_in_epoch:
            lr_scheduler.step()
            if tb_writer is not None:
                tb_writer.add_scalar('LR', lr_scheduler.get_lr()[0], epoch)
    if tb_writer is not None:
        tb_writer.close()
    if configs.distributed:
        cleanup()
def cleanup():
    """Tear down the torch.distributed process group started for DDP training."""
    dist.destroy_process_group()
def train_one_epoch(train_dataloader, model, optimizer, lr_scheduler, epoch, configs, logger, tb_writer):
    """Train the model for a single epoch.

    Runs the forward/backward pass per batch, steps the optimizer every
    `configs.subdivisions` iterations (gradient accumulation), optionally steps
    the LR scheduler per-iteration, and logs timing/loss to the logger and
    TensorBoard writer.

    Args:
        train_dataloader: loader yielding (imgs, targets) batches.
        model: network in training mode after this call.
        optimizer: optimizer whose gradients are accumulated/stepped here.
        lr_scheduler: stepped per-iteration only if configs.step_lr_in_epoch.
        epoch: 1-based epoch index (used to compute the global step).
        configs: config namespace (device, distributed, subdivisions, freqs, ...).
        logger: optional Logger; progress is written every configs.print_freq steps.
        tb_writer: optional TensorBoard SummaryWriter.
    """
    batch_time = AverageMeter('Time', ':6.3f')
    data_time = AverageMeter('Data', ':6.3f')
    losses = AverageMeter('Loss', ':.4e')
    progress = ProgressMeter(len(train_dataloader), [batch_time, data_time, losses],
                             prefix="Train - Epoch: [{}/{}]".format(epoch, configs.num_epochs))
    criterion = Compute_Loss(device=configs.device)
    num_iters_per_epoch = len(train_dataloader)
    # switch to train mode
    model.train()
    start_time = time.time()
    for batch_idx, batch_data in enumerate(tqdm(train_dataloader)):
        data_time.update(time.time() - start_time)
        imgs, targets = batch_data
        batch_size = imgs.size(0)
        # epoch is 1-based, so (epoch - 1) full epochs precede this one
        global_step = num_iters_per_epoch * (epoch - 1) + batch_idx + 1
        for k in targets.keys():
            targets[k] = targets[k].to(configs.device, non_blocking=True)
        imgs = imgs.to(configs.device, non_blocking=True).float()
        outputs = model(imgs)
        total_loss, loss_stats = criterion(outputs, targets)
        # For torch.nn.DataParallel case: the loss comes back with one entry
        # per GPU, so reduce it to a scalar before backward()
        if (not configs.distributed) and (configs.gpu_idx is None):
            total_loss = torch.mean(total_loss)
        # compute gradient and perform backpropagation; gradients accumulate
        # across `subdivisions` iterations before each optimizer step
        total_loss.backward()
        if global_step % configs.subdivisions == 0:
            optimizer.step()
            # zero the parameter gradients
            optimizer.zero_grad()
            # Adjust learning rate (per-iteration schedule)
            if configs.step_lr_in_epoch:
                lr_scheduler.step()
                if tb_writer is not None:
                    tb_writer.add_scalar('LR', lr_scheduler.get_lr()[0], global_step)
        if configs.distributed:
            # average the loss across all distributed workers for logging
            reduced_loss = reduce_tensor(total_loss.data, configs.world_size)
        else:
            reduced_loss = total_loss.data
        losses.update(to_python_float(reduced_loss), batch_size)
        # measure elapsed time
        # torch.cuda.synchronize()
        batch_time.update(time.time() - start_time)
        if tb_writer is not None:
            if (global_step % configs.tensorboard_freq) == 0:
                loss_stats['avg_loss'] = losses.avg
                tb_writer.add_scalars('Train', loss_stats, global_step)
        # Log message
        if logger is not None:
            if (global_step % configs.print_freq) == 0:
                logger.info(progress.get_message(batch_idx))
        start_time = time.time()
def validate(val_dataloader, model, configs):
    """Run one full pass over the validation set and return the average loss."""
    losses = AverageMeter('Loss', ':.4e')
    criterion = Compute_Loss(device=configs.device)
    # switch to evaluation mode
    model.eval()
    with torch.no_grad():
        for imgs, targets in tqdm(val_dataloader):
            n_samples = imgs.size(0)
            for key in targets:
                targets[key] = targets[key].to(configs.device, non_blocking=True)
            imgs = imgs.to(configs.device, non_blocking=True).float()
            outputs = model(imgs)
            total_loss, _ = criterion(outputs, targets)
            # For torch.nn.DataParallel case: one loss entry per GPU
            if (not configs.distributed) and (configs.gpu_idx is None):
                total_loss = torch.mean(total_loss)
            if configs.distributed:
                reduced_loss = reduce_tensor(total_loss.data, configs.world_size)
            else:
                reduced_loss = total_loss.data
            losses.update(to_python_float(reduced_loss), n_samples)
    return losses.avg
if __name__ == '__main__':
    try:
        main()
    except KeyboardInterrupt:
        # On Ctrl-C: shut down the distributed process group, then exit.
        # sys.exit raises SystemExit, which the inner except catches so the
        # process can be force-terminated with os._exit (skips atexit/cleanup
        # that might hang on worker processes).
        try:
            cleanup()
            sys.exit(0)
        except SystemExit:
            os._exit(0)
@@ -0,0 +1,137 @@ | |||||
""" | |||||
# -*- coding: utf-8 -*- | |||||
----------------------------------------------------------------------------------- | |||||
# Author: Nguyen Mau Dung | |||||
# DoC: 2020.08.17 | |||||
# email: nguyenmaudung93.kstn@gmail.com | |||||
----------------------------------------------------------------------------------- | |||||
# Description: Demonstration utils script | |||||
""" | |||||
import argparse | |||||
import sys | |||||
import os | |||||
import warnings | |||||
import zipfile | |||||
warnings.filterwarnings("ignore", category=UserWarning) | |||||
from easydict import EasyDict as edict | |||||
import numpy as np | |||||
import wget | |||||
import torch | |||||
import cv2 | |||||
src_dir = os.path.dirname(os.path.realpath(__file__)) | |||||
# while not src_dir.endswith("sfa"): | |||||
# src_dir = os.path.dirname(src_dir) | |||||
if src_dir not in sys.path: | |||||
sys.path.append(src_dir) | |||||
from utils.misc import make_folder, time_synchronized | |||||
from utils.evaluation_utils import decode, post_processing | |||||
from utils.torch_utils import _sigmoid | |||||
def parse_demo_configs():
    """Parse command-line arguments for the demo and build the full config dict.

    Returns:
        edict with CLI options plus fixed model/dataset settings (input size,
        head channels, dataset/results paths). Creates the results directory
        as a side effect.
    """
    parser = argparse.ArgumentParser(description='Demonstration config for the implementation')
    parser.add_argument('--saved_fn', type=str, default='fpn_resnet_18', metavar='FN',
                        help='The name using for saving logs, models,...')
    parser.add_argument('-a', '--arch', type=str, default='fpn_resnet_18', metavar='ARCH',
                        help='The name of the model architecture')
    parser.add_argument('--pretrained_path', type=str,
                        default='../checkpoints/fpn_resnet_18/fpn_resnet_18_epoch_300.pth', metavar='PATH',
                        help='the path of the pretrained checkpoint')
    # fixed typo: "demostration" -> "demonstration"
    parser.add_argument('--foldername', type=str, default='2011_09_26_drive_0014_sync', metavar='FN',
                        help='Folder name for demonstration dataset')
    parser.add_argument('--K', type=int, default=50,
                        help='the number of top K')
    parser.add_argument('--no_cuda', action='store_true',
                        help='If true, cuda is not used.')
    parser.add_argument('--gpu_idx', default=0, type=int,
                        help='GPU index to use.')
    parser.add_argument('--peak_thresh', type=float, default=0.2)
    parser.add_argument('--output_format', type=str, default='image', metavar='PATH',
                        help='the type of the test output (support image or video)')
    parser.add_argument('--output-width', type=int, default=608,
                        help='the width of showing output, the height maybe vary')
    configs = edict(vars(parser.parse_args()))
    configs.pin_memory = True
    configs.distributed = False  # For testing on 1 GPU only

    # Input / heatmap geometry: 608x608 BEV map downsampled 4x to 152x152
    configs.input_size = (608, 608)
    configs.hm_size = (152, 152)
    configs.down_ratio = 4
    configs.max_objects = 50

    configs.imagenet_pretrained = False
    configs.head_conv = 64
    configs.num_classes = 3
    configs.num_center_offset = 2
    configs.num_z = 1
    configs.num_dim = 3
    configs.num_direction = 2  # sin, cos
    # Output heads: per-head number of channels
    configs.heads = {
        'hm_cen': configs.num_classes,
        'cen_offset': configs.num_center_offset,
        'direction': configs.num_direction,
        'z_coor': configs.num_z,
        'dim': configs.num_dim
    }

    ####################################################################
    ##############Dataset, Checkpoints, and results dir configs#########
    ####################################################################
    configs.root_dir = '../'
    configs.dataset_dir = os.path.join(configs.root_dir, 'dataset', 'kitti', 'demo')
    configs.calib_path = os.path.join(configs.root_dir, 'dataset', 'kitti', 'demo', 'calib.txt')
    configs.results_dir = os.path.join(configs.root_dir, 'results', configs.saved_fn)
    make_folder(configs.results_dir)

    return configs
def download_and_unzip(demo_dataset_dir, download_url):
    """Download the demo archive into demo_dataset_dir and extract it.

    Both the download and the extraction are skipped when the zip file is
    already present locally.
    """
    zip_name = download_url.split('/')[-1]
    zip_path = os.path.join(demo_dataset_dir, zip_name)
    if os.path.isfile(zip_path):
        print('The dataset have been downloaded')
        return
    print('\nDownloading data for demonstration...')
    wget.download(download_url, zip_path)
    print('\nUnzipping the downloaded data...')
    # extract into a folder named after the archive (without ".zip")
    extract_dir = os.path.join(demo_dataset_dir, zip_name[:-4])
    with zipfile.ZipFile(zip_path, "r") as archive:
        archive.extractall(extract_dir)
def do_detect(configs, model, bevmap, is_front):
    """Run the detector on a single BEV map and post-process the output.

    Args:
        configs: config namespace (device, K, num_classes, down_ratio, peak_thresh).
        model: the detection network; must output the dict of heads used below.
        bevmap: BEV input tensor; assumes layout (C, H, W) — the rear view is
            produced by flipping dims 1 and 2 (TODO confirm layout with caller).
        is_front: True for the front view; False flips the map for the rear view.

    Returns:
        (detections for the single image, the possibly-flipped bevmap, fps)
        where fps is measured over model forward + decode + post-processing.
    """
    if not is_front:
        bevmap = torch.flip(bevmap, [1, 2])
    input_bev_maps = bevmap.unsqueeze(0).to(configs.device, non_blocking=True).float()
    t1 = time_synchronized()
    outputs = model(input_bev_maps)
    # heatmap and offset heads are trained with sigmoid activations
    outputs['hm_cen'] = _sigmoid(outputs['hm_cen'])
    outputs['cen_offset'] = _sigmoid(outputs['cen_offset'])
    # detections size (batch_size, K, 10)
    detections = decode(outputs['hm_cen'], outputs['cen_offset'], outputs['direction'], outputs['z_coor'],
                        outputs['dim'], K=configs.K)
    detections = detections.cpu().numpy().astype(np.float32)
    detections = post_processing(detections, configs.num_classes, configs.down_ratio, configs.peak_thresh)
    t2 = time_synchronized()
    # Inference speed
    fps = 1 / (t2 - t1)
    return detections[0], bevmap, fps
def write_credit(img, org_author=(500, 400), text_author='github.com/maudzung', org_fps=(50, 1000), fps=None):
    """Overlay the author credit and (optionally) the inference speed on an image.

    Args:
        img: BGR image; modified in place by cv2.putText.
        org_author: (x, y) anchor of the credit text.
        text_author: credit string to draw.
        org_fps: (x, y) anchor of the FPS text.
        fps: frames-per-second value; when None the FPS line is skipped.
            (Previously the default None crashed in '{:.1f}'.format(None).)
    """
    font = cv2.FONT_HERSHEY_SIMPLEX
    fontScale = 1
    color = (255, 255, 255)
    thickness = 2
    cv2.putText(img, text_author, org_author, font, fontScale, color, thickness, cv2.LINE_AA)
    if fps is not None:
        cv2.putText(img, 'Speed: {:.1f} FPS'.format(fps), org_fps, font, fontScale, color, thickness, cv2.LINE_AA)
@@ -0,0 +1,183 @@ | |||||
""" | |||||
# -*- coding: utf-8 -*- | |||||
----------------------------------------------------------------------------------- | |||||
# Author: Nguyen Mau Dung | |||||
# DoC: 2020.08.17 | |||||
# email: nguyenmaudung93.kstn@gmail.com | |||||
----------------------------------------------------------------------------------- | |||||
# Description: The utils for evaluation | |||||
# Reference: https://github.com/xingyizhou/CenterNet
""" | |||||
from __future__ import division | |||||
import os | |||||
import sys | |||||
import torch | |||||
import numpy as np | |||||
import torch.nn.functional as F | |||||
import cv2 | |||||
src_dir = os.path.dirname(os.path.realpath(__file__)) | |||||
# while not src_dir.endswith("sfa"): | |||||
# src_dir = os.path.dirname(src_dir) | |||||
if src_dir not in sys.path: | |||||
sys.path.append(src_dir) | |||||
import config.kitti_config as cnf | |||||
from data_process.kitti_bev_utils import drawRotatedBox | |||||
def _nms(heat, kernel=3): | |||||
pad = (kernel - 1) // 2 | |||||
hmax = F.max_pool2d(heat, (kernel, kernel), stride=1, padding=pad) | |||||
keep = (hmax == heat).float() | |||||
return heat * keep | |||||
def _gather_feat(feat, ind, mask=None): | |||||
dim = feat.size(2) | |||||
ind = ind.unsqueeze(2).expand(ind.size(0), ind.size(1), dim) | |||||
feat = feat.gather(1, ind) | |||||
if mask is not None: | |||||
mask = mask.unsqueeze(2).expand_as(feat) | |||||
feat = feat[mask] | |||||
feat = feat.view(-1, dim) | |||||
return feat | |||||
def _transpose_and_gather_feat(feat, ind):
    """Gather per-location features from a (B, C, H, W) map at flat indices.

    Moves channels last, flattens the spatial grid to (B, H*W, C), then
    delegates to _gather_feat with the (B, K) flat indices.
    """
    channels_last = feat.permute(0, 2, 3, 1).contiguous()
    flattened = channels_last.view(channels_last.size(0), -1, channels_last.size(3))
    return _gather_feat(flattened, ind)
def _topk(scores, K=40):
    """Select the overall top-K peaks from a (B, C, H, W) heatmap.

    First takes top-K per class over the flattened H*W grid, then top-K
    again across all classes, and recovers class id and (x, y) coordinates
    for each surviving peak.

    Returns:
        (scores, flat spatial indices, class ids, ys, xs), each of shape (B, K).
    """
    batch, cat, height, width = scores.size()
    # per-class top-K over the flattened spatial grid
    topk_scores, topk_inds = torch.topk(scores.view(batch, cat, -1), K)
    # strip the class offset so topk_inds indexes into H*W only
    topk_inds = topk_inds % (height * width)
    topk_ys = (torch.floor_divide(topk_inds, width)).float()
    topk_xs = (topk_inds % width).int().float()
    # global top-K over the cat*K per-class candidates
    topk_score, topk_ind = torch.topk(topk_scores.view(batch, -1), K)
    # each class contributed K candidates, so candidate // K is the class id
    topk_clses = (torch.floor_divide(topk_ind, K)).int()
    topk_inds = _gather_feat(topk_inds.view(batch, -1, 1), topk_ind).view(batch, K)
    topk_ys = _gather_feat(topk_ys.view(batch, -1, 1), topk_ind).view(batch, K)
    topk_xs = _gather_feat(topk_xs.view(batch, -1, 1), topk_ind).view(batch, K)
    return topk_score, topk_inds, topk_clses, topk_ys, topk_xs
def _topk_channel(scores, K=40): | |||||
batch, cat, height, width = scores.size() | |||||
topk_scores, topk_inds = torch.topk(scores.view(batch, cat, -1), K) | |||||
topk_inds = topk_inds % (height * width) | |||||
topk_ys = (topk_inds / width).int().float() | |||||
topk_xs = (topk_inds % width).int().float() | |||||
return topk_scores, topk_inds, topk_ys, topk_xs | |||||
def decode(hm_cen, cen_offset, direction, z_coor, dim, K=40):
    """Decode raw network head outputs into the top-K detections per image.

    Args:
        hm_cen: (B, num_classes, H, W) center heatmap (already sigmoided).
        cen_offset: (B, 2, H, W) sub-pixel center offsets, or None.
        direction: (B, 2, H, W) yaw as (sin, cos) — assumed from the (im, re)
            usage downstream in get_yaw; TODO confirm channel order.
        z_coor: (B, 1, H, W) height coordinate head.
        dim: (B, 3, H, W) 3D box dimensions head.
        K: number of detections kept per image.

    Returns:
        (B, K, 10) tensor; see the layout comments before torch.cat below.
    """
    batch_size, num_classes, height, width = hm_cen.size()
    # keep only local heatmap peaks, then pick the global top-K
    hm_cen = _nms(hm_cen)
    scores, inds, clses, ys, xs = _topk(hm_cen, K=K)
    if cen_offset is not None:
        # refine integer peak coordinates with the predicted sub-pixel offsets
        cen_offset = _transpose_and_gather_feat(cen_offset, inds)
        cen_offset = cen_offset.view(batch_size, K, 2)
        xs = xs.view(batch_size, K, 1) + cen_offset[:, :, 0:1]
        ys = ys.view(batch_size, K, 1) + cen_offset[:, :, 1:2]
    else:
        # no offset head: place detections at cell centers
        xs = xs.view(batch_size, K, 1) + 0.5
        ys = ys.view(batch_size, K, 1) + 0.5
    direction = _transpose_and_gather_feat(direction, inds)
    direction = direction.view(batch_size, K, 2)
    z_coor = _transpose_and_gather_feat(z_coor, inds)
    z_coor = z_coor.view(batch_size, K, 1)
    dim = _transpose_and_gather_feat(dim, inds)
    dim = dim.view(batch_size, K, 3)
    clses = clses.view(batch_size, K, 1).float()
    scores = scores.view(batch_size, K, 1)
    # (scores x 1, ys x 1, xs x 1, z_coor x 1, dim x 3, direction x 2, clses x 1)
    # (scores-0:1, ys-1:2, xs-2:3, z_coor-3:4, dim-4:7, direction-7:9, clses-9:10)
    # detections: [batch_size, K, 10]
    detections = torch.cat([scores, xs, ys, z_coor, dim, direction, clses], dim=2)
    return detections
def get_yaw(direction):
    """Recover the yaw angle (radians) from per-row (sin, cos) columns."""
    sin_component = direction[:, 0:1]
    cos_component = direction[:, 1:2]
    return np.arctan2(sin_component, cos_component)
def post_processing(detections, num_classes=3, down_ratio=4, peak_thresh=0.2):
    """
    :param detections: [batch_size, K, 10]
    # (scores x 1, xs x 1, ys x 1, z_coor x 1, dim x 3, direction x 2, clses x 1)
    # (scores-0:1, xs-1:2, ys-2:3, z_coor-3:4, dim-4:7, direction-7:9, clses-9:10)
    :param num_classes: number of object classes (detection column 9 holds the class id)
    :param down_ratio: heatmap-to-BEV scale factor applied to x and y
    :param peak_thresh: minimum score for a detection to be kept
    :return: list (one entry per image) of dicts mapping class id ->
             array of rows [score, x, y, z, h, w, l, yaw]
    """
    # TODO: Need to consider rescale to the original scale: x, y
    ret = []
    for i in range(detections.shape[0]):
        top_preds = {}
        classes = detections[i, :, -1]
        for j in range(num_classes):
            inds = (classes == j)
            # x, y, z, h, w, l, yaw
            # x/y are scaled from heatmap cells to BEV pixels; w/l are scaled
            # using the cnf bound sizes (presumably metric -> BEV pixels;
            # verify against kitti_config); h (col 4) is kept as-is
            top_preds[j] = np.concatenate([
                detections[i, inds, 0:1],
                detections[i, inds, 1:2] * down_ratio,
                detections[i, inds, 2:3] * down_ratio,
                detections[i, inds, 3:4],
                detections[i, inds, 4:5],
                detections[i, inds, 5:6] / cnf.bound_size_y * cnf.BEV_WIDTH,
                detections[i, inds, 6:7] / cnf.bound_size_x * cnf.BEV_HEIGHT,
                get_yaw(detections[i, inds, 7:9]).astype(np.float32)], axis=1)
            # Filter by peak_thresh
            if len(top_preds[j]) > 0:
                keep_inds = (top_preds[j][:, 0] > peak_thresh)
                top_preds[j] = top_preds[j][keep_inds]
        ret.append(top_preds)
    return ret
def draw_predictions(img, detections, num_classes=3):
    """Draw a rotated BEV box on img for every detection of every class."""
    for cls_id in range(num_classes):
        cls_dets = detections[cls_id]
        if len(cls_dets) == 0:
            continue
        for det in cls_dets:
            # det layout: (scores-0:1, x-1:2, y-2:3, z-3:4, dim-4:7, yaw-7:8)
            _score, _x, _y, _z, _h, _w, _l, _yaw = det
            drawRotatedBox(img, _x, _y, _w, _l, _yaw, cnf.colors[int(cls_id)])
    return img
def convert_det_to_real_values(detections, num_classes=3):
    """Convert BEV-pixel detections back to metric coordinates.

    Maps x/y/w/l from BEV pixels to metres using the cnf bound sizes and
    offsets the position by the cnf.boundary minimums — presumably producing
    LiDAR-frame coordinates (verify against kitti_config). The yaw sign is
    flipped, which looks like a BEV-to-LiDAR angle-convention change — TODO
    confirm.

    Args:
        detections: dict class_id -> array of [score, x, y, z, h, w, l, yaw]
            rows (the post_processing output for one image).
        num_classes: number of class ids to iterate.

    Returns:
        np.ndarray of rows [cls_id, h, w, l, x, y, z, yaw] (empty when no
        detections survive).
    """
    kitti_dets = []
    for cls_id in range(num_classes):
        if len(detections[cls_id]) > 0:
            for det in detections[cls_id]:
                # (scores-0:1, x-1:2, y-2:3, z-3:4, dim-4:7, yaw-7:8)
                _score, _x, _y, _z, _h, _w, _l, _yaw = det
                _yaw = round(-_yaw, 2)
                # note the axis swap: BEV row (_y) maps to metric X, BEV col (_x) to metric Y
                x = round(_y / cnf.BEV_HEIGHT * cnf.bound_size_x + cnf.boundary['minX'], 2)
                y = round(_x / cnf.BEV_WIDTH * cnf.bound_size_y + cnf.boundary['minY'], 2)
                z = round(_z + cnf.boundary['minZ'], 2)
                w = round(_w / cnf.BEV_WIDTH * cnf.bound_size_y, 2)
                l = round(_l / cnf.BEV_HEIGHT * cnf.bound_size_x, 2)
                h = round(_h/1, 2)  # '/1' is a no-op; height is used as-is
                kitti_dets.append([cls_id, h, w, l, x, y, z, _yaw])
    return np.array(kitti_dets)
@@ -0,0 +1,49 @@ | |||||
""" | |||||
# -*- coding: utf-8 -*- | |||||
----------------------------------------------------------------------------------- | |||||
# Author: Nguyen Mau Dung | |||||
# DoC: 2020.07.31 | |||||
# email: nguyenmaudung93.kstn@gmail.com | |||||
----------------------------------------------------------------------------------- | |||||
# Description: This script is for logging
""" | |||||
import os | |||||
import logging | |||||
class Logger():
    """
    Create logger to save logs during training

    Writes INFO-level messages both to 'logger_<saved_fn>.txt' inside
    logs_dir and to the console.

    Args:
        logs_dir: directory the log file is written to (must already exist).
        saved_fn: experiment name used in the log file name.

    Returns:

    """
    def __init__(self, logs_dir, saved_fn):
        logger_fn = 'logger_{}.txt'.format(saved_fn)
        logger_path = os.path.join(logs_dir, logger_fn)

        # NOTE(review): logging.getLogger(__name__) returns a shared module
        # logger, so constructing Logger more than once in a process adds
        # duplicate handlers and repeats every message — confirm single use.
        self.logger = logging.getLogger(__name__)
        self.logger.setLevel(logging.INFO)

        # formatter = logging.Formatter('%(asctime)s:File %(module)s.py:Func %(funcName)s:Line %(lineno)d:%(levelname)s: %(message)s')
        formatter = logging.Formatter(
            '%(asctime)s: %(module)s.py - %(funcName)s(), at Line %(lineno)d:%(levelname)s:\n%(message)s')

        file_handler = logging.FileHandler(logger_path)
        file_handler.setLevel(logging.INFO)
        file_handler.setFormatter(formatter)

        stream_handler = logging.StreamHandler()
        stream_handler.setFormatter(formatter)

        self.logger.addHandler(file_handler)
        self.logger.addHandler(stream_handler)

    def info(self, message):
        """Log `message` at INFO level to both the file and the console."""
        self.logger.info(message)
@@ -0,0 +1,312 @@ | |||||
import torch | |||||
from torch.optim import SGD, lr_scheduler | |||||
import numpy as np | |||||
class _LRMomentumScheduler(lr_scheduler._LRScheduler):
    """Base scheduler that updates both learning rate and momentum.

    Extends torch's _LRScheduler with a parallel 'momentum' track: records
    each param group's starting momentum as 'initial_momentum' and writes
    both lr and momentum back into the groups on every step(). Subclasses
    must implement get_lr() and get_momentum().
    """
    def __init__(self, optimizer, last_epoch=-1):
        if last_epoch == -1:
            # fresh run: remember the momentum each group started with
            for group in optimizer.param_groups:
                group.setdefault('initial_momentum', group['momentum'])
        else:
            # resuming: 'initial_momentum' must have been saved alongside the optimizer
            for i, group in enumerate(optimizer.param_groups):
                if 'initial_momentum' not in group:
                    raise KeyError("param 'initial_momentum' is not specified "
                                   "in param_groups[{}] when resuming an optimizer".format(i))
        self.base_momentums = list(map(lambda group: group['initial_momentum'], optimizer.param_groups))
        super().__init__(optimizer, last_epoch)

    def get_lr(self):
        """Return the new lr for each param group; subclasses must override."""
        raise NotImplementedError

    def get_momentum(self):
        """Return the new momentum for each param group; subclasses must override."""
        raise NotImplementedError

    def step(self, epoch=None):
        """Advance to `epoch` (default: last_epoch + 1) and apply lr + momentum."""
        if epoch is None:
            epoch = self.last_epoch + 1
        self.last_epoch = epoch
        for param_group, lr, momentum in zip(self.optimizer.param_groups, self.get_lr(), self.get_momentum()):
            param_group['lr'] = lr
            param_group['momentum'] = momentum
class ParameterUpdate(object):
    """A callable class used to define an arbitrary schedule defined by a list.
    This object is designed to be passed to the LambdaLR or LambdaScheduler scheduler to apply
    the given schedule.

    NOTE(review): this class is redefined later in this file with an added
    guard for base_param == 0; that later definition shadows this one at
    import time, so this version (which would raise ZeroDivisionError for a
    zero base_param) is effectively dead code.

    Arguments:
        params {list or numpy.array} -- List or numpy array defining parameter schedule.
        base_param {float} -- Parameter value used to initialize the optimizer.
    """

    def __init__(self, params, base_param):
        # trailing 0 guards the lookup at the final epoch index
        self.params = np.hstack([params, 0])
        self.base_param = base_param

    def __call__(self, epoch):
        # multiplicative factor relative to the optimizer's base value
        return self.params[epoch] / self.base_param
def apply_lambda(last_epoch, bases, lambdas):
    """Evaluate each lambda at last_epoch and scale its base value by the result."""
    return [factor_fn(last_epoch) * base for base, factor_fn in zip(bases, lambdas)]
class LambdaScheduler(_LRMomentumScheduler):
    """Sets the learning rate and momentum of each parameter group to the initial lr and momentum
    times a given function. When last_epoch=-1, sets initial lr and momentum to the optimizer
    values.

    NOTE(review): the default lambdas are the identity (factor == epoch index),
    which scales the base values by the epoch number — confirm whether a
    constant factor of 1 was intended for the no-schedule case.

    Args:
        optimizer (Optimizer): Wrapped optimizer.
        lr_lambda (function or list): A function which computes a multiplicative
            factor given an integer parameter epoch, or a list of such
            functions, one for each group in optimizer.param_groups.
            Default: lambda x:x.
        momentum_lambda (function or list): As for lr_lambda but applied to momentum.
            Default: lambda x:x.
        last_epoch (int): The index of last epoch. Default: -1.

    Example:
        >>> # Assuming optimizer has two groups.
        >>> lr_lambda = [
        ...     lambda epoch: epoch // 30,
        ...     lambda epoch: 0.95 ** epoch
        ... ]
        >>> mom_lambda = [
        ...     lambda epoch: max(0, (50 - epoch) // 50),
        ...     lambda epoch: 0.99 ** epoch
        ... ]
        >>> scheduler = LambdaScheduler(optimizer, lr_lambda, mom_lambda)
        >>> for epoch in range(100):
        >>>     train(...)
        >>>     validate(...)
        >>>     scheduler.step()
    """

    def __init__(self, optimizer, lr_lambda=lambda x: x, momentum_lambda=lambda x: x, last_epoch=-1):
        self.optimizer = optimizer
        # broadcast a single callable to every param group, or validate the list length
        if not isinstance(lr_lambda, (list, tuple)):
            self.lr_lambdas = [lr_lambda] * len(optimizer.param_groups)
        else:
            if len(lr_lambda) != len(optimizer.param_groups):
                raise ValueError("Expected {} lr_lambdas, but got {}".format(
                    len(optimizer.param_groups), len(lr_lambda)))
            self.lr_lambdas = list(lr_lambda)
        if not isinstance(momentum_lambda, (list, tuple)):
            self.momentum_lambdas = [momentum_lambda] * len(optimizer.param_groups)
        else:
            if len(momentum_lambda) != len(optimizer.param_groups):
                raise ValueError("Expected {} momentum_lambdas, but got {}".format(
                    len(optimizer.param_groups), len(momentum_lambda)))
            self.momentum_lambdas = list(momentum_lambda)
        self.last_epoch = last_epoch
        super().__init__(optimizer, last_epoch)

    def state_dict(self):
        """Returns the state of the scheduler as a :class:`dict`.
        It contains an entry for every variable in self.__dict__ which
        is not the optimizer.
        The learning rate and momentum lambda functions will only be saved if they are
        callable objects and not if they are functions or lambdas.
        """
        # local import: 'types' was referenced here but never imported at
        # module level, which made this method raise NameError
        import types
        state_dict = {key: value for key, value in self.__dict__.items()
                      if key not in ('optimizer', 'lr_lambdas', 'momentum_lambdas')}
        state_dict['lr_lambdas'] = [None] * len(self.lr_lambdas)
        state_dict['momentum_lambdas'] = [None] * len(self.momentum_lambdas)
        # plain functions/lambdas are not picklable schedule state; only save
        # the __dict__ of callable schedule objects (e.g. ParameterUpdate)
        for idx, (lr_fn, mom_fn) in enumerate(zip(self.lr_lambdas, self.momentum_lambdas)):
            if not isinstance(lr_fn, types.FunctionType):
                state_dict['lr_lambdas'][idx] = lr_fn.__dict__.copy()
            if not isinstance(mom_fn, types.FunctionType):
                state_dict['momentum_lambdas'][idx] = mom_fn.__dict__.copy()
        return state_dict

    def load_state_dict(self, state_dict):
        """Loads the schedulers state.
        Arguments:
            state_dict (dict): scheduler state. Should be an object returned
                from a call to :meth:`state_dict`.
        """
        lr_lambdas = state_dict.pop('lr_lambdas')
        momentum_lambdas = state_dict.pop('momentum_lambdas')
        self.__dict__.update(state_dict)
        # restore the saved state into the existing callable schedule objects
        for idx, fn in enumerate(lr_lambdas):
            if fn is not None:
                self.lr_lambdas[idx].__dict__.update(fn)
        for idx, fn in enumerate(momentum_lambdas):
            if fn is not None:
                self.momentum_lambdas[idx].__dict__.update(fn)

    def get_lr(self):
        """New lr per group: base_lr * lr_lambda(last_epoch)."""
        return apply_lambda(self.last_epoch, self.base_lrs, self.lr_lambdas)

    def get_momentum(self):
        """New momentum per group: base_momentum * momentum_lambda(last_epoch)."""
        return apply_lambda(self.last_epoch, self.base_momentums, self.momentum_lambdas)
class ParameterUpdate(object):
    """A callable schedule backed by a list of parameter values.

    Intended to be passed to LambdaLR or LambdaScheduler; calling it with an
    epoch index returns params[epoch] / base_param, the multiplicative factor
    relative to the optimizer's base value. A (near-)zero base_param disables
    updates: every factor becomes 1.

    Arguments:
        params {list or numpy.array} -- List or numpy array defining parameter schedule.
        base_param {float} -- Parameter value used to initialize the optimizer.
    """

    def __init__(self, params, base_param):
        # trailing 0 guards the lookup at the final epoch index
        schedule = np.hstack([params, 0])
        if base_param < 1e-12:
            # degenerate base value: turn the schedule into constant 1s
            base_param = 1
            schedule = schedule * 0.0 + 1.0
        self.params = schedule
        self.base_param = base_param

    def __call__(self, epoch):
        return self.params[epoch] / self.base_param
class ListScheduler(LambdaScheduler):
    """Sets the learning rate and momentum of each parameter group to values defined by lists.
    When last_epoch=-1, sets initial lr and momentum to the optimizer values. One of both of lr
    and momentum schedules may be specified.

    Note that the parameters used to initialize the optimizer are overriden by those defined by
    this scheduler.

    Args:
        optimizer (Optimizer): Wrapped optimizer.
        lrs (list or numpy.ndarray): A list of learning rates, or a list of lists, one for each
            parameter group. One- or two-dimensional numpy arrays may also be passed.
        momentum (list or numpy.ndarray): A list of momentums, or a list of lists, one for each
            parameter group. One- or two-dimensional numpy arrays may also be passed.
        last_epoch (int): The index of last epoch. Default: -1.

    Example:
        >>> # Assuming optimizer has two groups.
        >>> lrs = [
        ...     np.linspace(0.01, 0.1, 100),
        ...     np.logspace(-2, 0, 100)
        ... ]
        >>> momentums = [
        ...     np.linspace(0.85, 0.95, 100),
        ...     np.linspace(0.8, 0.99, 100)
        ... ]
        >>> scheduler = ListScheduler(optimizer, lrs, momentums)
        >>> for epoch in range(100):
        >>>     train(...)
        >>>     validate(...)
        >>>     scheduler.step()
    """

    def __init__(self, optimizer, lrs=None, momentums=None, last_epoch=-1):
        groups = optimizer.param_groups
        if lrs is None:
            # No lr schedule given: keep the optimizer's lr untouched.
            # (A constant multiplier of 1 — the previous identity lambda
            # multiplied the base lr by the epoch index, zeroing it at epoch 0.)
            lr_lambda = lambda x: 1.0
        else:
            lrs = np.array(lrs) if isinstance(lrs, (list, tuple)) else lrs
            if len(lrs.shape) == 1:
                # one shared schedule for all param groups
                lr_lambda = [ParameterUpdate(lrs, g['lr']) for g in groups]
            else:
                # one schedule row per param group
                lr_lambda = [ParameterUpdate(l, g['lr']) for l, g in zip(lrs, groups)]

        if momentums is None:
            # No momentum schedule given: keep the optimizer's momentum untouched.
            momentum_lambda = lambda x: 1.0
        else:
            momentums = np.array(momentums) if isinstance(momentums, (list, tuple)) else momentums
            if len(momentums.shape) == 1:
                momentum_lambda = [ParameterUpdate(momentums, g['momentum']) for g in groups]
            else:
                momentum_lambda = [ParameterUpdate(l, g['momentum']) for l, g in zip(momentums, groups)]

        super().__init__(optimizer, lr_lambda, momentum_lambda)
class RangeFinder(ListScheduler):
    """LR range search scheduler.

    Implements the learning-rate range test from:
    A disciplined approach to neural network hyper-parameters: Part 1 -- learning rate, batch
    size, momentum, and weight decay. Leslie N. Smith, 2018, arXiv:1803.09820.

    Sweeps logarithmically spaced learning rates from 1e-7 to 1, with one
    value per epoch. Note that the parameters used to initialize the
    optimizer are overriden by those defined by this scheduler.

    Args:
        optimizer (Optimizer): Wrapped optimizer.
        epochs (int): Number of epochs over which to run test.

    Example:
        >>> scheduler = RangeFinder(optimizer, 100)
        >>> for epoch in range(100):
        >>>     train(...)
        >>>     validate(...)
        >>>     scheduler.step()
    """

    def __init__(self, optimizer, epochs):
        search_lrs = np.logspace(-7, 0, epochs)
        super().__init__(optimizer, search_lrs)
class OneCyclePolicy(ListScheduler):
    """Scheduler class that implements the 1cycle policy search specified in:
    A disciplined approach to neural network hyper-parameters: Part 1 -- learning rate, batch
    size, momentum, and weight decay. Leslie N. Smith, 2018, arXiv:1803.09820.

    Args:
        optimizer (Optimizer): Wrapped optimizer.
        lr (float or list). Maximum learning rate in range. If a list of values is passed, they
            should correspond to parameter groups.
        epochs (int): The number of epochs to use during search.
        momentum_rng (list). Optional upper and lower momentum values (may be both equal). Set to
            None to run without momentum. Default: [0.85, 0.95]. If a list of lists is passed, they
            should correspond to parameter groups.
            (NOTE(review): mutable default argument — harmless here since it is
            only read, never mutated.)
        phase_ratio (float): Fraction of epochs used for the increasing and decreasing phase of
            the schedule. For example, if phase_ratio=0.45 and epochs=100, the learning rate will
            increase from lr/10 to lr over 45 epochs, then decrease back to lr/10 over 45 epochs,
            then decrease to lr/100 over the remaining 10 epochs. Default: 0.45.
    """

    def __init__(self, optimizer, lr, epochs, momentum_rng=[0.85, 0.95], phase_ratio=0.45):
        phase_epochs = int(phase_ratio * epochs)
        # LR triangle: lr/10 -> lr -> lr/10, then a final decay to lr/100
        if isinstance(lr, (list, tuple)):
            # one triangle per param group
            lrs = [
                np.hstack([
                    np.linspace(l * 1e-1, l, phase_epochs),
                    np.linspace(l, l * 1e-1, phase_epochs),
                    np.linspace(l * 1e-1, l * 1e-2, epochs - 2 * phase_epochs),
                ]) for l in lr
            ]
        else:
            lrs = np.hstack([
                np.linspace(lr * 1e-1, lr, phase_epochs),
                np.linspace(lr, lr * 1e-1, phase_epochs),
                np.linspace(lr * 1e-1, lr * 1e-2, epochs - 2 * phase_epochs),
            ])

        if momentum_rng is not None:
            momentum_rng = np.array(momentum_rng)
            if len(momentum_rng.shape) == 2:
                # per-group [low, high] momentum ranges; momentum moves
                # opposite to lr: high -> low -> high, then held at high
                for i, g in enumerate(optimizer.param_groups):
                    g['momentum'] = momentum_rng[i][1]
                momentums = [
                    np.hstack([
                        np.linspace(m[1], m[0], phase_epochs),
                        np.linspace(m[0], m[1], phase_epochs),
                        np.linspace(m[1], m[1], epochs - 2 * phase_epochs),
                    ]) for m in momentum_rng
                ]
            else:
                # a single [low, high] range shared by all groups
                for i, g in enumerate(optimizer.param_groups):
                    g['momentum'] = momentum_rng[1]
                momentums = np.hstack([
                    np.linspace(momentum_rng[1], momentum_rng[0], phase_epochs),
                    np.linspace(momentum_rng[0], momentum_rng[1], phase_epochs),
                    np.linspace(momentum_rng[1], momentum_rng[1], epochs - 2 * phase_epochs),
                ])
        else:
            momentums = None

        super().__init__(optimizer, lrs, momentums)
@@ -0,0 +1,71 @@ | |||||
""" | |||||
# -*- coding: utf-8 -*- | |||||
----------------------------------------------------------------------------------- | |||||
# Author: Nguyen Mau Dung | |||||
# DoC: 2020.07.31 | |||||
# email: nguyenmaudung93.kstn@gmail.com | |||||
----------------------------------------------------------------------------------- | |||||
# Description: Miscellaneous utilities: folder creation, metric meters, and timing helpers
""" | |||||
import os | |||||
import torch | |||||
import time | |||||
def make_folder(folder_name):
    """Create folder_name (and any missing parents) if it does not exist.

    Uses os.makedirs(..., exist_ok=True), which is safe to call repeatedly
    and avoids the check-then-create race of os.path.exists + os.makedirs.
    """
    os.makedirs(folder_name, exist_ok=True)
class AverageMeter(object):
    """Tracks the most recent value and the running average of a metric."""

    def __init__(self, name, fmt=':f'):
        self.name = name
        self.fmt = fmt
        self.reset()

    def reset(self):
        """Clear all accumulated statistics."""
        self.val = 0
        self.avg = 0
        self.sum = 0
        self.count = 0

    def update(self, val, n=1):
        """Record a value observed n times and refresh the running average."""
        self.val = val
        self.sum += val * n
        self.count += n
        self.avg = self.sum / self.count

    def __str__(self):
        # e.g. fmt=':.4e' -> 'Loss 1.2345e-01 (1.1111e-01)'
        template = '{name} {val%s} ({avg%s})' % (self.fmt, self.fmt)
        return template.format(**self.__dict__)
class ProgressMeter(object):
    """Formats training progress as '[batch/total]' followed by each meter's summary."""

    def __init__(self, num_batches, meters, prefix=""):
        self.batch_fmtstr = self._get_batch_fmtstr(num_batches)
        self.meters = meters
        self.prefix = prefix

    def display(self, batch):
        """Print the progress line for `batch` to stdout."""
        # Reuse get_message instead of duplicating the formatting logic.
        print(self.get_message(batch))

    def get_message(self, batch):
        """Return the progress line: prefix, '[batch/total]', then each meter."""
        entries = [self.prefix + self.batch_fmtstr.format(batch)]
        entries += [str(meter) for meter in self.meters]
        return '\t'.join(entries)

    def _get_batch_fmtstr(self, num_batches):
        """Build a '[{:Nd}/total]' template that right-aligns the batch index.

        e.g. num_batches=100 -> '[  5/100]' for batch 5.
        """
        # `num_batches // 1` in the original was a no-op for ints; dropped.
        num_digits = len(str(num_batches))
        fmt = '{:' + str(num_digits) + 'd}'
        return '[' + fmt + '/' + fmt.format(num_batches) + ']'
def time_synchronized():
    """Return time.time() after waiting for pending CUDA work to finish.

    CUDA kernels launch asynchronously; without the synchronize, wall-clock
    timing would measure only the launch, not the execution. No-op on CPU.
    """
    # The original used a conditional *expression* as a statement; an explicit
    # `if` states the side-effect intent clearly.
    if torch.cuda.is_available():
        torch.cuda.synchronize()
    return time.time()
@@ -0,0 +1,45 @@ | |||||
""" | |||||
# -*- coding: utf-8 -*- | |||||
----------------------------------------------------------------------------------- | |||||
# Author: Nguyen Mau Dung | |||||
# DoC: 2020.08.09 | |||||
# email: nguyenmaudung93.kstn@gmail.com | |||||
----------------------------------------------------------------------------------- | |||||
# Description: some utilities of torch (conversion) | |||||
----------------------------------------------------------------------------------- | |||||
""" | |||||
import torch | |||||
import torch.distributed as dist | |||||
__all__ = ['convert2cpu', 'convert2cpu_long', 'to_cpu', 'reduce_tensor', 'to_python_float', '_sigmoid'] | |||||
def convert2cpu(gpu_matrix):
    """Copy `gpu_matrix` into a freshly allocated CPU FloatTensor of the same size."""
    cpu_copy = torch.FloatTensor(gpu_matrix.size())
    cpu_copy.copy_(gpu_matrix)
    return cpu_copy
def convert2cpu_long(gpu_matrix):
    """Copy `gpu_matrix` into a freshly allocated CPU LongTensor of the same size."""
    cpu_copy = torch.LongTensor(gpu_matrix.size())
    cpu_copy.copy_(gpu_matrix)
    return cpu_copy
def to_cpu(tensor):
    """Detach `tensor` from the autograd graph and move it to host memory."""
    detached = tensor.detach()
    return detached.cpu()
def reduce_tensor(tensor, world_size):
    """All-reduce (sum) `tensor` across workers and average by `world_size`.

    Used to aggregate a loss/metric across DDP processes. Requires an
    initialized process group; the caller's tensor is not modified.
    """
    rt = tensor.clone()  # all_reduce is in-place, so work on a copy
    # `dist.reduce_op` was deprecated and later removed; `ReduceOp` is the
    # supported enum in current PyTorch.
    dist.all_reduce(rt, op=dist.ReduceOp.SUM)
    rt /= world_size
    return rt
def to_python_float(t):
    """Convert a scalar tensor (or a 1-element sequence) to a plain Python number."""
    if not hasattr(t, 'item'):
        # Fallback for plain sequences: take the single element.
        return t[0]
    return t.item()
def _sigmoid(x): | |||||
return torch.clamp(x.sigmoid_(), min=1e-4, max=1 - 1e-4) |
@@ -0,0 +1,140 @@ | |||||
""" | |||||
# -*- coding: utf-8 -*- | |||||
----------------------------------------------------------------------------------- | |||||
# Author: Nguyen Mau Dung | |||||
# DoC: 2020.08.09 | |||||
# email: nguyenmaudung93.kstn@gmail.com | |||||
----------------------------------------------------------------------------------- | |||||
# Description: utils functions that use for training process | |||||
""" | |||||
import copy | |||||
import os | |||||
import math | |||||
import sys | |||||
import torch | |||||
from torch.optim.lr_scheduler import LambdaLR | |||||
import matplotlib.pyplot as plt | |||||
src_dir = os.path.dirname(os.path.realpath(__file__)) | |||||
# while not src_dir.endswith("sfa"): | |||||
# src_dir = os.path.dirname(src_dir) | |||||
if src_dir not in sys.path: | |||||
sys.path.append(src_dir) | |||||
from utils.lr_scheduler import OneCyclePolicy | |||||
def create_optimizer(configs, model):
    """Create the optimizer for the training process.

    Args:
        configs: namespace with `optimizer_type` ('sgd' | 'adam'), `lr`, and
            `momentum` (sgd) / `weight_decay` (adam).
        model: the network; if it is wrapped (e.g. DataParallel/DDP exposes
            `.module`), the inner module's parameters are used.

    Returns:
        A torch.optim optimizer over the trainable (requires_grad) parameters only.

    Raises:
        ValueError: if `configs.optimizer_type` is not recognized.
    """
    net = model.module if hasattr(model, 'module') else model
    train_params = [param for param in net.parameters() if param.requires_grad]
    if configs.optimizer_type == 'sgd':
        optimizer = torch.optim.SGD(train_params, lr=configs.lr, momentum=configs.momentum, nesterov=True)
    elif configs.optimizer_type == 'adam':
        optimizer = torch.optim.Adam(train_params, lr=configs.lr, weight_decay=configs.weight_decay)
    else:
        # Raise instead of `assert False`: asserts are stripped under `python -O`,
        # and naming the bad value makes config mistakes diagnosable.
        raise ValueError('Unknown optimizer type: {}'.format(configs.optimizer_type))

    return optimizer
def create_lr_scheduler(optimizer, configs):
    """Create the LR scheduler selected by `configs.lr_type`.

    Supported types:
        'multi_step' - piecewise-constant decay x0.1 at configs.steps[0]
                       and again at configs.steps[1];
        'cosin'      - cosine decay to 10% of the base lr
                       (https://arxiv.org/pdf/1812.01187.pdf);
        'one_cycle'  - the project's OneCyclePolicy.

    Also saves a plot of the simulated schedule into configs.logs_dir.

    Raises:
        ValueError: for an unknown configs.lr_type.
    """
    if configs.lr_type == 'multi_step':
        def multi_step_scheduler(i):
            # 1.0 before steps[0], 0.1 until steps[1], 0.01 afterwards.
            if i < configs.steps[0]:
                factor = 1.
            elif i < configs.steps[1]:
                factor = 0.1
            else:
                factor = 0.01
            return factor

        lr_scheduler = LambdaLR(optimizer, multi_step_scheduler)
    elif configs.lr_type == 'cosin':
        # Scheduler https://arxiv.org/pdf/1812.01187.pdf
        lf = lambda x: (((1 + math.cos(x * math.pi / configs.num_epochs)) / 2) ** 1.0) * 0.9 + 0.1  # cosine
        lr_scheduler = LambdaLR(optimizer, lr_lambda=lf)
    elif configs.lr_type == 'one_cycle':
        lr_scheduler = OneCyclePolicy(optimizer, configs.lr, configs.num_epochs, momentum_rng=[0.85, 0.95],
                                      phase_ratio=0.45)
    else:
        # Name the offending value - a bare ValueError gives no clue what was wrong.
        raise ValueError('Unknown lr_type: {}'.format(configs.lr_type))

    plot_lr_scheduler(optimizer, lr_scheduler, configs.num_epochs, save_dir=configs.logs_dir, lr_type=configs.lr_type)

    return lr_scheduler
def get_saved_state(model, optimizer, lr_scheduler, epoch, configs):
    """Collect everything needed to checkpoint the current training state.

    Returns:
        A pair (model_state_dict, utils_state_dict) where utils_state_dict
        holds the epoch, configs, and deep-copied optimizer/scheduler states.
    """
    net = model.module if hasattr(model, 'module') else model
    model_state_dict = net.state_dict()
    utils_state_dict = {
        'epoch': epoch,
        'configs': configs,
        # Deep-copy so later training steps cannot mutate the saved state.
        'optimizer': copy.deepcopy(optimizer.state_dict()),
        'lr_scheduler': copy.deepcopy(lr_scheduler.state_dict()),
    }
    return model_state_dict, utils_state_dict
def save_checkpoint(checkpoints_dir, saved_fn, model_state_dict, utils_state_dict, epoch):
    """Persist one checkpoint as two .pth files in `checkpoints_dir`.

    Writes Model_{saved_fn}_epoch_{epoch}.pth (weights) and
    Utils_{saved_fn}_epoch_{epoch}.pth (optimizer/scheduler/epoch state).
    """
    suffix = '{}_epoch_{}.pth'.format(saved_fn, epoch)
    model_save_path = os.path.join(checkpoints_dir, 'Model_' + suffix)
    utils_save_path = os.path.join(checkpoints_dir, 'Utils_' + suffix)
    torch.save(model_state_dict, model_save_path)
    torch.save(utils_state_dict, utils_save_path)
    print('save a checkpoint at {}'.format(model_save_path))
def plot_lr_scheduler(optimizer, scheduler, num_epochs=300, save_dir='', lr_type=''):
    """Simulate `num_epochs` of scheduler.step() and save a plot of the LR curve.

    BUG FIX: the original used copy.copy(), but a shallow optimizer copy
    shares its param_groups dicts with the real optimizer (and the copied
    scheduler still pointed at the real optimizer), so "simulating" the
    schedule actually advanced the live optimizer's learning rate before
    training started. deepcopy the scheduler - which deep-copies its
    optimizer along with it - and read the lr from that private copy.
    """
    scheduler = copy.deepcopy(scheduler)  # brings its own deep-copied optimizer
    optimizer = scheduler.optimizer       # the copy's twin, safe to mutate
    y = []
    for _ in range(num_epochs):
        scheduler.step()
        y.append(optimizer.param_groups[0]['lr'])
    plt.plot(y, '.-', label='LR')
    plt.xlabel('epoch')
    plt.ylabel('LR')
    plt.grid()
    plt.xlim(0, num_epochs)
    plt.ylim(0)
    plt.tight_layout()
    plt.savefig(os.path.join(save_dir, 'LR_{}.png'.format(lr_type)), dpi=200)
    plt.close()  # free the figure so repeated calls don't overlay curves
if __name__ == '__main__':
    # Smoke test: build a scheduler over a small model and print the lr per
    # epoch so the schedule (and the saved LR_*.png plot) can be inspected.
    from easydict import EasyDict as edict
    from torchvision.models import resnet18
    configs = edict()
    configs.steps = [150, 180]
    configs.lr_type = 'one_cycle'  # multi_step, cosin, one_cycle
    configs.logs_dir = '../../logs/'
    configs.num_epochs = 50
    configs.lr = 2.25e-3
    # Any small model works here; only its parameters are needed for the optimizer.
    net = resnet18()
    optimizer = torch.optim.Adam(net.parameters(), 0.0002)
    # scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer, milestones=[3, 6, 9], gamma=0.1)
    # scheduler = torch.optim.lr_scheduler.StepLR(optimizer, 3, gamma=0.1)
    scheduler = create_lr_scheduler(optimizer, configs)
    # Print the lr of every epoch to visually confirm the schedule.
    for i in range(configs.num_epochs):
        print(i, scheduler.get_lr())
        scheduler.step()
@@ -0,0 +1,154 @@ | |||||
""" | |||||
# -*- coding: utf-8 -*- | |||||
----------------------------------------------------------------------------------- | |||||
# Author: Nguyen Mau Dung | |||||
# DoC: 2020.08.09 | |||||
# email: nguyenmaudung93.kstn@gmail.com | |||||
----------------------------------------------------------------------------------- | |||||
# Description: The utils of the kitti dataset | |||||
""" | |||||
from __future__ import print_function | |||||
import os | |||||
import sys | |||||
import numpy as np | |||||
import cv2 | |||||
src_dir = os.path.dirname(os.path.realpath(__file__)) | |||||
# while not src_dir.endswith("sfa"): | |||||
# src_dir = os.path.dirname(src_dir) | |||||
if src_dir not in sys.path: | |||||
sys.path.append(src_dir) | |||||
import config.kitti_config as cnf | |||||
def roty(angle):
    """Return the 3x3 rotation matrix about the y-axis for `angle` radians."""
    cos_a = np.cos(angle)
    sin_a = np.sin(angle)
    return np.array([
        [cos_a, 0, sin_a],
        [0, 1, 0],
        [-sin_a, 0, cos_a],
    ])
def compute_box_3d(dim, location, ry):
    """Compute the 8 corner coordinates of a 3D box.

    Args:
        dim: (h, w, l) box dimensions.
        location: (x, y, z) box position; corners are offset from it.
        ry: rotation around the y-axis, radians.

    Returns:
        (8, 3) array of corner coordinates, first four at y=0 and the
        other four at y=-h (i.e. the box extends upward from `location`).
    """
    h, w, l = dim
    half_l, half_w = l / 2, w / 2
    # Corners in the object frame, bottom ring (y=0) then top ring (y=-h).
    x_corners = [half_l, half_l, -half_l, -half_l, half_l, half_l, -half_l, -half_l]
    y_corners = [0, 0, 0, 0, -h, -h, -h, -h]
    z_corners = [half_w, -half_w, -half_w, half_w, half_w, -half_w, -half_w, half_w]
    corners = np.array([x_corners, y_corners, z_corners], dtype=np.float32)
    # Rotate about y, then translate to the box location.
    rotated = roty(ry).dot(corners)
    corners_3d = rotated + np.array(location, dtype=np.float32).reshape(3, 1)
    return corners_3d.transpose(1, 0)
def project_to_image(pts_3d, P):
    """Project n 3D points to integer pixel coordinates.

    Args:
        pts_3d: (n, 3) points.
        P: (3, 4) projection matrix.

    Returns:
        (n, 2) integer pixel coordinates (perspective-divided by depth).
    """
    pts_3d_homo = np.concatenate([pts_3d, np.ones((pts_3d.shape[0], 1), dtype=np.float32)], axis=1)
    pts_2d = np.dot(P, pts_3d_homo.transpose(1, 0)).transpose(1, 0)
    pts_2d = pts_2d[:, :2] / pts_2d[:, 2:]
    # `np.int` was deprecated in NumPy 1.20 and removed in 1.24; the builtin
    # `int` is the exact equivalent (platform default integer).
    return pts_2d.astype(int)
def draw_box_3d_v2(image, qs, color=(255, 0, 255), thickness=2):
    ''' Draw a 3D bounding box on `image` from its 8 projected vertices.
        qs: (8,3) array of vertices for the 3d box in following order:
            1 -------- 0
           /|         /|
          2 -------- 3 .
          | |        | |
          . 5 -------- 4
          |/         |/
          6 -------- 7
    '''
    qs = qs.astype(np.int32)

    def _edge(a, b):
        # Draw a single edge between vertex indices a and b.
        # Ref: http://docs.enthought.com/mayavi/mayavi/auto/mlab_helper_functions.html
        # use LINE_AA for opencv3
        cv2.line(image, (qs[a, 0], qs[a, 1]), (qs[b, 0], qs[b, 1]), color, thickness)

    for k in range(4):
        nk = (k + 1) % 4
        _edge(k, nk)          # edge of the first face ring (vertices 0-3)
        _edge(k + 4, nk + 4)  # edge of the second face ring (vertices 4-7)
        _edge(k, k + 4)       # pillar connecting the two rings
    return image
def draw_box_3d(image, corners, color=(0, 0, 255)):
    ''' Draw a 3D bounding box on `image`; face 0 additionally gets an X
        drawn across it (presumably to mark the box heading - confirm with
        the caller's corner ordering).
        corners: (8,3) array of vertices for the 3d box in following order:
            1 -------- 0
           /|         /|
          2 -------- 3 .
          | |        | |
          . 5 -------- 4
          |/         |/
          6 -------- 7
    '''
    face_idx = [[0, 1, 5, 4],
                [1, 2, 6, 5],
                [2, 3, 7, 6],
                [3, 0, 4, 7]]
    # Faces are drawn from index 3 down to 0 so face 0's lines land last.
    for ind_f in reversed(range(4)):
        f = face_idx[ind_f]
        for j in range(4):
            start = (corners[f[j], 0], corners[f[j], 1])
            end = (corners[f[(j + 1) % 4], 0], corners[f[(j + 1) % 4], 1])
            cv2.line(image, start, end, color, 2, lineType=cv2.LINE_AA)
        if ind_f == 0:
            # Diagonals across face 0.
            cv2.line(image, (corners[f[0], 0], corners[f[0], 1]),
                     (corners[f[2], 0], corners[f[2], 1]), color, 1, lineType=cv2.LINE_AA)
            cv2.line(image, (corners[f[1], 0], corners[f[1], 1]),
                     (corners[f[3], 0], corners[f[3], 1]), color, 1, lineType=cv2.LINE_AA)
    return image
def show_rgb_image_with_boxes(img, labels, calib):
    """Draw every valid label as a projected 3D box on the RGB image.

    Each label row is sliced as [cls_id, location(3), dim(3), ry].
    Labels with cls_id < 0 or closer than 2.0 along the depth axis are
    skipped (too close to the camera for a stable visualization).
    """
    for label in labels:
        cls_id, location, dim, ry = label[0], label[1:4], label[4:7], label[7]
        if location[2] < 2.0:
            # The object is too close to the camera, ignore it during visualization
            continue
        if cls_id < 0:
            continue
        corners_2d = project_to_image(compute_box_3d(dim, location, ry), calib.P2)
        img = draw_box_3d(img, corners_2d, color=cnf.colors[int(cls_id)])
    return img
def merge_rgb_to_bev(img_rgb, img_bev, output_width):
    """Stack the RGB view on top of the BEV view in a single image.

    Both inputs are resized to `output_width`, preserving aspect ratio.

    Returns:
        uint8 array of shape (rgb_h + bev_h, output_width, 3), RGB on top.
    """
    def _resize_to_width(img):
        # Resize `img` to output_width, returning (resized, new_height).
        h, w = img.shape[:2]
        new_h = int(output_width / w * h)
        return cv2.resize(img, (output_width, new_h)), new_h

    ret_img_rgb, output_rgb_h = _resize_to_width(img_rgb)
    ret_img_bev, output_bev_h = _resize_to_width(img_bev)
    out_img = np.zeros((output_rgb_h + output_bev_h, output_width, 3), dtype=np.uint8)
    out_img[:output_rgb_h, ...] = ret_img_rgb
    out_img[output_rgb_h:, ...] = ret_img_bev
    return out_img