| @@ -1 +0,0 @@ | |||
| Subproject commit e4c429e813608acbcf487656abe2eb87dcc4636c | |||
| @@ -0,0 +1,9 @@ | |||
| dataset | |||
| # cache | |||
| __pycache__ | |||
| # results | |||
| results | |||
| # logs | |||
| logs | |||
| @@ -0,0 +1,14 @@ | |||
| <?xml version="1.0" encoding="UTF-8"?> | |||
| <project version="4"> | |||
| <component name="PublishConfigData"> | |||
| <serverData> | |||
| <paths name="root@10.5.24.134:10000"> | |||
| <serverdata> | |||
| <mappings> | |||
| <mapping local="$PROJECT_DIR$" web="/" /> | |||
| </mappings> | |||
| </serverdata> | |||
| </paths> | |||
| </serverData> | |||
| </component> | |||
| </project> | |||
| @@ -0,0 +1,6 @@ | |||
| <component name="InspectionProjectProfileManager"> | |||
| <settings> | |||
| <option name="USE_PROJECT_PROFILE" value="false" /> | |||
| <version value="1.0" /> | |||
| </settings> | |||
| </component> | |||
| @@ -0,0 +1,7 @@ | |||
| <?xml version="1.0" encoding="UTF-8"?> | |||
| <project version="4"> | |||
| <component name="JavaScriptSettings"> | |||
| <option name="languageLevel" value="ES6" /> | |||
| </component> | |||
| <component name="ProjectRootManager" version="2" project-jdk-name="Python 3.7" project-jdk-type="Python SDK" /> | |||
| </project> | |||
| @@ -0,0 +1,8 @@ | |||
| <?xml version="1.0" encoding="UTF-8"?> | |||
| <project version="4"> | |||
| <component name="ProjectModuleManager"> | |||
| <modules> | |||
| <module fileurl="file://$PROJECT_DIR$/.idea/sfa3d.iml" filepath="$PROJECT_DIR$/.idea/sfa3d.iml" /> | |||
| </modules> | |||
| </component> | |||
| </project> | |||
| @@ -0,0 +1,12 @@ | |||
| <?xml version="1.0" encoding="UTF-8"?> | |||
| <module type="PYTHON_MODULE" version="4"> | |||
| <component name="NewModuleRootManager"> | |||
| <content url="file://$MODULE_DIR$" /> | |||
| <orderEntry type="inheritedJdk" /> | |||
| <orderEntry type="sourceFolder" forTests="false" /> | |||
| </component> | |||
| <component name="PyDocumentationSettings"> | |||
| <option name="format" value="PLAIN" /> | |||
| <option name="myDocStringFormat" value="Plain" /> | |||
| </component> | |||
| </module> | |||
| @@ -0,0 +1,6 @@ | |||
| <?xml version="1.0" encoding="UTF-8"?> | |||
| <project version="4"> | |||
| <component name="VcsDirectoryMappings"> | |||
| <mapping directory="$PROJECT_DIR$" vcs="Git" /> | |||
| </component> | |||
| </project> | |||
| @@ -0,0 +1,49 @@ | |||
| <?xml version="1.0" encoding="UTF-8"?> | |||
| <project version="4"> | |||
| <component name="ChangeListManager"> | |||
| <list default="true" id="ba6cd492-6d49-41a8-a764-504006f2eb9a" name="Changes" comment="" /> | |||
| <option name="SHOW_DIALOG" value="false" /> | |||
| <option name="HIGHLIGHT_CONFLICTS" value="true" /> | |||
| <option name="HIGHLIGHT_NON_ACTIVE_CHANGELIST" value="false" /> | |||
| <option name="LAST_RESOLUTION" value="IGNORE" /> | |||
| </component> | |||
| <component name="Git.Settings"> | |||
| <option name="RECENT_BRANCH_BY_REPOSITORY"> | |||
| <map> | |||
| <entry key="$PROJECT_DIR$" value="master" /> | |||
| </map> | |||
| </option> | |||
| <option name="RECENT_GIT_ROOT_PATH" value="$PROJECT_DIR$" /> | |||
| </component> | |||
| <component name="ProjectId" id="2E4AHz6idZOBGHdHApv98dU5PkK" /> | |||
| <component name="ProjectViewState"> | |||
| <option name="hideEmptyMiddlePackages" value="true" /> | |||
| <option name="showLibraryContents" value="true" /> | |||
| </component> | |||
| <component name="PropertiesComponent"> | |||
| <property name="RunOnceActivity.OpenProjectViewOnStart" value="true" /> | |||
| <property name="RunOnceActivity.ShowReadmeOnStart" value="true" /> | |||
| </component> | |||
| <component name="SpellCheckerSettings" RuntimeDictionaries="0" Folders="0" CustomDictionaries="0" DefaultDictionary="application-level" UseSingleDictionary="true" transferred="true" /> | |||
| <component name="TaskManager"> | |||
| <task active="true" id="Default" summary="Default task"> | |||
| <changelist id="ba6cd492-6d49-41a8-a764-504006f2eb9a" name="Changes" comment="" /> | |||
| <created>1661844398596</created> | |||
| <option name="number" value="Default" /> | |||
| <option name="presentableId" value="Default" /> | |||
| <updated>1661844398596</updated> | |||
| </task> | |||
| <servers /> | |||
| </component> | |||
| <component name="Vcs.Log.Tabs.Properties"> | |||
| <option name="TAB_STATES"> | |||
| <map> | |||
| <entry key="MAIN"> | |||
| <value> | |||
| <State /> | |||
| </value> | |||
| </entry> | |||
| </map> | |||
| </option> | |||
| </component> | |||
| </project> | |||
| @@ -0,0 +1,21 @@ | |||
| MIT License | |||
| Copyright (c) 2020 Nguyen Mau Dung | |||
| Permission is hereby granted, free of charge, to any person obtaining a copy | |||
| of this software and associated documentation files (the "Software"), to deal | |||
| in the Software without restriction, including without limitation the rights | |||
| to use, copy, modify, merge, publish, distribute, sublicense, and/or sell | |||
| copies of the Software, and to permit persons to whom the Software is | |||
| furnished to do so, subject to the following conditions: | |||
| The above copyright notice and this permission notice shall be included in all | |||
| copies or substantial portions of the Software. | |||
| THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | |||
| IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | |||
| FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | |||
| AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | |||
| LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, | |||
| OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | |||
| SOFTWARE. | |||
| @@ -0,0 +1,116 @@ | |||
| # Super Fast and Accurate 3D Object Detection based on 3D LiDAR Point Clouds | |||
| [![python-image]][python-url] | |||
| [![pytorch-image]][pytorch-url] | |||
| --- | |||
| ## 1. Getting Started | |||
| ### 1.1 Requirements | |||
| The instructions for setting up a virtual environment are [here](https://github.com/maudzung/virtual_environment_python3). | |||
| ```shell script | |||
| cd SFA3D/ | |||
| pip install -r requirements.txt | |||
| ``` | |||
| ### 1.2 Data Preparation | |||
| Download the 3D KITTI detection dataset from [here](http://www.cvlibs.net/datasets/kitti/eval_object.php?obj_benchmark=3d). | |||
| The downloaded data includes: | |||
| - Velodyne point clouds _**(29 GB)**_ | |||
| - Training labels of object data set _**(5 MB)**_ | |||
| Please make sure the source code and dataset directories are structured as shown in the "Folder structure" section below. | |||
| ## 2. How to run | |||
| ### 2.1 Inference | |||
| The pre-trained model was pushed to this repo. | |||
| - **CPU** | |||
| ``` | |||
| python inference.py --no_cuda=True | |||
| ``` | |||
| - **GPU** | |||
| ``` | |||
| python inference.py | |||
| ``` | |||
| Classes detected at inference: | |||
| - Pedestrian | |||
| - Car | |||
| - Cyclist | |||
| ### 2.2 Training | |||
| #### 2.2.1 CPU | |||
| ``` | |||
| python train.py --no_cuda=True | |||
| ``` | |||
| #### 2.2.2 Single machine, single GPU | |||
| ```shell script | |||
| python train.py --gpu_idx 0 | |||
| ``` | |||
| #### 2.2.3 Distributed Data Parallel Training | |||
| - **Single machine (node), multiple GPUs** | |||
| ``` | |||
| python train.py --multiprocessing-distributed --world-size 1 --rank 0 --batch_size 64 --num_workers 8 | |||
| ``` | |||
| - **Two machines (two nodes), multiple GPUs** | |||
| - _**First machine**_ | |||
| ``` | |||
| python train.py --dist-url 'tcp://IP_OF_NODE1:FREEPORT' --multiprocessing-distributed --world-size 2 --rank 0 --batch_size 64 --num_workers 8 | |||
| ``` | |||
| - _**Second machine**_ | |||
| ``` | |||
| python train.py --dist-url 'tcp://IP_OF_NODE2:FREEPORT' --multiprocessing-distributed --world-size 2 --rank 1 --batch_size 64 --num_workers 8 | |||
| ``` | |||
| ## References | |||
| [1] SFA3D: [PyTorch Implementation](https://github.com/maudzung/SFA3D) | |||
| ## Folder structure | |||
| ### Dataset | |||
| ``` | |||
| └── kitti/ | |||
| ├── image_2/ (left color camera images, optional) | |||
| ├── calib/ (optional) | |||
| ├── label_2/ (annotation labels, optional) | |||
| └── velodyne/ (point cloud files, required) | |||
| ``` | |||
| ### Checkpoints & Algorithm | |||
| ``` | |||
| ${ROOT} | |||
| └── checkpoints/ | |||
| ├── fpn_resnet_18/ | |||
| ├── fpn_resnet_18_epoch_300.pth (point cloud object detection annotation model) | |||
| └── sfa/ (point cloud annotation algorithm) | |||
| ├── config/ | |||
| ├── data_process/ | |||
| ├── models/ | |||
| ├── utils/ | |||
| ├── inference.py | |||
| └── train.py | |||
| ├── README.md | |||
| ├── LICENSE | |||
| └── requirements.txt | |||
| ``` | |||
| [python-image]: https://img.shields.io/badge/Python-3.6-ff69b4.svg | |||
| [python-url]: https://www.python.org/ | |||
| [pytorch-image]: https://img.shields.io/badge/PyTorch-1.5-2BAF2B.svg | |||
| [pytorch-url]: https://pytorch.org/ | |||
| @@ -0,0 +1,55 @@ | |||
| # Super Fast and Accurate 3D Object Detection based on 3D LiDAR Point Clouds | |||
| --- | |||
| Technical details of the implementation | |||
| ## 1. Network architecture | |||
| - The **ResNet-based Keypoint Feature Pyramid Network** (KFPN) proposed in the [RTM3D paper](https://arxiv.org/pdf/2001.03343.pdf). | |||
| An unofficial PyTorch implementation of the RTM3D paper is available [here](https://github.com/maudzung/RTM3D). | |||
| - **Input**: | |||
| - The model takes a bird's-eye-view (BEV) map as input. | |||
| - The BEV map is encoded by height, intensity, and density of 3D LiDAR point clouds. Assume that the size of the BEV input is `(H, W, 3)`. | |||
| - **Outputs**: | |||
| - Heatmap for main center with a size of `(H/S, W/S, C)` where `S=4` _(the down-sample ratio)_, and `C=3` _(the number of classes)_ | |||
| - Center offset: `(H/S, W/S, 2)` | |||
| - The heading angle _(yaw)_: `(H/S, W/S, 2)`. The model estimates the **im**aginary and the **re**al fraction (`sin(yaw)` and `cos(yaw)` values). | |||
| - Dimension _(h, w, l)_: `(H/S, W/S, 3)` | |||
| - `z` coordinate: `(H/S, W/S, 1)` | |||
| - **Targets**: **7 degrees of freedom** _(7-DOF)_ of objects: `(cx, cy, cz, l, w, h, θ)` | |||
| - `cx, cy, cz`: The center coordinates. | |||
| - `l, w, h`: length, width, height of the bounding box. | |||
| - `θ`: The heading angle in radians of the bounding box. | |||
| - **Objects**: Cars, Pedestrians, Cyclists. | |||
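| As a quick reference, the following sketch enumerates the output shapes implied by the list above (sizes taken from the training config in this diff: input `(1216, 608)`, heatmap `(304, 152)`, `down_ratio = 4`; the tensors are placeholders, not real model outputs): | |||
| ```python | |||
| import torch | |||
| H, W, S, C = 1216, 608, 4, 3  # BEV input size, down-sample ratio, num classes | |||
| outputs = { | |||
|     'hm_cen': torch.zeros(1, C, H // S, W // S),      # main-center heatmap | |||
|     'cen_offset': torch.zeros(1, 2, H // S, W // S),  # sub-pixel center offset | |||
|     'direction': torch.zeros(1, 2, H // S, W // S),   # (sin(yaw), cos(yaw)) | |||
|     'z_coor': torch.zeros(1, 1, H // S, W // S),      # z coordinate | |||
|     'dim': torch.zeros(1, 3, H // S, W // S),         # (h, w, l) | |||
| } | |||
| for name, t in outputs.items(): | |||
|     print(name, tuple(t.shape))  # e.g. hm_cen -> (1, 3, 304, 152) | |||
| ``` | |||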
| ## 2. Loss functions | |||
| - For the main-center heatmap: `focal loss` | |||
| - For the heading angle _(yaw)_: the `im` and `re` fractions are directly regressed with `l1_loss` | |||
| - For the `z coordinate` and the `3 dimensions` (height, width, length), I used the `balanced l1 loss` proposed in the paper | |||
| [Libra R-CNN: Towards Balanced Learning for Object Detection](https://arxiv.org/pdf/1904.02701.pdf) | |||
| ## 3. Training details | |||
| - Uniform weights for all of the above loss components (`=1.0` for each). | |||
| - Number of epochs: 300. | |||
| - Learning rate scheduler: [`cosine`](https://arxiv.org/pdf/1812.01187.pdf), initial learning rate: 0.001. | |||
| - Batch size: `16` (on a single GTX 1080Ti). | |||
| ## 4. Inference | |||
| - A `3 × 3` max-pooling operation was applied on the center heat map, then only `50` predictions whose | |||
| center confidences are larger than 0.2 were kept. | |||
| - The heading angle _(yaw)_ = `arctan`(_imaginary fraction_ / _real fraction_) | |||
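| A minimal decode sketch of the two steps above in plain PyTorch (here `hm` stands in for the predicted center heatmap and `im`, `re` for the gathered direction outputs; the names are illustrative, not this repo's API): | |||
| ```python | |||
| import torch | |||
| import torch.nn.functional as F | |||
| def decode_peaks(hm, K=50, conf_thresh=0.2): | |||
|     # Keypoint NMS: a 3x3 max-pool keeps only the local maxima of the heatmap, | |||
|     # then the top-K peaks above the confidence threshold survive. | |||
|     peaks = (F.max_pool2d(hm, 3, stride=1, padding=1) == hm).float() * hm | |||
|     scores, indices = peaks.flatten(1).topk(K) | |||
|     keep = scores > conf_thresh | |||
|     return scores[keep], indices[keep] | |||
| # Yaw recovery: atan2 preserves the full [-pi, pi] range. | |||
| im, re = torch.tensor(0.5), torch.tensor(0.8660) | |||
| yaw = torch.atan2(im, re)  # ~0.5236 rad (30 degrees) | |||
| ``` | |||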
| ## 5. How to expand the work | |||
| - The model could be trained with more classes and a larger detection area by modifying the configurations in | |||
| the [config/kitti_config.py](https://github.com/maudzung/Super-Fast-Accurate-3D-Object-Detection/blob/master/src/config/kitti_config.py) file. | |||
| @@ -0,0 +1,41 @@ | |||
| absl-py==1.1.0 | |||
| cachetools==4.2.4 | |||
| certifi==2022.6.15 | |||
| charset-normalizer==2.0.12 | |||
| cycler==0.11.0 | |||
| easydict==1.9 | |||
| future==0.18.2 | |||
| google-auth==1.35.0 | |||
| google-auth-oauthlib==0.4.6 | |||
| grpcio==1.46.3 | |||
| idna==3.3 | |||
| importlib-metadata==4.11.4 | |||
| joblib==1.1.0 | |||
| kiwisolver==1.4.3 | |||
| Markdown==3.3.7 | |||
| matplotlib==3.3.3 | |||
| numpy==1.18.3 | |||
| oauthlib==3.2.0 | |||
| opencv-python==4.2.0.34 | |||
| Pillow==8.4.0 | |||
| protobuf==3.19.1 | |||
| pyasn1==0.4.8 | |||
| pyasn1-modules==0.2.8 | |||
| pyparsing==3.0.9 | |||
| python-dateutil==2.8.2 | |||
| requests==2.28.0 | |||
| requests-oauthlib==1.3.1 | |||
| rsa==4.8 | |||
| scikit-learn==0.22.2 | |||
| scipy==1.8.1 | |||
| six==1.16.0 | |||
| tensorboard==2.2.1 | |||
| tensorboard-plugin-wit==1.8.1 | |||
| torch==1.5.0 | |||
| torchsummary==1.5.1 | |||
| torchvision==0.6.0 | |||
| tqdm==4.54.0 | |||
| urllib3==1.26.9 | |||
| Werkzeug==2.1.2 | |||
| wget==3.2 | |||
| zipp==3.8.0 | |||
| @@ -0,0 +1,99 @@ | |||
| import math | |||
| import numpy as np | |||
| # Car and Van ==> Car class | |||
| # Pedestrian and Person_Sitting ==> Pedestrian Class | |||
| # for train | |||
| CLASS_NAME_TO_ID = { | |||
| 'Pedestrian': 0, | |||
| 'Car': 1, | |||
| 'Cyclist': 2, | |||
| 'Van': 1, | |||
| 'Truck': -3, | |||
| 'Person_sitting': 0, | |||
| 'Tram': -99, | |||
| 'Misc': -99, | |||
| 'TraffiCone': -1, | |||
| 'DontCare': -1 | |||
| } | |||
| # for test | |||
| CLASS_ID_TO_NAME = { | |||
| 0: 'Pedestrian', # Person_sitting in the same class | |||
| 1: 'Car', # Van in the same class | |||
| 2: 'Cyclist' | |||
| } | |||
| colors = [[0, 255, 255], [0, 0, 255], [255, 0, 0], [255, 120, 0], | |||
| [255, 120, 120], [0, 120, 0], [120, 255, 255], [120, 0, 255]] | |||
| ##################################################################################### | |||
| boundary = { | |||
| "minX": -50, | |||
| "maxX": 50, | |||
| "minY": -25, | |||
| "maxY": 25, | |||
| "minZ": -2.73, | |||
| "maxZ": 1.27 | |||
| } | |||
| bound_size_x = boundary['maxX'] - boundary['minX'] | |||
| bound_size_y = boundary['maxY'] - boundary['minY'] | |||
| bound_size_z = boundary['maxZ'] - boundary['minZ'] | |||
| boundary_back = { | |||
| "minX": -50, | |||
| "maxX": 0, | |||
| "minY": -25, | |||
| "maxY": 25, | |||
| "minZ": -2.73, | |||
| "maxZ": 1.27 | |||
| } | |||
| BEV_WIDTH = 608 # across y axis -25m ~ 25m | |||
| BEV_HEIGHT = 1216 # across x axis -50m ~ 50m | |||
| DISCRETIZATION = (boundary["maxX"] - boundary["minX"]) / BEV_HEIGHT | |||
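| # The _Y/_X suffixes below refer to BEV image axes: LiDAR x maps to image rows (y), | |||
| # LiDAR y maps to image columns (x); all three values are in meters per pixel. | |||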
| DISCRETIZATION_Y = (boundary["maxX"] - boundary["minX"]) / BEV_HEIGHT | |||
| DISCRETIZATION_X = (boundary["maxY"] - boundary["minY"]) / BEV_WIDTH | |||
| # maximum number of points per voxel | |||
| T = 35 | |||
| # voxel size | |||
| vd = 0.1 # z | |||
| vh = 0.05 # y | |||
| vw = 0.05 # x | |||
| # voxel grid | |||
| W = math.ceil(bound_size_x / vw) | |||
| H = math.ceil(bound_size_y / vh) | |||
| D = math.ceil(bound_size_z / vd) | |||
| # The following parameters are averages computed from the KITTI dataset, for simplicity | |||
| ##################################################################################### | |||
| Tr_velo_to_cam = np.array([ | |||
| [7.49916597e-03, -9.99971248e-01, -8.65110297e-04, -6.71807577e-03], | |||
| [1.18652889e-02, 9.54520517e-04, -9.99910318e-01, -7.33152811e-02], | |||
| [9.99882833e-01, 7.49141178e-03, 1.18719929e-02, -2.78557062e-01], | |||
| [0, 0, 0, 1] | |||
| ]) | |||
| # calculated as the mean over the train set | |||
| R0 = np.array([ | |||
| [0.99992475, 0.00975976, -0.00734152, 0], | |||
| [-0.0097913, 0.99994262, -0.00430371, 0], | |||
| [0.00729911, 0.0043753, 0.99996319, 0], | |||
| [0, 0, 0, 1] | |||
| ]) | |||
| P2 = np.array([[719.787081, 0., 608.463003, 44.9538775], | |||
| [0., 719.787081, 174.545111, 0.1066855], | |||
| [0., 0., 1., 3.0106472e-03], | |||
| [0., 0., 0., 0] | |||
| ]) | |||
| R0_inv = np.linalg.inv(R0) | |||
| Tr_velo_to_cam_inv = np.linalg.inv(Tr_velo_to_cam) | |||
| P2_inv = np.linalg.pinv(P2) | |||
| ##################################################################################### | |||
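| # Usage sketch (comments only, not executed): project one homogeneous LiDAR | |||
| # point into the image-2 plane with the average calibration above: | |||
| #   pt_velo = np.array([10.0, 1.0, -1.0, 1.0]) | |||
| #   pt_img = P2 @ R0 @ Tr_velo_to_cam @ pt_velo   # homogeneous image point | |||
| #   u, v = pt_img[0] / pt_img[2], pt_img[1] / pt_img[2] | |||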
| @@ -0,0 +1,172 @@ | |||
| """ | |||
| # -*- coding: utf-8 -*- | |||
| ----------------------------------------------------------------------------------- | |||
| # Author: Nguyen Mau Dung | |||
| # DoC: 2020.08.17 | |||
| # email: nguyenmaudung93.kstn@gmail.com | |||
| ----------------------------------------------------------------------------------- | |||
| # Description: The configurations of the project will be defined here | |||
| """ | |||
| import os | |||
| import argparse | |||
| import torch | |||
| from easydict import EasyDict as edict | |||
| def parse_train_configs(): | |||
| parser = argparse.ArgumentParser(description='The Implementation using PyTorch') | |||
| parser.add_argument('--seed', type=int, default=2020, | |||
| help='random seed for reproducing results') | |||
| parser.add_argument('--saved_fn', type=str, default='fpn_resnet_18', metavar='FN', | |||
| help='the name used for saving logs, models, ...') | |||
| parser.add_argument('--root_dir', type=str, default='../', metavar='PATH', | |||
| help='The ROOT working directory') | |||
| #################################################################### | |||
| ############## Model configs ######################## | |||
| #################################################################### | |||
| parser.add_argument('--arch', type=str, default='fpn_resnet_18', metavar='ARCH', | |||
| help='The name of the model architecture') | |||
| parser.add_argument('--model_load_dir', type=str, default=None, metavar='PATH', | |||
| help='the path of the pretrained checkpoint') | |||
| #################################################################### | |||
| ############## Dataloader and Running configs ####### | |||
| #################################################################### | |||
| parser.add_argument('--data_url', type=str, default='../dataset/apollo/training', metavar='PATH', | |||
| help='the path of the dataset') | |||
| parser.add_argument('--val_data_url', type=str, default='../dataset/apollo/val', metavar='PATH', | |||
| help='the path of the validation dataset') | |||
| parser.add_argument('--train_model_out', type=str, default='../checkpoints', metavar='PATH', | |||
| help='the path of the model output') | |||
| parser.add_argument('--train_out', type=str, default='../logs', metavar='PATH', | |||
| help='the path of the logs output') | |||
| parser.add_argument('--hflip_prob', type=float, default=0.5, | |||
| help='The probability of horizontal flip') | |||
| parser.add_argument('--no-val', action='store_true', | |||
| help="If true, don't evaluate the model on the val set") | |||
| parser.add_argument('--num_samples', type=int, default=None, | |||
| help='Take a subset of the dataset to run and debug') | |||
| parser.add_argument('--num_workers', type=int, default=4, | |||
| help='Number of threads for loading data') | |||
| parser.add_argument('--batch_size', type=int, default=8, | |||
| help='mini-batch size (default: 8), this is the total ' | |||
| 'batch size of all GPUs on the current node when using ' | |||
| 'Data Parallel or Distributed Data Parallel') | |||
| parser.add_argument('--print_freq', type=int, default=50, metavar='N', | |||
| help='print frequency (default: 50)') | |||
| parser.add_argument('--tensorboard_freq', type=int, default=50, metavar='N', | |||
| help='frequency of saving tensorboard (default: 50)') | |||
| parser.add_argument('--checkpoint_freq', type=int, default=2, metavar='N', | |||
| help='frequency of saving checkpoints (default: 2)') | |||
| parser.add_argument('--gpu_num_per_node', type=int, default=1, | |||
| help='number of GPUs per node') | |||
| #################################################################### | |||
| ############## Training strategy #################### | |||
| #################################################################### | |||
| parser.add_argument('--start_epoch', type=int, default=1, metavar='N', | |||
| help='the starting epoch') | |||
| parser.add_argument('--num_epochs', type=int, default=300, metavar='N', | |||
| help='number of total epochs to run') | |||
| parser.add_argument('--lr_type', type=str, default='cosin', | |||
| help='the type of learning rate scheduler (cosin or multi_step or one_cycle)') | |||
| parser.add_argument('--lr', type=float, default=0.001, metavar='LR', | |||
| help='initial learning rate') | |||
| parser.add_argument('--minimum_lr', type=float, default=1e-7, metavar='MIN_LR', | |||
| help='minimum learning rate during training') | |||
| parser.add_argument('--momentum', type=float, default=0.949, metavar='M', | |||
| help='momentum') | |||
| parser.add_argument('-wd', '--weight_decay', type=float, default=0., metavar='WD', | |||
| help='weight decay (default: 0.)') | |||
| parser.add_argument('--optimizer_type', type=str, default='adam', metavar='OPTIMIZER', | |||
| help='the type of optimizer, it can be sgd or adam') | |||
| parser.add_argument('--steps', nargs='*', default=[150, 180], | |||
| help='the epochs at which the multi_step scheduler decays the learning rate') | |||
| #################################################################### | |||
| ############## Loss weight ########################## | |||
| #################################################################### | |||
| #################################################################### | |||
| ############## Distributed Data Parallel ############ | |||
| #################################################################### | |||
| parser.add_argument('--world-size', default=-1, type=int, metavar='N', | |||
| help='number of nodes for distributed training') | |||
| parser.add_argument('--rank', default=-1, type=int, metavar='N', | |||
| help='node rank for distributed training') | |||
| parser.add_argument('--dist-url', default='tcp://127.0.0.1:29500', type=str, | |||
| help='url used to set up distributed training') | |||
| parser.add_argument('--dist-backend', default='nccl', type=str, | |||
| help='distributed backend') | |||
| parser.add_argument('--gpu_idx', default=0, type=int, | |||
| help='GPU index to use.') | |||
| parser.add_argument('--no_cuda', default=False, | |||
| help='If true, cuda is not used.') | |||
| parser.add_argument('--multiprocessing-distributed', action='store_true', | |||
| help='Use multi-processing distributed training to launch ' | |||
| 'N processes per node, which has N GPUs. This is the ' | |||
| 'fastest way to use PyTorch for either single node or ' | |||
| 'multi node data parallel training') | |||
| #################################################################### | |||
| ############## Evaluation configurations ################### | |||
| #################################################################### | |||
| parser.add_argument('--evaluate', action='store_true', | |||
| help='only evaluate the model, not training') | |||
| parser.add_argument('--resume_path', type=str, default=None, metavar='PATH', | |||
| help='the path of the resumed checkpoint') | |||
| parser.add_argument('--K', type=int, default=50, | |||
| help='the number of top K') | |||
| configs = edict(vars(parser.parse_args())) | |||
| #################################################################### | |||
| ############## Hardware configurations ############################# | |||
| #################################################################### | |||
| # configs.device = torch.device('cpu' if configs.no_cuda else 'cuda') | |||
| configs.device = torch.device('cpu' if configs.no_cuda else 'cuda:{}'.format(configs.gpu_idx)) | |||
| configs.ngpus_per_node = torch.cuda.device_count() | |||
| configs.pin_memory = True | |||
| configs.input_size = (1216, 608) | |||
| configs.hm_size = (304, 152) | |||
| configs.down_ratio = 4 | |||
| configs.max_objects = 50 | |||
| configs.imagenet_pretrained = True | |||
| configs.head_conv = 64 | |||
| configs.num_classes = 3 | |||
| configs.num_center_offset = 2 | |||
| configs.num_z = 1 | |||
| configs.num_dim = 3 | |||
| configs.num_direction = 2 # sin, cos | |||
| configs.heads = { | |||
| 'hm_cen': configs.num_classes, | |||
| 'cen_offset': configs.num_center_offset, | |||
| 'direction': configs.num_direction, | |||
| 'z_coor': configs.num_z, | |||
| 'dim': configs.num_dim | |||
| } | |||
| configs.num_input_features = 4 | |||
| #################################################################### | |||
| ############## Dataset, logs, Checkpoints dir ###################### | |||
| #################################################################### | |||
| configs.dataset = 'apollo' # or kitti | |||
| configs.dataset_dir = configs.data_url | |||
| # configs.checkpoints_dir = os.path.join(configs.train_model_out, configs.saved_fn) | |||
| configs.checkpoints_dir = configs.train_model_out | |||
| # configs.logs_dir = os.path.join(configs.train_out, configs.saved_fn) | |||
| configs.logs_dir = configs.train_out | |||
| configs.pretrained_path = configs.model_load_dir | |||
| if not os.path.isdir(configs.checkpoints_dir): | |||
| os.makedirs(configs.checkpoints_dir) | |||
| if not os.path.isdir(configs.logs_dir): | |||
| os.makedirs(configs.logs_dir) | |||
| return configs | |||
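| if __name__ == '__main__': | |||
|     # Quick sanity check (sketch): parse the defaults and print the derived | |||
|     # device and the detection-head spec built above. | |||
|     configs = parse_train_configs() | |||
|     print(configs.device) | |||
|     print(configs.heads) | |||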
| @@ -0,0 +1,99 @@ | |||
| """ | |||
| # -*- coding: utf-8 -*- | |||
| ----------------------------------------------------------------------------------- | |||
| # Author: Nguyen Mau Dung | |||
| # DoC: 2020.08.17 | |||
| # email: nguyenmaudung93.kstn@gmail.com | |||
| ----------------------------------------------------------------------------------- | |||
| # Description: This script is for the KITTI dataset | |||
| """ | |||
| import sys | |||
| import os | |||
| from builtins import int | |||
| from glob import glob | |||
| import numpy as np | |||
| from torch.utils.data import Dataset | |||
| import cv2 | |||
| import torch | |||
| src_dir = os.path.dirname(os.path.realpath(__file__)) | |||
| # while not src_dir.endswith("sfa"): | |||
| # src_dir = os.path.dirname(src_dir) | |||
| if src_dir not in sys.path: | |||
| sys.path.append(src_dir) | |||
| from data_process.kitti_data_utils import get_filtered_lidar | |||
| from data_process.kitti_bev_utils import makeBEVMap | |||
| import config.kitti_config as cnf | |||
| class Demo_KittiDataset(Dataset): | |||
| def __init__(self, configs): | |||
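| # Raw-KITTI-style layout assumed: <dataset_dir>/<foldername>/<date>/<foldername>/..., | |||
| # where the first 10 characters of foldername (e.g. '2011_09_26') are the date. | |||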
| self.dataset_dir = os.path.join(configs.dataset_dir, configs.foldername, configs.foldername[:10], | |||
| configs.foldername) | |||
| self.input_size = configs.input_size | |||
| self.hm_size = configs.hm_size | |||
| self.num_classes = configs.num_classes | |||
| self.max_objects = configs.max_objects | |||
| self.image_dir = os.path.join(self.dataset_dir, "image_02", "data") | |||
| self.lidar_dir = os.path.join(self.dataset_dir, "velodyne_points", "data") | |||
| self.label_dir = os.path.join(self.dataset_dir, "label_2", "data") | |||
| self.sample_id_list = sorted(glob(os.path.join(self.lidar_dir, '*.bin'))) | |||
| self.sample_id_list = [float(os.path.basename(fn)[:-4]) for fn in self.sample_id_list] | |||
| self.num_samples = len(self.sample_id_list) | |||
| def __len__(self): | |||
| return len(self.sample_id_list) | |||
| def __getitem__(self, index): | |||
| pass | |||
| def load_bevmap_front(self, index): | |||
| """Load only image for the testing phase""" | |||
| sample_id = int(self.sample_id_list[index]) | |||
| img_path, img_rgb = self.get_image(sample_id) | |||
| lidarData = self.get_lidar(sample_id) | |||
| front_lidar = get_filtered_lidar(lidarData, cnf.boundary) | |||
| front_bevmap = makeBEVMap(front_lidar, cnf.boundary) | |||
| front_bevmap = torch.from_numpy(front_bevmap) | |||
| metadatas = { | |||
| 'img_path': img_path, | |||
| } | |||
| return metadatas, front_bevmap, img_rgb | |||
| def load_bevmap_front_vs_back(self, index): | |||
| """Load only image for the testing phase""" | |||
| sample_id = int(self.sample_id_list[index]) | |||
| img_path, img_rgb = self.get_image(sample_id) | |||
| lidarData = self.get_lidar(sample_id) | |||
| front_lidar = get_filtered_lidar(lidarData, cnf.boundary) | |||
| front_bevmap = makeBEVMap(front_lidar, cnf.boundary) | |||
| front_bevmap = torch.from_numpy(front_bevmap) | |||
| back_lidar = get_filtered_lidar(lidarData, cnf.boundary_back) | |||
| back_bevmap = makeBEVMap(back_lidar, cnf.boundary_back) | |||
| back_bevmap = torch.from_numpy(back_bevmap) | |||
| metadatas = { | |||
| 'img_path': img_path, | |||
| } | |||
| return metadatas, front_bevmap, back_bevmap, img_rgb | |||
| def get_image(self, idx): | |||
| img_path = os.path.join(self.image_dir, '{:010d}.png'.format(idx)) | |||
| img = cv2.cvtColor(cv2.imread(img_path), cv2.COLOR_BGR2RGB) | |||
| return img_path, img | |||
| def get_lidar(self, idx): | |||
| lidar_file = os.path.join(self.lidar_dir, '{:010d}.bin'.format(idx)) | |||
| # assert os.path.isfile(lidar_file) | |||
| return np.fromfile(lidar_file, dtype=np.float32).reshape(-1, 4) | |||
| @@ -0,0 +1,98 @@ | |||
| """ | |||
| # -*- coding: utf-8 -*- | |||
| ----------------------------------------------------------------------------------- | |||
| """ | |||
| import math | |||
| import os | |||
| import sys | |||
| import cv2 | |||
| import numpy as np | |||
| src_dir = os.path.dirname(os.path.realpath(__file__)) | |||
| # while not src_dir.endswith("sfa"): | |||
| # src_dir = os.path.dirname(src_dir) | |||
| if src_dir not in sys.path: | |||
| sys.path.append(src_dir) | |||
| import config.kitti_config as cnf | |||
| def makeBEVMap(PointCloud_, boundary): | |||
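| """Encode a filtered point cloud into a 3-channel BEV map: | |||
| channel 0 = intensity, channel 1 = normalized height, channel 2 = point density.""" | |||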
| Height = cnf.BEV_HEIGHT + 1 | |||
| Width = cnf.BEV_WIDTH + 1 | |||
| # Discretize Feature Map | |||
| PointCloud = np.copy(PointCloud_) | |||
| # PointCloud[:, 0] = np.int_(np.floor(PointCloud[:, 0] / cnf.DISCRETIZATION)) | |||
| # PointCloud[:, 1] = np.int_(np.floor(PointCloud[:, 1] / cnf.DISCRETIZATION) + Width / 2) | |||
| # For the Apollo dataset: detect a full 360° around the sensor | |||
| PointCloud[:, 0] = np.int_(np.floor(PointCloud[:, 0] / cnf.DISCRETIZATION_Y) + Height / 2) | |||
| PointCloud[:, 1] = np.int_(np.floor(PointCloud[:, 1] / cnf.DISCRETIZATION_X) + Width / 2) | |||
| # lexsort by x, then y, then descending z (highest point comes first per cell) | |||
| indices = np.lexsort((-PointCloud[:, 2], PointCloud[:, 1], PointCloud[:, 0])) | |||
| PointCloud = PointCloud[indices] | |||
| # Height Map | |||
| heightMap = np.zeros((Height, Width)) | |||
| _, indices = np.unique(PointCloud[:, 0:2], axis=0, return_index=True) | |||
| PointCloud_frac = PointCloud[indices] | |||
| # Note: image coordinates are (y, x), not (x, y) | |||
| max_height = float(np.abs(boundary['maxZ'] - boundary['minZ'])) | |||
| heightMap[np.int_(PointCloud_frac[:, 0]), np.int_(PointCloud_frac[:, 1])] = PointCloud_frac[:, 2] / max_height #(1217,609) | |||
| # Intensity Map & DensityMap | |||
| intensityMap = np.zeros((Height, Width)) | |||
| densityMap = np.zeros((Height, Width)) | |||
| _, indices, counts = np.unique(PointCloud[:, 0:2], axis=0, return_index=True, return_counts=True) | |||
| PointCloud_top = PointCloud[indices] | |||
| normalizedCounts = np.minimum(1.0, np.log(counts + 1) / np.log(64)) | |||
| intensityMap[np.int_(PointCloud_top[:, 0]), np.int_(PointCloud_top[:, 1])] = PointCloud_top[:, 3] / 255.0 # Hesai 40P intensity range is 0~255 | |||
| densityMap[np.int_(PointCloud_top[:, 0]), np.int_(PointCloud_top[:, 1])] = normalizedCounts | |||
| RGB_Map = np.zeros((3, Height - 1, Width - 1)) | |||
| RGB_Map[2, :, :] = densityMap[:cnf.BEV_HEIGHT, :cnf.BEV_WIDTH] # r_map | |||
| RGB_Map[1, :, :] = heightMap[:cnf.BEV_HEIGHT, :cnf.BEV_WIDTH] # g_map | |||
| RGB_Map[0, :, :] = intensityMap[:cnf.BEV_HEIGHT, :cnf.BEV_WIDTH] # b_map | |||
| return RGB_Map | |||
| # bev image coordinates format | |||
| def get_corners(x, y, w, l, yaw): | |||
| bev_corners = np.zeros((4, 2), dtype=np.float32) | |||
| cos_yaw = np.cos(yaw) | |||
| sin_yaw = np.sin(yaw) | |||
| # front left | |||
| bev_corners[0, 0] = x - w / 2 * cos_yaw - l / 2 * sin_yaw | |||
| bev_corners[0, 1] = y - w / 2 * sin_yaw + l / 2 * cos_yaw | |||
| # rear left | |||
| bev_corners[1, 0] = x - w / 2 * cos_yaw + l / 2 * sin_yaw | |||
| bev_corners[1, 1] = y - w / 2 * sin_yaw - l / 2 * cos_yaw | |||
| # rear right | |||
| bev_corners[2, 0] = x + w / 2 * cos_yaw + l / 2 * sin_yaw | |||
| bev_corners[2, 1] = y + w / 2 * sin_yaw - l / 2 * cos_yaw | |||
| # front right | |||
| bev_corners[3, 0] = x + w / 2 * cos_yaw - l / 2 * sin_yaw | |||
| bev_corners[3, 1] = y + w / 2 * sin_yaw + l / 2 * cos_yaw | |||
| return bev_corners | |||
| def drawRotatedBox(img, x, y, w, l, yaw, color): | |||
| bev_corners = get_corners(x, y, w, l, yaw) | |||
| corners_int = bev_corners.reshape(-1, 1, 2).astype(int) | |||
| cv2.polylines(img, [corners_int], True, color, 2) | |||
| corners_int = bev_corners.reshape(-1, 2) | |||
| # Highlight the front edge of the box with a contrasting line | |||
| cv2.line(img, (int(corners_int[0, 0]), int(corners_int[0, 1])), (int(corners_int[3, 0]), int(corners_int[3, 1])), (255, 255, 0), 2) | |||
| @@ -0,0 +1,324 @@ | |||
| """ | |||
| # -*- coding: utf-8 -*- | |||
| ----------------------------------------------------------------------------------- | |||
| # Author: Nguyen Mau Dung | |||
| # DoC: 2020.08.17 | |||
| # email: nguyenmaudung93.kstn@gmail.com | |||
| ----------------------------------------------------------------------------------- | |||
| # Description: The utils of the kitti dataset | |||
| """ | |||
| from __future__ import print_function | |||
| import os | |||
| import sys | |||
| import numpy as np | |||
| import cv2 | |||
| src_dir = os.path.dirname(os.path.realpath(__file__)) | |||
| # while not src_dir.endswith("sfa"): | |||
| # src_dir = os.path.dirname(src_dir) | |||
| if src_dir not in sys.path: | |||
| sys.path.append(src_dir) | |||
| import config.kitti_config as cnf | |||
| class Object3d(object): | |||
| ''' 3d object label ''' | |||
| def __init__(self, label_file_line): | |||
| data = label_file_line.split(' ') | |||
| data[1:] = [float(x) for x in data[1:]] | |||
| # extract label, truncation, occlusion | |||
| self.type = data[0] # 'Car', 'Pedestrian', ... | |||
| self.cls_id = self.cls_type_to_id(self.type) | |||
| self.truncation = data[1] # truncated pixel ratio [0..1] | |||
| self.occlusion = int(data[2]) # 0=visible, 1=partly occluded, 2=fully occluded, 3=unknown | |||
| self.alpha = data[3] # object observation angle [-pi..pi] | |||
| # extract 2d bounding box in 0-based coordinates | |||
| self.xmin = data[4] # left | |||
| self.ymin = data[5] # top | |||
| self.xmax = data[6] # right | |||
| self.ymax = data[7] # bottom | |||
| self.box2d = np.array([self.xmin, self.ymin, self.xmax, self.ymax]) | |||
| # extract 3d bounding box information | |||
| self.h = data[8] # box height | |||
| self.w = data[9] # box width | |||
| self.l = data[10] # box length (in meters) | |||
| self.t = (data[11], data[12], data[13]) # location (x,y,z) in camera coord. | |||
| self.dis_to_cam = np.linalg.norm(self.t) | |||
| self.ry = data[14] # yaw angle (around Y-axis in camera coordinates) [-pi..pi] | |||
| self.score = data[15] if len(data) == 16 else -1.0 | |||
| self.level_str = None | |||
| self.level = self.get_obj_level() | |||
| def cls_type_to_id(self, cls_type): | |||
| if cls_type not in cnf.CLASS_NAME_TO_ID.keys(): | |||
| return -1 | |||
| return cnf.CLASS_NAME_TO_ID[cls_type] | |||
| def get_obj_level(self): | |||
| height = float(self.box2d[3]) - float(self.box2d[1]) + 1 | |||
| if height >= 40 and self.truncation <= 0.15 and self.occlusion <= 0: | |||
| self.level_str = 'Easy' | |||
| return 1 # Easy | |||
| elif height >= 25 and self.truncation <= 0.3 and self.occlusion <= 1: | |||
| self.level_str = 'Moderate' | |||
| return 2 # Moderate | |||
| elif height >= 25 and self.truncation <= 0.5 and self.occlusion <= 2: | |||
| self.level_str = 'Hard' | |||
| return 3 # Hard | |||
| else: | |||
| self.level_str = 'UnKnown' | |||
| return 4 | |||
| def print_object(self): | |||
| print('Type, truncation, occlusion, alpha: %s, %d, %d, %f' % \ | |||
| (self.type, self.truncation, self.occlusion, self.alpha)) | |||
| print('2d bbox (x0,y0,x1,y1): %f, %f, %f, %f' % \ | |||
| (self.xmin, self.ymin, self.xmax, self.ymax)) | |||
| print('3d bbox h,w,l: %f, %f, %f' % \ | |||
| (self.h, self.w, self.l)) | |||
| print('3d bbox location, ry: (%f, %f, %f), %f' % \ | |||
| (self.t[0], self.t[1], self.t[2], self.ry)) | |||
| def to_kitti_format(self): | |||
| kitti_str = '%s %.2f %d %.2f %.2f %.2f %.2f %.2f %.2f %.2f %.2f %.2f %.2f %.2f %.2f %.2f' \ | |||
| % (self.type, self.truncation, int(self.occlusion), self.alpha, self.box2d[0], self.box2d[1], | |||
| self.box2d[2], self.box2d[3], self.h, self.w, self.l, self.t[0], self.t[1], self.t[2], | |||
| self.ry, self.score) | |||
| return kitti_str | |||
| def read_label(label_filename): | |||
| lines = [line.rstrip() for line in open(label_filename)] | |||
| objects = [Object3d(line) for line in lines] | |||
| return objects | |||
| class Calibration(object): | |||
| ''' Calibration matrices and utils | |||
| 3d XYZ in <label>.txt are in rect camera coord. | |||
| 2d box xy are in image2 coord | |||
| Points in <lidar>.bin are in Velodyne coord. | |||
| y_image2 = P^2_rect * x_rect | |||
| y_image2 = P^2_rect * R0_rect * Tr_velo_to_cam * x_velo | |||
| x_ref = Tr_velo_to_cam * x_velo | |||
| x_rect = R0_rect * x_ref | |||
| P^2_rect = [f^2_u, 0, c^2_u, -f^2_u b^2_x; | |||
| 0, f^2_v, c^2_v, -f^2_v b^2_y; | |||
| 0, 0, 1, 0] | |||
| = K * [1|t] | |||
| image2 coord: | |||
| ----> x-axis (u) | |||
| | | |||
| | | |||
| v y-axis (v) | |||
| velodyne coord: | |||
| front x, left y, up z | |||
| rect/ref camera coord: | |||
| right x, down y, front z | |||
| Ref (KITTI paper): http://www.cvlibs.net/publications/Geiger2013IJRR.pdf | |||
| TODO(rqi): do matrix multiplication only once for each projection. | |||
| ''' | |||
| def __init__(self, calib_filepath): | |||
| calibs = self.read_calib_file(calib_filepath) | |||
| # Projection matrix from rect camera coord to image2 coord | |||
| self.P2 = calibs['P2'] | |||
| self.P2 = np.reshape(self.P2, [3, 4]) | |||
| self.P3 = calibs['P3'] | |||
| self.P3 = np.reshape(self.P3, [3, 4]) | |||
| # Rigid transform from Velodyne coord to reference camera coord | |||
| self.V2C = calibs['Tr_velo2cam'] | |||
| self.V2C = np.reshape(self.V2C, [3, 4]) | |||
| # Rotation from reference camera coord to rect camera coord | |||
| self.R0 = calibs['R_rect'] | |||
| self.R0 = np.reshape(self.R0, [3, 3]) | |||
| # Camera intrinsics and extrinsics | |||
| self.c_u = self.P2[0, 2] | |||
| self.c_v = self.P2[1, 2] | |||
| self.f_u = self.P2[0, 0] | |||
| self.f_v = self.P2[1, 1] | |||
| self.b_x = self.P2[0, 3] / (-self.f_u) # relative | |||
| self.b_y = self.P2[1, 3] / (-self.f_v) | |||
| def read_calib_file(self, filepath): | |||
| with open(filepath) as f: | |||
| lines = f.readlines() | |||
| obj = lines[2].strip().split(' ')[1:] | |||
| P2 = np.array(obj, dtype=np.float32) | |||
| obj = lines[3].strip().split(' ')[1:] | |||
| P3 = np.array(obj, dtype=np.float32) | |||
| obj = lines[4].strip().split(' ')[1:] | |||
| R0 = np.array(obj, dtype=np.float32) | |||
| obj = lines[5].strip().split(' ')[1:] | |||
| Tr_velo_to_cam = np.array(obj, dtype=np.float32) | |||
| return {'P2': P2.reshape(3, 4), | |||
| 'P3': P3.reshape(3, 4), | |||
| 'R_rect': R0.reshape(3, 3), | |||
| 'Tr_velo2cam': Tr_velo_to_cam.reshape(3, 4)} | |||
| def cart2hom(self, pts_3d): | |||
| """ | |||
| :param pts: (N, 3 or 2) | |||
| :return pts_hom: (N, 4 or 3) | |||
| """ | |||
| pts_hom = np.hstack((pts_3d, np.ones((pts_3d.shape[0], 1), dtype=np.float32))) | |||
| return pts_hom | |||
| def compute_radius(det_size, min_overlap=0.7): | |||
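| """Smallest Gaussian radius such that a box whose corners are shifted by that | |||
| radius still overlaps the ground truth with IoU >= min_overlap (CenterNet heuristic).""" | |||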
| height, width = det_size | |||
| a1 = 1 | |||
| b1 = (height + width) | |||
| c1 = width * height * (1 - min_overlap) / (1 + min_overlap) | |||
| sq1 = np.sqrt(b1 ** 2 - 4 * a1 * c1) | |||
| r1 = (b1 + sq1) / 2 | |||
| a2 = 4 | |||
| b2 = 2 * (height + width) | |||
| c2 = (1 - min_overlap) * width * height | |||
| sq2 = np.sqrt(b2 ** 2 - 4 * a2 * c2) | |||
| r2 = (b2 + sq2) / 2 | |||
| a3 = 4 * min_overlap | |||
| b3 = -2 * min_overlap * (height + width) | |||
| c3 = (min_overlap - 1) * width * height | |||
| sq3 = np.sqrt(b3 ** 2 - 4 * a3 * c3) | |||
| r3 = (b3 + sq3) / 2 | |||
| return min(r1, r2, r3) | |||
| def gaussian2D(shape, sigma=1): | |||
| m, n = [(ss - 1.) / 2. for ss in shape] | |||
| y, x = np.ogrid[-m:m + 1, -n:n + 1] | |||
| h = np.exp(-(x * x + y * y) / (2 * sigma * sigma)) | |||
| h[h < np.finfo(h.dtype).eps * h.max()] = 0 | |||
| return h | |||
| def gen_hm_radius(heatmap, center, radius, k=1): | |||
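| """Splat a 2D Gaussian of the given radius onto `heatmap` at `center` (in place).""" | |||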
| diameter = 2 * radius + 1 | |||
| gaussian = gaussian2D((diameter, diameter), sigma=diameter / 6) | |||
| x, y = int(center[0]), int(center[1]) | |||
| height, width = heatmap.shape[0:2] | |||
| left, right = min(x, radius), min(width - x, radius + 1) | |||
| top, bottom = min(y, radius), min(height - y, radius + 1) | |||
| masked_heatmap = heatmap[y - top:y + bottom, x - left:x + right] | |||
| masked_gaussian = gaussian[radius - top:radius + bottom, radius - left:radius + right] | |||
| if min(masked_gaussian.shape) > 0 and min(masked_heatmap.shape) > 0: # TODO debug | |||
| np.maximum(masked_heatmap, masked_gaussian * k, out=masked_heatmap) | |||
| return heatmap | |||
| def get_filtered_lidar(lidar, boundary, labels=None): | |||
| minX = boundary['minX'] | |||
| maxX = boundary['maxX'] | |||
| minY = boundary['minY'] | |||
| maxY = boundary['maxY'] | |||
| minZ = boundary['minZ'] | |||
| maxZ = boundary['maxZ'] | |||
| # Remove the point out of range x,y,z | |||
| mask = np.where((lidar[:, 0] >= minX) & (lidar[:, 0] <= maxX) & | |||
| (lidar[:, 1] >= minY) & (lidar[:, 1] <= maxY) & | |||
| (lidar[:, 2] >= minZ) & (lidar[:, 2] <= maxZ)) | |||
| lidar = lidar[mask] | |||
| lidar[:, 2] = lidar[:, 2] - minZ | |||
| if labels is not None: | |||
| label_x = (labels[:, 1] >= minX) & (labels[:, 1] < maxX) | |||
| label_y = (labels[:, 2] >= minY) & (labels[:, 2] < maxY) | |||
| label_z = (labels[:, 3] >= minZ) & (labels[:, 3] < maxZ) | |||
| mask_label = label_x & label_y & label_z | |||
| labels = labels[mask_label] | |||
| return lidar, labels | |||
| else: | |||
| return lidar | |||
| def box3d_corners_to_center(box3d_corner): | |||
| # (N, 8, 3) -> (N, 7) | |||
| assert box3d_corner.ndim == 3 | |||
| xyz = np.mean(box3d_corner, axis=1) | |||
| h = abs(np.mean(box3d_corner[:, 4:, 2] - box3d_corner[:, :4, 2], axis=1, keepdims=True)) | |||
| w = (np.sqrt(np.sum((box3d_corner[:, 0, [0, 1]] - box3d_corner[:, 1, [0, 1]]) ** 2, axis=1, keepdims=True)) + | |||
| np.sqrt(np.sum((box3d_corner[:, 2, [0, 1]] - box3d_corner[:, 3, [0, 1]]) ** 2, axis=1, keepdims=True)) + | |||
| np.sqrt(np.sum((box3d_corner[:, 4, [0, 1]] - box3d_corner[:, 5, [0, 1]]) ** 2, axis=1, keepdims=True)) + | |||
| np.sqrt(np.sum((box3d_corner[:, 6, [0, 1]] - box3d_corner[:, 7, [0, 1]]) ** 2, axis=1, keepdims=True))) / 4 | |||
| l = (np.sqrt(np.sum((box3d_corner[:, 0, [0, 1]] - box3d_corner[:, 3, [0, 1]]) ** 2, axis=1, keepdims=True)) + | |||
| np.sqrt(np.sum((box3d_corner[:, 1, [0, 1]] - box3d_corner[:, 2, [0, 1]]) ** 2, axis=1, keepdims=True)) + | |||
| np.sqrt(np.sum((box3d_corner[:, 4, [0, 1]] - box3d_corner[:, 7, [0, 1]]) ** 2, axis=1, keepdims=True)) + | |||
| np.sqrt(np.sum((box3d_corner[:, 5, [0, 1]] - box3d_corner[:, 6, [0, 1]]) ** 2, axis=1, keepdims=True))) / 4 | |||
| yaw = (np.arctan2(box3d_corner[:, 2, 1] - box3d_corner[:, 1, 1], | |||
| box3d_corner[:, 2, 0] - box3d_corner[:, 1, 0]) + | |||
| np.arctan2(box3d_corner[:, 3, 1] - box3d_corner[:, 0, 1], | |||
| box3d_corner[:, 3, 0] - box3d_corner[:, 0, 0]) + | |||
| np.arctan2(box3d_corner[:, 2, 0] - box3d_corner[:, 3, 0], | |||
| box3d_corner[:, 3, 1] - box3d_corner[:, 2, 1]) + | |||
| np.arctan2(box3d_corner[:, 1, 0] - box3d_corner[:, 0, 0], | |||
| box3d_corner[:, 0, 1] - box3d_corner[:, 1, 1]))[:, np.newaxis] / 4 | |||
| return np.concatenate([h, w, l, xyz, yaw], axis=1).reshape(-1, 7) | |||
| def box3d_center_to_conners(box3d_center): | |||
| h, w, l, x, y, z, yaw = box3d_center | |||
| Box = np.array([[-l / 2, -l / 2, l / 2, l / 2, -l / 2, -l / 2, l / 2, l / 2], | |||
| [w / 2, -w / 2, -w / 2, w / 2, w / 2, -w / 2, -w / 2, w / 2], | |||
| [0, 0, 0, 0, h, h, h, h]]) | |||
| rotMat = np.array([ | |||
| [np.cos(yaw), -np.sin(yaw), 0.0], | |||
| [np.sin(yaw), np.cos(yaw), 0.0], | |||
| [0.0, 0.0, 1.0]]) | |||
| velo_box = np.dot(rotMat, Box) | |||
| cornerPosInVelo = velo_box + np.tile(np.array([x, y, z]), (8, 1)).T | |||
| box3d_corner = cornerPosInVelo.transpose() | |||
| return box3d_corner.astype(np.float32) | |||
| if __name__ == '__main__': | |||
| heatmap = np.zeros((96, 320)) | |||
| h, w = 40, 50 | |||
| radius = compute_radius((h, w)) | |||
| radius = max(0, int(radius)) | |||
| print('h: {}, w: {}, radius: {}, sigma: {}'.format(h, w, radius, (2 * radius + 1) / 6.)) | |||
| gen_hm_radius(heatmap, center=(200, 50), radius=radius) | |||
| while True: | |||
| cv2.imshow('heatmap', heatmap) | |||
| if cv2.waitKey(0) & 0xff == 27: | |||
| break | |||
| max_pos = np.unravel_index(heatmap.argmax(), shape=heatmap.shape) | |||
| print('max_pos: {}'.format(max_pos)) | |||
| @@ -0,0 +1,67 @@ | |||
| """ | |||
| # -*- coding: utf-8 -*- | |||
| ----------------------------------------------------------------------------------- | |||
| # Author: Nguyen Mau Dung | |||
| # DoC: 2020.08.17 | |||
| # email: nguyenmaudung93.kstn@gmail.com | |||
| ----------------------------------------------------------------------------------- | |||
| # Description: This script creates the dataloaders for the training/validation/test phases | |||
| """ | |||
| import os | |||
| import sys | |||
| import torch | |||
| from torch.utils.data import DataLoader | |||
| import numpy as np | |||
| src_dir = os.path.dirname(os.path.realpath(__file__)) | |||
| # while not src_dir.endswith("sfa"): | |||
| # src_dir = os.path.dirname(src_dir) | |||
| if src_dir not in sys.path: | |||
| sys.path.append(src_dir) | |||
| from data_process.kitti_dataset import KittiDataset | |||
| from data_process.transformation import OneOf, Random_Rotation, Random_Scaling | |||
| def create_train_dataloader(configs): | |||
| """Create dataloader for training""" | |||
| train_lidar_aug = OneOf([ | |||
| Random_Rotation(limit_angle=np.pi / 4, p=1.0), | |||
| Random_Scaling(scaling_range=(0.95, 1.05), p=1.0), | |||
| ], p=0.66) | |||
| train_dataset = KittiDataset(configs, mode='train', lidar_aug=train_lidar_aug, hflip_prob=configs.hflip_prob, | |||
| num_samples=configs.num_samples) | |||
| train_sampler = None | |||
| if configs.distributed: | |||
| train_sampler = torch.utils.data.distributed.DistributedSampler(train_dataset) | |||
| train_dataloader = DataLoader(train_dataset, batch_size=configs.batch_size, shuffle=(train_sampler is None), | |||
| pin_memory=configs.pin_memory, num_workers=configs.num_workers, sampler=train_sampler) | |||
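| # The sampler is returned together with the loader so that distributed training | |||
| # can call train_sampler.set_epoch(epoch) at the start of each epoch. | |||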
| return train_dataloader, train_sampler | |||
| def create_val_dataloader(configs): | |||
| """Create dataloader for validation""" | |||
| val_sampler = None | |||
| val_dataset = KittiDataset(configs, mode='val', lidar_aug=None, hflip_prob=0., num_samples=configs.num_samples) | |||
| if configs.distributed: | |||
| val_sampler = torch.utils.data.distributed.DistributedSampler(val_dataset, shuffle=False) | |||
| val_dataloader = DataLoader(val_dataset, batch_size=configs.batch_size, shuffle=False, | |||
| pin_memory=configs.pin_memory, num_workers=configs.num_workers, sampler=val_sampler) | |||
| return val_dataloader | |||
| def create_test_dataloader(configs): | |||
| """Create dataloader for testing phase""" | |||
| test_dataset = KittiDataset(configs, mode='test', lidar_aug=None, hflip_prob=0., num_samples=configs.num_samples) | |||
| test_sampler = None | |||
| if configs.distributed: | |||
| test_sampler = torch.utils.data.distributed.DistributedSampler(test_dataset) | |||
| test_dataloader = DataLoader(test_dataset, batch_size=configs.batch_size, shuffle=False, | |||
| pin_memory=configs.pin_memory, num_workers=configs.num_workers, sampler=test_sampler) | |||
| return test_dataloader | |||
| @@ -0,0 +1,335 @@ | |||
| """ | |||
| # -*- coding: utf-8 -*- | |||
| ----------------------------------------------------------------------------------- | |||
| # Author: Nguyen Mau Dung | |||
| # DoC: 2020.08.17 | |||
| # email: nguyenmaudung93.kstn@gmail.com | |||
| ----------------------------------------------------------------------------------- | |||
| # Description: This script is for the KITTI dataset | |||
| """ | |||
| import sys | |||
| import os | |||
| import math | |||
| from builtins import int | |||
| import numpy as np | |||
| from torch.utils.data import Dataset | |||
| import cv2 | |||
| import torch | |||
| src_dir = os.path.dirname(os.path.realpath(__file__)) | |||
| # while not src_dir.endswith("sfa"): | |||
| # src_dir = os.path.dirname(src_dir) | |||
| if src_dir not in sys.path: | |||
| sys.path.append(src_dir) | |||
| from data_process.kitti_data_utils import gen_hm_radius, compute_radius, Calibration, get_filtered_lidar | |||
| from data_process.kitti_bev_utils import makeBEVMap, drawRotatedBox, get_corners | |||
| from data_process import transformation | |||
| import config.kitti_config as cnf | |||
| class KittiDataset(Dataset): | |||
| def __init__(self, configs, mode='train', lidar_aug=None, hflip_prob=None, num_samples=None): | |||
| self.dataset_dir = configs.dataset_dir | |||
| self.input_size = configs.input_size | |||
| self.hm_size = configs.hm_size | |||
| self.num_classes = configs.num_classes | |||
| self.max_objects = configs.max_objects | |||
| assert mode in ['train', 'val', 'test'], 'Invalid mode: {}'.format(mode) | |||
| self.mode = mode | |||
| self.is_test = (self.mode == 'test') | |||
| # sub_folder = 'testing' if self.is_test else 'training' | |||
| self.lidar_aug = lidar_aug | |||
| self.hflip_prob = hflip_prob | |||
| if mode == 'val': | |||
| self.val_data_url = configs.val_data_url | |||
| self.lidar_dir = os.path.join(self.val_data_url, "velodyne") | |||
| self.calib_dir = os.path.join(self.val_data_url, "calib") | |||
| self.label_dir = os.path.join(self.val_data_url, "label_2") | |||
| # self.image_dir = os.path.join(self.dataset_dir, sub_folder, "image_2") | |||
| else: | |||
| self.lidar_dir = os.path.join(self.dataset_dir, "velodyne") | |||
| self.calib_dir = os.path.join(self.dataset_dir, "calib") | |||
| self.label_dir = os.path.join(self.dataset_dir, "label_2") | |||
| # split_txt_path = os.path.join('../dataset/apollo/', 'ImageSets', '{}.txt'.format(mode)) | |||
| # Build the sample id list from the files in the lidar directory | |||
| self.sample_id_list = [bin_file.split('.')[0] for bin_file in os.listdir(self.lidar_dir)] | |||
| if num_samples is not None: | |||
| self.sample_id_list = self.sample_id_list[:num_samples] | |||
| self.num_samples = len(self.sample_id_list) | |||
| def __len__(self): | |||
| return len(self.sample_id_list) | |||
| def __getitem__(self, index): | |||
| if self.is_test: | |||
| return self.load_img_only(index) | |||
| else: | |||
| return self.load_img_with_targets(index) | |||
| def load_img_only(self, index): | |||
| """Load only image for the testing phase""" | |||
| sample_id = self.sample_id_list[index] | |||
| # print(sample_id) | |||
| # img_path, img_rgb = self.get_image(sample_id) | |||
| lidarData = self.get_lidar(sample_id) | |||
| lidarData = get_filtered_lidar(lidarData, cnf.boundary) | |||
| bev_map = makeBEVMap(lidarData, cnf.boundary) | |||
| bev_map = torch.from_numpy(bev_map) | |||
| bev_path = os.path.join(self.lidar_dir, '{}.png'.format(sample_id)) | |||
| metadatas = { | |||
| 'bev_path': bev_path, | |||
| } | |||
| # return metadatas, bev_map, img_rgb | |||
| return bev_map, metadatas | |||
| def load_img_with_targets(self, index): | |||
| """Load images and targets for the training and validation phase""" | |||
| sample_id = self.sample_id_list[index] | |||
| # img_path = os.path.join(self.image_dir, '{}.png'.format(sample_id)) | |||
| lidarData = self.get_lidar(sample_id) | |||
| # calib = self.get_calib(sample_id) | |||
| labels, has_labels = self.get_label(sample_id) | |||
| # if has_labels: | |||
| # labels[:, 1:] = transformation.camera_to_lidar_box(labels[:, 1:], calib.V2C, calib.R0, calib.P2) | |||
| if self.lidar_aug: | |||
| lidarData, labels[:, 1:] = self.lidar_aug(lidarData, labels[:, 1:]) | |||
| lidarData, labels = get_filtered_lidar(lidarData, cnf.boundary, labels) | |||
| bev_map = makeBEVMap(lidarData, cnf.boundary) | |||
| bev_map = torch.from_numpy(bev_map) | |||
| hflipped = False | |||
| if np.random.random() < self.hflip_prob: | |||
| hflipped = True | |||
| # C, H, W | |||
| bev_map = torch.flip(bev_map, [-1]) | |||
| targets = self.build_targets(labels, hflipped) | |||
| # metadatas = { | |||
| # 'img_path': img_path, | |||
| # 'hflipped': hflipped | |||
| # } | |||
| # return metadatas, bev_map, targets | |||
| return bev_map, targets | |||
| def get_image(self, idx): | |||
| img_path = os.path.join(self.image_dir, '{}.png'.format(idx)) | |||
| img = cv2.cvtColor(cv2.imread(img_path), cv2.COLOR_BGR2RGB) | |||
| return img_path, img | |||
| def get_calib(self, idx): | |||
| calib_file = os.path.join(self.calib_dir, '{}.txt'.format(idx)) | |||
| # assert os.path.isfile(calib_file) | |||
| return Calibration(calib_file) | |||
| def get_lidar(self, idx): | |||
| lidar_file = os.path.join(self.lidar_dir, '{}.bin'.format(idx)) | |||
| # assert os.path.isfile(lidar_file) | |||
| return np.fromfile(lidar_file, dtype=np.float32).reshape(-1, 4) | |||
| def get_label(self, idx): | |||
| labels = [] | |||
| label_path = os.path.join(self.label_dir, '{}.txt'.format(idx)) | |||
| for line in open(label_path, 'r'): | |||
| line = line.rstrip() | |||
| line_parts = line.split(' ') | |||
| obj_name = line_parts[0] # 'Car', 'Pedestrian', ... | |||
| cat_id = int(cnf.CLASS_NAME_TO_ID[obj_name]) | |||
| if cat_id <= -99: # ignore Tram and Misc | |||
| continue | |||
| truncated = int(float(line_parts[1])) # truncated pixel ratio [0..1] | |||
| occluded = int(line_parts[2]) # 0=visible, 1=partly occluded, 2=fully occluded, 3=unknown | |||
| alpha = float(line_parts[3]) # object observation angle [-pi..pi] | |||
| # xmin, ymin, xmax, ymax | |||
| # bbox = np.array([float(line_parts[4]), float(line_parts[5]), float(line_parts[6]), float(line_parts[7])]) | |||
| # height, width, length (h, w, l) | |||
| h, w, l = float(line_parts[8]), float(line_parts[9]), float(line_parts[10]) | |||
| # location (x,y,z) in camera coord. | |||
| x, y, z = float(line_parts[11]), float(line_parts[12]), float(line_parts[13]) | |||
| ry = float(line_parts[14]) # yaw angle (around Y-axis in camera coordinates) [-pi..pi] | |||
| object_label = [cat_id, x, y, z, h, w, l, ry] | |||
| labels.append(object_label) | |||
| if len(labels) == 0: | |||
| labels = np.zeros((1, 8), dtype=np.float32) | |||
| has_labels = False | |||
| else: | |||
| labels = np.array(labels, dtype=np.float32) | |||
| has_labels = True | |||
| return labels, has_labels | |||
| def build_targets(self, labels, hflipped): | |||
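| """Encode the labels of one sample into CenterNet-style training targets | |||
| (center heatmap, center offset, yaw direction, z coordinate, dimensions).""" | |||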
| minX = cnf.boundary['minX'] | |||
| maxX = cnf.boundary['maxX'] | |||
| minY = cnf.boundary['minY'] | |||
| maxY = cnf.boundary['maxY'] | |||
| minZ = cnf.boundary['minZ'] | |||
| maxZ = cnf.boundary['maxZ'] | |||
| num_objects = min(len(labels), self.max_objects) | |||
| hm_l, hm_w = self.hm_size | |||
| hm_main_center = np.zeros((self.num_classes, hm_l, hm_w), dtype=np.float32) | |||
| cen_offset = np.zeros((self.max_objects, 2), dtype=np.float32) | |||
| direction = np.zeros((self.max_objects, 2), dtype=np.float32) | |||
| z_coor = np.zeros((self.max_objects, 1), dtype=np.float32) | |||
| dimension = np.zeros((self.max_objects, 3), dtype=np.float32) | |||
| indices_center = np.zeros((self.max_objects), dtype=np.int64) | |||
| obj_mask = np.zeros((self.max_objects), dtype=np.uint8) | |||
| for k in range(num_objects): | |||
| cls_id, x, y, z, h, w, l, yaw = labels[k] | |||
| cls_id = int(cls_id) | |||
| # Invert yaw angle | |||
| yaw = -yaw | |||
| if not ((minX <= x <= maxX) and (minY <= y <= maxY) and (minZ <= z <= maxZ)): | |||
| continue | |||
| if (h <= 0) or (w <= 0) or (l <= 0): | |||
| continue | |||
| bbox_l = l / cnf.bound_size_x * hm_l | |||
| bbox_w = w / cnf.bound_size_y * hm_w | |||
| radius = compute_radius((math.ceil(bbox_l), math.ceil(bbox_w))) | |||
| radius = max(0, int(radius)) | |||
| center_y = (x - minX) / cnf.bound_size_x * hm_l # x --> y (invert to 2D image space) | |||
| center_x = (y - minY) / cnf.bound_size_y * hm_w # y --> x | |||
| center = np.array([center_x, center_y], dtype=np.float32) | |||
| if hflipped: | |||
| center[0] = hm_w - center[0] - 1 | |||
| center_int = center.astype(np.int32) | |||
| if cls_id < 0: | |||
| ignore_ids = [_ for _ in range(self.num_classes)] if cls_id == -1 else [-cls_id - 2] | |||
| # Consider to make mask ignore | |||
| for cls_ig in ignore_ids: | |||
| gen_hm_radius(hm_main_center[cls_ig], center_int, radius) | |||
| hm_main_center[ignore_ids, center_int[1], center_int[0]] = 0.9999 | |||
| continue | |||
| # Generate heatmaps for main center | |||
| gen_hm_radius(hm_main_center[cls_id], center, radius) | |||
| # Index of the center | |||
| indices_center[k] = center_int[1] * hm_w + center_int[0] | |||
| # targets for center offset | |||
| cen_offset[k] = center - center_int | |||
| # targets for dimension | |||
| dimension[k, 0] = h | |||
| dimension[k, 1] = w | |||
| dimension[k, 2] = l | |||
| # targets for direction | |||
| direction[k, 0] = math.sin(float(yaw)) # im | |||
| direction[k, 1] = math.cos(float(yaw)) # re | |||
| # im -->> -im | |||
| if hflipped: | |||
| direction[k, 0] = - direction[k, 0] | |||
| # targets for depth | |||
| z_coor[k] = z - minZ | |||
| # Generate object masks | |||
| obj_mask[k] = 1 | |||
| targets = { | |||
| 'hm_cen': hm_main_center, | |||
| 'cen_offset': cen_offset, | |||
| 'direction': direction, | |||
| 'z_coor': z_coor, | |||
| 'dim': dimension, | |||
| 'indices_center': indices_center, | |||
| 'obj_mask': obj_mask, | |||
| } | |||
| return targets | |||
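# Shape sketch (illustrative, with num_classes=3, max_objects=50, hm_size=(304, 152)):
# 'hm_cen' (3, 304, 152) per-class center heatmap; 'cen_offset' (50, 2) sub-pixel
# offsets; 'direction' (50, 2) (sin, cos) of yaw; 'z_coor' (50, 1); 'dim' (50, 3)
# (h, w, l); 'indices_center' (50,) flat indices y * hm_w + x; 'obj_mask' (50,)
# marks which of the max_objects slots hold a real object.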
| def draw_img_with_label(self, index): | |||
| sample_id = self.sample_id_list[index] | |||
| lidar_path = os.path.join(self.lidar_dir, '{}.bin'.format(sample_id)) | |||
| lidarData = self.get_lidar(sample_id) | |||
| calib = self.get_calib(sample_id) | |||
| labels, has_labels = self.get_label(sample_id) | |||
| print(lidar_path) | |||
| if has_labels: | |||
| labels[:, 1:] = transformation.camera_to_lidar_box(labels[:, 1:], calib.V2C, calib.R0, calib.P2) | |||
| if self.lidar_aug: | |||
| lidarData, labels[:, 1:] = self.lidar_aug(lidarData, labels[:, 1:]) | |||
| lidarData, labels = get_filtered_lidar(lidarData, cnf.boundary, labels) | |||
| bev_map = makeBEVMap(lidarData, cnf.boundary) | |||
| print(labels) | |||
| return bev_map, labels, lidar_path | |||
| if __name__ == '__main__': | |||
| from easydict import EasyDict as edict | |||
| from data_process.transformation import OneOf, Random_Scaling, Random_Rotation, lidar_to_camera_box | |||
| from utils.visualization_utils import merge_rgb_to_bev, show_rgb_image_with_boxes | |||
| configs = edict() | |||
| configs.distributed = False # For testing | |||
| configs.pin_memory = False | |||
| configs.num_samples = None | |||
| configs.input_size = (1216, 608) | |||
| configs.hm_size = (304, 152) | |||
| configs.max_objects = 50 | |||
| configs.num_classes = 3 | |||
| configs.output_width = 608 | |||
| # configs.dataset_dir = os.path.join('../../', 'dataset', 'kitti') | |||
| # lidar_aug = OneOf([ | |||
| # Random_Rotation(limit_angle=np.pi / 4, p=1.), | |||
| # Random_Scaling(scaling_range=(0.95, 1.05), p=1.), | |||
| # ], p=1.) | |||
| lidar_aug = None | |||
| dataset = KittiDataset(configs, mode='val', lidar_aug=lidar_aug, hflip_prob=0., num_samples=configs.num_samples) | |||
print('\n\nPress any key to see the next sample >>> Press Esc to quit...')
| for idx in range(len(dataset)): | |||
| bev_map, labels, lidar_path = dataset.draw_img_with_label(idx) | |||
| calib = Calibration(lidar_path.replace(".bin", ".txt").replace("velodyne", "calib")) | |||
| bev_map = (bev_map.transpose(1, 2, 0) * 255).astype(np.uint8) | |||
| # bev_map = cv2.resize(bev_map, (cnf.BEV_HEIGHT, cnf.BEV_WIDTH)) | |||
| print(bev_map.shape) | |||
| for box_idx, (cls_id, x, y, z, h, w, l, yaw) in enumerate(labels): | |||
| # Draw rotated box | |||
| yaw = -yaw | |||
| y1 = int((x - cnf.boundary['minX']) / cnf.DISCRETIZATION) | |||
| x1 = int((y - cnf.boundary['minY']) / cnf.DISCRETIZATION) | |||
| w1 = int(w / cnf.DISCRETIZATION) | |||
| l1 = int(l / cnf.DISCRETIZATION) | |||
| drawRotatedBox(bev_map, x1, y1, w1, l1, yaw, cnf.colors[int(cls_id)]) | |||
| # Rotate the bev_map | |||
| bev_map = cv2.rotate(bev_map, cv2.ROTATE_180) | |||
| # labels[:, 1:] = lidar_to_camera_box(labels[:, 1:], calib.V2C, calib.R0, calib.P2) | |||
| cv2.imshow('bev_map', bev_map) | |||
| if cv2.waitKey(0) & 0xff == 27: | |||
| break | |||
| @@ -0,0 +1,426 @@ | |||
| """ | |||
| # -*- coding: utf-8 -*- | |||
| ----------------------------------------------------------------------------------- | |||
| # Refer: https://github.com/ghimiredhikura/Complex-YOLOv3 | |||
| # Source : https://github.com/jeasinema/VoxelNet-tensorflow/blob/master/utils/utils.py | |||
| """ | |||
| import os | |||
| import sys | |||
| import math | |||
| import numpy as np | |||
| import torch | |||
| src_dir = os.path.dirname(os.path.realpath(__file__)) | |||
| # while not src_dir.endswith("sfa"): | |||
| # src_dir = os.path.dirname(src_dir) | |||
| if src_dir not in sys.path: | |||
| sys.path.append(src_dir) | |||
| from config import kitti_config as cnf | |||
| def angle_in_limit(angle): | |||
# Wrap the angle into [-pi/2, pi/2); values within 5 degrees of -pi/2 are snapped to +pi/2
| limit_degree = 5 | |||
| while angle >= np.pi / 2: | |||
| angle -= np.pi | |||
| while angle < -np.pi / 2: | |||
| angle += np.pi | |||
| if abs(angle + np.pi / 2) < limit_degree / 180 * np.pi: | |||
| angle = np.pi / 2 | |||
| return angle | |||
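# Behavior sketch (illustrative): angle_in_limit wraps into [-pi/2, pi/2), e.g.
# angle_in_limit(np.pi) == 0.0 and angle_in_limit(3 * np.pi / 4) == -np.pi / 4;
# only values within 5 degrees of -pi/2 get snapped up to +pi/2.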
| def camera_to_lidar(x, y, z, V2C=None, R0=None, P2=None): | |||
| p = np.array([x, y, z, 1]) | |||
| if V2C is None or R0 is None: | |||
| p = np.matmul(cnf.R0_inv, p) | |||
| p = np.matmul(cnf.Tr_velo_to_cam_inv, p) | |||
| else: | |||
| R0_i = np.zeros((4, 4)) | |||
| R0_i[:3, :3] = R0 | |||
| R0_i[3, 3] = 1 | |||
| p = np.matmul(np.linalg.inv(R0_i), p) | |||
| p = np.matmul(inverse_rigid_trans(V2C), p) | |||
| p = p[0:3] | |||
| return tuple(p) | |||
| def lidar_to_camera(x, y, z, V2C=None, R0=None, P2=None): | |||
| p = np.array([x, y, z, 1]) | |||
| if V2C is None or R0 is None: | |||
| p = np.matmul(cnf.Tr_velo_to_cam, p) | |||
| p = np.matmul(cnf.R0, p) | |||
| else: | |||
| p = np.matmul(V2C, p) | |||
| p = np.matmul(R0, p) | |||
| p = p[0:3] | |||
| return tuple(p) | |||
| def camera_to_lidar_point(points): | |||
| # (N, 3) -> (N, 3) | |||
| N = points.shape[0] | |||
| points = np.hstack([points, np.ones((N, 1))]).T # (N,4) -> (4,N) | |||
| points = np.matmul(cnf.R0_inv, points) | |||
| points = np.matmul(cnf.Tr_velo_to_cam_inv, points).T # (4, N) -> (N, 4) | |||
| points = points[:, 0:3] | |||
| return points.reshape(-1, 3) | |||
| def lidar_to_camera_point(points, V2C=None, R0=None): | |||
| # (N, 3) -> (N, 3) | |||
| N = points.shape[0] | |||
| points = np.hstack([points, np.ones((N, 1))]).T | |||
| if V2C is None or R0 is None: | |||
| points = np.matmul(cnf.Tr_velo_to_cam, points) | |||
| points = np.matmul(cnf.R0, points).T | |||
| else: | |||
| points = np.matmul(V2C, points) | |||
| points = np.matmul(R0, points).T | |||
| points = points[:, 0:3] | |||
| return points.reshape(-1, 3) | |||
| def camera_to_lidar_box(boxes, V2C=None, R0=None, P2=None): | |||
| # (N, 7) -> (N, 7) x,y,z,h,w,l,r | |||
| ret = [] | |||
| for box in boxes: | |||
| x, y, z, h, w, l, ry = box | |||
| # print(x, y, z, h, w, l, ry) | |||
| (x, y, z), h, w, l, rz = camera_to_lidar(x, y, z, V2C=V2C, R0=R0, P2=P2), h, w, l, -ry - np.pi / 2 | |||
| # print(x, y, z, h, w, l, ry) | |||
| # print("camera_to_lidar") | |||
| # rz = angle_in_limit(rz) | |||
| ret.append([x, y, z, h, w, l, rz]) | |||
| return np.array(ret).reshape(-1, 7) | |||
| def lidar_to_camera_box(boxes, V2C=None, R0=None, P2=None): | |||
| # (N, 7) -> (N, 7) x,y,z,h,w,l,r | |||
| ret = [] | |||
| for box in boxes: | |||
| x, y, z, h, w, l, rz = box | |||
| # (x, y, z), h, w, l, ry = lidar_to_camera(x, y, z, V2C=V2C, R0=R0, P2=P2), h, w, l, -rz - np.pi / 2 | |||
| # ry = angle_in_limit(ry) | |||
| ry = -rz - np.pi / 2 | |||
| ret.append([x, y, z, h, w, l, ry]) | |||
| return np.array(ret).reshape(-1, 7) | |||
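# Note (sketch): the two box conversions are mutual inverses for the heading:
# camera_to_lidar_box maps ry -> rz = -ry - pi/2 and lidar_to_camera_box maps
# rz -> ry = -rz - pi/2, so a camera -> lidar -> camera round trip returns the
# original yaw (the xyz part round-trips through the rigid V2C/R0 transforms).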
| def center_to_corner_box2d(boxes_center, coordinate='lidar'): | |||
| # (N, 5) -> (N, 4, 2) | |||
| N = boxes_center.shape[0] | |||
| boxes3d_center = np.zeros((N, 7)) | |||
| boxes3d_center[:, [0, 1, 4, 5, 6]] = boxes_center | |||
| boxes3d_corner = center_to_corner_box3d(boxes3d_center, coordinate=coordinate) | |||
| return boxes3d_corner[:, 0:4, 0:2] | |||
| def center_to_corner_box3d(boxes_center, coordinate='lidar'): | |||
| # (N, 7) -> (N, 8, 3) | |||
| N = boxes_center.shape[0] | |||
| ret = np.zeros((N, 8, 3), dtype=np.float32) | |||
| if coordinate == 'camera': | |||
| boxes_center = camera_to_lidar_box(boxes_center) | |||
| for i in range(N): | |||
| box = boxes_center[i] | |||
| translation = box[0:3] | |||
| size = box[3:6] | |||
| rotation = [0, 0, box[-1]] | |||
| h, w, l = size[0], size[1], size[2] | |||
trackletBox = np.array([  # in velodyne coordinates, centered at the origin, without orientation yet
[-l / 2, -l / 2, l / 2, l / 2, -l / 2, -l / 2, l / 2, l / 2],
[w / 2, -w / 2, -w / 2, w / 2, w / 2, -w / 2, -w / 2, w / 2],
[0, 0, 0, 0, h, h, h, h]])
| # re-create 3D bounding box in velodyne coordinate system | |||
| yaw = rotation[2] | |||
| rotMat = np.array([ | |||
| [np.cos(yaw), -np.sin(yaw), 0.0], | |||
| [np.sin(yaw), np.cos(yaw), 0.0], | |||
| [0.0, 0.0, 1.0]]) | |||
| cornerPosInVelo = np.dot(rotMat, trackletBox) + np.tile(translation, (8, 1)).T | |||
| box3d = cornerPosInVelo.transpose() | |||
| ret[i] = box3d | |||
| if coordinate == 'camera': | |||
| for idx in range(len(ret)): | |||
| ret[idx] = lidar_to_camera_point(ret[idx]) | |||
| return ret | |||
| CORNER2CENTER_AVG = True | |||
| def corner_to_center_box3d(boxes_corner, coordinate='camera'): | |||
| # (N, 8, 3) -> (N, 7) x,y,z,h,w,l,ry/z | |||
| if coordinate == 'lidar': | |||
| for idx in range(len(boxes_corner)): | |||
| boxes_corner[idx] = lidar_to_camera_point(boxes_corner[idx]) | |||
| ret = [] | |||
| for roi in boxes_corner: | |||
| if CORNER2CENTER_AVG: # average version | |||
| roi = np.array(roi) | |||
| h = abs(np.sum(roi[:4, 1] - roi[4:, 1]) / 4) | |||
| w = np.sum( | |||
| np.sqrt(np.sum((roi[0, [0, 2]] - roi[3, [0, 2]]) ** 2)) + | |||
| np.sqrt(np.sum((roi[1, [0, 2]] - roi[2, [0, 2]]) ** 2)) + | |||
| np.sqrt(np.sum((roi[4, [0, 2]] - roi[7, [0, 2]]) ** 2)) + | |||
| np.sqrt(np.sum((roi[5, [0, 2]] - roi[6, [0, 2]]) ** 2)) | |||
| ) / 4 | |||
| l = np.sum( | |||
| np.sqrt(np.sum((roi[0, [0, 2]] - roi[1, [0, 2]]) ** 2)) + | |||
| np.sqrt(np.sum((roi[2, [0, 2]] - roi[3, [0, 2]]) ** 2)) + | |||
| np.sqrt(np.sum((roi[4, [0, 2]] - roi[5, [0, 2]]) ** 2)) + | |||
| np.sqrt(np.sum((roi[6, [0, 2]] - roi[7, [0, 2]]) ** 2)) | |||
| ) / 4 | |||
| x = np.sum(roi[:, 0], axis=0) / 8 | |||
| y = np.sum(roi[0:4, 1], axis=0) / 4 | |||
| z = np.sum(roi[:, 2], axis=0) / 8 | |||
| ry = np.sum( | |||
| math.atan2(roi[2, 0] - roi[1, 0], roi[2, 2] - roi[1, 2]) + | |||
| math.atan2(roi[6, 0] - roi[5, 0], roi[6, 2] - roi[5, 2]) + | |||
| math.atan2(roi[3, 0] - roi[0, 0], roi[3, 2] - roi[0, 2]) + | |||
| math.atan2(roi[7, 0] - roi[4, 0], roi[7, 2] - roi[4, 2]) + | |||
| math.atan2(roi[0, 2] - roi[1, 2], roi[1, 0] - roi[0, 0]) + | |||
| math.atan2(roi[4, 2] - roi[5, 2], roi[5, 0] - roi[4, 0]) + | |||
| math.atan2(roi[3, 2] - roi[2, 2], roi[2, 0] - roi[3, 0]) + | |||
| math.atan2(roi[7, 2] - roi[6, 2], roi[6, 0] - roi[7, 0]) | |||
| ) / 8 | |||
if w > l:  # enforce w <= l; swapping the sides rotates the heading by pi/2
w, l = l, w
ry = ry - np.pi / 2
| ret.append([x, y, z, h, w, l, ry]) | |||
else:  # max version (dead code while CORNER2CENTER_AVG is True)
| h = max(abs(roi[:4, 1] - roi[4:, 1])) | |||
| w = np.max( | |||
| np.sqrt(np.sum((roi[0, [0, 2]] - roi[3, [0, 2]]) ** 2)) + | |||
| np.sqrt(np.sum((roi[1, [0, 2]] - roi[2, [0, 2]]) ** 2)) + | |||
| np.sqrt(np.sum((roi[4, [0, 2]] - roi[7, [0, 2]]) ** 2)) + | |||
| np.sqrt(np.sum((roi[5, [0, 2]] - roi[6, [0, 2]]) ** 2)) | |||
| ) | |||
| l = np.max( | |||
| np.sqrt(np.sum((roi[0, [0, 2]] - roi[1, [0, 2]]) ** 2)) + | |||
| np.sqrt(np.sum((roi[2, [0, 2]] - roi[3, [0, 2]]) ** 2)) + | |||
| np.sqrt(np.sum((roi[4, [0, 2]] - roi[5, [0, 2]]) ** 2)) + | |||
| np.sqrt(np.sum((roi[6, [0, 2]] - roi[7, [0, 2]]) ** 2)) | |||
| ) | |||
| x = np.sum(roi[:, 0], axis=0) / 8 | |||
| y = np.sum(roi[0:4, 1], axis=0) / 4 | |||
| z = np.sum(roi[:, 2], axis=0) / 8 | |||
| ry = np.sum( | |||
| math.atan2(roi[2, 0] - roi[1, 0], roi[2, 2] - roi[1, 2]) + | |||
| math.atan2(roi[6, 0] - roi[5, 0], roi[6, 2] - roi[5, 2]) + | |||
| math.atan2(roi[3, 0] - roi[0, 0], roi[3, 2] - roi[0, 2]) + | |||
| math.atan2(roi[7, 0] - roi[4, 0], roi[7, 2] - roi[4, 2]) + | |||
| math.atan2(roi[0, 2] - roi[1, 2], roi[1, 0] - roi[0, 0]) + | |||
| math.atan2(roi[4, 2] - roi[5, 2], roi[5, 0] - roi[4, 0]) + | |||
| math.atan2(roi[3, 2] - roi[2, 2], roi[2, 0] - roi[3, 0]) + | |||
| math.atan2(roi[7, 2] - roi[6, 2], roi[6, 0] - roi[7, 0]) | |||
| ) / 8 | |||
| if w > l: | |||
| w, l = l, w | |||
| ry = angle_in_limit(ry + np.pi / 2) | |||
| ret.append([x, y, z, h, w, l, ry]) | |||
| if coordinate == 'lidar': | |||
| ret = camera_to_lidar_box(np.array(ret)) | |||
| return np.array(ret) | |||
| def point_transform(points, tx, ty, tz, rx=0, ry=0, rz=0): | |||
| # Input: | |||
| # points: (N, 3) | |||
| # rx/y/z: in radians | |||
| # Output: | |||
| # points: (N, 3) | |||
| N = points.shape[0] | |||
| points = np.hstack([points, np.ones((N, 1))]) | |||
| mat1 = np.eye(4) | |||
| mat1[3, 0:3] = tx, ty, tz | |||
| points = np.matmul(points, mat1) | |||
| if rx != 0: | |||
| mat = np.zeros((4, 4)) | |||
| mat[0, 0] = 1 | |||
| mat[3, 3] = 1 | |||
| mat[1, 1] = np.cos(rx) | |||
| mat[1, 2] = -np.sin(rx) | |||
| mat[2, 1] = np.sin(rx) | |||
| mat[2, 2] = np.cos(rx) | |||
| points = np.matmul(points, mat) | |||
| if ry != 0: | |||
| mat = np.zeros((4, 4)) | |||
| mat[1, 1] = 1 | |||
| mat[3, 3] = 1 | |||
| mat[0, 0] = np.cos(ry) | |||
| mat[0, 2] = np.sin(ry) | |||
| mat[2, 0] = -np.sin(ry) | |||
| mat[2, 2] = np.cos(ry) | |||
| points = np.matmul(points, mat) | |||
| if rz != 0: | |||
| mat = np.zeros((4, 4)) | |||
| mat[2, 2] = 1 | |||
| mat[3, 3] = 1 | |||
| mat[0, 0] = np.cos(rz) | |||
| mat[0, 1] = -np.sin(rz) | |||
| mat[1, 0] = np.sin(rz) | |||
| mat[1, 1] = np.cos(rz) | |||
| points = np.matmul(points, mat) | |||
| return points[:, 0:3] | |||
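# Convention sketch (illustrative): points are row vectors multiplied on the
# right, so each matrix acts as the transpose of the usual column-vector form
# and rz rotates clockwise in the standard convention, e.g.
#   >>> np.round(point_transform(np.array([[1., 0., 0.]]), 0, 0, 0, rz=np.pi / 2), 6)
#   array([[ 0., -1.,  0.]])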
| def box_transform(boxes, tx, ty, tz, r=0, coordinate='lidar'): | |||
| # Input: | |||
| # boxes: (N, 7) x y z h w l rz/y | |||
| # Output: | |||
| # boxes: (N, 7) x y z h w l rz/y | |||
| boxes_corner = center_to_corner_box3d(boxes, coordinate=coordinate) # (N, 8, 3) | |||
| for idx in range(len(boxes_corner)): | |||
| if coordinate == 'lidar': | |||
| boxes_corner[idx] = point_transform(boxes_corner[idx], tx, ty, tz, rz=r) | |||
| else: | |||
| boxes_corner[idx] = point_transform(boxes_corner[idx], tx, ty, tz, ry=r) | |||
| return corner_to_center_box3d(boxes_corner, coordinate=coordinate) | |||
| def inverse_rigid_trans(Tr): | |||
''' Invert a rigid body transform matrix (3x4 as [R|t]);
returns the 3x4 inverse [R^T | -R^T t].
'''
| inv_Tr = np.zeros_like(Tr) # 3x4 | |||
| inv_Tr[0:3, 0:3] = np.transpose(Tr[0:3, 0:3]) | |||
| inv_Tr[0:3, 3] = np.dot(-np.transpose(Tr[0:3, 0:3]), Tr[0:3, 3]) | |||
| return inv_Tr | |||
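# Sanity-check sketch (illustrative): for an identity rotation with translation
# t = (1, 2, 3), the inverse simply negates the translation:
#   >>> Tr = np.hstack([np.eye(3), np.array([[1.], [2.], [3.]])])
#   >>> inverse_rigid_trans(Tr)[:, 3]
#   array([-1., -2., -3.])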
| class Compose(object): | |||
| def __init__(self, transforms, p=1.0): | |||
| self.transforms = transforms | |||
| self.p = p | |||
| def __call__(self, lidar, labels): | |||
| if np.random.random() <= self.p: | |||
| for t in self.transforms: | |||
| lidar, labels = t(lidar, labels) | |||
| return lidar, labels | |||
| class OneOf(object): | |||
| def __init__(self, transforms, p=1.0): | |||
| self.transforms = transforms | |||
| self.p = p | |||
| def __call__(self, lidar, labels): | |||
| if np.random.random() <= self.p: | |||
| choice = np.random.randint(low=0, high=len(self.transforms)) | |||
| lidar, labels = self.transforms[choice](lidar, labels) | |||
| return lidar, labels | |||
| class Random_Rotation(object): | |||
| def __init__(self, limit_angle=np.pi / 4, p=0.5): | |||
| self.limit_angle = limit_angle | |||
| self.p = p | |||
| def __call__(self, lidar, labels): | |||
| """ | |||
| :param labels: # (N', 7) x, y, z, h, w, l, r | |||
| :return: | |||
| """ | |||
| if np.random.random() <= self.p: | |||
| angle = np.random.uniform(-self.limit_angle, self.limit_angle) | |||
| lidar[:, 0:3] = point_transform(lidar[:, 0:3], 0, 0, 0, rz=angle) | |||
| labels = box_transform(labels, 0, 0, 0, r=angle, coordinate='lidar') | |||
| return lidar, labels | |||
| class Random_Scaling(object): | |||
| def __init__(self, scaling_range=(0.95, 1.05), p=0.5): | |||
| self.scaling_range = scaling_range | |||
| self.p = p | |||
| def __call__(self, lidar, labels): | |||
| """ | |||
| :param labels: # (N', 7) x, y, z, h, w, l, r | |||
| :return: | |||
| """ | |||
| if np.random.random() <= self.p: | |||
factor = np.random.uniform(self.scaling_range[0], self.scaling_range[1])
| lidar[:, 0:3] = lidar[:, 0:3] * factor | |||
| labels[:, 0:6] = labels[:, 0:6] * factor | |||
| return lidar, labels | |||
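# Usage sketch (illustrative, mirroring the commented-out example in the
# dataset's __main__ block): pick one augmentation at random per sample:
#   lidar_aug = OneOf([
#       Random_Rotation(limit_angle=np.pi / 4, p=1.0),
#       Random_Scaling(scaling_range=(0.95, 1.05), p=1.0),
#   ], p=0.66)
#   lidar, labels = lidar_aug(lidar, labels)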
| class Cutout(object): | |||
| """Randomly mask out one or more patches from an image. | |||
Args:
n_holes (int): Number of patches to cut out of each image.
ratio (float): Side length of each square patch, as a fraction of the image size.
fill_value (float): Value used to fill the cut-out area, in [0, 1].
Refer from: https://github.com/uoguelph-mlrg/Cutout/blob/master/util/cutout.py
| """ | |||
| def __init__(self, n_holes, ratio, fill_value=0., p=1.0): | |||
| self.n_holes = n_holes | |||
| self.ratio = ratio | |||
assert 0. <= fill_value <= 1., "fill_value must be in the range [0, 1]"
| self.fill_value = fill_value | |||
| self.p = p | |||
| def __call__(self, img, targets): | |||
| """ | |||
| Args: | |||
| img (Tensor): Tensor image of size (C, H, W). | |||
| Returns: | |||
| Tensor: Image with n_holes of dimension length x length cut out of it. | |||
| """ | |||
| if np.random.random() <= self.p: | |||
| h = img.size(1) | |||
| w = img.size(2) | |||
| h_cutout = int(self.ratio * h) | |||
| w_cutout = int(self.ratio * w) | |||
| for n in range(self.n_holes): | |||
| y = np.random.randint(h) | |||
| x = np.random.randint(w) | |||
| y1 = np.clip(y - h_cutout // 2, 0, h) | |||
| y2 = np.clip(y + h_cutout // 2, 0, h) | |||
| x1 = np.clip(x - w_cutout // 2, 0, w) | |||
| x2 = np.clip(x + w_cutout // 2, 0, w) | |||
| img[:, y1: y2, x1: x2] = self.fill_value # Zero out the selected area | |||
| # Remove targets that are in the selected area | |||
| keep_target = [] | |||
| for target_idx, target in enumerate(targets): | |||
| _, _, target_x, target_y, target_w, target_l, _, _ = target | |||
| if (x1 <= target_x * w <= x2) and (y1 <= target_y * h <= y2): | |||
| continue | |||
| keep_target.append(target_idx) | |||
| targets = targets[keep_target] | |||
| return img, targets | |||
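# Usage sketch (hypothetical; the field names are assumptions based on the
# unpacking above: 8-field targets with normalized x, y at indices 2 and 3):
#   cutout = Cutout(n_holes=1, ratio=0.3, fill_value=0., p=1.0)
#   bev, targets = cutout(bev, targets)  # masks a patch and drops covered objects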
| @@ -0,0 +1,378 @@ | |||
| """ | |||
| # -*- coding: utf-8 -*- | |||
| ----------------------------------------------------------------------------------- | |||
| # Author: Nguyen Mau Dung | |||
| # DoC: 2020.08.17 | |||
| # email: nguyenmaudung93.kstn@gmail.com | |||
| ----------------------------------------------------------------------------------- | |||
| # Description: Testing script | |||
| """ | |||
| import argparse | |||
| import sys | |||
| import os | |||
| import time | |||
| import warnings | |||
| warnings.filterwarnings("ignore", category=UserWarning) | |||
| from easydict import EasyDict as edict | |||
| import cv2 | |||
| import torch | |||
| import numpy as np | |||
| import torch.nn.functional as F | |||
| src_dir = os.path.dirname(os.path.realpath(__file__)) | |||
| # while not src_dir.endswith("sfa"): | |||
| # src_dir = os.path.dirname(src_dir) | |||
| if src_dir not in sys.path: | |||
| sys.path.append(src_dir) | |||
| from data_process.kitti_dataloader import create_test_dataloader | |||
| from models.model_utils import create_model | |||
| import config.kitti_config as cnf | |||
| def parse_test_configs(): | |||
| parser = argparse.ArgumentParser(description='Testing config for the Implementation') | |||
| parser.add_argument('--saved_fn', type=str, default='fpn_resnet_18', metavar='FN', | |||
| help='The name using for saving logs, models,...') | |||
| parser.add_argument('-a', '--arch', type=str, default='fpn_resnet_18', metavar='ARCH', | |||
| help='The name of the model architecture') | |||
| parser.add_argument('--model_dir', type=str, | |||
| default='/train_out_model/', metavar='PATH', | |||
| help='the path of the pretrained checkpoint') | |||
| parser.add_argument('--K', type=int, default=50, | |||
| help='the number of top K') | |||
parser.add_argument('--no_cuda', action='store_true',
help='If true, cuda is not used.')
| parser.add_argument('--gpu_idx', default=0, type=int, | |||
| help='GPU index to use.') | |||
| parser.add_argument('--num_samples', type=int, default=None, | |||
| help='Take a subset of the dataset to run and debug') | |||
| parser.add_argument('--num_workers', type=int, default=1, | |||
| help='Number of threads for loading data') | |||
| parser.add_argument('--batch_size', type=int, default=1, | |||
help='mini-batch size (default: 1)')
| parser.add_argument('--peak_thresh', type=float, default=0.2) | |||
parser.add_argument('--dataset_dir', type=str, default='/dataset_dir/',
help='the path of the dataset directory')
parser.add_argument('--results_dir', type=str, default='/results_dir/',
help='the directory for saving test outputs')
| parser.add_argument('--save_test_output', type=bool, default=True, | |||
| help='save the test output or not') | |||
| parser.add_argument('--output_format', type=str, default='txt', metavar='PATH', | |||
| help='the type of the test output (support image, video or none)') | |||
| parser.add_argument('--output_video_fn', type=str, default='out_fpn_resnet_18', metavar='PATH', | |||
| help='the video filename if the output format is video') | |||
| parser.add_argument('--output-width', type=int, default=608, | |||
| help='the width of showing output, the height maybe vary') | |||
| configs = edict(vars(parser.parse_args())) | |||
| configs.pin_memory = True | |||
| configs.distributed = False # For testing on 1 GPU only | |||
| configs.input_size = (1216, 608) | |||
| configs.hm_size = (304, 152) | |||
| configs.down_ratio = 4 | |||
| configs.max_objects = 50 | |||
| configs.imagenet_pretrained = False | |||
| configs.head_conv = 64 | |||
| configs.num_classes = 3 | |||
| configs.num_center_offset = 2 | |||
| configs.num_z = 1 | |||
| configs.num_dim = 3 | |||
| configs.num_direction = 2 # sin, cos | |||
| configs.heads = { | |||
| 'hm_cen': configs.num_classes, | |||
| 'cen_offset': configs.num_center_offset, | |||
| 'direction': configs.num_direction, | |||
| 'z_coor': configs.num_z, | |||
| 'dim': configs.num_dim | |||
| } | |||
| configs.num_input_features = 4 | |||
| #################################################################### | |||
| ##############Dataset, Checkpoints, and results dir configs######### | |||
| #################################################################### | |||
| configs.root_dir = '../' | |||
| # configs.dataset_dir = os.path.join(configs.root_dir, 'dataset', 'apollo') | |||
| # configs.results_dir_img = os.path.join(configs.results_dir, configs.saved_fn, 'image') | |||
| # configs.results_dir_txt = os.path.join(configs.results_dir, configs.saved_fn, 'txt') | |||
| # make_folder(configs.results_dir_img) | |||
| # make_folder(configs.results_dir_txt) | |||
| make_folder(configs.results_dir) | |||
| return configs | |||
| def _sigmoid(x): | |||
| return torch.clamp(x.sigmoid_(), min=1e-4, max=1 - 1e-4) | |||
| def time_synchronized(): | |||
if torch.cuda.is_available():
torch.cuda.synchronize()
| return time.time() | |||
| def make_folder(folder_name): | |||
| if not os.path.exists(folder_name): | |||
| os.makedirs(folder_name) | |||
| def drawRotatedBox(img, x, y, w, l, yaw, color): | |||
| bev_corners = get_corners(x, y, w, l, yaw) | |||
| corners_int = bev_corners.reshape(-1, 1, 2).astype(int) | |||
| cv2.polylines(img, [corners_int], True, color, 2) | |||
| corners_int = bev_corners.reshape(-1, 2) | |||
| cv2.line(img, (int(corners_int[0, 0]), int(corners_int[0, 1])), (int(corners_int[3, 0]), int(corners_int[3, 1])), (255, 255, 0), 2) | |||
| # bev image coordinates format | |||
| def get_corners(x, y, w, l, yaw): | |||
| bev_corners = np.zeros((4, 2), dtype=np.float32) | |||
| cos_yaw = np.cos(yaw) | |||
| sin_yaw = np.sin(yaw) | |||
| # front left | |||
| bev_corners[0, 0] = x - w / 2 * cos_yaw - l / 2 * sin_yaw | |||
| bev_corners[0, 1] = y - w / 2 * sin_yaw + l / 2 * cos_yaw | |||
| # rear left | |||
| bev_corners[1, 0] = x - w / 2 * cos_yaw + l / 2 * sin_yaw | |||
| bev_corners[1, 1] = y - w / 2 * sin_yaw - l / 2 * cos_yaw | |||
| # rear right | |||
| bev_corners[2, 0] = x + w / 2 * cos_yaw + l / 2 * sin_yaw | |||
| bev_corners[2, 1] = y + w / 2 * sin_yaw - l / 2 * cos_yaw | |||
| # front right | |||
| bev_corners[3, 0] = x + w / 2 * cos_yaw - l / 2 * sin_yaw | |||
| bev_corners[3, 1] = y + w / 2 * sin_yaw + l / 2 * cos_yaw | |||
| return bev_corners | |||
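# Sketch (illustrative): with yaw = 0 the box is axis-aligned, e.g.
#   >>> get_corners(0., 0., w=2., l=4., yaw=0.)
#   array([[-1.,  2.],
#          [-1., -2.],
#          [ 1., -2.],
#          [ 1.,  2.]], dtype=float32)
# i.e. front-left, rear-left, rear-right, front-right in BEV pixel coordinates.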
| def _nms(heat, kernel=3): | |||
| pad = (kernel - 1) // 2 | |||
| hmax = F.max_pool2d(heat, (kernel, kernel), stride=1, padding=pad) | |||
| keep = (hmax == heat).float() | |||
| return heat * keep | |||
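# Sketch (illustrative): the 3x3 max-pool keeps only cells that are local
# maxima of the heatmap and zeroes everything else, e.g.
#   >>> heat = torch.tensor([[[[0.1, 0.2, 0.1],
#   ...                        [0.2, 0.9, 0.2],
#   ...                        [0.1, 0.2, 0.1]]]])
#   >>> int(_nms(heat).nonzero().shape[0])
#   1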
| def _gather_feat(feat, ind, mask=None): | |||
| dim = feat.size(2) | |||
| ind = ind.unsqueeze(2).expand(ind.size(0), ind.size(1), dim) | |||
| feat = feat.gather(1, ind) | |||
| if mask is not None: | |||
| mask = mask.unsqueeze(2).expand_as(feat) | |||
| feat = feat[mask] | |||
| feat = feat.view(-1, dim) | |||
| return feat | |||
| def _transpose_and_gather_feat(feat, ind): | |||
| feat = feat.permute(0, 2, 3, 1).contiguous() | |||
| feat = feat.view(feat.size(0), -1, feat.size(3)) | |||
| feat = _gather_feat(feat, ind) | |||
| return feat | |||
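# Shape sketch (illustrative): _transpose_and_gather_feat takes a head output
# feat of shape (B, C, H, W) and flat indices ind of shape (B, K) into the
# H * W grid, returning per-detection features of shape (B, K, C).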
| def _topk(scores, K=40): | |||
| batch, cat, height, width = scores.size() | |||
| topk_scores, topk_inds = torch.topk(scores.view(batch, cat, -1), K) | |||
| topk_inds = topk_inds % (height * width) | |||
| topk_ys = (torch.floor_divide(topk_inds, width)).float() | |||
| topk_xs = (topk_inds % width).int().float() | |||
| topk_score, topk_ind = torch.topk(topk_scores.view(batch, -1), K) | |||
| topk_clses = (torch.floor_divide(topk_ind, K)).int() | |||
| topk_inds = _gather_feat(topk_inds.view(batch, -1, 1), topk_ind).view(batch, K) | |||
| topk_ys = _gather_feat(topk_ys.view(batch, -1, 1), topk_ind).view(batch, K) | |||
| topk_xs = _gather_feat(topk_xs.view(batch, -1, 1), topk_ind).view(batch, K) | |||
| return topk_score, topk_inds, topk_clses, topk_ys, topk_xs | |||
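# Shape sketch (illustrative): for scores of shape (B, num_classes, H, W),
# _topk returns five (B, K) tensors: the top-K scores over all classes and
# cells, their flat cell indices, class ids, and integer y / x coordinates.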
| def decode(hm_cen, cen_offset, direction, z_coor, dim, K=40): | |||
| batch_size, num_classes, height, width = hm_cen.size() | |||
| hm_cen = _nms(hm_cen) | |||
| scores, inds, clses, ys, xs = _topk(hm_cen, K=K) | |||
| if cen_offset is not None: | |||
| cen_offset = _transpose_and_gather_feat(cen_offset, inds) | |||
| cen_offset = cen_offset.view(batch_size, K, 2) | |||
| xs = xs.view(batch_size, K, 1) + cen_offset[:, :, 0:1] | |||
| ys = ys.view(batch_size, K, 1) + cen_offset[:, :, 1:2] | |||
| else: | |||
| xs = xs.view(batch_size, K, 1) + 0.5 | |||
| ys = ys.view(batch_size, K, 1) + 0.5 | |||
| direction = _transpose_and_gather_feat(direction, inds) | |||
| direction = direction.view(batch_size, K, 2) | |||
| z_coor = _transpose_and_gather_feat(z_coor, inds) | |||
| z_coor = z_coor.view(batch_size, K, 1) | |||
| dim = _transpose_and_gather_feat(dim, inds) | |||
| dim = dim.view(batch_size, K, 3) | |||
| clses = clses.view(batch_size, K, 1).float() | |||
| scores = scores.view(batch_size, K, 1) | |||
# (scores x 1, xs x 1, ys x 1, z_coor x 1, dim x 3, direction x 2, clses x 1)
# (scores-0:1, xs-1:2, ys-2:3, z_coor-3:4, dim-4:7, direction-7:9, clses-9:10)
| # detections: [batch_size, K, 10] | |||
| detections = torch.cat([scores, xs, ys, z_coor, dim, direction, clses], dim=2) | |||
| return detections | |||
| def get_yaw(direction): | |||
| return np.arctan2(direction[:, 0:1], direction[:, 1:2]) | |||
| def post_processing(detections, num_classes=3, down_ratio=4, peak_thresh=0.2): | |||
| """ | |||
| :param detections: [batch_size, K, 10] | |||
| # (scores x 1, xs x 1, ys x 1, z_coor x 1, dim x 3, direction x 2, clses x 1) | |||
| # (scores-0:1, xs-1:2, ys-2:3, z_coor-3:4, dim-4:7, direction-7:9, clses-9:10) | |||
| :return: | |||
| """ | |||
| # TODO: Need to consider rescale to the original scale: x, y | |||
| ret = [] | |||
| for i in range(detections.shape[0]): | |||
| top_preds = {} | |||
| classes = detections[i, :, -1] | |||
| for j in range(num_classes): | |||
| inds = (classes == j) | |||
# score, x, y, z, h, w, l, yaw
| top_preds[j] = np.concatenate([ | |||
| detections[i, inds, 0:1], | |||
| detections[i, inds, 1:2] * down_ratio, | |||
| detections[i, inds, 2:3] * down_ratio, | |||
| detections[i, inds, 3:4], | |||
| detections[i, inds, 4:5], | |||
| detections[i, inds, 5:6] / cnf.bound_size_y * cnf.BEV_WIDTH, | |||
| detections[i, inds, 6:7] / cnf.bound_size_x * cnf.BEV_HEIGHT, | |||
| get_yaw(detections[i, inds, 7:9]).astype(np.float32)], axis=1) | |||
| # Filter by peak_thresh | |||
| if len(top_preds[j]) > 0: | |||
| keep_inds = (top_preds[j][:, 0] > peak_thresh) | |||
| top_preds[j] = top_preds[j][keep_inds] | |||
| ret.append(top_preds) | |||
| return ret | |||
| def draw_predictions(img, detections, num_classes=3): | |||
| for j in range(num_classes): | |||
| if len(detections[j]) > 0: | |||
| for det in detections[j]: | |||
| # (scores-0:1, x-1:2, y-2:3, z-3:4, dim-4:7, yaw-7:8) | |||
| _score, _x, _y, _z, _h, _w, _l, _yaw = det | |||
| drawRotatedBox(img, _x, _y, _w, _l, _yaw, cnf.colors[int(j)]) | |||
| return img | |||
| def convert_det_to_real_values(detections, num_classes=3): | |||
| kitti_dets = [] | |||
| for cls_id in range(num_classes): | |||
| if len(detections[cls_id]) > 0: | |||
| for det in detections[cls_id]: | |||
| # (scores-0:1, x-1:2, y-2:3, z-3:4, dim-4:7, yaw-7:8) | |||
| _score, _x, _y, _z, _h, _w, _l, _yaw = det | |||
_yaw = round(-_yaw, 2)
| x = round(_y / cnf.BEV_HEIGHT * cnf.bound_size_x + cnf.boundary['minX'], 2) | |||
| y = round(_x / cnf.BEV_WIDTH * cnf.bound_size_y + cnf.boundary['minY'], 2) | |||
| z = round(_z + cnf.boundary['minZ'], 2) | |||
| w = round(_w / cnf.BEV_WIDTH * cnf.bound_size_y, 2) | |||
| l = round(_l / cnf.BEV_HEIGHT * cnf.bound_size_x, 2) | |||
h = round(_h, 2)
| kitti_dets.append([cls_id, h, w, l, x, y, z, _yaw]) | |||
| return np.array(kitti_dets) | |||
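# Worked sketch (assuming the usual SFA3D KITTI config: boundary x in [0, 50] m,
# y in [-25, 25] m, and a 608 x 608 BEV grid): a detection at BEV pixel
# _y = 304 maps to x = 304 / 608 * 50 + 0 = 25 m ahead of the sensor, and
# _x = 304 maps to y = 304 / 608 * 50 - 25 = 0 m, i.e. on the driving axis.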
| if __name__ == '__main__': | |||
| print("=".ljust(66, "=")) | |||
| configs = parse_test_configs() | |||
| model = create_model(configs) | |||
| print('\n\n' + '-*=' * 30 + '\n\n') | |||
| # assert os.path.isfile(configs.model_dir), "No file at {}".format(configs.model_dir) | |||
| if os.path.isfile(configs.model_dir): | |||
| model_path = configs.model_dir | |||
| else: | |||
| # for file in os.listdir(configs.model_dir): | |||
| # model_path = os.path.join(configs.model_dir, file) | |||
# take the last checkpoint in the directory
| model_path = os.path.join(configs.model_dir, os.listdir(configs.model_dir)[-1]) | |||
| print('Loaded weights from {}\n'.format(model_path)) | |||
| # model.load_state_dict(torch.load(model_path)) | |||
| configs.device = torch.device('cpu' if configs.no_cuda else 'cuda:{}'.format(configs.gpu_idx)) | |||
| model.load_state_dict(torch.load(model_path, map_location=configs.device)) | |||
| model = model.to(device=configs.device) | |||
| out_cap = None | |||
| model.eval() | |||
| test_dataloader = create_test_dataloader(configs) | |||
| with torch.no_grad(): | |||
| for batch_idx, batch_data in enumerate(test_dataloader): | |||
| bev_maps, metadatas = batch_data | |||
| input_bev_maps = bev_maps.to(configs.device, non_blocking=True).float() | |||
| t1 = time_synchronized() | |||
| outputs = model(input_bev_maps) | |||
| outputs['hm_cen'] = _sigmoid(outputs['hm_cen']) | |||
| outputs['cen_offset'] = _sigmoid(outputs['cen_offset']) | |||
| # detections size (batch_size, K, 10) | |||
| detections = decode(outputs['hm_cen'], outputs['cen_offset'], outputs['direction'], outputs['z_coor'], | |||
| outputs['dim'], K=configs.K) | |||
| detections = detections.cpu().numpy().astype(np.float32) | |||
| detections = post_processing(detections, configs.num_classes, configs.down_ratio, configs.peak_thresh) | |||
| t2 = time_synchronized() | |||
| detections = detections[0] # only first batch | |||
| # Draw prediction in the image | |||
| bev_map = (bev_maps.squeeze().permute(1, 2, 0).numpy() * 255).astype(np.uint8) | |||
| bev_map = cv2.resize(bev_map, (cnf.BEV_WIDTH, cnf.BEV_HEIGHT)) | |||
| bev_map = draw_predictions(bev_map, detections.copy(), configs.num_classes) | |||
| # Rotate the bev_map | |||
| bev_map = cv2.rotate(bev_map, cv2.ROTATE_180) | |||
| kitti_dets = convert_det_to_real_values(detections) | |||
| print('\tDone testing the {}th sample, time: {:.1f}ms, speed {:.2f}FPS'.format(batch_idx, (t2 - t1) * 1000, | |||
| 1 / (t2 - t1))) | |||
| if configs.save_test_output: | |||
| img_fn = os.path.basename(metadatas['bev_path'][0])[:-4] | |||
| if configs.output_format == 'image': | |||
cv2.imwrite(os.path.join(configs.results_dir, '{}.jpg'.format(img_fn)), bev_map)
| elif configs.output_format == 'video': | |||
| if out_cap is None: | |||
| out_cap_h, out_cap_w = bev_map.shape[:2] | |||
| fourcc = cv2.VideoWriter_fourcc(*'MJPG') | |||
| out_cap = cv2.VideoWriter( | |||
os.path.join(configs.results_dir, '{}.avi'.format(configs.output_video_fn)),
| fourcc, 30, (out_cap_w, out_cap_h)) | |||
| out_cap.write(bev_map) | |||
| else: | |||
| pass | |||
| txt_path = os.path.join(configs.results_dir,'{}.txt'.format(img_fn)) | |||
| txt_file = open(txt_path, 'w') | |||
| for det in kitti_dets: | |||
| write_line = cnf.CLASS_ID_TO_NAME[det[0]] + ' 0 0 0 0 0 0 0 ' + str(det[1]) + ' ' + str(det[2]) +\ | |||
| ' ' + str(det[3]) + ' ' + str(det[4]) + ' ' + str(det[5]) + ' ' + str(det[6]) + ' ' + str(det[7]) +'\n' | |||
txt_file.write(write_line)
| txt_file.close() | |||
| if out_cap: | |||
| out_cap.release() | |||
| cv2.destroyAllWindows() | |||
| @@ -0,0 +1,163 @@ | |||
| # ------------------------------------------------------------------------------ | |||
| # Portions of this code are from | |||
| # CornerNet (https://github.com/princeton-vl/CornerNet) | |||
| # Copyright (c) 2018, University of Michigan | |||
| # Licensed under the BSD 3-Clause License | |||
| # Modified by Nguyen Mau Dung (2020.08.09) | |||
| # ------------------------------------------------------------------------------ | |||
| import os | |||
| import sys | |||
| import math | |||
| import torch.nn as nn | |||
| import torch | |||
| import torch.nn.functional as F | |||
| src_dir = os.path.dirname(os.path.realpath(__file__)) | |||
| # while not src_dir.endswith("sfa"): | |||
| # src_dir = os.path.dirname(src_dir) | |||
| if src_dir not in sys.path: | |||
| sys.path.append(src_dir) | |||
| from utils.torch_utils import to_cpu, _sigmoid | |||
| def _gather_feat(feat, ind, mask=None): | |||
| dim = feat.size(2) | |||
| ind = ind.unsqueeze(2).expand(ind.size(0), ind.size(1), dim) | |||
| feat = feat.gather(1, ind) | |||
| if mask is not None: | |||
| mask = mask.unsqueeze(2).expand_as(feat) | |||
| feat = feat[mask] | |||
| feat = feat.view(-1, dim) | |||
| return feat | |||
| def _transpose_and_gather_feat(feat, ind): | |||
| feat = feat.permute(0, 2, 3, 1).contiguous() | |||
| feat = feat.view(feat.size(0), -1, feat.size(3)) | |||
| feat = _gather_feat(feat, ind) | |||
| return feat | |||
| def _neg_loss(pred, gt, alpha=2, beta=4): | |||
| ''' Modified focal loss. Exactly the same as CornerNet. | |||
| Runs faster and costs a little bit more memory | |||
| Arguments: | |||
| pred (batch x c x h x w) | |||
gt (batch x c x h x w)
| ''' | |||
| pos_inds = gt.eq(1).float() | |||
| neg_inds = gt.lt(1).float() | |||
| neg_weights = torch.pow(1 - gt, beta) | |||
| loss = 0 | |||
| pos_loss = torch.log(pred) * torch.pow(1 - pred, alpha) * pos_inds | |||
| neg_loss = torch.log(1 - pred) * torch.pow(pred, alpha) * neg_weights * neg_inds | |||
| num_pos = pos_inds.float().sum() | |||
| pos_loss = pos_loss.sum() | |||
| neg_loss = neg_loss.sum() | |||
| if num_pos == 0: | |||
| loss = loss - neg_loss | |||
| else: | |||
| loss = loss - (pos_loss + neg_loss) / num_pos | |||
| return loss | |||
| class FocalLoss(nn.Module): | |||
'''nn.Module wrapper for focal loss'''
| def __init__(self): | |||
| super(FocalLoss, self).__init__() | |||
| self.neg_loss = _neg_loss | |||
| def forward(self, out, target): | |||
| return self.neg_loss(out, target) | |||
| class L1Loss(nn.Module): | |||
| def __init__(self): | |||
| super(L1Loss, self).__init__() | |||
| def forward(self, output, mask, ind, target): | |||
| pred = _transpose_and_gather_feat(output, ind) | |||
| mask = mask.unsqueeze(2).expand_as(pred).float() | |||
loss = F.l1_loss(pred * mask, target * mask, reduction='sum')
| loss = loss / (mask.sum() + 1e-4) | |||
| return loss | |||
| class L1Loss_Balanced(nn.Module): | |||
| """Balanced L1 Loss | |||
| paper: https://arxiv.org/pdf/1904.02701.pdf (CVPR 2019) | |||
| Code refer from: https://github.com/OceanPang/Libra_R-CNN | |||
| """ | |||
| def __init__(self, alpha=0.5, gamma=1.5, beta=1.0): | |||
| super(L1Loss_Balanced, self).__init__() | |||
| self.alpha = alpha | |||
| self.gamma = gamma | |||
| assert beta > 0 | |||
| self.beta = beta | |||
| def forward(self, output, mask, ind, target): | |||
| pred = _transpose_and_gather_feat(output, ind) | |||
| mask = mask.unsqueeze(2).expand_as(pred).float() | |||
| loss = self.balanced_l1_loss(pred * mask, target * mask) | |||
| loss = loss.sum() / (mask.sum() + 1e-4) | |||
| return loss | |||
| def balanced_l1_loss(self, pred, target): | |||
| assert pred.size() == target.size() and target.numel() > 0 | |||
| diff = torch.abs(pred - target) | |||
| b = math.exp(self.gamma / self.alpha) - 1 | |||
| loss = torch.where(diff < self.beta, | |||
| self.alpha / b * (b * diff + 1) * torch.log(b * diff / self.beta + 1) - self.alpha * diff, | |||
| self.gamma * diff + self.gamma / b - self.alpha * self.beta) | |||
| return loss | |||
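# Continuity sketch (illustrative): with b = exp(gamma / alpha) - 1 we have
# ln(b + 1) = gamma / alpha, so at diff = beta the first branch evaluates to
# (alpha / b) * (b * beta + 1) * (gamma / alpha) - alpha * beta
#   = gamma * beta + gamma / b - alpha * beta,
# which equals the second branch at diff = beta: the two pieces join continuously.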
| class Compute_Loss(nn.Module): | |||
| def __init__(self, device): | |||
| super(Compute_Loss, self).__init__() | |||
| self.device = device | |||
| self.focal_loss = FocalLoss() | |||
| self.l1_loss = L1Loss() | |||
| self.l1_loss_balanced = L1Loss_Balanced(alpha=0.5, gamma=1.5, beta=1.0) | |||
| self.weight_hm_cen = 1. | |||
| self.weight_z_coor, self.weight_cenoff, self.weight_dim, self.weight_direction = 1., 1., 1., 1. | |||
| def forward(self, outputs, tg): | |||
| # tg: targets | |||
| outputs['hm_cen'] = _sigmoid(outputs['hm_cen']) | |||
| outputs['cen_offset'] = _sigmoid(outputs['cen_offset']) | |||
| l_hm_cen = self.focal_loss(outputs['hm_cen'], tg['hm_cen']) | |||
| l_cen_offset = self.l1_loss(outputs['cen_offset'], tg['obj_mask'], tg['indices_center'], tg['cen_offset']) | |||
| l_direction = self.l1_loss(outputs['direction'], tg['obj_mask'], tg['indices_center'], tg['direction']) | |||
| # Apply the L1_loss balanced for z coor and dimension regression | |||
| l_z_coor = self.l1_loss_balanced(outputs['z_coor'], tg['obj_mask'], tg['indices_center'], tg['z_coor']) | |||
| l_dim = self.l1_loss_balanced(outputs['dim'], tg['obj_mask'], tg['indices_center'], tg['dim']) | |||
| total_loss = l_hm_cen * self.weight_hm_cen + l_cen_offset * self.weight_cenoff + \ | |||
| l_dim * self.weight_dim + l_direction * self.weight_direction + \ | |||
| l_z_coor * self.weight_z_coor | |||
| loss_stats = { | |||
| 'total_loss': to_cpu(total_loss).item(), | |||
| 'hm_cen_loss': to_cpu(l_hm_cen).item(), | |||
| 'cen_offset_loss': to_cpu(l_cen_offset).item(), | |||
| 'dim_loss': to_cpu(l_dim).item(), | |||
| 'direction_loss': to_cpu(l_direction).item(), | |||
| 'z_coor_loss': to_cpu(l_z_coor).item(), | |||
| } | |||
| return total_loss, loss_stats | |||
| @@ -0,0 +1,252 @@ | |||
| """ | |||
| # --------------------------------------------------------------------------------- | |||
| # -*- coding: utf-8 -*- | |||
| ----------------------------------------------------------------------------------- | |||
| # Copyright (c) Microsoft | |||
| # Licensed under the MIT License. | |||
| # Written by Bin Xiao (Bin.Xiao@microsoft.com) | |||
| # Modified by Xingyi Zhou | |||
| # Refer from: https://github.com/xingyizhou/CenterNet | |||
| # Modifier: Nguyen Mau Dung (2020.08.09) | |||
| # ------------------------------------------------------------------------------ | |||
| """ | |||
| from __future__ import absolute_import | |||
| from __future__ import division | |||
| from __future__ import print_function | |||
| import os | |||
| import torch | |||
| import torch.nn as nn | |||
| import torch.utils.model_zoo as model_zoo | |||
| import torch.nn.functional as F | |||
| BN_MOMENTUM = 0.1 | |||
| model_urls = { | |||
| 'resnet18': 'https://download.pytorch.org/models/resnet18-5c106cde.pth', | |||
| 'resnet34': 'https://download.pytorch.org/models/resnet34-333f7ec4.pth', | |||
| 'resnet50': 'https://download.pytorch.org/models/resnet50-19c8e357.pth', | |||
| 'resnet101': 'https://download.pytorch.org/models/resnet101-5d3b4d8f.pth', | |||
| 'resnet152': 'https://download.pytorch.org/models/resnet152-b121ed2d.pth', | |||
| } | |||
| def conv3x3(in_planes, out_planes, stride=1): | |||
| """3x3 convolution with padding""" | |||
| return nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=stride, padding=1, bias=False) | |||
| class BasicBlock(nn.Module): | |||
| expansion = 1 | |||
| def __init__(self, inplanes, planes, stride=1, downsample=None): | |||
| super(BasicBlock, self).__init__() | |||
| self.conv1 = conv3x3(inplanes, planes, stride) | |||
| self.bn1 = nn.BatchNorm2d(planes, momentum=BN_MOMENTUM) | |||
| self.relu = nn.ReLU(inplace=True) | |||
| self.conv2 = conv3x3(planes, planes) | |||
| self.bn2 = nn.BatchNorm2d(planes, momentum=BN_MOMENTUM) | |||
| self.downsample = downsample | |||
| self.stride = stride | |||
| def forward(self, x): | |||
| residual = x | |||
| out = self.conv1(x) | |||
| out = self.bn1(out) | |||
| out = self.relu(out) | |||
| out = self.conv2(out) | |||
| out = self.bn2(out) | |||
| if self.downsample is not None: | |||
| residual = self.downsample(x) | |||
| out += residual | |||
| out = self.relu(out) | |||
| return out | |||
| class Bottleneck(nn.Module): | |||
| expansion = 4 | |||
| def __init__(self, inplanes, planes, stride=1, downsample=None): | |||
| super(Bottleneck, self).__init__() | |||
| self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=1, bias=False) | |||
| self.bn1 = nn.BatchNorm2d(planes, momentum=BN_MOMENTUM) | |||
| self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride, padding=1, bias=False) | |||
| self.bn2 = nn.BatchNorm2d(planes, momentum=BN_MOMENTUM) | |||
| self.conv3 = nn.Conv2d(planes, planes * self.expansion, kernel_size=1, bias=False) | |||
| self.bn3 = nn.BatchNorm2d(planes * self.expansion, momentum=BN_MOMENTUM) | |||
| self.relu = nn.ReLU(inplace=True) | |||
| self.downsample = downsample | |||
| self.stride = stride | |||
| def forward(self, x): | |||
| residual = x | |||
| out = self.conv1(x) | |||
| out = self.bn1(out) | |||
| out = self.relu(out) | |||
| out = self.conv2(out) | |||
| out = self.bn2(out) | |||
| out = self.relu(out) | |||
| out = self.conv3(out) | |||
| out = self.bn3(out) | |||
| if self.downsample is not None: | |||
| residual = self.downsample(x) | |||
| out += residual | |||
| out = self.relu(out) | |||
| return out | |||
| class PoseResNet(nn.Module): | |||
| def __init__(self, block, layers, heads, head_conv, **kwargs): | |||
| self.inplanes = 64 | |||
| self.deconv_with_bias = False | |||
| self.heads = heads | |||
| super(PoseResNet, self).__init__() | |||
| self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3, bias=False) | |||
| self.bn1 = nn.BatchNorm2d(64, momentum=BN_MOMENTUM) | |||
| self.relu = nn.ReLU(inplace=True) | |||
| self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1) | |||
| self.layer1 = self._make_layer(block, 64, layers[0]) | |||
| self.layer2 = self._make_layer(block, 128, layers[1], stride=2) | |||
| self.layer3 = self._make_layer(block, 256, layers[2], stride=2) | |||
| self.layer4 = self._make_layer(block, 512, layers[3], stride=2) | |||
| self.conv_up_level1 = nn.Conv2d(768, 256, kernel_size=1, stride=1, padding=0) | |||
| self.conv_up_level2 = nn.Conv2d(384, 128, kernel_size=1, stride=1, padding=0) | |||
| self.conv_up_level3 = nn.Conv2d(192, 64, kernel_size=1, stride=1, padding=0) | |||
| fpn_channels = [256, 128, 64] | |||
| for fpn_idx, fpn_c in enumerate(fpn_channels): | |||
| for head in sorted(self.heads): | |||
| num_output = self.heads[head] | |||
| if head_conv > 0: | |||
| fc = nn.Sequential( | |||
| nn.Conv2d(fpn_c, head_conv, kernel_size=3, padding=1, bias=True), | |||
| nn.ReLU(inplace=True), | |||
| nn.Conv2d(head_conv, num_output, kernel_size=1, stride=1, padding=0)) | |||
| else: | |||
| fc = nn.Conv2d(in_channels=fpn_c, out_channels=num_output, kernel_size=1, stride=1, padding=0) | |||
| self.__setattr__('fpn{}_{}'.format(fpn_idx, head), fc) | |||
| def _make_layer(self, block, planes, blocks, stride=1): | |||
| downsample = None | |||
| if stride != 1 or self.inplanes != planes * block.expansion: | |||
| downsample = nn.Sequential( | |||
| nn.Conv2d(self.inplanes, planes * block.expansion, kernel_size=1, stride=stride, bias=False), | |||
| nn.BatchNorm2d(planes * block.expansion, momentum=BN_MOMENTUM), | |||
| ) | |||
| layers = [] | |||
| layers.append(block(self.inplanes, planes, stride, downsample)) | |||
| self.inplanes = planes * block.expansion | |||
| for i in range(1, blocks): | |||
| layers.append(block(self.inplanes, planes)) | |||
| return nn.Sequential(*layers) | |||
| def forward(self, x): | |||
| _, _, input_h, input_w = x.size() | |||
| hm_h, hm_w = input_h // 4, input_w // 4 | |||
| x = self.conv1(x) | |||
| x = self.bn1(x) | |||
| x = self.relu(x) | |||
| x = self.maxpool(x) | |||
| out_layer1 = self.layer1(x) | |||
| out_layer2 = self.layer2(out_layer1) | |||
| out_layer3 = self.layer3(out_layer2) | |||
| out_layer4 = self.layer4(out_layer3) | |||
| # up_level1: torch.Size([b, 512, 14, 14]) | |||
| up_level1 = F.interpolate(out_layer4, scale_factor=2, mode='bilinear', align_corners=True) | |||
| concat_level1 = torch.cat((up_level1, out_layer3), dim=1) | |||
| # up_level2: torch.Size([b, 256, 28, 28]) | |||
| up_level2 = F.interpolate(self.conv_up_level1(concat_level1), scale_factor=2, mode='bilinear', | |||
| align_corners=True) | |||
| concat_level2 = torch.cat((up_level2, out_layer2), dim=1) | |||
| # up_level3: torch.Size([b, 128, 56, 56]), | |||
| up_level3 = F.interpolate(self.conv_up_level2(concat_level2), scale_factor=2, mode='bilinear', | |||
| align_corners=True) | |||
| # up_level4: torch.Size([b, 64, 56, 56]) | |||
| up_level4 = self.conv_up_level3(torch.cat((up_level3, out_layer1), dim=1)) | |||
| ret = {} | |||
| for head in self.heads: | |||
| temp_outs = [] | |||
for fpn_idx, fpn_input in enumerate([up_level2, up_level3, up_level4]):
fpn_out = self.__getattr__('fpn{}_{}'.format(fpn_idx, head))(fpn_input)
| _, _, fpn_out_h, fpn_out_w = fpn_out.size() | |||
# Make sure the added features have the same size as the heatmap output
| if (fpn_out_w != hm_w) or (fpn_out_h != hm_h): | |||
| fpn_out = F.interpolate(fpn_out, size=(hm_h, hm_w)) | |||
| temp_outs.append(fpn_out) | |||
| # Take the softmax in the keypoint feature pyramid network | |||
| final_out = self.apply_kfpn(temp_outs) | |||
| ret[head] = final_out | |||
| return ret | |||
| def apply_kfpn(self, outs): | |||
| outs = torch.cat([out.unsqueeze(-1) for out in outs], dim=-1) | |||
| softmax_outs = F.softmax(outs, dim=-1) | |||
| ret_outs = (outs * softmax_outs).sum(dim=-1) | |||
| return ret_outs | |||
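# Numeric sketch (illustrative): apply_kfpn fuses the per-scale head outputs
# with a softmax over the scale axis, i.e. a soft-argmax per pixel. For one
# pixel with per-scale logits (2.0, 0.0, 0.0), the softmax weights are about
# (0.79, 0.105, 0.105), giving a fused value of 2.0 * 0.79 ~= 1.58, dominated
# by the most confident scale.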
| def init_weights(self, num_layers, pretrained=True): | |||
| if pretrained: | |||
| # TODO: Check initial weights for head later | |||
| for fpn_idx in [0, 1, 2]: # 3 FPN layers | |||
| for head in self.heads: | |||
| final_layer = self.__getattr__('fpn{}_{}'.format(fpn_idx, head)) | |||
| for i, m in enumerate(final_layer.modules()): | |||
| if isinstance(m, nn.Conv2d): | |||
| # nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu') | |||
| # print('=> init {}.weight as normal(0, 0.001)'.format(name)) | |||
| # print('=> init {}.bias as 0'.format(name)) | |||
| if m.weight.shape[0] == self.heads[head]: | |||
| if 'hm' in head: | |||
| nn.init.constant_(m.bias, -2.19) | |||
| else: | |||
| nn.init.normal_(m.weight, std=0.001) | |||
| nn.init.constant_(m.bias, 0) | |||
| # pretrained_state_dict = torch.load(pretrained) | |||
| url = model_urls['resnet{}'.format(num_layers)] | |||
| pretrained_state_dict = model_zoo.load_url(url) | |||
| print('=> loading pretrained model {}'.format(url)) | |||
| self.load_state_dict(pretrained_state_dict, strict=False) | |||
| resnet_spec = {18: (BasicBlock, [2, 2, 2, 2]), | |||
| 34: (BasicBlock, [3, 4, 6, 3]), | |||
| 50: (Bottleneck, [3, 4, 6, 3]), | |||
| 101: (Bottleneck, [3, 4, 23, 3]), | |||
| 152: (Bottleneck, [3, 8, 36, 3])} | |||
| def get_pose_net(num_layers, heads, head_conv, imagenet_pretrained): | |||
| block_class, layers = resnet_spec[num_layers] | |||
| model = PoseResNet(block_class, layers, heads, head_conv=head_conv) | |||
| model.init_weights(num_layers, pretrained=imagenet_pretrained) | |||
| return model | |||
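# Usage sketch (illustrative, matching the heads built by the test script):
#   heads = {'hm_cen': 3, 'cen_offset': 2, 'direction': 2, 'z_coor': 1, 'dim': 3}
#   model = get_pose_net(num_layers=18, heads=heads, head_conv=64, imagenet_pretrained=False)
#   out = model(torch.randn(1, 3, 608, 608))
#   # each out[head] has shape (1, heads[head], 152, 152)  (input downsampled 4x)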
| @@ -0,0 +1,134 @@ | |||
| """ | |||
| # -*- coding: utf-8 -*- | |||
| ----------------------------------------------------------------------------------- | |||
| # Author: Nguyen Mau Dung | |||
| # DoC: 2020.08.09 | |||
| # email: nguyenmaudung93.kstn@gmail.com | |||
| ----------------------------------------------------------------------------------- | |||
| # Description: utils functions that use for model | |||
| """ | |||
| import os | |||
| import sys | |||
| import torch | |||
| src_dir = os.path.dirname(os.path.realpath(__file__)) | |||
| # while not src_dir.endswith("sfa"): | |||
| # src_dir = os.path.dirname(src_dir) | |||
| if src_dir not in sys.path: | |||
| sys.path.append(src_dir) | |||
| from models import resnet, fpn_resnet | |||
| def create_model(configs): | |||
| """Create model based on architecture name""" | |||
try:
arch_parts = configs.arch.split('_')
num_layers = int(arch_parts[-1])
except ValueError:
raise ValueError('Invalid architecture name: {}'.format(configs.arch))
| if 'fpn_resnet' in configs.arch: | |||
| print('using ResNet architecture with feature pyramid') | |||
| model = fpn_resnet.get_pose_net(num_layers=num_layers, heads=configs.heads, head_conv=configs.head_conv, | |||
| imagenet_pretrained=configs.imagenet_pretrained) | |||
| elif 'resnet' in configs.arch: | |||
| print('using ResNet architecture') | |||
| model = resnet.get_pose_net(num_layers=num_layers, heads=configs.heads, head_conv=configs.head_conv, | |||
| imagenet_pretrained=configs.imagenet_pretrained) | |||
| else: | |||
raise ValueError('Undefined model backbone: {}'.format(configs.arch))
| return model | |||
| def get_num_parameters(model): | |||
| """Count number of trained parameters of the model""" | |||
| if hasattr(model, 'module'): | |||
| num_parameters = sum(p.numel() for p in model.module.parameters() if p.requires_grad) | |||
| else: | |||
| num_parameters = sum(p.numel() for p in model.parameters() if p.requires_grad) | |||
| return num_parameters | |||
| def make_data_parallel(model, configs): | |||
| if configs.distributed: | |||
| # For multiprocessing distributed, DistributedDataParallel constructor | |||
| # should always set the single device scope, otherwise, | |||
| # DistributedDataParallel will use all available devices. | |||
| if configs.gpu_idx is not None: | |||
| torch.cuda.set_device(configs.gpu_idx) | |||
| model.cuda(configs.gpu_idx) | |||
| # When using a single GPU per process and per | |||
| # DistributedDataParallel, we need to divide the batch size | |||
| # ourselves based on the total number of GPUs we have | |||
| configs.batch_size = int(configs.batch_size / configs.ngpus_per_node) | |||
| configs.num_workers = int((configs.num_workers + configs.ngpus_per_node - 1) / configs.ngpus_per_node) | |||
| model = torch.nn.parallel.DistributedDataParallel(model, device_ids=[configs.gpu_idx]) | |||
| else: | |||
| model.cuda() | |||
| # DistributedDataParallel will divide and allocate batch_size to all | |||
| # available GPUs if device_ids are not set | |||
| model = torch.nn.parallel.DistributedDataParallel(model) | |||
| elif configs.gpu_idx is not None: | |||
| torch.cuda.set_device(configs.gpu_idx) | |||
| model = model.cuda(configs.gpu_idx) | |||
| else: | |||
| # DataParallel will divide and allocate batch_size to all available GPUs | |||
| model = torch.nn.DataParallel(model).cuda() | |||
| return model | |||
| if __name__ == '__main__': | |||
| import argparse | |||
| from torchsummary import summary | |||
| from easydict import EasyDict as edict | |||
| parser = argparse.ArgumentParser(description='RTM3D Implementation') | |||
| parser.add_argument('-a', '--arch', type=str, default='resnet_18', metavar='ARCH', | |||
| help='The name of the model architecture') | |||
| parser.add_argument('--head_conv', type=int, default=-1, | |||
help='conv layer channels for output head: '
'0 for no conv layer, '
'-1 for default setting '
'(64 for resnets and 256 for dla).')
| configs = edict(vars(parser.parse_args())) | |||
| if configs.head_conv == -1: # init default head_conv | |||
| configs.head_conv = 256 if 'dla' in configs.arch else 64 | |||
| configs.num_classes = 3 | |||
| configs.num_vertexes = 8 | |||
| configs.num_center_offset = 2 | |||
| configs.num_vertexes_offset = 2 | |||
| configs.num_dimension = 3 | |||
| configs.num_rot = 8 | |||
| configs.num_depth = 1 | |||
| configs.num_wh = 2 | |||
| configs.heads = { | |||
| 'hm_mc': configs.num_classes, | |||
| 'hm_ver': configs.num_vertexes, | |||
| 'vercoor': configs.num_vertexes * 2, | |||
| 'cenoff': configs.num_center_offset, | |||
| 'veroff': configs.num_vertexes_offset, | |||
| 'dim': configs.num_dimension, | |||
| 'rot': configs.num_rot, | |||
| 'depth': configs.num_depth, | |||
| 'wh': configs.num_wh | |||
| } | |||
| configs.device = torch.device('cuda:1') | |||
| # configs.device = torch.device('cpu') | |||
| model = create_model(configs).to(device=configs.device) | |||
| sample_input = torch.randn((1, 3, 224, 224)).to(device=configs.device) | |||
| # summary(model.cuda(1), (3, 224, 224)) | |||
| output = model(sample_input) | |||
| for hm_name, hm_out in output.items(): | |||
| print('hm_name: {}, hm_out size: {}'.format(hm_name, hm_out.size())) | |||
| print('number of parameters: {}'.format(get_num_parameters(model))) | |||
| @@ -0,0 +1,284 @@ | |||
| """ | |||
| # --------------------------------------------------------------------------------- | |||
| # -*- coding: utf-8 -*- | |||
| ----------------------------------------------------------------------------------- | |||
| # Copyright (c) Microsoft | |||
| # Licensed under the MIT License. | |||
| # Written by Bin Xiao (Bin.Xiao@microsoft.com) | |||
| # Modified by Xingyi Zhou | |||
| # Refer from: https://github.com/xingyizhou/CenterNet | |||
| # Modifier: Nguyen Mau Dung (2020.08.09) | |||
| # ------------------------------------------------------------------------------ | |||
| """ | |||
| from __future__ import absolute_import | |||
| from __future__ import division | |||
| from __future__ import print_function | |||
| import os | |||
| import torch | |||
| import torch.nn as nn | |||
| import torch.utils.model_zoo as model_zoo | |||
| BN_MOMENTUM = 0.1 | |||
| model_urls = { | |||
| 'resnet18': 'https://download.pytorch.org/models/resnet18-5c106cde.pth', | |||
| 'resnet34': 'https://download.pytorch.org/models/resnet34-333f7ec4.pth', | |||
| 'resnet50': 'https://download.pytorch.org/models/resnet50-19c8e357.pth', | |||
| 'resnet101': 'https://download.pytorch.org/models/resnet101-5d3b4d8f.pth', | |||
| 'resnet152': 'https://download.pytorch.org/models/resnet152-b121ed2d.pth', | |||
| } | |||
| def conv3x3(in_planes, out_planes, stride=1): | |||
| """3x3 convolution with padding""" | |||
| return nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=stride, | |||
| padding=1, bias=False) | |||
| class BasicBlock(nn.Module): | |||
| expansion = 1 | |||
| def __init__(self, inplanes, planes, stride=1, downsample=None): | |||
| super(BasicBlock, self).__init__() | |||
| self.conv1 = conv3x3(inplanes, planes, stride) | |||
| self.bn1 = nn.BatchNorm2d(planes, momentum=BN_MOMENTUM) | |||
| self.relu = nn.ReLU(inplace=True) | |||
| self.conv2 = conv3x3(planes, planes) | |||
| self.bn2 = nn.BatchNorm2d(planes, momentum=BN_MOMENTUM) | |||
| self.downsample = downsample | |||
| self.stride = stride | |||
| def forward(self, x): | |||
| residual = x | |||
| out = self.conv1(x) | |||
| out = self.bn1(out) | |||
| out = self.relu(out) | |||
| out = self.conv2(out) | |||
| out = self.bn2(out) | |||
| if self.downsample is not None: | |||
| residual = self.downsample(x) | |||
| out += residual | |||
| out = self.relu(out) | |||
| return out | |||
| class Bottleneck(nn.Module): | |||
| expansion = 4 | |||
| def __init__(self, inplanes, planes, stride=1, downsample=None): | |||
| super(Bottleneck, self).__init__() | |||
| self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=1, bias=False) | |||
| self.bn1 = nn.BatchNorm2d(planes, momentum=BN_MOMENTUM) | |||
| self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride, | |||
| padding=1, bias=False) | |||
| self.bn2 = nn.BatchNorm2d(planes, momentum=BN_MOMENTUM) | |||
| self.conv3 = nn.Conv2d(planes, planes * self.expansion, kernel_size=1, | |||
| bias=False) | |||
| self.bn3 = nn.BatchNorm2d(planes * self.expansion, | |||
| momentum=BN_MOMENTUM) | |||
| self.relu = nn.ReLU(inplace=True) | |||
| self.downsample = downsample | |||
| self.stride = stride | |||
| def forward(self, x): | |||
| residual = x | |||
| out = self.conv1(x) | |||
| out = self.bn1(out) | |||
| out = self.relu(out) | |||
| out = self.conv2(out) | |||
| out = self.bn2(out) | |||
| out = self.relu(out) | |||
| out = self.conv3(out) | |||
| out = self.bn3(out) | |||
| if self.downsample is not None: | |||
| residual = self.downsample(x) | |||
| out += residual | |||
| out = self.relu(out) | |||
| return out | |||
| class PoseResNet(nn.Module): | |||
| def __init__(self, block, layers, heads, head_conv, **kwargs): | |||
| self.inplanes = 64 | |||
| self.deconv_with_bias = False | |||
| self.heads = heads | |||
| super(PoseResNet, self).__init__() | |||
| self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3, | |||
| bias=False) | |||
| self.bn1 = nn.BatchNorm2d(64, momentum=BN_MOMENTUM) | |||
| self.relu = nn.ReLU(inplace=True) | |||
| self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1) | |||
| self.layer1 = self._make_layer(block, 64, layers[0]) | |||
| self.layer2 = self._make_layer(block, 128, layers[1], stride=2) | |||
| self.layer3 = self._make_layer(block, 256, layers[2], stride=2) | |||
| self.layer4 = self._make_layer(block, 512, layers[3], stride=2) | |||
| # used for deconv layers | |||
| self.deconv_layers = self._make_deconv_layer( | |||
| 3, | |||
| [256, 256, 256], | |||
| [4, 4, 4], | |||
| ) | |||
| # self.final_layer = [] | |||
| for head in sorted(self.heads): | |||
| num_output = self.heads[head] | |||
| if head_conv > 0: | |||
| fc = nn.Sequential( | |||
| nn.Conv2d(256, head_conv, | |||
| kernel_size=3, padding=1, bias=True), | |||
| nn.ReLU(inplace=True), | |||
| nn.Conv2d(head_conv, num_output, | |||
| kernel_size=1, stride=1, padding=0)) | |||
| else: | |||
| fc = nn.Conv2d( | |||
| in_channels=256, | |||
| out_channels=num_output, | |||
| kernel_size=1, | |||
| stride=1, | |||
| padding=0 | |||
| ) | |||
| self.__setattr__(head, fc) | |||
| # self.final_layer = nn.ModuleList(self.final_layer) | |||
| def _make_layer(self, block, planes, blocks, stride=1): | |||
| downsample = None | |||
| if stride != 1 or self.inplanes != planes * block.expansion: | |||
| downsample = nn.Sequential( | |||
| nn.Conv2d(self.inplanes, planes * block.expansion, | |||
| kernel_size=1, stride=stride, bias=False), | |||
| nn.BatchNorm2d(planes * block.expansion, momentum=BN_MOMENTUM), | |||
| ) | |||
| layers = [] | |||
| layers.append(block(self.inplanes, planes, stride, downsample)) | |||
| self.inplanes = planes * block.expansion | |||
| for i in range(1, blocks): | |||
| layers.append(block(self.inplanes, planes)) | |||
| return nn.Sequential(*layers) | |||
| def _get_deconv_cfg(self, deconv_kernel, index): | |||
| if deconv_kernel == 4: | |||
| padding = 1 | |||
| output_padding = 0 | |||
| elif deconv_kernel == 3: | |||
| padding = 1 | |||
| output_padding = 1 | |||
| elif deconv_kernel == 2: | |||
| padding = 0 | |||
| output_padding = 0 | |||
| else: | |||
| raise ValueError('Unsupported deconv kernel size: {}'.format(deconv_kernel)) | |||
| return deconv_kernel, padding, output_padding | |||
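| # For reference: ConvTranspose2d output size is (in - 1)*stride - 2*padding + kernel + output_padding, | |||
| # so each (kernel=4, padding=1, output_padding=0) setting above exactly doubles H and W at stride 2. | |||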
| def _make_deconv_layer(self, num_layers, num_filters, num_kernels): | |||
| assert num_layers == len(num_filters), \ | |||
| 'ERROR: num_deconv_layers is different from len(num_deconv_filters)' | |||
| assert num_layers == len(num_kernels), \ | |||
| 'ERROR: num_deconv_layers is different from len(num_deconv_kernels)' | |||
| layers = [] | |||
| for i in range(num_layers): | |||
| kernel, padding, output_padding = \ | |||
| self._get_deconv_cfg(num_kernels[i], i) | |||
| planes = num_filters[i] | |||
| layers.append( | |||
| nn.ConvTranspose2d( | |||
| in_channels=self.inplanes, | |||
| out_channels=planes, | |||
| kernel_size=kernel, | |||
| stride=2, | |||
| padding=padding, | |||
| output_padding=output_padding, | |||
| bias=self.deconv_with_bias)) | |||
| layers.append(nn.BatchNorm2d(planes, momentum=BN_MOMENTUM)) | |||
| layers.append(nn.ReLU(inplace=True)) | |||
| self.inplanes = planes | |||
| return nn.Sequential(*layers) | |||
| def forward(self, x): | |||
| x = self.conv1(x) | |||
| x = self.bn1(x) | |||
| x = self.relu(x) | |||
| x = self.maxpool(x) | |||
| x = self.layer1(x) | |||
| x = self.layer2(x) | |||
| x = self.layer3(x) | |||
| x = self.layer4(x) | |||
| x = self.deconv_layers(x) | |||
| ret = {} | |||
| for head in self.heads: | |||
| ret[head] = self.__getattr__(head)(x) | |||
| return ret | |||
| def init_weights(self, num_layers, pretrained=True): | |||
| if pretrained: | |||
| # print('=> init resnet deconv weights from normal distribution') | |||
| for _, m in self.deconv_layers.named_modules(): | |||
| if isinstance(m, nn.ConvTranspose2d): | |||
| # print('=> init {}.weight as normal(0, 0.001)'.format(name)) | |||
| # print('=> init {}.bias as 0'.format(name)) | |||
| nn.init.normal_(m.weight, std=0.001) | |||
| if self.deconv_with_bias: | |||
| nn.init.constant_(m.bias, 0) | |||
| elif isinstance(m, nn.BatchNorm2d): | |||
| # print('=> init {}.weight as 1'.format(name)) | |||
| # print('=> init {}.bias as 0'.format(name)) | |||
| nn.init.constant_(m.weight, 1) | |||
| nn.init.constant_(m.bias, 0) | |||
| # print('=> init final conv weights from normal distribution') | |||
| for head in self.heads: | |||
| final_layer = self.__getattr__(head) | |||
| for i, m in enumerate(final_layer.modules()): | |||
| if isinstance(m, nn.Conv2d): | |||
| # nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu') | |||
| # print('=> init {}.weight as normal(0, 0.001)'.format(name)) | |||
| # print('=> init {}.bias as 0'.format(name)) | |||
| if m.weight.shape[0] == self.heads[head]: | |||
| if 'hm' in head: | |||
| nn.init.constant_(m.bias, -2.19) | |||
| else: | |||
| nn.init.normal_(m.weight, std=0.001) | |||
| nn.init.constant_(m.bias, 0) | |||
| # pretrained_state_dict = torch.load(pretrained) | |||
| url = model_urls['resnet{}'.format(num_layers)] | |||
| pretrained_state_dict = model_zoo.load_url(url) | |||
| print('=> loading pretrained model {}'.format(url)) | |||
| self.load_state_dict(pretrained_state_dict, strict=False) | |||
| resnet_spec = {18: (BasicBlock, [2, 2, 2, 2]), | |||
| 34: (BasicBlock, [3, 4, 6, 3]), | |||
| 50: (Bottleneck, [3, 4, 6, 3]), | |||
| 101: (Bottleneck, [3, 4, 23, 3]), | |||
| 152: (Bottleneck, [3, 8, 36, 3])} | |||
| def get_pose_net(num_layers, heads, head_conv, imagenet_pretrained): | |||
| block_class, layers = resnet_spec[num_layers] | |||
| model = PoseResNet(block_class, layers, heads, head_conv=head_conv) | |||
| model.init_weights(num_layers, pretrained=imagenet_pretrained) | |||
| return model | |||
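| # Minimal usage sketch (the heads dict here is illustrative, not the project's full config): | |||
| # model = get_pose_net(num_layers=18, heads={'hm_cen': 3, 'cen_offset': 2}, head_conv=64, imagenet_pretrained=False) | |||
| # out = model(torch.randn(1, 3, 224, 224))  # dict: head name -> (1, C_head, 56, 56), i.e. 1/4 input resolution | |||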
| @@ -0,0 +1,290 @@ | |||
| """ | |||
| # -*- coding: utf-8 -*- | |||
| ----------------------------------------------------------------------------------- | |||
| # Author: Nguyen Mau Dung | |||
| # DoC: 2020.08.17 | |||
| # email: nguyenmaudung93.kstn@gmail.com | |||
| ----------------------------------------------------------------------------------- | |||
| # Description: This script is for training the model | |||
| """ | |||
| import time | |||
| import numpy as np | |||
| import sys | |||
| import random | |||
| import os | |||
| import warnings | |||
| warnings.filterwarnings("ignore", category=UserWarning) | |||
| import torch | |||
| from torch.utils.tensorboard import SummaryWriter | |||
| import torch.distributed as dist | |||
| import torch.multiprocessing as mp | |||
| import torch.utils.data.distributed | |||
| from tqdm import tqdm | |||
| src_dir = os.path.dirname(os.path.realpath(__file__)) | |||
| # while not src_dir.endswith("sfa"): | |||
| # src_dir = os.path.dirname(src_dir) | |||
| if src_dir not in sys.path: | |||
| sys.path.append(src_dir) | |||
| from data_process.kitti_dataloader import create_train_dataloader, create_val_dataloader | |||
| from models.model_utils import create_model, make_data_parallel, get_num_parameters | |||
| from utils.train_utils import create_optimizer, create_lr_scheduler, get_saved_state, save_checkpoint | |||
| from utils.torch_utils import reduce_tensor, to_python_float | |||
| from utils.misc import AverageMeter, ProgressMeter | |||
| from utils.logger import Logger | |||
| from config.train_config import parse_train_configs | |||
| from losses.losses import Compute_Loss | |||
| def main(): | |||
| configs = parse_train_configs() | |||
| # Fix random seeds for reproducible results | |||
| if configs.seed is not None: | |||
| random.seed(configs.seed) | |||
| np.random.seed(configs.seed) | |||
| torch.manual_seed(configs.seed) | |||
| torch.backends.cudnn.deterministic = True | |||
| torch.backends.cudnn.benchmark = False | |||
| if configs.gpu_idx is not None: | |||
| print('You have chosen a specific GPU. This will completely disable data parallelism.') | |||
| if configs.dist_url == "env://" and configs.world_size == -1: | |||
| configs.world_size = int(os.environ["WORLD_SIZE"]) | |||
| configs.distributed = configs.world_size > 1 or configs.multiprocessing_distributed | |||
| if configs.multiprocessing_distributed: | |||
| configs.world_size = configs.ngpus_per_node * configs.world_size | |||
| mp.spawn(main_worker, nprocs=configs.ngpus_per_node, args=(configs,)) | |||
| else: | |||
| main_worker(configs.gpu_idx, configs) | |||
| def main_worker(gpu_idx, configs): | |||
| configs.gpu_idx = gpu_idx | |||
| # configs.device = torch.device('cpu' if configs.gpu_idx is None else 'cuda:{}'.format(configs.gpu_idx)) | |||
| if configs.distributed: | |||
| if configs.dist_url == "env://" and configs.rank == -1: | |||
| configs.rank = int(os.environ["RANK"]) | |||
| if configs.multiprocessing_distributed: | |||
| # For multiprocessing distributed training, rank needs to be the | |||
| # global rank among all the processes | |||
| configs.rank = configs.rank * configs.ngpus_per_node + gpu_idx | |||
| dist.init_process_group(backend=configs.dist_backend, init_method=configs.dist_url, | |||
| world_size=configs.world_size, rank=configs.rank) | |||
| configs.subdivisions = int(64 / configs.batch_size / configs.ngpus_per_node) | |||
| else: | |||
| configs.subdivisions = int(64 / configs.batch_size) | |||
| configs.is_master_node = (not configs.distributed) or ( | |||
| configs.distributed and (configs.rank % configs.ngpus_per_node == 0)) | |||
| if configs.is_master_node: | |||
| logger = Logger(configs.logs_dir, configs.saved_fn) | |||
| logger.info('>>> Created a new logger') | |||
| logger.info('>>> configs: {}'.format(configs)) | |||
| tb_writer = SummaryWriter(log_dir=os.path.join(configs.logs_dir, 'tensorboard')) | |||
| else: | |||
| logger = None | |||
| tb_writer = None | |||
| # model | |||
| model = create_model(configs) | |||
| # load weight from a checkpoint | |||
| if configs.pretrained_path is not None: | |||
| # assert os.path.isfile(configs.pretrained_path), "=> no checkpoint found at '{}'".format(configs.pretrained_path) | |||
| if os.path.isfile(configs.pretrained_path): | |||
| model_path = configs.pretrained_path | |||
| else: | |||
| # take the most recent checkpoint (last file in sorted order) | |||
| model_path = os.path.join(configs.pretrained_path, sorted(os.listdir(configs.pretrained_path))[-1]) | |||
| model.load_state_dict(torch.load(model_path, map_location=configs.device)) | |||
| if logger is not None: | |||
| logger.info('loaded pretrained model at {}'.format(configs.pretrained_path)) | |||
| # resume weights of model from a checkpoint | |||
| if configs.resume_path is not None: | |||
| assert os.path.isfile(configs.resume_path), "=> no checkpoint found at '{}'".format(configs.resume_path) | |||
| model.load_state_dict(torch.load(configs.resume_path, map_location='cpu')) | |||
| if logger is not None: | |||
| logger.info('resume training model from checkpoint {}'.format(configs.resume_path)) | |||
| # Data Parallel | |||
| model = make_data_parallel(model, configs) | |||
| # Make sure to create optimizer after moving the model to cuda | |||
| optimizer = create_optimizer(configs, model) | |||
| lr_scheduler = create_lr_scheduler(optimizer, configs) | |||
| configs.step_lr_in_epoch = configs.lr_type not in ['multi_step', 'cosin', 'one_cycle'] | |||
| # resume optimizer, lr_scheduler from a checkpoint | |||
| if configs.resume_path is not None: | |||
| utils_path = configs.resume_path.replace('Model_', 'Utils_') | |||
| assert os.path.isfile(utils_path), "=> no checkpoint found at '{}'".format(utils_path) | |||
| utils_state_dict = torch.load(utils_path, map_location='cuda:{}'.format(configs.gpu_idx)) | |||
| optimizer.load_state_dict(utils_state_dict['optimizer']) | |||
| lr_scheduler.load_state_dict(utils_state_dict['lr_scheduler']) | |||
| configs.start_epoch = utils_state_dict['epoch'] + 1 | |||
| if configs.is_master_node: | |||
| num_parameters = get_num_parameters(model) | |||
| logger.info('number of trained parameters of the model: {}'.format(num_parameters)) | |||
| if logger is not None: | |||
| logger.info(">>> Loading dataset & getting dataloader...") | |||
| # Create dataloader | |||
| train_dataloader, train_sampler = create_train_dataloader(configs) | |||
| if logger is not None: | |||
| logger.info('number of batches in training set: {}'.format(len(train_dataloader))) | |||
| if configs.evaluate: | |||
| val_dataloader = create_val_dataloader(configs) | |||
| val_loss = validate(val_dataloader, model, configs) | |||
| print('val_loss: {:.4e}'.format(val_loss)) | |||
| return | |||
| for epoch in range(configs.start_epoch, configs.num_epochs + 1): | |||
| if logger is not None: | |||
| logger.info('{}'.format('*-' * 40)) | |||
| logger.info('{} {}/{} {}'.format('=' * 35, epoch, configs.num_epochs, '=' * 35)) | |||
| logger.info('{}'.format('*-' * 40)) | |||
| logger.info('>>> Epoch: [{}/{}]'.format(epoch, configs.num_epochs)) | |||
| if configs.distributed: | |||
| train_sampler.set_epoch(epoch) | |||
| # train for one epoch | |||
| train_one_epoch(train_dataloader, model, optimizer, lr_scheduler, epoch, configs, logger, tb_writer) | |||
| if (not configs.no_val) and (epoch % configs.checkpoint_freq == 0): | |||
| val_dataloader = create_val_dataloader(configs) | |||
| print('number of batches in val_dataloader: {}'.format(len(val_dataloader))) | |||
| val_loss = validate(val_dataloader, model, configs) | |||
| print('val_loss: {:.4e}'.format(val_loss)) | |||
| if tb_writer is not None: | |||
| tb_writer.add_scalar('Val_loss', val_loss, epoch) | |||
| # Save checkpoint | |||
| if configs.is_master_node and ((epoch % configs.checkpoint_freq) == 0): | |||
| model_state_dict, utils_state_dict = get_saved_state(model, optimizer, lr_scheduler, epoch, configs) | |||
| save_checkpoint(configs.checkpoints_dir, configs.saved_fn, model_state_dict, utils_state_dict, epoch) | |||
| if not configs.step_lr_in_epoch: | |||
| lr_scheduler.step() | |||
| if tb_writer is not None: | |||
| tb_writer.add_scalar('LR', lr_scheduler.get_lr()[0], epoch) | |||
| if tb_writer is not None: | |||
| tb_writer.close() | |||
| if configs.distributed: | |||
| cleanup() | |||
| def cleanup(): | |||
| dist.destroy_process_group() | |||
| def train_one_epoch(train_dataloader, model, optimizer, lr_scheduler, epoch, configs, logger, tb_writer): | |||
| batch_time = AverageMeter('Time', ':6.3f') | |||
| data_time = AverageMeter('Data', ':6.3f') | |||
| losses = AverageMeter('Loss', ':.4e') | |||
| progress = ProgressMeter(len(train_dataloader), [batch_time, data_time, losses], | |||
| prefix="Train - Epoch: [{}/{}]".format(epoch, configs.num_epochs)) | |||
| criterion = Compute_Loss(device=configs.device) | |||
| num_iters_per_epoch = len(train_dataloader) | |||
| # switch to train mode | |||
| model.train() | |||
| start_time = time.time() | |||
| for batch_idx, batch_data in enumerate(tqdm(train_dataloader)): | |||
| data_time.update(time.time() - start_time) | |||
| imgs, targets = batch_data | |||
| batch_size = imgs.size(0) | |||
| global_step = num_iters_per_epoch * (epoch - 1) + batch_idx + 1 | |||
| for k in targets.keys(): | |||
| targets[k] = targets[k].to(configs.device, non_blocking=True) | |||
| imgs = imgs.to(configs.device, non_blocking=True).float() | |||
| outputs = model(imgs) | |||
| total_loss, loss_stats = criterion(outputs, targets) | |||
| # For torch.nn.DataParallel case | |||
| if (not configs.distributed) and (configs.gpu_idx is None): | |||
| total_loss = torch.mean(total_loss) | |||
| # compute gradient and perform backpropagation | |||
| total_loss.backward() | |||
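| # Gradient accumulation: gradients are summed over `subdivisions` consecutive mini-batches | |||
| # before each optimizer step, giving an effective batch size of ~64 (see the computation of | |||
| # configs.subdivisions above). | |||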
| if global_step % configs.subdivisions == 0: | |||
| optimizer.step() | |||
| # zero the parameter gradients | |||
| optimizer.zero_grad() | |||
| # Adjust learning rate | |||
| if configs.step_lr_in_epoch: | |||
| lr_scheduler.step() | |||
| if tb_writer is not None: | |||
| tb_writer.add_scalar('LR', lr_scheduler.get_lr()[0], global_step) | |||
| if configs.distributed: | |||
| reduced_loss = reduce_tensor(total_loss.data, configs.world_size) | |||
| else: | |||
| reduced_loss = total_loss.data | |||
| losses.update(to_python_float(reduced_loss), batch_size) | |||
| # measure elapsed time | |||
| # torch.cuda.synchronize() | |||
| batch_time.update(time.time() - start_time) | |||
| if tb_writer is not None: | |||
| if (global_step % configs.tensorboard_freq) == 0: | |||
| loss_stats['avg_loss'] = losses.avg | |||
| tb_writer.add_scalars('Train', loss_stats, global_step) | |||
| # Log message | |||
| if logger is not None: | |||
| if (global_step % configs.print_freq) == 0: | |||
| logger.info(progress.get_message(batch_idx)) | |||
| start_time = time.time() | |||
| def validate(val_dataloader, model, configs): | |||
| losses = AverageMeter('Loss', ':.4e') | |||
| criterion = Compute_Loss(device=configs.device) | |||
| # switch to evaluation mode | |||
| model.eval() | |||
| with torch.no_grad(): | |||
| for batch_idx, batch_data in enumerate(tqdm(val_dataloader)): | |||
| imgs, targets = batch_data | |||
| batch_size = imgs.size(0) | |||
| for k in targets.keys(): | |||
| targets[k] = targets[k].to(configs.device, non_blocking=True) | |||
| imgs = imgs.to(configs.device, non_blocking=True).float() | |||
| outputs = model(imgs) | |||
| total_loss, loss_stats = criterion(outputs, targets) | |||
| # For torch.nn.DataParallel case | |||
| if (not configs.distributed) and (configs.gpu_idx is None): | |||
| total_loss = torch.mean(total_loss) | |||
| if configs.distributed: | |||
| reduced_loss = reduce_tensor(total_loss.data, configs.world_size) | |||
| else: | |||
| reduced_loss = total_loss.data | |||
| losses.update(to_python_float(reduced_loss), batch_size) | |||
| return losses.avg | |||
| if __name__ == '__main__': | |||
| try: | |||
| main() | |||
| except KeyboardInterrupt: | |||
| try: | |||
| cleanup() | |||
| sys.exit(0) | |||
| except SystemExit: | |||
| os._exit(0) | |||
| @@ -0,0 +1,137 @@ | |||
| """ | |||
| # -*- coding: utf-8 -*- | |||
| ----------------------------------------------------------------------------------- | |||
| # Author: Nguyen Mau Dung | |||
| # DoC: 2020.08.17 | |||
| # email: nguyenmaudung93.kstn@gmail.com | |||
| ----------------------------------------------------------------------------------- | |||
| # Description: Demonstration utils script | |||
| """ | |||
| import argparse | |||
| import sys | |||
| import os | |||
| import warnings | |||
| import zipfile | |||
| warnings.filterwarnings("ignore", category=UserWarning) | |||
| from easydict import EasyDict as edict | |||
| import numpy as np | |||
| import wget | |||
| import torch | |||
| import cv2 | |||
| src_dir = os.path.dirname(os.path.realpath(__file__)) | |||
| # while not src_dir.endswith("sfa"): | |||
| # src_dir = os.path.dirname(src_dir) | |||
| if src_dir not in sys.path: | |||
| sys.path.append(src_dir) | |||
| from utils.misc import make_folder, time_synchronized | |||
| from utils.evaluation_utils import decode, post_processing | |||
| from utils.torch_utils import _sigmoid | |||
| def parse_demo_configs(): | |||
| parser = argparse.ArgumentParser(description='Demonstration config for the implementation') | |||
| parser.add_argument('--saved_fn', type=str, default='fpn_resnet_18', metavar='FN', | |||
| help='The name used for saving logs, models, ...') | |||
| parser.add_argument('-a', '--arch', type=str, default='fpn_resnet_18', metavar='ARCH', | |||
| help='The name of the model architecture') | |||
| parser.add_argument('--pretrained_path', type=str, | |||
| default='../checkpoints/fpn_resnet_18/fpn_resnet_18_epoch_300.pth', metavar='PATH', | |||
| help='the path of the pretrained checkpoint') | |||
| parser.add_argument('--foldername', type=str, default='2011_09_26_drive_0014_sync', metavar='FN', | |||
| help='Folder name for the demonstration dataset') | |||
| parser.add_argument('--K', type=int, default=50, | |||
| help='the number of top K') | |||
| parser.add_argument('--no_cuda', action='store_true', | |||
| help='If true, cuda is not used.') | |||
| parser.add_argument('--gpu_idx', default=0, type=int, | |||
| help='GPU index to use.') | |||
| parser.add_argument('--peak_thresh', type=float, default=0.2) | |||
| parser.add_argument('--output_format', type=str, default='image', metavar='PATH', | |||
| help='the type of the test output (supports image or video)') | |||
| parser.add_argument('--output-width', type=int, default=608, | |||
| help='the width of the displayed output; the height may vary') | |||
| configs = edict(vars(parser.parse_args())) | |||
| configs.pin_memory = True | |||
| configs.distributed = False # For testing on 1 GPU only | |||
| configs.input_size = (608, 608) | |||
| configs.hm_size = (152, 152) | |||
| configs.down_ratio = 4 | |||
| configs.max_objects = 50 | |||
| configs.imagenet_pretrained = False | |||
| configs.head_conv = 64 | |||
| configs.num_classes = 3 | |||
| configs.num_center_offset = 2 | |||
| configs.num_z = 1 | |||
| configs.num_dim = 3 | |||
| configs.num_direction = 2 # sin, cos | |||
| configs.heads = { | |||
| 'hm_cen': configs.num_classes, | |||
| 'cen_offset': configs.num_center_offset, | |||
| 'direction': configs.num_direction, | |||
| 'z_coor': configs.num_z, | |||
| 'dim': configs.num_dim | |||
| } | |||
| #################################################################### | |||
| ##############Dataset, Checkpoints, and results dir configs######### | |||
| #################################################################### | |||
| configs.root_dir = '../' | |||
| configs.dataset_dir = os.path.join(configs.root_dir, 'dataset', 'kitti', 'demo') | |||
| configs.calib_path = os.path.join(configs.root_dir, 'dataset', 'kitti', 'demo', 'calib.txt') | |||
| configs.results_dir = os.path.join(configs.root_dir, 'results', configs.saved_fn) | |||
| make_folder(configs.results_dir) | |||
| return configs | |||
| def download_and_unzip(demo_dataset_dir, download_url): | |||
| filename = download_url.split('/')[-1] | |||
| filepath = os.path.join(demo_dataset_dir, filename) | |||
| if os.path.isfile(filepath): | |||
| print('The dataset has already been downloaded') | |||
| return | |||
| print('\nDownloading data for demonstration...') | |||
| wget.download(download_url, filepath) | |||
| print('\nUnzipping the downloaded data...') | |||
| with zipfile.ZipFile(filepath, "r") as zip_ref: | |||
| zip_ref.extractall(os.path.join(demo_dataset_dir, filename[:-4])) | |||
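| # Note: in do_detect below, for the rear view (is_front=False) the BEV map is flipped along | |||
| # both spatial axes (torch.flip dims [1, 2]) so a single front-facing detector can be reused. | |||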
| def do_detect(configs, model, bevmap, is_front): | |||
| if not is_front: | |||
| bevmap = torch.flip(bevmap, [1, 2]) | |||
| input_bev_maps = bevmap.unsqueeze(0).to(configs.device, non_blocking=True).float() | |||
| t1 = time_synchronized() | |||
| outputs = model(input_bev_maps) | |||
| outputs['hm_cen'] = _sigmoid(outputs['hm_cen']) | |||
| outputs['cen_offset'] = _sigmoid(outputs['cen_offset']) | |||
| # detections size (batch_size, K, 10) | |||
| detections = decode(outputs['hm_cen'], outputs['cen_offset'], outputs['direction'], outputs['z_coor'], | |||
| outputs['dim'], K=configs.K) | |||
| detections = detections.cpu().numpy().astype(np.float32) | |||
| detections = post_processing(detections, configs.num_classes, configs.down_ratio, configs.peak_thresh) | |||
| t2 = time_synchronized() | |||
| # Inference speed | |||
| fps = 1 / (t2 - t1) | |||
| return detections[0], bevmap, fps | |||
| def write_credit(img, org_author=(500, 400), text_author='github.com/maudzung', org_fps=(50, 1000), fps=None): | |||
| font = cv2.FONT_HERSHEY_SIMPLEX | |||
| fontScale = 1 | |||
| color = (255, 255, 255) | |||
| thickness = 2 | |||
| cv2.putText(img, text_author, org_author, font, fontScale, color, thickness, cv2.LINE_AA) | |||
| cv2.putText(img, 'Speed: {:.1f} FPS'.format(fps), org_fps, font, fontScale, color, thickness, cv2.LINE_AA) | |||
| @@ -0,0 +1,183 @@ | |||
| """ | |||
| # -*- coding: utf-8 -*- | |||
| ----------------------------------------------------------------------------------- | |||
| # Author: Nguyen Mau Dung | |||
| # DoC: 2020.08.17 | |||
| # email: nguyenmaudung93.kstn@gmail.com | |||
| ----------------------------------------------------------------------------------- | |||
| # Description: The utils for evaluation | |||
| # Refer from: https://github.com/xingyizhou/CenterNet | |||
| """ | |||
| from __future__ import division | |||
| import os | |||
| import sys | |||
| import torch | |||
| import numpy as np | |||
| import torch.nn.functional as F | |||
| import cv2 | |||
| src_dir = os.path.dirname(os.path.realpath(__file__)) | |||
| # while not src_dir.endswith("sfa"): | |||
| # src_dir = os.path.dirname(src_dir) | |||
| if src_dir not in sys.path: | |||
| sys.path.append(src_dir) | |||
| import config.kitti_config as cnf | |||
| from data_process.kitti_bev_utils import drawRotatedBox | |||
| def _nms(heat, kernel=3): | |||
| pad = (kernel - 1) // 2 | |||
| hmax = F.max_pool2d(heat, (kernel, kernel), stride=1, padding=pad) | |||
| keep = (hmax == heat).float() | |||
| return heat * keep | |||
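| # A heatmap cell survives _nms only if it equals the maximum of its kernel x kernel neighborhood, | |||
| # i.e. with kernel=3 every kept peak is a local maximum over a 3x3 window. | |||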
| def _gather_feat(feat, ind, mask=None): | |||
| dim = feat.size(2) | |||
| ind = ind.unsqueeze(2).expand(ind.size(0), ind.size(1), dim) | |||
| feat = feat.gather(1, ind) | |||
| if mask is not None: | |||
| mask = mask.unsqueeze(2).expand_as(feat) | |||
| feat = feat[mask] | |||
| feat = feat.view(-1, dim) | |||
| return feat | |||
| def _transpose_and_gather_feat(feat, ind): | |||
| feat = feat.permute(0, 2, 3, 1).contiguous() | |||
| feat = feat.view(feat.size(0), -1, feat.size(3)) | |||
| feat = _gather_feat(feat, ind) | |||
| return feat | |||
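| # Shape note: feat (B, C, H, W) with ind (B, K) indexing the flattened H*W grid yields (B, K, C), | |||
| # which is how per-object head values are read out at the detected peak locations. | |||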
| def _topk(scores, K=40): | |||
| batch, cat, height, width = scores.size() | |||
| topk_scores, topk_inds = torch.topk(scores.view(batch, cat, -1), K) | |||
| topk_inds = topk_inds % (height * width) | |||
| topk_ys = (torch.floor_divide(topk_inds, width)).float() | |||
| topk_xs = (topk_inds % width).int().float() | |||
| topk_score, topk_ind = torch.topk(topk_scores.view(batch, -1), K) | |||
| topk_clses = (torch.floor_divide(topk_ind, K)).int() | |||
| topk_inds = _gather_feat(topk_inds.view(batch, -1, 1), topk_ind).view(batch, K) | |||
| topk_ys = _gather_feat(topk_ys.view(batch, -1, 1), topk_ind).view(batch, K) | |||
| topk_xs = _gather_feat(topk_xs.view(batch, -1, 1), topk_ind).view(batch, K) | |||
| return topk_score, topk_inds, topk_clses, topk_ys, topk_xs | |||
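| # Two-stage top-K: first the K best peaks per class, then the K best over all classes together; | |||
| # since per-class candidates are laid out in blocks of K, topk_ind // K recovers the class id. | |||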
| def _topk_channel(scores, K=40): | |||
| batch, cat, height, width = scores.size() | |||
| topk_scores, topk_inds = torch.topk(scores.view(batch, cat, -1), K) | |||
| topk_inds = topk_inds % (height * width) | |||
| topk_ys = torch.floor_divide(topk_inds, width).float() | |||
| topk_xs = (topk_inds % width).int().float() | |||
| return topk_scores, topk_inds, topk_ys, topk_xs | |||
| def decode(hm_cen, cen_offset, direction, z_coor, dim, K=40): | |||
| batch_size, num_classes, height, width = hm_cen.size() | |||
| hm_cen = _nms(hm_cen) | |||
| scores, inds, clses, ys, xs = _topk(hm_cen, K=K) | |||
| if cen_offset is not None: | |||
| cen_offset = _transpose_and_gather_feat(cen_offset, inds) | |||
| cen_offset = cen_offset.view(batch_size, K, 2) | |||
| xs = xs.view(batch_size, K, 1) + cen_offset[:, :, 0:1] | |||
| ys = ys.view(batch_size, K, 1) + cen_offset[:, :, 1:2] | |||
| else: | |||
| xs = xs.view(batch_size, K, 1) + 0.5 | |||
| ys = ys.view(batch_size, K, 1) + 0.5 | |||
| direction = _transpose_and_gather_feat(direction, inds) | |||
| direction = direction.view(batch_size, K, 2) | |||
| z_coor = _transpose_and_gather_feat(z_coor, inds) | |||
| z_coor = z_coor.view(batch_size, K, 1) | |||
| dim = _transpose_and_gather_feat(dim, inds) | |||
| dim = dim.view(batch_size, K, 3) | |||
| clses = clses.view(batch_size, K, 1).float() | |||
| scores = scores.view(batch_size, K, 1) | |||
| # (scores x 1, ys x 1, xs x 1, z_coor x 1, dim x 3, direction x 2, clses x 1) | |||
| # (scores-0:1, ys-1:2, xs-2:3, z_coor-3:4, dim-4:7, direction-7:9, clses-9:10) | |||
| # detections: [batch_size, K, 10] | |||
| detections = torch.cat([scores, xs, ys, z_coor, dim, direction, clses], dim=2) | |||
| return detections | |||
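| # Example: detections[:, :, 0] are the confidence scores and detections[:, :, 9] the class ids, | |||
| # following the (scores, xs, ys, z_coor, dim, direction, clses) layout documented above. | |||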
| def get_yaw(direction): | |||
| return np.arctan2(direction[:, 0:1], direction[:, 1:2]) | |||
| def post_processing(detections, num_classes=3, down_ratio=4, peak_thresh=0.2): | |||
| """ | |||
| :param detections: [batch_size, K, 10] | |||
| # (scores x 1, xs x 1, ys x 1, z_coor x 1, dim x 3, direction x 2, clses x 1) | |||
| # (scores-0:1, xs-1:2, ys-2:3, z_coor-3:4, dim-4:7, direction-7:9, clses-9:10) | |||
| :return: | |||
| """ | |||
| # TODO: Need to consider rescale to the original scale: x, y | |||
| ret = [] | |||
| for i in range(detections.shape[0]): | |||
| top_preds = {} | |||
| classes = detections[i, :, -1] | |||
| for j in range(num_classes): | |||
| inds = (classes == j) | |||
| # score, x, y, z, h, w, l, yaw | |||
| top_preds[j] = np.concatenate([ | |||
| detections[i, inds, 0:1], | |||
| detections[i, inds, 1:2] * down_ratio, | |||
| detections[i, inds, 2:3] * down_ratio, | |||
| detections[i, inds, 3:4], | |||
| detections[i, inds, 4:5], | |||
| detections[i, inds, 5:6] / cnf.bound_size_y * cnf.BEV_WIDTH, | |||
| detections[i, inds, 6:7] / cnf.bound_size_x * cnf.BEV_HEIGHT, | |||
| get_yaw(detections[i, inds, 7:9]).astype(np.float32)], axis=1) | |||
| # Filter by peak_thresh | |||
| if len(top_preds[j]) > 0: | |||
| keep_inds = (top_preds[j][:, 0] > peak_thresh) | |||
| top_preds[j] = top_preds[j][keep_inds] | |||
| ret.append(top_preds) | |||
| return ret | |||
| def draw_predictions(img, detections, num_classes=3): | |||
| for j in range(num_classes): | |||
| if len(detections[j]) > 0: | |||
| for det in detections[j]: | |||
| # (scores-0:1, x-1:2, y-2:3, z-3:4, dim-4:7, yaw-7:8) | |||
| _score, _x, _y, _z, _h, _w, _l, _yaw = det | |||
| drawRotatedBox(img, _x, _y, _w, _l, _yaw, cnf.colors[int(j)]) | |||
| return img | |||
| def convert_det_to_real_values(detections, num_classes=3): | |||
| kitti_dets = [] | |||
| for cls_id in range(num_classes): | |||
| if len(detections[cls_id]) > 0: | |||
| for det in detections[cls_id]: | |||
| # (scores-0:1, x-1:2, y-2:3, z-3:4, dim-4:7, yaw-7:8) | |||
| _score, _x, _y, _z, _h, _w, _l, _yaw = det | |||
| _yaw = round(-_yaw, 2) | |||
| x = round(_y / cnf.BEV_HEIGHT * cnf.bound_size_x + cnf.boundary['minX'], 2) | |||
| y = round(_x / cnf.BEV_WIDTH * cnf.bound_size_y + cnf.boundary['minY'], 2) | |||
| z = round(_z + cnf.boundary['minZ'], 2) | |||
| w = round(_w / cnf.BEV_WIDTH * cnf.bound_size_y, 2) | |||
| l = round(_l / cnf.BEV_HEIGHT * cnf.bound_size_x, 2) | |||
| h = round(_h, 2) | |||
| kitti_dets.append([cls_id, h, w, l, x, y, z, _yaw]) | |||
| return np.array(kitti_dets) | |||
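| # Coordinate note: the BEV image width axis corresponds to the lidar y-axis and the height axis | |||
| # to the lidar x-axis, which is why _x and _y are swapped in the metric conversion above. | |||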
| @@ -0,0 +1,49 @@ | |||
| """ | |||
| # -*- coding: utf-8 -*- | |||
| ----------------------------------------------------------------------------------- | |||
| # Author: Nguyen Mau Dung | |||
| # DoC: 2020.07.31 | |||
| # email: nguyenmaudung93.kstn@gmail.com | |||
| ----------------------------------------------------------------------------------- | |||
| # Description: This script is for logging | |||
| """ | |||
| import os | |||
| import logging | |||
| class Logger: | |||
| """ | |||
| Create a logger to save logs during training | |||
| Args: | |||
| logs_dir: directory in which the log file is created | |||
| saved_fn: base name used to build the log file name | |||
| """ | |||
| def __init__(self, logs_dir, saved_fn): | |||
| logger_fn = 'logger_{}.txt'.format(saved_fn) | |||
| logger_path = os.path.join(logs_dir, logger_fn) | |||
| self.logger = logging.getLogger(__name__) | |||
| self.logger.setLevel(logging.INFO) | |||
| # formatter = logging.Formatter('%(asctime)s:File %(module)s.py:Func %(funcName)s:Line %(lineno)d:%(levelname)s: %(message)s') | |||
| formatter = logging.Formatter( | |||
| '%(asctime)s: %(module)s.py - %(funcName)s(), at Line %(lineno)d:%(levelname)s:\n%(message)s') | |||
| file_handler = logging.FileHandler(logger_path) | |||
| file_handler.setLevel(logging.INFO) | |||
| file_handler.setFormatter(formatter) | |||
| stream_handler = logging.StreamHandler() | |||
| stream_handler.setFormatter(formatter) | |||
| self.logger.addHandler(file_handler) | |||
| self.logger.addHandler(stream_handler) | |||
| def info(self, message): | |||
| self.logger.info(message) | |||
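| # Usage sketch (the path is illustrative): Logger('../logs', 'fpn_resnet_18').info('...') writes | |||
| # the message both to logger_fpn_resnet_18.txt in logs_dir and to the console via the two handlers. | |||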
| @@ -0,0 +1,312 @@ | |||
| import types | |||
| import torch | |||
| from torch.optim import SGD, lr_scheduler | |||
| import numpy as np | |||
| class _LRMomentumScheduler(lr_scheduler._LRScheduler): | |||
| def __init__(self, optimizer, last_epoch=-1): | |||
| if last_epoch == -1: | |||
| for group in optimizer.param_groups: | |||
| group.setdefault('initial_momentum', group['momentum']) | |||
| else: | |||
| for i, group in enumerate(optimizer.param_groups): | |||
| if 'initial_momentum' not in group: | |||
| raise KeyError("param 'initial_momentum' is not specified " | |||
| "in param_groups[{}] when resuming an optimizer".format(i)) | |||
| self.base_momentums = list(map(lambda group: group['initial_momentum'], optimizer.param_groups)) | |||
| super().__init__(optimizer, last_epoch) | |||
| def get_lr(self): | |||
| raise NotImplementedError | |||
| def get_momentum(self): | |||
| raise NotImplementedError | |||
| def step(self, epoch=None): | |||
| if epoch is None: | |||
| epoch = self.last_epoch + 1 | |||
| self.last_epoch = epoch | |||
| for param_group, lr, momentum in zip(self.optimizer.param_groups, self.get_lr(), self.get_momentum()): | |||
| param_group['lr'] = lr | |||
| param_group['momentum'] = momentum | |||
| def apply_lambda(last_epoch, bases, lambdas): | |||
| return [base * lmbda(last_epoch) for lmbda, base in zip(lambdas, bases)] | |||
| class LambdaScheduler(_LRMomentumScheduler): | |||
| """Sets the learning rate and momentum of each parameter group to the initial lr and momentum | |||
| times a given function. When last_epoch=-1, sets initial lr and momentum to the optimizer | |||
| values. | |||
| Args: | |||
| optimizer (Optimizer): Wrapped optimizer. | |||
| lr_lambda (function or list): A function which computes a multiplicative | |||
| factor given an integer parameter epoch, or a list of such | |||
| functions, one for each group in optimizer.param_groups. | |||
| Default: lambda x:x. | |||
| momentum_lambda (function or list): As for lr_lambda but applied to momentum. | |||
| Default: lambda x:x. | |||
| last_epoch (int): The index of last epoch. Default: -1. | |||
| Example: | |||
| >>> # Assuming optimizer has two groups. | |||
| >>> lr_lambda = [ | |||
| ... lambda epoch: epoch // 30, | |||
| ... lambda epoch: 0.95 ** epoch | |||
| ... ] | |||
| >>> mom_lambda = [ | |||
| ... lambda epoch: max(0, (50 - epoch) // 50), | |||
| ... lambda epoch: 0.99 ** epoch | |||
| ... ] | |||
| >>> scheduler = LambdaScheduler(optimizer, lr_lambda, mom_lambda) | |||
| >>> for epoch in range(100): | |||
| >>> train(...) | |||
| >>> validate(...) | |||
| >>> scheduler.step() | |||
| """ | |||
| def __init__(self, optimizer, lr_lambda=lambda x: x, momentum_lambda=lambda x: x, last_epoch=-1): | |||
| self.optimizer = optimizer | |||
| if not isinstance(lr_lambda, (list, tuple)): | |||
| self.lr_lambdas = [lr_lambda] * len(optimizer.param_groups) | |||
| else: | |||
| if len(lr_lambda) != len(optimizer.param_groups): | |||
| raise ValueError("Expected {} lr_lambdas, but got {}".format( | |||
| len(optimizer.param_groups), len(lr_lambda))) | |||
| self.lr_lambdas = list(lr_lambda) | |||
| if not isinstance(momentum_lambda, (list, tuple)): | |||
| self.momentum_lambdas = [momentum_lambda] * len(optimizer.param_groups) | |||
| else: | |||
| if len(momentum_lambda) != len(optimizer.param_groups): | |||
| raise ValueError("Expected {} momentum_lambdas, but got {}".format( | |||
| len(optimizer.param_groups), len(momentum_lambda))) | |||
| self.momentum_lambdas = list(momentum_lambda) | |||
| self.last_epoch = last_epoch | |||
| super().__init__(optimizer, last_epoch) | |||
| def state_dict(self): | |||
| """Returns the state of the scheduler as a :class:`dict`. | |||
| It contains an entry for every variable in self.__dict__ which | |||
| is not the optimizer. | |||
| The learning rate and momentum lambda functions will only be saved if they are | |||
| callable objects and not if they are functions or lambdas. | |||
| """ | |||
| state_dict = {key: value for key, value in self.__dict__.items() | |||
| if key not in ('optimizer', 'lr_lambdas', 'momentum_lambdas')} | |||
| state_dict['lr_lambdas'] = [None] * len(self.lr_lambdas) | |||
| state_dict['momentum_lambdas'] = [None] * len(self.momentum_lambdas) | |||
| for idx, (lr_fn, mom_fn) in enumerate(zip(self.lr_lambdas, self.momentum_lambdas)): | |||
| if not isinstance(lr_fn, types.FunctionType): | |||
| state_dict['lr_lambdas'][idx] = lr_fn.__dict__.copy() | |||
| if not isinstance(mom_fn, types.FunctionType): | |||
| state_dict['momentum_lambdas'][idx] = mom_fn.__dict__.copy() | |||
| return state_dict | |||
| def load_state_dict(self, state_dict): | |||
| """Loads the schedulers state. | |||
| Arguments: | |||
| state_dict (dict): scheduler state. Should be an object returned | |||
| from a call to :meth:`state_dict`. | |||
| """ | |||
| lr_lambdas = state_dict.pop('lr_lambdas') | |||
| momentum_lambdas = state_dict.pop('momentum_lambdas') | |||
| self.__dict__.update(state_dict) | |||
| for idx, fn in enumerate(lr_lambdas): | |||
| if fn is not None: | |||
| self.lr_lambdas[idx].__dict__.update(fn) | |||
| for idx, fn in enumerate(momentum_lambdas): | |||
| if fn is not None: | |||
| self.momentum_lambdas[idx].__dict__.update(fn) | |||
| def get_lr(self): | |||
| return apply_lambda(self.last_epoch, self.base_lrs, self.lr_lambdas) | |||
| def get_momentum(self): | |||
| return apply_lambda(self.last_epoch, self.base_momentums, self.momentum_lambdas) | |||
| class ParameterUpdate(object): | |||
| """A callable class used to define an arbitrary schedule defined by a list. | |||
| This object is designed to be passed to the LambdaLR or LambdaScheduler scheduler to apply | |||
| the given schedule. If a base_param is zero, no updates are applied. | |||
| Arguments: | |||
| params {list or numpy.array} -- List or numpy array defining parameter schedule. | |||
| base_param {float} -- Parameter value used to initialize the optimizer. | |||
| """ | |||
| def __init__(self, params, base_param): | |||
| self.params = np.hstack([params, 0]) | |||
| self.base_param = base_param | |||
| if base_param < 1e-12: | |||
| self.base_param = 1 | |||
| self.params = self.params * 0.0 + 1.0 | |||
| def __call__(self, epoch): | |||
| return self.params[epoch] / self.base_param | |||
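| # Example: ParameterUpdate(np.linspace(0.1, 1.0, 10), base_param=0.1)(0) returns 1.0; the | |||
| # wrapping lambda scheduler multiplies the optimizer's base value by params[epoch] / base_param. | |||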
| class ListScheduler(LambdaScheduler): | |||
| """Sets the learning rate and momentum of each parameter group to values defined by lists. | |||
| When last_epoch=-1, sets initial lr and momentum to the optimizer values. One or both of the lr | |||
| and momentum schedules may be specified. | |||
| Note that the parameters used to initialize the optimizer are overridden by those defined by | |||
| this scheduler. | |||
| Args: | |||
| optimizer (Optimizer): Wrapped optimizer. | |||
| lrs (list or numpy.ndarray): A list of learning rates, or a list of lists, one for each | |||
| parameter group. One- or two-dimensional numpy arrays may also be passed. | |||
| momentums (list or numpy.ndarray): A list of momentums, or a list of lists, one for each | |||
| parameter group. One- or two-dimensional numpy arrays may also be passed. | |||
| last_epoch (int): The index of last epoch. Default: -1. | |||
| Example: | |||
| >>> # Assuming optimizer has two groups. | |||
| >>> lrs = [ | |||
| ... np.linspace(0.01, 0.1, 100), | |||
| ... np.logspace(-2, 0, 100) | |||
| ... ] | |||
| >>> momentums = [ | |||
| ... np.linspace(0.85, 0.95, 100), | |||
| ... np.linspace(0.8, 0.99, 100) | |||
| ... ] | |||
| >>> scheduler = ListScheduler(optimizer, lrs, momentums) | |||
| >>> for epoch in range(100): | |||
| >>> train(...) | |||
| >>> validate(...) | |||
| >>> scheduler.step() | |||
| """ | |||
| def __init__(self, optimizer, lrs=None, momentums=None, last_epoch=-1): | |||
| groups = optimizer.param_groups | |||
| if lrs is None: | |||
| lr_lambda = lambda x: x | |||
| else: | |||
| lrs = np.array(lrs) if isinstance(lrs, (list, tuple)) else lrs | |||
| if len(lrs.shape) == 1: | |||
| lr_lambda = [ParameterUpdate(lrs, g['lr']) for g in groups] | |||
| else: | |||
| lr_lambda = [ParameterUpdate(l, g['lr']) for l, g in zip(lrs, groups)] | |||
| if momentums is None: | |||
| momentum_lambda = lambda x: x | |||
| else: | |||
| momentums = np.array(momentums) if isinstance(momentums, (list, tuple)) else momentums | |||
| if len(momentums.shape) == 1: | |||
| momentum_lambda = [ParameterUpdate(momentums, g['momentum']) for g in groups] | |||
| else: | |||
| momentum_lambda = [ParameterUpdate(l, g['momentum']) for l, g in zip(momentums, groups)] | |||
| super().__init__(optimizer, lr_lambda, momentum_lambda) | |||
| class RangeFinder(ListScheduler): | |||
| """Scheduler class that implements the LR range search specified in: | |||
| A disciplined approach to neural network hyper-parameters: Part 1 -- learning rate, batch | |||
| size, momentum, and weight decay. Leslie N. Smith, 2018, arXiv:1803.09820. | |||
| Logarithmically spaced learning rates from 1e-7 to 1 are searched. The number of increments in | |||
| that range is determined by 'epochs'. | |||
| Note that the parameters used to initialize the optimizer are overridden by those defined by | |||
| this scheduler. | |||
| Args: | |||
| optimizer (Optimizer): Wrapped optimizer. | |||
| epochs (int): Number of epochs over which to run test. | |||
| Example: | |||
| >>> scheduler = RangeFinder(optimizer, 100) | |||
| >>> for epoch in range(100): | |||
| >>> train(...) | |||
| >>> validate(...) | |||
| >>> scheduler.step() | |||
| """ | |||
| def __init__(self, optimizer, epochs): | |||
| lrs = np.logspace(-7, 0, epochs) | |||
| super().__init__(optimizer, lrs) | |||
| class OneCyclePolicy(ListScheduler): | |||
| """Scheduler class that implements the 1cycle policy search specified in: | |||
| A disciplined approach to neural network hyper-parameters: Part 1 -- learning rate, batch | |||
| size, momentum, and weight decay. Leslie N. Smith, 2018, arXiv:1803.09820. | |||
| Args: | |||
| optimizer (Optimizer): Wrapped optimizer. | |||
| lr (float or list). Maximum learning rate in range. If a list of values is passed, they | |||
| should correspond to parameter groups. | |||
| epochs (int): The number of epochs to use during search. | |||
| momentum_rng (list). Optional upper and lower momentum values (may be both equal). Set to | |||
| None to run without momentum. Default: [0.85, 0.95]. If a list of lists is passed, they | |||
| should correspond to parameter groups. | |||
| phase_ratio (float): Fraction of epochs used for the increasing and decreasing phase of | |||
| the schedule. For example, if phase_ratio=0.45 and epochs=100, the learning rate will | |||
| increase from lr/10 to lr over 45 epochs, then decrease back to lr/10 over 45 epochs, | |||
| then decrease to lr/100 over the remaining 10 epochs. Default: 0.45. | |||
| """ | |||
| def __init__(self, optimizer, lr, epochs, momentum_rng=[0.85, 0.95], phase_ratio=0.45): | |||
| phase_epochs = int(phase_ratio * epochs) | |||
| if isinstance(lr, (list, tuple)): | |||
| lrs = [ | |||
| np.hstack([ | |||
| np.linspace(l * 1e-1, l, phase_epochs), | |||
| np.linspace(l, l * 1e-1, phase_epochs), | |||
| np.linspace(l * 1e-1, l * 1e-2, epochs - 2 * phase_epochs), | |||
| ]) for l in lr | |||
| ] | |||
| else: | |||
| lrs = np.hstack([ | |||
| np.linspace(lr * 1e-1, lr, phase_epochs), | |||
| np.linspace(lr, lr * 1e-1, phase_epochs), | |||
| np.linspace(lr * 1e-1, lr * 1e-2, epochs - 2 * phase_epochs), | |||
| ]) | |||
| if momentum_rng is not None: | |||
| momentum_rng = np.array(momentum_rng) | |||
| if len(momentum_rng.shape) == 2: | |||
| for i, g in enumerate(optimizer.param_groups): | |||
| g['momentum'] = momentum_rng[i][1] | |||
| momentums = [ | |||
| np.hstack([ | |||
| np.linspace(m[1], m[0], phase_epochs), | |||
| np.linspace(m[0], m[1], phase_epochs), | |||
| np.linspace(m[1], m[1], epochs - 2 * phase_epochs), | |||
| ]) for m in momentum_rng | |||
| ] | |||
| else: | |||
| for i, g in enumerate(optimizer.param_groups): | |||
| g['momentum'] = momentum_rng[1] | |||
| momentums = np.hstack([ | |||
| np.linspace(momentum_rng[1], momentum_rng[0], phase_epochs), | |||
| np.linspace(momentum_rng[0], momentum_rng[1], phase_epochs), | |||
| np.linspace(momentum_rng[1], momentum_rng[1], epochs - 2 * phase_epochs), | |||
| ]) | |||
| else: | |||
| momentums = None | |||
| super().__init__(optimizer, lrs, momentums) | |||
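| # Minimal usage sketch ('net' and the numbers are illustrative; SGD needs an initial momentum): | |||
| # optimizer = SGD(net.parameters(), lr=1e-3, momentum=0.95) | |||
| # scheduler = OneCyclePolicy(optimizer, lr=2.25e-3, epochs=300) | |||
| # for epoch in range(300): | |||
| #     train(...); scheduler.step() | |||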
| @@ -0,0 +1,71 @@ | |||
| """ | |||
| # -*- coding: utf-8 -*- | |||
| ----------------------------------------------------------------------------------- | |||
| # Author: Nguyen Mau Dung | |||
| # DoC: 2020.07.31 | |||
| # email: nguyenmaudung93.kstn@gmail.com | |||
| ----------------------------------------------------------------------------------- | |||
| # Description: Miscellaneous utilities (folder creation, meters, timing) | |||
| """ | |||
| import os | |||
| import torch | |||
| import time | |||
| def make_folder(folder_name): | |||
| if not os.path.exists(folder_name): | |||
| os.makedirs(folder_name) | |||
| # or os.makedirs(folder_name, exist_ok=True) | |||
| class AverageMeter(object): | |||
| """Computes and stores the average and current value""" | |||
| def __init__(self, name, fmt=':f'): | |||
| self.name = name | |||
| self.fmt = fmt | |||
| self.reset() | |||
| def reset(self): | |||
| self.val = 0 | |||
| self.avg = 0 | |||
| self.sum = 0 | |||
| self.count = 0 | |||
| def update(self, val, n=1): | |||
| self.val = val | |||
| self.sum += val * n | |||
| self.count += n | |||
| self.avg = self.sum / self.count | |||
| def __str__(self): | |||
| fmtstr = '{name} {val' + self.fmt + '} ({avg' + self.fmt + '})' | |||
| return fmtstr.format(**self.__dict__) | |||
| class ProgressMeter(object): | |||
| def __init__(self, num_batches, meters, prefix=""): | |||
| self.batch_fmtstr = self._get_batch_fmtstr(num_batches) | |||
| self.meters = meters | |||
| self.prefix = prefix | |||
| def display(self, batch): | |||
| entries = [self.prefix + self.batch_fmtstr.format(batch)] | |||
| entries += [str(meter) for meter in self.meters] | |||
| print('\t'.join(entries)) | |||
| def get_message(self, batch): | |||
| entries = [self.prefix + self.batch_fmtstr.format(batch)] | |||
| entries += [str(meter) for meter in self.meters] | |||
| return '\t'.join(entries) | |||
| def _get_batch_fmtstr(self, num_batches): | |||
| num_digits = len(str(num_batches)) | |||
| fmt = '{:' + str(num_digits) + 'd}' | |||
| return '[' + fmt + '/' + fmt.format(num_batches) + ']' | |||
| def time_synchronized(): | |||
| if torch.cuda.is_available(): | |||
| torch.cuda.synchronize() | |||
| return time.time() | |||
| @@ -0,0 +1,45 @@ | |||
| """ | |||
| # -*- coding: utf-8 -*- | |||
| ----------------------------------------------------------------------------------- | |||
| # Author: Nguyen Mau Dung | |||
| # DoC: 2020.08.09 | |||
| # email: nguyenmaudung93.kstn@gmail.com | |||
| ----------------------------------------------------------------------------------- | |||
| # Description: some utilities of torch (conversion) | |||
| ----------------------------------------------------------------------------------- | |||
| """ | |||
| import torch | |||
| import torch.distributed as dist | |||
| __all__ = ['convert2cpu', 'convert2cpu_long', 'to_cpu', 'reduce_tensor', 'to_python_float', '_sigmoid'] | |||
| def convert2cpu(gpu_matrix): | |||
| return torch.FloatTensor(gpu_matrix.size()).copy_(gpu_matrix) | |||
| def convert2cpu_long(gpu_matrix): | |||
| return torch.LongTensor(gpu_matrix.size()).copy_(gpu_matrix) | |||
| def to_cpu(tensor): | |||
| return tensor.detach().cpu() | |||
| def reduce_tensor(tensor, world_size): | |||
| rt = tensor.clone() | |||
| dist.all_reduce(rt, op=dist.ReduceOp.SUM) | |||
| rt /= world_size | |||
| return rt | |||
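| # Averages a tensor across all ranks: all_reduce sums in place on the clone, then the sum is | |||
| # divided by world_size, so every process ends up with the same mean value. | |||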
| def to_python_float(t): | |||
| if hasattr(t, 'item'): | |||
| return t.item() | |||
| else: | |||
| return t[0] | |||
| def _sigmoid(x): | |||
| return torch.clamp(x.sigmoid_(), min=1e-4, max=1 - 1e-4) | |||
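| # Clamping the sigmoid output to [1e-4, 1 - 1e-4] keeps downstream log() terms finite, | |||
| # e.g. in a focal loss on the predicted heatmaps. | |||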
| @@ -0,0 +1,140 @@ | |||
| """ | |||
| # -*- coding: utf-8 -*- | |||
| ----------------------------------------------------------------------------------- | |||
| # Author: Nguyen Mau Dung | |||
| # DoC: 2020.08.09 | |||
| # email: nguyenmaudung93.kstn@gmail.com | |||
| ----------------------------------------------------------------------------------- | |||
| # Description: utility functions used in the training process | |||
| """ | |||
| import copy | |||
| import os | |||
| import math | |||
| import sys | |||
| import torch | |||
| from torch.optim.lr_scheduler import LambdaLR | |||
| import matplotlib.pyplot as plt | |||
| src_dir = os.path.dirname(os.path.realpath(__file__)) | |||
| # while not src_dir.endswith("sfa"): | |||
| # src_dir = os.path.dirname(src_dir) | |||
| if src_dir not in sys.path: | |||
| sys.path.append(src_dir) | |||
| from utils.lr_scheduler import OneCyclePolicy | |||
| def create_optimizer(configs, model): | |||
| """Create optimizer for training process | |||
| """ | |||
| if hasattr(model, 'module'): | |||
| train_params = [param for param in model.module.parameters() if param.requires_grad] | |||
| else: | |||
| train_params = [param for param in model.parameters() if param.requires_grad] | |||
| if configs.optimizer_type == 'sgd': | |||
| optimizer = torch.optim.SGD(train_params, lr=configs.lr, momentum=configs.momentum, nesterov=True) | |||
| elif configs.optimizer_type == 'adam': | |||
| optimizer = torch.optim.Adam(train_params, lr=configs.lr, weight_decay=configs.weight_decay) | |||
| else: | |||
| assert False, "Unknown optimizer type" | |||
| return optimizer | |||
| def create_lr_scheduler(optimizer, configs): | |||
| """Create learning rate scheduler for training process""" | |||
| if configs.lr_type == 'multi_step': | |||
| def multi_step_scheduler(i): | |||
| if i < configs.steps[0]: | |||
| factor = 1. | |||
| elif i < configs.steps[1]: | |||
| factor = 0.1 | |||
| else: | |||
| factor = 0.01 | |||
| return factor | |||
| lr_scheduler = LambdaLR(optimizer, multi_step_scheduler) | |||
| elif configs.lr_type == 'cosin': | |||
| # Scheduler https://arxiv.org/pdf/1812.01187.pdf | |||
| lf = lambda x: (((1 + math.cos(x * math.pi / configs.num_epochs)) / 2) ** 1.0) * 0.9 + 0.1 # cosine | |||
| lr_scheduler = LambdaLR(optimizer, lr_lambda=lf) | |||
| elif configs.lr_type == 'one_cycle': | |||
| lr_scheduler = OneCyclePolicy(optimizer, configs.lr, configs.num_epochs, momentum_rng=[0.85, 0.95], | |||
| phase_ratio=0.45) | |||
| else: | |||
| raise ValueError | |||
| plot_lr_scheduler(optimizer, lr_scheduler, configs.num_epochs, save_dir=configs.logs_dir, lr_type=configs.lr_type) | |||
| return lr_scheduler | |||
| def get_saved_state(model, optimizer, lr_scheduler, epoch, configs): | |||
| """Get the information to save with checkpoints""" | |||
| if hasattr(model, 'module'): | |||
| model_state_dict = model.module.state_dict() | |||
| else: | |||
| model_state_dict = model.state_dict() | |||
| utils_state_dict = { | |||
| 'epoch': epoch, | |||
| 'configs': configs, | |||
| 'optimizer': copy.deepcopy(optimizer.state_dict()), | |||
| 'lr_scheduler': copy.deepcopy(lr_scheduler.state_dict()) | |||
| } | |||
| return model_state_dict, utils_state_dict | |||
| def save_checkpoint(checkpoints_dir, saved_fn, model_state_dict, utils_state_dict, epoch): | |||
| """Save checkpoint every epoch only is best model or after every checkpoint_freq epoch""" | |||
| model_save_path = os.path.join(checkpoints_dir, 'Model_{}_epoch_{}.pth'.format(saved_fn, epoch)) | |||
| utils_save_path = os.path.join(checkpoints_dir, 'Utils_{}_epoch_{}.pth'.format(saved_fn, epoch)) | |||
| torch.save(model_state_dict, model_save_path) | |||
| torch.save(utils_state_dict, utils_save_path) | |||
| print('save a checkpoint at {}'.format(model_save_path)) | |||
| def plot_lr_scheduler(optimizer, scheduler, num_epochs=300, save_dir='', lr_type=''): | |||
| # Plot LR simulating training for full num_epochs | |||
| optimizer, scheduler = copy.copy(optimizer), copy.copy(scheduler) # do not modify originals | |||
| y = [] | |||
| for _ in range(num_epochs): | |||
| scheduler.step() | |||
| y.append(optimizer.param_groups[0]['lr']) | |||
| plt.plot(y, '.-', label='LR') | |||
| plt.xlabel('epoch') | |||
| plt.ylabel('LR') | |||
| plt.grid() | |||
| plt.xlim(0, num_epochs) | |||
| plt.ylim(0) | |||
| plt.tight_layout() | |||
| plt.savefig(os.path.join(save_dir, 'LR_{}.png'.format(lr_type)), dpi=200) | |||
| if __name__ == '__main__': | |||
| from easydict import EasyDict as edict | |||
| from torchvision.models import resnet18 | |||
| configs = edict() | |||
| configs.steps = [150, 180] | |||
| configs.lr_type = 'one_cycle' # multi_step, cosin, one_cycle | |||
| configs.logs_dir = '../../logs/' | |||
| configs.num_epochs = 50 | |||
| configs.lr = 2.25e-3 | |||
| net = resnet18() | |||
| optimizer = torch.optim.Adam(net.parameters(), 0.0002) | |||
| # scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer, milestones=[3, 6, 9], gamma=0.1) | |||
| # scheduler = torch.optim.lr_scheduler.StepLR(optimizer, 3, gamma=0.1) | |||
| scheduler = create_lr_scheduler(optimizer, configs) | |||
| for i in range(configs.num_epochs): | |||
| print(i, scheduler.get_lr()) | |||
| scheduler.step() | |||
| @@ -0,0 +1,154 @@ | |||
| """ | |||
| # -*- coding: utf-8 -*- | |||
| ----------------------------------------------------------------------------------- | |||
| # Author: Nguyen Mau Dung | |||
| # DoC: 2020.08.09 | |||
| # email: nguyenmaudung93.kstn@gmail.com | |||
| ----------------------------------------------------------------------------------- | |||
| # Description: Utilities for the KITTI dataset | |||
| """ | |||
| from __future__ import print_function | |||
| import os | |||
| import sys | |||
| import numpy as np | |||
| import cv2 | |||
| src_dir = os.path.dirname(os.path.realpath(__file__)) | |||
| # while not src_dir.endswith("sfa"): | |||
| # src_dir = os.path.dirname(src_dir) | |||
| if src_dir not in sys.path: | |||
| sys.path.append(src_dir) | |||
| import config.kitti_config as cnf | |||
| def roty(angle): | |||
| # Rotation about the y-axis. | |||
| c = np.cos(angle) | |||
| s = np.sin(angle) | |||
| return np.array([[c, 0, s], | |||
| [0, 1, 0], | |||
| [-s, 0, c]]) | |||
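| # Quick sanity check (illustrative): roty(np.pi / 2) maps the x-axis to -z, i.e. | |||
| # roty(np.pi / 2) @ np.array([1.0, 0.0, 0.0]) is approximately [0.0, 0.0, -1.0]. | |||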
| def compute_box_3d(dim, location, ry): | |||
| # dim: (h, w, l) box dimensions | |||
| # location: (x, y, z) bottom-center of the box in KITTI camera coordinates | |||
| # ry: rotation angle around the camera y-axis | |||
| # return: (8, 3) array of box corners in camera coordinates | |||
| R = roty(ry) | |||
| h, w, l = dim | |||
| x_corners = [l / 2, l / 2, -l / 2, -l / 2, l / 2, l / 2, -l / 2, -l / 2] | |||
| y_corners = [0, 0, 0, 0, -h, -h, -h, -h] | |||
| z_corners = [w / 2, -w / 2, -w / 2, w / 2, w / 2, -w / 2, -w / 2, w / 2] | |||
| corners = np.array([x_corners, y_corners, z_corners], dtype=np.float32) | |||
| corners_3d = np.dot(R, corners) | |||
| corners_3d = corners_3d + np.array(location, dtype=np.float32).reshape(3, 1) | |||
| return corners_3d.transpose(1, 0) | |||
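| # Illustrative usage (hypothetical values): a car-sized box 10 m in front of the camera. | |||
| # Since location is the bottom-center and camera y points down, the returned y-coordinates | |||
| # span [location_y - h, location_y]: | |||
| # corners = compute_box_3d(dim=(1.5, 1.6, 4.0), location=(0.0, 1.5, 10.0), ry=0.0)  # corners.shape == (8, 3) | |||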
| def project_to_image(pts_3d, P): | |||
| # pts_3d: n x 3 | |||
| # P: 3 x 4 | |||
| # return: n x 2 | |||
| pts_3d_homo = np.concatenate([pts_3d, np.ones((pts_3d.shape[0], 1), dtype=np.float32)], axis=1) | |||
| pts_2d = np.dot(P, pts_3d_homo.transpose(1, 0)).transpose(1, 0) | |||
| pts_2d = pts_2d[:, :2] / pts_2d[:, 2:] | |||
| return pts_2d.astype(np.int32)  # np.int is removed in NumPy >= 1.24 | |||
| def draw_box_3d_v2(image, qs, color=(255, 0, 255), thickness=2): | |||
| ''' Draw the projected 3D bounding box on the image | |||
| qs: (8,2) array of the box's projected 2D vertices, in the following order: | |||
| 1 -------- 0 | |||
| /| /| | |||
| 2 -------- 3 . | |||
| | | | | | |||
| . 5 -------- 4 | |||
| |/ |/ | |||
| 6 -------- 7 | |||
| ''' | |||
| qs = qs.astype(np.int32) | |||
| for k in range(0, 4): | |||
| # Ref: http://docs.enthought.com/mayavi/mayavi/auto/mlab_helper_functions.html | |||
| i, j = k, (k + 1) % 4 | |||
| # use LINE_AA for opencv3 | |||
| cv2.line(image, (qs[i, 0], qs[i, 1]), (qs[j, 0], qs[j, 1]), color, thickness) | |||
| i, j = k + 4, (k + 1) % 4 + 4 | |||
| cv2.line(image, (qs[i, 0], qs[i, 1]), (qs[j, 0], qs[j, 1]), color, thickness) | |||
| i, j = k, k + 4 | |||
| cv2.line(image, (qs[i, 0], qs[i, 1]), (qs[j, 0], qs[j, 1]), color, thickness) | |||
| return image | |||
| def draw_box_3d(image, corners, color=(0, 0, 255)): | |||
| ''' Draw the projected 3D bounding box on the image | |||
| corners: (8,2) array of the box's projected 2D vertices, in the following order: | |||
| 1 -------- 0 | |||
| /| /| | |||
| 2 -------- 3 . | |||
| | | | | | |||
| . 5 -------- 4 | |||
| |/ |/ | |||
| 6 -------- 7 | |||
| ''' | |||
| face_idx = [[0, 1, 5, 4], | |||
| [1, 2, 6, 5], | |||
| [2, 3, 7, 6], | |||
| [3, 0, 4, 7]] | |||
| for ind_f in range(3, -1, -1): | |||
| f = face_idx[ind_f] | |||
| for j in range(4): | |||
| cv2.line(image, (corners[f[j], 0], corners[f[j], 1]), | |||
| (corners[f[(j + 1) % 4], 0], corners[f[(j + 1) % 4], 1]), color, 2, lineType=cv2.LINE_AA) | |||
| if ind_f == 0: | |||
| cv2.line(image, (corners[f[0], 0], corners[f[0], 1]), | |||
| (corners[f[2], 0], corners[f[2], 1]), color, 1, lineType=cv2.LINE_AA) | |||
| cv2.line(image, (corners[f[1], 0], corners[f[1], 1]), | |||
| (corners[f[3], 0], corners[f[3], 1]), color, 1, lineType=cv2.LINE_AA) | |||
| return image | |||
| def show_rgb_image_with_boxes(img, labels, calib): | |||
| for box_idx, label in enumerate(labels): | |||
| cls_id, location, dim, ry = label[0], label[1:4], label[4:7], label[7] | |||
| if location[2] < 2.0: # The object is too close to the camera, ignore it during visualization | |||
| continue | |||
| if cls_id < 0: | |||
| continue | |||
| corners_3d = compute_box_3d(dim, location, ry) | |||
| corners_2d = project_to_image(corners_3d, calib.P2) | |||
| img = draw_box_3d(img, corners_2d, color=cnf.colors[int(cls_id)]) | |||
| return img | |||
| def merge_rgb_to_bev(img_rgb, img_bev, output_width): | |||
| img_rgb_h, img_rgb_w = img_rgb.shape[:2] | |||
| ratio_rgb = output_width / img_rgb_w | |||
| output_rgb_h = int(ratio_rgb * img_rgb_h) | |||
| ret_img_rgb = cv2.resize(img_rgb, (output_width, output_rgb_h)) | |||
| img_bev_h, img_bev_w = img_bev.shape[:2] | |||
| ratio_bev = output_width / img_bev_w | |||
| output_bev_h = int(ratio_bev * img_bev_h) | |||
| ret_img_bev = cv2.resize(img_bev, (output_width, output_bev_h)) | |||
| out_img = np.zeros((output_rgb_h + output_bev_h, output_width, 3), dtype=np.uint8) | |||
| # Stack vertically: RGB image on top, BEV map below | |||
| out_img[:output_rgb_h, ...] = ret_img_rgb | |||
| out_img[output_rgb_h:, ...] = ret_img_bev | |||
| return out_img | |||
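| # Illustrative end-to-end usage (hypothetical names; labels, calib, and the BEV map | |||
| # come from elsewhere in the repo): | |||
| # img_rgb = show_rgb_image_with_boxes(img_rgb, labels, calib) | |||
| # out_img = merge_rgb_to_bev(img_rgb, img_bev, output_width=608) | |||
| # cv2.imwrite('demo.jpg', out_img) | |||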