diff --git a/point-cloud b/point-cloud deleted file mode 160000 index e4c429e..0000000 --- a/point-cloud +++ /dev/null @@ -1 +0,0 @@ -Subproject commit e4c429e813608acbcf487656abe2eb87dcc4636c diff --git a/point-cloud/.DS_Store b/point-cloud/.DS_Store new file mode 100644 index 0000000..2ab131c Binary files /dev/null and b/point-cloud/.DS_Store differ diff --git a/point-cloud/.gitignore b/point-cloud/.gitignore new file mode 100644 index 0000000..5a80274 --- /dev/null +++ b/point-cloud/.gitignore @@ -0,0 +1,9 @@ +dataset +# cache +__pycache__ + +# results +results + +# logs +logs \ No newline at end of file diff --git a/point-cloud/.idea/deployment.xml b/point-cloud/.idea/deployment.xml new file mode 100644 index 0000000..a28531d --- /dev/null +++ b/point-cloud/.idea/deployment.xml @@ -0,0 +1,14 @@ + + + + + + + + + + + + + + \ No newline at end of file diff --git a/point-cloud/.idea/inspectionProfiles/profiles_settings.xml b/point-cloud/.idea/inspectionProfiles/profiles_settings.xml new file mode 100644 index 0000000..20fc29e --- /dev/null +++ b/point-cloud/.idea/inspectionProfiles/profiles_settings.xml @@ -0,0 +1,6 @@ + + + + \ No newline at end of file diff --git a/point-cloud/.idea/misc.xml b/point-cloud/.idea/misc.xml new file mode 100644 index 0000000..cbb5b0e --- /dev/null +++ b/point-cloud/.idea/misc.xml @@ -0,0 +1,7 @@ + + + + + + \ No newline at end of file diff --git a/point-cloud/.idea/modules.xml b/point-cloud/.idea/modules.xml new file mode 100644 index 0000000..34b8176 --- /dev/null +++ b/point-cloud/.idea/modules.xml @@ -0,0 +1,8 @@ + + + + + + + + \ No newline at end of file diff --git a/point-cloud/.idea/sfa3d.iml b/point-cloud/.idea/sfa3d.iml new file mode 100644 index 0000000..2946dc0 --- /dev/null +++ b/point-cloud/.idea/sfa3d.iml @@ -0,0 +1,12 @@ + + + + + + + + + + \ No newline at end of file diff --git a/point-cloud/.idea/vcs.xml b/point-cloud/.idea/vcs.xml new file mode 100644 index 0000000..9661ac7 --- /dev/null +++ b/point-cloud/.idea/vcs.xml @@ -0,0 +1,6 @@ + + + + + + \ No newline at end of file diff --git a/point-cloud/.idea/workspace.xml b/point-cloud/.idea/workspace.xml new file mode 100644 index 0000000..80ba82a --- /dev/null +++ b/point-cloud/.idea/workspace.xml @@ -0,0 +1,49 @@ + + + + + + + + + + + + + + + + + + + + 1661844398596 + + + + + + + \ No newline at end of file diff --git a/point-cloud/LICENSE b/point-cloud/LICENSE new file mode 100644 index 0000000..f35fbf5 --- /dev/null +++ b/point-cloud/LICENSE @@ -0,0 +1,21 @@ +MIT License + +Copyright (c) 2020 Nguyen Mau Dung + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. diff --git a/point-cloud/README.md b/point-cloud/README.md new file mode 100644 index 0000000..1bd2ab2 --- /dev/null +++ b/point-cloud/README.md @@ -0,0 +1,116 @@ +# Super Fast and Accurate 3D Object Detection based on 3D LiDAR Point Clouds + +[![python-image]][python-url] +[![pytorch-image]][pytorch-url] + +--- + +## 1. Getting Started +### 1.1 Requirement + +The instructions for setting up a virtual environment are [here](https://github.com/maudzung/virtual_environment_python3). + +```shell script +cd SFA3D/ +pip install -r requirements.txt +``` + +### 1.2 Data Preparation +Download the 3D KITTI detection dataset from [here](http://www.cvlibs.net/datasets/kitti/eval_object.php?obj_benchmark=3d). + +The downloaded data includes: + +- Velodyne point clouds _**(29 GB)**_ +- Training labels of object data set _**(5 MB)**_ + + + +Please make sure that you construct the source code & dataset directory structure as shown below. + +## 2. How to run + + +### 2.1 Inference + +The pre-trained model is included in this repo. +- **CPU** +``` +python inference.py --no_cuda=True +``` +- **GPU** +``` +python inference.py +``` +Inference labels: + +- Pedestrian +- Car +- Cyclist + +### 2.2 Training +#### 2.2.1 CPU +``` +python train.py --no_cuda=True +``` + +#### 2.2.2 Single machine, single GPU + +```shell script +python train.py --gpu_idx 0 +``` + +#### 2.2.3 Distributed Data Parallel Training +- **Single machine (node), multiple GPUs** + +``` +python train.py --multiprocessing-distributed --world-size 1 --rank 0 --batch_size 64 --num_workers 8 +``` + +- **Two machines (two nodes), multiple GPUs** + + - _**First machine**_ + ``` + python train.py --dist-url 'tcp://IP_OF_NODE1:FREEPORT' --multiprocessing-distributed --world-size 2 --rank 0 --batch_size 64 --num_workers 8 + ``` + + - _**Second machine**_ + ``` + python train.py --dist-url 'tcp://IP_OF_NODE2:FREEPORT' --multiprocessing-distributed --world-size 2 --rank 1 --batch_size 64 --num_workers 8 + ``` + +## References +[1] SFA3D: [PyTorch Implementation](https://github.com/maudzung/SFA3D) + +## Folder structure +### Dataset +``` +└── kitti/ + ├── image_2/ (left color camera, optional) + ├── calib/ (optional) + ├── label_2/ (annotation results/labels, optional) + └── velodyne/ (point cloud files, required) +``` +### Checkpoints & Algorithm +``` +${ROOT} +└── checkpoints/ + ├── fpn_resnet_18/ + ├── fpn_resnet_18_epoch_300.pth (point cloud object detection annotation model) +└── sfa/ (point cloud annotation algorithm) + ├── config/ + ├── data_process/ + ├── models/ + ├── utils/ + ├── inference.py + └── train.py +├── README.md +├── LICENSE +└── requirements.txt +``` + + + +[python-image]: https://img.shields.io/badge/Python-3.6-ff69b4.svg +[python-url]: https://www.python.org/ +[pytorch-image]: https://img.shields.io/badge/PyTorch-1.5-2BAF2B.svg +[pytorch-url]: https://pytorch.org/ diff --git a/point-cloud/Technical_details.md b/point-cloud/Technical_details.md new file mode 100644 index 0000000..759ae4a --- /dev/null +++ b/point-cloud/Technical_details.md @@ -0,0 +1,55 @@ +# Super Fast and Accurate 3D Object Detection based on 3D LiDAR Point Clouds + +--- + +Technical details of the implementation + + +## 1. Network architecture + +- The **ResNet-based Keypoint Feature Pyramid Network** (KFPN) that was proposed in the [RTM3D paper](https://arxiv.org/pdf/2001.03343.pdf) is used.
+An unofficial PyTorch implementation of the RTM3D paper is available [here](https://github.com/maudzung/RTM3D). +- **Input**: + - The model takes a birds-eye-view (BEV) map as input. + - The BEV map is encoded by the height, intensity, and density of the 3D LiDAR point cloud. Assume that the size of the BEV input is `(H, W, 3)`. + +- **Outputs**: + - Heatmap for the main center with a size of `(H/S, W/S, C)`, where `S=4` _(the down-sample ratio)_ and `C=3` _(the number of classes)_ + - Center offset: `(H/S, W/S, 2)` + - The heading angle _(yaw)_: `(H/S, W/S, 2)`. The model estimates the **im**aginary and the **re**al fraction (`sin(yaw)` and `cos(yaw)` values). + - Dimension _(h, w, l)_: `(H/S, W/S, 3)` + - `z` coordinate: `(H/S, W/S, 1)` + +- **Targets**: **7 degrees of freedom** _(7-DOF)_ of objects: `(cx, cy, cz, l, w, h, θ)` + - `cx, cy, cz`: The center coordinates. + - `l, w, h`: The length, width, and height of the bounding box. + - `θ`: The heading angle of the bounding box in radians. + +- **Objects**: Cars, Pedestrians, Cyclists. + +## 2. Loss functions + +- For the main center heatmap: `focal loss` is used. + +- For the heading angle _(yaw)_: the `im` and `re` fractions are directly regressed by using `l1_loss`. + +- For the `z` coordinate and the `3 dimensions` (height, width, length), I used the `balanced l1 loss` that was proposed in the paper + [Libra R-CNN: Towards Balanced Learning for Object Detection](https://arxiv.org/pdf/1904.02701.pdf). + +## 3. Training details + +- Uniform weights are set for all of the above loss components (`=1.0` for each). +- Number of epochs: 300. +- Learning rate scheduler: [`cosine`](https://arxiv.org/pdf/1812.01187.pdf), initial learning rate: 0.001. +- Batch size: `16` (on a single GTX 1080Ti). + +## 4. Inference + +- A `3 × 3` max-pooling operation was applied on the center heatmap, then only the `50` predictions whose center confidences are larger than 0.2 were kept (a sketch of this decoding step is given at the end of this document). +- The heading angle _(yaw)_ = `arctan`(_imaginary fraction_ / _real fraction_) + +## 5. How to expand the work + +- The model could be trained with more classes and with a larger detection area by modifying the configurations in the [config/kitti_config.py](https://github.com/maudzung/Super-Fast-Accurate-3D-Object-Detection/blob/master/src/config/kitti_config.py) file.
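A minimal PyTorch sketch of the decoding step described in Section 4 (3 × 3 max-pool peak picking, top-`K` selection, and yaw recovery from the `im`/`re` pair) is given below. It is only an illustration, not the repository's own decoder: the function name `decode_center_heatmap` and the assumed channel order of the `direction` head (`im` first, `re` second) are hypothetical.

```python
import torch
import torch.nn.functional as F


def decode_center_heatmap(hm_cen, direction, K=50, conf_thresh=0.2):
    """Keep local peaks of the center heatmap, take the top-K of them,
    and recover yaw = arctan(im / re) from the direction head.

    hm_cen:    (B, C, H/S, W/S) class heatmap after sigmoid
    direction: (B, 2, H/S, W/S) regressed (im, re) = (sin(yaw), cos(yaw))
    """
    batch, num_classes, height, width = hm_cen.size()

    # 3x3 max-pooling: a cell survives only if it is a local maximum
    hmax = F.max_pool2d(hm_cen, kernel_size=3, stride=1, padding=1)
    peaks = hm_cen * (hmax == hm_cen).float()

    # Top-K peaks over all classes and spatial locations
    scores, indices = torch.topk(peaks.view(batch, -1), K)
    classes = indices // (height * width)
    ys = (indices % (height * width)) // width
    xs = indices % width

    # Heading angle from the (im, re) pair (assumed channel order: 0 = im, 1 = re)
    batch_idx = torch.arange(batch).unsqueeze(1)
    im = direction[:, 0][batch_idx, ys, xs]
    re = direction[:, 1][batch_idx, ys, xs]
    yaw = torch.atan2(im, re)

    # Only detections above the confidence threshold (0.2) are kept
    keep = scores > conf_thresh
    return scores, classes, xs, ys, yaw, keep
```

With `K=50` and `conf_thresh=0.2` this mirrors the numbers quoted in Section 4; the actual decoding code shipped under `sfa/` may differ in detail.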
\ No newline at end of file diff --git a/point-cloud/checkpoints/fpn_resnet_18/Model_fpn_resnet_18_epoch_300.pth b/point-cloud/checkpoints/fpn_resnet_18/Model_fpn_resnet_18_epoch_300.pth new file mode 100644 index 0000000..15ed4e2 Binary files /dev/null and b/point-cloud/checkpoints/fpn_resnet_18/Model_fpn_resnet_18_epoch_300.pth differ diff --git a/point-cloud/requirements.txt b/point-cloud/requirements.txt new file mode 100644 index 0000000..ac7fae2 --- /dev/null +++ b/point-cloud/requirements.txt @@ -0,0 +1,41 @@ +absl-py==1.1.0 +cachetools==4.2.4 +certifi==2022.6.15 +charset-normalizer==2.0.12 +cycler==0.11.0 +easydict==1.9 +future==0.18.2 +google-auth==1.35.0 +google-auth-oauthlib==0.4.6 +grpcio==1.46.3 +idna==3.3 +importlib-metadata==4.11.4 +joblib==1.1.0 +kiwisolver==1.4.3 +Markdown==3.3.7 +matplotlib==3.3.3 +numpy==1.18.3 +oauthlib==3.2.0 +opencv-python==4.2.0.34 +Pillow==8.4.0 +protobuf==3.19.1 +pyasn1==0.4.8 +pyasn1-modules==0.2.8 +pyparsing==3.0.9 +python-dateutil==2.8.2 +requests==2.28.0 +requests-oauthlib==1.3.1 +rsa==4.8 +scikit-learn==0.22.2 +scipy==1.8.1 +six==1.16.0 +tensorboard==2.2.1 +tensorboard-plugin-wit==1.8.1 +torch==1.5.0 +torchsummary==1.5.1 +torchvision==0.6.0 +tqdm==4.54.0 +urllib3==1.26.9 +Werkzeug==2.1.2 +wget==3.2 +zipp==3.8.0 diff --git a/point-cloud/sfa/config/__init__.py b/point-cloud/sfa/config/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/point-cloud/sfa/config/kitti_config.py b/point-cloud/sfa/config/kitti_config.py new file mode 100644 index 0000000..436616a --- /dev/null +++ b/point-cloud/sfa/config/kitti_config.py @@ -0,0 +1,99 @@ +import math + +import numpy as np + +# Car and Van ==> Car class +# Pedestrian and Person_Sitting ==> Pedestrian Class +# for train +CLASS_NAME_TO_ID = { + 'Pedestrian': 0, + 'Car': 1, + 'Cyclist': 2, + 'Van': 1, + 'Truck': -3, + 'Person_sitting': 0, + 'Tram': -99, + 'Misc': -99, + 'TraffiCone': -1, + 'DontCare': -1 +} + +# for test +CLASS_ID_TO_NAME = { + 0: 'Pedestrian', # Person_sitting in the same class + 1: 'Car', # Van in the same class + 2: 'Cyclist' +} + + +colors = [[0, 255, 255], [0, 0, 255], [255, 0, 0], [255, 120, 0], + [255, 120, 120], [0, 120, 0], [120, 255, 255], [120, 0, 255]] + +##################################################################################### +boundary = { + "minX": -50, + "maxX": 50, + "minY": -25, + "maxY": 25, + "minZ": -2.73, + "maxZ": 1.27 +} + +bound_size_x = boundary['maxX'] - boundary['minX'] +bound_size_y = boundary['maxY'] - boundary['minY'] +bound_size_z = boundary['maxZ'] - boundary['minZ'] + +boundary_back = { + "minX": -50, + "maxX": 0, + "minY": -25, + "maxY": 25, + "minZ": -2.73, + "maxZ": 1.27 +} + +BEV_WIDTH = 608 # across y axis -25m ~ 25m +BEV_HEIGHT = 1216 # across x axis 0m ~ 50m +DISCRETIZATION = (boundary["maxX"] - boundary["minX"]) / BEV_HEIGHT +DISCRETIZATION_Y = (boundary["maxX"] - boundary["minX"]) / BEV_HEIGHT +DISCRETIZATION_X = (boundary["maxY"] - boundary["minY"]) / BEV_WIDTH +# maximum number of points per voxel +T = 35 + +# voxel size +vd = 0.1 # z +vh = 0.05 # y +vw = 0.05 # x + +# voxel grid +W = math.ceil(bound_size_x / vw) +H = math.ceil(bound_size_y / vh) +D = math.ceil(bound_size_z / vd) + +# Following parameters are calculated as an average from KITTI dataset for simplicity +##################################################################################### +Tr_velo_to_cam = np.array([ + [7.49916597e-03, -9.99971248e-01, -8.65110297e-04, -6.71807577e-03], + [1.18652889e-02, 9.54520517e-04, -9.99910318e-01, 
-7.33152811e-02], + [9.99882833e-01, 7.49141178e-03, 1.18719929e-02, -2.78557062e-01], + [0, 0, 0, 1] +]) + +# cal mean from train set +R0 = np.array([ + [0.99992475, 0.00975976, -0.00734152, 0], + [-0.0097913, 0.99994262, -0.00430371, 0], + [0.00729911, 0.0043753, 0.99996319, 0], + [0, 0, 0, 1] +]) + +P2 = np.array([[719.787081, 0., 608.463003, 44.9538775], + [0., 719.787081, 174.545111, 0.1066855], + [0., 0., 1., 3.0106472e-03], + [0., 0., 0., 0] + ]) + +R0_inv = np.linalg.inv(R0) +Tr_velo_to_cam_inv = np.linalg.inv(Tr_velo_to_cam) +P2_inv = np.linalg.pinv(P2) +##################################################################################### diff --git a/point-cloud/sfa/config/train_config.py b/point-cloud/sfa/config/train_config.py new file mode 100644 index 0000000..4e4a971 --- /dev/null +++ b/point-cloud/sfa/config/train_config.py @@ -0,0 +1,172 @@ +""" +# -*- coding: utf-8 -*- +----------------------------------------------------------------------------------- +# Author: Nguyen Mau Dung +# DoC: 2020.08.17 +# email: nguyenmaudung93.kstn@gmail.com +----------------------------------------------------------------------------------- +# Description: The configurations of the project will be defined here +""" + +import os +import argparse + +import torch +from easydict import EasyDict as edict + + +def parse_train_configs(): + parser = argparse.ArgumentParser(description='The Implementation using PyTorch') + parser.add_argument('--seed', type=int, default=2020, + help='re-produce the results with seed random') + parser.add_argument('--saved_fn', type=str, default='fpn_resnet_18', metavar='FN', + help='The name using for saving logs, models,...') + + parser.add_argument('--root_dir', type=str, default='../', metavar='PATH', + help='The ROOT working directory') + #################################################################### + ############## Model configs ######################## + #################################################################### + parser.add_argument('--arch', type=str, default='fpn_resnet_18', metavar='ARCH', + help='The name of the model architecture') + parser.add_argument('--model_load_dir', type=str, default=None, metavar='PATH', + help='the path of the pretrained checkpoint') + + #################################################################### + ############## Dataloader and Running configs ####### + #################################################################### + parser.add_argument('--data_url', type=str, default='../dataset/apollo/training', metavar='PATH', + help='the path of the dataset') + parser.add_argument('--val_data_url', type=str, default='../dataset/apollo/val', metavar='PATH', + help='the path of the dataset') + parser.add_argument('--train_model_out', type=str, default='../checkpoints', metavar='PATH', + help='the path of the model output') + parser.add_argument('--train_out', type=str, default='../logs', metavar='PATH', + help='the path of the logs output') + parser.add_argument('--hflip_prob', type=float, default=0.5, + help='The probability of horizontal flip') + parser.add_argument('--no-val', action='store_true', + help='If true, dont evaluate the model on the val set') + parser.add_argument('--num_samples', type=int, default=None, + help='Take a subset of the dataset to run and debug') + parser.add_argument('--num_workers', type=int, default=4, + help='Number of threads for loading data') + parser.add_argument('--batch_size', type=int, default=8, + help='mini-batch size (default: 16), this is the total' + 'batch size of all 
GPUs on the current node when using' + 'Data Parallel or Distributed Data Parallel') + parser.add_argument('--print_freq', type=int, default=50, metavar='N', + help='print frequency (default: 50)') + parser.add_argument('--tensorboard_freq', type=int, default=50, metavar='N', + help='frequency of saving tensorboard (default: 50)') + parser.add_argument('--checkpoint_freq', type=int, default=2, metavar='N', + help='frequency of saving checkpoints (default: 5)') + parser.add_argument('--gpu_num_per_node', type=int, default=1, + help='Number of GPU') + #################################################################### + ############## Training strategy #################### + #################################################################### + + parser.add_argument('--start_epoch', type=int, default=1, metavar='N', + help='the starting epoch') + parser.add_argument('--num_epochs', type=int, default=300, metavar='N', + help='number of total epochs to run') + parser.add_argument('--lr_type', type=str, default='cosin', + help='the type of learning rate scheduler (cosin or multi_step or one_cycle)') + parser.add_argument('--lr', type=float, default=0.001, metavar='LR', + help='initial learning rate') + parser.add_argument('--minimum_lr', type=float, default=1e-7, metavar='MIN_LR', + help='minimum learning rate during training') + parser.add_argument('--momentum', type=float, default=0.949, metavar='M', + help='momentum') + parser.add_argument('-wd', '--weight_decay', type=float, default=0., metavar='WD', + help='weight decay (default: 0.)') + parser.add_argument('--optimizer_type', type=str, default='adam', metavar='OPTIMIZER', + help='the type of optimizer, it can be sgd or adam') + parser.add_argument('--steps', nargs='*', default=[150, 180], + help='number of burn in step') + + #################################################################### + ############## Loss weight ########################## + #################################################################### + + #################################################################### + ############## Distributed Data Parallel ############ + #################################################################### + parser.add_argument('--world-size', default=-1, type=int, metavar='N', + help='number of nodes for distributed training') + parser.add_argument('--rank', default=-1, type=int, metavar='N', + help='node rank for distributed training') + parser.add_argument('--dist-url', default='tcp://127.0.0.1:29500', type=str, + help='url used to set up distributed training') + parser.add_argument('--dist-backend', default='nccl', type=str, + help='distributed backend') + parser.add_argument('--gpu_idx', default=0, type=int, + help='GPU index to use.') + parser.add_argument('--no_cuda', default= False, + help='If true, cuda is not used.') + parser.add_argument('--multiprocessing-distributed', action='store_true', + help='Use multi-processing distributed training to launch ' + 'N processes per node, which has N GPUs. 
This is the ' + 'fastest way to use PyTorch for either single node or ' + 'multi node data parallel training') + #################################################################### + ############## Evaluation configurations ################### + #################################################################### + parser.add_argument('--evaluate', action='store_true', + help='only evaluate the model, not training') + parser.add_argument('--resume_path', type=str, default=None, metavar='PATH', + help='the path of the resumed checkpoint') + parser.add_argument('--K', type=int, default=50, + help='the number of top K') + + configs = edict(vars(parser.parse_args())) + + #################################################################### + ############## Hardware configurations ############################# + #################################################################### + # configs.device = torch.device('cpu' if configs.no_cuda else 'cuda') + configs.device = torch.device('cpu' if configs.no_cuda else 'cuda:{}'.format(configs.gpu_idx)) + configs.ngpus_per_node = torch.cuda.device_count() + + configs.pin_memory = True + configs.input_size = (1216, 608) + configs.hm_size = (304, 152) + configs.down_ratio = 4 + configs.max_objects = 50 + + configs.imagenet_pretrained = True + configs.head_conv = 64 + configs.num_classes = 3 + configs.num_center_offset = 2 + configs.num_z = 1 + configs.num_dim = 3 + configs.num_direction = 2 # sin, cos + + configs.heads = { + 'hm_cen': configs.num_classes, + 'cen_offset': configs.num_center_offset, + 'direction': configs.num_direction, + 'z_coor': configs.num_z, + 'dim': configs.num_dim + } + + configs.num_input_features = 4 + + #################################################################### + ############## Dataset, logs, Checkpoints dir ###################### + #################################################################### + configs.dataset = 'apollo' # or kitti + configs.dataset_dir = configs.data_url + # configs.checkpoints_dir = os.path.join(configs.train_model_out, configs.saved_fn) + configs.checkpoints_dir = configs.train_model_out + # configs.logs_dir = os.path.join(configs.train_out, configs.saved_fn) + configs.logs_dir = configs.train_out + configs.pretrained_path = configs.model_load_dir + + if not os.path.isdir(configs.checkpoints_dir): + os.makedirs(configs.checkpoints_dir) + if not os.path.isdir(configs.logs_dir): + os.makedirs(configs.logs_dir) + + return configs diff --git a/point-cloud/sfa/data_process/__init__.py b/point-cloud/sfa/data_process/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/point-cloud/sfa/data_process/demo_dataset.py b/point-cloud/sfa/data_process/demo_dataset.py new file mode 100644 index 0000000..0c78c84 --- /dev/null +++ b/point-cloud/sfa/data_process/demo_dataset.py @@ -0,0 +1,99 @@ +""" +# -*- coding: utf-8 -*- +----------------------------------------------------------------------------------- +# Author: Nguyen Mau Dung +# DoC: 2020.08.17 +# email: nguyenmaudung93.kstn@gmail.com +----------------------------------------------------------------------------------- +# Description: This script for the KITTI dataset +""" + +import sys +import os +from builtins import int +from glob import glob + +import numpy as np +from torch.utils.data import Dataset +import cv2 +import torch + +src_dir = os.path.dirname(os.path.realpath(__file__)) +# while not src_dir.endswith("sfa"): +# src_dir = os.path.dirname(src_dir) +if src_dir not in sys.path: + sys.path.append(src_dir) + +from 
data_process.kitti_data_utils import get_filtered_lidar +from data_process.kitti_bev_utils import makeBEVMap +import config.kitti_config as cnf + + +class Demo_KittiDataset(Dataset): + def __init__(self, configs): + self.dataset_dir = os.path.join(configs.dataset_dir, configs.foldername, configs.foldername[:10], + configs.foldername) + self.input_size = configs.input_size + self.hm_size = configs.hm_size + + self.num_classes = configs.num_classes + self.max_objects = configs.max_objects + + self.image_dir = os.path.join(self.dataset_dir, "image_02", "data") + self.lidar_dir = os.path.join(self.dataset_dir, "velodyne_points", "data") + self.label_dir = os.path.join(self.dataset_dir, "label_2", "data") + self.sample_id_list = sorted(glob(os.path.join(self.lidar_dir, '*.bin'))) + self.sample_id_list = [float(os.path.basename(fn)[:-4]) for fn in self.sample_id_list] + self.num_samples = len(self.sample_id_list) + + def __len__(self): + return len(self.sample_id_list) + + def __getitem__(self, index): + pass + + def load_bevmap_front(self, index): + """Load only image for the testing phase""" + sample_id = int(self.sample_id_list[index]) + img_path, img_rgb = self.get_image(sample_id) + lidarData = self.get_lidar(sample_id) + front_lidar = get_filtered_lidar(lidarData, cnf.boundary) + front_bevmap = makeBEVMap(front_lidar, cnf.boundary) + front_bevmap = torch.from_numpy(front_bevmap) + + metadatas = { + 'img_path': img_path, + } + + return metadatas, front_bevmap, img_rgb + + def load_bevmap_front_vs_back(self, index): + """Load only image for the testing phase""" + sample_id = int(self.sample_id_list[index]) + img_path, img_rgb = self.get_image(sample_id) + lidarData = self.get_lidar(sample_id) + + front_lidar = get_filtered_lidar(lidarData, cnf.boundary) + front_bevmap = makeBEVMap(front_lidar, cnf.boundary) + front_bevmap = torch.from_numpy(front_bevmap) + + back_lidar = get_filtered_lidar(lidarData, cnf.boundary_back) + back_bevmap = makeBEVMap(back_lidar, cnf.boundary_back) + back_bevmap = torch.from_numpy(back_bevmap) + + metadatas = { + 'img_path': img_path, + } + + return metadatas, front_bevmap, back_bevmap, img_rgb + + def get_image(self, idx): + img_path = os.path.join(self.image_dir, '{:010d}.png'.format(idx)) + img = cv2.cvtColor(cv2.imread(img_path), cv2.COLOR_BGR2RGB) + + return img_path, img + + def get_lidar(self, idx): + lidar_file = os.path.join(self.lidar_dir, '{:010d}.bin'.format(idx)) + # assert os.path.isfile(lidar_file) + return np.fromfile(lidar_file, dtype=np.float32).reshape(-1, 4) diff --git a/point-cloud/sfa/data_process/kitti_bev_utils.py b/point-cloud/sfa/data_process/kitti_bev_utils.py new file mode 100644 index 0000000..b25f79a --- /dev/null +++ b/point-cloud/sfa/data_process/kitti_bev_utils.py @@ -0,0 +1,98 @@ +""" +# -*- coding: utf-8 -*- +----------------------------------------------------------------------------------- +""" + +import math +import os +import sys + +import cv2 +import numpy as np +src_dir = os.path.dirname(os.path.realpath(__file__)) +# while not src_dir.endswith("sfa"): +# src_dir = os.path.dirname(src_dir) +if src_dir not in sys.path: + sys.path.append(src_dir) + +import config.kitti_config as cnf + + +def makeBEVMap(PointCloud_, boundary): + Height = cnf.BEV_HEIGHT + 1 + Width = cnf.BEV_WIDTH + 1 + + # Discretize Feature Map + PointCloud = np.copy(PointCloud_) + # PointCloud[:, 0] = np.int_(np.floor(PointCloud[:, 0] / cnf.DISCRETIZATION)) + # PointCloud[:, 1] = np.int_(np.floor(PointCloud[:, 1] / cnf.DISCRETIZATION) + Width / 2) + + # 
For the Apollo dataset, detection covers 360° + PointCloud[:, 0] = np.int_(np.floor(PointCloud[:, 0] / cnf.DISCRETIZATION_Y) + Height / 2) + PointCloud[:, 1] = np.int_(np.floor(PointCloud[:, 1] / cnf.DISCRETIZATION_X) + Width / 2) + + # sort-3times + indices = np.lexsort((-PointCloud[:, 2], PointCloud[:, 1], PointCloud[:, 0])) + PointCloud = PointCloud[indices] + + # Height Map + heightMap = np.zeros((Height, Width)) + + _, indices = np.unique(PointCloud[:, 0:2], axis=0, return_index=True) + PointCloud_frac = PointCloud[indices] + + # note: the image coordinate order is (y, x), not (x, y) + max_height = float(np.abs(boundary['maxZ'] - boundary['minZ'])) + + heightMap[np.int_(PointCloud_frac[:, 0]), np.int_(PointCloud_frac[:, 1])] = PointCloud_frac[:, 2] / max_height #(1217,609) + # Intensity Map & DensityMap + intensityMap = np.zeros((Height, Width)) + densityMap = np.zeros((Height, Width)) + + _, indices, counts = np.unique(PointCloud[:, 0:2], axis=0, return_index=True, return_counts=True) + PointCloud_top = PointCloud[indices] + + normalizedCounts = np.minimum(1.0, np.log(counts + 1) / np.log(64)) + + intensityMap[np.int_(PointCloud_top[:, 0]), np.int_(PointCloud_top[:, 1])] = PointCloud_top[:, 3] / 255.0 # hesai40p reflection intensity is in the range 0~255 + densityMap[np.int_(PointCloud_top[:, 0]), np.int_(PointCloud_top[:, 1])] = normalizedCounts + RGB_Map = np.zeros((3, Height - 1, Width - 1)) + RGB_Map[2, :, :] = densityMap[:cnf.BEV_HEIGHT, :cnf.BEV_WIDTH] # r_map + RGB_Map[1, :, :] = heightMap[:cnf.BEV_HEIGHT, :cnf.BEV_WIDTH] # g_map + RGB_Map[0, :, :] = intensityMap[:cnf.BEV_HEIGHT, :cnf.BEV_WIDTH] # b_map + + return RGB_Map + + +# bev image coordinates format +def get_corners(x, y, w, l, yaw): + bev_corners = np.zeros((4, 2), dtype=np.float32) + cos_yaw = np.cos(yaw) + sin_yaw = np.sin(yaw) + # front left + bev_corners[0, 0] = x - w / 2 * cos_yaw - l / 2 * sin_yaw + bev_corners[0, 1] = y - w / 2 * sin_yaw + l / 2 * cos_yaw + + # rear left + bev_corners[1, 0] = x - w / 2 * cos_yaw + l / 2 * sin_yaw + bev_corners[1, 1] = y - w / 2 * sin_yaw - l / 2 * cos_yaw + + # rear right + bev_corners[2, 0] = x + w / 2 * cos_yaw + l / 2 * sin_yaw + bev_corners[2, 1] = y + w / 2 * sin_yaw - l / 2 * cos_yaw + + # front right + bev_corners[3, 0] = x + w / 2 * cos_yaw - l / 2 * sin_yaw + bev_corners[3, 1] = y + w / 2 * sin_yaw + l / 2 * cos_yaw + + return bev_corners + + +def drawRotatedBox(img, x, y, w, l, yaw, color): + img_cp = img.copy() + bev_corners = get_corners(x, y, w, l, yaw) + corners_int = bev_corners.reshape(-1, 1, 2).astype(int) + cv2.polylines(img, [corners_int], True, color, 2) + corners_int = bev_corners.reshape(-1, 2) + cv2.line(img, (int(corners_int[0, 0]), int(corners_int[0, 1])), (int(corners_int[3, 0]), int(corners_int[3, 1])), (255, 255, 0), 2) + # return img_cp diff --git a/point-cloud/sfa/data_process/kitti_data_utils.py b/point-cloud/sfa/data_process/kitti_data_utils.py new file mode 100644 index 0000000..12fdb3b --- /dev/null +++ b/point-cloud/sfa/data_process/kitti_data_utils.py @@ -0,0 +1,324 @@ +""" +# -*- coding: utf-8 -*- +----------------------------------------------------------------------------------- +# Author: Nguyen Mau Dung +# DoC: 2020.08.17 +# email: nguyenmaudung93.kstn@gmail.com +----------------------------------------------------------------------------------- +# Description: The utils of the kitti dataset +""" + +from __future__ import print_function +import os +import sys + +import numpy as np +import cv2 + +src_dir = os.path.dirname(os.path.realpath(__file__)) +# while not src_dir.endswith("sfa"): +# 
src_dir = os.path.dirname(src_dir) +if src_dir not in sys.path: + sys.path.append(src_dir) + +import config.kitti_config as cnf + + +class Object3d(object): + ''' 3d object label ''' + + def __init__(self, label_file_line): + data = label_file_line.split(' ') + data[1:] = [float(x) for x in data[1:]] + # extract label, truncation, occlusion + self.type = data[0] # 'Car', 'Pedestrian', ... + self.cls_id = self.cls_type_to_id(self.type) + self.truncation = data[1] # truncated pixel ratio [0..1] + self.occlusion = int(data[2]) # 0=visible, 1=partly occluded, 2=fully occluded, 3=unknown + self.alpha = data[3] # object observation angle [-pi..pi] + + # extract 2d bounding box in 0-based coordinates + self.xmin = data[4] # left + self.ymin = data[5] # top + self.xmax = data[6] # right + self.ymax = data[7] # bottom + self.box2d = np.array([self.xmin, self.ymin, self.xmax, self.ymax]) + + # extract 3d bounding box information + self.h = data[8] # box height + self.w = data[9] # box width + self.l = data[10] # box length (in meters) + self.t = (data[11], data[12], data[13]) # location (x,y,z) in camera coord. + self.dis_to_cam = np.linalg.norm(self.t) + self.ry = data[14] # yaw angle (around Y-axis in camera coordinates) [-pi..pi] + self.score = data[15] if data.__len__() == 16 else -1.0 + self.level_str = None + self.level = self.get_obj_level() + + def cls_type_to_id(self, cls_type): + if cls_type not in cnf.CLASS_NAME_TO_ID.keys(): + return -1 + + return cnf.CLASS_NAME_TO_ID[cls_type] + + def get_obj_level(self): + height = float(self.box2d[3]) - float(self.box2d[1]) + 1 + + if height >= 40 and self.truncation <= 0.15 and self.occlusion <= 0: + self.level_str = 'Easy' + return 1 # Easy + elif height >= 25 and self.truncation <= 0.3 and self.occlusion <= 1: + self.level_str = 'Moderate' + return 2 # Moderate + elif height >= 25 and self.truncation <= 0.5 and self.occlusion <= 2: + self.level_str = 'Hard' + return 3 # Hard + else: + self.level_str = 'UnKnown' + return 4 + + def print_object(self): + print('Type, truncation, occlusion, alpha: %s, %d, %d, %f' % \ + (self.type, self.truncation, self.occlusion, self.alpha)) + print('2d bbox (x0,y0,x1,y1): %f, %f, %f, %f' % \ + (self.xmin, self.ymin, self.xmax, self.ymax)) + print('3d bbox h,w,l: %f, %f, %f' % \ + (self.h, self.w, self.l)) + print('3d bbox location, ry: (%f, %f, %f), %f' % \ + (self.t[0], self.t[1], self.t[2], self.ry)) + + def to_kitti_format(self): + kitti_str = '%s %.2f %d %.2f %.2f %.2f %.2f %.2f %.2f %.2f %.2f %.2f %.2f %.2f %.2f %.2f' \ + % (self.type, self.truncation, int(self.occlusion), self.alpha, self.box2d[0], self.box2d[1], + self.box2d[2], self.box2d[3], self.h, self.w, self.l, self.t[0], self.t[1], self.t[2], + self.ry, self.score) + return kitti_str + + +def read_label(label_filename): + lines = [line.rstrip() for line in open(label_filename)] + objects = [Object3d(line) for line in lines] + return objects + + +class Calibration(object): + ''' Calibration matrices and utils + 3d XYZ in