Update the lib code and example for review comment

Signed-off-by: khalid-davis <huangqinkai1@huawei.com>
4 years ago · 7cecee805e
--- a/build/worker/base_images/tensorflow/tensorflow-2.3.Dockerfile
+++ b/build/worker/base_images/tensorflow/tensorflow-2.3.Dockerfile
@@ -10,4 +10,4 @@ ENV PYTHONPATH "/home/lib"
 WORKDIR /home/work
 COPY ./lib /home/lib
 ENTRYPOINT ["python"]
 ENTRYPOINT ["python"]
--- a/examples/helmet_detection/training/train.py
+++ b/examples/helmet_detection/training/train.py
@@ -1,70 +0,0 @@
 import logging
 import tensorflow as tf
 import neptune
 from interface import Interface
 from neptune.incremental_learning.incremental_learning import IncrementalConfig
 LOG = logging.getLogger(__name__)
 MODEL_URL = IncrementalConfig().model_url
 def main():
    """Train the helmet detection model as an incremental learning worker.

    Job parameters are read from the neptune context and registered,
    together with a set of fixed defaults, as ``tf.flags`` so that code
    reading ``tf.flags.FLAGS`` can see them. An ``Interface`` instance is
    then passed to ``neptune.incremental_learning.train``.
    """
    tf.set_random_seed(22)

    class_names = neptune.context.get_parameters("class_names")

    # load dataset.
    train_data = neptune.load_train_dataset(data_format='txt', with_image=False)

    # read parameters from deployment config.
    obj_threshold = neptune.context.get_parameters("obj_threshold")
    nms_threshold = neptune.context.get_parameters("nms_threshold")
    input_shape = neptune.context.get_parameters("input_shape")
    epochs = neptune.context.get_parameters('epochs')
    batch_size = neptune.context.get_parameters('batch_size')

    # Output locations; the trained model goes to the module-level MODEL_URL.
    tf.flags.DEFINE_string('train_url', default=MODEL_URL, help='train url for model')
    tf.flags.DEFINE_string('log_url', default=None, help='log url for model')
    tf.flags.DEFINE_string('checkpoint_url', default=None, help='checkpoint url for model')
    tf.flags.DEFINE_string('model_name', default=None, help='model name')
    # class_names is a comma-separated string, e.g. 'helmet,helmet-on,person,helmet-off'
    tf.flags.DEFINE_list('class_names', default=class_names.split(','),
                         help='label names for the training datasets')
    # input_shape is a comma-separated string parsed to ints, e.g. [352, 640]
    tf.flags.DEFINE_list('input_shape', default=[int(x) for x in input_shape.split(',')],
                         help='input_shape')
    tf.flags.DEFINE_integer('max_epochs', default=epochs, help='training number of epochs')
    tf.flags.DEFINE_integer('batch_size', default=batch_size, help='training batch size')
    tf.flags.DEFINE_boolean('load_imagenet_weights', default=False, help='if load imagenet weights or not')
    tf.flags.DEFINE_string('inference_device',
                           default='GPU',
                           help='which type of device is used to do inference, only CPU, GPU or 310D')
    tf.flags.DEFINE_boolean('copy_to_local', default=True, help='if copy to local or not')
    tf.flags.DEFINE_integer('num_gpus', default=1, help='use number of gpus')
    tf.flags.DEFINE_boolean('finetuning', default=False, help='if use finetuning or not')
    tf.flags.DEFINE_boolean('label_changed', default=False, help='whether number of labels is changed or not')
    # NOTE(review): the three values below are registered as string flags,
    # exactly as before; only their copy-pasted help texts were corrected.
    tf.flags.DEFINE_string('learning_rate', default='0.001', help='learning rate for training')
    tf.flags.DEFINE_string('obj_threshold', default=obj_threshold, help='obj threshold')
    tf.flags.DEFINE_string('nms_threshold', default=nms_threshold, help='nms threshold')
    tf.flags.DEFINE_string('net_type', default='resnet18', help='resnet18 or resnet18_nas')
    tf.flags.DEFINE_string('nas_sequence', default='64_1-2111-2-1112', help='nas sequence')
    tf.flags.DEFINE_string('deploy_model_format', default=None, help='the format for the converted model')
    tf.flags.DEFINE_string('result_url', default=None, help='result url for training')

    model = Interface()

    # class_names and input_shape are forwarded in their raw (unsplit) form.
    neptune.incremental_learning.train(model=model,
                                       train_data=train_data,
                                       epochs=epochs,
                                       batch_size=batch_size,
                                       class_names=class_names,
                                       input_shape=input_shape,
                                       obj_threshold=obj_threshold,
                                       nms_threshold=nms_threshold)
 if __name__ == '__main__':
    main()
--- a/examples/helmet_detection_incremental_train/README.md
+++ b/examples/helmet_detection_incremental_train/README.md
@@ -1,34 +1,51 @@
 # Using Incremental Learning Job in Helmet Detection Scenario
 This document introduces how to use an incremental learning job in the helmet detection scenario. Using the incremental learning job, our application can automatically retrain, evaluate, and update models based on the data generated at the edge.
 This document introduces how to use an incremental learning job in the helmet detection scenario. 
 Using the incremental learning job, our application can automatically retrain, evaluate, 
 and update models based on the data generated at the edge.
 ## Helmet Detection Experiment
 ### Prepare Worker Image
 Build the worker image by referring to the [dockerfile](/build/worker/base_images/tensorflow/tensorflow-1.15.Dockerfile)
 and put the image into the `imageHub` configured in `gm-config.yaml` (see [Install Neptune](#install-neptune)).
 In this demo, we need to replace the content of requirement.txt with
 ```
 flask==1.1.2
 keras==2.4.3
 opencv-python==4.4.0.44
 websockets==8.1
 Pillow==8.0.1
 requests==2.24.0
 tqdm==4.56.0
 matplotlib==3.3.3
 ```
 ### Install Neptune
 Follow the [Neptune installation document](/docs/setup/install.md) to install Neptune.
 ### Prepare Data and Model
 Download dataset and model to your node:
 * step 1: download [dataset](https://edgeai-neptune.obs.cn-north-1.myhuaweicloud.com/examples/helmet-detection/dataset.tar.gz)
 * step 1: create dataset directory:
 ```
 mkdir -p /data/helmet_detection
 cd /data/helmet_detection
 tar -zxvf dataset.tar.gz
 ```
 * step 2: download [base model](https://edgeai-neptune.obs.cn-north-1.myhuaweicloud.com/examples/helmet-detection/model.tar.gz)
 ```
 mkdir /model
 cd /model
 wget https://edgeai-neptune.obs.cn-north-1.myhuaweicloud.com/examples/helmet-detection/model.tar.gz
 tar -zxvf model.tar.gz
 ```
 ### Prepare Script
 Download the [scripts](/examples/helmet_detection/training) to the path `code` of your node
 Download the [scripts](/examples/helmet_detection_incremental_train/training) to the path `code` of your node
 ### Create Incremental Job
 Create Namespace `kubectl create ns neptune-test`
 Create Dataset
 ```
@@ -45,7 +62,7 @@ spec:
 EOF
 ```
 Create Initial Model
 Create Initial Model to simulate the initial model in incremental learning scenario.
 ```
 kubectl create -f - <<EOF
@@ -163,10 +180,10 @@ EOF
 ### Mock Video Stream for Inference in Edge Side
 * step1: install the open source video streaming server [EasyDarwin](https://github.com/EasyDarwin/EasyDarwin/tree/dev).
 * step2: start EasyDarwin server.
 * step3: download [video](https://edgeai-neptune.obs.cn-north-1.myhuaweicloud.com/examples/helmet-detection/video.tar.gz).
 * step4: push a video stream to the url (e.g., `rtsp://localhost/video`) that the inference service can connect to.
 * step 1: install the open source video streaming server [EasyDarwin](https://github.com/EasyDarwin/EasyDarwin/tree/dev).
 * step 2: start EasyDarwin server.
 * step 3: download [video](https://edgeai-neptune.obs.cn-north-1.myhuaweicloud.com/examples/helmet-detection/video.tar.gz).
 * step 4: push a video stream to the url (e.g., `rtsp://localhost/video`) that the inference service can connect to.
 ```
 wget https://github.com/EasyDarwin/EasyDarwin/releases/download/v8.1.0/EasyDarwin-linux-8.1.0-1901141151.tar.gz --no-check-certificate
@@ -180,13 +197,41 @@ tar -zxvf video.tar.gz
 ffmpeg -re -i /data/video/helmet-detection.mp4 -vcodec libx264 -f rtsp rtsp://localhost/video
 ```
 ### Check Incremental Job Result
 ### Check Incremental Learning Job
 Query the service status:
 ```
 kubectl get incrementallearningjob helmet-detection-demo -n neptune-test
 ```
 In the `IncrementalLearningJob` resource helmet-detection-demo, the following trigger is configured:
 ```
 trigger:
  checkPeriodSeconds: 60
  timer:
    start: 02:00
    end: 04:00
  condition:
    operator: ">"
    threshold: 500
    metric: num_of_samples
 ```
 In the real world, we need to label the hard examples in `HE_SAVED_URL` with annotation tools and then put the examples into the `Dataset`'s url.  
 Without annotation tools, we can simulate the condition of `num_of_samples` in the following way:  
 Download the [dataset](https://edgeai-neptune.obs.cn-north-1.myhuaweicloud.com/examples/helmet-detection/dataset.tar.gz) to our cloud0 node.
 ```
 cd /data/helmet_detection
 wget  https://edgeai-neptune.obs.cn-north-1.myhuaweicloud.com/examples/helmet-detection/dataset.tar.gz
 tar -zxvf dataset.tar.gz
 ```
 The LocalController component will check the number of samples, detect that the trigger conditions are met, and notify the GlobalManager component to start the train worker.
 When the train worker finishes, we can view the updated model in the `/output` directory on the cloud0 node.
 Then the eval worker will start to evaluate the model that the train worker generated.
 After the job is completed, we can view the updated model in the /output directory on the cloud0 node.
 If the eval result satisfies the `deploySpec`'s trigger 
 ```
 trigger:
  condition:
    operator: ">"
    threshold: 0.1
    metric: precision_delta
 ```
 the deploy worker will load the new model and provide service.
--- a/examples/helmet_detection_incremental_train/training/data_gen.py
+++ b/examples/helmet_detection_incremental_train/training/data_gen.py
@@ -215,13 +215,10 @@ class DataGen(object):
        return image_data, box_data
    def preprocess_true_boxes(self, true_boxes, in_shape=416):
        """
        Introduction
        ------------
            对训练数据的ground truth box进行预处理
        Parameters
        ----------
            true_boxes: ground truth box 形状为[boxes, 5], x_min, y_min, x_max, y_max, class_id
        """Preprocesses the ground truth box of the training data
        :param true_boxes: ground truth box shape is [boxes, 5], x_min, y_min,
            x_max, y_max, class_id
        """
        num_layers = self.anchors.shape[0] // 3
@@ -238,20 +235,21 @@ class DataGen(object):
        grid_shapes = [input_shape // 32, input_shape // 16, input_shape // 8]
        y_true = [np.zeros((m, grid_shapes[l][0], grid_shapes[l][1], len(anchor_mask[l]), 5 + self.num_classes),
                           dtype='float32') for l in range(num_layers)]
        # 这里扩充维度是为了后面应用广播计算每个图中所有box的anchor互相之间的iou
        # The dimension is expanded to calculate the IOU between the
        # anchors of all boxes in each graph by broadcasting
        anchors = np.expand_dims(self.anchors, 0)
        anchors_max = anchors / 2.
        anchors_min = -anchors_max
        # 因为之前对box做了padding, 因此需要去除全0行
        # Because we padded the box before, we need to remove all 0 lines
        valid_mask = boxes_wh[..., 0] > 0
        for b in range(m):
            wh = boxes_wh[b, valid_mask[b]]
            if len(wh) == 0: continue
            # 为了应用广播扩充维度
            # Expanding dimensions for broadcasting applications
            wh = np.expand_dims(wh, -2)
            # wh 的shape为[box_num, 1, 2]
            # wh shape is [box_num, 1, 2]
            boxes_max = wh / 2.
            boxes_min = -boxes_max
@@ -263,7 +261,10 @@ class DataGen(object):
            anchor_area = anchors[..., 0] * anchors[..., 1]
            iou = intersect_area / (box_area + anchor_area - intersect_area)
            # 找出和ground truth box的iou最大的anchor box, 然后将对应不同比例的负责该ground turth box 的位置置为ground truth box坐标
            # Find the anchor box that has the largest IOU with each ground
            # truth box, and then set the corresponding positions of different
            # proportions responsible for the ground truth box as the
            # coordinates of the ground truth box
            best_anchor = np.argmax(iou, axis=-1)
            for t, n in enumerate(best_anchor):
                for l in range(num_layers):
--- a/examples/helmet_detection_incremental_train/training/eval.py
+++ b/examples/helmet_detection_incremental_train/training/eval.py
@@ -19,13 +19,10 @@ def main():
    model = validate
    model = neptune.incremental_learning.evaluate(model=model,
                                                  test_data=test_data,
                                                  class_names=class_names,
                                                  input_shape=input_shape)
    # Save the model based on the config.
    # kubeedge_ai.incremental_learning.save_model(model)
    neptune.incremental_learning.evaluate(model=model,
                                          test_data=test_data,
                                          class_names=class_names,
                                          input_shape=input_shape)
 if __name__ == '__main__':
--- a/examples/helmet_detection_incremental_train/training/inference.py
+++ b/examples/helmet_detection_incremental_train/training/inference.py
--- a/examples/helmet_detection_incremental_train/training/interface.py
+++ b/examples/helmet_detection_incremental_train/training/interface.py
@@ -165,7 +165,7 @@ class Interface:
        logging.info("average checkpoints end .......")
    def save_model_pb(self):
    def save_model_pb(self, saved_model_name):
        """
        save model as a single pb file from checkpoint
        """
@@ -189,6 +189,6 @@ class Interface:
            print('output_tensors : ', output_tensors)
            output_tensors = [t.op.name for t in output_tensors]
            graph = tf.graph_util.convert_variables_to_constants(sess, input_graph_def, output_tensors)
            tf.train.write_graph(graph, model.model_dir, 'model.pb', False)
            tf.train.write_graph(graph, model.model_dir, saved_model_name, False)
        logging.info("save model as .pb end .......")
--- a/examples/helmet_detection_incremental_train/training/resnet18.py
+++ b/examples/helmet_detection_incremental_train/training/resnet18.py
@@ -19,7 +19,8 @@ def _residual_block_first(x, is_training, out_channel, strides, name="unit"):
            if strides == 1:
                shortcut = tf.identity(x)
            else:
                shortcut = tf.nn.max_pool(x, [1, strides, strides, 1], [1, strides, strides, 1], 'VALID')
                shortcut = tf.nn.max_pool(x, [1, strides, strides, 1],
                                          [1, strides, strides, 1], 'VALID')
        else:
            shortcut = _conv(x, 1, out_channel, strides, name='shortcut')
        # Residual
@@ -58,7 +59,6 @@ def _residual_block(x, is_training, name="unit"):
    return x
 #
 def _conv(x, filter_size, out_channel, strides, name="conv"):
    """
    Helper functions(counts FLOPs and number of weights)
@@ -66,20 +66,26 @@ def _conv(x, filter_size, out_channel, strides, name="conv"):
    in_shape = x.get_shape()
    with tf.variable_scope(name):
        # Main operation: conv2d
        kernel = tf.get_variable('kernel', [filter_size, filter_size, in_shape[3], out_channel], tf.float32,
        kernel = tf.get_variable('kernel',
                                 [filter_size, filter_size, in_shape[3],
                                  out_channel], tf.float32,
                                 initializer=tf.random_normal_initializer(
                                     stddev=np.sqrt(2.0 / filter_size / filter_size / out_channel)))
                                     stddev=np.sqrt(
                                         2.0 / filter_size / filter_size / out_channel)))
        if kernel not in tf.get_collection(WEIGHT_DECAY_KEY):
            tf.add_to_collection(WEIGHT_DECAY_KEY, kernel)
        if strides == 1:
            conv = tf.nn.conv2d(x, kernel, [1, strides, strides, 1], padding='SAME')
            conv = tf.nn.conv2d(x, kernel, [1, strides, strides, 1],
                                padding='SAME')
        else:
            kernel_size_effective = filter_size
            pad_total = kernel_size_effective - 1
            pad_beg = pad_total // 2
            pad_end = pad_total - pad_beg
            x = tf.pad(x, [[0, 0], [pad_beg, pad_end], [pad_beg, pad_end], [0, 0]])
            conv = tf.nn.conv2d(x, kernel, [1, strides, strides, 1], padding='VALID')
            x = tf.pad(x, [[0, 0], [pad_beg, pad_end], [pad_beg, pad_end],
                           [0, 0]])
            conv = tf.nn.conv2d(x, kernel, [1, strides, strides, 1],
                                padding='VALID')
    return conv
@@ -88,8 +94,9 @@ def _fc(x, out_dim, name="fc"):
        # Main operation: fc
        with tf.device('/CPU:0'):
            w = tf.get_variable('weights', [x.get_shape()[1], out_dim],
                                tf.float32, initializer=tf.random_normal_initializer(
                    stddev=np.sqrt(1.0 / out_dim)))
                                tf.float32,
                                initializer=tf.random_normal_initializer(
                                    stddev=np.sqrt(1.0 / out_dim)))
            b = tf.get_variable('biases', [out_dim], tf.float32,
                                initializer=tf.constant_initializer(0.0))
        if w not in tf.get_collection(WEIGHT_DECAY_KEY):
@@ -100,7 +107,9 @@ def _fc(x, out_dim, name="fc"):
 def _bn(x, is_training, name="bn"):
    bn = tf.layers.batch_normalization(inputs=x, momentum=0.99, epsilon=1e-5,
                                       center=True, scale=True, training=is_training, name=name, fused=True)
                                       center=True, scale=True,
                                       training=is_training, name=name,
                                       fused=True)
    return bn
@@ -140,17 +149,20 @@ class ResNet18(object):
        self.end_points['conv2_output'] = x
        # conv3_x
        x = _residual_block_first(x, is_training, filters[2], strides[2], name='conv3_1')
        x = _residual_block_first(x, is_training, filters[2], strides[2],
                                  name='conv3_1')
        x = _residual_block(x, is_training, name='conv3_2')
        self.end_points['conv3_output'] = x
        # conv4_x
        x = _residual_block_first(x, is_training, filters[3], strides[3], name='conv4_1')
        x = _residual_block_first(x, is_training, filters[3], strides[3],
                                  name='conv4_1')
        x = _residual_block(x, is_training, name='conv4_2')
        self.end_points['conv4_output'] = x
        # conv5_x
        x = _residual_block_first(x, is_training, filters[4], strides[4], name='conv5_1')
        x = _residual_block_first(x, is_training, filters[4], strides[4],
                                  name='conv5_1')
        x = _residual_block(x, is_training, name='conv5_2')
        self.end_points['conv5_output'] = x
--- a/examples/helmet_detection_incremental_train/training/train.py
+++ b/examples/helmet_detection_incremental_train/training/train.py
@@ -0,0 +1,87 @@
 import logging
 import tensorflow as tf
 import neptune
 from interface import Interface
 from neptune.incremental_learning import IncrementalConfig
 LOG = logging.getLogger(__name__)
 MODEL_URL = IncrementalConfig().model_url
 def main():
    """Train the helmet detection model as an incremental learning worker.

    Job parameters are read from the neptune context and registered,
    together with a set of fixed defaults, as ``tf.flags`` so that code
    reading ``tf.flags.FLAGS`` can see them. An ``Interface`` instance is
    then passed to ``neptune.incremental_learning.train``.
    """
    tf.set_random_seed(22)

    class_names = neptune.context.get_parameters("class_names")

    # load dataset.
    train_data = neptune.load_train_dataset(data_format='txt',
                                            with_image=False)

    # read parameters from deployment config.
    obj_threshold = neptune.context.get_parameters("obj_threshold")
    nms_threshold = neptune.context.get_parameters("nms_threshold")
    input_shape = neptune.context.get_parameters("input_shape")
    epochs = neptune.context.get_parameters('epochs')
    batch_size = neptune.context.get_parameters('batch_size')

    # Output locations; the trained model goes to the module-level MODEL_URL.
    tf.flags.DEFINE_string('train_url', default=MODEL_URL,
                           help='train url for model')
    tf.flags.DEFINE_string('log_url', default=None, help='log url for model')
    tf.flags.DEFINE_string('checkpoint_url', default=None,
                           help='checkpoint url for model')
    tf.flags.DEFINE_string('model_name', default=None,
                           help='model name')
    # class_names is a comma-separated string,
    # e.g. 'helmet,helmet-on,person,helmet-off'
    tf.flags.DEFINE_list('class_names', default=class_names.split(','),
                         help='label names for the training datasets')
    # input_shape is a comma-separated string parsed to ints, e.g. [352, 640]
    tf.flags.DEFINE_list('input_shape',
                         default=[int(x) for x in input_shape.split(',')],
                         help='input_shape')
    tf.flags.DEFINE_integer('max_epochs', default=epochs,
                            help='training number of epochs')
    tf.flags.DEFINE_integer('batch_size', default=batch_size,
                            help='training batch size')
    tf.flags.DEFINE_boolean('load_imagenet_weights', default=False,
                            help='if load imagenet weights or not')
    tf.flags.DEFINE_string('inference_device',
                           default='GPU',
                           help='which type of device is used to do inference,'
                                ' only CPU, GPU or 310D')
    tf.flags.DEFINE_boolean('copy_to_local', default=True,
                            help='if copy to local or not')
    tf.flags.DEFINE_integer('num_gpus', default=1, help='use number of gpus')
    tf.flags.DEFINE_boolean('finetuning', default=False,
                            help='if use finetuning or not')
    tf.flags.DEFINE_boolean('label_changed', default=False,
                            help='whether number of labels is changed or not')
    # NOTE(review): the three values below are registered as string flags,
    # exactly as before; only their copy-pasted help texts were corrected.
    tf.flags.DEFINE_string('learning_rate', default='0.001',
                           help='learning rate for training')
    tf.flags.DEFINE_string('obj_threshold', default=obj_threshold,
                           help='obj threshold')
    tf.flags.DEFINE_string('nms_threshold', default=nms_threshold,
                           help='nms threshold')
    tf.flags.DEFINE_string('net_type', default='resnet18',
                           help='resnet18 or resnet18_nas')
    tf.flags.DEFINE_string('nas_sequence', default='64_1-2111-2-1112',
                           help='nas sequence')
    tf.flags.DEFINE_string('deploy_model_format', default=None,
                           help='the format for the converted model')
    tf.flags.DEFINE_string('result_url', default=None,
                           help='result url for training')

    model = Interface()

    # class_names and input_shape are forwarded in their raw (unsplit) form.
    neptune.incremental_learning.train(model=model,
                                       train_data=train_data,
                                       epochs=epochs,
                                       batch_size=batch_size,
                                       class_names=class_names,
                                       input_shape=input_shape,
                                       obj_threshold=obj_threshold,
                                       nms_threshold=nms_threshold)
 if __name__ == '__main__':
    main()
--- a/examples/helmet_detection_incremental_train/training/validate_utils.py
+++ b/examples/helmet_detection_incremental_train/training/validate_utils.py
@@ -1,18 +1,3 @@
 # -*- coding: utf-8 -*-
 # Copyright 2019 ModelArts Service of Huawei Cloud. All Rights Reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at
 #
 # http://www.apache.org/licenses/LICENSE-2.0
 #
 # Unless required by applicable law or agreed to in writing, software
 # distributed under the License is distributed on an "AS IS" BASIS,
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
 from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
@@ -40,14 +25,14 @@ def add_path(path):
 def init_yolo(model_path, input_shape):
    print('model_path : ', model_path)
    # 初始化session，需绑定对应的Graph
    # initialize the session and bind the corresponding graph
    yolo_graph = tf.Graph()
    config = tf.ConfigProto(allow_soft_placement=True)
    config.gpu_options.allow_growth = True
    config.gpu_options.per_process_gpu_memory_fraction = 0.1
    yolo_session = tf.Session(graph=yolo_graph, config=config)
    # 初始化yoloInference对象
    # initialize yoloInference object
    yolo_infer = YOLOInference(yolo_session, model_path, input_shape)
    return yolo_infer, yolo_session
@@ -148,8 +133,6 @@ def validate_img_file(yolo_infer, yolo_session, img_file, bbox_list_ground, fold
        img_file = img_file.split("/")[-1]
        cv2.imwrite(os.path.join(folder_out, img_file), img)
    # print ('\tbbox_list_pred : ', bbox_list_pred)
    # print ('\tbbox_list_ground : ', bbox_list_ground)
    count_correct = [0 for ix in range(class_num)]
    count_ground = [0 for ix in range(class_num)]
    count_pred = [0 for ix in range(class_num)]
@@ -163,15 +146,12 @@ def validate_img_file(yolo_infer, yolo_session, img_file, bbox_list_ground, fold
    for iy in range(count_pred_all):
        bbox_pred = [bbox_list_pred[iy][1], bbox_list_pred[iy][0], bbox_list_pred[iy][3], bbox_list_pred[iy][2]]
        # bbox_draw_on_img_cv(img_data, bbox_pred, colors[labels[iy]])
        # if bbox_pred[2]-bbox_pred[0] > 30 and bbox_pred[3]-bbox_pred[1] > 30:
        LOG.debug(f'count_pred={count_pred}, labels[iy]={labels[iy]}')
        count_pred[labels[iy]] += 1
        for ix in range(count_ground_all):
            bbox_ground = [int(x) for x in bbox_list_ground[ix].split(',')]
            class_ground = bbox_ground[4]
            # bbox_draw_on_img_cv(img_data, bbox_ground, (40, 39, 214))
            if labels[iy] == class_ground:
                iou = calc_iou(bbox_pred, bbox_ground)
@@ -225,8 +205,8 @@ def draw_boxes(img, labels, scores, bboxes, class_names, colors):
 def calc_iou(bbox_pred, bbox_ground):
    """
    自定义函数，计算两矩形 IOU，传入为均为矩形对角线，（x,y）  坐标。
    """User-defined function for calculating the IOU of two rectangles. The
        input parameters are the (x, y) coordinates of each rectangle's diagonal
    """
    x1 = bbox_pred[0]
    y1 = bbox_pred[1]
@@ -247,9 +227,9 @@ def calc_iou(bbox_pred, bbox_ground):
    height = height1 + height2 - (endy - starty)
    if width <= 0 or height <= 0:
        iou = 0  # 重叠率为 0
        iou = 0
    else:
        area = width * height  # 两矩形相交面积
        area = width * height
        area1 = width1 * height1
        area2 = width2 * height2
        iou = area * 1. / (area1 + area2 - area)
--- a/examples/helmet_detection_incremental_train/training/yolo3_multiscale.py
+++ b/examples/helmet_detection_incremental_train/training/yolo3_multiscale.py
@@ -14,13 +14,6 @@ flags = tf.flags.FLAGS
 class Yolo3:
    def __init__(self, sess, is_training, config):
        """
        Introduction
        ------------
            初始化函数
        ----------
        """
        LOG.info('is_training: %s' % is_training)
        LOG.info('model dir: %s' % flags.train_url)
        LOG.info('input_shape: (%d, %d)' % (flags.input_shape[0], flags.input_shape[1]))
@@ -112,8 +105,7 @@ class Yolo3:
            sess.run(v.assign(data[vname]))
    def step(self, sess, batch_data, is_training):
        """
        step, read one batch, generate gradients
        """step, read one batch, generate gradients
        """
        # Input feed
@@ -132,21 +124,19 @@ class Yolo3:
        return outputs[0]  # loss
    def _batch_normalization_layer(self, input_layer, name=None, training=True, norm_decay=0.997, norm_epsilon=1e-5):
        '''
        Introduction
        ------------
            对卷积层提取的feature map使用batch normalization
        Parameters
        ----------
            input_layer: 输入的四维tensor
            name: batchnorm层的名字
            trainging: 是否为训练过程
            norm_decay: 在预测时计算moving average时的衰减率
            norm_epsilon: 方差加上极小的数，防止除以0的情况
        Returns
        -------
            bn_layer: batch normalization处理之后的feature map
        '''
        """Batch normalization is used for feature map extracted from
            convolution layer
        :param input_layer: four dimensional tensor of input
        :param name: the name of batchnorm layer
        :param training: is training or not
        :param norm_decay: The decay rate of moving average is calculated
            during prediction
        :param norm_epsilon: Variance plus a minimal number to prevent
            division by 0
        :return bn_layer: the feature map after batch normalization
        """
        bn_layer = tf.layers.batch_normalization(inputs=input_layer,
                                                 momentum=norm_decay, epsilon=norm_epsilon, center=True,
                                                 scale=True, training=training, name=name, fused=True)
@@ -154,29 +144,20 @@ class Yolo3:
        # return tf.nn.leaky_relu(bn_layer, alpha = 0.1)
    def _conv2d_layer(self, inputs, filters_num, kernel_size, name, use_bias=False, strides=1):
        """
        Introduction
        ------------
            使用tf.layers.conv2d减少权重和偏置矩阵初始化过程，以及卷积后加上偏置项的操作
            经过卷积之后需要进行batch norm，最后使用leaky ReLU激活函数
            根据卷积时的步长，如果卷积的步长为2，则对图像进行降采样
            比如，输入图片的大小为416*416，卷积核大小为3，若stride为2时，（416 - 3 + 2）/ 2 + 1， 计算结果为208，相当于做了池化层处理
            因此需要对stride大于1的时候，先进行一个padding操作, 采用四周都padding一维代替'same'方式
        Parameters
        ----------
            inputs: 输入变量
            filters_num: 卷积核数量
            strides: 卷积步长
            name: 卷积层名字
            trainging: 是否为训练过程
            use_bias: 是否使用偏置项
            kernel_size: 卷积核大小
        Returns
        -------
            conv: 卷积之后的feature map
        """Use tf.layers.conv2d to reduce the weight and bias matrix
            initialization process, as well as the bias addition after convolution
        :param inputs: Input variables
        :param filters_num: Number of convolution kernels
        :param strides: Convolution step
        :param name: Convolution layer name
        :param training: is a training process or not
        :param use_bias: use bias or not
        :param kernel_size: the kernels size
        :return conv: Feature map after convolution
        """
        if strides > 1:  # modified 0327
            # 在输入feature map的长宽维度进行padding
            inputs = tf.pad(inputs, paddings=[[0, 0], [1, 0], [1, 0], [0, 0]], mode='CONSTANT')
        conv = tf.layers.conv2d(inputs=inputs, filters=filters_num,
                                kernel_size=kernel_size, strides=[strides, strides],
@@ -187,25 +168,6 @@ class Yolo3:
    def _Residual_block(self, inputs, filters_num, blocks_num, conv_index, training=True, norm_decay=0.997,
                        norm_epsilon=1e-5):
        """
        Introduction
        ------------
            Darknet的残差block，类似resnet的两层卷积结构，分别采用1x1和3x3的卷积核，使用1x1是为了减少channel的维度
        Parameters
        ----------
            inputs: 输入变量
            filters_num: 卷积核数量
            trainging: 是否为训练过程
            blocks_num: block的数量
            conv_index: 为了方便加载预训练权重，统一命名序号
            weights_dict: 加载预训练模型的权重
            norm_decay: 在预测时计算moving average时的衰减率
            norm_epsilon: 方差加上极小的数，防止除以0的情况
        Returns
        -------
            inputs: 经过残差网络处理后的结果
        """
        layer = self._conv2d_layer(inputs, filters_num, kernel_size=3, strides=2, name="conv2d_" + str(conv_index))
        layer = self._batch_normalization_layer(layer, name="batch_normalization_" + str(conv_index), training=training,
                                                norm_decay=norm_decay, norm_epsilon=norm_epsilon)
@@ -237,25 +199,6 @@ class Yolo3:
    def _yolo_block(self, inputs, filters_num, out_filters, conv_index, training=True, norm_decay=0.997,
                    norm_epsilon=1e-5):
        """
        Introduction
        ------------
            yolo3在Darknet53提取的特征层基础上，又加了针对3种不同比例的feature map的block，这样来提高对小物体的检测率
        Parameters
        ----------
            inputs: 输入特征
            filters_num: 卷积核数量
            out_filters: 最后输出层的卷积核数量
            conv_index: 卷积层数序号，方便根据名字加载预训练权重
            training: 是否为训练
            norm_decay: 在预测时计算moving average时的衰减率
            norm_epsilon: 方差加上极小的数，防止除以0的情况
        Returns
        -------
            route: 返回最后一层卷积的前一层结果
            conv: 返回最后一层卷积的结果
            conv_index: conv层计数
        """
        conv = self._conv2d_layer(inputs, filters_num=filters_num, kernel_size=1, strides=1,
                                  name="conv2d_" + str(conv_index))
        conv = self._batch_normalization_layer(conv, name="batch_normalization_" + str(conv_index), training=training,
@@ -293,18 +236,6 @@ class Yolo3:
        return route, conv, conv_index
    def yolo_inference(self, features_out, filters_yolo_block, conv_index, num_anchors, num_classes, training=True):
        """
        Introduction
        ------------
            构建yolo模型结构
        Parameters
        ----------
            inputs:       模型的输入变量
            num_anchors:  每个grid cell负责检测的anchor数量
            num_classes:  类别数量
            training:     是否为训练模式
        """
        conv = features_out[0]
        conv2d_45 = features_out[1]
        conv2d_26 = features_out[2]
@@ -368,36 +299,15 @@ class Yolo3:
        return [conv2d_59, conv2d_67, conv2d_75]
    def yolo_head(self, feats, anchors, num_classes, input_shape, training=True):
        """
        Introduction
        ------------
            根据不同大小的feature map做多尺度的检测，三种feature map大小分别为13x13x1024, 26x26x512, 52x52x256
        Parameters
        ----------
            feats: 输入的特征feature map
            anchors: 针对不同大小的feature map的anchor
            num_classes: 类别的数量
            input_shape: 图像的输入大小，一般为416
            training: 是否训练，用来控制返回不同的值
        Returns
        -------
        """
        print('feats : ', feats)
        print('anchors : ', anchors)
        print('input_shape : ', input_shape)
        num_anchors = len(anchors)
        anchors_tensor = tf.reshape(tf.constant(anchors, dtype=tf.float32), [1, 1, 1, num_anchors, 2])
        grid_size = tf.shape(feats)[1:3]
        predictions = tf.reshape(feats, [-1, grid_size[0], grid_size[1], num_anchors, num_classes + 5])
        # 这里构建13*13*1*2的矩阵，对应每个格子加上对应的坐标
        grid_y = tf.tile(tf.reshape(tf.range(grid_size[0]), [-1, 1, 1, 1]), [1, grid_size[1], 1, 1])
        grid_x = tf.tile(tf.reshape(tf.range(grid_size[1]), [1, -1, 1, 1]), [grid_size[0], 1, 1, 1])
        grid = tf.concat([grid_x, grid_y], axis=-1)
        grid = tf.cast(grid, tf.float32)
        # 将x,y坐标归一化为占416的比例
        box_xy = (tf.sigmoid(predictions[..., :2]) + grid) / tf.cast(grid_size[::-1], tf.float32)
        # 将w,h也归一化为占416的比例
        box_wh = tf.exp(predictions[..., 2:4]) * anchors_tensor / input_shape[::-1]
        box_confidence = tf.sigmoid(predictions[..., 4:5])
        box_class_probs = tf.sigmoid(predictions[..., 5:])
@@ -406,18 +316,6 @@ class Yolo3:
        return box_xy, box_wh, box_confidence, box_class_probs
    def yolo_boxes_scores(self, feats, anchors, num_classes, input_shape, image_shape):
        """
        Introduction
        ------------
            该函数是将box的坐标修正，除去之前按照长宽比缩放填充的部分，最后将box的坐标还原成相对原始图片的
        Parameters
        ----------
            feats: 模型输出feature map
            anchors: 模型anchors
            num_classes: 数据集类别数
            input_shape: 训练输入图片大小
            image_shape: 原始图片的大小
        """
        input_shape = tf.cast(input_shape, tf.float32)
        image_shape = tf.cast(image_shape, tf.float32)
        box_xy, box_wh, box_confidence, box_class_probs = self.yolo_head(feats, anchors, num_classes, input_shape,
@@ -446,18 +344,6 @@ class Yolo3:
        return boxes, boxes_scores
    def box_iou(self, box1, box2):
        """
        Introduction
        ------------
            计算box tensor之间的iou
        Parameters
        ----------
            box1: shape=[grid_size, grid_size, anchors, xywh]
            box2: shape=[box_num, xywh]
        Returns
        -------
            iou:
        """
        box1 = tf.expand_dims(box1, -2)
        box1_xy = box1[..., :2]
        box1_wh = box1[..., 2:4]
@@ -480,35 +366,16 @@ class Yolo3:
        return iou
    def yolo_loss(self, yolo_output, y_true, anchors, num_classes, ignore_thresh=.5):
        """
        Introduction
        ------------
            yolo模型的损失函数
        Parameters
        ----------
            yolo_output: yolo模型的输出
            y_true: 经过预处理的真实标签，shape为[batch, grid_size, grid_size, 5 + num_classes]
            anchors: yolo模型对应的anchors
            num_classes: 类别数量
            ignore_thresh: 小于该阈值的box我们认为没有物体
        Returns
        -------
            loss: 每个batch的平均损失值
            accuracy
        """
        loss = 0.0
        anchor_mask = [[6, 7, 8], [3, 4, 5], [0, 1, 2]]
        input_shape = tf.shape(yolo_output[0])[1: 3] * 32
        input_shape = tf.cast(input_shape, tf.float32)
        grid_shapes = [tf.cast(tf.shape(yolo_output[l])[1:3], tf.float32) for l in range(3)]
        for index in range(3):
            # 只有负责预测ground truth box的grid对应的为1, 才计算相对应的loss
            # object_mask的shape为[batch_size, grid_size, grid_size, 3, 1]
            object_mask = y_true[index][..., 4:5]
            class_probs = y_true[index][..., 5:]
            grid, predictions, pred_xy, pred_wh = self.yolo_head(yolo_output[index], anchors[anchor_mask[index]],
                                                                 num_classes, input_shape, training=True)
            # pred_box的shape为[batch, box_num, 4]
            pred_box = tf.concat([pred_xy, pred_wh], axis=-1)
            raw_true_xy = y_true[index][..., :2] * grid_shapes[index][::-1] - grid
            object_mask_bool = tf.cast(object_mask, dtype=tf.bool)
@@ -516,16 +383,13 @@ class Yolo3:
                tf.where(tf.equal(y_true[index][..., 2:4] / anchors[anchor_mask[index]] * input_shape[::-1], 0),
                         tf.ones_like(y_true[index][..., 2:4]),
                         y_true[index][..., 2:4] / anchors[anchor_mask[index]] * input_shape[::-1]))
            # 该系数是用来调整box坐标loss的系数
            box_loss_scale = 2 - y_true[index][..., 2:3] * y_true[index][..., 3:4]
            ignore_mask = tf.TensorArray(dtype=tf.float32, size=1, dynamic_size=True)
            def loop_body(internal_index, ignore_mask):
                # true_box的shape为[box_num, 4]
                true_box = tf.boolean_mask(y_true[index][internal_index, ..., 0:4],
                                           object_mask_bool[internal_index, ..., 0])
                iou = self.box_iou(pred_box[internal_index], true_box)
                # 计算每个true_box对应的预测的iou最大的box
                best_iou = tf.reduce_max(iou, axis=-1)
                ignore_mask = ignore_mask.write(internal_index, tf.cast(best_iou < ignore_thresh, tf.float32))
                return internal_index + 1, ignore_mask
@@ -535,7 +399,6 @@ class Yolo3:
                [0, ignore_mask])
            ignore_mask = ignore_mask.stack()
            ignore_mask = tf.expand_dims(ignore_mask, axis=-1)
            # 计算四个部分的loss
            xy_loss = object_mask * box_loss_scale * tf.nn.sigmoid_cross_entropy_with_logits(
                labels=raw_true_xy,
                logits=predictions[..., 0:2])
@@ -557,27 +420,11 @@ class Yolo3:
        return loss
    def yolo_eval(self, yolo_outputs, image_shape, max_boxes=20):
        """
        Introduction
        ------------
            根据Yolo模型的输出进行非极大值抑制，获取最后的物体检测框和物体检测类别
        Parameters
        ----------
            yolo_outputs: yolo模型输出
            image_shape: 图片的大小
            max_boxes:  最大box数量
        Returns
        -------
            boxes_: 物体框的位置
            scores_: 物体类别的概率
            classes_: 物体类别
        """
        with tf.variable_scope('boxes_scores'):
            anchor_mask = [[6, 7, 8], [3, 4, 5], [0, 1, 2]]
            boxes = []
            box_scores = []
            input_shape = tf.shape(yolo_outputs[0])[1: 3] * 32
            # 对三个尺度的输出获取每个预测box坐标和box的分数，score计算为置信度x类别概率
            for i in range(len(yolo_outputs)):
                _boxes, _box_scores = self.yolo_boxes_scores(yolo_outputs[i], self.anchors[anchor_mask[i]],
                                                             len(self.class_names), input_shape, image_shape)
@@ -627,9 +474,6 @@ class YoloConfig:
    norm_decay = 0.99
    norm_epsilon = 1e-5
    ignore_thresh = 0.5
    # learning_rate = 1e-3
    # obj_threshold = 0.3
    # nms_threshold = 0.4
 class YOLOInference(object):
--- a/lib/neptune/incremental_learning/incremental_learning.py
+++ b/lib/neptune/incremental_learning/incremental_learning.py
@@ -20,6 +20,7 @@ class IncrementalConfig(BaseConfig):
        BaseConfig.__init__(self)
        self.model_urls = os.getenv("MODEL_URLS")
        self.base_model_url = os.getenv("BASE_MODEL_URL")
        self.saved_model_name = "model.pb"
 def train(model, train_data, epochs, batch_size, class_names, input_shape,
@@ -40,12 +41,12 @@ def train(model, train_data, epochs, batch_size, class_names, input_shape,
    clean_folder(il_config.model_url)
    model.train(train_data, [])  # validation data is empty.
    tf.reset_default_graph()
    model.save_model_pb()
    model.save_model_pb(il_config.saved_model_name)
    ckpt_model_url = remove_path_prefix(il_config.model_url,
                                        il_config.data_path_prefix)
    pb_model_url = remove_path_prefix(
        os.path.join(il_config.model_url, 'model.pb'),
        os.path.join(il_config.model_url, il_config.saved_model_name),
        il_config.data_path_prefix)
    # TODO: check whether deleting metrics affects lc
@@ -156,8 +157,8 @@ class Inference:
    def inference(self, img_data) -> InferenceResult:
        result = self.model.inference(img_data)
        bboxes = deal_infer_rsl(result)
        is_hard_example = self.hard_example_mining_algorithm.hard_judge(bboxes)
        rsl = deal_infer_rsl(result)
        is_hard_example = self.hard_example_mining_algorithm.hard_judge(rsl)
        if is_hard_example:
            return InferenceResult(True, result)
        else:
@@ -166,9 +167,9 @@ class Inference:
 def deal_infer_rsl(model_output):
    all_classes, all_scores, all_bboxes = model_output
    bboxes = []
    rsl = []
    for c, s, bbox in zip(all_classes, all_scores, all_bboxes):
        bbox[0], bbox[1], bbox[2], bbox[3] = bbox[1], bbox[0], bbox[3], bbox[2]
        bboxes.append(bbox.tolist() + [s, c])
        rsl.append(bbox.tolist() + [s, c])
    return bboxes
    return rsl
--- a/lib/requirements.txt
+++ b/lib/requirements.txt
@@ -4,5 +4,3 @@ opencv-python==4.4.0.44
 websockets==8.1
 Pillow==8.0.1
 requests==2.24.0
 tqdm==4.56.0
 matplotlib==3.3.3