Signed-off-by: khalid-davis <huangqinkai1@huawei.com>tags/v0.1.0
| @@ -10,4 +10,4 @@ ENV PYTHONPATH "/home/lib" | |||||
| WORKDIR /home/work | WORKDIR /home/work | ||||
| COPY ./lib /home/lib | COPY ./lib /home/lib | ||||
| ENTRYPOINT ["python"] | |||||
| ENTRYPOINT ["python"] | |||||
| @@ -1,70 +0,0 @@ | |||||
| import logging | |||||
| import tensorflow as tf | |||||
| import neptune | |||||
| from interface import Interface | |||||
| from neptune.incremental_learning.incremental_learning import IncrementalConfig | |||||
| LOG = logging.getLogger(__name__) | |||||
| MODEL_URL = IncrementalConfig().model_url | |||||
| def main(): | |||||
| tf.set_random_seed(22) | |||||
| class_names = neptune.context.get_parameters("class_names") | |||||
| # load dataset. | |||||
| train_data = neptune.load_train_dataset(data_format='txt', with_image=False) | |||||
| # read parameters from deployment config. | |||||
| obj_threshold = neptune.context.get_parameters("obj_threshold") | |||||
| nms_threshold = neptune.context.get_parameters("nms_threshold") | |||||
| input_shape = neptune.context.get_parameters("input_shape") | |||||
| epochs = neptune.context.get_parameters('epochs') | |||||
| batch_size = neptune.context.get_parameters('batch_size') | |||||
| tf.flags.DEFINE_string('train_url', default=MODEL_URL, help='train url for model') | |||||
| tf.flags.DEFINE_string('log_url', default=None, help='log url for model') | |||||
| tf.flags.DEFINE_string('checkpoint_url', default=None, help='checkpoint url for model') | |||||
| tf.flags.DEFINE_string('model_name', default=None, help='url for train annotation files') | |||||
| tf.flags.DEFINE_list('class_names', default=class_names.split(','), # 'helmet,helmet-on,person,helmet-off' | |||||
| help='label names for the training datasets') | |||||
| tf.flags.DEFINE_list('input_shape', default=[int(x) for x in input_shape.split(',')], | |||||
| help='input_shape') # [352, 640] | |||||
| tf.flags.DEFINE_integer('max_epochs', default=epochs, help='training number of epochs') | |||||
| tf.flags.DEFINE_integer('batch_size', default=batch_size, help='training batch size') | |||||
| tf.flags.DEFINE_boolean('load_imagenet_weights', default=False, help='if load imagenet weights or not') | |||||
| tf.flags.DEFINE_string('inference_device', | |||||
| default='GPU', | |||||
| help='which type of device is used to do inference, only CPU, GPU or 310D') | |||||
| tf.flags.DEFINE_boolean('copy_to_local', default=True, help='if load imagenet weights or not') | |||||
| tf.flags.DEFINE_integer('num_gpus', default=1, help='use number of gpus') | |||||
| tf.flags.DEFINE_boolean('finetuning', default=False, help='use number of gpus') | |||||
| tf.flags.DEFINE_boolean('label_changed', default=False, help='whether number of labels is changed or not') | |||||
| tf.flags.DEFINE_string('learning_rate', default='0.001', help='label names for the training datasets') | |||||
| tf.flags.DEFINE_string('obj_threshold', default=obj_threshold, help='label names for the training datasets') | |||||
| tf.flags.DEFINE_string('nms_threshold', default=nms_threshold, help='label names for the training datasets') | |||||
| tf.flags.DEFINE_string('net_type', default='resnet18', help='resnet18 or resnet18_nas') | |||||
| tf.flags.DEFINE_string('nas_sequence', default='64_1-2111-2-1112', help='resnet18 or resnet18_nas') | |||||
| tf.flags.DEFINE_string('deploy_model_format', default=None, help='the format for the converted model') | |||||
| tf.flags.DEFINE_string('result_url', default=None, help='result url for training') | |||||
| model = Interface() | |||||
| model = neptune.incremental_learning.train(model=model, | |||||
| train_data=train_data, | |||||
| epochs=epochs, | |||||
| batch_size=batch_size, | |||||
| class_names=class_names, | |||||
| input_shape=input_shape, | |||||
| obj_threshold=obj_threshold, | |||||
| nms_threshold=nms_threshold) | |||||
| # Save the model based on the config. | |||||
| # neptune.save_model(model) | |||||
| if __name__ == '__main__': | |||||
| main() | |||||
| @@ -1,34 +1,51 @@ | |||||
| # Using Incremental Learning Job in Helmet Detection Scenario | # Using Incremental Learning Job in Helmet Detection Scenario | ||||
| This document introduces how to use incremental learning job in helmet detectioni scenario. Using the incremental learning job, our application can automatically retrains, evaluates, and updates models based on the data generated at the edge. | |||||
| This document introduces how to use an incremental learning job in a helmet detection scenario. | |||||
| Using the incremental learning job, our application can automatically retrain, evaluate, | |||||
| and update models based on the data generated at the edge. | |||||
| ## Helmet Detection Experiment | ## Helmet Detection Experiment | ||||
| ### Prepare Worker Image | |||||
| Build the worker image by referring to the [dockerfile](/build/worker/base_images/tensorflow/tensorflow-1.15.Dockerfile) | |||||
| and put the image to the `gm-config.yaml`'s `imageHub` in [Install Neptune](#install-neptune) | |||||
| In this demo, we need to replace the content of requirement.txt with: | |||||
| ``` | |||||
| flask==1.1.2 | |||||
| keras==2.4.3 | |||||
| opencv-python==4.4.0.44 | |||||
| websockets==8.1 | |||||
| Pillow==8.0.1 | |||||
| requests==2.24.0 | |||||
| tqdm==4.56.0 | |||||
| matplotlib==3.3.3 | |||||
| ``` | |||||
| ### Install Neptune | ### Install Neptune | ||||
| Follow the [Neptune installation document](/docs/setup/install.md) to install Neptune. | Follow the [Neptune installation document](/docs/setup/install.md) to install Neptune. | ||||
| ### Prepare Data and Model | ### Prepare Data and Model | ||||
| Download dataset and model to your node: | |||||
| * step 1: download [dataset](https://edgeai-neptune.obs.cn-north-1.myhuaweicloud.com/examples/helmet-detection/dataset.tar.gz) | |||||
| * step 1: create dataset directory: | |||||
| ``` | ``` | ||||
| mkdir -p /data/helmet_detection | mkdir -p /data/helmet_detection | ||||
| cd /data/helmet_detection | |||||
| tar -zxvf dataset.tar.gz | |||||
| ``` | ``` | ||||
| * step 2: download [base model](https://edgeai-neptune.obs.cn-north-1.myhuaweicloud.com/examples/helmet-detection/model.tar.gz) | * step 2: download [base model](https://edgeai-neptune.obs.cn-north-1.myhuaweicloud.com/examples/helmet-detection/model.tar.gz) | ||||
| ``` | ``` | ||||
| mkdir /model | mkdir /model | ||||
| cd /model | cd /model | ||||
| wget https://edgeai-neptune.obs.cn-north-1.myhuaweicloud.com/examples/helmet-detection/model.tar.gz | |||||
| tar -zxvf model.tar.gz | tar -zxvf model.tar.gz | ||||
| ``` | ``` | ||||
| ### Prepare Script | ### Prepare Script | ||||
| Download the [scripts](/examples/helmet_detection/training) to the path `code` of your node | |||||
| Download the [scripts](/examples/helmet_detection_incremental_train/training) to the path `code` of your node | |||||
| ### Create Incremental Job | ### Create Incremental Job | ||||
| Create Namespace `kubectl create ns neptune-test` | |||||
| Create Dataset | Create Dataset | ||||
| ``` | ``` | ||||
| @@ -45,7 +62,7 @@ spec: | |||||
| EOF | EOF | ||||
| ``` | ``` | ||||
| Create Initial Model | |||||
| Create Initial Model to simulate the initial model in incremental learning scenario. | |||||
| ``` | ``` | ||||
| kubectl create -f - <<EOF | kubectl create -f - <<EOF | ||||
| @@ -163,10 +180,10 @@ EOF | |||||
| ### Mock Video Stream for Inference in Edge Side | ### Mock Video Stream for Inference in Edge Side | ||||
| * step1: install the open source video streaming server [EasyDarwin](https://github.com/EasyDarwin/EasyDarwin/tree/dev). | |||||
| * step2: start EasyDarwin server. | |||||
| * step3: download [video](https://edgeai-neptune.obs.cn-north-1.myhuaweicloud.com/examples/helmet-detection/video.tar.gz). | |||||
| * step4: push a video stream to the url (e.g., `rtsp://localhost/video`) that the inference service can connect. | |||||
| * step 1: install the open source video streaming server [EasyDarwin](https://github.com/EasyDarwin/EasyDarwin/tree/dev). | |||||
| * step 2: start EasyDarwin server. | |||||
| * step 3: download [video](https://edgeai-neptune.obs.cn-north-1.myhuaweicloud.com/examples/helmet-detection/video.tar.gz). | |||||
| * step 4: push a video stream to the url (e.g., `rtsp://localhost/video`) that the inference service can connect to. | |||||
| ``` | ``` | ||||
| wget https://github.com/EasyDarwin/EasyDarwin/releases/download/v8.1.0/EasyDarwin-linux-8.1.0-1901141151.tar.gz --no-check-certificate | wget https://github.com/EasyDarwin/EasyDarwin/releases/download/v8.1.0/EasyDarwin-linux-8.1.0-1901141151.tar.gz --no-check-certificate | ||||
| @@ -180,13 +197,41 @@ tar -zxvf video.tar.gz | |||||
| ffmpeg -re -i /data/video/helmet-detection.mp4 -vcodec libx264 -f rtsp rtsp://localhost/video | ffmpeg -re -i /data/video/helmet-detection.mp4 -vcodec libx264 -f rtsp rtsp://localhost/video | ||||
| ``` | ``` | ||||
| ### Check Incremental Job Result | |||||
| ### Check Incremental Learning Job | |||||
| query the service status | query the service status | ||||
| ``` | ``` | ||||
| kubectl get incrementallearningjob helmet-detection-demo -n neptune-test | kubectl get incrementallearningjob helmet-detection-demo -n neptune-test | ||||
| ``` | ``` | ||||
| In the `IncrementalLearningJob` resource helmet-detection-demo, the following trigger is configured: | |||||
| ``` | |||||
| trigger: | |||||
| checkPeriodSeconds: 60 | |||||
| timer: | |||||
| start: 02:00 | |||||
| end: 04:00 | |||||
| condition: | |||||
| operator: ">" | |||||
| threshold: 500 | |||||
| metric: num_of_samples | |||||
| ``` | |||||
| In the real world, we need to label the hard examples in `HE_SAVED_URL` with annotation tools and then put the examples into the `Dataset`'s url. | |||||
| Without annotation tools, we can simulate the condition of `num_of_samples` in the following ways: | |||||
| Download [dataset](https://edgeai-neptune.obs.cn-north-1.myhuaweicloud.com/examples/helmet-detection/dataset.tar.gz) to our cloud0 node. | |||||
| ``` | |||||
| cd /data/helmet_detection | |||||
| wget https://edgeai-neptune.obs.cn-north-1.myhuaweicloud.com/examples/helmet-detection/dataset.tar.gz | |||||
| tar -zxvf dataset.tar.gz | |||||
| ``` | |||||
| The LocalController component will check the number of samples, detect that the trigger conditions are met, and notify the GlobalManager component to start the train worker. | |||||
| When the train worker finishes, we can view the updated model in the `/output` directory in cloud0 node. | |||||
| Then the eval worker will start to evaluate the model that the train worker generated. | |||||
| after the job completed, we can view the updated model in the /output directory in cloud0 node | |||||
| If the eval result satisfies the `deploySpec`'s trigger | |||||
| ``` | |||||
| trigger: | |||||
| condition: | |||||
| operator: ">" | |||||
| threshold: 0.1 | |||||
| metric: precision_delta | |||||
| ``` | |||||
| the deploy worker will load the new model and provide service. | |||||
| @@ -215,13 +215,10 @@ class DataGen(object): | |||||
| return image_data, box_data | return image_data, box_data | ||||
| def preprocess_true_boxes(self, true_boxes, in_shape=416): | def preprocess_true_boxes(self, true_boxes, in_shape=416): | ||||
| """ | |||||
| Introduction | |||||
| ------------ | |||||
| 对训练数据的ground truth box进行预处理 | |||||
| Parameters | |||||
| ---------- | |||||
| true_boxes: ground truth box 形状为[boxes, 5], x_min, y_min, x_max, y_max, class_id | |||||
| """Preprocesses the ground truth box of the training data | |||||
| :param true_boxes: ground truth box shape is [boxes, 5], x_min, y_min, | |||||
| x_max, y_max, class_id | |||||
| """ | """ | ||||
| num_layers = self.anchors.shape[0] // 3 | num_layers = self.anchors.shape[0] // 3 | ||||
| @@ -238,20 +235,21 @@ class DataGen(object): | |||||
| grid_shapes = [input_shape // 32, input_shape // 16, input_shape // 8] | grid_shapes = [input_shape // 32, input_shape // 16, input_shape // 8] | ||||
| y_true = [np.zeros((m, grid_shapes[l][0], grid_shapes[l][1], len(anchor_mask[l]), 5 + self.num_classes), | y_true = [np.zeros((m, grid_shapes[l][0], grid_shapes[l][1], len(anchor_mask[l]), 5 + self.num_classes), | ||||
| dtype='float32') for l in range(num_layers)] | dtype='float32') for l in range(num_layers)] | ||||
| # 这里扩充维度是为了后面应用广播计算每个图中所有box的anchor互相之间的iou | |||||
| # The dimension is expanded so that broadcasting can be used to calculate | |||||
| # the IOU between all boxes in each image and the anchors | |||||
| anchors = np.expand_dims(self.anchors, 0) | anchors = np.expand_dims(self.anchors, 0) | ||||
| anchors_max = anchors / 2. | anchors_max = anchors / 2. | ||||
| anchors_min = -anchors_max | anchors_min = -anchors_max | ||||
| # 因为之前对box做了padding, 因此需要去除全0行 | |||||
| # Because the boxes were padded earlier, we need to filter out the all-zero rows | |||||
| valid_mask = boxes_wh[..., 0] > 0 | valid_mask = boxes_wh[..., 0] > 0 | ||||
| for b in range(m): | for b in range(m): | ||||
| wh = boxes_wh[b, valid_mask[b]] | wh = boxes_wh[b, valid_mask[b]] | ||||
| if len(wh) == 0: continue | if len(wh) == 0: continue | ||||
| # 为了应用广播扩充维度 | |||||
| # Expand dimensions so that broadcasting can be applied | |||||
| wh = np.expand_dims(wh, -2) | wh = np.expand_dims(wh, -2) | ||||
| # wh 的shape为[box_num, 1, 2] | |||||
| # wh shape is [box_num, 1, 2] | |||||
| boxes_max = wh / 2. | boxes_max = wh / 2. | ||||
| boxes_min = -boxes_max | boxes_min = -boxes_max | ||||
| @@ -263,7 +261,10 @@ class DataGen(object): | |||||
| anchor_area = anchors[..., 0] * anchors[..., 1] | anchor_area = anchors[..., 0] * anchors[..., 1] | ||||
| iou = intersect_area / (box_area + anchor_area - intersect_area) | iou = intersect_area / (box_area + anchor_area - intersect_area) | ||||
| # 找出和ground truth box的iou最大的anchor box, 然后将对应不同比例的负责该ground turth box 的位置置为ground truth box坐标 | |||||
| # Find the anchor box that has the largest IOU with the ground truth | |||||
| # box, and then set the position responsible for that ground truth | |||||
| # box, at the corresponding scale, to the coordinates of the | |||||
| # ground truth box | |||||
| best_anchor = np.argmax(iou, axis=-1) | best_anchor = np.argmax(iou, axis=-1) | ||||
| for t, n in enumerate(best_anchor): | for t, n in enumerate(best_anchor): | ||||
| for l in range(num_layers): | for l in range(num_layers): | ||||
| @@ -19,13 +19,10 @@ def main(): | |||||
| model = validate | model = validate | ||||
| model = neptune.incremental_learning.evaluate(model=model, | |||||
| test_data=test_data, | |||||
| class_names=class_names, | |||||
| input_shape=input_shape) | |||||
| # Save the model based on the config. | |||||
| # kubeedge_ai.incremental_learning.save_model(model) | |||||
| neptune.incremental_learning.evaluate(model=model, | |||||
| test_data=test_data, | |||||
| class_names=class_names, | |||||
| input_shape=input_shape) | |||||
| if __name__ == '__main__': | if __name__ == '__main__': | ||||
| @@ -165,7 +165,7 @@ class Interface: | |||||
| logging.info("average checkpoints end .......") | logging.info("average checkpoints end .......") | ||||
| def save_model_pb(self): | |||||
| def save_model_pb(self, saved_model_name): | |||||
| """ | """ | ||||
| save model as a single pb file from checkpoint | save model as a single pb file from checkpoint | ||||
| """ | """ | ||||
| @@ -189,6 +189,6 @@ class Interface: | |||||
| print('output_tensors : ', output_tensors) | print('output_tensors : ', output_tensors) | ||||
| output_tensors = [t.op.name for t in output_tensors] | output_tensors = [t.op.name for t in output_tensors] | ||||
| graph = tf.graph_util.convert_variables_to_constants(sess, input_graph_def, output_tensors) | graph = tf.graph_util.convert_variables_to_constants(sess, input_graph_def, output_tensors) | ||||
| tf.train.write_graph(graph, model.model_dir, 'model.pb', False) | |||||
| tf.train.write_graph(graph, model.model_dir, saved_model_name, False) | |||||
| logging.info("save model as .pb end .......") | logging.info("save model as .pb end .......") | ||||
| @@ -19,7 +19,8 @@ def _residual_block_first(x, is_training, out_channel, strides, name="unit"): | |||||
| if strides == 1: | if strides == 1: | ||||
| shortcut = tf.identity(x) | shortcut = tf.identity(x) | ||||
| else: | else: | ||||
| shortcut = tf.nn.max_pool(x, [1, strides, strides, 1], [1, strides, strides, 1], 'VALID') | |||||
| shortcut = tf.nn.max_pool(x, [1, strides, strides, 1], | |||||
| [1, strides, strides, 1], 'VALID') | |||||
| else: | else: | ||||
| shortcut = _conv(x, 1, out_channel, strides, name='shortcut') | shortcut = _conv(x, 1, out_channel, strides, name='shortcut') | ||||
| # Residual | # Residual | ||||
| @@ -58,7 +59,6 @@ def _residual_block(x, is_training, name="unit"): | |||||
| return x | return x | ||||
| # | |||||
| def _conv(x, filter_size, out_channel, strides, name="conv"): | def _conv(x, filter_size, out_channel, strides, name="conv"): | ||||
| """ | """ | ||||
| Helper functions(counts FLOPs and number of weights) | Helper functions(counts FLOPs and number of weights) | ||||
| @@ -66,20 +66,26 @@ def _conv(x, filter_size, out_channel, strides, name="conv"): | |||||
| in_shape = x.get_shape() | in_shape = x.get_shape() | ||||
| with tf.variable_scope(name): | with tf.variable_scope(name): | ||||
| # Main operation: conv2d | # Main operation: conv2d | ||||
| kernel = tf.get_variable('kernel', [filter_size, filter_size, in_shape[3], out_channel], tf.float32, | |||||
| kernel = tf.get_variable('kernel', | |||||
| [filter_size, filter_size, in_shape[3], | |||||
| out_channel], tf.float32, | |||||
| initializer=tf.random_normal_initializer( | initializer=tf.random_normal_initializer( | ||||
| stddev=np.sqrt(2.0 / filter_size / filter_size / out_channel))) | |||||
| stddev=np.sqrt( | |||||
| 2.0 / filter_size / filter_size / out_channel))) | |||||
| if kernel not in tf.get_collection(WEIGHT_DECAY_KEY): | if kernel not in tf.get_collection(WEIGHT_DECAY_KEY): | ||||
| tf.add_to_collection(WEIGHT_DECAY_KEY, kernel) | tf.add_to_collection(WEIGHT_DECAY_KEY, kernel) | ||||
| if strides == 1: | if strides == 1: | ||||
| conv = tf.nn.conv2d(x, kernel, [1, strides, strides, 1], padding='SAME') | |||||
| conv = tf.nn.conv2d(x, kernel, [1, strides, strides, 1], | |||||
| padding='SAME') | |||||
| else: | else: | ||||
| kernel_size_effective = filter_size | kernel_size_effective = filter_size | ||||
| pad_total = kernel_size_effective - 1 | pad_total = kernel_size_effective - 1 | ||||
| pad_beg = pad_total // 2 | pad_beg = pad_total // 2 | ||||
| pad_end = pad_total - pad_beg | pad_end = pad_total - pad_beg | ||||
| x = tf.pad(x, [[0, 0], [pad_beg, pad_end], [pad_beg, pad_end], [0, 0]]) | |||||
| conv = tf.nn.conv2d(x, kernel, [1, strides, strides, 1], padding='VALID') | |||||
| x = tf.pad(x, [[0, 0], [pad_beg, pad_end], [pad_beg, pad_end], | |||||
| [0, 0]]) | |||||
| conv = tf.nn.conv2d(x, kernel, [1, strides, strides, 1], | |||||
| padding='VALID') | |||||
| return conv | return conv | ||||
| @@ -88,8 +94,9 @@ def _fc(x, out_dim, name="fc"): | |||||
| # Main operation: fc | # Main operation: fc | ||||
| with tf.device('/CPU:0'): | with tf.device('/CPU:0'): | ||||
| w = tf.get_variable('weights', [x.get_shape()[1], out_dim], | w = tf.get_variable('weights', [x.get_shape()[1], out_dim], | ||||
| tf.float32, initializer=tf.random_normal_initializer( | |||||
| stddev=np.sqrt(1.0 / out_dim))) | |||||
| tf.float32, | |||||
| initializer=tf.random_normal_initializer( | |||||
| stddev=np.sqrt(1.0 / out_dim))) | |||||
| b = tf.get_variable('biases', [out_dim], tf.float32, | b = tf.get_variable('biases', [out_dim], tf.float32, | ||||
| initializer=tf.constant_initializer(0.0)) | initializer=tf.constant_initializer(0.0)) | ||||
| if w not in tf.get_collection(WEIGHT_DECAY_KEY): | if w not in tf.get_collection(WEIGHT_DECAY_KEY): | ||||
| @@ -100,7 +107,9 @@ def _fc(x, out_dim, name="fc"): | |||||
| def _bn(x, is_training, name="bn"): | def _bn(x, is_training, name="bn"): | ||||
| bn = tf.layers.batch_normalization(inputs=x, momentum=0.99, epsilon=1e-5, | bn = tf.layers.batch_normalization(inputs=x, momentum=0.99, epsilon=1e-5, | ||||
| center=True, scale=True, training=is_training, name=name, fused=True) | |||||
| center=True, scale=True, | |||||
| training=is_training, name=name, | |||||
| fused=True) | |||||
| return bn | return bn | ||||
| @@ -140,17 +149,20 @@ class ResNet18(object): | |||||
| self.end_points['conv2_output'] = x | self.end_points['conv2_output'] = x | ||||
| # conv3_x | # conv3_x | ||||
| x = _residual_block_first(x, is_training, filters[2], strides[2], name='conv3_1') | |||||
| x = _residual_block_first(x, is_training, filters[2], strides[2], | |||||
| name='conv3_1') | |||||
| x = _residual_block(x, is_training, name='conv3_2') | x = _residual_block(x, is_training, name='conv3_2') | ||||
| self.end_points['conv3_output'] = x | self.end_points['conv3_output'] = x | ||||
| # conv4_x | # conv4_x | ||||
| x = _residual_block_first(x, is_training, filters[3], strides[3], name='conv4_1') | |||||
| x = _residual_block_first(x, is_training, filters[3], strides[3], | |||||
| name='conv4_1') | |||||
| x = _residual_block(x, is_training, name='conv4_2') | x = _residual_block(x, is_training, name='conv4_2') | ||||
| self.end_points['conv4_output'] = x | self.end_points['conv4_output'] = x | ||||
| # conv5_x | # conv5_x | ||||
| x = _residual_block_first(x, is_training, filters[4], strides[4], name='conv5_1') | |||||
| x = _residual_block_first(x, is_training, filters[4], strides[4], | |||||
| name='conv5_1') | |||||
| x = _residual_block(x, is_training, name='conv5_2') | x = _residual_block(x, is_training, name='conv5_2') | ||||
| self.end_points['conv5_output'] = x | self.end_points['conv5_output'] = x | ||||
| @@ -0,0 +1,87 @@ | |||||
| import logging | |||||
| import tensorflow as tf | |||||
| import neptune | |||||
| from interface import Interface | |||||
| from neptune.incremental_learning import IncrementalConfig | |||||
| LOG = logging.getLogger(__name__) | |||||
| MODEL_URL = IncrementalConfig().model_url | |||||
| def main(): | |||||
| tf.set_random_seed(22) | |||||
| class_names = neptune.context.get_parameters("class_names") | |||||
| # load dataset. | |||||
| train_data = neptune.load_train_dataset(data_format='txt', | |||||
| with_image=False) | |||||
| # read parameters from deployment config. | |||||
| obj_threshold = neptune.context.get_parameters("obj_threshold") | |||||
| nms_threshold = neptune.context.get_parameters("nms_threshold") | |||||
| input_shape = neptune.context.get_parameters("input_shape") | |||||
| epochs = neptune.context.get_parameters('epochs') | |||||
| batch_size = neptune.context.get_parameters('batch_size') | |||||
| tf.flags.DEFINE_string('train_url', default=MODEL_URL, | |||||
| help='train url for model') | |||||
| tf.flags.DEFINE_string('log_url', default=None, help='log url for model') | |||||
| tf.flags.DEFINE_string('checkpoint_url', default=None, | |||||
| help='checkpoint url for model') | |||||
| tf.flags.DEFINE_string('model_name', default=None, | |||||
| help='url for train annotation files') | |||||
| tf.flags.DEFINE_list('class_names', default=class_names.split(','), | |||||
| # 'helmet,helmet-on,person,helmet-off' | |||||
| help='label names for the training datasets') | |||||
| tf.flags.DEFINE_list('input_shape', | |||||
| default=[int(x) for x in input_shape.split(',')], | |||||
| help='input_shape') # [352, 640] | |||||
| tf.flags.DEFINE_integer('max_epochs', default=epochs, | |||||
| help='training number of epochs') | |||||
| tf.flags.DEFINE_integer('batch_size', default=batch_size, | |||||
| help='training batch size') | |||||
| tf.flags.DEFINE_boolean('load_imagenet_weights', default=False, | |||||
| help='if load imagenet weights or not') | |||||
| tf.flags.DEFINE_string('inference_device', | |||||
| default='GPU', | |||||
| help='which type of device is used to do inference,' | |||||
| ' only CPU, GPU or 310D') | |||||
| tf.flags.DEFINE_boolean('copy_to_local', default=True, | |||||
| help='if load imagenet weights or not') | |||||
| tf.flags.DEFINE_integer('num_gpus', default=1, help='use number of gpus') | |||||
| tf.flags.DEFINE_boolean('finetuning', default=False, | |||||
| help='use number of gpus') | |||||
| tf.flags.DEFINE_boolean('label_changed', default=False, | |||||
| help='whether number of labels is changed or not') | |||||
| tf.flags.DEFINE_string('learning_rate', default='0.001', | |||||
| help='label names for the training datasets') | |||||
| tf.flags.DEFINE_string('obj_threshold', default=obj_threshold, | |||||
| help='label names for the training datasets') | |||||
| tf.flags.DEFINE_string('nms_threshold', default=nms_threshold, | |||||
| help='label names for the training datasets') | |||||
| tf.flags.DEFINE_string('net_type', default='resnet18', | |||||
| help='resnet18 or resnet18_nas') | |||||
| tf.flags.DEFINE_string('nas_sequence', default='64_1-2111-2-1112', | |||||
| help='resnet18 or resnet18_nas') | |||||
| tf.flags.DEFINE_string('deploy_model_format', default=None, | |||||
| help='the format for the converted model') | |||||
| tf.flags.DEFINE_string('result_url', default=None, | |||||
| help='result url for training') | |||||
| model = Interface() | |||||
| neptune.incremental_learning.train(model=model, | |||||
| train_data=train_data, | |||||
| epochs=epochs, | |||||
| batch_size=batch_size, | |||||
| class_names=class_names, | |||||
| input_shape=input_shape, | |||||
| obj_threshold=obj_threshold, | |||||
| nms_threshold=nms_threshold) | |||||
| if __name__ == '__main__': | |||||
| main() | |||||
| @@ -1,18 +1,3 @@ | |||||
| # -*- coding: utf-8 -*- | |||||
| # Copyright 2019 ModelArts Service of Huawei Cloud. All Rights Reserved. | |||||
| # | |||||
| # Licensed under the Apache License, Version 2.0 (the "License"); | |||||
| # you may not use this file except in compliance with the License. | |||||
| # You may obtain a copy of the License at | |||||
| # | |||||
| # http://www.apache.org/licenses/LICENSE-2.0 | |||||
| # | |||||
| # Unless required by applicable law or agreed to in writing, software | |||||
| # distributed under the License is distributed on an "AS IS" BASIS, | |||||
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| # See the License for the specific language governing permissions and | |||||
| # limitations under the License. | |||||
| from __future__ import absolute_import | from __future__ import absolute_import | ||||
| from __future__ import division | from __future__ import division | ||||
| from __future__ import print_function | from __future__ import print_function | ||||
| @@ -40,14 +25,14 @@ def add_path(path): | |||||
| def init_yolo(model_path, input_shape): | def init_yolo(model_path, input_shape): | ||||
| print('model_path : ', model_path) | print('model_path : ', model_path) | ||||
| # 初始化session,需绑定对应的Graph | |||||
| # initialize the session and bind the corresponding graph | |||||
| yolo_graph = tf.Graph() | yolo_graph = tf.Graph() | ||||
| config = tf.ConfigProto(allow_soft_placement=True) | config = tf.ConfigProto(allow_soft_placement=True) | ||||
| config.gpu_options.allow_growth = True | config.gpu_options.allow_growth = True | ||||
| config.gpu_options.per_process_gpu_memory_fraction = 0.1 | config.gpu_options.per_process_gpu_memory_fraction = 0.1 | ||||
| yolo_session = tf.Session(graph=yolo_graph, config=config) | yolo_session = tf.Session(graph=yolo_graph, config=config) | ||||
| # 初始化yoloInference对象 | |||||
| # initialize yoloInference object | |||||
| yolo_infer = YOLOInference(yolo_session, model_path, input_shape) | yolo_infer = YOLOInference(yolo_session, model_path, input_shape) | ||||
| return yolo_infer, yolo_session | return yolo_infer, yolo_session | ||||
| @@ -148,8 +133,6 @@ def validate_img_file(yolo_infer, yolo_session, img_file, bbox_list_ground, fold | |||||
| img_file = img_file.split("/")[-1] | img_file = img_file.split("/")[-1] | ||||
| cv2.imwrite(os.path.join(folder_out, img_file), img) | cv2.imwrite(os.path.join(folder_out, img_file), img) | ||||
| # print ('\tbbox_list_pred : ', bbox_list_pred) | |||||
| # print ('\tbbox_list_ground : ', bbox_list_ground) | |||||
| count_correct = [0 for ix in range(class_num)] | count_correct = [0 for ix in range(class_num)] | ||||
| count_ground = [0 for ix in range(class_num)] | count_ground = [0 for ix in range(class_num)] | ||||
| count_pred = [0 for ix in range(class_num)] | count_pred = [0 for ix in range(class_num)] | ||||
| @@ -163,15 +146,12 @@ def validate_img_file(yolo_infer, yolo_session, img_file, bbox_list_ground, fold | |||||
| for iy in range(count_pred_all): | for iy in range(count_pred_all): | ||||
| bbox_pred = [bbox_list_pred[iy][1], bbox_list_pred[iy][0], bbox_list_pred[iy][3], bbox_list_pred[iy][2]] | bbox_pred = [bbox_list_pred[iy][1], bbox_list_pred[iy][0], bbox_list_pred[iy][3], bbox_list_pred[iy][2]] | ||||
| # bbox_draw_on_img_cv(img_data, bbox_pred, colors[labels[iy]]) | |||||
| # if bbox_pred[2]-bbox_pred[0] > 30 and bbox_pred[3]-bbox_pred[1] > 30: | |||||
| LOG.debug(f'count_pred={count_pred}, labels[iy]={labels[iy]}') | LOG.debug(f'count_pred={count_pred}, labels[iy]={labels[iy]}') | ||||
| count_pred[labels[iy]] += 1 | count_pred[labels[iy]] += 1 | ||||
| for ix in range(count_ground_all): | for ix in range(count_ground_all): | ||||
| bbox_ground = [int(x) for x in bbox_list_ground[ix].split(',')] | bbox_ground = [int(x) for x in bbox_list_ground[ix].split(',')] | ||||
| class_ground = bbox_ground[4] | class_ground = bbox_ground[4] | ||||
| # bbox_draw_on_img_cv(img_data, bbox_ground, (40, 39, 214)) | |||||
| if labels[iy] == class_ground: | if labels[iy] == class_ground: | ||||
| iou = calc_iou(bbox_pred, bbox_ground) | iou = calc_iou(bbox_pred, bbox_ground) | ||||
| @@ -225,8 +205,8 @@ def draw_boxes(img, labels, scores, bboxes, class_names, colors): | |||||
| def calc_iou(bbox_pred, bbox_ground): | def calc_iou(bbox_pred, bbox_ground): | ||||
| """ | |||||
| 自定义函数,计算两矩形 IOU,传入为均为矩形对角线,(x,y) 坐标。 | |||||
| """user-defined function for calculating the IOU of two rectangles. The | |||||
| input parameters are the diagonal corner (x, y) coordinates of each rectangle | |||||
| """ | """ | ||||
| x1 = bbox_pred[0] | x1 = bbox_pred[0] | ||||
| y1 = bbox_pred[1] | y1 = bbox_pred[1] | ||||
| @@ -247,9 +227,9 @@ def calc_iou(bbox_pred, bbox_ground): | |||||
| height = height1 + height2 - (endy - starty) | height = height1 + height2 - (endy - starty) | ||||
| if width <= 0 or height <= 0: | if width <= 0 or height <= 0: | ||||
| iou = 0 # 重叠率为 0 | |||||
| iou = 0 | |||||
| else: | else: | ||||
| area = width * height # 两矩形相交面积 | |||||
| area = width * height | |||||
| area1 = width1 * height1 | area1 = width1 * height1 | ||||
| area2 = width2 * height2 | area2 = width2 * height2 | ||||
| iou = area * 1. / (area1 + area2 - area) | iou = area * 1. / (area1 + area2 - area) | ||||
| @@ -14,13 +14,6 @@ flags = tf.flags.FLAGS | |||||
| class Yolo3: | class Yolo3: | ||||
| def __init__(self, sess, is_training, config): | def __init__(self, sess, is_training, config): | ||||
| """ | |||||
| Introduction | |||||
| ------------ | |||||
| 初始化函数 | |||||
| ---------- | |||||
| """ | |||||
| LOG.info('is_training: %s' % is_training) | LOG.info('is_training: %s' % is_training) | ||||
| LOG.info('model dir: %s' % flags.train_url) | LOG.info('model dir: %s' % flags.train_url) | ||||
| LOG.info('input_shape: (%d, %d)' % (flags.input_shape[0], flags.input_shape[1])) | LOG.info('input_shape: (%d, %d)' % (flags.input_shape[0], flags.input_shape[1])) | ||||
| @@ -112,8 +105,7 @@ class Yolo3: | |||||
| sess.run(v.assign(data[vname])) | sess.run(v.assign(data[vname])) | ||||
| def step(self, sess, batch_data, is_training): | def step(self, sess, batch_data, is_training): | ||||
| """ | |||||
| step, read one batch, generate gradients | |||||
| """step, read one batch, generate gradients | |||||
| """ | """ | ||||
| # Input feed | # Input feed | ||||
| @@ -132,21 +124,19 @@ class Yolo3: | |||||
| return outputs[0] # loss | return outputs[0] # loss | ||||
| def _batch_normalization_layer(self, input_layer, name=None, training=True, norm_decay=0.997, norm_epsilon=1e-5): | def _batch_normalization_layer(self, input_layer, name=None, training=True, norm_decay=0.997, norm_epsilon=1e-5): | ||||
| ''' | |||||
| Introduction | |||||
| ------------ | |||||
| 对卷积层提取的feature map使用batch normalization | |||||
| Parameters | |||||
| ---------- | |||||
| input_layer: 输入的四维tensor | |||||
| name: batchnorm层的名字 | |||||
| trainging: 是否为训练过程 | |||||
| norm_decay: 在预测时计算moving average时的衰减率 | |||||
| norm_epsilon: 方差加上极小的数,防止除以0的情况 | |||||
| Returns | |||||
| ------- | |||||
| bn_layer: batch normalization处理之后的feature map | |||||
| ''' | |||||
| """Batch normalization is used for feature map extracted from | |||||
| convolution layer | |||||
| :param input_layer: four dimensional tensor of input | |||||
| :param name: the name of batchnorm layer | |||||
| :param training: is training or not | |||||
| :param norm_decay: decay rate used when computing the moving | |||||
| average during prediction | |||||
| :param norm_epsilon: small constant added to the variance to | |||||
| prevent division by 0 | |||||
| :return bn_layer: feature map after batch normalization | |||||
| """ | |||||
| bn_layer = tf.layers.batch_normalization(inputs=input_layer, | bn_layer = tf.layers.batch_normalization(inputs=input_layer, | ||||
| momentum=norm_decay, epsilon=norm_epsilon, center=True, | momentum=norm_decay, epsilon=norm_epsilon, center=True, | ||||
| scale=True, training=training, name=name, fused=True) | scale=True, training=training, name=name, fused=True) | ||||
| @@ -154,29 +144,20 @@ class Yolo3: | |||||
| # return tf.nn.leaky_relu(bn_layer, alpha = 0.1) | # return tf.nn.leaky_relu(bn_layer, alpha = 0.1) | ||||
| def _conv2d_layer(self, inputs, filters_num, kernel_size, name, use_bias=False, strides=1): | def _conv2d_layer(self, inputs, filters_num, kernel_size, name, use_bias=False, strides=1): | ||||
| """ | |||||
| Introduction | |||||
| ------------ | |||||
| 使用tf.layers.conv2d减少权重和偏置矩阵初始化过程,以及卷积后加上偏置项的操作 | |||||
| 经过卷积之后需要进行batch norm,最后使用leaky ReLU激活函数 | |||||
| 根据卷积时的步长,如果卷积的步长为2,则对图像进行降采样 | |||||
| 比如,输入图片的大小为416*416,卷积核大小为3,若stride为2时,(416 - 3 + 2)/ 2 + 1, 计算结果为208,相当于做了池化层处理 | |||||
| 因此需要对stride大于1的时候,先进行一个padding操作, 采用四周都padding一维代替'same'方式 | |||||
| Parameters | |||||
| ---------- | |||||
| inputs: 输入变量 | |||||
| filters_num: 卷积核数量 | |||||
| strides: 卷积步长 | |||||
| name: 卷积层名字 | |||||
| trainging: 是否为训练过程 | |||||
| use_bias: 是否使用偏置项 | |||||
| kernel_size: 卷积核大小 | |||||
| Returns | |||||
| ------- | |||||
| conv: 卷积之后的feature map | |||||
| """Use tf.layers.conv2d to simplify the weight and bias matrix | |||||
| initialization, as well as adding the bias term after convolution | |||||
| :param inputs: Input variables | |||||
| :param filters_num: Number of convolution kernels | |||||
| :param strides: Convolution stride | |||||
| :param name: Convolution layer name | |||||
| :param training: is a training process or not | |||||
| :param use_bias: use bias or not | |||||
| :param kernel_size: the kernels size | |||||
| :return conv: Feature map after convolution | |||||
| """ | """ | ||||
| if strides > 1: # modified 0327 | if strides > 1: # modified 0327 | ||||
| # 在输入feature map的长宽维度进行padding | |||||
| inputs = tf.pad(inputs, paddings=[[0, 0], [1, 0], [1, 0], [0, 0]], mode='CONSTANT') | inputs = tf.pad(inputs, paddings=[[0, 0], [1, 0], [1, 0], [0, 0]], mode='CONSTANT') | ||||
| conv = tf.layers.conv2d(inputs=inputs, filters=filters_num, | conv = tf.layers.conv2d(inputs=inputs, filters=filters_num, | ||||
| kernel_size=kernel_size, strides=[strides, strides], | kernel_size=kernel_size, strides=[strides, strides], | ||||
| @@ -187,25 +168,6 @@ class Yolo3: | |||||
| def _Residual_block(self, inputs, filters_num, blocks_num, conv_index, training=True, norm_decay=0.997, | def _Residual_block(self, inputs, filters_num, blocks_num, conv_index, training=True, norm_decay=0.997, | ||||
| norm_epsilon=1e-5): | norm_epsilon=1e-5): | ||||
| """ | |||||
| Introduction | |||||
| ------------ | |||||
| Darknet的残差block,类似resnet的两层卷积结构,分别采用1x1和3x3的卷积核,使用1x1是为了减少channel的维度 | |||||
| Parameters | |||||
| ---------- | |||||
| inputs: 输入变量 | |||||
| filters_num: 卷积核数量 | |||||
| trainging: 是否为训练过程 | |||||
| blocks_num: block的数量 | |||||
| conv_index: 为了方便加载预训练权重,统一命名序号 | |||||
| weights_dict: 加载预训练模型的权重 | |||||
| norm_decay: 在预测时计算moving average时的衰减率 | |||||
| norm_epsilon: 方差加上极小的数,防止除以0的情况 | |||||
| Returns | |||||
| ------- | |||||
| inputs: 经过残差网络处理后的结果 | |||||
| """ | |||||
| layer = self._conv2d_layer(inputs, filters_num, kernel_size=3, strides=2, name="conv2d_" + str(conv_index)) | layer = self._conv2d_layer(inputs, filters_num, kernel_size=3, strides=2, name="conv2d_" + str(conv_index)) | ||||
| layer = self._batch_normalization_layer(layer, name="batch_normalization_" + str(conv_index), training=training, | layer = self._batch_normalization_layer(layer, name="batch_normalization_" + str(conv_index), training=training, | ||||
| norm_decay=norm_decay, norm_epsilon=norm_epsilon) | norm_decay=norm_decay, norm_epsilon=norm_epsilon) | ||||
| @@ -237,25 +199,6 @@ class Yolo3: | |||||
| def _yolo_block(self, inputs, filters_num, out_filters, conv_index, training=True, norm_decay=0.997, | def _yolo_block(self, inputs, filters_num, out_filters, conv_index, training=True, norm_decay=0.997, | ||||
| norm_epsilon=1e-5): | norm_epsilon=1e-5): | ||||
| """ | |||||
| Introduction | |||||
| ------------ | |||||
| yolo3在Darknet53提取的特征层基础上,又加了针对3种不同比例的feature map的block,这样来提高对小物体的检测率 | |||||
| Parameters | |||||
| ---------- | |||||
| inputs: 输入特征 | |||||
| filters_num: 卷积核数量 | |||||
| out_filters: 最后输出层的卷积核数量 | |||||
| conv_index: 卷积层数序号,方便根据名字加载预训练权重 | |||||
| training: 是否为训练 | |||||
| norm_decay: 在预测时计算moving average时的衰减率 | |||||
| norm_epsilon: 方差加上极小的数,防止除以0的情况 | |||||
| Returns | |||||
| ------- | |||||
| route: 返回最后一层卷积的前一层结果 | |||||
| conv: 返回最后一层卷积的结果 | |||||
| conv_index: conv层计数 | |||||
| """ | |||||
| conv = self._conv2d_layer(inputs, filters_num=filters_num, kernel_size=1, strides=1, | conv = self._conv2d_layer(inputs, filters_num=filters_num, kernel_size=1, strides=1, | ||||
| name="conv2d_" + str(conv_index)) | name="conv2d_" + str(conv_index)) | ||||
| conv = self._batch_normalization_layer(conv, name="batch_normalization_" + str(conv_index), training=training, | conv = self._batch_normalization_layer(conv, name="batch_normalization_" + str(conv_index), training=training, | ||||
| @@ -293,18 +236,6 @@ class Yolo3: | |||||
| return route, conv, conv_index | return route, conv, conv_index | ||||
| def yolo_inference(self, features_out, filters_yolo_block, conv_index, num_anchors, num_classes, training=True): | def yolo_inference(self, features_out, filters_yolo_block, conv_index, num_anchors, num_classes, training=True): | ||||
| """ | |||||
| Introduction | |||||
| ------------ | |||||
| 构建yolo模型结构 | |||||
| Parameters | |||||
| ---------- | |||||
| inputs: 模型的输入变量 | |||||
| num_anchors: 每个grid cell负责检测的anchor数量 | |||||
| num_classes: 类别数量 | |||||
| training: 是否为训练模式 | |||||
| """ | |||||
| conv = features_out[0] | conv = features_out[0] | ||||
| conv2d_45 = features_out[1] | conv2d_45 = features_out[1] | ||||
| conv2d_26 = features_out[2] | conv2d_26 = features_out[2] | ||||
| @@ -368,36 +299,15 @@ class Yolo3: | |||||
| return [conv2d_59, conv2d_67, conv2d_75] | return [conv2d_59, conv2d_67, conv2d_75] | ||||
| def yolo_head(self, feats, anchors, num_classes, input_shape, training=True): | def yolo_head(self, feats, anchors, num_classes, input_shape, training=True): | ||||
| """ | |||||
| Introduction | |||||
| ------------ | |||||
| 根据不同大小的feature map做多尺度的检测,三种feature map大小分别为13x13x1024, 26x26x512, 52x52x256 | |||||
| Parameters | |||||
| ---------- | |||||
| feats: 输入的特征feature map | |||||
| anchors: 针对不同大小的feature map的anchor | |||||
| num_classes: 类别的数量 | |||||
| input_shape: 图像的输入大小,一般为416 | |||||
| trainging: 是否训练,用来控制返回不同的值 | |||||
| Returns | |||||
| ------- | |||||
| """ | |||||
| print('feats : ', feats) | |||||
| print('anchors : ', anchors) | |||||
| print('input_shape : ', input_shape) | |||||
| num_anchors = len(anchors) | num_anchors = len(anchors) | ||||
| anchors_tensor = tf.reshape(tf.constant(anchors, dtype=tf.float32), [1, 1, 1, num_anchors, 2]) | anchors_tensor = tf.reshape(tf.constant(anchors, dtype=tf.float32), [1, 1, 1, num_anchors, 2]) | ||||
| grid_size = tf.shape(feats)[1:3] | grid_size = tf.shape(feats)[1:3] | ||||
| predictions = tf.reshape(feats, [-1, grid_size[0], grid_size[1], num_anchors, num_classes + 5]) | predictions = tf.reshape(feats, [-1, grid_size[0], grid_size[1], num_anchors, num_classes + 5]) | ||||
| # 这里构建13*13*1*2的矩阵,对应每个格子加上对应的坐标 | |||||
| grid_y = tf.tile(tf.reshape(tf.range(grid_size[0]), [-1, 1, 1, 1]), [1, grid_size[1], 1, 1]) | grid_y = tf.tile(tf.reshape(tf.range(grid_size[0]), [-1, 1, 1, 1]), [1, grid_size[1], 1, 1]) | ||||
| grid_x = tf.tile(tf.reshape(tf.range(grid_size[1]), [1, -1, 1, 1]), [grid_size[0], 1, 1, 1]) | grid_x = tf.tile(tf.reshape(tf.range(grid_size[1]), [1, -1, 1, 1]), [grid_size[0], 1, 1, 1]) | ||||
| grid = tf.concat([grid_x, grid_y], axis=-1) | grid = tf.concat([grid_x, grid_y], axis=-1) | ||||
| grid = tf.cast(grid, tf.float32) | grid = tf.cast(grid, tf.float32) | ||||
| # 将x,y坐标归一化为占416的比例 | |||||
| box_xy = (tf.sigmoid(predictions[..., :2]) + grid) / tf.cast(grid_size[::-1], tf.float32) | box_xy = (tf.sigmoid(predictions[..., :2]) + grid) / tf.cast(grid_size[::-1], tf.float32) | ||||
| # 将w,h也归一化为占416的比例 | |||||
| box_wh = tf.exp(predictions[..., 2:4]) * anchors_tensor / input_shape[::-1] | box_wh = tf.exp(predictions[..., 2:4]) * anchors_tensor / input_shape[::-1] | ||||
| box_confidence = tf.sigmoid(predictions[..., 4:5]) | box_confidence = tf.sigmoid(predictions[..., 4:5]) | ||||
| box_class_probs = tf.sigmoid(predictions[..., 5:]) | box_class_probs = tf.sigmoid(predictions[..., 5:]) | ||||
| @@ -406,18 +316,6 @@ class Yolo3: | |||||
| return box_xy, box_wh, box_confidence, box_class_probs | return box_xy, box_wh, box_confidence, box_class_probs | ||||
| def yolo_boxes_scores(self, feats, anchors, num_classes, input_shape, image_shape): | def yolo_boxes_scores(self, feats, anchors, num_classes, input_shape, image_shape): | ||||
| """ | |||||
| Introduction | |||||
| ------------ | |||||
| 该函数是将box的坐标修正,除去之前按照长宽比缩放填充的部分,最后将box的坐标还原成相对原始图片的 | |||||
| Parameters | |||||
| ---------- | |||||
| feats: 模型输出feature map | |||||
| anchors: 模型anchors | |||||
| num_classes: 数据集类别数 | |||||
| input_shape: 训练输入图片大小 | |||||
| image_shape: 原始图片的大小 | |||||
| """ | |||||
| input_shape = tf.cast(input_shape, tf.float32) | input_shape = tf.cast(input_shape, tf.float32) | ||||
| image_shape = tf.cast(image_shape, tf.float32) | image_shape = tf.cast(image_shape, tf.float32) | ||||
| box_xy, box_wh, box_confidence, box_class_probs = self.yolo_head(feats, anchors, num_classes, input_shape, | box_xy, box_wh, box_confidence, box_class_probs = self.yolo_head(feats, anchors, num_classes, input_shape, | ||||
| @@ -446,18 +344,6 @@ class Yolo3: | |||||
| return boxes, boxes_scores | return boxes, boxes_scores | ||||
| def box_iou(self, box1, box2): | def box_iou(self, box1, box2): | ||||
| """ | |||||
| Introduction | |||||
| ------------ | |||||
| 计算box tensor之间的iou | |||||
| Parameters | |||||
| ---------- | |||||
| box1: shape=[grid_size, grid_size, anchors, xywh] | |||||
| box2: shape=[box_num, xywh] | |||||
| Returns | |||||
| ------- | |||||
| iou: | |||||
| """ | |||||
| box1 = tf.expand_dims(box1, -2) | box1 = tf.expand_dims(box1, -2) | ||||
| box1_xy = box1[..., :2] | box1_xy = box1[..., :2] | ||||
| box1_wh = box1[..., 2:4] | box1_wh = box1[..., 2:4] | ||||
| @@ -480,35 +366,16 @@ class Yolo3: | |||||
| return iou | return iou | ||||
| def yolo_loss(self, yolo_output, y_true, anchors, num_classes, ignore_thresh=.5): | def yolo_loss(self, yolo_output, y_true, anchors, num_classes, ignore_thresh=.5): | ||||
| """ | |||||
| Introduction | |||||
| ------------ | |||||
| yolo模型的损失函数 | |||||
| Parameters | |||||
| ---------- | |||||
| yolo_output: yolo模型的输出 | |||||
| y_true: 经过预处理的真实标签,shape为[batch, grid_size, grid_size, 5 + num_classes] | |||||
| anchors: yolo模型对应的anchors | |||||
| num_classes: 类别数量 | |||||
| ignore_thresh: 小于该阈值的box我们认为没有物体 | |||||
| Returns | |||||
| ------- | |||||
| loss: 每个batch的平均损失值 | |||||
| accuracy | |||||
| """ | |||||
| loss = 0.0 | loss = 0.0 | ||||
| anchor_mask = [[6, 7, 8], [3, 4, 5], [0, 1, 2]] | anchor_mask = [[6, 7, 8], [3, 4, 5], [0, 1, 2]] | ||||
| input_shape = tf.shape(yolo_output[0])[1: 3] * 32 | input_shape = tf.shape(yolo_output[0])[1: 3] * 32 | ||||
| input_shape = tf.cast(input_shape, tf.float32) | input_shape = tf.cast(input_shape, tf.float32) | ||||
| grid_shapes = [tf.cast(tf.shape(yolo_output[l])[1:3], tf.float32) for l in range(3)] | grid_shapes = [tf.cast(tf.shape(yolo_output[l])[1:3], tf.float32) for l in range(3)] | ||||
| for index in range(3): | for index in range(3): | ||||
| # 只有负责预测ground truth box的grid对应的为1, 才计算相对应的loss | |||||
| # object_mask的shape为[batch_size, grid_size, grid_size, 3, 1] | |||||
| object_mask = y_true[index][..., 4:5] | object_mask = y_true[index][..., 4:5] | ||||
| class_probs = y_true[index][..., 5:] | class_probs = y_true[index][..., 5:] | ||||
| grid, predictions, pred_xy, pred_wh = self.yolo_head(yolo_output[index], anchors[anchor_mask[index]], | grid, predictions, pred_xy, pred_wh = self.yolo_head(yolo_output[index], anchors[anchor_mask[index]], | ||||
| num_classes, input_shape, training=True) | num_classes, input_shape, training=True) | ||||
| # pred_box的shape为[batch, box_num, 4] | |||||
| pred_box = tf.concat([pred_xy, pred_wh], axis=-1) | pred_box = tf.concat([pred_xy, pred_wh], axis=-1) | ||||
| raw_true_xy = y_true[index][..., :2] * grid_shapes[index][::-1] - grid | raw_true_xy = y_true[index][..., :2] * grid_shapes[index][::-1] - grid | ||||
| object_mask_bool = tf.cast(object_mask, dtype=tf.bool) | object_mask_bool = tf.cast(object_mask, dtype=tf.bool) | ||||
| @@ -516,16 +383,13 @@ class Yolo3: | |||||
| tf.where(tf.equal(y_true[index][..., 2:4] / anchors[anchor_mask[index]] * input_shape[::-1], 0), | tf.where(tf.equal(y_true[index][..., 2:4] / anchors[anchor_mask[index]] * input_shape[::-1], 0), | ||||
| tf.ones_like(y_true[index][..., 2:4]), | tf.ones_like(y_true[index][..., 2:4]), | ||||
| y_true[index][..., 2:4] / anchors[anchor_mask[index]] * input_shape[::-1])) | y_true[index][..., 2:4] / anchors[anchor_mask[index]] * input_shape[::-1])) | ||||
| # 该系数是用来调整box坐标loss的系数 | |||||
| box_loss_scale = 2 - y_true[index][..., 2:3] * y_true[index][..., 3:4] | box_loss_scale = 2 - y_true[index][..., 2:3] * y_true[index][..., 3:4] | ||||
| ignore_mask = tf.TensorArray(dtype=tf.float32, size=1, dynamic_size=True) | ignore_mask = tf.TensorArray(dtype=tf.float32, size=1, dynamic_size=True) | ||||
| def loop_body(internal_index, ignore_mask): | def loop_body(internal_index, ignore_mask): | ||||
| # true_box的shape为[box_num, 4] | |||||
| true_box = tf.boolean_mask(y_true[index][internal_index, ..., 0:4], | true_box = tf.boolean_mask(y_true[index][internal_index, ..., 0:4], | ||||
| object_mask_bool[internal_index, ..., 0]) | object_mask_bool[internal_index, ..., 0]) | ||||
| iou = self.box_iou(pred_box[internal_index], true_box) | iou = self.box_iou(pred_box[internal_index], true_box) | ||||
| # 计算每个true_box对应的预测的iou最大的box | |||||
| best_iou = tf.reduce_max(iou, axis=-1) | best_iou = tf.reduce_max(iou, axis=-1) | ||||
| ignore_mask = ignore_mask.write(internal_index, tf.cast(best_iou < ignore_thresh, tf.float32)) | ignore_mask = ignore_mask.write(internal_index, tf.cast(best_iou < ignore_thresh, tf.float32)) | ||||
| return internal_index + 1, ignore_mask | return internal_index + 1, ignore_mask | ||||
| @@ -535,7 +399,6 @@ class Yolo3: | |||||
| [0, ignore_mask]) | [0, ignore_mask]) | ||||
| ignore_mask = ignore_mask.stack() | ignore_mask = ignore_mask.stack() | ||||
| ignore_mask = tf.expand_dims(ignore_mask, axis=-1) | ignore_mask = tf.expand_dims(ignore_mask, axis=-1) | ||||
| # 计算四个部分的loss | |||||
| xy_loss = object_mask * box_loss_scale * tf.nn.sigmoid_cross_entropy_with_logits( | xy_loss = object_mask * box_loss_scale * tf.nn.sigmoid_cross_entropy_with_logits( | ||||
| labels=raw_true_xy, | labels=raw_true_xy, | ||||
| logits=predictions[..., 0:2]) | logits=predictions[..., 0:2]) | ||||
| @@ -557,27 +420,11 @@ class Yolo3: | |||||
| return loss | return loss | ||||
| def yolo_eval(self, yolo_outputs, image_shape, max_boxes=20): | def yolo_eval(self, yolo_outputs, image_shape, max_boxes=20): | ||||
| """ | |||||
| Introduction | |||||
| ------------ | |||||
| 根据Yolo模型的输出进行非极大值抑制,获取最后的物体检测框和物体检测类别 | |||||
| Parameters | |||||
| ---------- | |||||
| yolo_outputs: yolo模型输出 | |||||
| image_shape: 图片的大小 | |||||
| max_boxes: 最大box数量 | |||||
| Returns | |||||
| ------- | |||||
| boxes_: 物体框的位置 | |||||
| scores_: 物体类别的概率 | |||||
| classes_: 物体类别 | |||||
| """ | |||||
| with tf.variable_scope('boxes_scores'): | with tf.variable_scope('boxes_scores'): | ||||
| anchor_mask = [[6, 7, 8], [3, 4, 5], [0, 1, 2]] | anchor_mask = [[6, 7, 8], [3, 4, 5], [0, 1, 2]] | ||||
| boxes = [] | boxes = [] | ||||
| box_scores = [] | box_scores = [] | ||||
| input_shape = tf.shape(yolo_outputs[0])[1: 3] * 32 | input_shape = tf.shape(yolo_outputs[0])[1: 3] * 32 | ||||
| # 对三个尺度的输出获取每个预测box坐标和box的分数,score计算为置信度x类别概率 | |||||
| for i in range(len(yolo_outputs)): | for i in range(len(yolo_outputs)): | ||||
| _boxes, _box_scores = self.yolo_boxes_scores(yolo_outputs[i], self.anchors[anchor_mask[i]], | _boxes, _box_scores = self.yolo_boxes_scores(yolo_outputs[i], self.anchors[anchor_mask[i]], | ||||
| len(self.class_names), input_shape, image_shape) | len(self.class_names), input_shape, image_shape) | ||||
| @@ -627,9 +474,6 @@ class YoloConfig: | |||||
| norm_decay = 0.99 | norm_decay = 0.99 | ||||
| norm_epsilon = 1e-5 | norm_epsilon = 1e-5 | ||||
| ignore_thresh = 0.5 | ignore_thresh = 0.5 | ||||
| # learning_rate = 1e-3 | |||||
| # obj_threshold = 0.3 | |||||
| # nms_threshold = 0.4 | |||||
| class YOLOInference(object): | class YOLOInference(object): | ||||
| @@ -20,6 +20,7 @@ class IncrementalConfig(BaseConfig): | |||||
| BaseConfig.__init__(self) | BaseConfig.__init__(self) | ||||
| self.model_urls = os.getenv("MODEL_URLS") | self.model_urls = os.getenv("MODEL_URLS") | ||||
| self.base_model_url = os.getenv("BASE_MODEL_URL") | self.base_model_url = os.getenv("BASE_MODEL_URL") | ||||
| self.saved_model_name = "model.pb" | |||||
| def train(model, train_data, epochs, batch_size, class_names, input_shape, | def train(model, train_data, epochs, batch_size, class_names, input_shape, | ||||
| @@ -40,12 +41,12 @@ def train(model, train_data, epochs, batch_size, class_names, input_shape, | |||||
| clean_folder(il_config.model_url) | clean_folder(il_config.model_url) | ||||
| model.train(train_data, []) # validation data is empty. | model.train(train_data, []) # validation data is empty. | ||||
| tf.reset_default_graph() | tf.reset_default_graph() | ||||
| model.save_model_pb() | |||||
| model.save_model_pb(il_config.saved_model_name) | |||||
| ckpt_model_url = remove_path_prefix(il_config.model_url, | ckpt_model_url = remove_path_prefix(il_config.model_url, | ||||
| il_config.data_path_prefix) | il_config.data_path_prefix) | ||||
| pb_model_url = remove_path_prefix( | pb_model_url = remove_path_prefix( | ||||
| os.path.join(il_config.model_url, 'model.pb'), | |||||
| os.path.join(il_config.model_url, il_config.saved_model_name), | |||||
| il_config.data_path_prefix) | il_config.data_path_prefix) | ||||
| # TODO delete metrics whether affect lc | # TODO delete metrics whether affect lc | ||||
| @@ -156,8 +157,8 @@ class Inference: | |||||
| def inference(self, img_data) -> InferenceResult: | def inference(self, img_data) -> InferenceResult: | ||||
| result = self.model.inference(img_data) | result = self.model.inference(img_data) | ||||
| bboxes = deal_infer_rsl(result) | |||||
| is_hard_example = self.hard_example_mining_algorithm.hard_judge(bboxes) | |||||
| rsl = deal_infer_rsl(result) | |||||
| is_hard_example = self.hard_example_mining_algorithm.hard_judge(rsl) | |||||
| if is_hard_example: | if is_hard_example: | ||||
| return InferenceResult(True, result) | return InferenceResult(True, result) | ||||
| else: | else: | ||||
| @@ -166,9 +167,9 @@ class Inference: | |||||
| def deal_infer_rsl(model_output): | def deal_infer_rsl(model_output): | ||||
| all_classes, all_scores, all_bboxes = model_output | all_classes, all_scores, all_bboxes = model_output | ||||
| bboxes = [] | |||||
| rsl = [] | |||||
| for c, s, bbox in zip(all_classes, all_scores, all_bboxes): | for c, s, bbox in zip(all_classes, all_scores, all_bboxes): | ||||
| bbox[0], bbox[1], bbox[2], bbox[3] = bbox[1], bbox[0], bbox[3], bbox[2] | bbox[0], bbox[1], bbox[2], bbox[3] = bbox[1], bbox[0], bbox[3], bbox[2] | ||||
| bboxes.append(bbox.tolist() + [s, c]) | |||||
| rsl.append(bbox.tolist() + [s, c]) | |||||
| return bboxes | |||||
| return rsl | |||||
| @@ -4,5 +4,3 @@ opencv-python==4.4.0.44 | |||||
| websockets==8.1 | websockets==8.1 | ||||
| Pillow==8.0.1 | Pillow==8.0.1 | ||||
| requests==2.24.0 | requests==2.24.0 | ||||
| tqdm==4.56.0 | |||||
| matplotlib==3.3.3 | |||||