- # Copyright 2021 The KubeEdge Authors.
- #
- # Licensed under the Apache License, Version 2.0 (the "License");
- # you may not use this file except in compliance with the License.
- # You may obtain a copy of the License at
- #
- # http://www.apache.org/licenses/LICENSE-2.0
- #
- # Unless required by applicable law or agreed to in writing, software
- # distributed under the License is distributed on an "AS IS" BASIS,
- # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- # See the License for the specific language governing permissions and
- # limitations under the License.
- import os
- import logging
-
- import cv2
- import numpy as np
- import tensorflow as tf
-
- from resnet18 import ResNet18
-
- LOG = logging.getLogger(__name__)
- flags = tf.flags.FLAGS
-
-
- class Yolo3:
-
- def __init__(self, sess, is_training, config):
- LOG.info('is_training: %s' % is_training)
- LOG.info('model dir: %s' % flags.train_url)
- LOG.info('input_shape: (%d, %d)' %
- (flags.input_shape[0], flags.input_shape[1]))
- LOG.info('learning rate: %f' % float(flags.learning_rate))
-
- self.is_training = is_training
- self.model_dir = flags.train_url
- self.norm_epsilon = config.norm_epsilon
- self.norm_decay = config.norm_decay
- self.obj_threshold = float(flags.obj_threshold)
- self.nms_threshold = float(flags.nms_threshold)
-
- self.anchors = np.array([float(x)
- for x in config.anchors]).reshape(-1, 2)
- self.class_names = flags.class_names
- self.num_classes = len(self.class_names)
- self.input_shape = flags.input_shape
- self.nas_sequence = flags.nas_sequence
-
- if not os.path.exists(self.model_dir):
- os.makedirs(self.model_dir)
- print("anchors : ", self.anchors)
- print("class_names : ", self.class_names)
-
- if is_training:
- self.images = tf.placeholder(
- shape=[None, None, None, 3],
- dtype=tf.float32, name='images')
- else:
- self.images = tf.placeholder(
- shape=[1, self.input_shape[0], self.input_shape[1], 3],
- dtype=tf.float32, name='images')
-
- self.image_shape = tf.placeholder(
- dtype=tf.int32, shape=(2,), name='shapes')
-
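- # Ground truth is fed at the three detection scales (13x13, 26x26 and
- # 52x52 grids for a 416x416 input); each cell holds 3 anchors with
- # (x, y, w, h, objectness) plus one-hot classes = num_classes + 5 values.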
- self.bbox_true_13 = tf.placeholder(
- shape=[None, None, None, 3, self.num_classes + 5],
- dtype=tf.float32)
- self.bbox_true_26 = tf.placeholder(
- shape=[None, None, None, 3, self.num_classes + 5],
- dtype=tf.float32)
- self.bbox_true_52 = tf.placeholder(
- shape=[None, None, None, 3, self.num_classes + 5],
- dtype=tf.float32)
- bbox_true = [self.bbox_true_13, self.bbox_true_26, self.bbox_true_52]
-
- features_out, filters_yolo_block, conv_index = self._resnet18(
- self.images, self.is_training)
-
- self.output = self.yolo_inference(
- features_out, filters_yolo_block, conv_index,
- len(self.anchors) // 3, self.num_classes, self.is_training)
- self.loss = self.yolo_loss(
- self.output,
- bbox_true,
- self.anchors,
- self.num_classes,
- config.ignore_thresh)
-
- self.global_step = tf.Variable(0, trainable=False)
-
- if self.is_training:
- learning_rate = tf.train.exponential_decay(
- float(flags.learning_rate), self.global_step,
- 1000, 0.95, staircase=True)
- optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)
- update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
- with tf.control_dependencies(update_ops):
- self.train_op = optimizer.minimize(
- loss=self.loss, global_step=self.global_step)
- else:
- self.boxes, self.scores, self.classes = self.yolo_eval(
- self.output, self.image_shape, config.max_boxes)
-
- self.saver = tf.train.Saver()
-
- ckpt = tf.train.get_checkpoint_state(flags.train_url)
- if ckpt and tf.train.checkpoint_exists(ckpt.model_checkpoint_path):
- if not flags.label_changed:
- print('restore model', ckpt.model_checkpoint_path)
- self.saver.restore(sess, ckpt.model_checkpoint_path)
- else:
- print('restore model', ckpt.model_checkpoint_path)
- sess.run(tf.global_variables_initializer())
- sess.run(tf.local_variables_initializer())
-
- variables = tf.global_variables()
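- # conv2d_25/33/41 are the three 1x1 output convolutions whose filter
- # count depends on the number of classes; when the label set changed,
- # they are re-initialized (together with the Adam slots) instead of
- # being restored from the checkpoint.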
- vars_restore = [var for var in variables
- if not ("Adam" in var.name
- or '25' in var.name
- or '33' in var.name
- or '41' in var.name)]
- saver_restore = tf.train.Saver(vars_restore)
- saver_restore.restore(sess, ckpt.model_checkpoint_path)
- else:
- print('initialize model with fresh weights...')
- sess.run(tf.global_variables_initializer())
- sess.run(tf.local_variables_initializer())
-
- def load_weights(self, sess, fpath):
- """Assign values from an .npz file to the matching graph variables."""
- variables = sess.graph.get_collection("variables")
- data = np.load(fpath)
- for v in variables:
- vname = v.name.replace(':0', '')
- if vname not in data:
- print("----------skip %s----------" % vname)
- continue
- print("assigning %s" % vname)
- sess.run(v.assign(data[vname]))
-
- def step(self, sess, batch_data, is_training):
- """step, read one batch, generate gradients
- """
-
- # Input feed
- input_feed = {}
- input_feed[self.images] = batch_data['images']
- input_feed[self.bbox_true_13] = batch_data['bbox_true_13']
- input_feed[self.bbox_true_26] = batch_data['bbox_true_26']
- input_feed[self.bbox_true_52] = batch_data['bbox_true_52']
-
- # Output feed: depends on training or test
- output_feed = [self.loss] # Loss for this batch.
- if is_training:
- output_feed.append(self.train_op) # Gradient updates
-
- outputs = sess.run(output_feed, input_feed)
- return outputs[0] # loss
-
- def _batch_normalization_layer(
- self,
- input_layer,
- name=None,
- training=True,
- norm_decay=0.997,
- norm_epsilon=1e-5):
- """Batch normalization is used for feature map extracted from
- convolution layer
-
- :param input_layer: four dimensional tensor of input
- :param name: the name of batchnorm layer
- :param training: is training or not
- :param norm_decay: The decay rate of moving average is calculated
- during prediction
- :param norm_epsilon: Variance plus a minimal number to prevent
- division by 0
-
- :return bn_layer: batch normalization处理之后的feature map
- """
- bn_layer = tf.layers.batch_normalization(
- inputs=input_layer,
- momentum=norm_decay,
- epsilon=norm_epsilon,
- center=True,
- scale=True,
- training=training,
- name=name,
- fused=True)
- return tf.nn.relu(bn_layer)
- # return tf.nn.leaky_relu(bn_layer, alpha = 0.1)
-
- def _conv2d_layer(
- self,
- inputs,
- filters_num,
- kernel_size,
- name,
- use_bias=False,
- strides=1):
- """Use tf.layers.conv2d Reduce the weight and bias matrix
- initialization process, as well as convolution plus bias operation
-
- :param inputs: Input variables
- :param filters_num: Number of convolution kernels
- :param strides: Convolution step
- :param name: Convolution layer name
- :param training: is a training process or not
- :param use_bias: use bias or not
- :param kernel_size: the kernels size
-
- :return conv: Feature map after convolution
- """
- # For strided convolutions, pad the top and left explicitly and use
- # VALID padding (Darknet-style downsampling).
- if strides > 1:
- inputs = tf.pad(inputs, paddings=[[0, 0], [1, 0], [
- 1, 0], [0, 0]], mode='CONSTANT')
- conv = tf.layers.conv2d(inputs=inputs, filters=filters_num,
- kernel_size=kernel_size,
- strides=[strides, strides],
- padding=('SAME' if strides == 1 else 'VALID'),
- use_bias=use_bias,
- name=name)
- return conv
-
- def _Residual_block(
- self,
- inputs,
- filters_num,
- blocks_num,
- conv_index,
- training=True,
- norm_decay=0.997,
- norm_epsilon=1e-5):
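- """Residual stage: a stride-2 downsampling convolution followed by
- `blocks_num` residual blocks (a 1x1 then a 3x3 convolution with a
- shortcut connection).
- """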
- layer = self._conv2d_layer(
- inputs,
- filters_num,
- kernel_size=3,
- strides=2,
- name="conv2d_" +
- str(conv_index))
- layer = self._batch_normalization_layer(
- layer,
- name="batch_normalization_" +
- str(conv_index),
- training=training,
- norm_decay=norm_decay,
- norm_epsilon=norm_epsilon)
- conv_index += 1
- for _ in range(blocks_num):
- shortcut = layer
- layer = self._conv2d_layer(
- layer, filters_num // 2, kernel_size=1, strides=1,
- name="conv2d_" + str(conv_index))
- layer = self._batch_normalization_layer(
- layer,
- name="batch_normalization_" +
- str(conv_index),
- training=training,
- norm_decay=norm_decay,
- norm_epsilon=norm_epsilon)
- conv_index += 1
- layer = self._conv2d_layer(
- layer,
- filters_num,
- kernel_size=3,
- strides=1,
- name="conv2d_" +
- str(conv_index))
- layer = self._batch_normalization_layer(
- layer,
- name="batch_normalization_" +
- str(conv_index),
- training=training,
- norm_decay=norm_decay,
- norm_epsilon=norm_epsilon)
- conv_index += 1
- layer += shortcut
- return layer, conv_index
-
- def _resnet18(self, inputs, training=True):
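- """Build the ResNet18 backbone and return the conv5/conv4/conv3
- feature maps (coarse to fine) for the three YOLO heads, the filter
- sizes for the YOLO blocks, and the starting index used to name the
- following convolution layers.
- """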
- cnn_model = ResNet18(inputs, training)
- for k, v in cnn_model.end_points.items():
- print(k)
- print(v)
- features_out = [
- cnn_model.end_points['conv5_output'],
- cnn_model.end_points['conv4_output'],
- cnn_model.end_points['conv3_output']]
- filters_yolo_block = [256, 128, 64]
- conv_index = 19
- return features_out, filters_yolo_block, conv_index
-
- def _yolo_block(
- self,
- inputs,
- filters_num,
- out_filters,
- conv_index,
- training=True,
- norm_decay=0.997,
- norm_epsilon=1e-5):
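- """YOLOv3 detection block: a stack of alternating 1x1/3x3
- convolutions. Returns the intermediate feature `route` (fed to the
- upsampling branch), the raw detection output `conv`, and the updated
- conv_index.
- """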
-
- conv = self._conv2d_layer(
- inputs,
- filters_num=filters_num,
- kernel_size=1,
- strides=1,
- name="conv2d_" +
- str(conv_index))
- conv = self._batch_normalization_layer(
- conv,
- name="batch_normalization_" +
- str(conv_index),
- training=training,
- norm_decay=norm_decay,
- norm_epsilon=norm_epsilon)
- conv_index += 1
- conv = self._conv2d_layer(
- conv,
- filters_num=filters_num * 2,
- kernel_size=3,
- strides=1,
- name="conv2d_" +
- str(conv_index))
- conv = self._batch_normalization_layer(
- conv,
- name="batch_normalization_" +
- str(conv_index),
- training=training,
- norm_decay=norm_decay,
- norm_epsilon=norm_epsilon)
- conv_index += 1
- conv = self._conv2d_layer(
- conv,
- filters_num=filters_num,
- kernel_size=1,
- strides=1,
- name="conv2d_" + str(conv_index))
- conv = self._batch_normalization_layer(
- conv,
- name="batch_normalization_" + str(conv_index),
- training=training,
- norm_decay=norm_decay,
- norm_epsilon=norm_epsilon)
- conv_index += 1
- conv = self._conv2d_layer(
- conv,
- filters_num=filters_num * 2,
- kernel_size=3,
- strides=1,
- name="conv2d_" + str(conv_index))
- conv = self._batch_normalization_layer(
- conv,
- name="batch_normalization_" + str(conv_index),
- training=training,
- norm_decay=norm_decay,
- norm_epsilon=norm_epsilon)
- conv_index += 1
- conv = self._conv2d_layer(
- conv,
- filters_num=filters_num,
- kernel_size=1,
- strides=1,
- name="conv2d_" + str(conv_index))
- conv = self._batch_normalization_layer(
- conv,
- name="batch_normalization_" + str(conv_index),
- training=training,
- norm_decay=norm_decay,
- norm_epsilon=norm_epsilon)
- conv_index += 1
- route = conv
- conv = self._conv2d_layer(
- conv,
- filters_num=filters_num * 2,
- kernel_size=3,
- strides=1,
- name="conv2d_" +
- str(conv_index))
- conv = self._batch_normalization_layer(
- conv,
- name="batch_normalization_" + str(conv_index),
- training=training,
- norm_decay=norm_decay,
- norm_epsilon=norm_epsilon)
- conv_index += 1
- conv = self._conv2d_layer(
- conv,
- filters_num=out_filters,
- kernel_size=1,
- strides=1,
- name="conv2d_" +
- str(conv_index),
- use_bias=True)
- conv_index += 1
- return route, conv, conv_index
-
- def yolo_inference(
- self,
- features_out,
- filters_yolo_block,
- conv_index,
- num_anchors,
- num_classes,
- training=True
- ):
-
- conv = features_out[0]
- conv2d_45 = features_out[1]
- conv2d_26 = features_out[2]
-
- print('conv : ', conv)
- print('conv2d_45 : ', conv2d_45)
- print('conv2d_26 : ', conv2d_26)
-
- with tf.variable_scope('yolo'):
- conv2d_57, conv2d_59, conv_index = (
- self._yolo_block(conv,
- filters_yolo_block[0],
- num_anchors * (num_classes + 5),
- conv_index=conv_index,
- training=training,
- norm_decay=self.norm_decay,
- norm_epsilon=self.norm_epsilon)
- )
- print('conv2d_59 : ', conv2d_59)
- print('conv2d_57 : ', conv2d_57)
-
- conv2d_60 = self._conv2d_layer(
- conv2d_57,
- filters_num=filters_yolo_block[1],
- kernel_size=1,
- strides=1,
- name="conv2d_" +
- str(conv_index))
- conv2d_60 = self._batch_normalization_layer(
- conv2d_60,
- name="batch_normalization_" +
- str(conv_index),
- training=training,
- norm_decay=self.norm_decay,
- norm_epsilon=self.norm_epsilon)
- print('conv2d_60 : ', conv2d_60)
-
- conv_index += 1
- upSample_0 = tf.image.resize_nearest_neighbor(
- conv2d_60, [
- 2 * tf.shape(conv2d_60)[1], 2 * tf.shape(conv2d_60)[2]],
- name='upSample_0')
- print('upSample_0 : ', upSample_0)
-
- route0 = tf.concat([upSample_0, conv2d_45],
- axis=-1, name='route_0')
- print('route0 : ', route0)
-
- conv2d_65, conv2d_67, conv_index = (
- self._yolo_block(route0,
- filters_yolo_block[1],
- num_anchors * (num_classes + 5),
- conv_index=conv_index,
- training=training,
- norm_decay=self.norm_decay,
- norm_epsilon=self.norm_epsilon)
- )
- print('conv2d_67 : ', conv2d_67)
- print('conv2d_65 : ', conv2d_65)
-
- conv2d_68 = self._conv2d_layer(
- conv2d_65,
- filters_num=filters_yolo_block[2],
- kernel_size=1,
- strides=1,
- name="conv2d_" +
- str(conv_index))
- conv2d_68 = self._batch_normalization_layer(
- conv2d_68,
- name="batch_normalization_" +
- str(conv_index),
- training=training,
- norm_decay=self.norm_decay,
- norm_epsilon=self.norm_epsilon)
- print('conv2d_68 : ', conv2d_68)
-
- conv_index += 1
- upSample_1 = tf.image.resize_nearest_neighbor(
- conv2d_68, [
- 2 * tf.shape(conv2d_68)[1], 2 * tf.shape(conv2d_68)[2]],
- name='upSample_1')
- print('upSample_1 : ', upSample_1)
-
- route1 = tf.concat([upSample_1, conv2d_26],
- axis=-1, name='route_1')
- print('route1 : ', route1)
-
- _, conv2d_75, _ = self._yolo_block(route1, filters_yolo_block[2],
- num_anchors * (num_classes + 5),
- conv_index=conv_index,
- training=training,
- norm_decay=self.norm_decay,
- norm_epsilon=self.norm_epsilon)
- print('conv2d_75 : ', conv2d_75)
-
- return [conv2d_59, conv2d_67, conv2d_75]
-
- def yolo_head(
- self,
- feats,
- anchors,
- num_classes,
- input_shape,
- training=True
- ):
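- """Decode a raw detection head into normalized box centers and sizes,
- objectness and class probabilities:
- box_xy = (sigmoid(tx, ty) + grid) / grid_size
- box_wh = exp(tw, th) * anchors / input_shape
- During training the grid and the raw predictions are returned
- together with box_xy and box_wh instead.
- """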
-
- num_anchors = len(anchors)
- anchors_tensor = tf.reshape(
- tf.constant(anchors, dtype=tf.float32),
- [1, 1, 1, num_anchors, 2])
- grid_size = tf.shape(feats)[1:3]
- predictions = tf.reshape(
- feats,
- [-1, grid_size[0], grid_size[1], num_anchors, num_classes + 5])
- grid_y = tf.tile(tf.reshape(
- tf.range(grid_size[0]), [-1, 1, 1, 1]), [1, grid_size[1], 1, 1])
- grid_x = tf.tile(tf.reshape(tf.range(grid_size[1]), [
- 1, -1, 1, 1]), [grid_size[0], 1, 1, 1])
- grid = tf.concat([grid_x, grid_y], axis=-1)
- grid = tf.cast(grid, tf.float32)
- box_xy = (tf.sigmoid(predictions[..., :2]) +
- grid) / tf.cast(grid_size[::-1], tf.float32)
- box_wh = tf.exp(
- predictions[..., 2:4]) * anchors_tensor / input_shape[::-1]
- box_confidence = tf.sigmoid(predictions[..., 4:5])
- box_class_probs = tf.sigmoid(predictions[..., 5:])
- if training:
- return grid, predictions, box_xy, box_wh
- return box_xy, box_wh, box_confidence, box_class_probs
-
- def yolo_boxes_scores(
- self,
- feats,
- anchors,
- num_classes,
- input_shape,
- image_shape):
- input_shape = tf.cast(input_shape, tf.float32)
- image_shape = tf.cast(image_shape, tf.float32)
- box_xy, box_wh, box_confidence, box_class_probs = self.yolo_head(
- feats, anchors, num_classes, input_shape, training=False)
- box_yx = box_xy[..., ::-1]
- box_hw = box_wh[..., ::-1]
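- # Undo the letterbox preprocessing: map the normalized boxes from the
- # padded network input back onto the original image.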
- new_shape = tf.round(
- image_shape *
- tf.reduce_min(
- input_shape /
- image_shape))
- offset = (input_shape - new_shape) / 2. / input_shape
- scale = input_shape / new_shape
- box_yx = (box_yx - offset) * scale
- box_hw = box_hw * scale
-
- box_min = box_yx - box_hw / 2.
- box_max = box_yx + box_hw / 2.
- boxes = tf.concat(
- [box_min[..., 0:1],
- box_min[..., 1:2],
- box_max[..., 0:1],
- box_max[..., 1:2]],
- axis=-1
- )
- boxes *= tf.concat([image_shape, image_shape], axis=-1)
- boxes = tf.reshape(boxes, [-1, 4])
- boxes_scores = box_confidence * box_class_probs
- boxes_scores = tf.reshape(boxes_scores, [-1, num_classes])
- return boxes, boxes_scores
-
- def box_iou(self, box1, box2):
- box1 = tf.expand_dims(box1, -2)
- box1_xy = box1[..., :2]
- box1_wh = box1[..., 2:4]
- box1_mins = box1_xy - box1_wh / 2.
- box1_maxs = box1_xy + box1_wh / 2.
-
- box2 = tf.expand_dims(box2, 0)
- box2_xy = box2[..., :2]
- box2_wh = box2[..., 2:4]
- box2_mins = box2_xy - box2_wh / 2.
- box2_maxs = box2_xy + box2_wh / 2.
-
- intersect_mins = tf.maximum(box1_mins, box2_mins)
- intersect_maxs = tf.minimum(box1_maxs, box2_maxs)
- intersect_wh = tf.maximum(intersect_maxs - intersect_mins, 0.)
- intersect_area = intersect_wh[..., 0] * intersect_wh[..., 1]
- box1_area = box1_wh[..., 0] * box1_wh[..., 1]
- box2_area = box2_wh[..., 0] * box2_wh[..., 1]
- iou = intersect_area / (box1_area + box2_area - intersect_area)
- return iou
-
- def yolo_loss(
- self,
- yolo_output,
- y_true,
- anchors,
- num_classes,
- ignore_thresh=.5):
- loss = 0.0
- anchor_mask = [[6, 7, 8], [3, 4, 5], [0, 1, 2]]
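- # Each output scale uses 3 of the 9 anchors: the largest (indices
- # 6-8) for the coarsest 13x13 grid, the smallest for the finest grid.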
- input_shape = tf.shape(yolo_output[0])[1: 3] * 32
- input_shape = tf.cast(input_shape, tf.float32)
- grid_shapes = [
- tf.cast(tf.shape(yolo_output[layer])[1:3], tf.float32)
- for layer in range(3)]
- for index in range(3):
- object_mask = y_true[index][..., 4:5]
- class_probs = y_true[index][..., 5:]
- grid, predictions, pred_xy, pred_wh = self.yolo_head(
- yolo_output[index], anchors[anchor_mask[index]], num_classes,
- input_shape, training=True)
- pred_box = tf.concat([pred_xy, pred_wh], axis=-1)
- raw_true_xy = (y_true[index][..., :2]
- * grid_shapes[index][::-1] - grid)
- object_mask_bool = tf.cast(object_mask, dtype=tf.bool)
- raw_true_wh = tf.log(tf.where(
- tf.equal(y_true[index][..., 2:4]
- / anchors[anchor_mask[index]] * input_shape[::-1], 0),
- tf.ones_like(y_true[index][..., 2:4]),
- y_true[index][..., 2:4]
- / anchors[anchor_mask[index]] * input_shape[::-1]))
- box_loss_scale = (2 - y_true[index][..., 2:3]
- * y_true[index][..., 3:4])
- ignore_mask = tf.TensorArray(
- dtype=tf.float32, size=1, dynamic_size=True)
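- # ignore_mask flags predictions whose best IoU with any ground-truth
- # box is below ignore_thresh; only those count towards the no-object
- # part of the confidence loss.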
-
- def loop_body(internal_index, ignore_mask):
- true_box = tf.boolean_mask(
- y_true[index][internal_index, ..., 0:4],
- object_mask_bool[internal_index, ..., 0])
- iou = self.box_iou(pred_box[internal_index], true_box)
- best_iou = tf.reduce_max(iou, axis=-1)
- ignore_mask = ignore_mask.write(
- internal_index, tf.cast(
- best_iou < ignore_thresh, tf.float32))
- return internal_index + 1, ignore_mask
-
- _, ignore_mask = tf.while_loop(
- lambda internal_index, ignore_mask:
- internal_index < tf.shape(yolo_output[0])[0],
- loop_body, [0, ignore_mask])
- ignore_mask = ignore_mask.stack()
- ignore_mask = tf.expand_dims(ignore_mask, axis=-1)
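- # Per-scale loss: xy via sigmoid cross-entropy on the grid offsets,
- # wh via squared error in log space, objectness and classification
- # via sigmoid cross-entropy, each summed and divided by the batch size.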
- xy_loss = (
- object_mask
- * box_loss_scale
- * tf.nn.sigmoid_cross_entropy_with_logits(
- labels=raw_true_xy,
- logits=predictions[..., 0:2])
- )
- wh_loss = (
- object_mask
- * box_loss_scale
- * 0.5
- * tf.square(raw_true_wh - predictions[..., 2:4]))
- confidence_loss = (
- object_mask
- * tf.nn.sigmoid_cross_entropy_with_logits(
- labels=object_mask,
- logits=predictions[..., 4:5])
- + (1 - object_mask)
- * tf.nn.sigmoid_cross_entropy_with_logits(
- labels=object_mask,
- logits=predictions[..., 4:5])
- * ignore_mask
- )
- class_loss = (
- object_mask
- * tf.nn.sigmoid_cross_entropy_with_logits(
- labels=class_probs, logits=predictions[..., 5:])
- )
- xy_loss = tf.reduce_sum(
- xy_loss) / tf.cast(tf.shape(yolo_output[0])[0], tf.float32)
- wh_loss = tf.reduce_sum(
- wh_loss) / tf.cast(tf.shape(yolo_output[0])[0], tf.float32)
- confidence_loss = tf.reduce_sum(
- confidence_loss) / tf.cast(tf.shape(yolo_output[0])[0],
- tf.float32)
- class_loss = tf.reduce_sum(
- class_loss) / tf.cast(tf.shape(yolo_output[0])[0], tf.float32)
-
- loss += xy_loss + wh_loss + confidence_loss + class_loss
-
- return loss
-
- def yolo_eval(self, yolo_outputs, image_shape, max_boxes=20):
- with tf.variable_scope('boxes_scores'):
- anchor_mask = [[6, 7, 8], [3, 4, 5], [0, 1, 2]]
- boxes = []
- box_scores = []
- input_shape = tf.shape(yolo_outputs[0])[1: 3] * 32
- for i in range(len(yolo_outputs)):
- _boxes, _box_scores = (
- self.yolo_boxes_scores(yolo_outputs[i],
- self.anchors[
- anchor_mask[i]],
- len(self.class_names),
- input_shape,
- image_shape)
- )
- boxes.append(_boxes)
- box_scores.append(_box_scores)
- boxes = tf.concat(boxes, axis=0)
- box_scores = tf.concat(box_scores, axis=0)
-
- with tf.variable_scope('nms'):
- mask = box_scores >= self.obj_threshold
- max_boxes_tensor = tf.constant(max_boxes, dtype=tf.int32)
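- # Keep boxes whose class score passes obj_threshold, then run
- # non-max suppression independently for each class.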
- boxes_ = []
- scores_ = []
- classes_ = []
- for c in range(len(self.class_names)):
- class_boxes = tf.boolean_mask(boxes, mask[:, c])
- class_box_scores = tf.boolean_mask(
- box_scores[:, c], mask[:, c])
- nms_index = tf.image.non_max_suppression(
- class_boxes,
- class_box_scores,
- max_boxes_tensor,
- iou_threshold=self.nms_threshold)
- class_boxes = tf.gather(class_boxes, nms_index)
- class_box_scores = tf.gather(class_box_scores, nms_index)
- classes = tf.ones_like(class_box_scores, 'int32') * c
- boxes_.append(class_boxes)
- scores_.append(class_box_scores)
- classes_.append(classes)
-
- with tf.variable_scope('output'):
- boxes_ = tf.concat(boxes_, axis=0, name='boxes')
- scores_ = tf.concat(scores_, axis=0, name='scores')
- classes_ = tf.concat(classes_, axis=0, name='classes')
- return boxes_, scores_, classes_
-
-
- class YoloConfig:
- gpu_index = "3"
-
- net_type = 'resnet18'
-
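- # Nine (width, height) anchor pairs, consumed three per detection
- # scale (see anchor_mask in yolo_loss / yolo_eval).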
- anchors = [
- 10, 13, 16, 30, 33, 23,
- 30, 61, 62, 45, 59, 119,
- 116, 90, 156, 198, 163, 326]
-
- max_boxes = 50
- jitter = 0.3
- hue = 0.1
- sat = 1.0
- cont = 0.8
- bri = 0.1
- norm_decay = 0.99
- norm_epsilon = 1e-5
- ignore_thresh = 0.5
-
-
- class YOLOInference(object):
-
- # pylint: disable=too-many-arguments, too-many-instance-attributes
- def __init__(self, sess, pb_model_path, input_shape):
- """
- initialization
- """
-
- self.load_model(sess, pb_model_path)
- self.input_shape = input_shape
-
- def load_model(self, sess, pb_model_path):
- """
- import model and load parameters from pb file
- """
-
- logging.info("Import yolo model from pb start .......")
-
- with sess.as_default():
- with sess.graph.as_default():
- with tf.gfile.FastGFile(pb_model_path, 'rb') as f_handle:
- logging.info("ParseFromString start .......")
- graph_def = tf.GraphDef()
- graph_def.ParseFromString(f_handle.read())
- logging.info("ParseFromString end .......")
-
- tf.import_graph_def(graph_def, name='')
- logging.info("Import_graph_def end .......")
-
- logging.info("Import yolo model from pb end .......")
-
- # pylint: disable=too-many-locals
- # pylint: disable=invalid-name
- def predict(self, sess, img_data):
- """
- prediction for image rectangle by input_feed and output_feed
- """
-
- with sess.as_default():
- new_image = self.preprocess(img_data, self.input_shape)
- input_feed = self.create_input_feed(sess, new_image, img_data)
- output_fetch = self.create_output_fetch(sess)
- all_classes, all_scores, all_bboxes = sess.run(
- output_fetch, input_feed)
-
- return all_classes, all_scores, all_bboxes
-
- def create_input_feed(self, sess, new_image, img_data):
- """
- create input feed data
- """
-
- input_feed = {}
-
- input_img_data = sess.graph.get_tensor_by_name('images:0')
- input_feed[input_img_data] = new_image
-
- input_img_shape = sess.graph.get_tensor_by_name('shapes:0')
- input_feed[input_img_shape] = [img_data.shape[0], img_data.shape[1]]
-
- return input_feed
-
- def create_output_fetch(self, sess):
- """
- create output fetch tensors
- """
-
- output_classes = sess.graph.get_tensor_by_name('output/classes:0')
- output_scores = sess.graph.get_tensor_by_name('output/scores:0')
- output_boxes = sess.graph.get_tensor_by_name('output/boxes:0')
-
- output_fetch = [output_classes, output_scores, output_boxes]
-
- return output_fetch
-
- def preprocess(self, image, input_shape):
- """
- resize image with unchanged aspect ratio using padding by opencv
- """
- # image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
- h, w, _ = image.shape
-
- input_h, input_w = input_shape
- scale = min(float(input_w) / float(w), float(input_h) / float(h))
- nw = int(w * scale)
- nh = int(h * scale)
-
- image = cv2.resize(image, (nw, nh))
-
- new_image = np.zeros((input_h, input_w, 3), np.float32)
- new_image.fill(128)
- bh, bw, _ = new_image.shape
- _inx_1 = int((bh - nh) / 2)
- _inx_2 = nh + int((bh - nh) / 2)
- _inx_3 = int((bw - nw) / 2)
- _inx_4 = nw + int((bw - nw) / 2)
- new_image[_inx_1:_inx_2, _inx_3:_inx_4, :] = image
-
- new_image /= 255.
- new_image = np.expand_dims(new_image, 0) # Add batch dimension.
- return new_image