|
- """
- /**
- * Copyright 2020 Tianshu AI Platform. All Rights Reserved.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- * =============================================================
- */
- Reference:
- - [YOLOv3: An Incremental Improvement]
- (https://arxiv.org/abs/1804.02767)
- """
-
- import oneflow as flow
- import oneflow.core.operator.op_conf_pb2 as op_conf_util
- from oneflow_yolov3.ops.upsample_nearest import upsample_nearest
- from oneflow_yolov3.ops.yolo_detect import yolo_detect
- from oneflow_yolov3.ops.yolo_nms import yolo_nms
- layer_number = 1
- route_dict = {}
- yolo_pos_result = []
- yolo_prob_result = []
- yolo_loss_result = []
-
- num_classes = 80
- ignore_thresh = 0.7
- truth_thresh = 1.0
- image_height = 608
- image_width = 608
- max_out_boxes = 200
- nms = True
- # nms=False
- nms_threshold = 0.45
-
- #anchor_boxes_size_list=[flow.detection.anchor_boxes_size(10, 13), flow.detection.anchor_boxes_size(16, 30), flow.detection.anchor_boxes_size(33, 23), flow.detection.anchor_boxes_size(30,61), flow.detection.anchor_boxes_size(62, 45), flow.detection.anchor_boxes_size(59, 119), flow.detection.anchor_boxes_size(116,90), flow.detection.anchor_boxes_size(156, 198), flow.detection.anchor_boxes_size(373, 326)]
- anchor_boxes_size_list = [10, 13, 16, 30, 33, 23, 30,
- 61, 62, 45, 59, 119, 116, 90, 156, 198, 373, 326]
- yolo_box_diff_conf = [{'image_height': image_height,
- 'image_width': image_width,
- 'layer_height': 19,
- 'layer_width': 19,
- 'ignore_thresh': ignore_thresh,
- 'truth_thresh': truth_thresh,
- 'anchor_boxes_size': anchor_boxes_size_list,
- 'box_mask': [6,
- 7,
- 8]},
- {'image_height': image_height,
- 'image_width': image_width,
- 'layer_height': 38,
- 'layer_width': 38,
- 'ignore_thresh': ignore_thresh,
- 'truth_thresh': truth_thresh,
- 'anchor_boxes_size': anchor_boxes_size_list,
- 'box_mask': [3,
- 4,
- 5]},
- {'image_height': image_height,
- 'image_width': image_width,
- 'layer_height': 76,
- 'layer_width': 76,
- 'ignore_thresh': ignore_thresh,
- 'truth_thresh': truth_thresh,
- 'anchor_boxes_size': anchor_boxes_size_list,
- 'box_mask': [0,
- 1,
- 2]}]
-
- # to confirm wh pos, gr 12.19 check with 11 ~/yolov3/predict.job
- yolo_conf = [{'layer_height': 19,
- 'layer_width': 19,
- 'prob_thresh': 0.5,
- 'num_classes': 80,
- 'anchor_boxes_size': [116,
- 90,
- 156,
- 198,
- 373,
- 326]},
- {'layer_height': 38,
- 'layer_width': 38,
- 'prob_thresh': 0.5,
- 'num_classes': 80,
- 'anchor_boxes_size': [30,
- 61,
- 62,
- 45,
- 59,
- 119]},
- {'layer_height': 76,
- 'layer_width': 76,
- 'prob_thresh': 0.5,
- 'num_classes': 80,
- 'anchor_boxes_size': [10,
- 13,
- 16,
- 30,
- 33,
- 23]}]
-
-
- def _conv2d_layer(
- name,
- input,
- filters,
- kernel_size=3,
- strides=1,
- padding="SAME",
- group_num=1,
- data_format="NCHW",
- dilation_rate=1,
- activation=op_conf_util.kRelu,
- use_bias=False,
- weight_initializer=flow.random_uniform_initializer(),
- bias_initializer=flow.random_uniform_initializer(),
- trainable=True,
- ):
- if data_format == "NCHW":
- weight_shape = (
- int(filters), int(
- input.static_shape[1]), int(
- kernel_size[0]), int(
- kernel_size[0]))
- elif data_format == "NHWC":
- weight_shape = (
- int(filters), int(
- kernel_size[0]), int(
- kernel_size[0]), int(
- input.static_shape[3]))
- else:
- raise ValueError('data_format must be "NCHW" or "NHWC".')
- weight = flow.get_variable(
- name + "-weight",
- shape=weight_shape,
- dtype=input.dtype,
- initializer=weight_initializer,
- trainable=trainable,
- )
- output = flow.nn.conv2d(
- input, weight, strides, padding, data_format, dilation_rate, name=name
- )
- if use_bias:
- bias = flow.get_variable(
- name + "-bias",
- shape=(filters,),
- dtype=input.dtype,
- initializer=bias_initializer,
- model_name="bias",
- trainable=trainable,
- )
- output = flow.nn.bias_add(output, bias, data_format)
-
- if activation is not None:
- if activation == op_conf_util.kRelu:
- output = flow.keras.activations.relu(output)
- else:
- raise NotImplementedError
-
- return output
-
-
- def _batch_norm(
- inputs,
- axis,
- momentum,
- epsilon,
- center=True,
- scale=True,
- trainable=True,
- name=None):
-
- return flow.layers.batch_normalization(
- inputs=inputs,
- axis=axis,
- momentum=momentum,
- epsilon=epsilon,
- center=center,
- scale=scale,
- trainable=trainable,
- name=name
- )
-
-
- def _leaky_relu(input, alpha=None, name=None):
- return flow.nn.leaky_relu(input, alpha=alpha, name=None)
- # return flow.math.relu(input)
-
-
- def _upsample(input, name=None):
- # return flow.detection.upsample_nearest(input, name=name, scale=2,
- # data_format="channels_first")
- return upsample_nearest(
- input,
- name=name,
- scale=2,
- data_format="channels_first")
-
-
- def conv_unit(
- data,
- num_filter=1,
- kernel=(
- 1,
- 1),
- stride=(
- 1,
- 1),
- pad="same",
- data_format="NCHW",
- use_bias=False,
- trainable=True,
- prefix=''):
- conv = _conv2d_layer(
- name=prefix + '-conv',
- input=data,
- filters=num_filter,
- kernel_size=kernel,
- strides=stride,
- padding='same',
- data_format=data_format,
- dilation_rate=1,
- activation=None,
- use_bias=use_bias,
- trainable=trainable)
- bn = _batch_norm(
- conv,
- axis=1,
- momentum=0.99,
- epsilon=1.0001e-5,
- trainable=trainable,
- name=prefix +
- '-bn')
- leaky_relu = _leaky_relu(bn, alpha=0.1, name=prefix + '-leakyRelu')
- return leaky_relu
-
-
- def ResidualBlock(data, prefix, filter, trainable):
- global layer_number
- layer_number += 1
- blob = conv_unit(
- data,
- num_filter=filter,
- kernel=[
- 1,
- 1],
- stride=[
- 1,
- 1],
- pad="same",
- data_format="NCHW",
- use_bias=False,
- trainable=trainable,
- prefix='yolo-layer' +
- str(layer_number))
-
- layer_number += 1
- blob = conv_unit(
- blob,
- num_filter=filter *
- 2,
- kernel=[
- 3,
- 3],
- stride=[
- 1,
- 1],
- pad="same",
- data_format="NCHW",
- use_bias=False,
- trainable=trainable,
- prefix='yolo-layer' +
- str(layer_number))
-
- layer_number += 1
- shortcut = flow.math.add(
- data,
- blob,
- name='yolo-layer' +
- str(layer_number) +
- '-shortcut')
- return shortcut
-
-
- def ResidualStage(data, prefix, n, filters, trainable):
- global layer_number
-
- layer_number += 1
-
- blob = conv_unit(
- data,
- num_filter=filters *
- 2,
- kernel=[
- 3,
- 3],
- stride=[
- 2,
- 2],
- pad="same",
- data_format="NCHW",
- use_bias=False,
- trainable=trainable,
- prefix='yolo-layer' +
- str(layer_number))
- for i in range(n):
- blob = ResidualBlock(blob, "%s_%d" %
- (prefix, i), filters, trainable=trainable)
- return blob
-
-
- def DarknetNetConvXBody(in_blob, trainable, on_stage_end=lambda x: x):
- global layer_number
- filter = [32, 64, 128, 256, 512]
- block_counts = [1, 2, 8, 8, 4]
- blob = in_blob
-
- for i in range(len(block_counts)):
-
- blob = ResidualStage(blob, "block%d" % i, block_counts[i],
- filter[i], trainable=trainable)
- if i == 2:
- route_dict['layer_36'] = blob
- if i == 3:
- route_dict['layer_61'] = blob
- if i == 4:
- route_dict['layer_74'] = blob
-
- on_stage_end(blob)
- return blob
-
-
- def YoloBlock(in_blob, prefix, filter, stage_idx, block_idx, trainable):
- global layer_number
- layer_number += 1
- blob = conv_unit(
- in_blob,
- num_filter=filter,
- kernel=[
- 1,
- 1],
- stride=[
- 1,
- 1],
- pad="same",
- data_format="NCHW",
- use_bias=False,
- prefix='yolo-layer' +
- str(layer_number))
- if stage_idx == 0 and block_idx == 2:
- route_dict['layer_79'] = blob
- if stage_idx == 1 and block_idx == 2:
- route_dict['layer_91'] = blob
- layer_number += 1
- blob = conv_unit(
- blob,
- num_filter=filter *
- 2,
- kernel=[
- 3,
- 3],
- stride=[
- 1,
- 1],
- pad="same",
- data_format="NCHW",
- use_bias=False,
- prefix='yolo-layer' +
- str(layer_number))
-
- return blob
-
-
- def YoloStage(in_blob, prefix, n, filters, stage_idx, trainable):
- global layer_number
- blob = in_blob
- for i in range(n):
- blob = YoloBlock(
- blob, "%s_%d" %
- (prefix, i), filters, stage_idx, i, trainable=trainable)
- layer_number += 1
- blob = _conv2d_layer(
- name='yolo-layer' +
- str(layer_number) +
- '-conv',
- input=blob,
- filters=255,
- kernel_size=[
- 1,
- 1],
- strides=[
- 1,
- 1],
- padding='same',
- data_format="NCHW",
- dilation_rate=1,
- activation=None,
- use_bias=True,
- trainable=trainable)
-
- return blob
-
-
- def YoloPredictLayer(in_blob, origin_image_info, i, trainable):
- global layer_number
- layer_name = 'yolo-layer' + str(layer_number)
- # placeholder for a reshape from (n,h,w,255)->(n,h,w*3,85)
- blob = flow.transpose(
- in_blob,
- name=layer_name +
- '-yolo_transpose',
- perm=[
- 0,
- 2,
- 3,
- 1])
- reshape_blob = flow.reshape(blob, shape=(
- blob.shape[0], -1, 85), name=layer_name + '-yolo_reshape')
- position = flow.slice(
- reshape_blob, [
- None, 0, 0], [
- None, -1, 4], name=layer_name + '-yolo_slice_pos')
- xy = flow.slice(
- position, [
- None, 0, 0], [
- None, -1, 2], name=layer_name + '-yolo_slice_xy')
- wh = flow.slice(
- position, [
- None, 0, 2], [
- None, -1, 2], name=layer_name + '-yolo_slice_wh')
- xy = flow.math.sigmoid(xy, name=layer_name + '-yolo_ligistic_xy')
- position = flow.concat([xy, wh], axis=2, name=layer_name + '-yolo_concat')
- confidence = flow.slice(
- reshape_blob, [
- None, 0, 4], [
- None, -1, 81], name=layer_name + '-yolo_slice_prob')
- confidence = flow.math.sigmoid(
- confidence,
- name=layer_name +
- '-yolo_ligistic_prob')
- #[out_bbox, out_probs, valid_num] = flow.detection.yolo_detect(bbox=position, probs=confidence, origin_image_info=origin_image_info, image_height=608, image_width=608, layer_height=yolo_conf[i]['layer_height'], layer_width=yolo_conf[i]['layer_width'], prob_thresh=0.5, num_classes=80, max_out_boxes = max_out_boxes, anchor_boxes=yolo_conf[i]['anchor_boxes_size'])
- [out_bbox,
- out_probs,
- valid_num] = yolo_detect(bbox=position,
- probs=confidence,
- origin_image_info=origin_image_info,
- image_height=608,
- image_width=608,
- layer_height=yolo_conf[i]['layer_height'],
- layer_width=yolo_conf[i]['layer_width'],
- prob_thresh=0.5,
- num_classes=80,
- max_out_boxes=max_out_boxes,
- anchor_boxes=yolo_conf[i]['anchor_boxes_size'],
- name=str(layer_name) + "yolo_detect")
- print("out_bbox.shape", out_bbox.shape)
- return out_bbox, out_probs, valid_num
-
-
- def YoloTrainLayer(in_blob, gt_bbox_blob, gt_label_blob, gt_valid_num_blob, i):
-
- global layer_number
- layer_name = 'yolo-layer' + str(layer_number)
- # placeholder for a reshape from (n,h,w,255)->(n,h,w*3,85)
- blob = flow.transpose(
- in_blob,
- name=layer_name +
- '-yolo_transpose',
- perm=[
- 0,
- 2,
- 3,
- 1])
- reshape_blob = flow.reshape(blob, shape=(
- blob.shape[0], -1, 85), name=layer_name + '-yolo_reshape')
- position = flow.slice(
- reshape_blob, [
- None, 0, 0], [
- None, -1, 4], name=layer_name + '-yolo_slice_pos')
- xy = flow.slice(
- position, [
- None, 0, 0], [
- None, -1, 2], name=layer_name + '-yolo_slice_xy')
- wh = flow.slice(
- position, [
- None, 0, 2], [
- None, -1, 2], name=layer_name + '-yolo_slice_wh')
- xy = flow.math.logistic(xy, name=layer_name + '-yolo_ligistic_xy')
- #xy = flow.math.sigmoid(xy, name = layer_name + '-yolo_ligistic_xy')
- position = flow.concat([xy, wh], axis=2, name=layer_name + '-yolo_concat')
- confidence = flow.slice(
- reshape_blob, [
- None, 0, 4], [
- None, -1, 81], name=layer_name + '-yolo_slice_prob')
- confidence = flow.math.logistic(
- confidence,
- name=layer_name +
- '-yolo_ligistic_prob')
- #confidence = flow.math.sigmoid(confidence, name = layer_name+ '-yolo_ligistic_prob')
-
- objness = flow.slice(
- confidence, [
- None, 0, 0], [
- None, -1, 1], name=layer_name + '-yolo_slice_objness')
- clsprob = flow.slice(
- confidence, [
- None, 0, 1], [
- None, -1, 80], name=layer_name + '-yolo_slice_clsprob')
- bbox_loc_diff, pos_inds, pos_cls_label, neg_inds, valid_num = flow.detection.yolo_box_diff(position, gt_bbox_blob, gt_label_blob, gt_valid_num_blob, image_height=yolo_box_diff_conf[i]['image_height'], image_width=yolo_box_diff_conf[i]['image_width'], layer_height=yolo_box_diff_conf[i]['layer_height'], layer_width=yolo_box_diff_conf[
- i]['layer_width'], ignore_thresh=yolo_box_diff_conf[i]['ignore_thresh'], truth_thresh=yolo_box_diff_conf[i]['truth_thresh'], box_mask=yolo_box_diff_conf[i]['box_mask'], anchor_boxes_size=yolo_box_diff_conf[i]['anchor_boxes_size'], name=layer_name + '-yolo_box_loss') # placeholder for yolobox layer
- bbox_objness_out, bbox_clsprob_out = flow.detection.yolo_prob_loss(
- objness, clsprob, pos_inds, pos_cls_label, neg_inds, valid_num, num_classes=80, name=layer_name + '-yolo_prob_loss')
- bbox_loss = flow.concat([bbox_loc_diff,
- bbox_objness_out,
- bbox_clsprob_out],
- axis=2,
- name=layer_name + '-loss_concat')
- bbox_loss_reduce_sum = flow.math.reduce_sum(
- bbox_loss, axis=[1, 2], name=layer_name + '-bbox_loss_reduce_sum')
- return bbox_loss_reduce_sum
-
-
- def YoloNetBody(
- in_blob,
- gt_bbox_blob=None,
- gt_label_blob=None,
- gt_valid_num_blob=None,
- origin_image_info=None,
- trainable=False):
- global layer_number
- filter = [512, 256, 128]
- block_counts = [3, 3, 3]
- blob = in_blob
- yolo_result = []
- for i in range(len(filter)):
- if i == 0:
- blob = route_dict['layer_74']
- elif i == 1:
- layer_number += 1
- # placeholder for route layer
- layer_number += 1
- blob = conv_unit(
- route_dict['layer_79'],
- num_filter=filter[i],
- kernel=[
- 1,
- 1],
- stride=[
- 1,
- 1],
- pad="same",
- data_format="NCHW",
- use_bias=False,
- trainable=trainable,
- prefix='yolo-layer' +
- str(layer_number))
-
- layer_number += 1
- blob = _upsample(blob, name='upsample' + str(i))
- layer_number += 1
-
- blob = flow.concat([blob, route_dict['layer_61']],
- name='route' + str(i), axis=1)
- elif i == 2:
- layer_number += 1
- # placeholder for route layer
- layer_number += 1
-
- blob = conv_unit(
- route_dict['layer_91'],
- num_filter=filter[i],
- kernel=[
- 1,
- 1],
- stride=[
- 1,
- 1],
- pad="same",
- data_format="NCHW",
- use_bias=False,
- trainable=trainable,
- prefix='yolo-layer' +
- str(layer_number))
-
- layer_number += 1
- blob = _upsample(blob, name='upsample' + str(i))
- layer_number += 1
- blob = flow.concat([blob, route_dict['layer_36']],
- axis=1, name='route' + str(i))
-
- yolo_blob = YoloStage(blob, "%d" % i, block_counts[i],
- filter[i], i, trainable=trainable)
- layer_number += 1
- if not trainable:
- yolo_position, yolo_prob, valid_num = YoloPredictLayer(
- yolo_blob, origin_image_info, i, trainable=trainable)
- yolo_pos_result.append(yolo_position)
- yolo_prob_result.append(yolo_prob)
- else:
- loss = YoloTrainLayer(
- yolo_blob,
- gt_bbox_blob,
- gt_label_blob,
- gt_valid_num_blob,
- i)
- yolo_loss_result.append(loss)
- if not trainable:
- yolo_positions = flow.concat(
- yolo_pos_result,
- axis=1,
- name="concat_pos") # (b, n_boxes, 4)
- yolo_probs = flow.concat(yolo_prob_result, axis=1) # (b, n_boxes, 81)
- print(yolo_positions.shape)
- print(yolo_probs.shape)
- if nms:
- yolo_probs_transpose = flow.transpose(
- yolo_probs, perm=[0, 2, 1]) # (b, 81, n_boxes)
- pre_nms_top_k_inds = flow.math.top_k(
- yolo_probs_transpose, k=20000) # (b, 81, n_boxes)
- pre_nms_top_k_inds1 = flow.reshape(
- pre_nms_top_k_inds,
- shape=(
- pre_nms_top_k_inds.shape[0],
- pre_nms_top_k_inds.shape[1] *
- pre_nms_top_k_inds.shape[2]),
- name="reshape1") # (b, 81*n_boxes)
- gathered_yolo_positions = flow.gather(
- yolo_positions,
- pre_nms_top_k_inds1,
- axis=1,
- batch_dims=1) # (b, 81*n_boxes, 4)
- gathered_yolo_positions = flow.reshape(
- gathered_yolo_positions,
- shape=(
- gathered_yolo_positions.shape[0],
- yolo_probs.shape[2],
- yolo_positions.shape[1],
- yolo_positions.shape[2]),
- name="reshape2") # (b, 81, n_boxes, 4)
- gathered_yolo_probs = flow.gather(
- yolo_probs_transpose,
- pre_nms_top_k_inds,
- axis=2,
- batch_dims=2) # (b, 81, n_boxes)
- nms_val = yolo_nms(
- gathered_yolo_positions,
- gathered_yolo_probs,
- iou_threshold=nms_threshold,
- keep_n=-1,
- batch_dims=2,
- name="nms") # b, 81, n_boxes
- nms_val_cast = flow.cast(
- nms_val, dtype=flow.float) # (b, 81, n_boxes)
- nms_val_reshape = flow.reshape(
- nms_val_cast,
- shape=(
- nms_val.shape[0],
- nms_val.shape[1],
- nms_val.shape[2],
- 1)) # (b, 81, n_boxes, 1)
- final_boxes = flow.math.multiply(
- gathered_yolo_positions,
- nms_val_reshape) # (b, 81, 270, 4)
- final_probs = flow.math.multiply(
- gathered_yolo_probs, nms_val_cast) # (b, 81, 270)
- return final_boxes, final_probs
-
- return yolo_positions, yolo_probs
- else:
- return yolo_loss_result
-
-
- def YoloPredictNet(data, origin_image_info, trainable=False):
- print("nms:", nms)
- global layer_number
- #data = flow.transpose(data, perm=[0, 3, 1, 2])
- blob = conv_unit(
- data,
- num_filter=32,
- kernel=[
- 3,
- 3],
- stride=[
- 1,
- 1],
- pad="same",
- data_format="NCHW",
- use_bias=False,
- trainable=trainable,
- prefix='yolo-layer' +
- str(layer_number))
- blob = DarknetNetConvXBody(blob, trainable, lambda x: x)
- yolo_pos_result, yolo_prob_result = YoloNetBody(
- in_blob=blob, origin_image_info=origin_image_info, trainable=trainable)
- return yolo_pos_result, yolo_prob_result
-
-
- def YoloTrainNet(data, gt_box, gt_label, gt_valid_num, trainable=True):
- global layer_number
- #data = flow.transpose(data, perm=[0, 3, 1, 2])
- blob = conv_unit(
- data,
- num_filter=32,
- kernel=[
- 3,
- 3],
- stride=[
- 1,
- 1],
- pad="same",
- data_format="NCHW",
- use_bias=False,
- trainable=trainable,
- prefix='yolo-layer' +
- str(layer_number))
- blob = DarknetNetConvXBody(blob, trainable, lambda x: x)
- yolo_loss_result = YoloNetBody(
- in_blob=blob,
- gt_bbox_blob=gt_box,
- gt_label_blob=gt_label,
- gt_valid_num_blob=gt_valid_num,
- trainable=trainable)
- return yolo_loss_result
|