/**
 * Copyright 2019-2020 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

/*!
 * \file nn_detect_ops.h
 * \brief
 */
- #ifndef OPS_BUILT_IN_OP_PROTO_INC_NN_DETECT_OPS_H_
- #define OPS_BUILT_IN_OP_PROTO_INC_NN_DETECT_OPS_H_
-
- #include "graph/operator_reg.h"
- #include "graph/operator.h"
-
- namespace ge {
-
- /**
- *@brief Generates bounding boxes based on "rois" and "deltas".
- * It is a customized FasterRcnn operator . \n
-
- *@par Inputs:
- * Two inputs, including:
- *@li rois: Region of interests (ROIs) generated by the region proposal
- * network (RPN). A 2D Tensor of type float32 or float16 with shape (N, 4).
- * "N" indicates the number of ROIs, and the value "4" refers to "x0", "x1",
- * "y0", and "y1".
- *@li deltas: Absolute variation between the ROIs generated by the RPN and
- * ground truth boxes. A 2D Tensor of type float32 or float16 with shape (N, 4).
- * "N" indicates the number of errors, and 4 indicates "dx", "dy", "dw", and "dh" . \n
-
- *@par Attributes:
- *@li means: An index of type int. Defaults to [0,0,0,0].
- * "deltas" = "deltas" x "stds" + "means".
- *@li stds: An index of type int. Defaults to [1.0,1.0,1.0,1.0].
- * "deltas" = "deltas" x "stds" + "means".
- *@li max_shape: Shape [h, w], specifying the size of the image transferred to
- * the network. Used to ensure that the bbox shape after conversion does not
- * exceed "max_shape".
- *@li wh_ratio_clip: Defaults to "16/1000". The values of "dw" and "dh" fall
- * within (-wh_ratio_clip, wh_ratio_clip) . \n
-
- *@par Outputs:
- *bboxes: Bboxes generated based on "rois" and "deltas". Have the same format
- * and type as "rois".
- */
- REG_OP(BoundingBoxDecode)
- .INPUT(rois, TensorType({DT_FLOAT16, DT_FLOAT}))
- .INPUT(deltas, TensorType({DT_FLOAT16, DT_FLOAT}))
- .OUTPUT(bboxes, TensorType({DT_FLOAT16, DT_FLOAT}))
- .ATTR(means, ListFloat, {0.0, 0.0, 0.0, 0.0})
- .ATTR(stds, ListFloat, {1.0, 1.0, 1.0, 1.0})
- .REQUIRED_ATTR(max_shape, ListInt)
- .ATTR(wh_ratio_clip, Float, 0.016)
- .OP_END_FACTORY_REG(BoundingBoxDecode)
-
- /**
- *@brief Computes the coordinate variations between bboxes and ground truth
- * boxes. It is a customized FasterRcnn operator . \n
-
- *@par Inputs:
- * Two inputs, including:
- *@li anchor_box: Anchor boxes. A 2D Tensor of float32 with shape (N, 4).
- * "N" indicates the number of bounding boxes, and the value "4" refers to
- * "x0", "x1", "y0", and "y1".
- *@li ground_truth_box: Ground truth boxes. A 2D Tensor of float32 with
- * shape (N, 4). "N" indicates the number of bounding boxes, and the value "4"
- * refers to "x0", "x1", "y0", and "y1" . \n
-
- *@par Attributes:
- *@li means: An index of type int. Defaults to [0,0,0,0].
- * "deltas" = "deltas" x "stds" + "means".
- *@li stds: An index of type int. Defaults to [1.0,1.0,1.0,1.0].
- * "deltas" = "deltas" x "stds" + "means" . \n
-
- *@par Outputs:
- *delats: A 2D Tensor of type float32 with shape (N, 4), specifying the variations between all anchor boxes and ground truth boxes.
- */
- REG_OP(BoundingBoxEncode)
- .INPUT(anchor_box, TensorType({DT_FLOAT16, DT_FLOAT}))
- .INPUT(ground_truth_box, TensorType({DT_FLOAT16, DT_FLOAT}))
- .OUTPUT(delats, TensorType({DT_FLOAT16, DT_FLOAT}))
- .ATTR(means, ListFloat, {0.0, 0.0, 0.0, 0.0})
- .ATTR(stds, ListFloat, {1.0, 1.0, 1.0, 1.0})
- .OP_END_FACTORY_REG(BoundingBoxEncode)
-
- /**
- *@brief Judges whether the bounding box is valid. It is a customized
- * FasterRcnn operator . \n
-
- *@par Inputs:
- * Two inputs, including:
- *@li bbox_tensor: Bounding box. A 2D Tensor of type float16 with shape (N, 4).
- * "N" indicates the number of bounding boxes, the value "4" indicates "x0",
- * "x1", "y0", and "y1".
- *@li img_metas: Valid boundary value of the image. A 1D Tensor of type float16
- * with shape (16,)
-
- *@par Outputs:
- *valid_tensor: A bool with shape (N, 1), specifying whether an input anchor is
- * in an image. "1" indicates valid, while "0" indicates invalid . \n
-
- *@attention Constraints:
- * 16 "img_metas" are input. The first three numbers (height, width, ratio) are
- * valid, specifying the valid boundary (heights x ratio, weights x ratio).
- */
- REG_OP(CheckValid)
- .INPUT(bbox_tensor, TensorType({DT_FLOAT16}))
- .INPUT(img_metas, TensorType({DT_FLOAT16}))
- .OUTPUT(valid_tensor, TensorType({DT_INT8}))
- .OP_END_FACTORY_REG(CheckValid)
-
- /**
- *@brief Computes the intersection over union (iou) or the intersection over
- * foreground (iof) based on the ground-truth and predicted regions . \n
-
- *@par Inputs:
- * Two inputs, including:
- *@li bboxes: Bounding boxes, a 2D Tensor of type float16 or float32 with
- * shape (N, 4). "N" indicates the number of bounding boxes, and the value
- * "4" refers to "x0", "x1", "y0", and "y1".
- *@li gtboxes: Ground-truth boxes, a 2D Tensor of type float16 or float32
- * with shape (M, 4). "M" indicates the number of ground truth boxes, and
- * the value "4" refers to "x0", "x1", "y0", and "y1" . \n
-
- *@par Attributes:
- *mode: Computation mode, a character string with the value range of [iou, iof] . \n
-
- *@par Outputs:
- *overlap: A 2D Tensor of type float16 or float32 with shape [M, N], specifying
- * the IoU or IoF ratio . \n
-
- *@attention Constraints:
- * Only computation of float16 data is supported. To avoid overflow, the input
- * length and width are scaled by 0.2 internally.
- */
- REG_OP(Iou)
- .INPUT(bboxes, TensorType({DT_FLOAT16, DT_FLOAT}))
- .INPUT(gtboxes, TensorType({DT_FLOAT16, DT_FLOAT}))
- .OUTPUT(overlap, TensorType({DT_FLOAT16, DT_FLOAT}))
- .ATTR(mode, String, "iou")
- .OP_END_FACTORY_REG(Iou)
-
- /**
- *@brief Performs the backpropagation of ROIAlign for training scenarios . \n
-
- *@par Inputs:
- * Three inputs, including:
- *@li ydiff: A 5HD gradient input of type float32.
- *@li rois: ROI position. A 2D Tensor of float32 with shape (N, 5). "N" indicates the number of ROIs,
- the value "5" indicates the indexes of images where the ROIs are located, "x0", "x1", "y0", and "y1".
- *@li rois_n: An optional input, specifying the number of valid ROIs. This parameter is reserved . \n
-
- *@par Attributes:
- *@li xdiff_shape: A required list of 4 ints, obtained based on the shape of "features" of ROIAlign.
- *@li pooled_width: A required attribute of type int, specifying the W dimension.
- *@li pooled_height: A required attribute of type int, specifying the H dimension.
- *@li spatial_scale: A required attribute of type float, specifying the scaling ratio of "features" to the original image.
- *@li sample_num: An optional attribute of type int, specifying the horizontal and vertical
- sampling frequency of each output. If this attribute is set to "0", the sampling frequency is
- equal to the rounded up value of "rois", which is a floating point number. Defaults to "2" . \n
-
- *@par Outputs:
- *xdiff: Gradient added to input "features". Has the same 5HD shape as input "features".
- */
- REG_OP(ROIAlignGrad)
- .INPUT(ydiff, TensorType({DT_FLOAT}))
- .INPUT(rois, TensorType({DT_FLOAT}))
- .OPTIONAL_INPUT(rois_n, TensorType({DT_INT32}))
- .OUTPUT(xdiff, TensorType({DT_FLOAT}))
- .REQUIRED_ATTR(xdiff_shape, ListInt)
- .REQUIRED_ATTR(pooled_width, Int)
- .REQUIRED_ATTR(pooled_height, Int)
- .REQUIRED_ATTR(spatial_scale, Float)
- .ATTR(sample_num, Int, 2)
- .OP_END_FACTORY_REG(ROIAlignGrad)
-
- /**
- *@brief Obtains the ROI feature matrix from the feature map. It is a customized FasterRcnn operator . \n
-
- *@par Inputs:
- * Three inputs, including:
- *@li features: A 5HD Tensor of type float32 or float16.
- *@li rois: ROI position. A 2D Tensor of float32 or float16 with shape (N, 5). "N" indicates the number of ROIs,
- the value "5" indicates the indexes of images where the ROIs are located,
- * "x0", "y0", "x1", and "y1".
- *@li rois_n: An optional input of type int32, specifying the number of valid ROIs. This parameter is reserved . \n
-
- *@par Attributes:
- *@li spatial_scale: A required attribute of type float32, specifying the scaling ratio of "features" to the original image.
- *@li pooled_height: A required attribute of type int32, specifying the H dimension.
- *@li pooled_width: A required attribute of type int32, specifying the W dimension.
- *@li sample_num: An optional attribute of type int32, specifying the horizontal and vertical sampling frequency of each output. If this attribute is set to "0",
- * the sampling frequency is equal to the rounded up value of "rois", which is a floating point number. Defaults to "2".
- *@li roi_end_mode: An optional attribute of type int32. Defaults to "1" . \n
-
- *@par Outputs:
- * output: Outputs the feature sample of each ROI position. The format is 5HD Tensor of type float32 or float16.
- The axis N is the number of input ROIs. Axes H, W, and C are consistent
- * with the values of "pooled_height",
- * "pooled_width", and "features", respectively.
- */
- REG_OP(ROIAlign)
- .INPUT(features, TensorType({DT_FLOAT16, DT_FLOAT}))
- .INPUT(rois, TensorType({DT_FLOAT16, DT_FLOAT}))
- .OPTIONAL_INPUT(rois_n, TensorType({DT_INT32}))
- .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT}))
- .REQUIRED_ATTR(spatial_scale, Float)
- .REQUIRED_ATTR(pooled_height, Int)
- .REQUIRED_ATTR(pooled_width, Int)
- .ATTR(sample_num, Int, 2)
- .ATTR(roi_end_mode, Int, 1)
- .OP_END_FACTORY_REG(ROIAlign)
-
- /**
- *@brief Performs SSD prior box detection . \n
-
- *@par Inputs:
- * Two inputs, including:
- *@li x: An NC1HWC0 or NCHW feature map of type is float32 or float16.
- *@li img: source image. Has the same type and format as "x" . \n
-
- *@par Attributes:
- *@li min_size: A required float32, specifying the minimum edge length of a square prior box.
- *@li max_size: A required float32, specifying the maximum edge length of a square prior box: sqrt(min_size * max_size)
- *@li aspect_ratio: An required float32, specifying the aspect ratio for generated rectangle boxes. The height
- is min_size/sqrt(aspect_ratio), the width is min_size*sqrt(aspect_ratio). Defaults to "1.0".
- *@li img_h: An optional int32, specifying the source image height. Defaults to "0".
- *@li img_w: An optional int32, specifying the source image width. Defaults to "0".
- *@li step_h: An optional float32, specifying the height step for mapping the center point from the feature map to the source image. Defaults to "0.0".
- *@li step_w: An optional float32, specifying the width step for mapping the center point from the feature map to the source image. Defaults to "0.0".
- *@li flip: An optional bool. If "True", "aspect_ratio" will be flipped. Defaults to "True".
- *@li clip: An optional bool. If "True", a prior box is clipped to within [0, 1]. Defaults to "False".
- *@li offset: An optional float32, specifying the offset. Defaults to "0.5".
- *@li variance: An optional float32, specifying the variance of a prior box, either one or four variances. Defaults to "0.1" (one value) . \n
-
- *@par Outputs:
- *y: An ND tensor of type float32 or float16, specifying the prior box information, including its coordinates and variance . \n
-
- *@attention Constraints:
- * This operator applies only to SSD networks.
- *@see SSDDetectionOutput()
- *@par Third-party framework compatibility
- * It is a custom operator. It has no corresponding operator in Caffe.
- */
- REG_OP(PriorBox)
- .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT}))
- .INPUT(img, TensorType({DT_FLOAT16, DT_FLOAT}))
- .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT}))
- .REQUIRED_ATTR(min_size, ListFloat)
- .REQUIRED_ATTR(max_size, ListFloat)
- .REQUIRED_ATTR(aspect_ratio, ListFloat)
- .ATTR(img_h, Int, 0)
- .ATTR(img_w, Int, 0)
- .ATTR(step_h, Float, 0.0)
- .ATTR(step_w, Float, 0.0)
- .ATTR(flip, Bool, true)
- .ATTR(clip, Bool, false)
- .ATTR(offset, Float, 0.5)
- .ATTR(variance, ListFloat, {0.1})
- .OP_END_FACTORY_REG(PriorBox);
-
- /**
- *@brief Performs SSD prior box detection, with four additional matrices and the "aspect_ratio" attribute deleted compared to PriorBox . \n
-
- *@par Inputs:
- * Six inputs, including:
- *@li x: An NC1HWC0 or NCHW feature map of type is float32 or float16.
- *@li img: source image. Has the same type and format as "x".
- *@li data_h: An NC1HWC0 or NCHW tensor of type float32 or float16, specifying the matrix for indexing the feature map height.
- *@li data_w: An NC1HWC0 or NCHW tensor of type float32 or float16, specifying the matrix for indexing the feature map width.
- *@li box_height: An NC1HWC0 or NCHW tensor of type float32 or float16, specifying the height of each prior box.
- *@li box_width: An NC1HWC0 or NCHW tensor of type float32 or float16, specifying the width of each prior box . \n
-
- *@par Attributes:
- *@li min_size: A required float32, specifying the minimum edge length of a square prior box.
- *@li max_size: A required float32, specifying the maximum edge length of a square prior box: sqrt(min_size * max_size)
- *@li img_h: An optional int32, specifying the height of the source image.
- *@li img_w: An optional int32, specifying the width of the source image.
- *@li step_h: An optional float32, specifying the height step for mapping the center point from the feature map to the source image.
- *@li step_w: An optional float32, specifying the width step for mapping the center point from the feature map to the source image.
- *@li flip: An optional bool. If "True", "aspect_ratio" will be flipped. Defaults to "True".
- *@li clip: An optional bool. If "True", a prior box is clipped to within [0, 1]. Defaults to "False".
- *@li offset: An optional float32, specifying the offset. Defaults to "0.5".
- *@li variance: An optional float32, specifying the variance of a prior box, either one or four variances. Defaults to "0.1" (one value) . \n
-
- *@par Outputs:
- *y: An ND tensor of type float32 or float16, specifying the prior box information, including its coordinates and variance . \n
-
- *@attention Constraints:
- * This operator applies only to SSD networks.
- *@see SSDDetectionOutput()
- *@par Third-party framework compatibility
- * It is a custom operator. It has no corresponding operator in Caffe.
- *@par Restrictions:
- *Warning: THIS FUNCTION IS DEPRECATED. Please use PriorBox instead.
- */
- REG_OP(PriorBoxD)
- .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT}))
- .INPUT(img, TensorType({DT_FLOAT16, DT_FLOAT}))
- .INPUT(data_h, TensorType({DT_FLOAT16, DT_FLOAT}))
- .INPUT(data_w, TensorType({DT_FLOAT16, DT_FLOAT}))
- .INPUT(box_height, TensorType({DT_FLOAT16, DT_FLOAT}))
- .INPUT(box_width, TensorType({DT_FLOAT16, DT_FLOAT}))
- .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT}))
- .REQUIRED_ATTR(min_size, ListFloat)
- .REQUIRED_ATTR(max_size, ListFloat)
- .ATTR(img_h, Int, 0)
- .ATTR(img_w, Int, 0)
- .ATTR(step_h, Float, 0.0)
- .ATTR(step_w, Float, 0.0)
- .ATTR(flip, Bool, true)
- .ATTR(clip, Bool, false)
- .ATTR(offset, Float, 0.5)
- .ATTR(variance, ListFloat, {0.1})
- .OP_END_FACTORY_REG(PriorBoxD);
-
- /**
- *@brief Performs SSD prior box detection, with four additional matrices and the "aspect_ratio" attribute deleted compared to PriorBox . \n
-
- *@par Inputs:
- * Six inputs, including:
- *@li x: An NC1HWC0 or NCHW feature map of type is float32 or float16.
- *@li img: source image. Has the same type and format as "x".
- *@li boxes: An ND tensor of type float32 or float16, specifying the prior box information. Same as output y
-
- *@par Attributes:
- *@li min_size: A required float32, specifying the minimum edge length of a square prior box.
- *@li max_size: A required float32, specifying the maximum edge length of a square prior box: sqrt(min_size * max_size)
- *@li img_h: An optional int32, specifying the height of the source image.
- *@li img_w: An optional int32, specifying the width of the source image.
- *@li step_h: An optional float32, specifying the height step for mapping the center point from the feature map to the source image.
- *@li step_w: An optional float32, specifying the width step for mapping the center point from the feature map to the source image.
- *@li flip: An optional bool. If "True", "aspect_ratio" will be flipped. Defaults to "True".
- *@li clip: An optional bool. If "True", a prior box is clipped to within [0, 1]. Defaults to "False".
- *@li offset: An optional float32, specifying the offset. Defaults to "0.5".
- *@li variance: An optional float32, specifying the variance of a prior box, either one or four variances. Defaults to "0.1" (one value) . \n
-
- *@par Outputs:
- *y: An ND tensor of type float32 or float16, specifying the prior box information, including its coordinates and variance . \n
-
- *@attention Constraints:
- * This operator applies only to SSD networks.
- *@see SSDDetectionOutput()
- *@par Third-party framework compatibility
- * It is a custom operator. It has no corresponding operator in Caffe.
- *@par Restrictions:
- *Warning: THIS FUNCTION IS DEPRECATED. Please use PriorBox instead.
- */
- REG_OP(PriorBoxDV2)
- .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT}))
- .INPUT(img, TensorType({DT_FLOAT16, DT_FLOAT}))
- .INPUT(boxes, TensorType({DT_FLOAT16, DT_FLOAT}))
- .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT}))
- .REQUIRED_ATTR(min_size, ListFloat)
- .REQUIRED_ATTR(max_size, ListFloat)
- .ATTR(img_h, Int, 0)
- .ATTR(img_w, Int, 0)
- .ATTR(step_h, Float, 0.0)
- .ATTR(step_w, Float, 0.0)
- .ATTR(flip, Bool, true)
- .ATTR(clip, Bool, false)
- .ATTR(offset, Float, 0.5)
- .ATTR(variance, ListFloat, {0.1})
- .OP_END_FACTORY_REG(PriorBoxDV2);
-
- /**
- *@brief Performs Position Sensitive ROI Pooling . \n
-
- *@par Inputs:
- * Two inputs, including:
- *@li x: An NC1HWC0 tensor of type float16 or float32, describing the feature
- * map, dimension C1 must be equal to
- * (int(output_dim+15)/C0))*group_size*group_size.
- *@li rois: A tensor of type float16 or float32, with shape
- * [batch, 5, rois_num], describing the ROIs, each ROI consists of five
- * elements: "batch_id", "x1", "y1", "x2", and "y2", which "batch_id" indicates
- * the index of the input feature map, "x1", "y1", "x2", or "y2" must be
- * greater than or equal to "0.0" . \n
-
- *@par Attributes:
- *@li output_dim: A required int32, specifying the number of output channels,
- * must be greater than 0.
- *@li group_size: A required int32, specifying the number of groups to encode
- * position-sensitive score maps, must be within the range (0, 128).
- *@li spatial_scale: A required float32, scaling factor for mapping the input
- * coordinates to the ROI coordinates . \n
-
- *@par Outputs:
- *y: An NC1HWC0 tensor of type float16 or float32, describing the result
- * feature map . \n
-
- *@attention Constraints:
- * HC1HWC0: channel must be Group_size squared, rois_num is a multiple of 16
- */
- REG_OP(PSROIPooling)
- .INPUT(x, TensorType({DT_FLOAT, DT_FLOAT16}))
- .INPUT(rois, TensorType({DT_FLOAT, DT_FLOAT16}))
- .REQUIRED_ATTR(output_dim, Int)
- .REQUIRED_ATTR(group_size, Int)
- .REQUIRED_ATTR(spatial_scale, Float)
- .OUTPUT(y, TensorType({DT_FLOAT, DT_FLOAT16}))
- .OP_END_FACTORY_REG(PSROIPooling)
-
- /**
- *@brief Returns detection result . \n
-
- *@par Inputs:
- * Four inputs, including:
- *@li rois: An NCHW tensor of type floa16 or float32, output from operator proposal_d at the preceding layer, used as the input of operator FSRDetectionOutput.
- *@li bbox_delta: An NCHWC0 tensor of type floa16 or float32, specifying the prediction offset, used to update the coordinates [x1, y1, x2, y2] of each ROI.
- *@li score: An NCHWC0 tensor of type floa16 or float32, specifying the probability of each class. Class 0 is the background class.
- *@li im_info: An ND tensor of type float16 or float32, specifying the Image information.
- *@li actual_rois_num: An optional NCHW tensor of type int32, specifying the number of valid boxes per batch.
- *@par Attributes:
- *@li batch_rois: An optional int32, specifying the number of images to be predicted. Defaults to "1".
- *@li num_classes: An required int32, specifying the number of classes to be predicted. The value must be greater than 0.
- *@li score_threshold: An required float32, specifying the threshold for box filtering. The value range is [0.0, 1.0].
- *@li iou_threshold: An required float32, specifying the confidence threshold for box filtering, which is the output "obj" of operator Region. The value range is (0.0, 1.0).
- *@par Outputs:
- *@li box: A tensor of type float16 or float32 for proposal of actual output, with output shape [batch, numBoxes,8].
- * 8 means [x1, y1, x2, y2, score, label, batchID, NULL], the maximum value of numBoxes is 1024.
- That is, take min (the maximum number of input boxes, 1024)
- *@li actual_bbox_num: A tensor of type int32 With shape [bacth, num_classes], specifying the number of output boxes . \n
-
- *@attention Constraints:
- *@li totalnum < max_rois_num * batch_rois.
- *@li "score" must be with shape (total_num, (num_classes+15)//16, 1, 1, 16), where "total_num" indicates the number of valid input boxes of all images.
- *@li "bbox_delta" must be with shape (total_num, (num_classes*4+15)//16, 1, 1, 16), where "total_num" indicates the number of valid input boxes of all images.
- *@par Third-party framework compatibility
- * It is a custom operator. It has no corresponding operator in Caffe.
- */
- REG_OP(FSRDetectionOutput)
- .INPUT(rois, TensorType({DT_FLOAT, DT_FLOAT16}))
- .INPUT(bbox_delta, TensorType({DT_FLOAT, DT_FLOAT16}))
- .INPUT(score, TensorType({DT_FLOAT, DT_FLOAT16}))
- .INPUT(im_info, TensorType({DT_FLOAT, DT_FLOAT16}))
- .OPTIONAL_INPUT(actual_rois_num, TensorType({DT_INT32}))
- .OUTPUT(actual_bbox_num, TensorType({DT_INT32}))
- .OUTPUT(box, TensorType({DT_FLOAT, DT_FLOAT16}))
- .ATTR(batch_rois, Int, 1)
- .REQUIRED_ATTR(num_classes, Int)
- .REQUIRED_ATTR(score_threshold, Float)
- .REQUIRED_ATTR(iou_threshold, Float)
- .OP_END_FACTORY_REG(FSRDetectionOutput)
-
- /**
- *@brief Returns detection result . \n
-
- *@par Inputs:
- * Four inputs, including:
- *@li bbox_delta: An ND tensor of type floa16 or float32, specifying the box loc predictions, used as the input of operator SSDDetectionOutput.
- *@li score: An ND tensor of type floa16 or float32, specifying the box confidences data, used as the input of operator SSDDetectionOutput.
- *@li anchors: An ND tensor of type floa16 or float32, output from operator PriorBoxD, used as the input of operator SSDDetectionOutput.
- *@par Attributes:
- *@li num_classes: An optional int32, specifying the number of classes to be predicted. Defaults to "2". The value must be greater than 1 and lesser than 1025.
- *@li share_location: An optional bool, specify the shared location. Defaults to True
- *@li background_label_id: An optional int32, specify the background label id. Must be 0
- *@li iou_threshold: An optional float32, specify the nms threshold
- *@li top_k: An optional int32, specify the topk value. Defaults to 200
- *@li eta: An optional float32, specify the eta value. Defaults to 1.0
- *@li variance_encoded_in_target: An optional bool, specify whether variance encoded in target or not. Defaults to False
- *@li code_type: An optional int32, specify the code type. Defaults to 1(only supports 2). The corner is 1, center_size is 2, corner_size is 3
- *@li keep_top_k: An optional int32, specify the topk value after nms. Defaults to -1
- *@li confidence_threshold: An optional float32, specify the topk filter threshold. Only consider detections with confidence greater than the threshold
- *@li kernel_name: An optional string, specifying the operator name. Defaults to "ssd_detection_output".
- *@par Outputs:
- *@li out_boxnum: A tensor of type int32, specifying the number of output boxes.
- *@li y: A tensor of type float16 or float32 with shape [batch,keep_top_k, 8], describing the information of each output box.
- * In output shape, 8 means (batchID, label(classID), score (class probability), xmin, ymin, xmax, ymax, null)
- * It is a custom operator. It has no corresponding operator in Caffe.
- */
- REG_OP(SSDDetectionOutput)
- .INPUT(bbox_delta, TensorType({DT_FLOAT, DT_FLOAT16}))
- .INPUT(score, TensorType({DT_FLOAT, DT_FLOAT16}))
- .INPUT(anchors, TensorType({DT_FLOAT, DT_FLOAT16}))
- .OUTPUT(out_boxnum, TensorType({DT_INT32}))
- .OUTPUT(y, TensorType({DT_FLOAT, DT_FLOAT16}))
- .ATTR(num_classes, Int, 2)
- .ATTR(share_location, Bool, true)
- .ATTR(background_label_id, Int, 0)
- .ATTR(iou_threshold, Float, 0.3)
- .ATTR(top_k, Int, 200)
- .ATTR(eta, Float, 1.0)
- .ATTR(variance_encoded_in_target, Bool, false)
- .ATTR(code_type, Int, 1)
- .ATTR(keep_top_k, Int, -1)
- .ATTR(confidence_threshold, Float, 0.0)
- .OP_END_FACTORY_REG(SSDDetectionOutput)
-
- /**
- *@brief Normalizes data. It is called Region on YOLO v2 and Yolo on YOLO v3 . \n
-
- *@par Inputs:
- *x: An NCHW tensor of type float16 or float32. The data is with shape (N, boxes*(coords+obj+classes), H, W),
- where, "obj" indicates the confidence of an object, and only one confidence is supported. Boxes are arranged
- as xx...xyy...yww...whh...hbb...bc0c0..c0c1c1...c1......cncn...cn . \n
-
- *@par Attributes:
- *@li boxes: A required int32, specifying the number of anchor boxes. Defaults to "5" for V2 or "3" for V3.
- *@li coords: An int32, specifying the number of parameters required for locating an object. The value is fixed at "4", corresponding to (x,y,w,h).
- *@li classes: An int32, specifying the number of prediction classes. Defaults to "80". The value range is [1, 1024].
- *@li yolo_version: A string, specifying the YOLO version, either "V2" or "V3".Defaults to "V3"
- *@li softmax: A bool, specifying whether to perform softmax, valid only when "yolo_version = V2". Defaults to "false".
- *@li background: A bool, specifying the operation types of the obj and classes, used in conjunction with "softmax" and valid only when "yolo_version = V2". Defaults to "false".
- *@li softmaxtree: A bool, Fixed to False, defined in Lite, but not used. Defaults to "false" . \n
-
- *@par Outputs:
- *@li coord_data: A float16 or float32 with shape [N, boxes*coords, ceilx(height*width*2+32, 32)/2],
- * where "ceil" indicates that a detected box is aligned upwards with the second parameter. Specifies the coordinates of a detected box.
- *@li obj_prob: A float16 or float32 with shape [N, ceilx(boxes*height*width *2+32, 32)/2],
- * where "ceil" indicates that a detected box is aligned upwards with the second parameter. Specifies the confidence.
- *@li classes_prob: A float16 or float32 with shape [N, classes, ceilx(boxes*height*width *2+32, 32)/2],
- * where "ceil" indicates that a detected box is aligned upwards with the second parameter. Specifies the prediction classes . \n
-
- *@attention Constraints:
- *@li This operator applies to YOLO v2 and v3 networks.
- *@li The succeeding layer of the Yolo operator must be operator Yolov3DetectionOutput.
- *@par Third-party framework compatibility
- * It is a custom operator. It has no corresponding operator in Caffe.
- */
- REG_OP(Yolo)
- .INPUT(x, TensorType({DT_FLOAT16,DT_FLOAT}))
- .OUTPUT(coord_data, TensorType({DT_FLOAT16,DT_FLOAT}))
- .OUTPUT(obj_prob, TensorType({DT_FLOAT16,DT_FLOAT}))
- .OUTPUT(classes_prob, TensorType({DT_FLOAT16,DT_FLOAT}))
- .ATTR(boxes, Int, 3)
- .ATTR(coords, Int, 4)
- .ATTR(classes, Int, 80)
- .ATTR(yolo_version, String, "V3")
- .ATTR(softmax, Bool, false)
- .ATTR(background, Bool, false)
- .ATTR(softmaxtree, Bool, false)
- .OP_END_FACTORY_REG(Yolo)
-
- /**
- *@brief Performs YOLO V2 detection . \n
-
- *@par Inputs:
- * Four inputs, including:
- *@li The outputs of operator Yolo at the preceding layer (that is, one Yolo operator on YOLO v2) are used as the inputs of operator Yolov3DetectionOutput.
- * Each Yolo operator has three outputs: "coords", "obj", and "class". For details, see the description of operator Yolo.
- *@li img_info: A float16 or float32, describing the image information including the required image height and width
- * and the actual image height and width.
- *
- *@par Attributes:
- *@li biases: A required float. "biases = Number of Yolo operators at the preceding layer x 2 x boxes"
- *@li boxes: A required int32, specifying the number of anchor boxes predicted for each Yolo layer.
- *@li coords: Specifies the number of coordinate parameters. Must be 4.
- *@li classes: A required int32, specifying the number of classes to be predicted. The value range is [1, 20].
- *@li relative: An optional bool. Defaults to and must be "true".
- *@li obj_threshold: A required float, specifying the confidence threshold for box filtering,
- * which is the output "obj" of operator Yolo). The value range is [0.0, 1.0] . \n
-
- *@li post_nms_topn: An optional int32. This attribute is reserved.
- *@li score_threshold: A required float, specifying the class score threshold for box filtering,
- which is the output "class" of operator Yolo). The value range is [0.0, 1.0].
- *@li iou_threshold: A required float, specifying the intersection-over-union (IOU) threshold for box filtering. The value range is [0.0, 1.0].
- *@li pre_nms_topn: An optional int, specifying the number of boxes for non-maximum suppression (NMS). Defaults to "512".
- *
- *@par Outputs:
- *@li boxout: A tensor of type float16 or float32 with shape [batch,6,post_nms_topn]. describing the information of each output box,
- * In output shape, 6 means x1, y1, x2, y2, score, label(class). Output by the number of box_out_num.
- *@li boxoutnum: A tensor of type int32 with shape [batch,8,1,1], specifying the number of output boxes. It means only the first one of the 8 numbers is valid,
- * the number of valid boxes in each batch, the maximum number of valid boxes in each batch is 1024
- *
- *@attention Constraints:
- *@li This operator applies only to the YOLO v2 network.
- *@li The preceding layer of operator Yolov2DetectionOutput must be one Yolo operator.
- *
- *@see Yolo()
- *@par Third-party framework compatibility
- * It is a custom operator. It has no corresponding operator in Caffe.
- */
- REG_OP(YoloV2DetectionOutput)
- .INPUT(coord_data, TensorType({DT_FLOAT16,DT_FLOAT}))
- .INPUT(obj_prob, TensorType({DT_FLOAT16,DT_FLOAT}))
- .INPUT(classes_prob, TensorType({DT_FLOAT16,DT_FLOAT}))
- .INPUT(img_info, TensorType({DT_FLOAT16,DT_FLOAT}))
- .REQUIRED_ATTR(biases, ListFloat)
- .ATTR(boxes, Int, 5)
- .ATTR(coords, Int, 4)
- .ATTR(classes, Int, 20)
- .ATTR(relative, Bool, true)
- .ATTR(obj_threshold, Float, 0.5)
- .ATTR(post_nms_topn, Int, 512)
- .ATTR(score_threshold, Float, 0.5)
- .ATTR(iou_threshold, Float, 0.45)
- .ATTR(pre_nms_topn, Int, 512)
- .OUTPUT(box_out, TensorType({DT_FLOAT16,DT_FLOAT}))
- .OUTPUT(box_out_num, TensorType({DT_INT32}))
- .OP_END_FACTORY_REG(YoloV2DetectionOutput)
-
- /**
- *@brief Performs YOLO V2 detection . \n
-
- *@par Inputs:
- *Six inputs, including:
- *@li The outputs of operator Yolo at the preceding layer (that is, one Yolo operator on YOLO v2) are used as the inputs of operator YoloV2DetectionOutputD.
- * Each Yolo operator has three outputs: "coords", "obj", and "class". For details, see the description of operator Yolo.
- *@li imginfo: A float16, describing the image information including the required image height and width
- * and the actual image height and width.
- *@li windex: A windex tensor with shape [height, weight]. Has the same type as the inputs.
- * [[0,1,2...(weight-1)],[0,1,2...(weight-1)]...[0,1,2...(weight-1)]] consisting of h groups of [0, 1, 2...(weight-1)] is formed.
-
- *@li hindex: A hindex tensor with shape [height, weight]. Has the same type as the inputs. [[0,0...0],[1,1...1],[2,2...2]...[height-1,height-1...,height-1]].
-
- *
- *@par Attributes:
- *@li biases: A required float. "biases = Number of Yolo operators at the preceding layer x 2 x boxes"
- *@li boxes: A required int32, specifying the number of anchor boxes predicted for each Yolo layer.
- *@li coords: Specifies the number of coordinate parameters. Must be 4.
- *@li classes: A required int32, specifying the number of classes to be predicted. The value range is [1, 20].
- *@li relative: An optional bool. Defaults to and must be "true".
- *@li obj_threshold: A required float, specifying the confidence threshold for box filtering, which is the output "obj" of operator Yolo). The value range is [0.0, 1.0].
- *@li post_nms_topn: An optional int32. This attribute is reserved.
- *@li score_threshold: A required float, specifying the class score threshold for box filtering, which is the output "class" of operator Yolo). The value range is [0.0, 1.0] . \n
-
- *@li iou_threshold: A required float, specifying the intersection-over-union (IOU) threshold for box filtering. The value range is [0.0, 1.0].
- *@li pre_nms_topn: An optional int, specifying the number of boxes for non-maximum suppression (NMS). Defaults to "512".
- *
- *@par Outputs:
- *@li boxout: A tensor of type float16 or float32 with shape [batch,6,post_nms_topn]. describing the information of each output box,
- * In output shape, 6 means x1, y1, x2, y2, score, label(class). Output by the number of box_out_num.
- *@li boxoutnum: A tensor of type int32 with shape [batch,8,1,1], specifying the number of output boxes. It means only the first one of the 8 numbers is valid,
- * the number of valid boxes in each batch, the maximum number of valid boxes in each batch is 1024
- *
- *@attention Constraints:
- *@li This operator applies only to the YOLO v2 network.
- *@li The preceding layer of operator YoloV2DetectionOutputD must be one Yolo operator . \n
-
- *@see Yolo()
- *@par Third-party framework compatibility
- * It is a custom operator. It has no corresponding operator in Caffe.
- *@par Restrictions:
- *Warning: THIS FUNCTION IS DEPRECATED. Please use YoloV2DetectionOutput instead.
- */
- REG_OP(YoloV2DetectionOutputD)
- .INPUT(coord_data, TensorType({DT_FLOAT16,DT_FLOAT}))
- .INPUT(obj_prob, TensorType({DT_FLOAT16,DT_FLOAT}))
- .INPUT(classes_prob, TensorType({DT_FLOAT16,DT_FLOAT}))
- .INPUT(img_info, TensorType({DT_FLOAT16,DT_FLOAT}))
- .INPUT(windex, TensorType({DT_FLOAT16,DT_FLOAT}))
- .INPUT(hindex, TensorType({DT_FLOAT16,DT_FLOAT}))
- .REQUIRED_ATTR(biases, ListFloat)
- .ATTR(boxes, Int, 5)
- .ATTR(coords, Int, 4)
- .ATTR(classes, Int, 20)
- .ATTR(relative, Bool, true)
- .ATTR(obj_threshold, Float, 0.5)
- .ATTR(post_nms_topn, Int, 512)
- .ATTR(score_threshold, Float, 0.5)
- .ATTR(iou_threshold, Float, 0.45)
- .ATTR(pre_nms_topn, Int, 512)
- .OUTPUT(box_out, TensorType({DT_FLOAT16,DT_FLOAT}))
- .OUTPUT(box_out_num, TensorType({DT_INT32}))
- .OP_END_FACTORY_REG(YoloV2DetectionOutputD)
-
- /**
- *@brief Performs YOLO V3 detection . \n
-
- *@par Inputs:
- *Ten inputs, including:
- *@li Operator YoloV3DetectionOutput takes the outputs of operator Yolo as its inputs. A Yolo operator has three outputs: "coords", "obj", and "class".
- * There are three Yolo operators at YoloV3DetectionOutput's preceding layer on Yolo v3. For details, see the description of operator Yolo.
- *@li img_info: A float16 or float32, describing the image information including the required image height and width
- * and the actual image height and width.
-
- *@par Attributes:
- *@li biases: A required float. "biases = Number of Yolo operators at the preceding layer x 2 x boxes"
- *@li boxes: A required int32, specifying the number of anchor boxes predicted for each Yolo layer.
- *@li coords: Specifies the number of coordinate parameters. Must be 4.
- *@li classes: A required int32, specifying the number of classes to be predicted. The value range is [1, 80].
- *@li relative: An optional bool. Defaults to and must be "true".
- *@li obj_threshold: A required float, specifying the confidence threshold for box filtering, which is the output "obj" of operator Yolo). The value range is [0.0, 1.0] . \n
-
- *@li post_nms_topn: An optional int32. This attribute is reserved.
- *@li score_threshold: A required float, specifying the class score threshold for box filtering, which is the output "class" of operator Yolo). The value range is [0.0, 1.0] . \n
-
- *@li iou_threshold: A required float, specifying the intersection-over-union (IOU) threshold for box filtering. The value range is [0.0, 1.0].
-
- *@li pre_nms_topn: An optional int, specifying the number of boxes for non-maximum suppression (NMS). Defaults to "512".
-
- *@par Outputs:
- *@li boxout: A tensor of type float16 or float32 with shape [batch,6*post_nms_topn], describing the information of each output box.
- * In output shape, 6 means x1, y1, x2, y2, score, label(class). Output by the number of box_out_num.
- *@li boxoutnum: A tensor of type int32 with shape [batch,8], specifying the number of output boxes.
- * The output shape means only the first one of the 8 numbers is valid, the number of valid boxes in each batch, the maximum number of valid boxes in each batch is 1024
-
- *@attention Constraints:
- *@li This operator applies only to the YOLO v3 network.
- *@li The preceding layer of operator YoloV3DetectionOutput must be three Yolo operators . \n
-
- *@see Yolo()
- *@par Third-party framework compatibility
- * It is a custom operator. It has no corresponding operator in Caffe.
- */
- REG_OP(YoloV3DetectionOutput)
- .INPUT(coord_data_low, TensorType({DT_FLOAT16,DT_FLOAT}))
- .INPUT(coord_data_mid, TensorType({DT_FLOAT16,DT_FLOAT}))
- .INPUT(coord_data_high, TensorType({DT_FLOAT16,DT_FLOAT}))
- .INPUT(obj_prob_low, TensorType({DT_FLOAT16,DT_FLOAT}))
- .INPUT(obj_prob_mid, TensorType({DT_FLOAT16,DT_FLOAT}))
- .INPUT(obj_prob_high, TensorType({DT_FLOAT16,DT_FLOAT}))
- .INPUT(classes_prob_low, TensorType({DT_FLOAT16,DT_FLOAT}))
- .INPUT(classes_prob_mid, TensorType({DT_FLOAT16,DT_FLOAT}))
- .INPUT(classes_prob_high, TensorType({DT_FLOAT16,DT_FLOAT}))
- .INPUT(img_info, TensorType({DT_FLOAT16,DT_FLOAT}))
- .REQUIRED_ATTR(biases_low, ListFloat)
- .REQUIRED_ATTR(biases_mid, ListFloat)
- .REQUIRED_ATTR(biases_high, ListFloat)
- .ATTR(boxes, Int, 3)
- .ATTR(coords, Int, 4)
- .ATTR(classes, Int, 80)
- .ATTR(relative, Bool, true)
- .ATTR(obj_threshold, Float, 0.5)
- .ATTR(post_nms_topn, Int, 512)
- .ATTR(score_threshold, Float, 0.5)
- .ATTR(iou_threshold, Float, 0.45)
- .ATTR(pre_nms_topn, Int, 512)
- .OUTPUT(box_out, TensorType({DT_FLOAT16,DT_FLOAT}))
- .OUTPUT(box_out_num, TensorType({DT_INT32}))
- .OP_END_FACTORY_REG(YoloV3DetectionOutput)
-
- /**
- *@brief Performs YOLO V3 detection . \n
-
- *@par Inputs:
- *16 Input, including:
- *@li The outputs of operator Yolo at the preceding layer (that is, three Yolo operators on YOLO v3) are used as the inputs of operator YoloV3DetectionOutputD.
- * A Yolo operator has three outputs: "coords", "obj", and "class". For details, see the description of operator Yolo.
- *@li imginfo: A float16, describing the image information including the required image height and width
- * and the actual image height and width.
- *@li windex: A windex tensor with shape [height,weight]. Has the same type as the inputs.
- * [[0,1,2...(weight-1)],[0,1,2...(weight-1)]...[0,1,2...(weight-1)]] consisting of h groups of [0, 1, 2...(weight-1)] is formed for the three Yolo outputs, respectively . \n
-
- *@li hindex: A hindex tensor with shape [height,weight]. Has the same type as the inputs.
- * [[0,0...0],[1,1...1],[2,2...2]...[height-1,height-1...,height-1]] is formed for the three Yolo outputs, respectively . \n
- *
- *@par Attributes:
- *@li biases: A required float32. "biases = Number of Yolo operators at the preceding layer x 2 x boxes"
- *@li boxes: A required int32, specifying the number of anchor boxes predicted for each Yolo layer.
- *@li coords: Specifies the number of coordinate parameters. Must be 4.
- *@li classes: A required int32, specifying the number of classes to be predicted. The value range is [1, 80].
- *@li relative: An optional bool. Defaults to and must be "true".
- *@li obj_threshold: A required float, specifying the confidence threshold for box filtering, which is the output "obj" of operator Yolo). The value range is [0.0, 1.0].
- *@li post_nms_topn: An optional int32. This attribute is reserved.
- *@li score_threshold: A required float, specifying the class score threshold for box filtering, which is the output "class" of operator Yolo). The value range is [0.0, 1.0].
- *@li iou_threshold: A required float, specifying the intersection-over-union (IOU) threshold for box filtering. The value range is [0.0, 1.0].
- *@li pre_nms_topn: An optional int, specifying the number of boxes for non-maximum suppression (NMS). Defaults to "512".
-
- *@par Outputs:
- *@li boxout: A tensor of type float16 or float32 with shape [batch,6*post_nms_topn], describing the information of each output box.
- * In output shape, 6 means x1, y1, x2, y2, score, label(class). Output by the number of box_out_num.
- *@li boxoutnum: A tensor of type int32 with shape [batch,8], specifying the number of output boxes.
- * The output shape means only the first one of the 8 numbers is valid, the number of valid boxes in each batch, the maximum number of valid boxes in each batch is 1024
-
- *@attention Constraints:
- *@li This operator applies only to the YOLO v3 network.
- *@li The preceding layer of operator YoloV3DetectionOutputD must be three Yolo operators.
- *@see Yolo()
- *@par Third-party framework compatibility
- * It is a custom operator. It has no corresponding operator in Caffe.
- *@par Restrictions:
- *Warning: THIS FUNCTION IS DEPRECATED. Please use YoloV3DetectionOutput instead.
- */
- REG_OP(YoloV3DetectionOutputD)
- .INPUT(coord_data_low, TensorType({DT_FLOAT16,DT_FLOAT}))
- .INPUT(coord_data_mid, TensorType({DT_FLOAT16,DT_FLOAT}))
- .INPUT(coord_data_high, TensorType({DT_FLOAT16,DT_FLOAT}))
- .INPUT(obj_prob_low, TensorType({DT_FLOAT16,DT_FLOAT}))
- .INPUT(obj_prob_mid, TensorType({DT_FLOAT16,DT_FLOAT}))
- .INPUT(obj_prob_high, TensorType({DT_FLOAT16,DT_FLOAT}))
- .INPUT(classes_prob_low, TensorType({DT_FLOAT16,DT_FLOAT}))
- .INPUT(classes_prob_mid, TensorType({DT_FLOAT16,DT_FLOAT}))
- .INPUT(classes_prob_high, TensorType({DT_FLOAT16,DT_FLOAT}))
- .INPUT(img_info, TensorType({DT_FLOAT16,DT_FLOAT}))
- .INPUT(windex1, TensorType({DT_FLOAT16,DT_FLOAT}))
- .INPUT(windex2, TensorType({DT_FLOAT16,DT_FLOAT}))
- .INPUT(windex3, TensorType({DT_FLOAT16,DT_FLOAT}))
- .INPUT(hindex1, TensorType({DT_FLOAT16,DT_FLOAT}))
- .INPUT(hindex2, TensorType({DT_FLOAT16,DT_FLOAT}))
- .INPUT(hindex3, TensorType({DT_FLOAT16,DT_FLOAT}))
- .REQUIRED_ATTR(biases_low, ListFloat)
- .REQUIRED_ATTR(biases_mid, ListFloat)
- .REQUIRED_ATTR(biases_high, ListFloat)
- .ATTR(boxes, Int, 3)
- .ATTR(coords, Int, 4)
- .ATTR(classes, Int, 80)
- .ATTR(relative, Bool, true)
- .ATTR(obj_threshold, Float, 0.5)
- .ATTR(post_nms_topn, Int, 512)
- .ATTR(score_threshold, Float, 0.5)
- .ATTR(iou_threshold, Float, 0.45)
- .ATTR(pre_nms_topn, Int, 512)
- .OUTPUT(box_out, TensorType({DT_FLOAT16,DT_FLOAT}))
- .OUTPUT(box_out_num, TensorType({DT_INT32}))
- .OP_END_FACTORY_REG(YoloV3DetectionOutputD)
-
- /**
- *@brief Performs YOLO V3 detection . \n
-
- *@par Inputs:
- *Ten inputs, including:
- *@li Operator YoloV3DetectionOutputV2 takes the outputs of operator Yolo as its inputs. A Yolo operator has three outputs: "coords", "obj", and "class". \n
- There are three Yolo operators at YoloV3DetectionOutputV2's preceding layer on Yolo v3. For details, see the description of operator Yolo.
- *@li img_info: A float16 or float32, describing the image information including the required image height and width \n
- * and the actual image height and width.
-
- *@par Attributes:
- *@li biases: A required float. "biases = Number of Yolo operators at the preceding layer x 2 x boxes"
- *@li boxes: A required int32, specifying the number of anchor boxes predicted for each Yolo layer.
- *@li coords: Specifies the number of coordinate parameters. Must be 4.
- *@li classes: A required int32, specifying the number of classes to be predicted. The value range is [1, 80].
- *@li relative: An optional bool. Defaults to and must be "true".
- *@li obj_threshold: A required float, specifying the confidence threshold for box filtering, which is the output "obj" of operator Yolo). The value range is [0.0, 1.0].
-
- *@li post_nms_topn: An optional int32. This attribute is reserved.
- *@li score_threshold: A required float, specifying the class score threshold for box filtering, which is the output "class" of operator Yolo). The value range is [0.0, 1.0].
-
- *@li iou_threshold: A required float, specifying the intersection-over-union (IOU) threshold for box filtering. The value range is [0.0, 1.0].\n
-
- *@li pre_nms_topn: An optional int, specifying the number of boxes for non-maximum suppression (NMS). Defaults to "512".
-
- *@par Outputs:
- *@li boxout: A tensor of type float16 or float32 with shape [batch,6,post_nms_topn](out_box_dim == 3) or [batch, 6*post_nms_topn](out_box_dim == 2),
- * In output shape, 6 means x1, y1, x2, y2, score, label(class). Output by the number of box_out_num.
- *@li boxoutnum: A tensor of type int32 with shape [batch,8], specifying the number of output boxes.
- * The output shape means only the first one of the 8 numbers is valid, the number of valid boxes in each batch, the maximum number of valid boxes in each batch is 1024
-
- *@attention Constraints:\n
- *@li This operator applies only to the YOLO v3 network.
- *@li The preceding layer of operator YoloV3DetectionOutputV2 must be three Yolo operators.
-
- *@see Yolo()
- *@par Third-party framework compatibility
- * It is a custom operator. It has no corresponding operator in Caffe.
- */
- REG_OP(YoloV3DetectionOutputV2)
- .DYNAMIC_INPUT(x, TensorType({DT_FLOAT16,DT_FLOAT}))
- .REQUIRED_ATTR(biases, ListFloat)
- .ATTR(boxes, Int, 3)
- .ATTR(coords, Int, 4)
- .ATTR(classes, Int, 80)
- .ATTR(relative, Bool, true)
- .ATTR(obj_threshold, Float, 0.5)
- .ATTR(post_nms_topn, Int, 512)
- .ATTR(score_threshold, Float, 0.5)
- .ATTR(iou_threshold, Float, 0.45)
- .ATTR(pre_nms_topn, Int, 512)
- .ATTR(N, Int, 10)
- .ATTR(resize_origin_img_to_net, Bool, false)
- .ATTR(out_box_dim, Int, 3)
- .OUTPUT(box_out, TensorType({DT_FLOAT16,DT_FLOAT}))
- .OUTPUT(box_out_num, TensorType({DT_INT32}))
- .OP_END_FACTORY_REG(YoloV3DetectionOutputV2)
-
- /**
- *@brief Performs YOLO V3 detection.
-
- *@par Inputs:
- *16 Input, including:
- *@li The outputs of operator Yolo at the preceding layer (that is, three Yolo operators on YOLO v3) are used as the inputs of operator YoloV3DetectionOutputV2D.
- * A Yolo operator has three outputs: "coords", "obj", and "class". For details, see the description of operator Yolo.
- *@li imginfo: A float16, describing the image information including the required image height and width
- * and the actual image height and width.
- *@li windex: A windex tensor with shape [height,weight]. Has the same type as the inputs.
- * [[0,1,2...(weight-1)],[0,1,2...(weight-1)]...[0,1,2...(weight-1)]] consisting of h groups of [0, 1, 2...(weight-1)]
- * is formed for the three Yolo outputs, respectively .It's a dynamic input. \n
-
- *@li hindex: A hindex tensor with shape [height,weight]. Has the same type as the inputs. [[0,0...0],[1,1...1],[2,2...2]...[height-1,height-1...,height-1]] is formed for the three Yolo outputs, respectively . \n
- *@par Attributes:
- *@li biases: A required float32. "biases = Number of Yolo operators at the preceding layer x 2 x boxes"
- *@li boxes: A required int32, specifying the number of anchor boxes predicted for each Yolo layer.
- *@li coords: Specifies the number of coordinate parameters. Must be 4.
- *@li classes: A required int32, specifying the number of classes to be predicted. The value range is [1, 80].
- *@li relative: An optional bool. Defaults to and must be "true".
- *@li obj_threshold: A required float, specifying the confidence threshold for box filtering, which is the output "obj" of operator Yolo). The value range is [0.0, 1.0].
- *@li post_nms_topn: An optional int32. This attribute is reserved.
- *@li score_threshold: A required float, specifying the class score threshold for box filtering, which is the output "class" of operator Yolo). The value range is [0.0, 1.0].
- *@li iou_threshold: A required float, specifying the intersection-over-union (IOU) threshold for box filtering. The value range is [0.0, 1.0].
- *@li pre_nms_topn: An optional int, specifying the number of boxes for non-maximum suppression (NMS). Defaults to "512".
- *
- *@par Outputs:
- *@li boxout: A tensor of type float16 or float32 with shape [batch,6,post_nms_topn](out_box_dim == 3) or [batch, 6*post_nms_topn](out_box_dim == 2),
- * describing the information of each output box.
- * In output shape, 6 means x1, y1, x2, y2, score, label(class). Output by the number of box_out_num.
- *@li boxoutnum: A tensor of type int32 with shape [batch,8], specifying the number of output boxes.
- * The output shape means only the first one of the 8 numbers is valid, the number of valid boxes in each batch, the maximum number of valid boxes in each batch is 1024
- *
- *@attention Constraints:
- *@li This operator applies only to the YOLO v3 network.
- *@li The preceding layer of operator YoloV3DetectionOutputV2D must be three Yolo operators.
- *@see Yolo()
- *@par Third-party framework compatibility
- * It is a custom operator. It has no corresponding operator in Caffe.
-
- * @par Restrictions:
- * Warning: THIS FUNCTION IS DEPRECATED. Please use YoloV3DetectionOutputV2 instead.
- */
- REG_OP(YoloV3DetectionOutputV2D)
- .DYNAMIC_INPUT(x, TensorType({DT_FLOAT16,DT_FLOAT}))
- .DYNAMIC_INPUT(windex, TensorType({DT_FLOAT16,DT_FLOAT}))
- .DYNAMIC_INPUT(hindex, TensorType({DT_FLOAT16,DT_FLOAT}))
- .REQUIRED_ATTR(biases, ListFloat)
- .ATTR(boxes, Int, 3)
- .ATTR(coords, Int, 4)
- .ATTR(classes, Int, 80)
- .ATTR(relative, Bool, true)
- .ATTR(obj_threshold, Float, 0.5)
- .ATTR(post_nms_topn, Int, 512)
- .ATTR(score_threshold, Float, 0.5)
- .ATTR(iou_threshold, Float, 0.45)
- .ATTR(pre_nms_topn, Int, 512)
- .ATTR(N, Int, 10)
- .ATTR(resize_origin_img_to_net, Bool, false)
- .ATTR(out_box_dim, Int, 3)
- .OUTPUT(box_out, TensorType({DT_FLOAT16,DT_FLOAT}))
- .OUTPUT(box_out_num, TensorType({DT_INT32}))
- .OP_END_FACTORY_REG(YoloV3DetectionOutputV2D)
-
- /**
- *@brief Spatial Pyramid Pooling, multi-level pooling.
- * Pooling out(n, sigma(c*2^i*2^i)) tensor, i in range[0,pyramid_height) . \n
-
- *@par Inputs:
- *x: An NCHW tensor, support float16 or float32 type . \n
-
- *@par Attributes:
- * @li pyramid_height: A required int32.
- * Multi-level pooling out from 2^0 to 2^(pyramid_height-1).
- * @li pool_method: An optional int32, pooling method: 0-MAX, 1-AVE.
- * Defaults to "0" . \n
-
- *@par Outputs:
- *y: An NCHW tensor, support float16 or float32 type . \n
-
- *@attention Constraints:
- * @li pyramid_height: pyramid_height should be in range [0,7).
- * Pooling parameter should be satisfied with caffe pooling param(pad<kernel).
- * @li feature_size: input feature map h and w should be [1, 510] . \n
-
- *@par Third-party framework compatibility
- * Compatible with the Caffe operator SPP.
- */
- REG_OP(SPP)
- .INPUT(x, TensorType({DT_FLOAT, DT_FLOAT16}))
- .OUTPUT(y, TensorType({DT_FLOAT, DT_FLOAT16}))
- .REQUIRED_ATTR(pyramid_height, Int)
- .ATTR(pool_method, Int, 0)
- .OP_END_FACTORY_REG(SPP)
-
- /**
- *@brief Performs Region of Interest (ROI) Pooling . \n
-
- *@par Inputs:
- * Three inputs, including:
- *@li x: An NC1HWC0 tensor of type float16 or float32, describing the feature
- * map.
- *@li rois: A tensor of type float16 or float32, with shape
- * [batch, 5, roi_max_num], describing the ROIs.
- *@li roi_actual_num: An optional tensor of type int32, with shape [batch, 8], specifying
- * the number of ROIs per batch . \n
-
- *@par Attributes:
- *@li pooled_h: A required int32, specifying the pooled H. Must be greater
- * than 0.
- *@li pooled_w: A required int32, specifying the pooled W. Must be greater
- * than 0.
- *@li spatial_scale_h: A required scaling factor for mapping the input
- * coordinates of height to the ROI coordinates.
- *@li spatial_scale_w: A required scaling factor for mapping the input
- * coordinates of width to the ROI coordinates . \n
-
- *@par Outputs:
- *y: An NC1HWC0 tensor of type float16 or float32, describing the result
- * feature map . \n
-
- *@attention Constraints:
- *@li For the feature map input:
- (1) If pooled_h = pooled_w = 2, the feature map size must not exceed 50.
- (2) If pooled_h = pooled_w = 3, the feature map size must not exceed 60.
- (3) If pooled_h = pooled_w = 4, the feature map size must not exceed 70.
- (4) If pooled_h = pooled_w = 5, the feature map size must not exceed 70.
- (5) If pooled_h = pooled_w = 6, the feature map size must not exceed 80.
- (6) If pooled_h = pooled_w = 7, the feature map size must not exceed 80.
- (7) If pooled_h = pooled_w = 8, the feature map size must not exceed 80.
- (8) If pooled_h = pooled_w = 9, the feature map size must not exceed 70.
- (9) If pooled_h = pooled_w = 10, the feature map size must not exceed 70.
- (10) If pooled_h = pooled_w = 11, the feature map size must not exceed 70.
- (11) If pooled_h = pooled_w = 12, the feature map size must not exceed 70.
- (12) If pooled_h = pooled_w = 13, the feature map size must not exceed 70.
- (13) If pooled_h = pooled_w = 14, the feature map size must not exceed 70.
- (14) If pooled_h = pooled_w = 15, the feature map size must not exceed 70.
- (15) If pooled_h = pooled_w = 16, the feature map size must not exceed 70.
- (16) If pooled_h = pooled_w = 17, the feature map size must not exceed 50.
- (17) If pooled_h = pooled_w = 18, the feature map size must not exceed 40.
- (18) If pooled_h = pooled_w = 19, the feature map size must not exceed 40.
- (19) If pooled_h = pooled_w = 20, the feature map size must not exceed 40.
- *@par Third-party framework compatibility
- * It is a custom operator. It has no corresponding operator in Caffe.
- */
- REG_OP(ROIPooling)
- .INPUT(x, TensorType({DT_FLOAT, DT_FLOAT16}))
- .INPUT(rois, TensorType({DT_FLOAT, DT_FLOAT16}))
- .OPTIONAL_INPUT(roi_actual_num, TensorType({DT_INT32}))
- .REQUIRED_ATTR(pooled_h, Int)
- .REQUIRED_ATTR(pooled_w, Int)
- .REQUIRED_ATTR(spatial_scale_h, Float)
- .REQUIRED_ATTR(spatial_scale_w, Float)
- .OUTPUT(y, TensorType({DT_FLOAT, DT_FLOAT16}))
- .OP_END_FACTORY_REG(ROIPooling)
-
- /**
- *@brief Computes decode bbox function.
-
- *@par Inputs:
- *Inputs include:
- * @li box_predictions: A Tensor. Must be float16.
- * @li anchors: A Tensor. Must have the same type as box_predictions.
-
- *@par Attributes:
- * @li decode_clip: A required float, threshold of decode process.
-
- *@par Outputs:
- * @li decoded_boxes: A Tensor. Must have the same type as box_predictions.
- * N-D with shape [N, 4].
-
- *@par Restrictions:
- *Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use.
- */
- REG_OP(DecodeBbox)
- .INPUT(box_predictions, TensorType{DT_FLOAT16})
- .INPUT(anchors, TensorType{DT_FLOAT16})
- .OUTPUT(decoded_boxes, TensorType{DT_FLOAT16})
- .REQUIRED_ATTR(decode_clip, Float)
- .OP_END_FACTORY_REG(DecodeBbox)
-
- /**
- *@brief Computes ClipBoxes function . \n
-
- *@par Inputs:
- *@li boxes_input: A Tensor. Must be float16. N-D with shape [N, 4].
- *@li img_size: A Tensor. Must be int32. shape [H, W] . \n
-
- *@par Outputs:
- *boxes_output: A Tensor. Must have the same type as boxes_input. N-D with shape [N, 4].
-
- *@par Restrictions:
- *Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use.
- */
- REG_OP(ClipBoxes)
- .INPUT(boxes_input, TensorType({DT_FLOAT16}))
- .INPUT(img_size, TensorType({DT_INT32}))
- .OUTPUT(boxes_output, TensorType({DT_FLOAT16}))
- .OP_END_FACTORY_REG(ClipBoxes)
-
- /**
- *@brief Computes ClipBoxesD function . \n
-
- *@par Attributes:
- *img_size: A required ListInt of shape [H, W] . \n
-
- *@par Inputs:
- *boxes_input: A Tensor. Must be float16. N-D with shape [N, 4] . \n
-
- *@par Outputs:
- *boxes_output: A Tensor. Must have the same type as boxes_input. N-D with shape [N, 4] . \n
-
- *@par Restrictions:
- *Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use.
- */
- REG_OP(ClipBoxesD)
- .INPUT(boxes_input, TensorType({DT_FLOAT16}))
- .REQUIRED_ATTR(img_size, ListInt)
- .OUTPUT(boxes_output, TensorType({DT_FLOAT16}))
- .OP_END_FACTORY_REG(ClipBoxesD)
-
- /**
- *@brief Computes Fastrcnn Predictions function.
- *
- *@par Inputs:
- *Inputs include:
- * @li rois: A Tensor. Must be float16. N-D with shape [N*C, 4].
- * @li score: A Tensor. Must be float16. N-D with shape [N, C+1].
- *
- *@par Attributes:
- * @li nms_threshold: required, float, threshold of nms process.
- * @li score_threshold: required, float, threshold of topk process.
- * @li k: required, Int, threshold of topk process.
- *@par Outputs:
- * @li sorted_rois: A Tensor. Must be float16. N-D with shape [N, 4].
- * @li sorted_scores: A Tensor. Must be float16. N-D with shape [N, 1].
- * @li sorted_classes: A Tensor. Must be float16. N-D with shape [N, 1].
- */
- REG_OP(FastrcnnPredictions)
- .INPUT(rois, TensorType({DT_FLOAT16}))
- .INPUT(score, TensorType({DT_FLOAT16}))
- .REQUIRED_ATTR(nms_threshold, Float)
- .REQUIRED_ATTR(score_threshold, Float)
- .REQUIRED_ATTR(k, Int)
- .OUTPUT(sorted_rois, TensorType({DT_FLOAT16}))
- .OUTPUT(sorted_scores, TensorType({DT_FLOAT16}))
- .OUTPUT(sorted_classes, TensorType({DT_FLOAT16}))
- .OP_END_FACTORY_REG(FastrcnnPredictions)
-
- /**
- *@brief Computes Fastrcnn RpnProposals function . \n
-
- *@par Inputs:
- *Inputs include:
- * @li rois: A Tensor. Must be float16. N-D with shape [N, 4].
- * @li cls_bg_prob: A Tensor. Must be float16. N-D with shape [N, 1].
- * @li img_size: A Tensor. Must be int32. shape [H, W] . \n
-
- *@par Attributes:
- * @li score_threshold: required, float, threshold of topk process.
- * @li k: required, Int, threshold of topk process.
- * @li min_size: required, float, threshold of nms process.
- * @li nms_threshold: required, float, threshold of nms process.
- * @li post_nms_num: required, int, threshold of nms process.
- * @li score_filter: bool, mark of score_filter. Defaults to "true"
- * @li box_filter: bool, mark of box_filter. Defaults to "true"
- * @li score_sigmoid: bool, mark of score_sigmoid. Defaults to "false"
-
- *@par Outputs:
- * sorted_box: A Tensor of the sorted proposal boxes. Must be float16 . \n
-
- * @par Third-party framework compatibility
- * Compatible with the TensorFlow operator Unpack.
- */
- REG_OP(RpnProposals)
- .INPUT(rois, TensorType({DT_FLOAT16}))
- .INPUT(cls_bg_prob, TensorType({DT_FLOAT16}))
- .INPUT(img_size, TensorType({DT_INT32}))
- .REQUIRED_ATTR(score_threshold, Float)
- .REQUIRED_ATTR(k, Int)
- .REQUIRED_ATTR(min_size, Float)
- .REQUIRED_ATTR(nms_threshold, Float)
- .REQUIRED_ATTR(post_nms_num, Int)
- .ATTR(score_filter, Bool, true)
- .ATTR(box_filter, Bool, true)
- .ATTR(score_sigmoid, Bool, false)
- .OUTPUT(sorted_box, TensorType({DT_FLOAT16}))
- .OP_END_FACTORY_REG(RpnProposals)
-
- /**
- *@brief Computes Fastrcnn RpnProposalsD function . \n
-
- *@par Inputs:
- *@li rois: A Tensor. Must be float16. N-D with shape [N, 4].
- *@li cls_bg_prob: A Tensor. Must be float16. N-D with shape [N, 1] . \n
-
- *@par Attributes:
- *@li img_size: A required ListInt, size of image. shape [H, W].
- *@li score_threshold: required, float, threshold of topk process.
- *@li k: required, Int, threshold of topk process.
- *@li min_size: required, float, threshold of nms process.
- *@li nms_threshold: required, float, threshold of nms process.
- *@li post_nms_num: required, int, threshold of nms process.
- *@li score_filter: bool, mark of score_filter. Defaults to "true"
- *@li box_filter: bool, mark of box_filter. Defaults to "true"
- *@li score_sigmoid: bool, mark of score_sigmoid. Defaults to "false"
-
- *@par Outputs:
- *sorted_box: A Tensor of output. Must be float16. N-D with shape [N, 1] . \n
-
- * @par Third-party framework compatibility
- * Compatible with the pytorch operator RPNProposals . \n
-
- *@par Restrictions:
- *Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use.
- *@par Restrictions:
- *Warning: THIS FUNCTION IS DEPRECATED. Please use RpnProposals instead.
- */
- REG_OP(RpnProposalsD)
- .INPUT(rois, TensorType({DT_FLOAT16}))
- .INPUT(cls_bg_prob, TensorType({DT_FLOAT16}))
- .REQUIRED_ATTR(img_size, ListInt)
- .REQUIRED_ATTR(score_threshold, Float)
- .REQUIRED_ATTR(k, Int)
- .REQUIRED_ATTR(min_size, Float)
- .REQUIRED_ATTR(nms_threshold, Float)
- .REQUIRED_ATTR(post_nms_num, Int)
- .ATTR(score_filter, Bool, true)
- .ATTR(box_filter, Bool, true)
- .ATTR(score_sigmoid, Bool, false)
- .OUTPUT(sorted_box, TensorType({DT_FLOAT16}))
- .OP_END_FACTORY_REG(RpnProposalsD)
-
- /**
- *@brief Computes Score Filter Pre-Sort function.
-
- *@par Inputs:
- *Inputs include:
- * @li rois: A Tensor. Must be float16. N-D with shape [N, 4].
- * @li cls_bg_prob: A Tensor. Must be float16. N-D with shape [N, 1].
-
- *@par Attributes:
- * @li score_threshold: required, float, threshold of topk process.
- * @li k: required, Int, threshold of topk process.
- * @li score_filter: bool, mark of score_filter. Defaults to "true"
- * @li core_max_num: int, max number of core. Defaults to "8"
- *@par Outputs:
- * @li sorted_proposal: A Tensor. Must be float16.
- * N-D with shape [8*6002, 8].
- * @li proposal_num: A Tensor. Must be uint32. N-D with shape [8, 8].
- */
-
- REG_OP(ScoreFiltePreSort)
- .INPUT(rois, TensorType({DT_FLOAT16}))
- .INPUT(cls_bg_prob, TensorType({DT_FLOAT16}))
- .OUTPUT(sorted_proposal, TensorType({ DT_FLOAT16}))
- .OUTPUT(proposal_num, TensorType({ DT_UINT32}))
- .REQUIRED_ATTR(score_threshold, Float)
- .REQUIRED_ATTR(k, Int)
- .ATTR(score_filter, Bool, true)
- .ATTR(core_max_num, Int, 8)
- .OP_END_FACTORY_REG(ScoreFiltePreSort)
-
- /**
- *@brief Computes Rpn Proposal Post-Processing function.
- *
- *@par Inputs:
- *Inputs include:
- * @li sorted_proposal: A Tensor. Must be float16.
- * N-D with shape [8*6002, 8].
- * @li proposal_num: A Tensor. Must be uint32. N-D with shape [8, 8].
- *
- *@par Attributes:
- * @li img_size: A required ListInt, size of image. shape [H, W].
- * @li score_threshold: required, float, threshold of topk process.
- * @li k: required, Int, threshold of topk process.
- * @li min_size: required, float, threshold of nms process.
- * @li nms_threshold: required, float, threshold of nms process.
- * @li post_nms_num: required, int, threshold of nms process.
- * @li box_filter: bool, mark of box_filter. Defaults to "true"
- * @li core_max_num: int, max number of core. Defaults to "8"
- *@par Outputs:
- * sorted_box: A Tensor of the sorted proposal boxes. Must be float16.
- */
- REG_OP(RpnProposalPostProcessing)
- .INPUT(sorted_proposal, TensorType({DT_FLOAT16}))
- .INPUT(proposal_num, TensorType({DT_UINT32}))
- .OUTPUT(sorted_box, TensorType({ DT_FLOAT16}))
- .REQUIRED_ATTR(img_size, ListInt)
- .REQUIRED_ATTR(score_threshold, Float)
- .REQUIRED_ATTR(k, Int)
- .REQUIRED_ATTR(min_size, Float)
- .REQUIRED_ATTR(nms_threshold, Float)
- .REQUIRED_ATTR(post_nms_num, Int)
- .ATTR(box_filter, Bool, true)
- .ATTR(core_max_num, Int, 8)
- .OP_END_FACTORY_REG(RpnProposalPostProcessing)
- /**
- *@brief Computes DecodeBoundariesTarget function.
-
- *@par Inputs:
- *Inputs include:
- * @li boundary_predictions: A Tensor. Must be float16.
- * @li anchors: A Tensor. Must be float16.
-
- *@par Outputs:
- * boundary_encoded: A Tensor. Must be float16.
-
- *@par Restrictions:
- *Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use.
- */
- REG_OP(DecodeBoundariesTarget)
- .INPUT(boundary_predictions, TensorType({DT_FLOAT16}))
- .INPUT(anchors, TensorType({DT_FLOAT16}))
- .OUTPUT(boundary_encoded, TensorType({DT_FLOAT16}))
- .OP_END_FACTORY_REG(DecodeBoundariesTarget)
-
- /**
- *@brief Computes DecodeCornerpointsTargetBG function: decodes corner-point
- * (keypoint) predictions with respect to their anchors.
- *
- *@par Inputs:
- *Inputs include:
- * @li keypoints_prediction: A Tensor. Must be float16.
- * @li anchors: A Tensor. Must be float16.
- *
- *@par Outputs:
- * keypoints_decoded: A Tensor. Must be float16.
-
- *@par Restrictions:
- *Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use.
- */
- REG_OP(DecodeCornerpointsTargetBG)
- .INPUT(keypoints_prediction, TensorType({DT_FLOAT16}))
- .INPUT(anchors, TensorType({DT_FLOAT16}))
- .OUTPUT(keypoints_decoded, TensorType({DT_FLOAT16}))
- .OP_END_FACTORY_REG(DecodeCornerpointsTargetBG)
-
- /**
- *@brief Computes DecodeCornerpointsTargetWrtCenterV1 function: decodes
- * corner-point (keypoint) predictions with respect to their anchors.
- *
- *@par Inputs:
- *Inputs include:
- * @li keypoints_prediction: A Tensor. Must be float16.
- * @li anchors: A Tensor. Must be float16.
- *
- *@par Outputs:
- * keypoints_decoded: A Tensor. Must be float16.
-
- *@par Restrictions:
- *Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use.
- */
- REG_OP(DecodeCornerpointsTargetWrtCenterV1)
- .INPUT(keypoints_prediction, TensorType({DT_FLOAT16}))
- .INPUT(anchors, TensorType({DT_FLOAT16}))
- .OUTPUT(keypoints_decoded, TensorType({DT_FLOAT16}))
- .OP_END_FACTORY_REG(DecodeCornerpointsTargetWrtCenterV1)
-
- /**
- *@brief Computes DecodeWheelsTarget function: decodes wheel boundary
- * predictions with respect to their anchors.
- *
- *@par Inputs:
- *Inputs include:
- * @li boundary_predictions: A Tensor. Must be float16.
- * @li anchors: A Tensor. Must be float16.
- *
- *@par Outputs:
- * boundary_encoded: A Tensor. Must be float16.
-
- *@par Restrictions:
- *Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use.
- */
- REG_OP(DecodeWheelsTarget)
- .INPUT(boundary_predictions, TensorType({DT_FLOAT16}))
- .INPUT(anchors, TensorType({DT_FLOAT16}))
- .OUTPUT(boundary_encoded, TensorType({DT_FLOAT16}))
- .OP_END_FACTORY_REG(DecodeWheelsTarget)
-
- /**
- *@brief Computes nms for input boxes and score, support multiple batch and classes.
- * will do clip to window, score filter, top_k, and nms
-
- *@par Inputs:
- * Two required and two optional inputs, including:
- *@li boxes: boxes, a 4D Tensor of type float16 with
- * shape (batch, num_anchors, num_classes, 4). "batch" indicates the batch size of image,
- * and "num_anchors" indicates num of boxes, and "num_classes" indicates classes of detect.
- * and the value "4" refers to "x0", "x1", "y0", and "y1".
- *@li scores: scores, a 4D Tensor of type float16 with
- * shape (batch, num_anchors, num_classes).
- *@li clip_window: optional, window size, a 2D Tensor of type float16 with
- * shape (batch, 4). "4" refers to "anchor_x0", "anchor_x1", "anchor_y0", and "anchor_y1".
- *@li num_valid_boxes: optional, valid boxes number for each batch, a 1D Tensor of type int32 with
- * shape (batch,) . \n
-
- *@par Attributes:
- *@li score_threshold: A required attribute of type float32, specifying the score filter threshold.
- *@li iou_threshold: A required attribute of type float32, specifying the nms iou threshold.
- *@li max_size_per_class: A required attribute of type int, specifying the nms output num per class.
- *@li max_total_size: A required attribute of type int, specifying the nms output num per batch.
- *@li change_coordinate_frame: An optional attribute of type bool, whether to normalize coordinates after clipping.
- *@li transpose_box: An optional attribute of type bool, whether a transpose was inserted before this op. Must be "false" . \n
-
- *@par Outputs:
- *@li nmsed_boxes: A 3D Tensor of type float16 with shape (batch, max_total_size, 4),
- * specifying the output nms boxes per batch.
- *@li nmsed_scores: A 2D Tensor of type float16 with shape (batch, max_total_size),
- * specifying the output nms score per batch.
- *@li nmsed_classes: A 2D Tensor of type float16 with shape (batch, max_total_size),
- * specifying the output nms class per batch.
- *@li nmsed_num: A 1D Tensor of type int32 with shape (batch), specifying the valid num of nmsed_boxes . \n
-
- *@attention Constraints:
- * Only computation of float16 data is supported.
- */
- REG_OP(BatchMultiClassNonMaxSuppression)
- .INPUT(boxes, TensorType({DT_FLOAT16}))
- .INPUT(scores, TensorType({DT_FLOAT16}))
- .OPTIONAL_INPUT(clip_window, TensorType({DT_FLOAT16}))
- .OPTIONAL_INPUT(num_valid_boxes, TensorType({DT_INT32}))
- .OUTPUT(nmsed_boxes, TensorType({DT_FLOAT16}))
- .OUTPUT(nmsed_scores, TensorType({DT_FLOAT16}))
- .OUTPUT(nmsed_classes, TensorType({DT_FLOAT16}))
- .OUTPUT(nmsed_num, TensorType({DT_INT32}))
- .REQUIRED_ATTR(score_threshold, Float)
- .REQUIRED_ATTR(iou_threshold, Float)
- .REQUIRED_ATTR(max_size_per_class, Int)
- .REQUIRED_ATTR(max_total_size, Int)
- .ATTR(change_coordinate_frame, Bool, false)
- .ATTR(transpose_box, Bool, false)
- .OP_END_FACTORY_REG(BatchMultiClassNonMaxSuppression)
-
- /**
- * @brief Converts normalized bounding box coordinates to absolute
- * (pixel) coordinates . \n
-
- * @par Inputs:
- * @li normalized_boxes: A 3D Tensor of type float16 or float32.
- * @li shape_hw: A 1D Tensor of type int32 . \n
-
- * @par Attributes:
- * @li reversed_box: An optional bool, specifying the last two dims is "4,num" or
- * "num,4", "true" for "4,num", "false" for "num,4". Defaults to "false" . \n
-
- * @par Outputs:
- * y: A Tensor. Has the same type and shape as "normalized_boxes" . \n
-
- * @attention Constraints:
- * "normalized_boxes"'s shape must be (batch,num,4) or (batch,4,num).
- * "shape_hw"'s shape must be (4,)
- */
- REG_OP(ToAbsoluteBBox)
- .INPUT(normalized_boxes, TensorType({DT_FLOAT16, DT_FLOAT}))
- .INPUT(shape_hw, TensorType({DT_INT32}))
- .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT}))
- .ATTR(reversed_box, Bool, false)
- .OP_END_FACTORY_REG(ToAbsoluteBBox)
-
- /**
- *@brief Computes Normalize bbox function: normalizes absolute box
- * coordinates by the image shape.
- *
- *@par Inputs:
- *Inputs include:
- * @li boxes: A Tensor. Must be float16 or float32.
- * @li shape_hw: A Tensor. Must be int32.
- *
- *@par Attributes:
- * reversed_box: optional, bool, whether the last two dims of "boxes" are
- * transposed (see ToAbsoluteBBox). Defaults to "false".
- *
- *@par Outputs:
- * y: A Tensor. Has the same type and shape as "boxes".
- */
- REG_OP(NormalizeBBox)
- .INPUT(boxes, TensorType({DT_FLOAT16, DT_FLOAT}))
- .INPUT(shape_hw, TensorType({DT_INT32}))
- .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT}))
- .ATTR(reversed_box, Bool, false)
- .OP_END_FACTORY_REG(NormalizeBBox)
-
- /**
- *@brief Computes decode bboxv2 function: decodes box predictions against
- * anchors, with optional per-coordinate scaling and clipping.
- *
- *@par Inputs:
- *Inputs include:
- * @li boxes: A Tensor. Must be float16 or float32.
- * @li anchors: A Tensor. Must be float16 or float32.
- *
- *@par Attributes:
- * @li scales: optional, listfloat, per-coordinate scale factors applied
- * during decoding. Defaults to {1.0, 1.0, 1.0, 1.0}.
- * @li decode_clip: optional, float, clipping threshold of decode process.
- * Defaults to 0.0.
- * @li reversed_box: optional, bool. Defaults to "false".
- *
- *@par Outputs:
- * y: A Tensor. Has the same type as "boxes".
- */
- REG_OP(DecodeBboxV2)
- .INPUT(boxes, TensorType({DT_FLOAT16,DT_FLOAT}))
- .INPUT(anchors, TensorType({DT_FLOAT16,DT_FLOAT}))
- .OUTPUT(y, TensorType({DT_FLOAT16,DT_FLOAT}))
- .ATTR(scales, ListFloat, {1.0, 1.0, 1.0, 1.0})
- .ATTR(decode_clip, Float, 0.0)
- .ATTR(reversed_box, Bool, false)
- .OP_END_FACTORY_REG(DecodeBboxV2)
-
- /**
- *@brief Computes sort function: sorts "x" along the given axis and also
- * returns the original indices of the sorted values.
- *
- *@par Inputs:
- *Inputs include:
- * x: A Tensor. Must be float16.
- *
- *@par Attributes:
- * @li axis: optional, int, the axis to sort along. Defaults to -1 (last axis).
- * @li descending: optional, bool, sort order. Defaults to "false" (ascending).
- *
- *@par Outputs:
- * @li y1: A Tensor. Has the same type as "x"; the sorted values.
- * @li y2: A Tensor. Indices of y1 in x. Dtype must be int32.
- */
- REG_OP(Sort)
- .INPUT(x, TensorType({ DT_FLOAT16 }))
- .OUTPUT(y1, TensorType({ DT_FLOAT16 }))
- .OUTPUT(y2, TensorType({ DT_INT32 }))
- .ATTR(axis, Int, -1)
- .ATTR(descending, Bool, false)
- .OP_END_FACTORY_REG(Sort)
-
- } // namespace ge
-
- #endif // OPS_BUILT_IN_OP_PROTO_INC_NN_DETECT_OPS_H_
|