From ec3d7db8441df09cd0311b9800e60852543dbfc0 Mon Sep 17 00:00:00 2001 From: majorzhang Date: Tue, 28 Jun 2022 15:55:25 +0800 Subject: [PATCH] upgrade Ascend package 28 Jun 22 --- inc/external/acl/acl_op_compiler.h | 28 ++ metadef | 2 +- third_party/fwkacllib/inc/ops/array_ops.h | 33 +++ third_party/fwkacllib/inc/ops/cluster.h | 14 +- .../inc/ops/elewise_calculation_ops.h | 228 ++++++++-------- .../inc/ops/matrix_calculation_ops.h | 245 ++++++++++------- .../fwkacllib/inc/ops/nn_batch_norm_ops.h | 27 +- .../fwkacllib/inc/ops/nn_calculation_ops.h | 246 +++++++++--------- third_party/fwkacllib/inc/ops/nn_norm_ops.h | 54 +++- third_party/fwkacllib/inc/ops/nn_ops.h | 20 +- .../fwkacllib/inc/ops/nn_pooling_ops.h | 245 ++++++++--------- .../fwkacllib/inc/ops/nn_training_ops.h | 59 ++--- .../fwkacllib/inc/ops/nonlinear_fuc_ops.h | 33 +-- third_party/fwkacllib/inc/ops/quantize_ops.h | 6 +- third_party/fwkacllib/inc/ops/reduce_ops.h | 61 +++-- third_party/fwkacllib/inc/ops/selection_ops.h | 152 +++++------ 16 files changed, 842 insertions(+), 611 deletions(-) diff --git a/inc/external/acl/acl_op_compiler.h b/inc/external/acl/acl_op_compiler.h index 9de0ee85..a0a3f786 100644 --- a/inc/external/acl/acl_op_compiler.h +++ b/inc/external/acl/acl_op_compiler.h @@ -92,6 +92,34 @@ ACL_FUNC_VISIBILITY aclError aclopCompileAndExecute( int numOutputs, const aclTensorDesc *const outputDesc[], aclDataBuffer *const outputs[], const aclopAttr *attr, aclopEngineType engineType, aclopCompileType compileFlag, const char *opPath, aclrtStream stream); +/** + * @ingroup AscendCL + * @brief compile and execute op + * + * @param opType [IN] op type + * @param numInputs [IN] number of inputs + * @param inputDesc [IN] pointer to array of input tensor descriptions + * @param inputs [IN] pointer to array of input buffers + * @param numOutputs [IN] number of outputs + * @param outputDesc [IN|OUT] pointer to array of output tensor descriptions + * @param outputs [IN] pointer to array of outputs buffers + * @param attr [IN] pointer to instance of aclopAttr. + * may pass nullptr if the op has no attribute + * @param engineType [IN] engine type + * @param compileFlag [IN] compile flag + * @param opPath [IN] path of op + * @param stream [IN] stream handle + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + */ +ACL_FUNC_VISIBILITY aclError aclopCompileAndExecuteV2(const char *opType, int numInputs, aclTensorDesc *inputDesc[], + aclDataBuffer *inputs[], int numOutputs, + aclTensorDesc *outputDesc[], aclDataBuffer *outputs[], + aclopAttr *attr, aclopEngineType engineType, + aclopCompileType compileFlag, const char *opPath, + aclrtStream stream); + /** * @ingroup AscendCL * @brief set compile option diff --git a/metadef b/metadef index 2d98a178..175dce71 160000 --- a/metadef +++ b/metadef @@ -1 +1 @@ -Subproject commit 2d98a17884e656a2446239cdb9cee79543cb0161 +Subproject commit 175dce710e744666c6204540857634f362aafd61 diff --git a/third_party/fwkacllib/inc/ops/array_ops.h b/third_party/fwkacllib/inc/ops/array_ops.h index 924f98e4..17ab4322 100644 --- a/third_party/fwkacllib/inc/ops/array_ops.h +++ b/third_party/fwkacllib/inc/ops/array_ops.h @@ -1583,6 +1583,39 @@ REG_OP(UniqueConsecutive) .ATTR(return_counts, Bool, false) .ATTR(axis, Int, 1000) .OP_END_FACTORY_REG(UniqueConsecutive) + +/** +* @brief Decodes a variant Tensor into a RaggedTensor. \n +* +* @par Input: +* @li encoded_ragged: A Tensor of type variant. A variant Tensor containing encoded RaggedTensors. 
\n
*
* @par Outputs:
* @li output_nested_splits: A list of output_ragged_rank Tensor objects with type int32 or int64.
* @li output_dense_values: A Tensor, which must be one of the following types:
* double, float32, float16, int8, uint8, int16, uint16, int32, uint32, int64, uint64, bool. \n
*
* @par Attributes:
* @li input_ragged_rank: An int that is >= -1. The ragged rank of each encoded RaggedTensor component in the input.
* If set to -1, this is inferred as output_ragged_rank - rank(encoded_ragged).
* @li output_ragged_rank: An int that is >= 0. The expected ragged rank of the output RaggedTensor.
* The following must hold: output_ragged_rank = rank(encoded_ragged) + input_ragged_rank.
* @li Tvalues: The data type of output_dense_values.
* @li Tsplits: The data type of output_nested_splits. An optional DType of "int32, int64". Defaults to `int64`. \n
*
* @par Third-party framework compatibility
* Compatible with the TensorFlow operator RaggedTensorFromVariant.
*/
REG_OP(RaggedTensorFromVariant)
    .INPUT(encoded_ragged, TensorType({DT_VARIANT}))
    .DYNAMIC_OUTPUT(output_nested_splits, TensorType({DT_INT32, DT_INT64}))
    .OUTPUT(output_dense_values, TensorType::BasicType())
    .REQUIRED_ATTR(input_ragged_rank, Int)
    .REQUIRED_ATTR(output_ragged_rank, Int)
    .REQUIRED_ATTR(Tvalues, Type)
    .ATTR(Tsplits, Type, DT_INT64)
    .OP_END_FACTORY_REG(RaggedTensorFromVariant)

}  // namespace ge
#endif  // OPS_BUILT_IN_OP_PROTO_INC_ARRAY_OPS_H_
diff --git a/third_party/fwkacllib/inc/ops/cluster.h b/third_party/fwkacllib/inc/ops/cluster.h
index 19b4ea05..6e41e569 100644
--- a/third_party/fwkacllib/inc/ops/cluster.h
+++ b/third_party/fwkacllib/inc/ops/cluster.h
@@ -29,19 +29,19 @@ namespace ge {
* @brief Perform k-means clustering on a data matrix. \n

* @par Inputs:
-* Three required inputs and one optional inputs, including: \n
-* @li x: A 2D tensor of data type float32. \n
-* @li y: A 2D tensor of data type float32. \n
-* @li sum_square_x: An optional 2D tensor of data type float32. \n
+* Three required inputs and one optional input, including:
+* @li x: A 2D tensor of data type float32.
+* @li y: A 2D tensor of data type float32.
+* @li sum_square_x: An optional 2D tensor of data type float32.
* @li sum_square_y: A 2D tensor of data type float32. \n

* @par Attributes:
* use_actual_distance: Indicates whether to calculate the complete distance. \n

* @par Outputs:
-* @li segment_sum: A tensor of data type float32. \n
-* @li segment_count: A tensor of data type float32. \n
-* @li k_mean_total_sum: A tensor of data type float32. \n
+* @li segment_sum: A tensor of data type float32.
+* @li segment_count: A tensor of data type float32.
+* @li k_mean_total_sum: A tensor of data type float32.
*/
REG_OP(KMeansCentroids)
    .INPUT(x, TensorType({DT_FLOAT}))
diff --git a/third_party/fwkacllib/inc/ops/elewise_calculation_ops.h b/third_party/fwkacllib/inc/ops/elewise_calculation_ops.h
index 58650670..29cfa4f5 100644
--- a/third_party/fwkacllib/inc/ops/elewise_calculation_ops.h
+++ b/third_party/fwkacllib/inc/ops/elewise_calculation_ops.h
@@ -48,29 +48,29 @@ REG_OP(AddN)
    .OP_END_FACTORY_REG(AddN)

/**
-*@brief Calculates the reversed outputs of the function "maximum"
+*@brief Calculates the reversed outputs of the function "maximum".

*@par Inputs:
-*Three inputs, including:
-* @li grads: A mutable Tensor. Must be one of the following types:
-* float16, float32, int32.
-* @li x1: A mutable Tensor of the same type as "grads".
-* @li x2: A mutable Tensor of the same type as "grads". 
\n +* Three inputs, including: +*@li grads: A mutable Tensor. Must be one of the following types: +* float16, float32, int32. +*@li x1: A mutable Tensor of the same type as "grads". +*@li x2: A mutable Tensor of the same type as "grads". \n *@par Attributes: *@li grad_x: An optional bool. Defaults to "True". -* If "True", "y1" will be output. -* If "False", "y1" will not be output. \n +* If "True", "y1" will be output. +* If "False", "y1" will not be output. \n *@li grad_y: An optional bool. Defaults to "True". -* If "True", "y2" will be output. -* If "False", "y2" will not be output. \n +* If "True", "y2" will be output. +* If "False", "y2" will not be output. \n *@par Outputs: -* @li y1: A mutable Tensor. Has the same type as "grads". -* @li y2: A mutable Tensor. Has the same type as "grads". \n +*@li y1: A mutable Tensor. Has the same type as "grads". +*@li y2: A mutable Tensor. Has the same type as "grads". \n -*@par Third-party framework compatibility +*@par Third-party framework compatibility: * Compatible with the TensorFlow operator MaximumGrad. */ REG_OP(MaximumGrad) @@ -84,29 +84,29 @@ REG_OP(MaximumGrad) .OP_END_FACTORY_REG(MaximumGrad) /** -*@brief Calculates the reversed outputs of the function "minimum" +*@brief Calculates the reversed outputs of the function "minimum". *@par Inputs: -*Three inputs, including: -* @li grads: A mutable Tensor. Must be one of the following types: -* float16, float32, int32. -* @li x1: A mutable Tensor of the same type as "grads". -* @li x2: A mutable Tensor of the same type as "grads". \n +* Three inputs, including: +*@li grads: A mutable Tensor. Must be one of the following types: +* float16, float32, int32. +*@li x1: A mutable Tensor of the same type as "grads". +*@li x2: A mutable Tensor of the same type as "grads". \n *@par Attributes: *@li grad_x: An optional bool. Defaults to "True". -* If "True", "y1" will be output. -* If "False", "y1" will not be output. \n +* If "True", "y1" will be output. +* If "False", "y1" will not be output. \n *@li grad_y: An optional bool. Defaults to "True". -* If "True", "y2" will be output. -* If "False", "y2" will not be output. \n +* If "True", "y2" will be output. +* If "False", "y2" will not be output. \n *@par Outputs: -* @li y1: A mutable Tensor. Has the same type as "grads". -* @li y2: A mutable Tensor. Has the same type as "grads". \n +*@li y1: A mutable Tensor. Has the same type as "grads". +*@li y2: A mutable Tensor. Has the same type as "grads". \n -*@par Third-party framework compatibility +*@par Third-party framework compatibility: * Compatible with the TensorFlow operator MinimumGrad. */ REG_OP(MinimumGrad) @@ -552,15 +552,16 @@ REG_OP(Expint) .OP_END_FACTORY_REG(Expint) /** -*@brief: Computes the reciprocal of "x". \n +*@brief: Computes the reciprocal of "x". -*@par Inputs:\n -*x: A Tensor. Must be one of the following types: float16, float32, int32, int64, double, complex64, complex128. \n +*@par Inputs: +*x: A Tensor. Must be one of the following types: float16, float32, +* int32, int64, double, complex64, complex128. \n *@par Outputs: -*y: A Tensor. Has the same type as "x". \n +*y: A Tensor. Must be one of the following type: float16, float32, int32. \n -*@par Third-party framework compatibility +*@par Third-party framework compatibility: * Compatible with the TensorFlow operator Inv. */ REG_OP(Inv) @@ -569,18 +570,19 @@ REG_OP(Inv) .OP_END_FACTORY_REG(Inv) /** -*@brief: Computes "x" reciprocal grad, dx = -1*dy*y*y, where, "y = 1/x", and "dy" - is the corresponding input gradient. 
\n +*@brief: Computes "x" reciprocal grad, dx = -1*dy*y*y, where, "y = 1/x", +* and "dy" is the corresponding input gradient. *@par Inputs: * Two inputs, including: -* @li x: A Tensor. Must be one of the following types: float16, float32, int32, int8. -* @li grad: A Tensor. Has the same type as "x". \n +*@li x: A Tensor. Must be one of the following types: float16, float32, +* int32, int8. +*@li grad: A Tensor. Has the same type as "x". \n *@par Outputs: *y: A Tensor, Has the same type as "x". \n -*@par Third-party framework compatibility +*@par Third-party framework compatibility: * Compatible with the TensorFlow operator InvGrad. */ REG_OP(InvGrad) @@ -633,25 +635,27 @@ REG_OP(Log1p) /** *@brief Returns element-wise remainder of division. + *@par Inputs: -*Two inputs, including: -* @li x1: A Tensor. Must be one of the following types: float16, float32, - * int32, int64, int8, uint8, double. -* @li x2: A Tensor of the same type as "x1". \n +* Two inputs, including: +*@li x1: A Tensor. Must be one of the following types: float16, float32, +* int32, int64, int8, uint8, double. +*@li x2: A Tensor of the same type as "x1". \n *@par Outputs: -*y: A Tensor. Has the same type as "x1". +*y: A Tensor. Has the same type as "x1". \n *@attention Constraints: -*@li x2: The input data does not support 0 +*@li x2: The input data does not support 0. *@li When NUM exceeds 2048 , the accuracy of operator cannot guarantee the -*requirement of double thousandths in the mini form +* requirement of double thousandths in the mini form. *@li Due to different architectures, the calculation results of this operator -*on NPU and CPU may be inconsistent -*@li If shape is expressed as (D1,D2... ,Dn), then D1*D2... *DN<=1000000,n<=8 +* on NPU and CPU may be inconsistent. +*@li If shape is expressed as (D1,D2... ,Dn), +* then D1*D2... *DN<=1000000,n<=8. \n -*@par Third-party framework compatibility -*Compatible with the TensorFlow operator Mod. +*@par Third-party framework compatibility: +* Compatible with the TensorFlow operator Mod. */ REG_OP(Mod) .INPUT(x1, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32, DT_INT8, DT_UINT8, @@ -663,18 +667,18 @@ REG_OP(Mod) .OP_END_FACTORY_REG(Mod) /** -*@brief: Returns the truth value of (x != y) element-wise. \n +*@brief Returns the truth value of (x != y) element-wise. *@par Inputs: * Two inputs, including: *@li x1: A Tensor. Must be one of the following types: float16, float32, int32, - * int8, uint8, double, int16, int64, uint16, half, uint32, uint64 +* int8, uint8, double, int16, int64, uint16, half, uint32, uint64. *@li x2: A Tensor of the same type as "x1". \n *@par Outputs: *y: A Tensor of type bool. \n -*@par Third-party framework compatibility +*@par Third-party framework compatibility: * Compatible with the TensorFlow operator NotEqual. */ REG_OP(NotEqual) @@ -684,16 +688,17 @@ REG_OP(NotEqual) .OP_END_FACTORY_REG(NotEqual) /** -* @brief Computes ndtri element-wise (y = sqrt(2) * erfinv(2 * x - 1)) +*@brief Computes ndtri element-wise (y = sqrt(2) * erfinv(2 * x - 1)). -* @par Inputs: -* One input: -* x: A Tensor. Must be one of the following types: bfloat16, float16, float32, double \n +*@par Inputs: +* One input, including: \n +*x: A Tensor. Must be one of the following types: bfloat16, float16, +* float32, double. \n -* @par Outputs: -* y: A Tensor. Has the same type and format as input "x". \n +*@par Outputs: +*y: A Tensor. Has the same type and format as input "x". 
\n -* @par Third-party framework compatibility +*@par Third-party framework compatibility: * Compatible with the TensorFlow operator Ndtri. */ REG_OP(Ndtri) @@ -721,13 +726,12 @@ REG_OP(Neg) .OP_END_FACTORY_REG(Neg) /** -*@brief Returns x1/x2 element-wise for integer types. \n +*@brief Returns x1/x2 element-wise for integer types. *@par Inputs: *@li x1: A Tensor. Must be one of the following types: -* float32, float64, int32, uint8, int16, int8, -* complex64, int64, qint8, quint8, qint32, uint16, -* complex128, float16, uint32, uint64, complex64, complex128. +* float32, float16, int8, uint8, int32, int16, +* uint16, double, int64, complex64, complex128. *@li x2: A Tensor of the same data type as "x1". \n *@par Outputs: @@ -778,7 +782,7 @@ REG_OP(Xdivy) /** * @brief Computes "x" multiplied by the logarithm of y element-wise, -* if "x" == 0, return "0". \n +* if "x" == 0, return "0". * @par Inputs: * Two inputs, including: @@ -803,7 +807,7 @@ REG_OP(Xlog1py) /** *@brief Computes "x" multiplied by the logarithm of y element-wise, -* if "x" == 0, return "0". \n +* if "x" == 0, return "0". *@par Inputs: * Two inputs, including: @@ -1032,7 +1036,7 @@ REG_OP(LogicalOr) .OP_END_FACTORY_REG(LogicalOr) /** -* @brief Computes spence of x element-wise. \n +* @brief Computes spence of x element-wise. * * @par Inputs: @@ -1423,7 +1427,7 @@ REG_OP(RsqrtGrad) .OP_END_FACTORY_REG(RsqrtGrad) /** -*@brief Computes hyperbolic sine of "x" element-wise. \n +*@brief Computes hyperbolic sine of "x" element-wise. *@par Inputs: *x: An NCHW, NHWC,or ND Tensor of type float, double, complex64, @@ -1509,18 +1513,18 @@ REG_OP(DivNoNan) .OP_END_FACTORY_REG(DivNoNan) /** -*@brief Reverses specific dimensions of a tensor. \n +*@brief Reverses specific dimensions of a tensor. *@par Inputs: * One input: \n *x: A Tensor, Must be one of the following types: -* int32, uint8, int16, int8, int64, int64, uint16, uint32, uint64, -* and format can be [NCHW,NHWC,ND] +* int32, uint8, int16, int8, int64, int64, uint16, uint32, uint64, +* and format can be [NCHW,NHWC,ND]. \n *@par Outputs: -*y: A Tensor. Has the same type and format as "x" +*y: A Tensor. Has the same type and format as "x". \n -*@par Third-party framework compatibility +*@par Third-party framework compatibility: * Compatible with the TensorFlow operator Invert. */ REG_OP(Invert) @@ -1768,16 +1772,16 @@ REG_OP(Atan2) .OP_END_FACTORY_REG(Atan2) /** -* @brief Computes fresnel_cos of x element-wise. \n - +*@brief Computes fresnel_cos of x element-wise. * -* @par Inputs: -* x: A tensor. Must be one of the following types: bfloat16, float16, float32, double. +*@par Inputs: +*x: A tensor. Must be one of the following types: bfloat16, float16, float32, +* double. \n * -* @par Outputs: -* y: A tensor. Has the same type as "x". +*@par Outputs: +*y: A tensor. Has the same type as "x". \n * -* @par Third-party framework compatibility +*@par Third-party framework compatibility * Compatible with the TensorFlow operator FresnelCos. * */ @@ -1787,16 +1791,17 @@ REG_OP(FresnelCos) .OP_END_FACTORY_REG(FresnelCos) /** -* @brief Computes fresnel_sin of x element-wise. \n +*@brief Computes fresnel_sin of x element-wise. * -* @par Inputs: -* x: A tensor. Must be one of the following types: bfloat16, float16, float32, double. +*@par Inputs: +*x: A tensor. Must be one of the following types: bfloat16, float16, float32, +* double. \n * -* @par Outputs: -* y: A tensor. Has the same type as "x". +*@par Outputs: +*y: A tensor. Has the same type as "x". 
\n * -* @par Third-party framework compatibility +*@par Third-party framework compatibility: * Compatible with the TensorFlow operator FresnelSin. * */ @@ -2312,7 +2317,7 @@ REG_OP(Sin) .OP_END_FACTORY_REG(Sin) /** -*@brief: Computes tan of "x" element-wise. \n +*@brief: Computes tan of "x" element-wise. *@par Inputs: *One input: @@ -2332,7 +2337,7 @@ REG_OP(Tan) .OP_END_FACTORY_REG(Tan) /** -*@brief Returns element-wise remainder of division. \n +*@brief Returns element-wise remainder of division. *@par Inputs: *Two inputs, including: @@ -2352,7 +2357,7 @@ REG_OP(Tan) *@li If shape is expressed as (D1,D2... ,Dn), then D1*D2... *DN<=1000000,n<=8 *@par Third-party framework compatibility -*@li Compatible with the TensorFlow operator TruncateMod. +*Compatible with the TensorFlow operator TruncateMod. */ REG_OP(TruncateMod) .INPUT(x1, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE, DT_INT64, @@ -3133,7 +3138,7 @@ REG_OP(SquareSumV2) .OP_END_FACTORY_REG(SquareSumV2) /** -*@brief Confuse reducesumd and square. \n +*@brief Confuse reducesumd and square. *@par Inputs: *x: A Tensor of type float16, float32. \n @@ -3178,19 +3183,20 @@ REG_OP(SquareSumAll) .OP_END_FACTORY_REG(SquareSumAll) /** -*@brief Confuse broadcast, addn and mul. \n +*@brief Confuse broadcast, addn and mul. *@par Inputs: *Three inputs, including: -* @li x1: A Tensor. Must be one of the following types:int32, int16, float16, float32. -* @li x2: A Tensor of the same type as "x1". -* @li x3: A Tensor of the same type as "x1". \n +*@li x1: A Tensor. Must be one of the following types:int32, int16, +* float16, float32. +*@li x2: A Tensor of the same type as "x1". +*@li x3: A Tensor of the same type as "x1". \n *@par Outputs: -* y: A Tensor. Has the same type as "x1". +*y: A Tensor. Has the same type as "x1". \n *@par Restrictions: -*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. +* Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. */ REG_OP(FusedMulAddN) .INPUT(x1, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT32, DT_INT16})) @@ -3200,7 +3206,7 @@ REG_OP(FusedMulAddN) .OP_END_FACTORY_REG(FusedMulAddN) /** -*@brief Add 'bias' to 'x'. \n +*@brief Add 'bias' to 'x'. *@par Inputs: * Two inputs, including: @@ -3209,22 +3215,31 @@ REG_OP(FusedMulAddN) *@par Attributes: *@li axis: An optional int32 used to compute the shape of bias input from the online bottoms. Defaults to "1". -*@li num_axes: An optional int32 used to compute the shape of bias input from a Caffe model trained offline. Defaults to "1". -*@li bias_from_blob: An optional bool. If "true", bias is input from a Caffe model trained offline. If "false", bias is input from online bottoms. Defaults to "true". \n +*@li num_axes: An optional int32 used to compute the shape of +* bias input from a Caffe model trained offline. Defaults to "1". +*@li bias_from_blob: An optional bool. If "true", bias is input from a Caffe model trained offline. +* If "false", bias is input from online bottoms. Defaults to "true". \n *@par Outputs: *y: An ND tensor of type float16 or float32. \n -*@attention Constraints:\n +*@attention Constraints: * Assume that the shape length of "x" is "n" and that of "bias" is "m". *@li "axis" is within the range [-n, n-1]. num_axes >= -1. 
-*@li If "bias_from_blob = true", "num_axes = -1", and "axis >= 0", the ith axis of "bias" and the (i+"axis")th axis of "x" must have the same size (0 <= i < n-axis).\n +*@li If "bias_from_blob = true", "num_axes = -1", and "axis >= 0", +* the ith axis of "bias" and the (i+"axis")th axis of "x" must have the same size (0 <= i < n-axis). * If "axis < 0", the ith axis of "bias" and the (i+n+"axis")th axis of "x" must have the same size (0 <= i < -axis). *@li If "bias_from_blob = true" and "num_axes = 0", "bias" is a scalar with shape length 1 and dimension size 1. -*@li If "bias_from_blob = true", "num_axes > 0, and "axis >= 0", "axis + num_axes" must be less than or equal to "n" and the ith axis of "bias" and the (i+"axis")th axis of "x" must have the same size (0 <= i < num_axes).\n -* If "axis < 0", "n + axis + num_axes" must be less than or equal to "n" and the ith axis of "bias" and the (i+n+"axis")th axis of "x" must have the same size (0 <= i < num_axes). -*@li If "bias_from_blob = false", "bias" is not a scalar, and "axis >= 0","axis + m" must be less than or equal to "n" and the ith axis of "bias" and the (i+"axis")th axis of "x" must have the same size (0 <= i < m).\n -* If "axis < 0", "n + axis + m" must be less than or equal to "n" and the ith axis of "bias" and the (i+n+"axis")th axis of "x" must have the same size (0 <= i < m). +*@li If "bias_from_blob = true", "num_axes > 0, and "axis >= 0", +* "axis + num_axes" must be less than or equal to "n" and the ith axis of "bias" and +* the (i+"axis")th axis of "x" must have the same size (0 <= i < num_axes). +* If "axis < 0", "n + axis + num_axes" must be less than or equal to "n" and +* the ith axis of "bias" and the (i+n+"axis")th axis of "x" must have the same size (0 <= i < num_axes). +*@li If "bias_from_blob = false", "bias" is not a scalar, and "axis >= 0", +* "axis + m" must be less than or equal to "n" and the ith axis of "bias" and +* the (i+"axis")th axis of "x" must have the same size (0 <= i < m). +* If "axis < 0", "n + axis + m" must be less than or equal to "n" and +* the ith axis of "bias" and the (i+n+"axis")th axis of "x" must have the same size (0 <= i < m). \n *@par Third-party framework compatibility * Compatible with the Caffe operator Bias. */ @@ -3405,11 +3420,14 @@ REG_OP(Fills) .OP_END_FACTORY_REG(Adds) /** -*@brief Computes the product of x and y and returns 0 if the y is zero, even if x is NaN or infinite. \n +*@brief Computes the product of x and y and returns 0 if the y is zero, +* even if x is NaN or infinite. *@par Inputs: -* @li x1: A Tensor. Must be one of the following types:float16, float32, double, complex64, complex128. -* @li x2: A Tensor. Has the same type and shape as "x1". \n +* Two inputs, including: \n +*@li x1: A Tensor. Must be one of the following types:float16, float32, +* double, complex64, complex128. +*@li x2: A Tensor. Has the same type and shape as "x1". \n *@par Outputs: *y: A Tensor. Has the same type and shape as "x1". 
\n diff --git a/third_party/fwkacllib/inc/ops/matrix_calculation_ops.h b/third_party/fwkacllib/inc/ops/matrix_calculation_ops.h index 38e22be8..61336fb0 100644 --- a/third_party/fwkacllib/inc/ops/matrix_calculation_ops.h +++ b/third_party/fwkacllib/inc/ops/matrix_calculation_ops.h @@ -52,8 +52,8 @@ namespace ge { REG_OP(AttentionQKVGradW) .INPUT(x, TensorType({DT_FLOAT16})) .INPUT(query_dx, TensorType({DT_FLOAT16})) - .INPUT(key_dw, TensorType({DT_FLOAT16})) - .INPUT(value_dw, TensorType({DT_FLOAT16})) + .OPTIONAL_INPUT(key_dw, TensorType({DT_FLOAT16})) + .OPTIONAL_INPUT(value_dw, TensorType({DT_FLOAT16})) .OUTPUT(dw_query, TensorType({DT_FLOAT16})) .OUTPUT(dw_key, TensorType({DT_FLOAT16})) .OUTPUT(dw_value, TensorType({DT_FLOAT16})) @@ -199,24 +199,25 @@ REG_OP(SwinTransformerLnQKV) .OP_END_FACTORY_REG(SwinTransformerLnQKV) /** -*@brief Multiplies matrix "a" by matrix "b", producing "a * b" . \n - +*@brief Multiplies matrix "a" by matrix "b", producing "a * b". \n *@par Inputs: *Three inputs, including: * @li x1: A matrix Tensor. 2D. Must be one of the following types: float16, -* float32, int32. Has format [ND, NHWC]. +* float32, int32, bfloat16. Has format [ND, NHWC]. * @li x2: A matrix Tensor. 2D. Must be one of the following types: float16, -* float32, int32. Has format [ND, NHWC]. +* float32, int32, bfloat16. Has format [ND, NHWC]. * @li bias: A optional 1D Tensor. Must be one of the following types: float16, -* float32, int32. Has format [ND, NHWC] . \n +* float32, int32, bfloat16. Has format [ND, NHWC]. \n *@par Attributes: -*@li transpose_x1: A bool. If True, changes the shape of "x1" from [M, K] to [K, M]. -*@li transpose_x2: A bool. If True, changes the shape of "x2" from [M, K] to [K, M] . \n +*@li transpose_x1: A bool. If True, changes the shape of "x1" from [M, K] to +* [K, M]. +*@li transpose_x2: A bool. If True, changes the shape of "x2" from [M, K] to +* [K, M]. \n *@par Outputs: *y: The result matrix Tensor. 2D. Must be one of the following types: float16, -* float32, int32. Has format [ND, NHWC] . \n +* float32, int32, bfloat16. Has format [ND, NHWC]. \n *@par Third-party framework compatibility * Compatible with the TensorFlow operator BatchMatmul. @@ -231,35 +232,34 @@ REG_OP(MatMul) .OP_END_FACTORY_REG(MatMul) /** -*@brief Multiplies matrix "a" by matrix "b", producing "a * b" . \n - +*@brief Multiplies matrix "a" by matrix "b", producing "a * b". \n *@par Inputs: *Four inputs, including: * @li x1: A matrix Tensor. 2D. Must be one of the following types: float32, - float16, int32, int8. Has format [ND, NHWC]. +* float16, int32, int8, int4, bfloat16. Has format [ND, NHWC]. * @li x2: A matrix Tensor. 2D. Must be one of the following types: float32, - float16, int32, int8. Has format [ND, NHWC]. +* float16, int32, int8, int4, bfloat16. Has format [ND, NHWC]. * @li bias: A 1D Tensor. Must be one of the following types: float32, - float16, int32. Has format [ND, NHWC]. +* float16, int32 bfloat16. Has format [ND, NHWC]. * @li offset_w: A Optional 1D Tensor for quantized inference. Type is int8. - Reserved. \n +* Reserved. \n *@par Attributes: * @li transpose_x1: A bool. If True, changes the shape of "x1" from [K, M] to - [M, K]. +* [M, K]. * @li transpose_x2: A bool. If True, changes the shape of "x2" from [N, K] to [K, N]. * @li offset_x: An optional integer for quantized MatMulV2. * The negative offset added to the input x1 for int8 type. Ensure offset_x - within the effective range of int8 [-128, 127]. Defaults to "0". 
\n +* within the effective range of int8 [-128, 127]. Defaults to "0". \n *@par Outputs: *y: The result matrix Tensor. 2D. Must be one of the following types: float32, - float16, int32. Has format [ND, NHWC]. \n +* float16, int32, bfloat16. Has format [ND, NHWC]. \n *@attention Constraints: * if performances better in format NZ, please close - "MatmulTransdataFusionPass" in fusion configuration. \n +* "MatmulTransdataFusionPass" in fusion configuration. \n *@par Third-party framework compatibility * Compatible with the TensorFlow operator BatchMatmul. @@ -276,26 +276,25 @@ REG_OP(MatMulV2) .OP_END_FACTORY_REG(MatMulV2) /** -*@brief Multiplies matrix "a" by matrix "b", producing "a * b" . \n - +*@brief Multiplies matrix "a" by matrix "b", producing "a * b". \n *@par Inputs: *Five inputs, including: * @li x1: A matrix Tensor. 2D. Must be one of the following types: int8. * @li x2: A matrix Tensor. 2D. Must be one of the following types: int8. * @li compress_index: A compress index matrix of type int8. * @li bias: An optional Tensor. 1D. Must be one of the following types: int32, - float16. +* float16. * @li offset_w: An optional matrix Tensor. 2D. Must be one of the following - types: int8. \n +* types: int8. \n *@par Attributes: *@li transpose_x1: A bool. If True, changes the shape of "x1" from [K, M] to - [M, K]. +* [M, K]. *@li transpose_x2: A bool. If True, changes the shape of "x2" from [N, K] to - [K, N]. +* [K, N]. *@li offset_x: An optional integer for quantized MatMulV2Compress. *The negative offset added to the input x1 for int8 type. Ensure offset_x - within the effective range of int8 [-128, 127]. Defaults to "0". \n +* within the effective range of int8 [-128, 127]. Defaults to "0". \n *@par Outputs: *y: The result matrix Tensor. 2D. Must be one of the following types: int32, @@ -303,7 +302,7 @@ REG_OP(MatMulV2) *@attention Constraints: * if performances better in format NZ, please close - "MatmulTransdataFusionPass" in fusion configuration. +* "MatmulTransdataFusionPass" in fusion configuration. */ REG_OP(MatMulV2Compress) @@ -319,29 +318,29 @@ REG_OP(MatMulV2Compress) .OP_END_FACTORY_REG(MatMulV2Compress) /** -*@brief Performs Matrix-to-matrix Multiply, producing y=alpha[0]*a*b+beta[0]*c . \n - +*@brief Performs Matrix-to-matrix Multiply, +* producing y=alpha[0]*a*b+beta[0]*c. \n *@attention Constraints: * For better performance, The k-axis must be aligned to 16 (input type * is float16) or 32 (input type is int8). \n *@par Inputs: *Five inputs, including: -*@li a: A matrix Tensor. Must be one of the following types: float16, int8. -* Has format [ND]. -*@li b: A matrix Tensor. Must be one of the following types: float16, int8. -* Has format ND. -*@li c: A matrix Tensor. Must be one of the following types: float16, int32, -* float32. has format ND. -*@li alpha: A 1D Tensor. The shape of alpha is [1].Must be one of the following -* types: float16, int32, float32. Has format [ND]. +* @li a: A matrix Tensor. Must be one of the following types:float32, float16, +* int8, int32. Has format ND. +* @li b: A matrix Tensor. Must be one of the following types:float32, float16, +* int8, int32. Has format ND. +*@li c: A matrix Tensor. Must be one of the following types:float32, float16, +* int8, int32. Has format ND. +* @li alpha: A 1D Tensor. The shape of alpha is [1].Must be one of the +* following types: float16, int32, float32, int8. Has format ND. *@li beta: A 1D Tensor. The shape of beta is [1]. Must be one of the following -* types: float16, int32, float32. Has format [ND]. 
+* types: float16, int32, float32, int8. Has format ND.\n * The format of a, b, c has restriction:\n * When type of a is int8 and type of c is int32, the format of a, b, c should * all be ND.\n -* When type of a is int8 and type of c is float32, the format of a, b, c should -* all be ND.\n +* When type of a is int8 and type of c is float32, the format of a, b, c +* should all be ND.\n * When type of a is float16 and type of c is float16, the format of a, b, c * should all be ND.\n * When type of a is float16 and type of c is float32, the format of a, b, c @@ -352,7 +351,7 @@ REG_OP(MatMulV2Compress) *@li transpose_a: Optional. A bool. If True, changes the shape of "a" from * [M, K] to [K, M]. *@li transpose_b: Optional. A bool. If True, changes the shape of "b" from -* [K, N] to [N, K] . \n +* [K, N] to [N, K]. \n *@par Outputs: *y: The result matrix Tensor. Must be one of the following types: float16, @@ -371,22 +370,25 @@ REG_OP(GEMM) .OP_END_FACTORY_REG(GEMM) /** -*@brief Multiplies matrix "a" by matrix "b", producing "a * b" . \n - +*@brief Multiplies matrix "a" by matrix "b", producing "a * b". \n *@par Inputs: *Two inputs, including: * @li x1: A matrix Tensor. Must be one of the following types: float16, -* float32, int32. 2D or higher. Has format [ND, NHWC]. +* float32, int32, bfloat16. 2D or higher. Has format [ND, NHWC]. * @li x2: A matrix Tensor. Must be one of the following types: float16, -* float32, int32. 2D or higher. Has format [ND, NHWC] . \n +* float32, int32, bfloat16. 2D or higher. Has format [ND, NHWC]. \n *@par Attributes: -*@li adj_x1: A bool. If True, changes the shape of "x1" from [B, M, K] to [B, K, M]. -*@li adj_x2: A bool. If True, changes the shape of "x2" from [B, M, K] to [B, K, M] . \n +*@li adj_x1: A bool. If True, changes the shape of "x1" from [B, M, K] +* to [B, K, M]. +*@li adj_x2: A bool. If True, changes the shape of "x2" from [B, M, K] +* to [B, K, M]. \n *@par Outputs: -*y: The result matrix Tensor. 2D or higher. Must be one of the following types: float16, -* float32, int32. 2D or higher. Has format [ND, NHWC]. Has the same shape length as "x1" and "x2" . \n +* y: The result matrix Tensor. 2D or higher. Must be one of the following +* types: float16, bfloat16, +* float32, int32. 2D or higher. Has format [ND, NHWC]. Has the same shape +* length as "x1" and "x2". \n *@par Third-party framework compatibility * Compatible with the TensorFlow operator BatchMatmul. @@ -403,27 +405,33 @@ REG_OP(BatchMatMul) /** * @brief Multiplies matrix "a" by matrix "b", producing "a * b" . \n - * @par Inputs: * Three inputs, including: * @li x1: A matrix Tensor. Must be one of the following types: float16, -* float32, int32. 2D or higher. Has format [ND, NHWC]. +* float32, int32, int8, int4, bfloat16. 2D or higher. Has format [ND, NHWC]. * @li x2: A matrix Tensor. Must be one of the following types: float16, -* float32, int32. 2D or higher. Has format [ND, NHWC] . \n -* @li bias: A matrix Tensor. Must be one of the following types: float16, -* float32, int32. 2D or higher. Has format [ND, NHWC] . \n +* float32, int32, int8, int4, bfloat16. 2D or higher. Has format [ND, NHWC]. +* @li bias: A optional Tensor. Must be one of the following types: +* float16, +* float32, int32, int8, int4, bfloat16. Has format [ND, NHWC]. +* @li offset_w: A optional Tensor. Must be one of the following types: +* int8, int4. Has format [ND, NHWC]. \n * @par Attributes: -* @li adj_x1: A bool. If True, changes the shape of "x1" from [B, M, K] to [B, K, M]. -* @li adj_x2: A bool. 
If True, changes the shape of "x2" from [B, M, K] to [B, K, M] . \n +* @li adj_x1: A bool. If True, changes the shape of "x1" from [B, M, K] to +* [B, K, M]. +* @li adj_x2: A bool. If True, changes the shape of "x2" from [B, M, K] to +* [B, K, M]. \n * @par Outputs: -* y: The result matrix Tensor. 2D or higher. Must be one of the following types: float16, -* float32, int32. 2D or higher. Has format [ND, NHWC]. Has the same shape length as "x1" and "x2" . \n +* y: The result matrix Tensor. 2D or higher. Must be one of the following +* types: float16, +* float32, int32. 2D or higher. Has format [ND, NHWC]. Has the same shape +* length as "x1" and "x2". \n *@attention Constraints: * if performances better in format NZ, please close - "MatmulTransdataFusionPass" in fusion configuration. \n +* "MatmulTransdataFusionPass" in fusion configuration. \n * @par Third-party framework compatibility * Compatible with the TensorFlow operator BatchMatmul. @@ -1025,29 +1033,33 @@ REG_OP(DiagPart) .OP_END_FACTORY_REG(DiagPart) /** -*@brief Also known as a "fully-connected" layer, computes an inner product with a set of learned weights, and (optionally) adds biases . \n - +*@brief Also known as a "fully-connected" layer, computes an inner product +* with a set of learned weights, and (optionally) adds biases. \n *@par Inputs: * Four inputs, including: -*@li x: A Tensor of type float16, int8. -*@li w: A weight matrix of type float16, int8. -*@li b: An optional Tensor of type float16, int32, float32. -*@li offset_w: An optional Tensor of type int8. Reserved. Only None Supported. \n +*@li x: A Tensor of type float16, int8, int4, float32, bfloat16. +*@li w: A weight matrix of type float16, int8, int4, float32, bfloat16. +*@li b: An optional Tensor of type float16, int8, int4, float32, bfloat16. +*@li offset_w: An optional Tensor of type int8, int4. +* Reserved. Only None Supported. \n *@par Attributes: *@li num_output: Required. An int, output neuron number. Reserved. -*@li transpose: A bool, specifying weight whether to transpose input w, either "true" or "false". Defaults to "false". -*@li axis: Optional. An int, 1 or 2, specifying which dimension the input "K" starts from. Defaults to 1. -* The product of the subsequent dimensions starting form first dimension or the second dimension is "K". +*@li transpose: A bool, specifying weight whether to transpose input w, +* either "true" or "false". Defaults to "false". +*@li axis: Optional. An int, 1 or 2, specifying which dimension the input +* "K" starts from. Defaults to 1. +* The product of the subsequent dimensions starting form first dimension +* or the second dimension is "K". *@li offset_x: An optional integer for quantized FullyConnection. -*The negative offset added to the input image for int8 type. Ensure offset_x within the -*effective range of int8 [-128, 127]. Defaults to "0". \n +*The negative offset added to the input image for int8 type. Ensure offset_x +* within the effective range of int8 [-128, 127]. Defaults to "0". \n *@par Outputs: -*y: The result tensor of type float16, int32, float32 . \n +*y: The result tensor of type float16, int32, float32, bfloat16. \n *@par Third-party framework compatibility -* Compatible with the Caffe operator InnerProduct . \n +* Compatible with the Caffe operator InnerProduct. 
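+*
+*@par Example
+* A minimal graph-construction sketch (illustrative only: the set_input_* and
+* set_attr_* accessors are generated from the REG_OP(FullyConnection)
+* declaration, and the upstream operators x and w are assumed to exist):
+*     ge::op::FullyConnection fc("fc");
+*     fc.set_input_x(x).set_input_w(w);
+*     fc.set_attr_num_output(1000);
+*     fc.set_attr_transpose(false);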
\n *@par Quantization supported or not * Yes @@ -1066,27 +1078,26 @@ REG_OP(FullyConnection) /** *@brief Also known as a "fully-connected-compress" layer, computes an inner -product with a set of learned weights, and (optionally) adds biases . \n - +* product with a set of learned weights, and (optionally) adds biases. \n *@par Inputs: * Five inputs, including: *@li x: A Tensor of type uint8, int8. *@li w: A weight matrix of type int8. *@li compress_index: A compress index matrix of type int8. -*@li b: A Tensor of type int32. -*@li offset_w: A Tensor of type int8. +*@li b: A optional Tensor of type int32. +*@li offset_w: A optional Tensor of type int8. *@par Attributes: *@li num_output: A int, specifying the number of outputs. *@li transpose: A bool, specifying whether to transpose input w, either "true" - or "false". Defaults to "false". +* or "false". Defaults to "false". *@li axis: Optional. A int, 1 or 2, specifying which dimension the input "K" -starts from. Defaults to "1". -* The product of the subsequent dimensions starting form first dimension or the -second dimension is "K". +* starts from. Defaults to "1". +*The product of the subsequent dimensions starting form first dimension or the +* second dimension is "K". *@li offset_x: An optional integer for quantized FullyConnectionCompress. *The negative offset added to the input image for int8 type. Ensure offset_x -within the effective range of int8 [-128, 127]. Defaults to "0". \n +* within the effective range of int8 [-128, 127]. Defaults to "0". \n *@par Outputs: *y: The result tensor of type int32. \n @@ -1520,21 +1531,18 @@ REG_OP(Tril) /** *@brief Concatenates a list of N tensors along the first dimension. *@par Inputs: -* Two inputs, including: -* @li values: A list of Tensors. Must be one of the following types: int32, float16, float32. -* Tensors to be concatenated. All must have size 1 in the first dimension and same shape. -* It's a dynamic input. -* @li shape: A Tensor of the same type as "x". -* The final shape of the result. Should be equal to the shapes of any input -* but with the number of input values in the first dimension . \n +* @li x: A list of Tensors. Must be one of the following types: int32, +* float16, float32. Tensors to be concatenated. All must have size 1 in +* the first dimension and same shape.It's a dynamic input. \n *@par Attributes: -*equation: The subscripts for the Einstein summation. \n -*N: tensor size of input \n +* @li equation: The subscripts for the Einstein summation. \n +* @li N: tensor size of input. \n *@par Outputs: -*@li y: Sums the product of the elements of the input operands along dimensions specified - using a notation based on the Einstein summation convention. \n +*@li y: Sums the product of the elements of the input operands along +* dimensions specified +* using a notation based on the Einstein summation convention. \n *@attention Constraints: *Input N must be Int. \n @@ -1756,6 +1764,57 @@ REG_OP(MatrixDiagV3) .ATTR(align, String, "RIGHT_LEFT") .OP_END_FACTORY_REG(MatrixDiagV3) +/** +* @brief Function SwinAttentionScore. \n + +* @par Inputs: +* six inputs, including: +* @li query: A matrix Tensor. The type only support float16. +* @li key: A matrix Tensor. The type only support float16. +* @li value: A matrix Tensor. The type only support float16. +* @li padding_mask1: A matrix Tensor. The type only support float16. +* @li padding_mask2: A matrix Tensor. The type only support float16. +* @li scale: A scalar. The type only support float16. +* @li drop_mask: A matrix Tensor. 
The type only supports uint8. \n

* @par Attributes:
* @li keep_prob: A mutable Tensor. Must meet all of the following rules:
    the shape of "keep_prob" should be (1,) or [1,].
* @li query_transpose: A bool. If True, changes the shape of "query" from [K, M] to
    [M, K].
* @li key_transpose: A bool. If True, changes the shape of "key" from [N, K] to
    [K, N].
* @li bmm_score_transpose_a: A bool. If True, changes the shape of "mid_data" from [K, M] to
    [M, K].
* @li bmm_score_transpose_b: A bool. If True, changes the shape of "value" from [N, K] to
    [K, N].
* @li softmax_axes: A list of int. The dimensions that softmax is performed on. Defaults
    to "[]". \n

* @par Outputs:
* @li attention_score: The result matrix Tensor. The type only supports float16.
* @li softmax: The result matrix Tensor. The type only supports float16.

* @par Restrictions:
* Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use.
*/
REG_OP(SwinAttentionScore)
    .INPUT(query, TensorType({DT_FLOAT16}))
    .INPUT(key, TensorType({DT_FLOAT16}))
    .INPUT(value, TensorType({DT_FLOAT16}))
    .INPUT(padding_mask1, TensorType({DT_FLOAT16}))
    .OPTIONAL_INPUT(padding_mask2, TensorType({DT_FLOAT16}))
    .INPUT(scale, TensorType({DT_FLOAT16}))
    .OPTIONAL_INPUT(drop_mask, TensorType({DT_INT8}))
    .OUTPUT(attention_score, TensorType({DT_FLOAT16}))
    .OUTPUT(softmax, TensorType({DT_FLOAT16}))
    .ATTR(keep_prob, Float, 1.0)
    .ATTR(query_transpose, Bool, false)
    .ATTR(key_transpose, Bool, false)
    .ATTR(bmm_score_transpose_a, Bool, false)
    .ATTR(bmm_score_transpose_b, Bool, false)
    .ATTR(softmax_axes, ListInt, {})
    .OP_END_FACTORY_REG(SwinAttentionScore)
}  // namespace ge
#endif  // OPS_BUILT_IN_OP_PROTO_INC_MATRIX_CALCULATION_OPS_H_
diff --git a/third_party/fwkacllib/inc/ops/nn_batch_norm_ops.h b/third_party/fwkacllib/inc/ops/nn_batch_norm_ops.h
index c6aad6dc..96213764 100644
--- a/third_party/fwkacllib/inc/ops/nn_batch_norm_ops.h
+++ b/third_party/fwkacllib/inc/ops/nn_batch_norm_ops.h
@@ -544,23 +544,30 @@ REG_OP(BNInference)
    .OP_END_FACTORY_REG(BNInference)

/**
-*@brief Performs batch normalization . \n
+*@brief Performs batch normalization.

*@par Inputs:
*@li x: A 4D or 5D Tensor of type float16 or float32, with format NHWC or NCHW.
-*@li mean: A Tensor of type float32 or float16. Must be 1D if input "x" Specifies the mean used for inference.
-*@li variance: A Tensor of type float32 or float16 . Must be 1D if input "x" Specifies the variance used for inference.
-*@li scale: An optional tensor of type float16 or float32, no use
-*@li offset: An optional tensor of type float16 or float32, no use
+*@li mean: A Tensor of type float32 or float16. Must be 1D if input "x" is 4D.
+* Specifies the mean used for inference.
+*@li variance: A Tensor of type float32 or float16. Must be 1D if input "x" is 4D.
+* Specifies the variance used for inference.
+*@li scale: An optional tensor of type float16 or float32, not used.
+*@li offset: An optional tensor of type float16 or float32, not used. \n
+
*@par Attributes:
-*@li momentum: An optional float32 num, represents the mean and the variance's scale factor
-*@li epsilon: An optional float32, specifying the small value added to variance to avoid dividing by zero. Defaults to "0.00001".
+*@li momentum: An optional float32, representing the scale factor of the mean
+* and the variance.
+*@li epsilon: An optional float32, specifying the small value
+* added to variance to avoid dividing by zero. Defaults to "0.00001".
*@li use_global_stats: Indicates inference mode; can only be "True".
-*@li mode: An optional attr, not use +*@li mode: An optional attr, not use. \n + *@par Outputs: -*@li y: A 4D or 5D Tensor of type float16 or float32 for the normalized "x" +*@li y: A 4D or 5D Tensor of type float16 or float32 for the normalized "x". \n + *@par Restrictions: -*Warning: THIS FUNCTION IS DEPRECATED. Please use BNInference instead. +* Warning: THIS FUNCTION IS DEPRECATED. Please use BNInference instead. */ REG_OP(BNInferenceD) .INPUT(x, TensorType({DT_FLOAT16,DT_FLOAT})) diff --git a/third_party/fwkacllib/inc/ops/nn_calculation_ops.h b/third_party/fwkacllib/inc/ops/nn_calculation_ops.h index bb0770e6..4c55eac0 100644 --- a/third_party/fwkacllib/inc/ops/nn_calculation_ops.h +++ b/third_party/fwkacllib/inc/ops/nn_calculation_ops.h @@ -26,15 +26,14 @@ namespace ge { /** * @brief Computes the gradients of depthwise convolution with respect to -* the filter . \n - +* the filter. \n * @par Inputs: -* Three inputs include: \n +* Three inputs include: * @li input: 4D origin shape of input tensor [N, C, H, W] or [N, H, W, C], -* support float16, float32, double -* @li filter_size: A 4D tensor of type int32, with shape [H, W, C, K] +* support float16. +* @li filter_size: A 4D tensor of type int32, int64, with shape [H, W, C, K] * @li out_backprop: 4D tensor with shape [N, C, H, W] or [N, H, W, C]. -* Must be one of the following types: float16, float32, double . \n +* Must be one of the following types: float16. \n * @par Attributes: * @li strides: A required list or tuple. The stride of the sliding window @@ -49,7 +48,7 @@ namespace ge { * @li pads: A required list or tuple. Padding added to each dimension of the * input. * @li data_format: An optional string. Input data format, either "NHWC" or -* "NCHW" . \n +* "NCHW". \n * @par Outputs: * filter_grad: Gradient of the deep convolution relative to the filter with @@ -65,8 +64,9 @@ namespace ge { * Output backprop is 4D with shape [N, C, Ho, Wo] or [N, Ho, Wo, C], but the * data is 5D with shape [N, C1, Ho, Wo, C0], * where C is the same as that of the feature map and C0 is 16.\n -* Limited by Tiling and L1 / L0 buffer memory: 512 * ceil(Wo, 16) + (480 * -* stride_h + 32 * filter_h) * ceil(Wi, 16) <= l1_size and Hf*Wf <= l0b_size/512 . \n +* Limited by Tiling and L1 / L0 buffer memory: 512 * ceil(Wo, 16) + +* (480 * stride_h + 32 * filter_h) * ceil(Wi, 16) <= l1_size and Hf*Wf +* <= l0b_size/512. \n * @par Third-party framework compatibility * @li Compatible with the TensorFlow operator DepthwiseConv2DBackpropFilter. @@ -146,34 +146,34 @@ REG_OP(DepthwiseConv2DBackpropFilterD) /** * @brief Computes the gradients of depthwise convolution with respect to the -* input . \n - +* input. \n * @par Inputs: * Three inputs include: \n * @li input_size: 4D shape of input tensor [N, C, H, W] or [N, H, W, C], -* support int32, int64 +* support int32, int64. * @li filter: 4D filter tensor with shape of [H, W, C, K], support float16. * @li out_backprop: 4D tensor with shape [N, C, H, W] or [N, H, W, C]. * Must be one of the following types: float16 . \n * @par Attributes: -* @li strides: A required list or tuple of int32. The stride of the sliding window for -* height and width of input "x" of the convolution. +* @li strides: A required list or tuple of int32. The stride of the sliding +* window for height and width of input "x" of the convolution. * Must be with shape [1, 1, stride_height, stride_width] or [1, stride_height, * stride_width, 1]. -* @li dilations: An optional list or tuple of int32. 
The dilation factor for each -* dimension of input "x". Defaults to "[1, 1, 1, 1]". +* @li dilations: An optional list or tuple of int32. The dilation factor for +* each dimension of input "x". Defaults to "[1, 1, 1, 1]". * If set to k > 1, there will be k-1 skipped cells between each filter element * on that dimension. Must be with shape [1, 1, dilation_height, dilation_width] * or [1, dilation_height, dilation_width, 1]. -* @li pads: A required list or tuple of int32. Padding added to each dimension of the -* input. +* @li pads: A required list or tuple of int32. Padding added to each dimension +* of the input. * @li data_format: An optional string. Input data format, either "NHWC" or * "NCHW". Defaults to "NHWC" . \n * @par Outputs: * input_grad: Gradient of the deep convolution relative to the input with shape -* [N, C, H, W] or [N, H, W, C] Must be one of the following types: float16 . \n +* [N, C, H, W] or [N, H, W, C] Must be one of the following types: +* float16, float32. \n * @attention Constraints:\n * The feature map is 4D with shape [N, C, Hi, Wi] or [N, Hi, Wi, C], but @@ -331,13 +331,13 @@ REG_OP(DepthwiseConv2D) * For NCHW data format, the feature dimension is the third-to-last . \n *@par Inputs: -*x: A Tensor of type NumberType . \n +* x: A Tensor of type NumberType . \n *@par Attributes: -*data_format: Data format. Defaults to "NHWC" . \n +* data_format: Data format. Defaults to "NHWC" . \n *@par Outputs: -*y: A Tensor.Has the same type as "x" . \n +* y: A Tensor.Has the same type as "x" . \n *@par Third-party framework compatibility * Compatible with the TensorFlow operator BiasAddGrad. @@ -378,8 +378,8 @@ REG_OP(BiasAddGrad) | Format | NCHW | NCHW | NCHW |\n | | NHWC | HWCN | NHWC |\n *\n - * For float32 and float64 type, the actual calculation on the chip is based on - * float16. + * For float32 and float64 type, the actual calculation on the chip is based + * on float16. *\n * *@par Attributes: @@ -419,11 +419,11 @@ REG_OP(BiasAddGrad) | | W | [1, 255] |\n *\n - * In Ascend910, fmap or out_backprop's H and W not support 1 when + * In Ascend910, fmap or out_backprop's H and W not support 1 when\n * fmap_h + pad_top + pad_bottom != (filter_height - 1) * dilation_h + 1 - * and filter_width > fmap_width - * If filter_h = 1 and filter_w = 1, out_backprop_w * stride_h * stride_w < 4096 - *\n + * and filter_width > fmap_width. + * If filter_h = 1 and filter_w = 1, out_backprop_w * stride_h * + * stride_w < 4096. \n * *@par Outputs: * y: A Tensor. Has the same type as filter,and has same format as input_size. @@ -482,7 +482,7 @@ REG_OP(Conv2DBackpropInput) *@par Outputs: * y: A Tensor. Has the same type as filter,4-D tensor [batch, height, width, * channels] or [batch, channels, height, width]. -*@par Third-party framework compatibility +* @par Third-party framework compatibility * Compatible with Tensorflow's conv2d_backprop_input *@par Restrictions: * Warning: THIS FUNCTION IS DEPRECATED. Please use Conv2DBackpropInput instead. @@ -503,7 +503,7 @@ REG_OP(Conv2DBackpropInputD) *@brief Computes the Deconvolution with respect to the input. *@par Inputs: * Two required inputs: - * @li x: A Tensor of type float16 or int8. 4D with shape + * @li x: A Tensor of type float16 or int8. 4D with shape * [batch, out_channels, out_height, out_width]. Gradients with respect * to the output of the convolution. * @li filter: A Tensor. Must have the same type as "x". @@ -511,7 +511,7 @@ REG_OP(Conv2DBackpropInputD) * Two optional inputs: * @li bias: An optional tensor. 
Must have the same type as "y". * @li offset_w: An optional 1D tensor for quantized deconvolution. - * Type is int8. Reserved.\n + * Type is int8. Reserved. *\n *\n * The following are the supported data types and data formats:\n @@ -535,9 +535,9 @@ REG_OP(Conv2DBackpropInputD) * @li dilations: A tuple or list of 4 integers. The dilation factor for each * dimension of input, defaults to [1,1,1,1]. * @li groups: Number of blocked connections from input channels to - output channels. Defaults to "1". + * output channels. Defaults to "1". * @li data_format: An optional string from: "NCHW". Defaults to "NCHW". \n - Specify the data format of the input and output data. + * Specify the data format of the input and output data. * @li offset_x: An optional integer for quantized deconvolution. * The negative offset added to the input image for int8 type. Ensure offset_x * within the effective range of int8 [-128, 127]. Defaults to "0". @@ -564,10 +564,11 @@ REG_OP(Conv2DBackpropInputD) | | W | [1, 255] |\n | Offset_x | | [-128, 127] |\n *\n - * In Ascend910, fmap or out_backprop's H and W not support 1 when + * In Ascend910, fmap or out_backprop's H and W not support 1 when\n * fmap_h + pad_top + pad_bottom != (filter_height - 1) * dilation_h + 1 * and filter_width > fmap_width - * If filter_h = 1 and filter_w = 1, out_backprop_w * stride_h * stride_w < 4096 + * If filter_h = 1 and filter_w = 1, + * out_backprop_w * stride_h * stride_w < 4096 *\n * *@par Outputs: @@ -603,7 +604,7 @@ REG_OP(Deconvolution) *@par Inputs: * Three inputs: * @li x: A Tensor. Must be one of the following types: float16, float32, - * float64.4-D with shape [batch, in_height, in_width, in_channels] or + * float64. 4-D with shape [batch, in_height, in_width, in_channels] or * [batch, in_channels, in_height, in_width]. * @li filter_size: A const Tensor of type int32. Currently does not support * data tensor. An integer vector representing the tensor shape of filter, @@ -627,8 +628,8 @@ REG_OP(Deconvolution) | Format | NCHW | NCHW | NCHW |\n | | NHWC | NHWC | HWCN |\n *\n - * For float32 and float64 type of x and outbackprop, the actual calculation on the chip - * is based on float16. + * For float32 and float64 type of x and outbackprop, the actual calculation + * on the chip is based on float16. *\n * *@par Attributes: @@ -651,21 +652,21 @@ REG_OP(Deconvolution) | Name | Field | Scope |\n |------------------|----------|--------------|\n | x(fmap) | H | [1, 200000] |\n - | | W | [1, 200000] |\n - | Filter Size | H | [1, 200000] |\n - | | W | [1, 200000] |\n + | | W | [1, 4096] |\n + | Filter Size | H | [1, 255] |\n + | | W | [1, 255] |\n | out_backprop | H | [1, 200000] |\n - | | W | [1, 200000] |\n + | | W | [1, 4096] |\n | y | H | [1, 200000] |\n - | | W | [1, 200000] |\n - | Stride | H | [1, 200000] |\n - | | W | [1, 200000] |\n - | Padding | Top | [0, 200000] |\n - | | Bottom | [0, 200000] |\n - | | Left | [0, 200000] |\n - | | Right | [0, 200000] |\n - | Dilation | H | [1, 200000] |\n - | | W | [1, 200000] |\n + | | W | [1, 4096] |\n + | Stride | H | [1, 63] |\n + | | W | [1, 63] |\n + | Padding | Top | [0, 255] |\n + | | Bottom | [0, 255] |\n + | | Left | [0, 255] |\n + | | Right | [0, 255] |\n + | Dilation | H | [1, 255] |\n + | | W | [1, 255] |\n *\n *@par Outputs: * y: A Tensor. Has the same type as x, has the same format as filter_size. @@ -743,16 +744,16 @@ REG_OP(Conv2DBackpropFilterD) .OP_END_FACTORY_REG(Conv2DBackpropFilterD) /** -*@brief Computes a 2D convolution given 4D "x" and "filter" tensors. 
-*@par Inputs: -*@li x: A 4D tensor of input image. With the format "NHWC", the data is stored +* @brief Computes a 2D convolution given 4D "x" and "filter" tensors. +* @par Inputs: +* @li x: A 4D tensor of input image. With the format "NHWC", the data is stored * in the order of: [batch, in_height, in_width, in_channels]. -*@li filter: A 4D tensor of learnable filters. Must have the same type as "x". +* @li filter: A 4D tensor of learnable filters. Must have the same type as "x". * With the format "HWCN" , the data is stored in the order of: [filter_height, * filter_width, in_channels / groups, out_channels]. -*@li bias: An optional 1D tensor of additive biases to the filter outputs. +* @li bias: An optional 1D tensor of additive biases to the filter outputs. * The data is stored in the order of: [out_channels]. -*@li offset_w: Reserved. +* @li offset_w: Reserved. *\n *\n * The following are the supported data types and data formats: @@ -770,22 +771,22 @@ REG_OP(Conv2DBackpropFilterD) * float16. *\n * -*@par Attributes: -*@li strides: Required. A list of 4 integers. The stride of the sliding window +* @par Attributes: +* @li strides: Required. A list of 4 integers. The stride of the sliding window * for each dimension of input. The dimension order is determined by the data * format of "x". The N and C dimensions must be set to 1. -*@li pads: Required. A list of 4 integers. The number of pixels to add to each +* @li pads: Required. A list of 4 integers. The number of pixels to add to each * (top, bottom, left, right) side of the input. -*@li dilations: Optional. A list of 4 integers. The dilation factor for each +* @li dilations: Optional. A list of 4 integers. The dilation factor for each * dimension of input. The dimension order is determined by the data format of * "x". The N and C dimensions must be set to 1. Defaults to [1, 1, 1, 1]. -*@li groups: Optional. An integer of type int32. The number of blocked +* @li groups: Optional. An integer of type int32. The number of blocked * connections from input channels to output channels. In_channels and * out_channels must both be divisible by "groups". Defaults to 1. -*@li offset_x: Optional. An integer of type int32. The negative offset added +* @li offset_x: Optional. An integer of type int32. The negative offset added * to the input image for int8 type. Ensure that the output is within the * effective range. Defaults to 0. -*@li data_format: Reserved. +* @li data_format: Reserved. *\n *\n * The following value range restrictions must be met: @@ -825,10 +826,10 @@ REG_OP(Conv2DBackpropFilterD) * / stride_w + 1 *\n * -*@par Quantization supported or not -*@li Yes +* @par Quantization supported or not +* Yes * -*@par Third-party framework compatibility +* @par Third-party framework compatibility *@li Compatible with the TensorFlow operator "conv2d". *@li Compatible with the Caffe operator 2D "Convolution". */ @@ -847,14 +848,14 @@ REG_OP(Conv2D) .OP_END_FACTORY_REG(Conv2D) /** -*@brief Computes a 2D convolution given 4D "x" and "filter_compress" tensors. -*@par Inputs: -*@li x: A 4D tensor of input images. -*@li filter_compress: A 4D tensor of compressed filter data blocks. -*@li compress_index: A 1D tensor of index for decompression. -*@li bias: An optional 1D tensor of additive biases to the filter outputs. +* @brief Computes a 2D convolution given 4D "x" and "filter_compress" tensors. +* @par Inputs: +* @li x: A 4D tensor of input images. +* @li filter_compress: A 4D tensor of compressed filter data blocks. 
+* @li compress_index: A 1D tensor of index for decompression. +* @li bias: An optional 1D tensor of additive biases to the filter outputs. * The data is stored in the order of: [out_channels]. -*@li offset_w: Reserved. +* @li offset_w: Reserved. *\n *\n * The following are the supported data types and data formats: @@ -870,8 +871,8 @@ REG_OP(Conv2D) * float16. *\n * -*@par Attributes: -*@li strides: Required. A list of 4 integers. The stride of the sliding window +* @par Attributes: +* @li strides: Required. A list of 4 integers. The stride of the sliding window * for each dimension of input. The dimension order is determined by the data * format of "x". The N and C dimensions must be set to 1. *@li pads: Required. A list of 4 integers. The number of pixels to add to each @@ -1014,7 +1015,6 @@ REG_OP(DeformableConv2D) /** *@brief Computes a 3D convolution given 5D "x" and "filter" tensors. - *@par Inputs: * @li x: A 5D tensor. Must be one of the following types: float16, * (Currently does not support int8). The format of x is NCDHW or NDHWC. @@ -1025,16 +1025,16 @@ REG_OP(DeformableConv2D) * @li offset_w: Optional. An 1D tensor for quantized deconvolution. Reserved. \n *@par Attributes: - * @li strides: Required. A list of 5 integers. Specifies the stride of the sliding window - * for each dimension of "x". + * @li strides: Required. A list of 5 integers. Specifies the stride of the + * sliding window for each dimension of "x". * The N and C dimensions must be 1. Has the same format as "x". * @li pads: Required. A list of 6 integers. * Supports only padding along the D, H and W dimensions in sequence of head, * tail, top, bottom, left and right. - * @li dilations: Optional. A list of 5 integers. Specifies the dilation factor for each - * dimension of "x". - * @li groups: Optional. Number of blocked connections from input channels to output - * channels. + * @li dilations: Optional. A list of 5 integers. Specifies the dilation + * factor for each dimension of "x". + * @li groups: Optional. Number of blocked connections from input channels + * to output channels. * @li data_format: Optional. An string from: "NDHWC", "NCDHW". * Defaults to "NDHWC". Specify the data format of the input and output data. * The N, C and D dimensions must be 1. Has the same format as "x". @@ -1068,10 +1068,9 @@ REG_OP(Conv3D) /** *@brief Computes the gradients of convolution 3d with respect to the input. - *@par Inputs: - * @li input_size: A Tensor of type int32, int64. An integer vector representing - * the shape of input, where input is a 5-D tensor + * @li input_size: A Tensor of type int32, int64. An integer vector + * representing the shape of input, where input is a 5-D tensor * [batch, depth, height, width, channels] or * [batch, channels, depth, height, width]. * @li filter: A Tensor. Must be one of the following types: float16, float32. @@ -1082,22 +1081,23 @@ REG_OP(Conv3D) * respect to the output of the convolution. \n *@par Attributes: - * @li strides: Required. A list of 5 integers. Specifies the stride of the sliding window - * for each dimension of "out_backprop". + * @li strides: Required. A list of 5 integers. Specifies the stride of the + * sliding window for each dimension of "out_backprop". * The N and C dimensions must be 1. Has the same format as "out_backprop". * @li pads: Required. A list of 6 integers. * Supports only padding along the D, H and W dimensions in sequence of head, * tail, top, bottom, left and right. - * @li dilations: Optional. 
A tuple/list of 5 integers, The dilation factor for each
- * dimension of the input.
+ * @li dilations: Optional. A tuple/list of 5 integers. The dilation factor
+ * for each dimension of the input.
  * The N, C and D dimensions must be 1. Has the same format as "out_backprop".
- * @li groups: Optional. Number of blocked connections from input channels to output
- * channels.
+ * @li groups: Optional. Number of blocked connections from input channels
+ * to output channels.
  * @li data_format: Optional. An string from: "NDHWC", "NCDHW".
  * Defaults to "NDHWC". Specify the data format of the input and output data. \n
 
 *@par Outputs:
- * y: A Tensor. Has the same type as filter,and has same format as "input_size". \n
+ * y: A Tensor. Has the same type as filter, and has the same format as
+ * "input_size". \n
 
 *@par Third-party framework compatibility
  * Compatible with Tensorflow's conv3d_backprop_input
@@ -1207,10 +1207,9 @@ REG_OP(LSTM)
 
 /**
 *@brief Computes the gradients of convolution3D with respect to the filter
-
 *@par Inputs:
- * @li x: A Tensor. Must be one of the following types: float16, float32.
- * Currently does not support double.
+ * @li x: A Tensor. Must be one of the following types: float16, float32,
+ * double (note that double is currently not supported).
  * 5-D with shape [batch, in_depth, in_height, in_width, in_channels]
  * or [batch, in_channels, in_depth, in_height, in_width].
  * @li filter_size: A Tensor of type int32. An integer vector representing the
@@ -1224,21 +1223,22 @@ REG_OP(LSTM)
  * Gradients with respect to the output of the convolution. \n
 
 *@par Attributes:
- * @li strides: Required. A tuple/list of 5 integers. Specifies the stride of the sliding
- * window for each dimension of "x". The N and C dimensions must be 1.
- * Has the same format as "x".
- * @li pads: Required. A tuple/list of 6 integers, [front, back, top, bottom, left, right]
- * pads on feature map.
- * @li dilations: Optional. A tuple/list of 5 integers, The dilation factor for each
- * dimension of input.
+ * @li strides: Required. A tuple/list of 5 integers. Specifies the stride
+ * of the sliding window for each dimension of "x". The N and C dimensions
+ * must be 1. Has the same format as "x".
+ * @li pads: Required. A tuple/list of 6 integers, [front, back, top, bottom,
+ * left, right] pads on feature map.
+ * @li dilations: Optional. A tuple/list of 5 integers. The dilation factor
+ * for each dimension of input.
  * The N, C and D dimensions must be 1. Has the same format as "x".
- * @li groups: Optional. Number of blocked connections from input channels to output
- * channels.
+ * @li groups: Optional. Number of blocked connections from input channels
+ * to output channels.
  * @li data_format: Optional. An string from: "NDHWC", "NCDHW".
  * Defaults to "NDHWC". Specify the data format of the input and output data. \n
 
 *@par Outputs:
- * y: A Tensor that has the same type as "x" and the format is NDHWC, NCDHW or DHWCN. \n
+ * y: A Tensor that has the same type as "x" and the format is NDHWC, NCDHW
+ * or DHWCN. \n
 
 *@par Third-party framework compatibility
  * Compatible with Tensorflow's conv3d_backprop_filter
@@ -1311,25 +1311,26 @@ REG_OP(Conv3DBackpropFilterD)
 *@brief Computes the transpose of convolution 3d with respect to the input.
 
 *@par Inputs:
- * @li input_size: A Tensor of type int32. An integer vector representing the
- * shape of input.
+ * @li input_size: A Tensor of type int32, int64. An integer vector
+ * representing the shape of input.
* @li x: A Tensor of type float16, currently does not support int8. The format * is NDHWC or NCDHW. * @li filter: A Tensor of type float16, currently does not support int8. * The format is NDHWC, NCDHW or DHWCN. * @li bias: Optional. An optional 1D tensor of the same type as "x". Reserved. - * @li offset_w: Optional. An optional 1D tensor for quantized deconvolution. Reserved. \n + * @li offset_w: Optional. An optional 1D tensor for quantized deconvolution. + * Reserved. \n *@par Attributes: - * @li strides: Required. A tuple/list of 5 integers. Specifies the stride of the sliding - * window for each dimension of "x". + * @li strides: Required. A tuple/list of 5 integers. Specifies the stride of + * the sliding window for each dimension of "x". * The N and C dimensions must be 1. Has the same format as "x". * @li pads: Required. A tuple/list of 6 integers. * @li dilations: Optional. A tuple/list of 5 integers, * The dilation factor for each dimension of input. * The N, C and D dimensions must be 1. Has the same format as "x". - * @li groups: Optional. Number of blocked connections from input channels to output - * channels. + * @li groups: Optional. Number of blocked connections from input channels to + * output channels. * @li data_format: Optional. An string from: "NDHWC", "NCDHW". * Defaults to "NDHWC". Specify the data format of the input and output data. * @li output_padding: Optional. The size will be added in the output shape. @@ -1417,7 +1418,8 @@ REG_OP(Conv3DTransposeD) * 4-D with shape [filter_height, filter_width, in_channels, out_channels] * or [out_channels, filter_height, filter_width, in_channels] * or [out_channels, in_channel, filter_height, filter_width]. - * @li bias: An optional 1D tensor of type float16 or int32. Format is "ND". + * @li bias: An optional 1D tensor of type float16, float32, int32. + * Format is "ND". * @li offset_w: An optional 1D tensor for quantized inference. Reserved. *\n *\n @@ -1446,8 +1448,8 @@ REG_OP(Conv3DTransposeD) * Defaults to "1". * @li dilations: A tuple/list of 4 integers, The dilation factor for each * dimension of input. Must be [1, 1, 1, 1]. - * @li data_format: An optional string from: "NHWC", "NCHW". Defaults to "NHWC". - * Specify the data format of the input and output data. + * @li data_format: An optional string from: "NHWC", "NCHW". + * Defaults to "NHWC". Specify the data format of the input and output data. * @li output_padding: The size will be added in the output shape. Defaults * to [0, 0, 0, 0]. * @li offset_x: An optional int. Input offset, used for quantized inference. @@ -1478,15 +1480,15 @@ REG_OP(Conv3DTransposeD) | | W | [1, 255] |\n | Offset_x | | [-128, 127] |\n *\n - * In Ascend910, fmap or out_backprop's H and W not support 1 when + * In Ascend910, fmap or out_backprop's H and W not support 1 when\n * fmap_h + pad_top + pad_bottom != (filter_height - 1) * dilation_h + 1 - * and filter_width > fmap_width - * If filter_h = 1 and filter_w = 1, out_backprop_w * stride_h * stride_w < 4096 - *\n + * and filter_width > fmap_width. + * If filter_h = 1 and filter_w = 1, out_backprop_w * stride_h * stride_w + * < 4096. \n * *@par Outputs: - * y: A Tensor. A Tensor of type float16 or int32, and has same format as - * input_size. + * y: A Tensor. A Tensor of type float16, int32, float32, and has + * same format as input_size. 
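+ *
+ * For intuition, a numeric check of the shape relations below (hedged,
+ * illustrative values only): with fmap_height = 224, pad_top = pad_bottom = 1,
+ * filter_height = 3, dilation_h = 1 and stride_h = 2,
+ * out_backprop_height = (224 + 1 + 1 - (1 * (3 - 1) + 1)) / 2 + 1 = 112.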
*\n * out_backprop_height = (fmap_height + pad_top + pad_bottom - * (dilation_h * (filter_height - 1) + 1)) diff --git a/third_party/fwkacllib/inc/ops/nn_norm_ops.h b/third_party/fwkacllib/inc/ops/nn_norm_ops.h index 523fb199..65411e2a 100644 --- a/third_party/fwkacllib/inc/ops/nn_norm_ops.h +++ b/third_party/fwkacllib/inc/ops/nn_norm_ops.h @@ -124,7 +124,7 @@ REG_OP(SoftmaxGrad) .OP_END_FACTORY_REG(SoftmaxGrad) /** -*@brief Computes the sigmoid cross entropy loss of "predict" and "target" . \n +* @brief Computes the sigmoid cross entropy loss of "predict" and "target" . *@par Inputs: * Three inputs, including: @@ -146,7 +146,7 @@ REG_OP(SigmoidCrossEntropyWithLogitsGrad) .OP_END_FACTORY_REG(SigmoidCrossEntropyWithLogitsGrad) /** -*@brief Performs the backpropagation of SigmoidCrossEntropyWithLogits for training scenarios . \n +* @brief Performs the backpropagation of SigmoidCrossEntropyWithLogits for training scenarios . *@par Inputs: * Two inputs, including: @@ -194,7 +194,7 @@ REG_OP(SigmoidCrossEntropyWithLogitsV2) .OP_END_FACTORY_REG(SigmoidCrossEntropyWithLogitsV2) /** -*@brief Computes the regression box of the RPN. It is a FasterRCNN operator . \n +* @brief Computes the regression box of the RPN. It is a FasterRCNN operator . *@par Inputs: * Two inputs, including: @@ -221,7 +221,7 @@ REG_OP(SmoothL1Loss) .OP_END_FACTORY_REG(SmoothL1Loss) /** -*@brief Performs the backpropagation of SmoothL1Loss for training scenarios . \n +* @brief Performs the backpropagation of SmoothL1Loss for training scenarios . *@par Inputs: * Three inputs, including: @@ -796,6 +796,52 @@ REG_OP(LayerNormBetaGammaBackpropV2) .REQUIRED_ATTR(shape_gamma, ListInt) .OP_END_FACTORY_REG(LayerNormBetaGammaBackpropV2) +/** +* @brief LNDropoutGrad operator interface implementation +* calculating: dy, x, variance, mean, gamma +* pd_xl = dy*gamma +* sub_x_mean = x - mean +* var_elta_2 = np.power((variance + EPSLON), (-0.5)) +* pd_var = sum(pd_xl * sub_x_mean, reduce_axis, keepdims=True) * var_elta_2 * var_elta_2 * var_elta_2 * (-0.5) +* pd_mean = sum(pd_xl, reduce_axis, keepdims=True) * var_elta_2 * (-1.0) +* pd_x = pd_xl * var_elta_2 + pd_var * (2.0 / m) * sub_x_mean + pd_mean * (1.0 / m) +* pd_x_dropout = pd_x * mask * (1 / keep_prob) +* pd_gamma = sum(dy * sub_x_mean * var_elta_2, param_axis, keepdims=True) +* pd_beta = sum(dy, param_axis, keepdims=True) + +* @par Inputs: +* Six inputs, including: +* @li dy: A Tensor. Must be one of the following types: float16, float32. +* @li x: A Tensor. Must be one of the following types: float16, float32. +* @li variance: A Tensor. Must be one of the following types: float16, float32. +* @li mean: A Tensor. Must be one of the following types: float16, float32. +* @li gamma: A Tensor. Must be one of the following types: float16, float32. +* @li mask: A Tensor. Must be one of the following types: uint8.\n + +* @par Outputs: +* Four outputs, including: +* @li pd_x: A Tensor. Must be one of the following types: float16, float32. +* @li pd_x_dropout: A Tensor. Must be one of the following types: float16, float32. +* @li pd_gamma: A Tensor. Must be one of the following types: float16, float32. +* @li pd_beta: A Tensor. Must be one of the following types: float16, float32. + +* @par Restrictions: +* Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. 
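+*
+* @par Example
+* A minimal scalar sketch (illustrative only, not the device kernel) of the
+* final scaling step "pd_x_dropout = pd_x * mask * (1 / keep_prob)" from the
+* formulas above; buffer names and the element count "n" are hypothetical:
+* @code
+*   for (size_t i = 0; i < n; ++i) {
+*     pd_x_dropout[i] = pd_x[i] * mask[i] * (1.0f / keep_prob);
+*   }
+* @endcode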
+*/
+REG_OP(LNDropoutGrad)
+    .INPUT(dy, TensorType({DT_FLOAT, DT_FLOAT16}))
+    .INPUT(x, TensorType({DT_FLOAT, DT_FLOAT16}))
+    .INPUT(variance, TensorType({DT_FLOAT, DT_FLOAT16}))
+    .INPUT(mean, TensorType({DT_FLOAT, DT_FLOAT16}))
+    .INPUT(gamma, TensorType({DT_FLOAT, DT_FLOAT16}))
+    .INPUT(mask, TensorType({DT_UINT8}))
+    .OUTPUT(pd_x, TensorType({DT_FLOAT, DT_FLOAT16}))
+    .OUTPUT(pd_x_dropout, TensorType({DT_FLOAT, DT_FLOAT16}))
+    .OUTPUT(pd_gamma, TensorType({DT_FLOAT, DT_FLOAT16}))
+    .OUTPUT(pd_beta, TensorType({DT_FLOAT, DT_FLOAT16}))
+    .REQUIRED_ATTR(keep_prob, Float)
+    .OP_END_FACTORY_REG(LNDropoutGrad)
+
 /**
 *@brief Return "output" according to the algorithm of dropout_do_mask:
 *  scale_x = x *(1 / keep_prob)
diff --git a/third_party/fwkacllib/inc/ops/nn_ops.h b/third_party/fwkacllib/inc/ops/nn_ops.h
index 83aa30d2..8c6987ca 100644
--- a/third_party/fwkacllib/inc/ops/nn_ops.h
+++ b/third_party/fwkacllib/inc/ops/nn_ops.h
@@ -112,7 +112,10 @@ REG_OP(FusedBatchNormV2)
 * @li input_data: A Tensor. Data to be sorted. Support float16 or float32.
 * @li input_index: A Tensor. Range(0, 2048). Support float16 or int32.
 * @par Attributes:
- * k_num: Int.Number to be sorted.
+ * @li k_num: Int. Number to be sorted.
+ * @li largest: An optional bool, controls whether to return largest or smallest elements. Defaults to true.
+ * If "True", the "k" largest elements are returned in descending order.
+ * If "False", the "k" smallest elements are returned in ascending order.
 * @par Outputs:
 * One output, including:
 * output_proposal: A Tensor. Datatype and format is same as input_data. Proposal sorted for each channel.
@@ -124,6 +127,7 @@ REG_OP(SegmentSort)
     .INPUT(input_index, TensorType({DT_FLOAT16,DT_INT32}))
     .OUTPUT(output_proposal, TensorType({DT_FLOAT16,DT_FLOAT}))
     .REQUIRED_ATTR(k_num, Int)
+    .ATTR(largest, Bool, true)
     .OP_END_FACTORY_REG(SegmentSort)
 
 /**
@@ -132,8 +136,11 @@ REG_OP(SegmentSort)
 * One input, including:
 * input_proposal: A Tensor. Proposal sorted for each channel. Support float16 or float32
 * @par Attributes:
- * k_num: Int.Number to be sorted.
- * include_index: Bool.include_index is false,output proposal. include_index is true, output data and index.
+ * @li k_num: Int. Number to be sorted.
+ * @li include_index: Bool. If "include_index" is false, output proposal; if true, output data and index.
+ * @li largest: An optional bool, controls whether to return largest or smallest elements. Defaults to true.
+ * If "True", the "k" largest elements are returned in descending order.
+ * If "False", the "k" smallest elements are returned in ascending order.
 * @par Outputs:
 * Two output, including:
 * output_proposal: A Tensor. Datatype and format is same as input_data. Proposal sorted for each channel.
@@ -147,6 +154,7 @@ REG_OP(MultiMerge)
     .OUTPUT(output_index, TensorType({DT_INT32}))
     .REQUIRED_ATTR(k_num, Int)
     .ATTR(include_index, Bool, false)
+    .ATTR(largest, Bool, true)
     .OP_END_FACTORY_REG(MultiMerge)
 
 /**
@@ -155,7 +163,10 @@ REG_OP(MultiMerge)
 * One input, including:
 * input_proposal: A Tensor. Proposal sorted for each channel. Support float16
 * @par Attributes:
- * k_num: Int.Number to be sorted.
+ * @li k_num: Int. Number to be sorted.
+ * @li largest: An optional bool, controls whether to return largest or smallest elements. Defaults to true.
+ * If "True", the "k" largest elements are returned in descending order.
+ * If "False", the "k" smallest elements are returned in ascending order.
 * @par Outputs:
 * Two output, including:
 * @li output_data: A Tensor.
Datatype and format is same as input_data. Data sorted. @@ -168,6 +179,7 @@ REG_OP(SingleMerge) .OUTPUT(output_data, TensorType({ DT_FLOAT16 })) .OUTPUT(output_index, TensorType({ DT_INT32 })) .REQUIRED_ATTR(k_num, Int) + .ATTR(largest, Bool, true) .OP_END_FACTORY_REG(SingleMerge) /** diff --git a/third_party/fwkacllib/inc/ops/nn_pooling_ops.h b/third_party/fwkacllib/inc/ops/nn_pooling_ops.h index 4c6f7293..f34de163 100644 --- a/third_party/fwkacllib/inc/ops/nn_pooling_ops.h +++ b/third_party/fwkacllib/inc/ops/nn_pooling_ops.h @@ -75,33 +75,36 @@ REG_OP(Pooling) .OP_END_FACTORY_REG(Pooling) /** -*@brief Performs average pooling on the input . \n - +*@brief Performs average pooling on the input. \n *@par Inputs: -*x: A tensor of type float16, float32, double . \n +*x: A tensor of type float16, float32, double. \n *@par Attributes: -*@li ksize: A required list of 4 ints, specifying the size (N, C, H, and W) of the sliding window, - * where N = C = 1, and H and W are positive integers within the range [1, 255]. -*@li strides: A required list of 4 ints, specifying the stride of the sliding window. - * The strides of the N and C dimensions are 1. - * The strides of the H and W dimensions are positive integers within the range [1, 63]. +*@li ksize: A required list of 4 ints, specifying the size (N, C, H, and W) +* of the sliding window, where N = C = 1, and H and W are positive integers +* within the range [1, 255]. +* @li strides: A required list of 4 ints, specifying the stride of the +* sliding window. The strides of the N and C dimensions are 1. The strides of +* the H and W dimensions are positive integers within the range [1, 63]. *@li padding: A required string, specifying the padding algorithm, - * either "VALID" or "SAME". With "SAME" means that the outputs will have the same spatial dimensions as its inputs. - * With "VALID" means no padding. -*@li data_format: An optional string, specifying the data format of "ksize" and "strides", - * either "NCHW", or "NHWC" (default) . \n + * either "VALID" or "SAME". With "SAME" means that the outputs will have the + * same spatial dimensions as its inputs. With "VALID" means no padding. +*@li data_format: An optional string, specifying the data format of "ksize" +* and "strides", either "NCHW", or "NHWC" (default). \n *@par Outputs: -*y: The average pooled output tensor. Has the same type and format as input "x" . \n +* y: The average pooled output tensor. Has the same type and format +* as input "x". \n -*@attention Constraints: -*@li This operator applies only to a TensorFlow network. -*@li Only single input and single output are supported. +* @attention Constraints: +* @li This operator applies only to a TensorFlow network. +* @li Only single input and single output are supported. *@li Global pooling is supported. -*@li "ksize_H" and "ksize_W" are positive integers within the range [1, 255]. ksize_H * ksize_W < 256 +* @li "ksize_H" and "ksize_W" are positive integers within the range [1, 255]. +* ksize_H * ksize_W < 256 *@li Due to instruction restrictions, - * the values of "strides_h" and "strides_w" are positive integers within the range [1, 63]. + * the values of "strides_h" and "strides_w" are positive integers within + * the range [1, 63]. *@par Third-party framework compatibility * Compatible with the TensorFlow operator AvgPool. */ @@ -116,7 +119,6 @@ REG_OP(AvgPool) /** *@brief Performs average pooling on the input. - *@par Inputs: *x: A tensor of type float16, float32, double. 
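+*
+* A hedged usage sketch, assuming the ge::op::AvgPool wrapper that GE
+* generates from the AvgPool registration above; "feature_map" is a
+* placeholder for an upstream operator output:
+* @code
+*   auto pool = ge::op::AvgPool("avg_pool")
+*                   .set_input_x(feature_map)        // NHWC input
+*                   .set_attr_ksize({1, 3, 3, 1})    // 3x3 window, N = C = 1
+*                   .set_attr_strides({1, 2, 2, 1})  // H/W strides in [1, 63]
+*                   .set_attr_padding("SAME")
+*                   .set_attr_data_format("NHWC");
+* @endcode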
@@ -130,19 +132,19 @@ REG_OP(AvgPool) * either "VALID", "SAME" and "CALCULATED". * With "SAME" means that the outputs will have the same spatial dimensions as its inputs. * With "VALID" means no padding. -*@li pads: Pad value when padding_mode is "CALCULATED". -*@li data_format: An optional string, specifying the data format of "ksize" and "strides", +* @li pads: Pad value when padding_mode is "CALCULATED". +* @li data_format: An optional string, specifying the data format of "ksize" and "strides", * either "NCHW", or "NHWC" (default). -*@li global_pooling: Global or not. If true, pads will change to {0,0,0,0} and ksize will change to [input_h, input_w] -*@li ceil_mode: Use ceil or floor to calculate the output size when padding_mode is "CALCULATED". -*@li exclusive: Ignore padding area or not when calculating average. +* @li global_pooling: Global or not. If true, pads will change to {0,0,0,0} and ksize will change to [input_h, input_w] +* @li ceil_mode: Use ceil or floor to calculate the output size when padding_mode is "CALCULATED". +* @li exclusive: Ignore padding area or not when calculating average. -*@par Outputs: -*y: The average pooled output tensor. Has the same type and format as input "x". +* @par Outputs: +* y: The average pooled output tensor. Has the same type and format as input "x". *@attention Constraints: *@li Only single input and single output are supported. -*@li Global pooling is supported. +* @li Global pooling is supported. *@li "ksize_H" and "ksize_W" are positive integers within the range [1, 255]. ksize_H * ksize_W < 256 *@li Due to instruction restrictions, * the values of "strides_h" and "strides_w" are positive integers within the range [1, 63]. @@ -163,25 +165,30 @@ REG_OP(AvgPoolV2) .OP_END_FACTORY_REG(AvgPoolV2) /** -*@brief Performs average pooling on the input. - -*@par Inputs: -*x: A 5-D Tensor of shape [batch, depth, height, width, channels] and type float16, float32, double. +* @brief Performs average pooling on the input. \n +* @par Inputs: +* x: A 5-D Tensor of shape [batch, depth, height, width, channels] and type +* float16, float32, double. \n -*@par Attributes: -*@li ksize: List of ints that has length 1, 3 or 5. The size of the window for each dimension of the input tensor. -*@li strides:List of ints that has length 1, 3 or 5. The stride of the sliding window for each dimension of the input tensor. -*@li pads: List of ints, implicit zero paddings on both sides of the input. -*@li ceil_mode: When true, will use ceil instead of floor in the formula to compute the output shape. -*@li count_include_pad: When true, will include the zero-padding in the averaging calculation. -*@li divisor_override: if specified, it will be used as divisor, otherwise size of the pooling region will be used. -*@li data_format: A string, format of input data . \n +* @par Attributes: +* @li ksize: List of ints that has length 1, 3 or 5. The size of the window +* for each dimension of the input tensor. +*@li strides:List of ints that has length 1, 3 or 5. The stride of the sliding +* window for each dimension of the input tensor. +* @li pads: List of ints, implicit zero paddings on both sides of the input. +* @li ceil_mode: When true, will use ceil instead of floor in the formula to +* compute the output shape. +* @li count_include_pad: When true, will include the zero-padding in the +* averaging calculation. +* @li divisor_override: if specified, it will be used as divisor, otherwise +* size of the pooling region will be used. 
+* @li data_format: A string, format of input data. \n *@par Outputs: -*y: The average pooled output tensor . \n +*y: The average pooled output tensor. \n *@attention Constraints: -*@li "ksize" is in the range [1, 255]. "strides" is in the range [1, 63] +* @li "ksize" is in the range [1, 255]. "strides" is in the range [1, 63]. *@par Third-party framework compatibility * Compatible with the TensorFlow operator AvgPool3D. @@ -200,19 +207,18 @@ REG_OP(AvgPool3D) /** -*@brief Performs average pooling on the input. - -*@par Inputs: -*@li x: A 5-D Tensor of shape [batch, depth, height, width, channels] and type float16, float32, double. -*@li filter: An optional tensor of type float16, float32, double, fractal_z_3d layout. +* @brief Performs average pooling on the input. +* @par Inputs: +* @li x: A 5-D Tensor of shape [batch, depth, height, width, channels] and type float16, float32, double. +* @li filter: An optional tensor of type float16, float32, double, fractal_z_3d layout. *@li multiplier: An optional tensor of float16, float32, double. -*@par Attributes: +* @par Attributes: *@li ksize: List of ints that has length 1, 3 or 5. The size of the window for each dimension of the input tensor. -*@li strides:List of ints that has length 1, 3 or 5. The stride of the sliding window for each dimension of the input tensor. +* @li strides:List of ints that has length 1, 3 or 5. The stride of the sliding window for each dimension of the input tensor. *@li pads: List of ints, implicit zero paddings on both sides of the input. *@li ceil_mode: When true, will use ceil instead of floor in the formula to compute the output shape. -*@li count_include_pad: When true, will include the zero-padding in the averaging calculation. +* @li count_include_pad: When true, will include the zero-padding in the averaging calculation. *@li divisor_override: if specified, it will be used as divisor, otherwise size of the pooling region will be used. *@li data_format: A string, format of input data . \n @@ -240,26 +246,30 @@ REG_OP(AvgPool3DD) .OP_END_FACTORY_REG(AvgPool3DD) /** -* @brief Computes AvgPool3DGrad function. - +* @brief Computes AvgPool3DGrad function. \n * @par Inputs: * @li orig_input_shape: An NDHWC tensor of type int32. -* @li grads: An NDHWC tensor of type float16, float32, or double. +* @li grads: An NDHWC tensor of type float16, float32, or double. \n * @par Attributes: -* @li ksize: List of ints that has length 5. The size of the window for each dimension of the input tensor. -* @li strides:List of ints that has length 5. The stride of the sliding window for each dimension of the input tensor. +* @li ksize: List of ints that has length 5. The size of the window for +* each dimension of the input tensor. +* @li strides:List of ints that has length 5. The stride of the sliding +* window for each dimension of the input tensor. * @li pads: List of ints, implicit zero paddings on both sides of the input. -* @li ceil_mode: When true, will use ceil instead of floor in the formula to compute the output shape. -* @li count_include_pad: When true, will include the zero-padding in the averaging calculation. -* @li divisor_override: if specified, it will be used as divisor, otherwise size of the pooling region will be used. -* @li data_format: A string, format of input data. +* @li ceil_mode: When true, will use ceil instead of floor in the formula to +* compute the output shape. +* @li count_include_pad: When true, will include the zero-padding in the +* averaging calculation. 
+* @li divisor_override: if specified, it will be used as divisor, otherwise +* size of the pooling region will be used. +* @li data_format: A string, format of input data. \n * @par Outputs: -* @output: A mutable tensor with the same shape and type as "orig_input_shape". +* @li output: A mutable tensor with the same shape and type as "grads". * @attention Constraints: -* @li "ksize" is in the range [1, 255]. "strides" is in the range [1, 63] +* @li "ksize" is in the range [1, 255]. "strides" is in the range [1, 63]. \n * @par Third-party framework compatibility * @li Compatible with the TensorFlow operator AvgPoolGrad. @@ -280,21 +290,26 @@ REG_OP(AvgPool3DGrad) /** * @brief Performs average pooling on the input. - * @par Inputs: * @li grads: An NDHWC tensor of type float16. * @li filter: An optional tensor of type float16, fractal_z_3d layout. * @li multiplier: An optional tensor of float16. * @par Attributes: -* @li orig_input_shape: List of ints that has length 5. The size of the window for each dimension of the input tensor. -* @li ksize: List of ints that has length 5. The size of the window for each dimension of the input tensor. -* @li strides:List of ints that has length 5. The stride of the sliding window for each dimension of the input tensor. +* @li orig_input_shape: List of ints that has length 5. +* The size of the window for each dimension of the input tensor. +* @li ksize: List of ints that has length 5. +* The size of the window for each dimension of the input tensor. +* @li strides:List of ints that has length 5. +* The stride of the sliding window for each dimension of the input tensor. * @li pads: List of ints, implicit zero paddings on both sides of the input. -* @li ceil_mode: When true, will use ceil instead of floor in the formula to compute the output shape. -* @li count_include_pad: When true, will include the zero-padding in the averaging calculation. -* @li divisor_override: if specified, it will be used as divisor, otherwise size of the pooling region will be used. -* @li data_format: A string, format of input data . \n +* @li ceil_mode: When true, will use ceil instead of floor +* in the formula to compute the output shape. +* @li count_include_pad: When true, will include the zero-padding +* in the averaging calculation. +* @li divisor_override: if specified, it will be used as divisor, +* otherwise size of the pooling region will be used. +* @li data_format: A string, format of input data. \n * @par Outputs: * output: The average pooled output tensor . \n @@ -364,9 +379,9 @@ REG_OP(MaxPoolExt2) /** *@brief Performs max pooling on the input . \n -*@par Inputs: +* @par Inputs: * One input: -*x: A Tensor. Supported type:float16, float32, double, int8, int16, +* x: A Tensor. Supported type:float16, float32, double, int8, int16, * int32, int64, uint8, uint16, qint8 *@par Attributes: @@ -406,10 +421,10 @@ REG_OP(MaxPool) .OP_END_FACTORY_REG(MaxPool) /** -*@brief Performs max 3d pooling on the input . \n +* @brief Performs max 3d pooling on the input . \n *@par Inputs: -*x: A Tensor. Supported type float16, float32, double . \n +* x: A Tensor. Supported type float16, float32, double . \n *@par Attributes: *@li ksize: A required list of int8, int16, int32, or int64 values, @@ -680,7 +695,7 @@ REG_OP(MaxPoolV2) .OP_END_FACTORY_REG(MaxPoolV2) /** -*@brief Performs max pooling on the input and outputs both max values and +* @brief Performs max pooling on the input and outputs both max values and * indices . 
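+*
+* For intuition, a scalar reference of a single 2x2 window (illustrative
+* only, not the device kernel): the recorded index lets the backward pass
+* route gradients to the winning position only.
+* @code
+*   float window[4] = {1.f, 3.f, 2.f, 0.f};
+*   int argmax = 0;
+*   for (int i = 1; i < 4; ++i) {
+*     if (window[i] > window[argmax]) argmax = i;
+*   }
+*   // y = window[argmax] = 3.f; "argmax" feeds MaxPoolGradWithArgmax.
+* @endcode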
\n *@par Inputs: @@ -702,7 +717,7 @@ REG_OP(MaxPoolV2) *@par Outputs: *@li y: A Tensor. Has the same type and format as input "x". *@li argmax: A Tensor. Has the same type and format as input "x". -*@attention Constraints: +* @attention Constraints: *@li "ksize" is a list that has length 4: ksize[0] = 1 or ksize[3] = 1, * ksize[1] * ksize[2] <= 255. *@li "stride is a list that has length 4: strides[0] = 1 or strides[3] = 1, @@ -723,39 +738,39 @@ REG_OP(MaxPoolWithArgmax) .OP_END_FACTORY_REG(MaxPoolWithArgmax) /** -*@brief Performs the backpropagation of MaxPoolWithArgmax . \n +* @brief Performs the backpropagation of MaxPoolWithArgmax . \n -*@par Inputs: +* @par Inputs: * Three inputs, including: -*@li x: An 4d tensor. Supported type: float, double, int32, +* @li x: An 4d tensor. Supported type: float, double, int32, * uint8, int16, int8, int64, uint16, half, uint32, uint64. * Must set the format, supported format list ["NCHW, NHWC"] -*@li grad: An 4d tensor. Supported type: float, double, int32, +* @li grad: An 4d tensor. Supported type: float, double, int32, * uint8, int16, int8, int64, uint16, half, uint32, uint64. * Must set the format, supported format list ["NCHW, NHWC"] *@li argmx: A tensor of type int32 or int64 . \n -*@par Attributes: -*@li ksize: A required list of int8, int16, int32, or int64 values, +* @par Attributes: +* @li ksize: A required list of int8, int16, int32, or int64 values, * specifying the size of the window for each dimension of the input tensor. * No default value. -*@li strides: A required list of int8, int16, int32, or int64 values, +* @li strides: A required list of int8, int16, int32, or int64 values, * specifying the stride of the sliding window for each dimension of * the input tensor. No default value. -*@li padding: A required string. No default value . \n +* @li padding: A required string. No default value . \n -*@par Outputs: +* @par Outputs: *y: A Tensor. Has the same type and format as input "x" . \n -*@attention Constraints: -*@li "ksize" is a list that has length 4: ksize[0] = 1 or ksize[3] = 1, +* @attention Constraints: +* @li "ksize" is a list that has length 4: ksize[0] = 1 or ksize[3] = 1, * ksize[1] * ksize[2] <= 255. -*@li "strides" is a list that has length 4: strides[0] = 1 or strides[3] = 1 -*@li "padding" is either "SAME" or "VALID". +* @li "strides" is a list that has length 4: strides[0] = 1 or strides[3] = 1 +* @li "padding" is either "SAME" or "VALID". \n -*@see max_pool_with_argmax -*@par Third-party framework compatibility +* @see max_pool_with_argmax +* @par Third-party framework compatibility * Compatible with the TensorFlow operator MaxPoolGradWithArgmax. */ REG_OP(MaxPoolGradWithArgmax) @@ -769,23 +784,23 @@ REG_OP(MaxPoolGradWithArgmax) .OP_END_FACTORY_REG(MaxPoolGradWithArgmax) /** -*@brief Performs transform mask to argmax . \n +* @brief Performs transform mask to argmax . \n -*@par Inputs: +* @par Inputs: * Two inputs: -*@li x: A Tensor of type float16. -*@li mask: A Tensor of type uint16 . \n +* @li x: A Tensor of type float16. +* @li mask: A Tensor of type uint16 . \n -*@par Attributes: -*@li ksize: A required list of int8, int16, int32, or int64 values, specifying the size of the window for each dimension of the input tensor. No default value. -*@li strides: A required list of int8, int16, int32, or int64 values, specifying the stride of the sliding window for each dimension of the input tensor. No default value. -*@li padding: A required string. No default value . 
-*@li originshape:A required list of int8, int16, int32, or int64 values, No default value. \n +* @par Attributes: +* @li ksize: A required list of int8, int16, int32, or int64 values, specifying the size of the window for each dimension of the input tensor. No default value. +* @li strides: A required list of int8, int16, int32, or int64 values, specifying the stride of the sliding window for each dimension of the input tensor. No default value. +* @li padding: A required string. No default value . +* @li originshape:A required list of int8, int16, int32, or int64 values, No default value. \n -*@par Outputs: +* @par Outputs: *argmax: A Tensor of type int32 . \n -*@attention Constraints: +* @attention Constraints: *@li "ksize" is a list that has length 4: ksize[0] = 1 or ksize[3] = 1, ksize[1] * ksize[2] <= 255. *@li "stride is a list that has length 4: strides[0] = 1 or strides[3] = 1, strides[1] <= 63, strides[0] >= 1, strides[2] <= 63, strides[2] >= 1. *@li "padding" is either "SAME" or "VALID" . \n @@ -843,11 +858,10 @@ REG_OP(MaxPoolGradGradWithArgmax) .OP_END_FACTORY_REG(MaxPoolGradGradWithArgmax) /** -* @brief Computes avgpoograd function . \n - +* @brief Computes avgpoograd function. \n * @par Inputs: * @li orig_input_shape: An NHWC tensor of type int32. -* @li input_grad: An NHWC tensor of type float16, float32, or double . \n +* @li input_grad: An NHWC tensor of type float16, float32, or double. \n * @par Attributes: * @li ksize: A required tuple or list, specifying the size of the window for @@ -856,10 +870,10 @@ REG_OP(MaxPoolGradGradWithArgmax) * window for each dimension of the input tensor. * @li padding: A required string, specifying the type of * the padding algorithm to use. -* @li data_format: An optional string. Defaults to "NHWC" . \n +* @li data_format: An optional string. Defaults to "NHWC". \n * @par Outputs: -* @out_grad: A mutable tensor with the same shape and type as "orig_input" . \n +* out_grad: A mutable tensor with the same shape and type as "input_grad". \n * @par Third-party framework compatibility * @li Compatible with the TensorFlow operator AvgPoolGrad. @@ -876,7 +890,6 @@ REG_OP(AvgPoolGrad) /** * @brief Computes gradients of average pooling function . \n - * @par Inputs: * @input_grad: An NHWC tensor of type float16. * @mean_matrix: Assist matrix, an NHWC tensor of type float16. @@ -911,11 +924,10 @@ REG_OP(AvgPoolGradD) .OP_END_FACTORY_REG(AvgPoolGradD) /** -* @brief Computes avgpoolv2grad function. - +* @brief Computes avgpoolv2grad function. \n * @par Inputs: * @li orig_input_shape: An NHWC tensor of type int32. -* @li input_grad: An NHWC tensor of type float16, float32, or double. +* @li input_grad: An NHWC tensor of type float16, float32, or double. \n * @par Attributes: * @li ksize: A required tuple or list, specifying the size of the window for @@ -924,15 +936,15 @@ REG_OP(AvgPoolGradD) * window for each dimension of the input tensor. * @li padding_mode: A required string, specifying the type of * the padding algorithm to use. -* @li global_pooling: Whether to use the global pooling. If global_pooling=true, -* ksize and pads will be ignored. Default False. -* @li ceil_mode: Whether to use the ceil function to calculate output height and -* width. Default False. +* @li global_pooling: Whether to use the global pooling. If global_pooling = +* true, ksize and pads will be ignored. Default False. +* @li ceil_mode: Whether to use the ceil function to calculate output height +* and width. Default False. 
* @li exclusive: Whether to exclude padding points. default is true. -* @li data_format: An optional string. Defaults to "NHWC". +* @li data_format: An optional string. Defaults to "NHWC". \n * @par Outputs: -* @out_grad: A mutable tensor with the same shape and type as "orig_input". +* @li out_grad: A mutable tensor with the same shape and type as "orig_input". \n * @par Third-party framework compatibility * @li Compatible with the TensorFlow operator AvgPoolGrad. @@ -952,7 +964,6 @@ REG_OP(AvgPoolV2Grad) .OP_END_FACTORY_REG(AvgPoolV2Grad) /** * @brief Computes gradients of averagev2 pooling function. - * @par Inputs: *input_grad: An NHWC tensor of type float16, float32, or double. @@ -1257,7 +1268,6 @@ REG_OP(MaxPool3DGrad) /** *@brief Performs AvgPool1D on the input . \n - *@par Inputs: *x: A Tensor. Must be one of the following types: int8, uint8, int16, int32, int64, float16, float32, float64 . \n @@ -1286,7 +1296,6 @@ REG_OP(AvgPool1D) /** *@brief Performs AvgPool1D on the input . \n - *@par Inputs: *x: A Tensor. Must be one of the following types: int8, uint8, int16, int32, int64, float16, float32, float64 . \n diff --git a/third_party/fwkacllib/inc/ops/nn_training_ops.h b/third_party/fwkacllib/inc/ops/nn_training_ops.h index 21ee90ab..32da707e 100644 --- a/third_party/fwkacllib/inc/ops/nn_training_ops.h +++ b/third_party/fwkacllib/inc/ops/nn_training_ops.h @@ -2002,40 +2002,41 @@ REG_OP(ApplyAdadeltaD) .OP_END_FACTORY_REG(ApplyAdadeltaD) /** -* @brief Updates "var" according to the ApplyMomentum algorithm. -* accum = accum * momentum + x1 * x2 -* if use_nesterov is True: -* var -= x1 * x2 * lr + accum * momentum * lr -* else: -* var -= accum * lr +*@brief Updates "var" according to the ApplyMomentum algorithm. +* accum = accum * momentum + x1 * x2 +* if use_nesterov is True: +* var -= x1 * x2 * lr + accum * momentum * lr +* else: var -= accum * lr * -* @par Inputs: -* Six inputs, including: -* @li var: A mutable Tensor has type TensorType::NumberType(). -* Should be a Variable Tensor. -* @li accum: A mutable Tensor has the same type as "var". -* Should be a Variable Tensor. -* @li lr: A scalar has the same type as "var", for the scaling factor. -* @li x1: A Tensor has type TensorType::NumberType(). -* @li momentum: A scalar has the same type as "var". -* @li x2: A scalar has the same type as "var". +*@par Inputs: +* Six inputs, including: +*@li var: A mutable Tensor has type TensorType::NumberType(). +* Should be a Variable Tensor. +*@li accum: A mutable Tensor has the same type as "var". +* Should be a Variable Tensor. +*@li lr: A scalar has the same type as "var", for the scaling factor. +*@li x1: A Tensor has type TensorType::NumberType(). +*@li momentum: A scalar has the same type as "var". +*@li x2: A scalar has the same type as "var". \n * -* @par Attributes: -* Two attributes, including: -* @li use_nesterov: An optional bool. Defaults to "False". -* If True, the tensor passed to compute grad will be var - lr * momentum * accum, -* so in the end, the var you get is actually var - lr * momentum * accum. -* @li use_locking: An optional bool. Defaults to "False". -* If "True", updating of the "var", m", and "v" tensors will be protected -* by a lock; otherwise the behavior is undefined, but may exhibit less contention. +*@par Attributes: +* Two attributes, including: +*@li use_nesterov: An optional bool. Defaults to "False". 
+* If True, the tensor passed to compute grad will be
+* var - lr * momentum * accum, so in the end,
+* the var you get is actually var - lr * momentum * accum.
+*@li use_locking: An optional bool. Defaults to "False".
+* If "True", updating of the "var" and "accum" tensors will be protected
+* by a lock; otherwise the behavior is undefined, but may exhibit
+* less contention. \n
 *
-*@par Outputs:
-* Two outputs, including:
-*@li var: A mutable Tensor has the same type as "var".
-*@li accum: A mutable Tensor has the same type as "var".
+*@par Outputs:
+* Two outputs, including:
+*@li var: A mutable Tensor has the same type as "var".
+*@li accum: A mutable Tensor has the same type as "var". \n
 
 *@par Restrictions:
-*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use.
+* Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use.
 */
 REG_OP(FusedMulApplyMomentum)
     .INPUT(var, TensorType::NumberType())
diff --git a/third_party/fwkacllib/inc/ops/nonlinear_fuc_ops.h b/third_party/fwkacllib/inc/ops/nonlinear_fuc_ops.h
index a582d6e2..e4d7936c 100644
--- a/third_party/fwkacllib/inc/ops/nonlinear_fuc_ops.h
+++ b/third_party/fwkacllib/inc/ops/nonlinear_fuc_ops.h
@@ -26,16 +26,17 @@ namespace ge {
 
 /**
 *@brief The GELU activation function is x*Φ(x),
-* where Φ(x) the standard Gaussian cumulative distribution function. \n
+* where Φ(x) is the standard Gaussian cumulative distribution function.
 
 *@par Inputs:
-*One input, including:
-*x: A Tensor. Must be one of the following types: float16, float32
+*One input, including: \n
+*x: A Tensor. Must be one of the following types: float16, float32. \n
 
 *@par Outputs:
-*y: A Tensor. Has the same type as "x".
-*@par Third-party framework compatibility
-*Compatible with the TensorFlow operator Gelu
+*y: A Tensor. Has the same type as "x". \n
+
+*@par Third-party framework compatibility:
+* Compatible with the TensorFlow operator Gelu.
 */
 REG_OP(Gelu)
     .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT}))
@@ -124,18 +125,18 @@ REG_OP(SwishGrad)
 .OP_END_FACTORY_REG(SwishGrad)
 
 /**
-*@brief Computes the gradient for the gelu of "x" . \n
+*@brief Computes the gradient for the gelu of "x" .
 
 *@par Inputs:
-*Three inputs, including:
-* @li dy: A Tensor. Must be one of the following types: float16, float32
-* @li x: A Tensor of the same type as "dy".
-* @li y: A Tensor of the same type as "dy" . \n
+* Three inputs, including:
+*@li dy: A Tensor. Must be one of the following types: float16, float32.
+*@li x: A Tensor of the same type as "dy".
+*@li y: A Tensor of the same type as "dy". \n
 
 *@par Outputs:
 *z: A Tensor. Has the same type as "dy".
 *@par Third-party framework compatibility
-*Compatible with the TensorFlow operator GeluGrad
+* Compatible with the TensorFlow operator GeluGrad.
 */
 REG_OP(GeluGrad)
     .INPUT(dy, TensorType({DT_FLOAT16, DT_FLOAT}))
@@ -419,7 +420,7 @@ REG_OP(Softplus)
 .OP_END_FACTORY_REG(Softplus)
 
 /**
-*@brief Computes softplus gradients for a softplus operation . \n
+* @brief Computes softplus gradients for a softplus operation.
 
 *@par Inputs:
 *Two inputs:
@@ -440,7 +441,7 @@ REG_OP(SoftplusGrad)
 .OP_END_FACTORY_REG(SoftplusGrad)
 
 /**
-*@brief Computes softsign: x/(abs(x) + 1) . \n
+* @brief Computes softsign: x/(abs(x) + 1).
 
 *@par Inputs:
 * One input:
@@ -798,7 +799,7 @@ REG_OP(LeakyReluGrad)
 .OP_END_FACTORY_REG(LeakyReluGrad)
 
 /**
-*@brief Thresholds grad each element of the input Tensor . \n
+*@brief Computes the threshold gradient for each element of the input Tensor.
 
 *@par Inputs:
 * @li gradients: A Tensor shape and dtype of input gradients. Support float16, int32.
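+*
+* For intuition, a hedged scalar sketch of the usual threshold backward rule
+* that this operator's inputs suggest (hypothetical reference, not the device
+* kernel): the gradient is kept only where the forward input exceeded the
+* threshold.
+* @code
+*   float threshold_grad_ref(float gradient, float feature, float threshold) {
+*     return feature > threshold ? gradient : 0.0f;
+*   }
+* @endcode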
@@ -821,7 +822,7 @@ REG_OP(ThresholdGradV2D) .OP_END_FACTORY_REG(ThresholdGradV2D) /** -*@brief Thresholds each element of the input Tensor y = (x > threshold) ? x : value . \n +*@brief Thresholds each element of the input Tensor y = (x > threshold) ? x : value . *@par Inputs: *x: A Tensor dtype of real number . \n diff --git a/third_party/fwkacllib/inc/ops/quantize_ops.h b/third_party/fwkacllib/inc/ops/quantize_ops.h index 5a66b4ee..ae701295 100644 --- a/third_party/fwkacllib/inc/ops/quantize_ops.h +++ b/third_party/fwkacllib/inc/ops/quantize_ops.h @@ -93,18 +93,18 @@ REG_OP(Quantize) * @li sqrt_mode: A optional bool, specifying whether to perform square root on "scale", either "True" or "False". * Defaults to "False". * @li round_mode: An optional string, specifying the float16 to int8 cast type. -* The value range is [Round, Floor, Ceil, Truncate]. Defaults to "Round" . +* The value range is [Round, Floor, Ceil, Trunc]. Defaults to "Round" . * @li dst_type: A optional int32, specifying the output data type. Defaults to "DT_INT8" . \n * @par Outputs: * y: The quantized output tensor of type int8 or int4. \n * @attention Constraints: -* round_mode value range is [Round, Floor, Ceil, Truncate]. +* round_mode value range is [Round, Floor, Ceil, Trunc]. * @li Round: round to nearest, tie to even(c language rint). * @li Floor: round to minus infinity(c language floor). * @li Ceil: round to positive infinity(c language ceil). -* @li Truncate: round to zero(c language trunc). \n +* @li Trunc: round to zero(c language trunc). \n * @par Third-party framework compatibility * It is a custom operator. It has no corresponding operator in Caffe. diff --git a/third_party/fwkacllib/inc/ops/reduce_ops.h b/third_party/fwkacllib/inc/ops/reduce_ops.h index 2c4b3059..079982db 100644 --- a/third_party/fwkacllib/inc/ops/reduce_ops.h +++ b/third_party/fwkacllib/inc/ops/reduce_ops.h @@ -25,7 +25,7 @@ namespace ge { /** -*@brief Performs reduced batch normalization . \n +*@brief Performs reduced batch normalization . *@par Inputs: *x: A tensor of type float16 or float32. \n @@ -67,7 +67,7 @@ REG_OP(BN3DTrainingReduce) .OP_END_FACTORY_REG(BN3DTrainingReduce) /** -*@brief Performs the backpropagation of BatchNorm . \n +*@brief Performs the backpropagation of BatchNorm . *@par Inputs: * Seven inputs, including: @@ -153,7 +153,7 @@ REG_OP(BN3DTrainingReduceGrad) .OP_END_FACTORY_REG(BN3DTrainingReduceGrad) /** -*@brief Performs reduced batch normalization . \n +*@brief Performs reduced batch normalization . *@par Inputs: * Seven inputs, including: @@ -183,10 +183,10 @@ REG_OP(BN3DTrainingReduceGrad) *@attention Constraints: *@li This operator is a BatchNorm fusion operator for updating the moving -averages for training. -*This operator is used in conjunction with BNTrainingUpdate. -*@li For Ascend 310, the result accuracy fails to reach 1/1000 due to the square -* root instruction. +* averages for training. This operator is used in conjunction with +* BNTrainingUpdate. +*@li For Ascend 310, the result accuracy fails to reach 1/1000 due to the +* square root instruction. */ REG_OP(BNTrainingUpdate) .INPUT(x, TensorType({DT_FLOAT16,DT_FLOAT})) @@ -259,7 +259,7 @@ REG_OP(BN3DTrainingUpdate) .OP_END_FACTORY_REG(BN3DTrainingUpdate) /** -*@brief Performs batch normalization for inference . \n +*@brief Performs batch normalization for inference . *@par Inputs: * Five inputs, including: @@ -277,8 +277,8 @@ REG_OP(BN3DTrainingUpdate) *y: A tensor of type float16 or float32 for the normalized "x" . 
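+*
+* For reference, a hedged scalar sketch of the usual batch-norm inference
+* formula that the documented inputs suggest (not the device kernel):
+* @code
+*   // requires <cmath>; all names mirror the inputs and attribute above
+*   float bn_infer_ref(float x, float scale, float offset, float mean,
+*                      float variance, float epsilon) {
+*     return scale * (x - mean) / std::sqrt(variance + epsilon) + offset;
+*   }
+* @endcode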
\n *@attention Constraints: -*For Ascend 310, the result accuracy fails to reach 1/1000 due to the square root -* instruction. +*For Ascend 310, the result accuracy fails to reach 1/1000 due to the +* square root instruction. */ REG_OP(BNInfer) .INPUT(x, TensorType({DT_FLOAT16,DT_FLOAT})) @@ -291,19 +291,21 @@ REG_OP(BNInfer) .OP_END_FACTORY_REG(BNInfer) /** -*@brief Performs reduced batch normalization. For some scene which don't contain -assignmoving average . \n +*@brief Performs reduced batch normalization. For some scenes which don't +* contain assign moving average . *@par Inputs: *Five inputs, including: *@li x: A tensor of type float16 or float32. *@li sum: A tensor of type float32 for the output of operator BNTrainingReduce. -*@li square_sum: A tensor of type float32 for the output of operator BNTrainingReduce. +*@li square_sum: A tensor of type float32 for the output of operator +* BNTrainingReduce. *@li scale: A tensor of type float32, for the scaling factor. *@li offset: A tensor of type float32, for the scaling offset . \n *@par Attributes: -*epsilon: A required float32, specifying the small value added to variance to avoid dividing by zero . \n +*epsilon: A required float32, specifying the small value added to +* variance to avoid dividing by zero . \n *@par Outputs: *Three outputs, including: @@ -313,7 +315,8 @@ assignmoving average . \n *@attention Constraints: *This operator is used in conjunction with BNTrainingReduce. -For Ascend 310, the result accuracy fails to reach 1/1000 due to the square root instruction. +*For Ascend 310, the result accuracy fails to reach 1/1000 due to +* the square root instruction. */ REG_OP(BNTrainingUpdateV2) .INPUT(x, TensorType({DT_FLOAT16,DT_FLOAT})) @@ -328,30 +331,35 @@ REG_OP(BNTrainingUpdateV2) .OP_END_FACTORY_REG(BNTrainingUpdateV2) /** -*@brief Performs reduced batch normalization v3. For some scene which don't contain -assign moving average . \n +*@brief Performs reduced batch normalization v3. For some scenes which +* don't contain assign moving average . *@par Inputs: * Five inputs, including: *@li x: A tensor of type float16 or float32. *@li sum: A tensor of type float32 for the output of operator BNTrainingReduce. -*@li square_sum: A tensor of type float32 for the output of operator BNTrainingReduce. +*@li square_sum: A tensor of type float32 for the output of operator +* BNTrainingReduce. *@li scale: A tensor of type float32, for the scaling factor. *@li offset: A tensor of type float32, for the scaling offset . \n *@par Attributes: -*epsilon: A required float32, specifying the small value added to variance to avoid dividing by zero . \n +*epsilon: A required float32, specifying the small value added to variance +* to avoid dividing by zero . \n *@par Outputs: *@li y: A tensor of type float16 or float32, for normalized "x". *@li batch_mean: A tensor of type float32, for the mean of "x". *@li batch_variance: A tensor of type float32, for the variance of "x". -*@li reserve_1: A tensor of type float32, for the mean of batch "x". Has the same type as batch_mean. -*@li reserve_2: A tensor of type float32, for the variance of batch "x". Has the same type as batch_mean . \n +*@li reserve_1: A tensor of type float32, for the mean of batch "x". +* Has the same type as batch_mean. +*@li reserve_2: A tensor of type float32, for the variance of batch "x". +* Has the same type as batch_mean . \n *@attention Constraints: *@li This operator is used in conjunction with BNTrainingReduce. 
-*@li For Ascend 310, the result accuracy fails to reach 1/1000 due to the square root instruction. +*@li For Ascend 310, the result accuracy fails to reach 1/1000 due to +* the square root instruction. */ REG_OP(BNTrainingUpdateV3) .INPUT(x, TensorType({DT_FLOAT16,DT_FLOAT})) @@ -368,7 +376,7 @@ REG_OP(BNTrainingUpdateV3) .OP_END_FACTORY_REG(BNTrainingUpdateV3) /** -*@brief Performs the backpropagation of BatchNorm . \n +*@brief Performs the backpropagation of BatchNorm . *@par Inputs: * Four inputs, including: @@ -436,16 +444,17 @@ REG_OP(BN3DTrainingUpdateGrad) .OP_END_FACTORY_REG(BN3DTrainingUpdateGrad) /** -*@brief Performs the backpropagation of BatchNorm for inference . \n +*@brief Performs the backpropagation of BatchNorm for inference . *@par Inputs: * Three inputs, including: -*@li grads: A tensor of type loat16 or float32, for the gradient. +*@li grads: A tensor of type float16 or float32, for the gradient. *@li scale: A tensor of type float32. *@li batch_variance: A tensor of type float32. It is an output of BatchNorm . \n *@par Attributes: -*epsilon: An optional float32. Defaults to "0.0001". A small float number added to the variance of "x" . \n +*epsilon: An optional float32. Defaults to "0.0001". A small float number +* added to the variance of "x" . \n *@par Outputs: *x_backprop: A Tensor of type float16 or float32, for the offset of "x" . \n diff --git a/third_party/fwkacllib/inc/ops/selection_ops.h b/third_party/fwkacllib/inc/ops/selection_ops.h index 5222bf7d..810d024b 100644 --- a/third_party/fwkacllib/inc/ops/selection_ops.h +++ b/third_party/fwkacllib/inc/ops/selection_ops.h @@ -2029,26 +2029,29 @@ REG_OP(Cummax) /** *@brief Extends the input with copies of data along a specified dimension. For example: -*(1) If x = [[[1, 2], [3, 4], [5, 6]], [[7, 8], [9, 10], [11, 12]]], with shape (2, 3, 2); -*(2) axis = 1; -*(3) tiles = 2; -*(4) Then, y = [[[1, 2], [3, 4], [5, 6], [1, 2], [3, 4], [5, 6]], [[7, 8], [9, 10], [11, 12], [7, 8], [9, 10], [11, 12]]], with shape (2, 6, 2) . \n +*(1) If x = [[[1, 2], [3, 4], [5, 6]], [[7, 8], [9, 10], [11, 12]]], with shape (2, 3, 2); \n +*(2) axis = 1; \n +*(3) tiles = 2; \n +*(4) Then, y = [[[1, 2], [3, 4], [5, 6], [1, 2], [3, 4], [5, 6]], [[7, 8], +* [9, 10], [11, 12], [7, 8], [9, 10], [11, 12]]], +* with shape (2, 6, 2) . \n *@par Inputs: * One input: *input_x: A Tensor with any format. Must be one of the following types: float16, float32, int8, int16, int32, int64, uint8, uint16, uint32, uint64 . \n -*@par Attributes: -*@li axis: An optional int32, specifying the axis to tile. Defaults to 1. -*@li tiles: A required int32, specifying the number of copies (tiles) to output . \n +* @par Attributes: +* @li axis: An optional int32, specifying the axis to tile. Defaults to 1. +* @li tiles: A required int32, specifying the number of copies (tiles) to output . \n *@par Outputs: -*output_y: A Tensor of any format. Must be one of the following types: float16, float32, int8, int16, int32, int64, uint8, uint16, uint32, uint64 . \n +* output_y: A Tensor of any format. Must be one of the following types: +* float16, float32, int8, int16, int32, int64, uint8, uint16, uint32, uint64 . \n -*@attention Constraints: -*@li "axis" must be within the rank of the input tensor. -*@li "tiles" must be greater than 1. -*@par Third-party framework compatibility +* @attention Constraints: +* @li "axis" must be within the rank of the input tensor. +* @li "tiles" must be greater than 1. 
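+*
+* A hedged usage sketch matching the (2, 3, 2) -> (2, 6, 2) example above,
+* assuming the generated ge::op::TileWithAxis wrapper; "input_x" is a
+* placeholder tensor:
+* @code
+*   auto tile = ge::op::TileWithAxis("tile_with_axis")
+*                   .set_input_x(input_x)
+*                   .set_attr_axis(1)    // tile along dimension 1
+*                   .set_attr_tiles(2);  // two copies per the example
+* @endcode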
+* @par Third-party framework compatibility * Compatible with the Caffe operator Tile. */ REG_OP(TileWithAxis) @@ -2061,17 +2064,17 @@ REG_OP(TileWithAxis) .OP_END_FACTORY_REG(TileWithAxis) /** -*@brief Read data with offset and stride . \n +* @brief Read data with offset and stride . -*@par Inputs: -*One input: -*x: A Tensor. Must be one of the following types: float16, int8 . \n +* @par Inputs: +* One input: +* x: A Tensor. Must be one of the following types: float16, int8 . \n -*@par Attributes: -*@li stride_list: An optional 5D list of type int32. Defaults to "[1,1,1,1,1]" . \n +* @par Attributes: +* stride_list: An optional 5D list of type int32. Defaults to "[1,1,1,1,1]" . \n -*@par Outputs: -*y: A Tensor of the same type as "x". +* @par Outputs: +* y: A Tensor of the same type as "x". *@par Restrictions: *Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. @@ -2083,10 +2086,10 @@ REG_OP(ReadSelect) .OP_END_FACTORY_REG(ReadSelect) /** -*@brief: Write data with offset . \n +* @brief: Write data with offset . -*@par Inputs: -*x: A Tensor. Must be one of the following types: int32, float32, float16, int8 . \n +* @par Inputs: +* x: A Tensor. Must be one of the following types: int32, float32, float16, int8 . \n *@par Outputs: *y: A Tensor. Has the same type as "x". @@ -2100,14 +2103,14 @@ REG_OP(WriteSelect) .OP_END_FACTORY_REG(WriteSelect) /** -*@brief Read data by stride. +* @brief Read data by stride. -*@par Inputs: -*x: A Tensor. Must be one of the following types: float16, int8. \n +* @par Inputs: +* x: A Tensor. Must be one of the following types: float16, int8. \n -*@par Attributes: -*@li axis: A required int32, specifying the index of axis to read by stride. \n -*@li stride: A required int32, specifying the value of reading stride. \n +* @par Attributes: +* @li axis: A required int32, specifying the index of axis to read by stride. +* @li stride: A required int32, specifying the value of reading stride. \n *@par Outputs: *y: A Tensor of the same type as "x". @@ -2120,14 +2123,14 @@ REG_OP(StridedRead) .OP_END_FACTORY_REG(StridedRead) /** -*@brief Write data by stride. +* @brief Write data by stride. -*@par Inputs: -*x: A Tensor. Must be one of the following types: float16, int8. \n +* @par Inputs: +* x: A Tensor. Must be one of the following types: float16, int8. \n -*@par Attributes: -*@li axis: A required int32, specifying the index of axis to write by stride. \n -*@li stride: A required int32, specifying the value of writing stride. \n +* @par Attributes: +* @li axis: A required int32, specifying the index of axis to write by stride. +* @li stride: A required int32, specifying the value of writing stride. \n *@par Outputs: *y: A Tensor. Has the same type as "x". @@ -2140,20 +2143,20 @@ REG_OP(StridedWrite) .OP_END_FACTORY_REG(StridedWrite) /** -*@brief Computes the cumulative log sum exp of the tensor "x" along "axis" . \n +* @brief Computes the cumulative log sum exp of the tensor "x" along "axis" . -*@par Inputs: +* @par Inputs: * Two inputs, including: -*@li x: A Tensor. Must be one of the following types: float32, float16. -*@li axis A Tensor of type int32 or int16. Defaults to "0". +* @li x: A Tensor. Must be one of the following types: float32, float16. +* @li axis A Tensor of type int32 or int16. Defaults to "0". * *@par Attributes: *@li exclusive: If "False", performs inclusive CumulativeLogsumexp, which means that the first element of the input is identical to the first element of the output. If "True", performs exclusive CumulativeLogsumexp. 
 */
 REG_OP(CumulativeLogsumexp)
@@ -2169,7 +2172,7 @@ REG_OP(CumulativeLogsumexp)
 *
 *@par Inputs:
 * One input:
-*x: A Tensor. Must be one of the following types: float32, float16.
+* x: A Tensor. Must be one of the following types: float32, float16.
 *
 *@par Attributes:
 *@li axis: A Tensor of type int32 or int16. Defaults to "0".
@@ -2224,15 +2227,16 @@ REG_OP(InplaceIndexAdd)
 
 /**
 * @brief Replace the value of x with value according to mask.
+
 * @par Inputs:
-* three inputs, including:
-* @li x: A Tensor of dtype is float16 or float32 or int64 or int32 or int8.
-* @li mask: A Tensor of dtype bool.
-* @li value: A Tensor of dtype float16 or float32 or int64 or int32 or int8.
+* Three inputs, including:
+* @li x: A Tensor of dtype float16 or float32 or int64 or int32 or int8.
+* @li mask: A Tensor of dtype bool.
+* @li value: A Tensor of dtype float16 or float32 or int64 or int32 or int8. \n
 
 * @par Outputs:
-* y: A tensor. Must be one of the following dtypes:
-* float16, float32, int64, int32, int8.
+* y: A tensor. Must be one of the following dtypes:
+* float16, float32, int64, int32, int8.
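+*
+* @par Example
+* An element-wise reference of the fill semantics (editor's sketch assuming
+* equal-shaped flat inputs and a scalar fill value; broadcasting is omitted;
+* "MaskedFillRef" is a hypothetical helper, not part of this header):
+* @code
+*   #include <cstddef>
+*   #include <vector>
+*
+*   std::vector<float> MaskedFillRef(const std::vector<float> &x,
+*                                    const std::vector<bool> &mask,
+*                                    float value) {
+*     std::vector<float> y(x);
+*     for (std::size_t i = 0; i < y.size(); ++i) {
+*       if (mask[i]) { y[i] = value; }  // Replace only where mask is true.
+*     }
+*     return y;
+*   }
+* @endcode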
"num" indicates the number of loop masked fill, and the value N -* indicates the batch of ND Tensor, if input x shape is 1-D, N = 1. \n -*@li end: masked fill end pos. A 3D Tensor of int32 with +* indicates the batch of ND Tensor, if input x shape is 1-D, N = 1. +* @li end: masked fill end pos. A 3D Tensor of int32 with * shape (num, N). "num" indicates the number of loop masked fill, and the value N -* indicates the batch of ND Tensor. \n -*@li value: masked fill value. A 2D Tensor of float32/float16/int32/int8 with -* shape (num,). "num" indicates the number of loop masked fill +* indicates the batch of ND Tensor. +* @li value: masked fill value. A 2D Tensor of float32/float16/int32/int8 with +* shape (num,). "num" indicates the number of loop masked fill. \n -*@par Attributes: -*@li axis: axis with masked fill of int32. Defaults to -1. +* @par Attributes: +* @li axis: axis with masked fill of int32. Defaults to -1. -*@par Outputs: -*y: A ND Tensor of float32/float16/int32/int8 with shapes 1-D (D,), 2-D(N, D), 3-D(N, C, D) +* @par Outputs: +* y: A ND Tensor of float32/float16/int32/int8 with shapes 1-D (D,), 2-D(N, D), 3-D(N, C, D) *@attention Constraints: * Warning: input shape's length must not be bigger than 1024 * 1024 * 1024. @@ -2456,10 +2460,12 @@ REG_OP(MaskedFillRange) * * @par Inputs: * Six inputs, including: -* @li topk_pq_distance: A sorted Tensor, Will be updated after calculation. Must be one of the following types: float32, float16. +* @li topk_pq_distance: A sorted Tensor, Will be updated after calculation. +* Must be one of the following types: float32, float16. * @li topk_pq_index: A Tensor of type int32, index corresponding to topk_pq_distance. * @li topk_pq_ivf: A Tensor of type int32 , the bucket number corresponding to topk_pq_distance. -* @li pq_distance: A Tensor of type float32 or float16, the new data set will be reordered with topk_pq_distance and updated to topk_pq_distance. +* @li pq_distance: A Tensor of type float32 or float16, +* the new data set will be reordered with topk_pq_distance and updated to topk_pq_distance. * @li pq_index: A Tensor of type int32, index corresponding to pq_distance. * @li pq_ivf: A scalar of type int32 , the bucket number corresponding to pq_distance. \n *