From ec3d7db8441df09cd0311b9800e60852543dbfc0 Mon Sep 17 00:00:00 2001 From: majorzhang Date: Tue, 28 Jun 2022 15:55:25 +0800 Subject: [PATCH] upgrade Ascend package 28 Jun 22 --- inc/external/acl/acl_op_compiler.h | 28 ++ metadef | 2 +- third_party/fwkacllib/inc/ops/array_ops.h | 33 +++ third_party/fwkacllib/inc/ops/cluster.h | 14 +- .../inc/ops/elewise_calculation_ops.h | 228 ++++++++-------- .../inc/ops/matrix_calculation_ops.h | 245 ++++++++++------- .../fwkacllib/inc/ops/nn_batch_norm_ops.h | 27 +- .../fwkacllib/inc/ops/nn_calculation_ops.h | 246 +++++++++--------- third_party/fwkacllib/inc/ops/nn_norm_ops.h | 54 +++- third_party/fwkacllib/inc/ops/nn_ops.h | 20 +- .../fwkacllib/inc/ops/nn_pooling_ops.h | 245 ++++++++--------- .../fwkacllib/inc/ops/nn_training_ops.h | 59 ++--- .../fwkacllib/inc/ops/nonlinear_fuc_ops.h | 33 +-- third_party/fwkacllib/inc/ops/quantize_ops.h | 6 +- third_party/fwkacllib/inc/ops/reduce_ops.h | 61 +++-- third_party/fwkacllib/inc/ops/selection_ops.h | 152 +++++------ 16 files changed, 842 insertions(+), 611 deletions(-) diff --git a/inc/external/acl/acl_op_compiler.h b/inc/external/acl/acl_op_compiler.h index 9de0ee85..a0a3f786 100644 --- a/inc/external/acl/acl_op_compiler.h +++ b/inc/external/acl/acl_op_compiler.h @@ -92,6 +92,34 @@ ACL_FUNC_VISIBILITY aclError aclopCompileAndExecute( int numOutputs, const aclTensorDesc *const outputDesc[], aclDataBuffer *const outputs[], const aclopAttr *attr, aclopEngineType engineType, aclopCompileType compileFlag, const char *opPath, aclrtStream stream); +/** + * @ingroup AscendCL + * @brief compile and execute op + * + * @param opType [IN] op type + * @param numInputs [IN] number of inputs + * @param inputDesc [IN] pointer to array of input tensor descriptions + * @param inputs [IN] pointer to array of input buffers + * @param numOutputs [IN] number of outputs + * @param outputDesc [IN|OUT] pointer to array of output tensor descriptions + * @param outputs [IN] pointer to array of outputs buffers + * @param attr [IN] pointer to instance of aclopAttr. + * may pass nullptr if the op has no attribute + * @param engineType [IN] engine type + * @param compileFlag [IN] compile flag + * @param opPath [IN] path of op + * @param stream [IN] stream handle + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + */ +ACL_FUNC_VISIBILITY aclError aclopCompileAndExecuteV2(const char *opType, int numInputs, aclTensorDesc *inputDesc[], + aclDataBuffer *inputs[], int numOutputs, + aclTensorDesc *outputDesc[], aclDataBuffer *outputs[], + aclopAttr *attr, aclopEngineType engineType, + aclopCompileType compileFlag, const char *opPath, + aclrtStream stream); + /** * @ingroup AscendCL * @brief set compile option diff --git a/metadef b/metadef index 2d98a178..175dce71 160000 --- a/metadef +++ b/metadef @@ -1 +1 @@ -Subproject commit 2d98a17884e656a2446239cdb9cee79543cb0161 +Subproject commit 175dce710e744666c6204540857634f362aafd61 diff --git a/third_party/fwkacllib/inc/ops/array_ops.h b/third_party/fwkacllib/inc/ops/array_ops.h index 924f98e4..17ab4322 100644 --- a/third_party/fwkacllib/inc/ops/array_ops.h +++ b/third_party/fwkacllib/inc/ops/array_ops.h @@ -1583,6 +1583,39 @@ REG_OP(UniqueConsecutive) .ATTR(return_counts, Bool, false) .ATTR(axis, Int, 1000) .OP_END_FACTORY_REG(UniqueConsecutive) + +/** +* @brief Decodes a variant Tensor into a RaggedTensor. \n +* +* @par Input: +* @li encoded_ragged: A Tensor of type variant. A variant Tensor containing encoded RaggedTensors. 
\n
*
* @par Outputs:
* @li output_nested_splits: A list of output_ragged_rank Tensor objects with type int32 or int64.
* @li output_dense_values: A Tensor, which must be one of the following types:
* double, float32, float16, int8, uint8, int16, uint16, int32, uint32, int64, uint64, bool. \n
*
* @par Attributes:
* @li input_ragged_rank: An int that is >= -1. The ragged rank of each encoded RaggedTensor component in the input.
* If set to -1, this is inferred as output_ragged_rank - rank(encoded_ragged).
* @li output_ragged_rank: An int that is >= 0. The expected ragged rank of the output RaggedTensor.
* The following must hold: output_ragged_rank = rank(encoded_ragged) + input_ragged_rank.
* @li Tvalues: The data type of output_dense_values.
* @li Tsplits: The data type of output_nested_splits. An optional DType of "int32, int64". Defaults to `int64`. \n
*
* @par Third-party framework compatibility
* Compatible with the TensorFlow operator RaggedTensorFromVariant.
*/
REG_OP(RaggedTensorFromVariant)
    .INPUT(encoded_ragged, TensorType({DT_VARIANT}))
    .DYNAMIC_OUTPUT(output_nested_splits, TensorType({DT_INT32, DT_INT64}))
    .OUTPUT(output_dense_values, TensorType::BasicType())
    .REQUIRED_ATTR(input_ragged_rank, Int)
    .REQUIRED_ATTR(output_ragged_rank, Int)
    .REQUIRED_ATTR(Tvalues, Type)
    .ATTR(Tsplits, Type, DT_INT64)
    .OP_END_FACTORY_REG(RaggedTensorFromVariant)

}  // namespace ge
#endif  // OPS_BUILT_IN_OP_PROTO_INC_ARRAY_OPS_H_
diff --git a/third_party/fwkacllib/inc/ops/cluster.h b/third_party/fwkacllib/inc/ops/cluster.h
index 19b4ea05..6e41e569 100644
--- a/third_party/fwkacllib/inc/ops/cluster.h
+++ b/third_party/fwkacllib/inc/ops/cluster.h
@@ -29,19 +29,19 @@ namespace ge {
* @brief Perform k-means clustering on a data matrix. \n

* @par Inputs:
-* Three required inputs and one optional inputs, including: \n
-* @li x: A 2D tensor of data type float32. \n
-* @li y: A 2D tensor of data type float32. \n
-* @li sum_square_x: An optional 2D tensor of data type float32. \n
+* Three required inputs and one optional input, including:
+* @li x: A 2D tensor of data type float32.
+* @li y: A 2D tensor of data type float32.
+* @li sum_square_x: An optional 2D tensor of data type float32.
* @li sum_square_y: A 2D tensor of data type float32. \n

* @par Attributes:
* use_actual_distance: Indicates whether to calculate the complete distance. \n

* @par Outputs:
-* @li segment_sum: A tensor of data type float32. \n
-* @li segment_count: A tensor of data type float32. \n
-* @li k_mean_total_sum: A tensor of data type float32. \n
+* @li segment_sum: A tensor of data type float32.
+* @li segment_count: A tensor of data type float32.
+* @li k_mean_total_sum: A tensor of data type float32.
*/
REG_OP(KMeansCentroids)
    .INPUT(x, TensorType({DT_FLOAT}))
diff --git a/third_party/fwkacllib/inc/ops/elewise_calculation_ops.h b/third_party/fwkacllib/inc/ops/elewise_calculation_ops.h
index 58650670..29cfa4f5 100644
--- a/third_party/fwkacllib/inc/ops/elewise_calculation_ops.h
+++ b/third_party/fwkacllib/inc/ops/elewise_calculation_ops.h
@@ -48,29 +48,29 @@ REG_OP(AddN)
    .OP_END_FACTORY_REG(AddN)

/**
-*@brief Calculates the reversed outputs of the function "maximum"
+*@brief Calculates the reversed outputs of the function "maximum".

*@par Inputs:
-*Three inputs, including:
-* @li grads: A mutable Tensor. Must be one of the following types:
-* float16, float32, int32.
-* @li x1: A mutable Tensor of the same type as "grads".
-* @li x2: A mutable Tensor of the same type as "grads". 
\n +* Three inputs, including: +*@li grads: A mutable Tensor. Must be one of the following types: +* float16, float32, int32. +*@li x1: A mutable Tensor of the same type as "grads". +*@li x2: A mutable Tensor of the same type as "grads". \n *@par Attributes: *@li grad_x: An optional bool. Defaults to "True". -* If "True", "y1" will be output. -* If "False", "y1" will not be output. \n +* If "True", "y1" will be output. +* If "False", "y1" will not be output. \n *@li grad_y: An optional bool. Defaults to "True". -* If "True", "y2" will be output. -* If "False", "y2" will not be output. \n +* If "True", "y2" will be output. +* If "False", "y2" will not be output. \n *@par Outputs: -* @li y1: A mutable Tensor. Has the same type as "grads". -* @li y2: A mutable Tensor. Has the same type as "grads". \n +*@li y1: A mutable Tensor. Has the same type as "grads". +*@li y2: A mutable Tensor. Has the same type as "grads". \n -*@par Third-party framework compatibility +*@par Third-party framework compatibility: * Compatible with the TensorFlow operator MaximumGrad. */ REG_OP(MaximumGrad) @@ -84,29 +84,29 @@ REG_OP(MaximumGrad) .OP_END_FACTORY_REG(MaximumGrad) /** -*@brief Calculates the reversed outputs of the function "minimum" +*@brief Calculates the reversed outputs of the function "minimum". *@par Inputs: -*Three inputs, including: -* @li grads: A mutable Tensor. Must be one of the following types: -* float16, float32, int32. -* @li x1: A mutable Tensor of the same type as "grads". -* @li x2: A mutable Tensor of the same type as "grads". \n +* Three inputs, including: +*@li grads: A mutable Tensor. Must be one of the following types: +* float16, float32, int32. +*@li x1: A mutable Tensor of the same type as "grads". +*@li x2: A mutable Tensor of the same type as "grads". \n *@par Attributes: *@li grad_x: An optional bool. Defaults to "True". -* If "True", "y1" will be output. -* If "False", "y1" will not be output. \n +* If "True", "y1" will be output. +* If "False", "y1" will not be output. \n *@li grad_y: An optional bool. Defaults to "True". -* If "True", "y2" will be output. -* If "False", "y2" will not be output. \n +* If "True", "y2" will be output. +* If "False", "y2" will not be output. \n *@par Outputs: -* @li y1: A mutable Tensor. Has the same type as "grads". -* @li y2: A mutable Tensor. Has the same type as "grads". \n +*@li y1: A mutable Tensor. Has the same type as "grads". +*@li y2: A mutable Tensor. Has the same type as "grads". \n -*@par Third-party framework compatibility +*@par Third-party framework compatibility: * Compatible with the TensorFlow operator MinimumGrad. */ REG_OP(MinimumGrad) @@ -552,15 +552,16 @@ REG_OP(Expint) .OP_END_FACTORY_REG(Expint) /** -*@brief: Computes the reciprocal of "x". \n +*@brief: Computes the reciprocal of "x". -*@par Inputs:\n -*x: A Tensor. Must be one of the following types: float16, float32, int32, int64, double, complex64, complex128. \n +*@par Inputs: +*x: A Tensor. Must be one of the following types: float16, float32, +* int32, int64, double, complex64, complex128. \n *@par Outputs: -*y: A Tensor. Has the same type as "x". \n +*y: A Tensor. Must be one of the following type: float16, float32, int32. \n -*@par Third-party framework compatibility +*@par Third-party framework compatibility: * Compatible with the TensorFlow operator Inv. */ REG_OP(Inv) @@ -569,18 +570,19 @@ REG_OP(Inv) .OP_END_FACTORY_REG(Inv) /** -*@brief: Computes "x" reciprocal grad, dx = -1*dy*y*y, where, "y = 1/x", and "dy" - is the corresponding input gradient. 
\n +*@brief: Computes "x" reciprocal grad, dx = -1*dy*y*y, where, "y = 1/x", +* and "dy" is the corresponding input gradient. *@par Inputs: * Two inputs, including: -* @li x: A Tensor. Must be one of the following types: float16, float32, int32, int8. -* @li grad: A Tensor. Has the same type as "x". \n +*@li x: A Tensor. Must be one of the following types: float16, float32, +* int32, int8. +*@li grad: A Tensor. Has the same type as "x". \n *@par Outputs: *y: A Tensor, Has the same type as "x". \n -*@par Third-party framework compatibility +*@par Third-party framework compatibility: * Compatible with the TensorFlow operator InvGrad. */ REG_OP(InvGrad) @@ -633,25 +635,27 @@ REG_OP(Log1p) /** *@brief Returns element-wise remainder of division. + *@par Inputs: -*Two inputs, including: -* @li x1: A Tensor. Must be one of the following types: float16, float32, - * int32, int64, int8, uint8, double. -* @li x2: A Tensor of the same type as "x1". \n +* Two inputs, including: +*@li x1: A Tensor. Must be one of the following types: float16, float32, +* int32, int64, int8, uint8, double. +*@li x2: A Tensor of the same type as "x1". \n *@par Outputs: -*y: A Tensor. Has the same type as "x1". +*y: A Tensor. Has the same type as "x1". \n *@attention Constraints: -*@li x2: The input data does not support 0 +*@li x2: The input data does not support 0. *@li When NUM exceeds 2048 , the accuracy of operator cannot guarantee the -*requirement of double thousandths in the mini form +* requirement of double thousandths in the mini form. *@li Due to different architectures, the calculation results of this operator -*on NPU and CPU may be inconsistent -*@li If shape is expressed as (D1,D2... ,Dn), then D1*D2... *DN<=1000000,n<=8 +* on NPU and CPU may be inconsistent. +*@li If shape is expressed as (D1,D2... ,Dn), +* then D1*D2... *DN<=1000000,n<=8. \n -*@par Third-party framework compatibility -*Compatible with the TensorFlow operator Mod. +*@par Third-party framework compatibility: +* Compatible with the TensorFlow operator Mod. */ REG_OP(Mod) .INPUT(x1, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32, DT_INT8, DT_UINT8, @@ -663,18 +667,18 @@ REG_OP(Mod) .OP_END_FACTORY_REG(Mod) /** -*@brief: Returns the truth value of (x != y) element-wise. \n +*@brief Returns the truth value of (x != y) element-wise. *@par Inputs: * Two inputs, including: *@li x1: A Tensor. Must be one of the following types: float16, float32, int32, - * int8, uint8, double, int16, int64, uint16, half, uint32, uint64 +* int8, uint8, double, int16, int64, uint16, half, uint32, uint64. *@li x2: A Tensor of the same type as "x1". \n *@par Outputs: *y: A Tensor of type bool. \n -*@par Third-party framework compatibility +*@par Third-party framework compatibility: * Compatible with the TensorFlow operator NotEqual. */ REG_OP(NotEqual) @@ -684,16 +688,17 @@ REG_OP(NotEqual) .OP_END_FACTORY_REG(NotEqual) /** -* @brief Computes ndtri element-wise (y = sqrt(2) * erfinv(2 * x - 1)) +*@brief Computes ndtri element-wise (y = sqrt(2) * erfinv(2 * x - 1)). -* @par Inputs: -* One input: -* x: A Tensor. Must be one of the following types: bfloat16, float16, float32, double \n +*@par Inputs: +* One input, including: \n +*x: A Tensor. Must be one of the following types: bfloat16, float16, +* float32, double. \n -* @par Outputs: -* y: A Tensor. Has the same type and format as input "x". \n +*@par Outputs: +*y: A Tensor. Has the same type and format as input "x". 
\n -* @par Third-party framework compatibility +*@par Third-party framework compatibility: * Compatible with the TensorFlow operator Ndtri. */ REG_OP(Ndtri) @@ -721,13 +726,12 @@ REG_OP(Neg) .OP_END_FACTORY_REG(Neg) /** -*@brief Returns x1/x2 element-wise for integer types. \n +*@brief Returns x1/x2 element-wise for integer types. *@par Inputs: *@li x1: A Tensor. Must be one of the following types: -* float32, float64, int32, uint8, int16, int8, -* complex64, int64, qint8, quint8, qint32, uint16, -* complex128, float16, uint32, uint64, complex64, complex128. +* float32, float16, int8, uint8, int32, int16, +* uint16, double, int64, complex64, complex128. *@li x2: A Tensor of the same data type as "x1". \n *@par Outputs: @@ -778,7 +782,7 @@ REG_OP(Xdivy) /** * @brief Computes "x" multiplied by the logarithm of y element-wise, -* if "x" == 0, return "0". \n +* if "x" == 0, return "0". * @par Inputs: * Two inputs, including: @@ -803,7 +807,7 @@ REG_OP(Xlog1py) /** *@brief Computes "x" multiplied by the logarithm of y element-wise, -* if "x" == 0, return "0". \n +* if "x" == 0, return "0". *@par Inputs: * Two inputs, including: @@ -1032,7 +1036,7 @@ REG_OP(LogicalOr) .OP_END_FACTORY_REG(LogicalOr) /** -* @brief Computes spence of x element-wise. \n +* @brief Computes spence of x element-wise. * * @par Inputs: @@ -1423,7 +1427,7 @@ REG_OP(RsqrtGrad) .OP_END_FACTORY_REG(RsqrtGrad) /** -*@brief Computes hyperbolic sine of "x" element-wise. \n +*@brief Computes hyperbolic sine of "x" element-wise. *@par Inputs: *x: An NCHW, NHWC,or ND Tensor of type float, double, complex64, @@ -1509,18 +1513,18 @@ REG_OP(DivNoNan) .OP_END_FACTORY_REG(DivNoNan) /** -*@brief Reverses specific dimensions of a tensor. \n +*@brief Reverses specific dimensions of a tensor. *@par Inputs: * One input: \n *x: A Tensor, Must be one of the following types: -* int32, uint8, int16, int8, int64, int64, uint16, uint32, uint64, -* and format can be [NCHW,NHWC,ND] +* int32, uint8, int16, int8, int64, int64, uint16, uint32, uint64, +* and format can be [NCHW,NHWC,ND]. \n *@par Outputs: -*y: A Tensor. Has the same type and format as "x" +*y: A Tensor. Has the same type and format as "x". \n -*@par Third-party framework compatibility +*@par Third-party framework compatibility: * Compatible with the TensorFlow operator Invert. */ REG_OP(Invert) @@ -1768,16 +1772,16 @@ REG_OP(Atan2) .OP_END_FACTORY_REG(Atan2) /** -* @brief Computes fresnel_cos of x element-wise. \n - +*@brief Computes fresnel_cos of x element-wise. * -* @par Inputs: -* x: A tensor. Must be one of the following types: bfloat16, float16, float32, double. +*@par Inputs: +*x: A tensor. Must be one of the following types: bfloat16, float16, float32, +* double. \n * -* @par Outputs: -* y: A tensor. Has the same type as "x". +*@par Outputs: +*y: A tensor. Has the same type as "x". \n * -* @par Third-party framework compatibility +*@par Third-party framework compatibility * Compatible with the TensorFlow operator FresnelCos. * */ @@ -1787,16 +1791,17 @@ REG_OP(FresnelCos) .OP_END_FACTORY_REG(FresnelCos) /** -* @brief Computes fresnel_sin of x element-wise. \n +*@brief Computes fresnel_sin of x element-wise. * -* @par Inputs: -* x: A tensor. Must be one of the following types: bfloat16, float16, float32, double. +*@par Inputs: +*x: A tensor. Must be one of the following types: bfloat16, float16, float32, +* double. \n * -* @par Outputs: -* y: A tensor. Has the same type as "x". +*@par Outputs: +*y: A tensor. Has the same type as "x". 
\n * -* @par Third-party framework compatibility +*@par Third-party framework compatibility: * Compatible with the TensorFlow operator FresnelSin. * */ @@ -2312,7 +2317,7 @@ REG_OP(Sin) .OP_END_FACTORY_REG(Sin) /** -*@brief: Computes tan of "x" element-wise. \n +*@brief: Computes tan of "x" element-wise. *@par Inputs: *One input: @@ -2332,7 +2337,7 @@ REG_OP(Tan) .OP_END_FACTORY_REG(Tan) /** -*@brief Returns element-wise remainder of division. \n +*@brief Returns element-wise remainder of division. *@par Inputs: *Two inputs, including: @@ -2352,7 +2357,7 @@ REG_OP(Tan) *@li If shape is expressed as (D1,D2... ,Dn), then D1*D2... *DN<=1000000,n<=8 *@par Third-party framework compatibility -*@li Compatible with the TensorFlow operator TruncateMod. +*Compatible with the TensorFlow operator TruncateMod. */ REG_OP(TruncateMod) .INPUT(x1, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE, DT_INT64, @@ -3133,7 +3138,7 @@ REG_OP(SquareSumV2) .OP_END_FACTORY_REG(SquareSumV2) /** -*@brief Confuse reducesumd and square. \n +*@brief Confuse reducesumd and square. *@par Inputs: *x: A Tensor of type float16, float32. \n @@ -3178,19 +3183,20 @@ REG_OP(SquareSumAll) .OP_END_FACTORY_REG(SquareSumAll) /** -*@brief Confuse broadcast, addn and mul. \n +*@brief Confuse broadcast, addn and mul. *@par Inputs: *Three inputs, including: -* @li x1: A Tensor. Must be one of the following types:int32, int16, float16, float32. -* @li x2: A Tensor of the same type as "x1". -* @li x3: A Tensor of the same type as "x1". \n +*@li x1: A Tensor. Must be one of the following types:int32, int16, +* float16, float32. +*@li x2: A Tensor of the same type as "x1". +*@li x3: A Tensor of the same type as "x1". \n *@par Outputs: -* y: A Tensor. Has the same type as "x1". +*y: A Tensor. Has the same type as "x1". \n *@par Restrictions: -*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. +* Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. */ REG_OP(FusedMulAddN) .INPUT(x1, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT32, DT_INT16})) @@ -3200,7 +3206,7 @@ REG_OP(FusedMulAddN) .OP_END_FACTORY_REG(FusedMulAddN) /** -*@brief Add 'bias' to 'x'. \n +*@brief Add 'bias' to 'x'. *@par Inputs: * Two inputs, including: @@ -3209,22 +3215,31 @@ REG_OP(FusedMulAddN) *@par Attributes: *@li axis: An optional int32 used to compute the shape of bias input from the online bottoms. Defaults to "1". -*@li num_axes: An optional int32 used to compute the shape of bias input from a Caffe model trained offline. Defaults to "1". -*@li bias_from_blob: An optional bool. If "true", bias is input from a Caffe model trained offline. If "false", bias is input from online bottoms. Defaults to "true". \n +*@li num_axes: An optional int32 used to compute the shape of +* bias input from a Caffe model trained offline. Defaults to "1". +*@li bias_from_blob: An optional bool. If "true", bias is input from a Caffe model trained offline. +* If "false", bias is input from online bottoms. Defaults to "true". \n *@par Outputs: *y: An ND tensor of type float16 or float32. \n -*@attention Constraints:\n +*@attention Constraints: * Assume that the shape length of "x" is "n" and that of "bias" is "m". *@li "axis" is within the range [-n, n-1]. num_axes >= -1. 
-*@li If "bias_from_blob = true", "num_axes = -1", and "axis >= 0", the ith axis of "bias" and the (i+"axis")th axis of "x" must have the same size (0 <= i < n-axis).\n +*@li If "bias_from_blob = true", "num_axes = -1", and "axis >= 0", +* the ith axis of "bias" and the (i+"axis")th axis of "x" must have the same size (0 <= i < n-axis). * If "axis < 0", the ith axis of "bias" and the (i+n+"axis")th axis of "x" must have the same size (0 <= i < -axis). *@li If "bias_from_blob = true" and "num_axes = 0", "bias" is a scalar with shape length 1 and dimension size 1. -*@li If "bias_from_blob = true", "num_axes > 0, and "axis >= 0", "axis + num_axes" must be less than or equal to "n" and the ith axis of "bias" and the (i+"axis")th axis of "x" must have the same size (0 <= i < num_axes).\n -* If "axis < 0", "n + axis + num_axes" must be less than or equal to "n" and the ith axis of "bias" and the (i+n+"axis")th axis of "x" must have the same size (0 <= i < num_axes). -*@li If "bias_from_blob = false", "bias" is not a scalar, and "axis >= 0","axis + m" must be less than or equal to "n" and the ith axis of "bias" and the (i+"axis")th axis of "x" must have the same size (0 <= i < m).\n -* If "axis < 0", "n + axis + m" must be less than or equal to "n" and the ith axis of "bias" and the (i+n+"axis")th axis of "x" must have the same size (0 <= i < m). +*@li If "bias_from_blob = true", "num_axes > 0, and "axis >= 0", +* "axis + num_axes" must be less than or equal to "n" and the ith axis of "bias" and +* the (i+"axis")th axis of "x" must have the same size (0 <= i < num_axes). +* If "axis < 0", "n + axis + num_axes" must be less than or equal to "n" and +* the ith axis of "bias" and the (i+n+"axis")th axis of "x" must have the same size (0 <= i < num_axes). +*@li If "bias_from_blob = false", "bias" is not a scalar, and "axis >= 0", +* "axis + m" must be less than or equal to "n" and the ith axis of "bias" and +* the (i+"axis")th axis of "x" must have the same size (0 <= i < m). +* If "axis < 0", "n + axis + m" must be less than or equal to "n" and +* the ith axis of "bias" and the (i+n+"axis")th axis of "x" must have the same size (0 <= i < m). \n *@par Third-party framework compatibility * Compatible with the Caffe operator Bias. */ @@ -3405,11 +3420,14 @@ REG_OP(Fills) .OP_END_FACTORY_REG(Adds) /** -*@brief Computes the product of x and y and returns 0 if the y is zero, even if x is NaN or infinite. \n +*@brief Computes the product of x and y and returns 0 if the y is zero, +* even if x is NaN or infinite. *@par Inputs: -* @li x1: A Tensor. Must be one of the following types:float16, float32, double, complex64, complex128. -* @li x2: A Tensor. Has the same type and shape as "x1". \n +* Two inputs, including: \n +*@li x1: A Tensor. Must be one of the following types:float16, float32, +* double, complex64, complex128. +*@li x2: A Tensor. Has the same type and shape as "x1". \n *@par Outputs: *y: A Tensor. Has the same type and shape as "x1". 
\n diff --git a/third_party/fwkacllib/inc/ops/matrix_calculation_ops.h b/third_party/fwkacllib/inc/ops/matrix_calculation_ops.h index 38e22be8..61336fb0 100644 --- a/third_party/fwkacllib/inc/ops/matrix_calculation_ops.h +++ b/third_party/fwkacllib/inc/ops/matrix_calculation_ops.h @@ -52,8 +52,8 @@ namespace ge { REG_OP(AttentionQKVGradW) .INPUT(x, TensorType({DT_FLOAT16})) .INPUT(query_dx, TensorType({DT_FLOAT16})) - .INPUT(key_dw, TensorType({DT_FLOAT16})) - .INPUT(value_dw, TensorType({DT_FLOAT16})) + .OPTIONAL_INPUT(key_dw, TensorType({DT_FLOAT16})) + .OPTIONAL_INPUT(value_dw, TensorType({DT_FLOAT16})) .OUTPUT(dw_query, TensorType({DT_FLOAT16})) .OUTPUT(dw_key, TensorType({DT_FLOAT16})) .OUTPUT(dw_value, TensorType({DT_FLOAT16})) @@ -199,24 +199,25 @@ REG_OP(SwinTransformerLnQKV) .OP_END_FACTORY_REG(SwinTransformerLnQKV) /** -*@brief Multiplies matrix "a" by matrix "b", producing "a * b" . \n - +*@brief Multiplies matrix "a" by matrix "b", producing "a * b". \n *@par Inputs: *Three inputs, including: * @li x1: A matrix Tensor. 2D. Must be one of the following types: float16, -* float32, int32. Has format [ND, NHWC]. +* float32, int32, bfloat16. Has format [ND, NHWC]. * @li x2: A matrix Tensor. 2D. Must be one of the following types: float16, -* float32, int32. Has format [ND, NHWC]. +* float32, int32, bfloat16. Has format [ND, NHWC]. * @li bias: A optional 1D Tensor. Must be one of the following types: float16, -* float32, int32. Has format [ND, NHWC] . \n +* float32, int32, bfloat16. Has format [ND, NHWC]. \n *@par Attributes: -*@li transpose_x1: A bool. If True, changes the shape of "x1" from [M, K] to [K, M]. -*@li transpose_x2: A bool. If True, changes the shape of "x2" from [M, K] to [K, M] . \n +*@li transpose_x1: A bool. If True, changes the shape of "x1" from [M, K] to +* [K, M]. +*@li transpose_x2: A bool. If True, changes the shape of "x2" from [M, K] to +* [K, M]. \n *@par Outputs: *y: The result matrix Tensor. 2D. Must be one of the following types: float16, -* float32, int32. Has format [ND, NHWC] . \n +* float32, int32, bfloat16. Has format [ND, NHWC]. \n *@par Third-party framework compatibility * Compatible with the TensorFlow operator BatchMatmul. @@ -231,35 +232,34 @@ REG_OP(MatMul) .OP_END_FACTORY_REG(MatMul) /** -*@brief Multiplies matrix "a" by matrix "b", producing "a * b" . \n - +*@brief Multiplies matrix "a" by matrix "b", producing "a * b". \n *@par Inputs: *Four inputs, including: * @li x1: A matrix Tensor. 2D. Must be one of the following types: float32, - float16, int32, int8. Has format [ND, NHWC]. +* float16, int32, int8, int4, bfloat16. Has format [ND, NHWC]. * @li x2: A matrix Tensor. 2D. Must be one of the following types: float32, - float16, int32, int8. Has format [ND, NHWC]. +* float16, int32, int8, int4, bfloat16. Has format [ND, NHWC]. * @li bias: A 1D Tensor. Must be one of the following types: float32, - float16, int32. Has format [ND, NHWC]. +* float16, int32 bfloat16. Has format [ND, NHWC]. * @li offset_w: A Optional 1D Tensor for quantized inference. Type is int8. - Reserved. \n +* Reserved. \n *@par Attributes: * @li transpose_x1: A bool. If True, changes the shape of "x1" from [K, M] to - [M, K]. +* [M, K]. * @li transpose_x2: A bool. If True, changes the shape of "x2" from [N, K] to [K, N]. * @li offset_x: An optional integer for quantized MatMulV2. * The negative offset added to the input x1 for int8 type. Ensure offset_x - within the effective range of int8 [-128, 127]. Defaults to "0". 
\n +* within the effective range of int8 [-128, 127]. Defaults to "0". \n *@par Outputs: *y: The result matrix Tensor. 2D. Must be one of the following types: float32, - float16, int32. Has format [ND, NHWC]. \n +* float16, int32, bfloat16. Has format [ND, NHWC]. \n *@attention Constraints: * if performances better in format NZ, please close - "MatmulTransdataFusionPass" in fusion configuration. \n +* "MatmulTransdataFusionPass" in fusion configuration. \n *@par Third-party framework compatibility * Compatible with the TensorFlow operator BatchMatmul. @@ -276,26 +276,25 @@ REG_OP(MatMulV2) .OP_END_FACTORY_REG(MatMulV2) /** -*@brief Multiplies matrix "a" by matrix "b", producing "a * b" . \n - +*@brief Multiplies matrix "a" by matrix "b", producing "a * b". \n *@par Inputs: *Five inputs, including: * @li x1: A matrix Tensor. 2D. Must be one of the following types: int8. * @li x2: A matrix Tensor. 2D. Must be one of the following types: int8. * @li compress_index: A compress index matrix of type int8. * @li bias: An optional Tensor. 1D. Must be one of the following types: int32, - float16. +* float16. * @li offset_w: An optional matrix Tensor. 2D. Must be one of the following - types: int8. \n +* types: int8. \n *@par Attributes: *@li transpose_x1: A bool. If True, changes the shape of "x1" from [K, M] to - [M, K]. +* [M, K]. *@li transpose_x2: A bool. If True, changes the shape of "x2" from [N, K] to - [K, N]. +* [K, N]. *@li offset_x: An optional integer for quantized MatMulV2Compress. *The negative offset added to the input x1 for int8 type. Ensure offset_x - within the effective range of int8 [-128, 127]. Defaults to "0". \n +* within the effective range of int8 [-128, 127]. Defaults to "0". \n *@par Outputs: *y: The result matrix Tensor. 2D. Must be one of the following types: int32, @@ -303,7 +302,7 @@ REG_OP(MatMulV2) *@attention Constraints: * if performances better in format NZ, please close - "MatmulTransdataFusionPass" in fusion configuration. +* "MatmulTransdataFusionPass" in fusion configuration. */ REG_OP(MatMulV2Compress) @@ -319,29 +318,29 @@ REG_OP(MatMulV2Compress) .OP_END_FACTORY_REG(MatMulV2Compress) /** -*@brief Performs Matrix-to-matrix Multiply, producing y=alpha[0]*a*b+beta[0]*c . \n - +*@brief Performs Matrix-to-matrix Multiply, +* producing y=alpha[0]*a*b+beta[0]*c. \n *@attention Constraints: * For better performance, The k-axis must be aligned to 16 (input type * is float16) or 32 (input type is int8). \n *@par Inputs: *Five inputs, including: -*@li a: A matrix Tensor. Must be one of the following types: float16, int8. -* Has format [ND]. -*@li b: A matrix Tensor. Must be one of the following types: float16, int8. -* Has format ND. -*@li c: A matrix Tensor. Must be one of the following types: float16, int32, -* float32. has format ND. -*@li alpha: A 1D Tensor. The shape of alpha is [1].Must be one of the following -* types: float16, int32, float32. Has format [ND]. +* @li a: A matrix Tensor. Must be one of the following types:float32, float16, +* int8, int32. Has format ND. +* @li b: A matrix Tensor. Must be one of the following types:float32, float16, +* int8, int32. Has format ND. +*@li c: A matrix Tensor. Must be one of the following types:float32, float16, +* int8, int32. Has format ND. +* @li alpha: A 1D Tensor. The shape of alpha is [1].Must be one of the +* following types: float16, int32, float32, int8. Has format ND. *@li beta: A 1D Tensor. The shape of beta is [1]. Must be one of the following -* types: float16, int32, float32. Has format [ND]. 
+* types: float16, int32, float32, int8. Has format ND.\n * The format of a, b, c has restriction:\n * When type of a is int8 and type of c is int32, the format of a, b, c should * all be ND.\n -* When type of a is int8 and type of c is float32, the format of a, b, c should -* all be ND.\n +* When type of a is int8 and type of c is float32, the format of a, b, c +* should all be ND.\n * When type of a is float16 and type of c is float16, the format of a, b, c * should all be ND.\n * When type of a is float16 and type of c is float32, the format of a, b, c @@ -352,7 +351,7 @@ REG_OP(MatMulV2Compress) *@li transpose_a: Optional. A bool. If True, changes the shape of "a" from * [M, K] to [K, M]. *@li transpose_b: Optional. A bool. If True, changes the shape of "b" from -* [K, N] to [N, K] . \n +* [K, N] to [N, K]. \n *@par Outputs: *y: The result matrix Tensor. Must be one of the following types: float16, @@ -371,22 +370,25 @@ REG_OP(GEMM) .OP_END_FACTORY_REG(GEMM) /** -*@brief Multiplies matrix "a" by matrix "b", producing "a * b" . \n - +*@brief Multiplies matrix "a" by matrix "b", producing "a * b". \n *@par Inputs: *Two inputs, including: * @li x1: A matrix Tensor. Must be one of the following types: float16, -* float32, int32. 2D or higher. Has format [ND, NHWC]. +* float32, int32, bfloat16. 2D or higher. Has format [ND, NHWC]. * @li x2: A matrix Tensor. Must be one of the following types: float16, -* float32, int32. 2D or higher. Has format [ND, NHWC] . \n +* float32, int32, bfloat16. 2D or higher. Has format [ND, NHWC]. \n *@par Attributes: -*@li adj_x1: A bool. If True, changes the shape of "x1" from [B, M, K] to [B, K, M]. -*@li adj_x2: A bool. If True, changes the shape of "x2" from [B, M, K] to [B, K, M] . \n +*@li adj_x1: A bool. If True, changes the shape of "x1" from [B, M, K] +* to [B, K, M]. +*@li adj_x2: A bool. If True, changes the shape of "x2" from [B, M, K] +* to [B, K, M]. \n *@par Outputs: -*y: The result matrix Tensor. 2D or higher. Must be one of the following types: float16, -* float32, int32. 2D or higher. Has format [ND, NHWC]. Has the same shape length as "x1" and "x2" . \n +* y: The result matrix Tensor. 2D or higher. Must be one of the following +* types: float16, bfloat16, +* float32, int32. 2D or higher. Has format [ND, NHWC]. Has the same shape +* length as "x1" and "x2". \n *@par Third-party framework compatibility * Compatible with the TensorFlow operator BatchMatmul. @@ -403,27 +405,33 @@ REG_OP(BatchMatMul) /** * @brief Multiplies matrix "a" by matrix "b", producing "a * b" . \n - * @par Inputs: * Three inputs, including: * @li x1: A matrix Tensor. Must be one of the following types: float16, -* float32, int32. 2D or higher. Has format [ND, NHWC]. +* float32, int32, int8, int4, bfloat16. 2D or higher. Has format [ND, NHWC]. * @li x2: A matrix Tensor. Must be one of the following types: float16, -* float32, int32. 2D or higher. Has format [ND, NHWC] . \n -* @li bias: A matrix Tensor. Must be one of the following types: float16, -* float32, int32. 2D or higher. Has format [ND, NHWC] . \n +* float32, int32, int8, int4, bfloat16. 2D or higher. Has format [ND, NHWC]. +* @li bias: A optional Tensor. Must be one of the following types: +* float16, +* float32, int32, int8, int4, bfloat16. Has format [ND, NHWC]. +* @li offset_w: A optional Tensor. Must be one of the following types: +* int8, int4. Has format [ND, NHWC]. \n * @par Attributes: -* @li adj_x1: A bool. If True, changes the shape of "x1" from [B, M, K] to [B, K, M]. -* @li adj_x2: A bool. 
If True, changes the shape of "x2" from [B, M, K] to [B, K, M] . \n +* @li adj_x1: A bool. If True, changes the shape of "x1" from [B, M, K] to +* [B, K, M]. +* @li adj_x2: A bool. If True, changes the shape of "x2" from [B, M, K] to +* [B, K, M]. \n * @par Outputs: -* y: The result matrix Tensor. 2D or higher. Must be one of the following types: float16, -* float32, int32. 2D or higher. Has format [ND, NHWC]. Has the same shape length as "x1" and "x2" . \n +* y: The result matrix Tensor. 2D or higher. Must be one of the following +* types: float16, +* float32, int32. 2D or higher. Has format [ND, NHWC]. Has the same shape +* length as "x1" and "x2". \n *@attention Constraints: * if performances better in format NZ, please close - "MatmulTransdataFusionPass" in fusion configuration. \n +* "MatmulTransdataFusionPass" in fusion configuration. \n * @par Third-party framework compatibility * Compatible with the TensorFlow operator BatchMatmul. @@ -1025,29 +1033,33 @@ REG_OP(DiagPart) .OP_END_FACTORY_REG(DiagPart) /** -*@brief Also known as a "fully-connected" layer, computes an inner product with a set of learned weights, and (optionally) adds biases . \n - +*@brief Also known as a "fully-connected" layer, computes an inner product +* with a set of learned weights, and (optionally) adds biases. \n *@par Inputs: * Four inputs, including: -*@li x: A Tensor of type float16, int8. -*@li w: A weight matrix of type float16, int8. -*@li b: An optional Tensor of type float16, int32, float32. -*@li offset_w: An optional Tensor of type int8. Reserved. Only None Supported. \n +*@li x: A Tensor of type float16, int8, int4, float32, bfloat16. +*@li w: A weight matrix of type float16, int8, int4, float32, bfloat16. +*@li b: An optional Tensor of type float16, int8, int4, float32, bfloat16. +*@li offset_w: An optional Tensor of type int8, int4. +* Reserved. Only None Supported. \n *@par Attributes: *@li num_output: Required. An int, output neuron number. Reserved. -*@li transpose: A bool, specifying weight whether to transpose input w, either "true" or "false". Defaults to "false". -*@li axis: Optional. An int, 1 or 2, specifying which dimension the input "K" starts from. Defaults to 1. -* The product of the subsequent dimensions starting form first dimension or the second dimension is "K". +*@li transpose: A bool, specifying weight whether to transpose input w, +* either "true" or "false". Defaults to "false". +*@li axis: Optional. An int, 1 or 2, specifying which dimension the input +* "K" starts from. Defaults to 1. +* The product of the subsequent dimensions starting form first dimension +* or the second dimension is "K". *@li offset_x: An optional integer for quantized FullyConnection. -*The negative offset added to the input image for int8 type. Ensure offset_x within the -*effective range of int8 [-128, 127]. Defaults to "0". \n +*The negative offset added to the input image for int8 type. Ensure offset_x +* within the effective range of int8 [-128, 127]. Defaults to "0". \n *@par Outputs: -*y: The result tensor of type float16, int32, float32 . \n +*y: The result tensor of type float16, int32, float32, bfloat16. \n *@par Third-party framework compatibility -* Compatible with the Caffe operator InnerProduct . \n +* Compatible with the Caffe operator InnerProduct. 
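+*
+*@par Example
+* A minimal graph-construction sketch (illustrative only: the set_input_* and
+* set_attr_* accessors are generated from the REG_OP(FullyConnection)
+* declaration, and the upstream operators x and w are assumed to exist):
+*     ge::op::FullyConnection fc("fc");
+*     fc.set_input_x(x).set_input_w(w);
+*     fc.set_attr_num_output(1000);
+*     fc.set_attr_transpose(false);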
\n *@par Quantization supported or not * Yes @@ -1066,27 +1078,26 @@ REG_OP(FullyConnection) /** *@brief Also known as a "fully-connected-compress" layer, computes an inner -product with a set of learned weights, and (optionally) adds biases . \n - +* product with a set of learned weights, and (optionally) adds biases. \n *@par Inputs: * Five inputs, including: *@li x: A Tensor of type uint8, int8. *@li w: A weight matrix of type int8. *@li compress_index: A compress index matrix of type int8. -*@li b: A Tensor of type int32. -*@li offset_w: A Tensor of type int8. +*@li b: A optional Tensor of type int32. +*@li offset_w: A optional Tensor of type int8. *@par Attributes: *@li num_output: A int, specifying the number of outputs. *@li transpose: A bool, specifying whether to transpose input w, either "true" - or "false". Defaults to "false". +* or "false". Defaults to "false". *@li axis: Optional. A int, 1 or 2, specifying which dimension the input "K" -starts from. Defaults to "1". -* The product of the subsequent dimensions starting form first dimension or the -second dimension is "K". +* starts from. Defaults to "1". +*The product of the subsequent dimensions starting form first dimension or the +* second dimension is "K". *@li offset_x: An optional integer for quantized FullyConnectionCompress. *The negative offset added to the input image for int8 type. Ensure offset_x -within the effective range of int8 [-128, 127]. Defaults to "0". \n +* within the effective range of int8 [-128, 127]. Defaults to "0". \n *@par Outputs: *y: The result tensor of type int32. \n @@ -1520,21 +1531,18 @@ REG_OP(Tril) /** *@brief Concatenates a list of N tensors along the first dimension. *@par Inputs: -* Two inputs, including: -* @li values: A list of Tensors. Must be one of the following types: int32, float16, float32. -* Tensors to be concatenated. All must have size 1 in the first dimension and same shape. -* It's a dynamic input. -* @li shape: A Tensor of the same type as "x". -* The final shape of the result. Should be equal to the shapes of any input -* but with the number of input values in the first dimension . \n +* @li x: A list of Tensors. Must be one of the following types: int32, +* float16, float32. Tensors to be concatenated. All must have size 1 in +* the first dimension and same shape.It's a dynamic input. \n *@par Attributes: -*equation: The subscripts for the Einstein summation. \n -*N: tensor size of input \n +* @li equation: The subscripts for the Einstein summation. \n +* @li N: tensor size of input. \n *@par Outputs: -*@li y: Sums the product of the elements of the input operands along dimensions specified - using a notation based on the Einstein summation convention. \n +*@li y: Sums the product of the elements of the input operands along +* dimensions specified +* using a notation based on the Einstein summation convention. \n *@attention Constraints: *Input N must be Int. \n @@ -1756,6 +1764,57 @@ REG_OP(MatrixDiagV3) .ATTR(align, String, "RIGHT_LEFT") .OP_END_FACTORY_REG(MatrixDiagV3) +/** +* @brief Function SwinAttentionScore. \n + +* @par Inputs: +* six inputs, including: +* @li query: A matrix Tensor. The type only support float16. +* @li key: A matrix Tensor. The type only support float16. +* @li value: A matrix Tensor. The type only support float16. +* @li padding_mask1: A matrix Tensor. The type only support float16. +* @li padding_mask2: A matrix Tensor. The type only support float16. +* @li scale: A scalar. The type only support float16. +* @li drop_mask: A matrix Tensor. 
The type only supports uint8. \n

* @par Attributes:
* @li keep_prob: A mutable Tensor. Must meet all of the following rules:
    the shape of "keep_prob" should be (1,) or [1,].
* @li query_transpose: A bool. If True, changes the shape of "query" from [K, M] to
    [M, K].
* @li key_transpose: A bool. If True, changes the shape of "key" from [N, K] to
    [K, N].
* @li bmm_score_transpose_a: A bool. If True, changes the shape of "mid_data" from [K, M] to
    [M, K].
* @li bmm_score_transpose_b: A bool. If True, changes the shape of "value" from [N, K] to
    [K, N].
* @li softmax_axes: A list of int. The dimensions that softmax is performed on. Defaults
    to "[]". \n

* @par Outputs:
* @li attention_score: The result matrix Tensor. The type only supports float16.
* @li softmax: The result matrix Tensor. The type only supports float16.

* @par Restrictions:
* Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use.
*/
REG_OP(SwinAttentionScore)
    .INPUT(query, TensorType({DT_FLOAT16}))
    .INPUT(key, TensorType({DT_FLOAT16}))
    .INPUT(value, TensorType({DT_FLOAT16}))
    .INPUT(padding_mask1, TensorType({DT_FLOAT16}))
    .OPTIONAL_INPUT(padding_mask2, TensorType({DT_FLOAT16}))
    .INPUT(scale, TensorType({DT_FLOAT16}))
    .OPTIONAL_INPUT(drop_mask, TensorType({DT_INT8}))
    .OUTPUT(attention_score, TensorType({DT_FLOAT16}))
    .OUTPUT(softmax, TensorType({DT_FLOAT16}))
    .ATTR(keep_prob, Float, 1.0)
    .ATTR(query_transpose, Bool, false)
    .ATTR(key_transpose, Bool, false)
    .ATTR(bmm_score_transpose_a, Bool, false)
    .ATTR(bmm_score_transpose_b, Bool, false)
    .ATTR(softmax_axes, ListInt, {})
    .OP_END_FACTORY_REG(SwinAttentionScore)
}  // namespace ge
#endif  // OPS_BUILT_IN_OP_PROTO_INC_MATRIX_CALCULATION_OPS_H_
diff --git a/third_party/fwkacllib/inc/ops/nn_batch_norm_ops.h b/third_party/fwkacllib/inc/ops/nn_batch_norm_ops.h
index c6aad6dc..96213764 100644
--- a/third_party/fwkacllib/inc/ops/nn_batch_norm_ops.h
+++ b/third_party/fwkacllib/inc/ops/nn_batch_norm_ops.h
@@ -544,23 +544,30 @@ REG_OP(BNInference)
    .OP_END_FACTORY_REG(BNInference)

/**
-*@brief Performs batch normalization . \n
+*@brief Performs batch normalization.

*@par Inputs:
*@li x: A 4D or 5D Tensor of type float16 or float32, with format NHWC or NCHW.
-*@li mean: A Tensor of type float32 or float16. Must be 1D if input "x" Specifies the mean used for inference.
-*@li variance: A Tensor of type float32 or float16 . Must be 1D if input "x" Specifies the variance used for inference.
-*@li scale: An optional tensor of type float16 or float32, no use
-*@li offset: An optional tensor of type float16 or float32, no use
+*@li mean: A Tensor of type float32 or float16. Must be 1D if input "x" is 4D.
+* Specifies the mean used for inference.
+*@li variance: A Tensor of type float32 or float16. Must be 1D if input "x" is 4D.
+* Specifies the variance used for inference.
+*@li scale: An optional tensor of type float16 or float32, not used.
+*@li offset: An optional tensor of type float16 or float32, not used. \n
+
*@par Attributes:
-*@li momentum: An optional float32 num, represents the mean and the variance's scale factor
-*@li epsilon: An optional float32, specifying the small value added to variance to avoid dividing by zero. Defaults to "0.00001".
+*@li momentum: An optional float32, representing the scale factor of the mean
+* and the variance.
+*@li epsilon: An optional float32, specifying the small value
+* added to variance to avoid dividing by zero. Defaults to "0.00001".
*@li use_global_stats: Indicates inference mode; can only be "True".
-*@li mode: An optional attr, not use +*@li mode: An optional attr, not use. \n + *@par Outputs: -*@li y: A 4D or 5D Tensor of type float16 or float32 for the normalized "x" +*@li y: A 4D or 5D Tensor of type float16 or float32 for the normalized "x". \n + *@par Restrictions: -*Warning: THIS FUNCTION IS DEPRECATED. Please use BNInference instead. +* Warning: THIS FUNCTION IS DEPRECATED. Please use BNInference instead. */ REG_OP(BNInferenceD) .INPUT(x, TensorType({DT_FLOAT16,DT_FLOAT})) diff --git a/third_party/fwkacllib/inc/ops/nn_calculation_ops.h b/third_party/fwkacllib/inc/ops/nn_calculation_ops.h index bb0770e6..4c55eac0 100644 --- a/third_party/fwkacllib/inc/ops/nn_calculation_ops.h +++ b/third_party/fwkacllib/inc/ops/nn_calculation_ops.h @@ -26,15 +26,14 @@ namespace ge { /** * @brief Computes the gradients of depthwise convolution with respect to -* the filter . \n - +* the filter. \n * @par Inputs: -* Three inputs include: \n +* Three inputs include: * @li input: 4D origin shape of input tensor [N, C, H, W] or [N, H, W, C], -* support float16, float32, double -* @li filter_size: A 4D tensor of type int32, with shape [H, W, C, K] +* support float16. +* @li filter_size: A 4D tensor of type int32, int64, with shape [H, W, C, K] * @li out_backprop: 4D tensor with shape [N, C, H, W] or [N, H, W, C]. -* Must be one of the following types: float16, float32, double . \n +* Must be one of the following types: float16. \n * @par Attributes: * @li strides: A required list or tuple. The stride of the sliding window @@ -49,7 +48,7 @@ namespace ge { * @li pads: A required list or tuple. Padding added to each dimension of the * input. * @li data_format: An optional string. Input data format, either "NHWC" or -* "NCHW" . \n +* "NCHW". \n * @par Outputs: * filter_grad: Gradient of the deep convolution relative to the filter with @@ -65,8 +64,9 @@ namespace ge { * Output backprop is 4D with shape [N, C, Ho, Wo] or [N, Ho, Wo, C], but the * data is 5D with shape [N, C1, Ho, Wo, C0], * where C is the same as that of the feature map and C0 is 16.\n -* Limited by Tiling and L1 / L0 buffer memory: 512 * ceil(Wo, 16) + (480 * -* stride_h + 32 * filter_h) * ceil(Wi, 16) <= l1_size and Hf*Wf <= l0b_size/512 . \n +* Limited by Tiling and L1 / L0 buffer memory: 512 * ceil(Wo, 16) + +* (480 * stride_h + 32 * filter_h) * ceil(Wi, 16) <= l1_size and Hf*Wf +* <= l0b_size/512. \n * @par Third-party framework compatibility * @li Compatible with the TensorFlow operator DepthwiseConv2DBackpropFilter. @@ -146,34 +146,34 @@ REG_OP(DepthwiseConv2DBackpropFilterD) /** * @brief Computes the gradients of depthwise convolution with respect to the -* input . \n - +* input. \n * @par Inputs: * Three inputs include: \n * @li input_size: 4D shape of input tensor [N, C, H, W] or [N, H, W, C], -* support int32, int64 +* support int32, int64. * @li filter: 4D filter tensor with shape of [H, W, C, K], support float16. * @li out_backprop: 4D tensor with shape [N, C, H, W] or [N, H, W, C]. * Must be one of the following types: float16 . \n * @par Attributes: -* @li strides: A required list or tuple of int32. The stride of the sliding window for -* height and width of input "x" of the convolution. +* @li strides: A required list or tuple of int32. The stride of the sliding +* window for height and width of input "x" of the convolution. * Must be with shape [1, 1, stride_height, stride_width] or [1, stride_height, * stride_width, 1]. -* @li dilations: An optional list or tuple of int32. 
The dilation factor for each -* dimension of input "x". Defaults to "[1, 1, 1, 1]". +* @li dilations: An optional list or tuple of int32. The dilation factor for +* each dimension of input "x". Defaults to "[1, 1, 1, 1]". * If set to k > 1, there will be k-1 skipped cells between each filter element * on that dimension. Must be with shape [1, 1, dilation_height, dilation_width] * or [1, dilation_height, dilation_width, 1]. -* @li pads: A required list or tuple of int32. Padding added to each dimension of the -* input. +* @li pads: A required list or tuple of int32. Padding added to each dimension +* of the input. * @li data_format: An optional string. Input data format, either "NHWC" or * "NCHW". Defaults to "NHWC" . \n * @par Outputs: * input_grad: Gradient of the deep convolution relative to the input with shape -* [N, C, H, W] or [N, H, W, C] Must be one of the following types: float16 . \n +* [N, C, H, W] or [N, H, W, C] Must be one of the following types: +* float16, float32. \n * @attention Constraints:\n * The feature map is 4D with shape [N, C, Hi, Wi] or [N, Hi, Wi, C], but @@ -331,13 +331,13 @@ REG_OP(DepthwiseConv2D) * For NCHW data format, the feature dimension is the third-to-last . \n *@par Inputs: -*x: A Tensor of type NumberType . \n +* x: A Tensor of type NumberType . \n *@par Attributes: -*data_format: Data format. Defaults to "NHWC" . \n +* data_format: Data format. Defaults to "NHWC" . \n *@par Outputs: -*y: A Tensor.Has the same type as "x" . \n +* y: A Tensor.Has the same type as "x" . \n *@par Third-party framework compatibility * Compatible with the TensorFlow operator BiasAddGrad. @@ -378,8 +378,8 @@ REG_OP(BiasAddGrad) | Format | NCHW | NCHW | NCHW |\n | | NHWC | HWCN | NHWC |\n *\n - * For float32 and float64 type, the actual calculation on the chip is based on - * float16. + * For float32 and float64 type, the actual calculation on the chip is based + * on float16. *\n * *@par Attributes: @@ -419,11 +419,11 @@ REG_OP(BiasAddGrad) | | W | [1, 255] |\n *\n - * In Ascend910, fmap or out_backprop's H and W not support 1 when + * In Ascend910, fmap or out_backprop's H and W not support 1 when\n * fmap_h + pad_top + pad_bottom != (filter_height - 1) * dilation_h + 1 - * and filter_width > fmap_width - * If filter_h = 1 and filter_w = 1, out_backprop_w * stride_h * stride_w < 4096 - *\n + * and filter_width > fmap_width. + * If filter_h = 1 and filter_w = 1, out_backprop_w * stride_h * + * stride_w < 4096. \n * *@par Outputs: * y: A Tensor. Has the same type as filter,and has same format as input_size. @@ -482,7 +482,7 @@ REG_OP(Conv2DBackpropInput) *@par Outputs: * y: A Tensor. Has the same type as filter,4-D tensor [batch, height, width, * channels] or [batch, channels, height, width]. -*@par Third-party framework compatibility +* @par Third-party framework compatibility * Compatible with Tensorflow's conv2d_backprop_input *@par Restrictions: * Warning: THIS FUNCTION IS DEPRECATED. Please use Conv2DBackpropInput instead. @@ -503,7 +503,7 @@ REG_OP(Conv2DBackpropInputD) *@brief Computes the Deconvolution with respect to the input. *@par Inputs: * Two required inputs: - * @li x: A Tensor of type float16 or int8. 4D with shape + * @li x: A Tensor of type float16 or int8. 4D with shape * [batch, out_channels, out_height, out_width]. Gradients with respect * to the output of the convolution. * @li filter: A Tensor. Must have the same type as "x". @@ -511,7 +511,7 @@ REG_OP(Conv2DBackpropInputD) * Two optional inputs: * @li bias: An optional tensor. 
Must have the same type as "y". * @li offset_w: An optional 1D tensor for quantized deconvolution. - * Type is int8. Reserved.\n + * Type is int8. Reserved. *\n *\n * The following are the supported data types and data formats:\n @@ -535,9 +535,9 @@ REG_OP(Conv2DBackpropInputD) * @li dilations: A tuple or list of 4 integers. The dilation factor for each * dimension of input, defaults to [1,1,1,1]. * @li groups: Number of blocked connections from input channels to - output channels. Defaults to "1". + * output channels. Defaults to "1". * @li data_format: An optional string from: "NCHW". Defaults to "NCHW". \n - Specify the data format of the input and output data. + * Specify the data format of the input and output data. * @li offset_x: An optional integer for quantized deconvolution. * The negative offset added to the input image for int8 type. Ensure offset_x * within the effective range of int8 [-128, 127]. Defaults to "0". @@ -564,10 +564,11 @@ REG_OP(Conv2DBackpropInputD) | | W | [1, 255] |\n | Offset_x | | [-128, 127] |\n *\n - * In Ascend910, fmap or out_backprop's H and W not support 1 when + * In Ascend910, fmap or out_backprop's H and W not support 1 when\n * fmap_h + pad_top + pad_bottom != (filter_height - 1) * dilation_h + 1 * and filter_width > fmap_width - * If filter_h = 1 and filter_w = 1, out_backprop_w * stride_h * stride_w < 4096 + * If filter_h = 1 and filter_w = 1, + * out_backprop_w * stride_h * stride_w < 4096 *\n * *@par Outputs: @@ -603,7 +604,7 @@ REG_OP(Deconvolution) *@par Inputs: * Three inputs: * @li x: A Tensor. Must be one of the following types: float16, float32, - * float64.4-D with shape [batch, in_height, in_width, in_channels] or + * float64. 4-D with shape [batch, in_height, in_width, in_channels] or * [batch, in_channels, in_height, in_width]. * @li filter_size: A const Tensor of type int32. Currently does not support * data tensor. An integer vector representing the tensor shape of filter, @@ -627,8 +628,8 @@ REG_OP(Deconvolution) | Format | NCHW | NCHW | NCHW |\n | | NHWC | NHWC | HWCN |\n *\n - * For float32 and float64 type of x and outbackprop, the actual calculation on the chip - * is based on float16. + * For float32 and float64 type of x and outbackprop, the actual calculation + * on the chip is based on float16. *\n * *@par Attributes: @@ -651,21 +652,21 @@ REG_OP(Deconvolution) | Name | Field | Scope |\n |------------------|----------|--------------|\n | x(fmap) | H | [1, 200000] |\n - | | W | [1, 200000] |\n - | Filter Size | H | [1, 200000] |\n - | | W | [1, 200000] |\n + | | W | [1, 4096] |\n + | Filter Size | H | [1, 255] |\n + | | W | [1, 255] |\n | out_backprop | H | [1, 200000] |\n - | | W | [1, 200000] |\n + | | W | [1, 4096] |\n | y | H | [1, 200000] |\n - | | W | [1, 200000] |\n - | Stride | H | [1, 200000] |\n - | | W | [1, 200000] |\n - | Padding | Top | [0, 200000] |\n - | | Bottom | [0, 200000] |\n - | | Left | [0, 200000] |\n - | | Right | [0, 200000] |\n - | Dilation | H | [1, 200000] |\n - | | W | [1, 200000] |\n + | | W | [1, 4096] |\n + | Stride | H | [1, 63] |\n + | | W | [1, 63] |\n + | Padding | Top | [0, 255] |\n + | | Bottom | [0, 255] |\n + | | Left | [0, 255] |\n + | | Right | [0, 255] |\n + | Dilation | H | [1, 255] |\n + | | W | [1, 255] |\n *\n *@par Outputs: * y: A Tensor. Has the same type as x, has the same format as filter_size. @@ -743,16 +744,16 @@ REG_OP(Conv2DBackpropFilterD) .OP_END_FACTORY_REG(Conv2DBackpropFilterD) /** -*@brief Computes a 2D convolution given 4D "x" and "filter" tensors. 
-*@par Inputs: -*@li x: A 4D tensor of input image. With the format "NHWC", the data is stored +* @brief Computes a 2D convolution given 4D "x" and "filter" tensors. +* @par Inputs: +* @li x: A 4D tensor of input image. With the format "NHWC", the data is stored * in the order of: [batch, in_height, in_width, in_channels]. -*@li filter: A 4D tensor of learnable filters. Must have the same type as "x". +* @li filter: A 4D tensor of learnable filters. Must have the same type as "x". * With the format "HWCN" , the data is stored in the order of: [filter_height, * filter_width, in_channels / groups, out_channels]. -*@li bias: An optional 1D tensor of additive biases to the filter outputs. +* @li bias: An optional 1D tensor of additive biases to the filter outputs. * The data is stored in the order of: [out_channels]. -*@li offset_w: Reserved. +* @li offset_w: Reserved. *\n *\n * The following are the supported data types and data formats: @@ -770,22 +771,22 @@ REG_OP(Conv2DBackpropFilterD) * float16. *\n * -*@par Attributes: -*@li strides: Required. A list of 4 integers. The stride of the sliding window +* @par Attributes: +* @li strides: Required. A list of 4 integers. The stride of the sliding window * for each dimension of input. The dimension order is determined by the data * format of "x". The N and C dimensions must be set to 1. -*@li pads: Required. A list of 4 integers. The number of pixels to add to each +* @li pads: Required. A list of 4 integers. The number of pixels to add to each * (top, bottom, left, right) side of the input. -*@li dilations: Optional. A list of 4 integers. The dilation factor for each +* @li dilations: Optional. A list of 4 integers. The dilation factor for each * dimension of input. The dimension order is determined by the data format of * "x". The N and C dimensions must be set to 1. Defaults to [1, 1, 1, 1]. -*@li groups: Optional. An integer of type int32. The number of blocked +* @li groups: Optional. An integer of type int32. The number of blocked * connections from input channels to output channels. In_channels and * out_channels must both be divisible by "groups". Defaults to 1. -*@li offset_x: Optional. An integer of type int32. The negative offset added +* @li offset_x: Optional. An integer of type int32. The negative offset added * to the input image for int8 type. Ensure that the output is within the * effective range. Defaults to 0. -*@li data_format: Reserved. +* @li data_format: Reserved. *\n *\n * The following value range restrictions must be met: @@ -825,10 +826,10 @@ REG_OP(Conv2DBackpropFilterD) * / stride_w + 1 *\n * -*@par Quantization supported or not -*@li Yes +* @par Quantization supported or not +* Yes * -*@par Third-party framework compatibility +* @par Third-party framework compatibility *@li Compatible with the TensorFlow operator "conv2d". *@li Compatible with the Caffe operator 2D "Convolution". */ @@ -847,14 +848,14 @@ REG_OP(Conv2D) .OP_END_FACTORY_REG(Conv2D) /** -*@brief Computes a 2D convolution given 4D "x" and "filter_compress" tensors. -*@par Inputs: -*@li x: A 4D tensor of input images. -*@li filter_compress: A 4D tensor of compressed filter data blocks. -*@li compress_index: A 1D tensor of index for decompression. -*@li bias: An optional 1D tensor of additive biases to the filter outputs. +* @brief Computes a 2D convolution given 4D "x" and "filter_compress" tensors. +* @par Inputs: +* @li x: A 4D tensor of input images. +* @li filter_compress: A 4D tensor of compressed filter data blocks. 
+* @li compress_index: A 1D tensor of index for decompression. +* @li bias: An optional 1D tensor of additive biases to the filter outputs. * The data is stored in the order of: [out_channels]. -*@li offset_w: Reserved. +* @li offset_w: Reserved. *\n *\n * The following are the supported data types and data formats: @@ -870,8 +871,8 @@ REG_OP(Conv2D) * float16. *\n * -*@par Attributes: -*@li strides: Required. A list of 4 integers. The stride of the sliding window +* @par Attributes: +* @li strides: Required. A list of 4 integers. The stride of the sliding window * for each dimension of input. The dimension order is determined by the data * format of "x". The N and C dimensions must be set to 1. *@li pads: Required. A list of 4 integers. The number of pixels to add to each @@ -1014,7 +1015,6 @@ REG_OP(DeformableConv2D) /** *@brief Computes a 3D convolution given 5D "x" and "filter" tensors. - *@par Inputs: * @li x: A 5D tensor. Must be one of the following types: float16, * (Currently does not support int8). The format of x is NCDHW or NDHWC. @@ -1025,16 +1025,16 @@ REG_OP(DeformableConv2D) * @li offset_w: Optional. An 1D tensor for quantized deconvolution. Reserved. \n *@par Attributes: - * @li strides: Required. A list of 5 integers. Specifies the stride of the sliding window - * for each dimension of "x". + * @li strides: Required. A list of 5 integers. Specifies the stride of the + * sliding window for each dimension of "x". * The N and C dimensions must be 1. Has the same format as "x". * @li pads: Required. A list of 6 integers. * Supports only padding along the D, H and W dimensions in sequence of head, * tail, top, bottom, left and right. - * @li dilations: Optional. A list of 5 integers. Specifies the dilation factor for each - * dimension of "x". - * @li groups: Optional. Number of blocked connections from input channels to output - * channels. + * @li dilations: Optional. A list of 5 integers. Specifies the dilation + * factor for each dimension of "x". + * @li groups: Optional. Number of blocked connections from input channels + * to output channels. * @li data_format: Optional. An string from: "NDHWC", "NCDHW". * Defaults to "NDHWC". Specify the data format of the input and output data. * The N, C and D dimensions must be 1. Has the same format as "x". @@ -1068,10 +1068,9 @@ REG_OP(Conv3D) /** *@brief Computes the gradients of convolution 3d with respect to the input. - *@par Inputs: - * @li input_size: A Tensor of type int32, int64. An integer vector representing - * the shape of input, where input is a 5-D tensor + * @li input_size: A Tensor of type int32, int64. An integer vector + * representing the shape of input, where input is a 5-D tensor * [batch, depth, height, width, channels] or * [batch, channels, depth, height, width]. * @li filter: A Tensor. Must be one of the following types: float16, float32. @@ -1082,22 +1081,23 @@ REG_OP(Conv3D) * respect to the output of the convolution. \n *@par Attributes: - * @li strides: Required. A list of 5 integers. Specifies the stride of the sliding window - * for each dimension of "out_backprop". + * @li strides: Required. A list of 5 integers. Specifies the stride of the + * sliding window for each dimension of "out_backprop". * The N and C dimensions must be 1. Has the same format as "out_backprop". * @li pads: Required. A list of 6 integers. * Supports only padding along the D, H and W dimensions in sequence of head, * tail, top, bottom, left and right. - * @li dilations: Optional. 
A tuple/list of 5 integers, The dilation factor for each
- * dimension of the input.
+ * @li dilations: Optional. A tuple/list of 5 integers. The dilation factor
+ * for each dimension of the input.
  * The N, C and D dimensions must be 1. Has the same format as "out_backprop".
- * @li groups: Optional. Number of blocked connections from input channels to output
- * channels.
+ * @li groups: Optional. Number of blocked connections from input channels
+ * to output channels.
  * @li data_format: Optional. An string from: "NDHWC", "NCDHW".
  * Defaults to "NDHWC". Specify the data format of the input and output data. \n
 
 *@par Outputs:
- * y: A Tensor. Has the same type as filter,and has same format as "input_size". \n
+ * y: A Tensor. Has the same type as filter, and has the same format as
+ * "input_size". \n
 
 *@par Third-party framework compatibility
  * Compatible with Tensorflow's conv3d_backprop_input
@@ -1207,10 +1207,9 @@ REG_OP(LSTM)
 
 /**
 *@brief Computes the gradients of convolution3D with respect to the filter
-
 *@par Inputs:
- * @li x: A Tensor. Must be one of the following types: float16, float32.
- * Currently does not support double.
+ * @li x: A Tensor. Must be one of the following types: float16, float32,
+ * double (note that double is currently not supported).
  * 5-D with shape [batch, in_depth, in_height, in_width, in_channels]
  * or [batch, in_channels, in_depth, in_height, in_width].
  * @li filter_size: A Tensor of type int32. An integer vector representing the
@@ -1224,21 +1223,22 @@ REG_OP(LSTM)
  * Gradients with respect to the output of the convolution. \n
 
 *@par Attributes:
- * @li strides: Required. A tuple/list of 5 integers. Specifies the stride of the sliding
- * window for each dimension of "x". The N and C dimensions must be 1.
- * Has the same format as "x".
- * @li pads: Required. A tuple/list of 6 integers, [front, back, top, bottom, left, right]
- * pads on feature map.
- * @li dilations: Optional. A tuple/list of 5 integers, The dilation factor for each
- * dimension of input.
+ * @li strides: Required. A tuple/list of 5 integers. Specifies the stride
+ * of the sliding window for each dimension of "x". The N and C dimensions
+ * must be 1. Has the same format as "x".
+ * @li pads: Required. A tuple/list of 6 integers, [front, back, top, bottom,
+ * left, right] pads on feature map.
+ * @li dilations: Optional. A tuple/list of 5 integers. The dilation factor
+ * for each dimension of input.
  * The N, C and D dimensions must be 1. Has the same format as "x".
- * @li groups: Optional. Number of blocked connections from input channels to output
- * channels.
+ * @li groups: Optional. Number of blocked connections from input channels
+ * to output channels.
  * @li data_format: Optional. An string from: "NDHWC", "NCDHW".
  * Defaults to "NDHWC". Specify the data format of the input and output data. \n
 
 *@par Outputs:
- * y: A Tensor that has the same type as "x" and the format is NDHWC, NCDHW or DHWCN. \n
+ * y: A Tensor that has the same type as "x" and the format is NDHWC, NCDHW
+ * or DHWCN. \n
 
 *@par Third-party framework compatibility
  * Compatible with Tensorflow's conv3d_backprop_filter
@@ -1311,25 +1311,26 @@ REG_OP(Conv3DBackpropFilterD)
 *@brief Computes the transpose of convolution 3d with respect to the input.
 
 *@par Inputs:
- * @li input_size: A Tensor of type int32. An integer vector representing the
- * shape of input.
+ * @li input_size: A Tensor of type int32, int64. An integer vector
+ * representing the shape of input.
* @li x: A Tensor of type float16, currently does not support int8. The format * is NDHWC or NCDHW. * @li filter: A Tensor of type float16, currently does not support int8. * The format is NDHWC, NCDHW or DHWCN. * @li bias: Optional. An optional 1D tensor of the same type as "x". Reserved. - * @li offset_w: Optional. An optional 1D tensor for quantized deconvolution. Reserved. \n + * @li offset_w: Optional. An optional 1D tensor for quantized deconvolution. + * Reserved. \n *@par Attributes: - * @li strides: Required. A tuple/list of 5 integers. Specifies the stride of the sliding - * window for each dimension of "x". + * @li strides: Required. A tuple/list of 5 integers. Specifies the stride of + * the sliding window for each dimension of "x". * The N and C dimensions must be 1. Has the same format as "x". * @li pads: Required. A tuple/list of 6 integers. * @li dilations: Optional. A tuple/list of 5 integers, * The dilation factor for each dimension of input. * The N, C and D dimensions must be 1. Has the same format as "x". - * @li groups: Optional. Number of blocked connections from input channels to output - * channels. + * @li groups: Optional. Number of blocked connections from input channels to + * output channels. * @li data_format: Optional. An string from: "NDHWC", "NCDHW". * Defaults to "NDHWC". Specify the data format of the input and output data. * @li output_padding: Optional. The size will be added in the output shape. @@ -1417,7 +1418,8 @@ REG_OP(Conv3DTransposeD) * 4-D with shape [filter_height, filter_width, in_channels, out_channels] * or [out_channels, filter_height, filter_width, in_channels] * or [out_channels, in_channel, filter_height, filter_width]. - * @li bias: An optional 1D tensor of type float16 or int32. Format is "ND". + * @li bias: An optional 1D tensor of type float16, float32, int32. + * Format is "ND". * @li offset_w: An optional 1D tensor for quantized inference. Reserved. *\n *\n @@ -1446,8 +1448,8 @@ REG_OP(Conv3DTransposeD) * Defaults to "1". * @li dilations: A tuple/list of 4 integers, The dilation factor for each * dimension of input. Must be [1, 1, 1, 1]. - * @li data_format: An optional string from: "NHWC", "NCHW". Defaults to "NHWC". - * Specify the data format of the input and output data. + * @li data_format: An optional string from: "NHWC", "NCHW". + * Defaults to "NHWC". Specify the data format of the input and output data. * @li output_padding: The size will be added in the output shape. Defaults * to [0, 0, 0, 0]. * @li offset_x: An optional int. Input offset, used for quantized inference. @@ -1478,15 +1480,15 @@ REG_OP(Conv3DTransposeD) | | W | [1, 255] |\n | Offset_x | | [-128, 127] |\n *\n - * In Ascend910, fmap or out_backprop's H and W not support 1 when + * In Ascend910, fmap or out_backprop's H and W not support 1 when\n * fmap_h + pad_top + pad_bottom != (filter_height - 1) * dilation_h + 1 - * and filter_width > fmap_width - * If filter_h = 1 and filter_w = 1, out_backprop_w * stride_h * stride_w < 4096 - *\n + * and filter_width > fmap_width. + * If filter_h = 1 and filter_w = 1, out_backprop_w * stride_h * stride_w + * < 4096. \n * *@par Outputs: - * y: A Tensor. A Tensor of type float16 or int32, and has same format as - * input_size. + * y: A Tensor. A Tensor of type float16, int32, float32, and has + * same format as input_size. 
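+ *
+ * For intuition, a numeric check of the shape relations below (hedged,
+ * illustrative values only): with fmap_height = 224, pad_top = pad_bottom = 1,
+ * filter_height = 3, dilation_h = 1 and stride_h = 2,
+ * out_backprop_height = (224 + 1 + 1 - (1 * (3 - 1) + 1)) / 2 + 1 = 112.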
*\n * out_backprop_height = (fmap_height + pad_top + pad_bottom - * (dilation_h * (filter_height - 1) + 1)) diff --git a/third_party/fwkacllib/inc/ops/nn_norm_ops.h b/third_party/fwkacllib/inc/ops/nn_norm_ops.h index 523fb199..65411e2a 100644 --- a/third_party/fwkacllib/inc/ops/nn_norm_ops.h +++ b/third_party/fwkacllib/inc/ops/nn_norm_ops.h @@ -124,7 +124,7 @@ REG_OP(SoftmaxGrad) .OP_END_FACTORY_REG(SoftmaxGrad) /** -*@brief Computes the sigmoid cross entropy loss of "predict" and "target" . \n +* @brief Computes the sigmoid cross entropy loss of "predict" and "target" . *@par Inputs: * Three inputs, including: @@ -146,7 +146,7 @@ REG_OP(SigmoidCrossEntropyWithLogitsGrad) .OP_END_FACTORY_REG(SigmoidCrossEntropyWithLogitsGrad) /** -*@brief Performs the backpropagation of SigmoidCrossEntropyWithLogits for training scenarios . \n +* @brief Performs the backpropagation of SigmoidCrossEntropyWithLogits for training scenarios . *@par Inputs: * Two inputs, including: @@ -194,7 +194,7 @@ REG_OP(SigmoidCrossEntropyWithLogitsV2) .OP_END_FACTORY_REG(SigmoidCrossEntropyWithLogitsV2) /** -*@brief Computes the regression box of the RPN. It is a FasterRCNN operator . \n +* @brief Computes the regression box of the RPN. It is a FasterRCNN operator . *@par Inputs: * Two inputs, including: @@ -221,7 +221,7 @@ REG_OP(SmoothL1Loss) .OP_END_FACTORY_REG(SmoothL1Loss) /** -*@brief Performs the backpropagation of SmoothL1Loss for training scenarios . \n +* @brief Performs the backpropagation of SmoothL1Loss for training scenarios . *@par Inputs: * Three inputs, including: @@ -796,6 +796,52 @@ REG_OP(LayerNormBetaGammaBackpropV2) .REQUIRED_ATTR(shape_gamma, ListInt) .OP_END_FACTORY_REG(LayerNormBetaGammaBackpropV2) +/** +* @brief LNDropoutGrad operator interface implementation +* calculating: dy, x, variance, mean, gamma +* pd_xl = dy*gamma +* sub_x_mean = x - mean +* var_elta_2 = np.power((variance + EPSLON), (-0.5)) +* pd_var = sum(pd_xl * sub_x_mean, reduce_axis, keepdims=True) * var_elta_2 * var_elta_2 * var_elta_2 * (-0.5) +* pd_mean = sum(pd_xl, reduce_axis, keepdims=True) * var_elta_2 * (-1.0) +* pd_x = pd_xl * var_elta_2 + pd_var * (2.0 / m) * sub_x_mean + pd_mean * (1.0 / m) +* pd_x_dropout = pd_x * mask * (1 / keep_prob) +* pd_gamma = sum(dy * sub_x_mean * var_elta_2, param_axis, keepdims=True) +* pd_beta = sum(dy, param_axis, keepdims=True) + +* @par Inputs: +* Six inputs, including: +* @li dy: A Tensor. Must be one of the following types: float16, float32. +* @li x: A Tensor. Must be one of the following types: float16, float32. +* @li variance: A Tensor. Must be one of the following types: float16, float32. +* @li mean: A Tensor. Must be one of the following types: float16, float32. +* @li gamma: A Tensor. Must be one of the following types: float16, float32. +* @li mask: A Tensor. Must be one of the following types: uint8.\n + +* @par Outputs: +* Four outputs, including: +* @li pd_x: A Tensor. Must be one of the following types: float16, float32. +* @li pd_x_dropout: A Tensor. Must be one of the following types: float16, float32. +* @li pd_gamma: A Tensor. Must be one of the following types: float16, float32. +* @li pd_beta: A Tensor. Must be one of the following types: float16, float32. + +* @par Restrictions: +* Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. 
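+*
+* @par Example
+* A minimal scalar sketch (illustrative only, not the device kernel) of the
+* final scaling step "pd_x_dropout = pd_x * mask * (1 / keep_prob)" from the
+* formulas above; buffer names and the element count "n" are hypothetical:
+* @code
+*   for (size_t i = 0; i < n; ++i) {
+*     pd_x_dropout[i] = pd_x[i] * mask[i] * (1.0f / keep_prob);
+*   }
+* @endcode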
+*/
+REG_OP(LNDropoutGrad)
+    .INPUT(dy, TensorType({DT_FLOAT, DT_FLOAT16}))
+    .INPUT(x, TensorType({DT_FLOAT, DT_FLOAT16}))
+    .INPUT(variance, TensorType({DT_FLOAT, DT_FLOAT16}))
+    .INPUT(mean, TensorType({DT_FLOAT, DT_FLOAT16}))
+    .INPUT(gamma, TensorType({DT_FLOAT, DT_FLOAT16}))
+    .INPUT(mask, TensorType({DT_UINT8}))
+    .OUTPUT(pd_x, TensorType({DT_FLOAT, DT_FLOAT16}))
+    .OUTPUT(pd_x_dropout, TensorType({DT_FLOAT, DT_FLOAT16}))
+    .OUTPUT(pd_gamma, TensorType({DT_FLOAT, DT_FLOAT16}))
+    .OUTPUT(pd_beta, TensorType({DT_FLOAT, DT_FLOAT16}))
+    .REQUIRED_ATTR(keep_prob, Float)
+    .OP_END_FACTORY_REG(LNDropoutGrad)
+
 /**
 *@brief Return "output" according to the algorithm of dropout_do_mask:
 *  scale_x = x *(1 / keep_prob)
diff --git a/third_party/fwkacllib/inc/ops/nn_ops.h b/third_party/fwkacllib/inc/ops/nn_ops.h
index 83aa30d2..8c6987ca 100644
--- a/third_party/fwkacllib/inc/ops/nn_ops.h
+++ b/third_party/fwkacllib/inc/ops/nn_ops.h
@@ -112,7 +112,10 @@ REG_OP(FusedBatchNormV2)
 * @li input_data: A Tensor. Data to be sorted. Support float16 or float32.
 * @li input_index: A Tensor. Range(0, 2048). Support float16 or int32.
 * @par Attributes:
- * k_num: Int.Number to be sorted.
+ * @li k_num: Int. Number to be sorted.
+ * @li largest: An optional bool, controls whether to return largest or smallest elements. Defaults to true.
+ * If "True", the "k" largest elements are returned in descending order.
+ * If "False", the "k" smallest elements are returned in ascending order.
 * @par Outputs:
 * One output, including:
 * output_proposal: A Tensor. Datatype and format is same as input_data. Proposal sorted for each channel.
@@ -124,6 +127,7 @@ REG_OP(SegmentSort)
     .INPUT(input_index, TensorType({DT_FLOAT16,DT_INT32}))
     .OUTPUT(output_proposal, TensorType({DT_FLOAT16,DT_FLOAT}))
     .REQUIRED_ATTR(k_num, Int)
+    .ATTR(largest, Bool, true)
     .OP_END_FACTORY_REG(SegmentSort)
 
 /**
@@ -132,8 +136,11 @@ REG_OP(SegmentSort)
 * One input, including:
 * input_proposal: A Tensor. Proposal sorted for each channel. Support float16 or float32
 * @par Attributes:
- * k_num: Int.Number to be sorted.
- * include_index: Bool.include_index is false,output proposal. include_index is true, output data and index.
+ * @li k_num: Int. Number to be sorted.
+ * @li include_index: Bool. If "include_index" is false, output proposal; if true, output data and index.
+ * @li largest: An optional bool, controls whether to return largest or smallest elements. Defaults to true.
+ * If "True", the "k" largest elements are returned in descending order.
+ * If "False", the "k" smallest elements are returned in ascending order.
 * @par Outputs:
 * Two output, including:
 * output_proposal: A Tensor. Datatype and format is same as input_data. Proposal sorted for each channel.
@@ -147,6 +154,7 @@ REG_OP(MultiMerge)
     .OUTPUT(output_index, TensorType({DT_INT32}))
     .REQUIRED_ATTR(k_num, Int)
     .ATTR(include_index, Bool, false)
+    .ATTR(largest, Bool, true)
     .OP_END_FACTORY_REG(MultiMerge)
 
 /**
@@ -155,7 +163,10 @@ REG_OP(MultiMerge)
 * One input, including:
 * input_proposal: A Tensor. Proposal sorted for each channel. Support float16
 * @par Attributes:
- * k_num: Int.Number to be sorted.
+ * @li k_num: Int. Number to be sorted.
+ * @li largest: An optional bool, controls whether to return largest or smallest elements. Defaults to true.
+ * If "True", the "k" largest elements are returned in descending order.
+ * If "False", the "k" smallest elements are returned in ascending order.
 * @par Outputs:
 * Two output, including:
 * @li output_data: A Tensor.
Datatype and format is same as input_data. Data sorted. @@ -168,6 +179,7 @@ REG_OP(SingleMerge) .OUTPUT(output_data, TensorType({ DT_FLOAT16 })) .OUTPUT(output_index, TensorType({ DT_INT32 })) .REQUIRED_ATTR(k_num, Int) + .ATTR(largest, Bool, true) .OP_END_FACTORY_REG(SingleMerge) /** diff --git a/third_party/fwkacllib/inc/ops/nn_pooling_ops.h b/third_party/fwkacllib/inc/ops/nn_pooling_ops.h index 4c6f7293..f34de163 100644 --- a/third_party/fwkacllib/inc/ops/nn_pooling_ops.h +++ b/third_party/fwkacllib/inc/ops/nn_pooling_ops.h @@ -75,33 +75,36 @@ REG_OP(Pooling) .OP_END_FACTORY_REG(Pooling) /** -*@brief Performs average pooling on the input . \n - +*@brief Performs average pooling on the input. \n *@par Inputs: -*x: A tensor of type float16, float32, double . \n +*x: A tensor of type float16, float32, double. \n *@par Attributes: -*@li ksize: A required list of 4 ints, specifying the size (N, C, H, and W) of the sliding window, - * where N = C = 1, and H and W are positive integers within the range [1, 255]. -*@li strides: A required list of 4 ints, specifying the stride of the sliding window. - * The strides of the N and C dimensions are 1. - * The strides of the H and W dimensions are positive integers within the range [1, 63]. +*@li ksize: A required list of 4 ints, specifying the size (N, C, H, and W) +* of the sliding window, where N = C = 1, and H and W are positive integers +* within the range [1, 255]. +* @li strides: A required list of 4 ints, specifying the stride of the +* sliding window. The strides of the N and C dimensions are 1. The strides of +* the H and W dimensions are positive integers within the range [1, 63]. *@li padding: A required string, specifying the padding algorithm, - * either "VALID" or "SAME". With "SAME" means that the outputs will have the same spatial dimensions as its inputs. - * With "VALID" means no padding. -*@li data_format: An optional string, specifying the data format of "ksize" and "strides", - * either "NCHW", or "NHWC" (default) . \n + * either "VALID" or "SAME". With "SAME" means that the outputs will have the + * same spatial dimensions as its inputs. With "VALID" means no padding. +*@li data_format: An optional string, specifying the data format of "ksize" +* and "strides", either "NCHW", or "NHWC" (default). \n *@par Outputs: -*y: The average pooled output tensor. Has the same type and format as input "x" . \n +* y: The average pooled output tensor. Has the same type and format +* as input "x". \n -*@attention Constraints: -*@li This operator applies only to a TensorFlow network. -*@li Only single input and single output are supported. +* @attention Constraints: +* @li This operator applies only to a TensorFlow network. +* @li Only single input and single output are supported. *@li Global pooling is supported. -*@li "ksize_H" and "ksize_W" are positive integers within the range [1, 255]. ksize_H * ksize_W < 256 +* @li "ksize_H" and "ksize_W" are positive integers within the range [1, 255]. +* ksize_H * ksize_W < 256 *@li Due to instruction restrictions, - * the values of "strides_h" and "strides_w" are positive integers within the range [1, 63]. + * the values of "strides_h" and "strides_w" are positive integers within + * the range [1, 63]. *@par Third-party framework compatibility * Compatible with the TensorFlow operator AvgPool. */ @@ -116,7 +119,6 @@ REG_OP(AvgPool) /** *@brief Performs average pooling on the input. - *@par Inputs: *x: A tensor of type float16, float32, double. 
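+*
+* A hedged usage sketch, assuming the ge::op::AvgPool wrapper that GE
+* generates from the AvgPool registration above; "feature_map" is a
+* placeholder for an upstream operator output:
+* @code
+*   auto pool = ge::op::AvgPool("avg_pool")
+*                   .set_input_x(feature_map)        // NHWC input
+*                   .set_attr_ksize({1, 3, 3, 1})    // 3x3 window, N = C = 1
+*                   .set_attr_strides({1, 2, 2, 1})  // H/W strides in [1, 63]
+*                   .set_attr_padding("SAME")
+*                   .set_attr_data_format("NHWC");
+* @endcode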
@@ -130,19 +132,19 @@ REG_OP(AvgPool) * either "VALID", "SAME" and "CALCULATED". * With "SAME" means that the outputs will have the same spatial dimensions as its inputs. * With "VALID" means no padding. -*@li pads: Pad value when padding_mode is "CALCULATED". -*@li data_format: An optional string, specifying the data format of "ksize" and "strides", +* @li pads: Pad value when padding_mode is "CALCULATED". +* @li data_format: An optional string, specifying the data format of "ksize" and "strides", * either "NCHW", or "NHWC" (default). -*@li global_pooling: Global or not. If true, pads will change to {0,0,0,0} and ksize will change to [input_h, input_w] -*@li ceil_mode: Use ceil or floor to calculate the output size when padding_mode is "CALCULATED". -*@li exclusive: Ignore padding area or not when calculating average. +* @li global_pooling: Global or not. If true, pads will change to {0,0,0,0} and ksize will change to [input_h, input_w] +* @li ceil_mode: Use ceil or floor to calculate the output size when padding_mode is "CALCULATED". +* @li exclusive: Ignore padding area or not when calculating average. -*@par Outputs: -*y: The average pooled output tensor. Has the same type and format as input "x". +* @par Outputs: +* y: The average pooled output tensor. Has the same type and format as input "x". *@attention Constraints: *@li Only single input and single output are supported. -*@li Global pooling is supported. +* @li Global pooling is supported. *@li "ksize_H" and "ksize_W" are positive integers within the range [1, 255]. ksize_H * ksize_W < 256 *@li Due to instruction restrictions, * the values of "strides_h" and "strides_w" are positive integers within the range [1, 63]. @@ -163,25 +165,30 @@ REG_OP(AvgPoolV2) .OP_END_FACTORY_REG(AvgPoolV2) /** -*@brief Performs average pooling on the input. - -*@par Inputs: -*x: A 5-D Tensor of shape [batch, depth, height, width, channels] and type float16, float32, double. +* @brief Performs average pooling on the input. \n +* @par Inputs: +* x: A 5-D Tensor of shape [batch, depth, height, width, channels] and type +* float16, float32, double. \n -*@par Attributes: -*@li ksize: List of ints that has length 1, 3 or 5. The size of the window for each dimension of the input tensor. -*@li strides:List of ints that has length 1, 3 or 5. The stride of the sliding window for each dimension of the input tensor. -*@li pads: List of ints, implicit zero paddings on both sides of the input. -*@li ceil_mode: When true, will use ceil instead of floor in the formula to compute the output shape. -*@li count_include_pad: When true, will include the zero-padding in the averaging calculation. -*@li divisor_override: if specified, it will be used as divisor, otherwise size of the pooling region will be used. -*@li data_format: A string, format of input data . \n +* @par Attributes: +* @li ksize: List of ints that has length 1, 3 or 5. The size of the window +* for each dimension of the input tensor. +*@li strides:List of ints that has length 1, 3 or 5. The stride of the sliding +* window for each dimension of the input tensor. +* @li pads: List of ints, implicit zero paddings on both sides of the input. +* @li ceil_mode: When true, will use ceil instead of floor in the formula to +* compute the output shape. +* @li count_include_pad: When true, will include the zero-padding in the +* averaging calculation. +* @li divisor_override: if specified, it will be used as divisor, otherwise +* size of the pooling region will be used. 
+* @li data_format: A string, format of input data. \n *@par Outputs: -*y: The average pooled output tensor . \n +*y: The average pooled output tensor. \n *@attention Constraints: -*@li "ksize" is in the range [1, 255]. "strides" is in the range [1, 63] +* @li "ksize" is in the range [1, 255]. "strides" is in the range [1, 63]. *@par Third-party framework compatibility * Compatible with the TensorFlow operator AvgPool3D. @@ -200,19 +207,18 @@ REG_OP(AvgPool3D) /** -*@brief Performs average pooling on the input. - -*@par Inputs: -*@li x: A 5-D Tensor of shape [batch, depth, height, width, channels] and type float16, float32, double. -*@li filter: An optional tensor of type float16, float32, double, fractal_z_3d layout. +* @brief Performs average pooling on the input. +* @par Inputs: +* @li x: A 5-D Tensor of shape [batch, depth, height, width, channels] and type float16, float32, double. +* @li filter: An optional tensor of type float16, float32, double, fractal_z_3d layout. *@li multiplier: An optional tensor of float16, float32, double. -*@par Attributes: +* @par Attributes: *@li ksize: List of ints that has length 1, 3 or 5. The size of the window for each dimension of the input tensor. -*@li strides:List of ints that has length 1, 3 or 5. The stride of the sliding window for each dimension of the input tensor. +* @li strides:List of ints that has length 1, 3 or 5. The stride of the sliding window for each dimension of the input tensor. *@li pads: List of ints, implicit zero paddings on both sides of the input. *@li ceil_mode: When true, will use ceil instead of floor in the formula to compute the output shape. -*@li count_include_pad: When true, will include the zero-padding in the averaging calculation. +* @li count_include_pad: When true, will include the zero-padding in the averaging calculation. *@li divisor_override: if specified, it will be used as divisor, otherwise size of the pooling region will be used. *@li data_format: A string, format of input data . \n @@ -240,26 +246,30 @@ REG_OP(AvgPool3DD) .OP_END_FACTORY_REG(AvgPool3DD) /** -* @brief Computes AvgPool3DGrad function. - +* @brief Computes AvgPool3DGrad function. \n * @par Inputs: * @li orig_input_shape: An NDHWC tensor of type int32. -* @li grads: An NDHWC tensor of type float16, float32, or double. +* @li grads: An NDHWC tensor of type float16, float32, or double. \n * @par Attributes: -* @li ksize: List of ints that has length 5. The size of the window for each dimension of the input tensor. -* @li strides:List of ints that has length 5. The stride of the sliding window for each dimension of the input tensor. +* @li ksize: List of ints that has length 5. The size of the window for +* each dimension of the input tensor. +* @li strides:List of ints that has length 5. The stride of the sliding +* window for each dimension of the input tensor. * @li pads: List of ints, implicit zero paddings on both sides of the input. -* @li ceil_mode: When true, will use ceil instead of floor in the formula to compute the output shape. -* @li count_include_pad: When true, will include the zero-padding in the averaging calculation. -* @li divisor_override: if specified, it will be used as divisor, otherwise size of the pooling region will be used. -* @li data_format: A string, format of input data. +* @li ceil_mode: When true, will use ceil instead of floor in the formula to +* compute the output shape. +* @li count_include_pad: When true, will include the zero-padding in the +* averaging calculation. 
+* @li divisor_override: if specified, it will be used as divisor, otherwise +* size of the pooling region will be used. +* @li data_format: A string, format of input data. \n * @par Outputs: -* @output: A mutable tensor with the same shape and type as "orig_input_shape". +* @li output: A mutable tensor with the same shape and type as "grads". * @attention Constraints: -* @li "ksize" is in the range [1, 255]. "strides" is in the range [1, 63] +* @li "ksize" is in the range [1, 255]. "strides" is in the range [1, 63]. \n * @par Third-party framework compatibility * @li Compatible with the TensorFlow operator AvgPoolGrad. @@ -280,21 +290,26 @@ REG_OP(AvgPool3DGrad) /** * @brief Performs average pooling on the input. - * @par Inputs: * @li grads: An NDHWC tensor of type float16. * @li filter: An optional tensor of type float16, fractal_z_3d layout. * @li multiplier: An optional tensor of float16. * @par Attributes: -* @li orig_input_shape: List of ints that has length 5. The size of the window for each dimension of the input tensor. -* @li ksize: List of ints that has length 5. The size of the window for each dimension of the input tensor. -* @li strides:List of ints that has length 5. The stride of the sliding window for each dimension of the input tensor. +* @li orig_input_shape: List of ints that has length 5. +* The size of the window for each dimension of the input tensor. +* @li ksize: List of ints that has length 5. +* The size of the window for each dimension of the input tensor. +* @li strides:List of ints that has length 5. +* The stride of the sliding window for each dimension of the input tensor. * @li pads: List of ints, implicit zero paddings on both sides of the input. -* @li ceil_mode: When true, will use ceil instead of floor in the formula to compute the output shape. -* @li count_include_pad: When true, will include the zero-padding in the averaging calculation. -* @li divisor_override: if specified, it will be used as divisor, otherwise size of the pooling region will be used. -* @li data_format: A string, format of input data . \n +* @li ceil_mode: When true, will use ceil instead of floor +* in the formula to compute the output shape. +* @li count_include_pad: When true, will include the zero-padding +* in the averaging calculation. +* @li divisor_override: if specified, it will be used as divisor, +* otherwise size of the pooling region will be used. +* @li data_format: A string, format of input data. \n * @par Outputs: * output: The average pooled output tensor . \n @@ -364,9 +379,9 @@ REG_OP(MaxPoolExt2) /** *@brief Performs max pooling on the input . \n -*@par Inputs: +* @par Inputs: * One input: -*x: A Tensor. Supported type:float16, float32, double, int8, int16, +* x: A Tensor. Supported type:float16, float32, double, int8, int16, * int32, int64, uint8, uint16, qint8 *@par Attributes: @@ -406,10 +421,10 @@ REG_OP(MaxPool) .OP_END_FACTORY_REG(MaxPool) /** -*@brief Performs max 3d pooling on the input . \n +* @brief Performs max 3d pooling on the input . \n *@par Inputs: -*x: A Tensor. Supported type float16, float32, double . \n +* x: A Tensor. Supported type float16, float32, double . \n *@par Attributes: *@li ksize: A required list of int8, int16, int32, or int64 values, @@ -680,7 +695,7 @@ REG_OP(MaxPoolV2) .OP_END_FACTORY_REG(MaxPoolV2) /** -*@brief Performs max pooling on the input and outputs both max values and +* @brief Performs max pooling on the input and outputs both max values and * indices . 
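+*
+* For intuition, a scalar reference of a single 2x2 window (illustrative
+* only, not the device kernel): the recorded index lets the backward pass
+* route gradients to the winning position only.
+* @code
+*   float window[4] = {1.f, 3.f, 2.f, 0.f};
+*   int argmax = 0;
+*   for (int i = 1; i < 4; ++i) {
+*     if (window[i] > window[argmax]) argmax = i;
+*   }
+*   // y = window[argmax] = 3.f; "argmax" feeds MaxPoolGradWithArgmax.
+* @endcode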
\n *@par Inputs: @@ -702,7 +717,7 @@ REG_OP(MaxPoolV2) *@par Outputs: *@li y: A Tensor. Has the same type and format as input "x". *@li argmax: A Tensor. Has the same type and format as input "x". -*@attention Constraints: +* @attention Constraints: *@li "ksize" is a list that has length 4: ksize[0] = 1 or ksize[3] = 1, * ksize[1] * ksize[2] <= 255. *@li "stride is a list that has length 4: strides[0] = 1 or strides[3] = 1, @@ -723,39 +738,39 @@ REG_OP(MaxPoolWithArgmax) .OP_END_FACTORY_REG(MaxPoolWithArgmax) /** -*@brief Performs the backpropagation of MaxPoolWithArgmax . \n +* @brief Performs the backpropagation of MaxPoolWithArgmax . \n -*@par Inputs: +* @par Inputs: * Three inputs, including: -*@li x: An 4d tensor. Supported type: float, double, int32, +* @li x: An 4d tensor. Supported type: float, double, int32, * uint8, int16, int8, int64, uint16, half, uint32, uint64. * Must set the format, supported format list ["NCHW, NHWC"] -*@li grad: An 4d tensor. Supported type: float, double, int32, +* @li grad: An 4d tensor. Supported type: float, double, int32, * uint8, int16, int8, int64, uint16, half, uint32, uint64. * Must set the format, supported format list ["NCHW, NHWC"] *@li argmx: A tensor of type int32 or int64 . \n -*@par Attributes: -*@li ksize: A required list of int8, int16, int32, or int64 values, +* @par Attributes: +* @li ksize: A required list of int8, int16, int32, or int64 values, * specifying the size of the window for each dimension of the input tensor. * No default value. -*@li strides: A required list of int8, int16, int32, or int64 values, +* @li strides: A required list of int8, int16, int32, or int64 values, * specifying the stride of the sliding window for each dimension of * the input tensor. No default value. -*@li padding: A required string. No default value . \n +* @li padding: A required string. No default value . \n -*@par Outputs: +* @par Outputs: *y: A Tensor. Has the same type and format as input "x" . \n -*@attention Constraints: -*@li "ksize" is a list that has length 4: ksize[0] = 1 or ksize[3] = 1, +* @attention Constraints: +* @li "ksize" is a list that has length 4: ksize[0] = 1 or ksize[3] = 1, * ksize[1] * ksize[2] <= 255. -*@li "strides" is a list that has length 4: strides[0] = 1 or strides[3] = 1 -*@li "padding" is either "SAME" or "VALID". +* @li "strides" is a list that has length 4: strides[0] = 1 or strides[3] = 1 +* @li "padding" is either "SAME" or "VALID". \n -*@see max_pool_with_argmax -*@par Third-party framework compatibility +* @see max_pool_with_argmax +* @par Third-party framework compatibility * Compatible with the TensorFlow operator MaxPoolGradWithArgmax. */ REG_OP(MaxPoolGradWithArgmax) @@ -769,23 +784,23 @@ REG_OP(MaxPoolGradWithArgmax) .OP_END_FACTORY_REG(MaxPoolGradWithArgmax) /** -*@brief Performs transform mask to argmax . \n +* @brief Performs transform mask to argmax . \n -*@par Inputs: +* @par Inputs: * Two inputs: -*@li x: A Tensor of type float16. -*@li mask: A Tensor of type uint16 . \n +* @li x: A Tensor of type float16. +* @li mask: A Tensor of type uint16 . \n -*@par Attributes: -*@li ksize: A required list of int8, int16, int32, or int64 values, specifying the size of the window for each dimension of the input tensor. No default value. -*@li strides: A required list of int8, int16, int32, or int64 values, specifying the stride of the sliding window for each dimension of the input tensor. No default value. -*@li padding: A required string. No default value . 
-*@li originshape:A required list of int8, int16, int32, or int64 values, No default value. \n +* @par Attributes: +* @li ksize: A required list of int8, int16, int32, or int64 values, specifying the size of the window for each dimension of the input tensor. No default value. +* @li strides: A required list of int8, int16, int32, or int64 values, specifying the stride of the sliding window for each dimension of the input tensor. No default value. +* @li padding: A required string. No default value . +* @li originshape:A required list of int8, int16, int32, or int64 values, No default value. \n -*@par Outputs: +* @par Outputs: *argmax: A Tensor of type int32 . \n -*@attention Constraints: +* @attention Constraints: *@li "ksize" is a list that has length 4: ksize[0] = 1 or ksize[3] = 1, ksize[1] * ksize[2] <= 255. *@li "stride is a list that has length 4: strides[0] = 1 or strides[3] = 1, strides[1] <= 63, strides[0] >= 1, strides[2] <= 63, strides[2] >= 1. *@li "padding" is either "SAME" or "VALID" . \n @@ -843,11 +858,10 @@ REG_OP(MaxPoolGradGradWithArgmax) .OP_END_FACTORY_REG(MaxPoolGradGradWithArgmax) /** -* @brief Computes avgpoograd function . \n - +* @brief Computes avgpoograd function. \n * @par Inputs: * @li orig_input_shape: An NHWC tensor of type int32. -* @li input_grad: An NHWC tensor of type float16, float32, or double . \n +* @li input_grad: An NHWC tensor of type float16, float32, or double. \n * @par Attributes: * @li ksize: A required tuple or list, specifying the size of the window for @@ -856,10 +870,10 @@ REG_OP(MaxPoolGradGradWithArgmax) * window for each dimension of the input tensor. * @li padding: A required string, specifying the type of * the padding algorithm to use. -* @li data_format: An optional string. Defaults to "NHWC" . \n +* @li data_format: An optional string. Defaults to "NHWC". \n * @par Outputs: -* @out_grad: A mutable tensor with the same shape and type as "orig_input" . \n +* out_grad: A mutable tensor with the same shape and type as "input_grad". \n * @par Third-party framework compatibility * @li Compatible with the TensorFlow operator AvgPoolGrad. @@ -876,7 +890,6 @@ REG_OP(AvgPoolGrad) /** * @brief Computes gradients of average pooling function . \n - * @par Inputs: * @input_grad: An NHWC tensor of type float16. * @mean_matrix: Assist matrix, an NHWC tensor of type float16. @@ -911,11 +924,10 @@ REG_OP(AvgPoolGradD) .OP_END_FACTORY_REG(AvgPoolGradD) /** -* @brief Computes avgpoolv2grad function. - +* @brief Computes avgpoolv2grad function. \n * @par Inputs: * @li orig_input_shape: An NHWC tensor of type int32. -* @li input_grad: An NHWC tensor of type float16, float32, or double. +* @li input_grad: An NHWC tensor of type float16, float32, or double. \n * @par Attributes: * @li ksize: A required tuple or list, specifying the size of the window for @@ -924,15 +936,15 @@ REG_OP(AvgPoolGradD) * window for each dimension of the input tensor. * @li padding_mode: A required string, specifying the type of * the padding algorithm to use. -* @li global_pooling: Whether to use the global pooling. If global_pooling=true, -* ksize and pads will be ignored. Default False. -* @li ceil_mode: Whether to use the ceil function to calculate output height and -* width. Default False. +* @li global_pooling: Whether to use the global pooling. If global_pooling = +* true, ksize and pads will be ignored. Default False. +* @li ceil_mode: Whether to use the ceil function to calculate output height +* and width. Default False. 
* @li exclusive: Whether to exclude padding points. default is true. -* @li data_format: An optional string. Defaults to "NHWC". +* @li data_format: An optional string. Defaults to "NHWC". \n * @par Outputs: -* @out_grad: A mutable tensor with the same shape and type as "orig_input". +* @li out_grad: A mutable tensor with the same shape and type as "orig_input". \n * @par Third-party framework compatibility * @li Compatible with the TensorFlow operator AvgPoolGrad. @@ -952,7 +964,6 @@ REG_OP(AvgPoolV2Grad) .OP_END_FACTORY_REG(AvgPoolV2Grad) /** * @brief Computes gradients of averagev2 pooling function. - * @par Inputs: *input_grad: An NHWC tensor of type float16, float32, or double. @@ -1257,7 +1268,6 @@ REG_OP(MaxPool3DGrad) /** *@brief Performs AvgPool1D on the input . \n - *@par Inputs: *x: A Tensor. Must be one of the following types: int8, uint8, int16, int32, int64, float16, float32, float64 . \n @@ -1286,7 +1296,6 @@ REG_OP(AvgPool1D) /** *@brief Performs AvgPool1D on the input . \n - *@par Inputs: *x: A Tensor. Must be one of the following types: int8, uint8, int16, int32, int64, float16, float32, float64 . \n diff --git a/third_party/fwkacllib/inc/ops/nn_training_ops.h b/third_party/fwkacllib/inc/ops/nn_training_ops.h index 21ee90ab..32da707e 100644 --- a/third_party/fwkacllib/inc/ops/nn_training_ops.h +++ b/third_party/fwkacllib/inc/ops/nn_training_ops.h @@ -2002,40 +2002,41 @@ REG_OP(ApplyAdadeltaD) .OP_END_FACTORY_REG(ApplyAdadeltaD) /** -* @brief Updates "var" according to the ApplyMomentum algorithm. -* accum = accum * momentum + x1 * x2 -* if use_nesterov is True: -* var -= x1 * x2 * lr + accum * momentum * lr -* else: -* var -= accum * lr +*@brief Updates "var" according to the ApplyMomentum algorithm. +* accum = accum * momentum + x1 * x2 +* if use_nesterov is True: +* var -= x1 * x2 * lr + accum * momentum * lr +* else: var -= accum * lr * -* @par Inputs: -* Six inputs, including: -* @li var: A mutable Tensor has type TensorType::NumberType(). -* Should be a Variable Tensor. -* @li accum: A mutable Tensor has the same type as "var". -* Should be a Variable Tensor. -* @li lr: A scalar has the same type as "var", for the scaling factor. -* @li x1: A Tensor has type TensorType::NumberType(). -* @li momentum: A scalar has the same type as "var". -* @li x2: A scalar has the same type as "var". +*@par Inputs: +* Six inputs, including: +*@li var: A mutable Tensor has type TensorType::NumberType(). +* Should be a Variable Tensor. +*@li accum: A mutable Tensor has the same type as "var". +* Should be a Variable Tensor. +*@li lr: A scalar has the same type as "var", for the scaling factor. +*@li x1: A Tensor has type TensorType::NumberType(). +*@li momentum: A scalar has the same type as "var". +*@li x2: A scalar has the same type as "var". \n * -* @par Attributes: -* Two attributes, including: -* @li use_nesterov: An optional bool. Defaults to "False". -* If True, the tensor passed to compute grad will be var - lr * momentum * accum, -* so in the end, the var you get is actually var - lr * momentum * accum. -* @li use_locking: An optional bool. Defaults to "False". -* If "True", updating of the "var", m", and "v" tensors will be protected -* by a lock; otherwise the behavior is undefined, but may exhibit less contention. +*@par Attributes: +* Two attributes, including: +*@li use_nesterov: An optional bool. Defaults to "False". 
+* If True, the tensor passed to compute grad will be
+* var - lr * momentum * accum, so in the end,
+* the var you get is actually var - lr * momentum * accum.
+*@li use_locking: An optional bool. Defaults to "False".
+* If "True", updating of the "var" and "accum" tensors will be protected
+* by a lock; otherwise the behavior is undefined, but may exhibit
+* less contention. \n
 *
-*@par Outputs:
-* Two outputs, including:
-*@li var: A mutable Tensor has the same type as "var".
-*@li accum: A mutable Tensor has the same type as "var".
+*@par Outputs:
+* Two outputs, including:
+*@li var: A mutable Tensor has the same type as "var".
+*@li accum: A mutable Tensor has the same type as "var". \n
 
 *@par Restrictions:
-*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use.
+* Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use.
 */
 REG_OP(FusedMulApplyMomentum)
     .INPUT(var, TensorType::NumberType())
diff --git a/third_party/fwkacllib/inc/ops/nonlinear_fuc_ops.h b/third_party/fwkacllib/inc/ops/nonlinear_fuc_ops.h
index a582d6e2..e4d7936c 100644
--- a/third_party/fwkacllib/inc/ops/nonlinear_fuc_ops.h
+++ b/third_party/fwkacllib/inc/ops/nonlinear_fuc_ops.h
@@ -26,16 +26,17 @@ namespace ge {
 
 /**
 *@brief The GELU activation function is x*Φ(x),
-* where Φ(x) the standard Gaussian cumulative distribution function. \n
+* where Φ(x) is the standard Gaussian cumulative distribution function.
 
 *@par Inputs:
-*One input, including:
-*x: A Tensor. Must be one of the following types: float16, float32
+*One input, including: \n
+*x: A Tensor. Must be one of the following types: float16, float32. \n
 
 *@par Outputs:
-*y: A Tensor. Has the same type as "x".
-*@par Third-party framework compatibility
-*Compatible with the TensorFlow operator Gelu
+*y: A Tensor. Has the same type as "x". \n
+
+*@par Third-party framework compatibility:
+* Compatible with the TensorFlow operator Gelu.
 */
 REG_OP(Gelu)
     .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT}))
@@ -124,18 +125,18 @@ REG_OP(SwishGrad)
 .OP_END_FACTORY_REG(SwishGrad)
 
 /**
-*@brief Computes the gradient for the gelu of "x" . \n
+*@brief Computes the gradient for the gelu of "x" .
 
 *@par Inputs:
-*Three inputs, including:
-* @li dy: A Tensor. Must be one of the following types: float16, float32
-* @li x: A Tensor of the same type as "dy".
-* @li y: A Tensor of the same type as "dy" . \n
+* Three inputs, including:
+*@li dy: A Tensor. Must be one of the following types: float16, float32.
+*@li x: A Tensor of the same type as "dy".
+*@li y: A Tensor of the same type as "dy". \n
 
 *@par Outputs:
 *z: A Tensor. Has the same type as "dy".
 *@par Third-party framework compatibility
-*Compatible with the TensorFlow operator GeluGrad
+* Compatible with the TensorFlow operator GeluGrad.
 */
 REG_OP(GeluGrad)
     .INPUT(dy, TensorType({DT_FLOAT16, DT_FLOAT}))
@@ -419,7 +420,7 @@ REG_OP(Softplus)
 .OP_END_FACTORY_REG(Softplus)
 
 /**
-*@brief Computes softplus gradients for a softplus operation . \n
+* @brief Computes softplus gradients for a softplus operation.
 
 *@par Inputs:
 *Two inputs:
@@ -440,7 +441,7 @@ REG_OP(SoftplusGrad)
 .OP_END_FACTORY_REG(SoftplusGrad)
 
 /**
-*@brief Computes softsign: x/(abs(x) + 1) . \n
+* @brief Computes softsign: x/(abs(x) + 1).
 
 *@par Inputs:
 * One input:
@@ -798,7 +799,7 @@ REG_OP(LeakyReluGrad)
 .OP_END_FACTORY_REG(LeakyReluGrad)
 
 /**
-*@brief Thresholds grad each element of the input Tensor . \n
+*@brief Computes the threshold gradient for each element of the input Tensor.
 
 *@par Inputs:
 * @li gradients: A Tensor shape and dtype of input gradients. Support float16, int32.
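+*
+* For intuition, a hedged scalar sketch of the usual threshold backward rule
+* that this operator's inputs suggest (hypothetical reference, not the device
+* kernel): the gradient is kept only where the forward input exceeded the
+* threshold.
+* @code
+*   float threshold_grad_ref(float gradient, float feature, float threshold) {
+*     return feature > threshold ? gradient : 0.0f;
+*   }
+* @endcode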
@@ -821,7 +822,7 @@ REG_OP(ThresholdGradV2D) .OP_END_FACTORY_REG(ThresholdGradV2D) /** -*@brief Thresholds each element of the input Tensor y = (x > threshold) ? x : value . \n +*@brief Thresholds each element of the input Tensor y = (x > threshold) ? x : value . *@par Inputs: *x: A Tensor dtype of real number . \n diff --git a/third_party/fwkacllib/inc/ops/quantize_ops.h b/third_party/fwkacllib/inc/ops/quantize_ops.h index 5a66b4ee..ae701295 100644 --- a/third_party/fwkacllib/inc/ops/quantize_ops.h +++ b/third_party/fwkacllib/inc/ops/quantize_ops.h @@ -93,18 +93,18 @@ REG_OP(Quantize) * @li sqrt_mode: A optional bool, specifying whether to perform square root on "scale", either "True" or "False". * Defaults to "False". * @li round_mode: An optional string, specifying the float16 to int8 cast type. -* The value range is [Round, Floor, Ceil, Truncate]. Defaults to "Round" . +* The value range is [Round, Floor, Ceil, Trunc]. Defaults to "Round" . * @li dst_type: A optional int32, specifying the output data type. Defaults to "DT_INT8" . \n * @par Outputs: * y: The quantized output tensor of type int8 or int4. \n * @attention Constraints: -* round_mode value range is [Round, Floor, Ceil, Truncate]. +* round_mode value range is [Round, Floor, Ceil, Trunc]. * @li Round: round to nearest, tie to even(c language rint). * @li Floor: round to minus infinity(c language floor). * @li Ceil: round to positive infinity(c language ceil). -* @li Truncate: round to zero(c language trunc). \n +* @li Trunc: round to zero(c language trunc). \n * @par Third-party framework compatibility * It is a custom operator. It has no corresponding operator in Caffe. diff --git a/third_party/fwkacllib/inc/ops/reduce_ops.h b/third_party/fwkacllib/inc/ops/reduce_ops.h index 2c4b3059..079982db 100644 --- a/third_party/fwkacllib/inc/ops/reduce_ops.h +++ b/third_party/fwkacllib/inc/ops/reduce_ops.h @@ -25,7 +25,7 @@ namespace ge { /** -*@brief Performs reduced batch normalization . \n +*@brief Performs reduced batch normalization . *@par Inputs: *x: A tensor of type float16 or float32. \n @@ -67,7 +67,7 @@ REG_OP(BN3DTrainingReduce) .OP_END_FACTORY_REG(BN3DTrainingReduce) /** -*@brief Performs the backpropagation of BatchNorm . \n +*@brief Performs the backpropagation of BatchNorm . *@par Inputs: * Seven inputs, including: @@ -153,7 +153,7 @@ REG_OP(BN3DTrainingReduceGrad) .OP_END_FACTORY_REG(BN3DTrainingReduceGrad) /** -*@brief Performs reduced batch normalization . \n +*@brief Performs reduced batch normalization . *@par Inputs: * Seven inputs, including: @@ -183,10 +183,10 @@ REG_OP(BN3DTrainingReduceGrad) *@attention Constraints: *@li This operator is a BatchNorm fusion operator for updating the moving -averages for training. -*This operator is used in conjunction with BNTrainingUpdate. -*@li For Ascend 310, the result accuracy fails to reach 1/1000 due to the square -* root instruction. +* averages for training. This operator is used in conjunction with +* BNTrainingUpdate. +*@li For Ascend 310, the result accuracy fails to reach 1/1000 due to the +* square root instruction. */ REG_OP(BNTrainingUpdate) .INPUT(x, TensorType({DT_FLOAT16,DT_FLOAT})) @@ -259,7 +259,7 @@ REG_OP(BN3DTrainingUpdate) .OP_END_FACTORY_REG(BN3DTrainingUpdate) /** -*@brief Performs batch normalization for inference . \n +*@brief Performs batch normalization for inference . *@par Inputs: * Five inputs, including: @@ -277,8 +277,8 @@ REG_OP(BN3DTrainingUpdate) *y: A tensor of type float16 or float32 for the normalized "x" . 
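+*
+* For reference, a hedged scalar sketch of the usual batch-norm inference
+* formula that the documented inputs suggest (not the device kernel):
+* @code
+*   // requires <cmath>; all names mirror the inputs and attribute above
+*   float bn_infer_ref(float x, float scale, float offset, float mean,
+*                      float variance, float epsilon) {
+*     return scale * (x - mean) / std::sqrt(variance + epsilon) + offset;
+*   }
+* @endcode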
\n *@attention Constraints: -*For Ascend 310, the result accuracy fails to reach 1/1000 due to the square root -* instruction. +*For Ascend 310, the result accuracy fails to reach 1/1000 due to the +* square root instruction. */ REG_OP(BNInfer) .INPUT(x, TensorType({DT_FLOAT16,DT_FLOAT})) @@ -291,19 +291,21 @@ REG_OP(BNInfer) .OP_END_FACTORY_REG(BNInfer) /** -*@brief Performs reduced batch normalization. For some scene which don't contain -assignmoving average . \n +*@brief Performs reduced batch normalization. For some scenes which don't +* contain assign moving average . *@par Inputs: *Five inputs, including: *@li x: A tensor of type float16 or float32. *@li sum: A tensor of type float32 for the output of operator BNTrainingReduce. -*@li square_sum: A tensor of type float32 for the output of operator BNTrainingReduce. +*@li square_sum: A tensor of type float32 for the output of operator +* BNTrainingReduce. *@li scale: A tensor of type float32, for the scaling factor. *@li offset: A tensor of type float32, for the scaling offset . \n *@par Attributes: -*epsilon: A required float32, specifying the small value added to variance to avoid dividing by zero . \n +*epsilon: A required float32, specifying the small value added to +* variance to avoid dividing by zero . \n *@par Outputs: *Three outputs, including: @@ -313,7 +315,8 @@ assignmoving average . \n *@attention Constraints: *This operator is used in conjunction with BNTrainingReduce. -For Ascend 310, the result accuracy fails to reach 1/1000 due to the square root instruction. +*For Ascend 310, the result accuracy fails to reach 1/1000 due to +* the square root instruction. */ REG_OP(BNTrainingUpdateV2) .INPUT(x, TensorType({DT_FLOAT16,DT_FLOAT})) @@ -328,30 +331,35 @@ REG_OP(BNTrainingUpdateV2) .OP_END_FACTORY_REG(BNTrainingUpdateV2) /** -*@brief Performs reduced batch normalization v3. For some scene which don't contain -assign moving average . \n +*@brief Performs reduced batch normalization v3. For some scenes which +* don't contain assign moving average . *@par Inputs: * Five inputs, including: *@li x: A tensor of type float16 or float32. *@li sum: A tensor of type float32 for the output of operator BNTrainingReduce. -*@li square_sum: A tensor of type float32 for the output of operator BNTrainingReduce. +*@li square_sum: A tensor of type float32 for the output of operator +* BNTrainingReduce. *@li scale: A tensor of type float32, for the scaling factor. *@li offset: A tensor of type float32, for the scaling offset . \n *@par Attributes: -*epsilon: A required float32, specifying the small value added to variance to avoid dividing by zero . \n +*epsilon: A required float32, specifying the small value added to variance +* to avoid dividing by zero . \n *@par Outputs: *@li y: A tensor of type float16 or float32, for normalized "x". *@li batch_mean: A tensor of type float32, for the mean of "x". *@li batch_variance: A tensor of type float32, for the variance of "x". -*@li reserve_1: A tensor of type float32, for the mean of batch "x". Has the same type as batch_mean. -*@li reserve_2: A tensor of type float32, for the variance of batch "x". Has the same type as batch_mean . \n +*@li reserve_1: A tensor of type float32, for the mean of batch "x". +* Has the same type as batch_mean. +*@li reserve_2: A tensor of type float32, for the variance of batch "x". +* Has the same type as batch_mean . \n *@attention Constraints: *@li This operator is used in conjunction with BNTrainingReduce. 
-*@li For Ascend 310, the result accuracy fails to reach 1/1000 due to the square root instruction. +*@li For Ascend 310, the result accuracy fails to reach 1/1000 due to +* the square root instruction. */ REG_OP(BNTrainingUpdateV3) .INPUT(x, TensorType({DT_FLOAT16,DT_FLOAT})) @@ -368,7 +376,7 @@ REG_OP(BNTrainingUpdateV3) .OP_END_FACTORY_REG(BNTrainingUpdateV3) /** -*@brief Performs the backpropagation of BatchNorm . \n +*@brief Performs the backpropagation of BatchNorm . *@par Inputs: * Four inputs, including: @@ -436,16 +444,17 @@ REG_OP(BN3DTrainingUpdateGrad) .OP_END_FACTORY_REG(BN3DTrainingUpdateGrad) /** -*@brief Performs the backpropagation of BatchNorm for inference . \n +*@brief Performs the backpropagation of BatchNorm for inference . *@par Inputs: * Three inputs, including: -*@li grads: A tensor of type loat16 or float32, for the gradient. +*@li grads: A tensor of type float16 or float32, for the gradient. *@li scale: A tensor of type float32. *@li batch_variance: A tensor of type float32. It is an output of BatchNorm . \n *@par Attributes: -*epsilon: An optional float32. Defaults to "0.0001". A small float number added to the variance of "x" . \n +*epsilon: An optional float32. Defaults to "0.0001". A small float number +* added to the variance of "x" . \n *@par Outputs: *x_backprop: A Tensor of type float16 or float32, for the offset of "x" . \n diff --git a/third_party/fwkacllib/inc/ops/selection_ops.h b/third_party/fwkacllib/inc/ops/selection_ops.h index 5222bf7d..810d024b 100644 --- a/third_party/fwkacllib/inc/ops/selection_ops.h +++ b/third_party/fwkacllib/inc/ops/selection_ops.h @@ -2029,26 +2029,29 @@ REG_OP(Cummax) /** *@brief Extends the input with copies of data along a specified dimension. For example: -*(1) If x = [[[1, 2], [3, 4], [5, 6]], [[7, 8], [9, 10], [11, 12]]], with shape (2, 3, 2); -*(2) axis = 1; -*(3) tiles = 2; -*(4) Then, y = [[[1, 2], [3, 4], [5, 6], [1, 2], [3, 4], [5, 6]], [[7, 8], [9, 10], [11, 12], [7, 8], [9, 10], [11, 12]]], with shape (2, 6, 2) . \n +*(1) If x = [[[1, 2], [3, 4], [5, 6]], [[7, 8], [9, 10], [11, 12]]], with shape (2, 3, 2); \n +*(2) axis = 1; \n +*(3) tiles = 2; \n +*(4) Then, y = [[[1, 2], [3, 4], [5, 6], [1, 2], [3, 4], [5, 6]], [[7, 8], +* [9, 10], [11, 12], [7, 8], [9, 10], [11, 12]]], +* with shape (2, 6, 2) . \n *@par Inputs: * One input: *input_x: A Tensor with any format. Must be one of the following types: float16, float32, int8, int16, int32, int64, uint8, uint16, uint32, uint64 . \n -*@par Attributes: -*@li axis: An optional int32, specifying the axis to tile. Defaults to 1. -*@li tiles: A required int32, specifying the number of copies (tiles) to output . \n +* @par Attributes: +* @li axis: An optional int32, specifying the axis to tile. Defaults to 1. +* @li tiles: A required int32, specifying the number of copies (tiles) to output . \n *@par Outputs: -*output_y: A Tensor of any format. Must be one of the following types: float16, float32, int8, int16, int32, int64, uint8, uint16, uint32, uint64 . \n +* output_y: A Tensor of any format. Must be one of the following types: +* float16, float32, int8, int16, int32, int64, uint8, uint16, uint32, uint64 . \n -*@attention Constraints: -*@li "axis" must be within the rank of the input tensor. -*@li "tiles" must be greater than 1. -*@par Third-party framework compatibility +* @attention Constraints: +* @li "axis" must be within the rank of the input tensor. +* @li "tiles" must be greater than 1. 
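+*
+* A hedged usage sketch matching the (2, 3, 2) -> (2, 6, 2) example above,
+* assuming the generated ge::op::TileWithAxis wrapper; "input_x" is a
+* placeholder tensor:
+* @code
+*   auto tile = ge::op::TileWithAxis("tile_with_axis")
+*                   .set_input_x(input_x)
+*                   .set_attr_axis(1)    // tile along dimension 1
+*                   .set_attr_tiles(2);  // two copies per the example
+* @endcode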
+* @par Third-party framework compatibility * Compatible with the Caffe operator Tile. */ REG_OP(TileWithAxis) @@ -2061,17 +2064,17 @@ REG_OP(TileWithAxis) .OP_END_FACTORY_REG(TileWithAxis) /** -*@brief Read data with offset and stride . \n +* @brief Read data with offset and stride . -*@par Inputs: -*One input: -*x: A Tensor. Must be one of the following types: float16, int8 . \n +* @par Inputs: +* One input: +* x: A Tensor. Must be one of the following types: float16, int8 . \n -*@par Attributes: -*@li stride_list: An optional 5D list of type int32. Defaults to "[1,1,1,1,1]" . \n +* @par Attributes: +* stride_list: An optional 5D list of type int32. Defaults to "[1,1,1,1,1]" . \n -*@par Outputs: -*y: A Tensor of the same type as "x". +* @par Outputs: +* y: A Tensor of the same type as "x". *@par Restrictions: *Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. @@ -2083,10 +2086,10 @@ REG_OP(ReadSelect) .OP_END_FACTORY_REG(ReadSelect) /** -*@brief: Write data with offset . \n +* @brief: Write data with offset . -*@par Inputs: -*x: A Tensor. Must be one of the following types: int32, float32, float16, int8 . \n +* @par Inputs: +* x: A Tensor. Must be one of the following types: int32, float32, float16, int8 . \n *@par Outputs: *y: A Tensor. Has the same type as "x". @@ -2100,14 +2103,14 @@ REG_OP(WriteSelect) .OP_END_FACTORY_REG(WriteSelect) /** -*@brief Read data by stride. +* @brief Read data by stride. -*@par Inputs: -*x: A Tensor. Must be one of the following types: float16, int8. \n +* @par Inputs: +* x: A Tensor. Must be one of the following types: float16, int8. \n -*@par Attributes: -*@li axis: A required int32, specifying the index of axis to read by stride. \n -*@li stride: A required int32, specifying the value of reading stride. \n +* @par Attributes: +* @li axis: A required int32, specifying the index of axis to read by stride. +* @li stride: A required int32, specifying the value of reading stride. \n *@par Outputs: *y: A Tensor of the same type as "x". @@ -2120,14 +2123,14 @@ REG_OP(StridedRead) .OP_END_FACTORY_REG(StridedRead) /** -*@brief Write data by stride. +* @brief Write data by stride. -*@par Inputs: -*x: A Tensor. Must be one of the following types: float16, int8. \n +* @par Inputs: +* x: A Tensor. Must be one of the following types: float16, int8. \n -*@par Attributes: -*@li axis: A required int32, specifying the index of axis to write by stride. \n -*@li stride: A required int32, specifying the value of writing stride. \n +* @par Attributes: +* @li axis: A required int32, specifying the index of axis to write by stride. +* @li stride: A required int32, specifying the value of writing stride. \n *@par Outputs: *y: A Tensor. Has the same type as "x". @@ -2140,20 +2143,20 @@ REG_OP(StridedWrite) .OP_END_FACTORY_REG(StridedWrite) /** -*@brief Computes the cumulative log sum exp of the tensor "x" along "axis" . \n +* @brief Computes the cumulative log sum exp of the tensor "x" along "axis" . -*@par Inputs: +* @par Inputs: * Two inputs, including: -*@li x: A Tensor. Must be one of the following types: float32, float16. -*@li axis A Tensor of type int32 or int16. Defaults to "0". +* @li x: A Tensor. Must be one of the following types: float32, float16. +* @li axis A Tensor of type int32 or int16. Defaults to "0". * *@par Attributes: *@li exclusive: If "False", performs inclusive CumulativeLogsumexp, which means that the first element of the input is identical to the first element of the output. If "True", performs exclusive CumulativeLogsumexp. 
 */
 REG_OP(CumulativeLogsumexp)
@@ -2169,7 +2172,7 @@ REG_OP(CumulativeLogsumexp)
 *
 *@par Inputs:
 * One input:
-*x: A Tensor. Must be one of the following types: float32, float16.
+* x: A Tensor. Must be one of the following types: float32, float16.
 *
 *@par Attributes:
 *@li axis: A Tensor of type int32 or int16. Defaults to "0".
@@ -2224,15 +2227,16 @@ REG_OP(InplaceIndexAdd)
 
 /**
 * @brief Replace the value of x with value according to mask.
+
 * @par Inputs:
-* three inputs, including:
-* @li x: A Tensor of dtype is float16 or float32 or int64 or int32 or int8.
-* @li mask: A Tensor of dtype bool.
-* @li value: A Tensor of dtype float16 or float32 or int64 or int32 or int8.
+* Three inputs, including:
+* @li x: A Tensor of dtype float16 or float32 or int64 or int32 or int8.
+* @li mask: A Tensor of dtype bool.
+* @li value: A Tensor of dtype float16 or float32 or int64 or int32 or int8. \n
 
 * @par Outputs:
-* y: A tensor. Must be one of the following dtypes:
-* float16, float32, int64, int32, int8.
+* y: A tensor. Must be one of the following dtypes:
+* float16, float32, int64, int32, int8.
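+*
+* @par Example
+* An element-wise reference of the fill semantics (editor's sketch assuming
+* equal-shaped flat inputs and a scalar fill value; broadcasting is omitted;
+* "MaskedFillRef" is a hypothetical helper, not part of this header):
+* @code
+*   #include <cstddef>
+*   #include <vector>
+*
+*   std::vector<float> MaskedFillRef(const std::vector<float> &x,
+*                                    const std::vector<bool> &mask,
+*                                    float value) {
+*     std::vector<float> y(x);
+*     for (std::size_t i = 0; i < y.size(); ++i) {
+*       if (mask[i]) { y[i] = value; }  // Replace only where mask is true.
+*     }
+*     return y;
+*   }
+* @endcode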
"num" indicates the number of loop masked fill, and the value N -* indicates the batch of ND Tensor, if input x shape is 1-D, N = 1. \n -*@li end: masked fill end pos. A 3D Tensor of int32 with +* indicates the batch of ND Tensor, if input x shape is 1-D, N = 1. +* @li end: masked fill end pos. A 3D Tensor of int32 with * shape (num, N). "num" indicates the number of loop masked fill, and the value N -* indicates the batch of ND Tensor. \n -*@li value: masked fill value. A 2D Tensor of float32/float16/int32/int8 with -* shape (num,). "num" indicates the number of loop masked fill +* indicates the batch of ND Tensor. +* @li value: masked fill value. A 2D Tensor of float32/float16/int32/int8 with +* shape (num,). "num" indicates the number of loop masked fill. \n -*@par Attributes: -*@li axis: axis with masked fill of int32. Defaults to -1. +* @par Attributes: +* @li axis: axis with masked fill of int32. Defaults to -1. -*@par Outputs: -*y: A ND Tensor of float32/float16/int32/int8 with shapes 1-D (D,), 2-D(N, D), 3-D(N, C, D) +* @par Outputs: +* y: A ND Tensor of float32/float16/int32/int8 with shapes 1-D (D,), 2-D(N, D), 3-D(N, C, D) *@attention Constraints: * Warning: input shape's length must not be bigger than 1024 * 1024 * 1024. @@ -2456,10 +2460,12 @@ REG_OP(MaskedFillRange) * * @par Inputs: * Six inputs, including: -* @li topk_pq_distance: A sorted Tensor, Will be updated after calculation. Must be one of the following types: float32, float16. +* @li topk_pq_distance: A sorted Tensor, Will be updated after calculation. +* Must be one of the following types: float32, float16. * @li topk_pq_index: A Tensor of type int32, index corresponding to topk_pq_distance. * @li topk_pq_ivf: A Tensor of type int32 , the bucket number corresponding to topk_pq_distance. -* @li pq_distance: A Tensor of type float32 or float16, the new data set will be reordered with topk_pq_distance and updated to topk_pq_distance. +* @li pq_distance: A Tensor of type float32 or float16, +* the new data set will be reordered with topk_pq_distance and updated to topk_pq_distance. * @li pq_index: A Tensor of type int32, index corresponding to pq_distance. * @li pq_ivf: A scalar of type int32 , the bucket number corresponding to pq_distance. \n *