@@ -111,6 +111,7 @@ static const int ACL_ERROR_DUMP_NOT_RUN = 100045; | |||
static const int ACL_ERROR_PROF_REPEAT_SUBSCRIBE = 148046; | |||
static const int ACL_ERROR_PROF_API_CONFLICT = 148047; | |||
static const int ACL_ERROR_INVALID_MAX_OPQUEUE_NUM_CONFIG = 148048; | |||
static const int ACL_ERROR_INVALID_OPP_PATH = 148049; | |||
static const int ACL_ERROR_BAD_ALLOC = 200000; | |||
static const int ACL_ERROR_API_NOT_SUPPORT = 200001; | |||
@@ -17,6 +17,20 @@ | |||
#ifndef INC_EXTERNAL_GE_GE_ERROR_CODES_H_ | |||
#define INC_EXTERNAL_GE_GE_ERROR_CODES_H_ | |||
#if defined(_MSC_VER) | |||
#ifdef FUNC_VISIBILITY | |||
#define GE_FUNC_VISIBILITY __declspec(dllexport) | |||
#else | |||
#define GE_FUNC_VISIBILITY | |||
#endif | |||
#else | |||
#ifdef FUNC_VISIBILITY | |||
#define GE_FUNC_VISIBILITY __attribute__((visibility("default"))) | |||
#else | |||
#define GE_FUNC_VISIBILITY | |||
#endif | |||
#endif | |||
#include <stddef.h> | |||
#ifdef __cplusplus | |||
@@ -92,7 +92,8 @@ static const int32_t ACL_ERROR_RT_LABEL_CONTEXT = 507031; // label not | |||
static const int32_t ACL_ERROR_RT_PROGRAM_USE_OUT = 507032; // program register num use out | |||
static const int32_t ACL_ERROR_RT_DEV_SETUP_ERROR = 507033; // device setup error | |||
static const int32_t ACL_ERROR_RT_DRV_INTERNAL_ERROR = 507899; // drv internal error | |||
static const int32_t ACL_ERROR_RT_AICPU_INTERNAL_ERROR = 507900; // aicpu internal error | |||
#ifdef __cplusplus | |||
} | |||
@@ -92,7 +92,8 @@ static const int32_t ACL_ERROR_RT_LABEL_CONTEXT = 507031; // label not | |||
static const int32_t ACL_ERROR_RT_PROGRAM_USE_OUT = 507032; // program register num use out | |||
static const int32_t ACL_ERROR_RT_DEV_SETUP_ERROR = 507033; // device setup error | |||
static const int32_t ACL_ERROR_RT_DRV_INTERNAL_ERROR = 507899; // drv internal error | |||
static const int32_t ACL_ERROR_RT_AICPU_INTERNAL_ERROR = 507900; // aicpu internal error | |||
#ifdef __cplusplus | |||
} | |||
@@ -107,11 +107,13 @@ across multiple sessions . \n | |||
REG_OP(Unbatch) | |||
.INPUT(x_tensor, TensorType({DT_INT8, DT_UINT8, DT_INT16, DT_UINT16, \ | |||
DT_INT32, DT_INT64, DT_BOOL, DT_FLOAT, DT_DOUBLE})) | |||
DT_INT32, DT_INT64, DT_BOOL, DT_FLOAT, DT_DOUBLE, DT_FLOAT16, \ | |||
DT_COMPLEX64, DT_COMPLEX128})) | |||
.INPUT(index, TensorType({DT_INT64})) | |||
.INPUT(id, TensorType({DT_INT64})) | |||
.OUTPUT(y_tensor, TensorType({DT_INT8, DT_UINT8, DT_INT16, DT_UINT16, \ | |||
DT_INT32, DT_INT64, DT_BOOL, DT_FLOAT, DT_DOUBLE})) | |||
DT_INT32, DT_INT64, DT_BOOL, DT_FLOAT, DT_DOUBLE, DT_FLOAT16, \ | |||
DT_COMPLEX64, DT_COMPLEX128})) | |||
.REQUIRED_ATTR(timeout_micros, Int) | |||
.ATTR(container, String, "") | |||
.ATTR(shared_name, String, "") | |||
@@ -146,13 +148,16 @@ across multiple sessions . \n | |||
REG_OP(UnbatchGrad) | |||
.INPUT(x_input, TensorType({DT_INT8, DT_UINT8, DT_INT16, DT_UINT16, \ | |||
DT_INT32, DT_INT64, DT_BOOL, DT_FLOAT, DT_DOUBLE})) | |||
DT_INT32, DT_INT64, DT_BOOL, DT_FLOAT, DT_DOUBLE, DT_FLOAT16, \ | |||
DT_COMPLEX64, DT_COMPLEX128})) | |||
.INPUT(index, TensorType({DT_INT64})) | |||
.INPUT(grad, TensorType({DT_INT8, DT_UINT8, DT_INT16, DT_UINT16, \ | |||
DT_INT32, DT_INT64, DT_BOOL, DT_FLOAT, DT_DOUBLE})) | |||
DT_INT32, DT_INT64, DT_BOOL, DT_FLOAT, DT_DOUBLE, DT_FLOAT16, \ | |||
DT_COMPLEX64, DT_COMPLEX128})) | |||
.INPUT(id, TensorType({DT_INT64})) | |||
.OUTPUT(y_grad, TensorType({DT_INT8, DT_UINT8, DT_INT16, DT_UINT16, \ | |||
DT_INT32, DT_INT64, DT_BOOL, DT_FLOAT, DT_DOUBLE})) | |||
DT_INT32, DT_INT64, DT_BOOL, DT_FLOAT, DT_DOUBLE, DT_FLOAT16, \ | |||
DT_COMPLEX64, DT_COMPLEX128})) | |||
.ATTR(container, String, "") | |||
.ATTR(shared_name, String, "") | |||
.OP_END_FACTORY_REG(UnbatchGrad) | |||
@@ -1430,6 +1430,24 @@ REG_OP(OrderedMapClear) | |||
.ATTR(shared_name, String, "") | |||
.OP_END_FACTORY_REG(OrderedMapClear) | |||
/** | |||
*@brief FakeQueue, support tf api FixedLengthRecordReader. \n | |||
*@par Inputs: | |||
*Including: | |||
* @li resource: A Tensor of type DT_RESOURCE. | |||
*@par Outputs: | |||
*handle: A Tensor of type DT_STRING ref. \n | |||
*@par Third-party framework compatibility | |||
*Compatible with the TensorFlow operator FakeQueue. | |||
*/ | |||
REG_OP(FakeQueue) | |||
.INPUT(resource, TensorType({DT_RESOURCE})) | |||
.OUTPUT(handle, TensorType({DT_STRING})) | |||
.OP_END_FACTORY_REG(FakeQueue) | |||
/** | |||
*@brief Returns the number of incomplete elements in the underlying container. \n | |||
@@ -2258,6 +2276,7 @@ REG_OP(LruCache) | |||
.ATTR(shared_name, String, "LruCache") | |||
.ATTR(cache_size, Int, 100000) | |||
.ATTR(load_factor, Float, 1) | |||
.REQUIRED_ATTR(dtype, Type) | |||
.OP_END_FACTORY_REG(LruCache) | |||
/** | |||
@@ -2277,9 +2296,9 @@ REG_OP(CacheAdd) | |||
.INPUT(cache, TensorType({DT_RESOURCE})) | |||
.INPUT(ids, TensorType({DT_INT64, DT_INT32, DT_UINT64, DT_UINT32})) | |||
.OUTPUT(swap_in_id, TensorType({DT_INT64, DT_INT32, DT_UINT64, DT_UINT32})) | |||
.OUTPUT(swap_in_idx, TensorType({DT_INT64})) | |||
.OUTPUT(swap_in_idx, TensorType({DT_INT64, DT_INT32, DT_UINT64, DT_UINT32})) | |||
.OUTPUT(swap_out_id, TensorType({DT_INT64, DT_INT32, DT_UINT64, DT_UINT32})) | |||
.OUTPUT(swap_out_idx, TensorType({DT_INT64})) | |||
.OUTPUT(swap_out_idx, TensorType({DT_INT64, DT_INT32, DT_UINT64, DT_UINT32})) | |||
.OP_END_FACTORY_REG(CacheAdd) | |||
/** | |||
@@ -2295,9 +2314,31 @@ REG_OP(CacheAdd) | |||
REG_OP(CacheRemoteIndexToLocal) | |||
.INPUT(cache, TensorType({DT_RESOURCE})) | |||
.INPUT(ids, TensorType({DT_INT64, DT_INT32, DT_UINT64, DT_UINT32})) | |||
.OUTPUT(local_idx, TensorType({DT_INT64})) | |||
.OUTPUT(local_idx, TensorType({DT_INT64, DT_INT32, DT_UINT64, DT_UINT32})) | |||
.OP_END_FACTORY_REG(CacheRemoteIndexToLocal) | |||
/** | |||
*@brief CacheAllIndexToLocal, get id in cache | |||
*@par Inputs: | |||
*cache: resource data | |||
*@par Outputs: | |||
*local_idx: id in cache. | |||
*@par Restrictions: | |||
*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. | |||
*/ | |||
REG_OP(CacheAllIndexToLocal) | |||
.INPUT(cache, TensorType({DT_RESOURCE})) | |||
.OUTPUT(local_idx, TensorType({DT_INT64, DT_INT32, DT_UINT64, DT_UINT32})) | |||
.REQUIRED_ATTR(dtype, Type) | |||
.OP_END_FACTORY_REG(CacheAllIndexToLocal) | |||
REG_OP(DynamicGetNext) | |||
.INPUT(x, TensorType::ALL()) | |||
.DYNAMIC_OUTPUT(y, TensorType::ALL()) | |||
.ATTR(output_types, ListType, {}) | |||
.ATTR(output_shapes, ListListInt, {{}, {}}) | |||
.ATTR(_dynamic_graph_execute_mode, String, "lazy_recompile") | |||
.ATTR(_getnext_inputs_shape_range, String, "") | |||
.OP_END_FACTORY_REG(DynamicGetNext) | |||
} // namespace ge | |||
#endif // OPS_BUILT_IN_OP_PROTO_INC_DATA_FLOW_OPS_H_ |
@@ -3627,6 +3627,35 @@ REG_OP(Lerp) | |||
.OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT})) | |||
.OP_END_FACTORY_REG(Lerp) | |||
/** | |||
*@brief Returns the num value of abs(x1-x2) > atol+rtol*abs(x2) element-wise. \n | |||
* | |||
*@par Inputs: | |||
*@li x1: A tensor. Must be one of the following types: float32, int32, uint8, int8, float16 | |||
*@li x2: A tensor of the same type as "x1". | |||
* | |||
*@par Attributes: | |||
* atol: Defaults to "1e-05". | |||
* rtol: Defaults to "1e-03". | |||
* | |||
*@par Outputs: | |||
* num: A tensor of type float32. | |||
* diff: A tensor of type float16. | |||
* | |||
*@par Restrictions: | |||
*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. | |||
* | |||
*/ | |||
REG_OP(DataCompare) | |||
.INPUT(x1, TensorType({ DT_FLOAT16, DT_FLOAT,DT_INT8, DT_UINT8, DT_INT32 })) | |||
.INPUT(x2, TensorType({ DT_FLOAT16, DT_FLOAT,DT_INT8, DT_UINT8, DT_INT32 })) | |||
.OUTPUT(num, TensorType({DT_FLOAT})) | |||
.OUTPUT(diff, TensorType({DT_FLOAT16})) | |||
.ATTR(atol, Float, 1e-5) | |||
.ATTR(rtol, Float, 1e-3) | |||
.OP_END_FACTORY_REG(DataCompare) | |||
/** | |||
*@brief Hardmax(element in input, axis) = 1 if the element is the first maximum value along the specified axis, 0 | |||
*otherwise The input does not need to explicitly be a 2D vector.The "axis" attribute indicates the dimension along | |||
@@ -3650,6 +3679,57 @@ REG_OP(HardMax) | |||
.OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT})) | |||
.ATTR(axis, Int, -1) | |||
.OP_END_FACTORY_REG(HardMax) | |||
/** | |||
* @brief Computes the dot product (inner product) of two tensors. This function does not broadcast. | |||
* @par Inputs: | |||
* Two inputs, including: | |||
* @li input_x: A Tensor. the first tensor must be 1d. \n | |||
* @li input_y: A Tensor. the second tensor must be 1d. \n | |||
* @par Outputs: | |||
* @li output: A Tensor. Result of the two inputs, must be 1d. \n | |||
* @par Third-party framework compatibility | |||
* Compatible with the Pytorch dot operator. \n | |||
*/ | |||
REG_OP(Dot) | |||
.INPUT(input_x, TensorType({DT_FLOAT, DT_FLOAT16, DT_UINT8, DT_INT8, DT_INT32})) | |||
.INPUT(input_y, TensorType({DT_FLOAT, DT_FLOAT16, DT_UINT8, DT_INT8, DT_INT32})) | |||
.OUTPUT(output, TensorType({DT_FLOAT, DT_FLOAT16, DT_UINT8, DT_INT8, DT_INT32})) | |||
.OP_END_FACTORY_REG(Dot) | |||
/** | |||
*@brief Returns a new tensor with boolean elements representing \n | |||
*if each element of input is "close" to the corresponding element of other \n | |||
*@par Inputs: | |||
*Two inputs, including: | |||
* @li x1: A tensor. Must be one of the following types: | |||
* float16, float32, int32. \n | |||
* @li x2: A tensor with the same type and shape of x1's. \n | |||
*@par Attributes: | |||
*@li rtol: An optional float.Defaults to 1e-05. \n | |||
*@li atol: An optional float.Defaults to 1e-08. \n | |||
*@li equal_nan: An optional bool.Defaults to false. \n | |||
*@par Outputs: | |||
*y: A Tensor bool with the same shape of x1's. \n | |||
*@par Third-party framework compatibility | |||
*Compatible with the Pytorch operator isclose. \n | |||
*/ | |||
REG_OP(IsClose) | |||
.INPUT(x1, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32})) | |||
.INPUT(x2, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32})) | |||
.OUTPUT(y, TensorType({DT_BOOL})) | |||
.ATTR(rtol, Float, 1e-05) | |||
.ATTR(atol, Float, 1e-08) | |||
.ATTR(equal_nan, Bool, false) | |||
.OP_END_FACTORY_REG(IsClose) | |||
} // namespace ge | |||
#endif // OPS_BUILT_IN_OP_PROTO_INC_ELEWISE_CALCULATION_OPS_H_ |
@@ -238,6 +238,15 @@ REG_OP(HcomRemoteRead) | |||
.REQUIRED_ATTR(dtype, Type) | |||
.OP_END_FACTORY_REG(HcomRemoteRead) | |||
/** | |||
* @brief Performs Remote Ref Read of input tensors | |||
* @par Inputs: | |||
* remote: A tensor. describing the remote memory address to read: u64 remoteId, u64 addrRemote, u64 length | |||
* cache_var: The local base address | |||
* local_offset: Skip step length | |||
* @par Outputs: | |||
* cache_var: The local base address | |||
*/ | |||
REG_OP(HcomRemoteRefRead) | |||
.INPUT(remote, TensorType({DT_UINT64})) | |||
.INPUT(cache_var, TensorType({DT_UINT64})) | |||
@@ -258,6 +267,13 @@ REG_OP(HcomRemoteWrite) | |||
.INPUT(local, TensorType::ALL()) | |||
.OP_END_FACTORY_REG(HcomRemoteWrite) | |||
/** | |||
* @brief Performs Remote Write of input tensors | |||
 * @par Inputs: | |||
 * remote: A tensor. describing the remote memory address to write: u64 remoteId, u64 addrRemote, u64 length | |||
 * local: A Tensor. whose value is length / size_of(Type) | |||
*/ | |||
REG_OP(HcomRemoteScatterWrite) | |||
.INPUT(remote, TensorType({DT_INT64, DT_UINT64})) | |||
.INPUT(local, TensorType::ALL()) | |||
@@ -652,6 +652,62 @@ REG_OP(RGBToHSV) | |||
/** | |||
*@brief Generate a single randomly distorted bounding box for an image . \n | |||
*@par Inputs: | |||
*Input images must be a 4-D tensor. Inputs include: | |||
*@li image_size: 1-D, containing [height, width, channels]. | |||
*@li bounding_boxes: 3-D with shape [batch, N, 4] describing the N bounding | |||
boxes associated with the image. \n | |||
*@par Attributes: | |||
*@li seed: If either seed or seed2 are set to non-zero, the random number | |||
generator is seeded by the given seed. Otherwise, it is seeded by a random seed. | |||
*@li seed2: A second seed to avoid seed collision. | |||
*@li min_object_covered: The cropped area of the image must contain at least | |||
this fraction of any bounding box supplied. The value of this parameter should | |||
be non-negative. In the case of 0, the cropped area does not need to overlap | |||
any of the bounding boxes supplied . | |||
*@li aspect_ratio_range: The cropped area of the image must have an aspect | |||
ratio = width / height within this range. | |||
*@li max_attempts: Number of attempts at generating a cropped region of the | |||
image of the specified constraints. After max_attempts failures, return the | |||
entire image. | |||
*@li use_image_if_no_bounding_boxes: Controls behavior if no bounding boxes | |||
supplied. If true, assume an implicit bounding box covering the whole input. | |||
If false, raise an error . \n | |||
*@par Outputs: | |||
*@li begin: 1-D, containing [offset_height, offset_width, 0]. | |||
*@li size: 1-D, containing [target_height, target_width, -1]. | |||
*@li bboxes: 3-D with shape [1, 1, 4] containing the distorted bounding box . \n | |||
*@attention Constraints: | |||
*Input images can be of different types but output images are always float . \n | |||
*@par Third-party framework compatibility | |||
*Compatible with tensorflow SampleDistortedBoundingBox operator. | |||
*/ | |||
REG_OP(SampleDistortedBoundingBox) | |||
.INPUT(image_size, TensorType({ DT_UINT8, DT_INT8, DT_INT16, \ | |||
DT_INT32, DT_INT64 })) | |||
.INPUT(bounding_boxes, TensorType({ DT_FLOAT })) | |||
.OUTPUT(begin, TensorType({ DT_UINT8, DT_INT8, DT_INT16, \ | |||
DT_INT32, DT_INT64 })) | |||
.OUTPUT(size, TensorType({ DT_UINT8, DT_INT8, DT_INT16, \ | |||
DT_INT32, DT_INT64 })) | |||
.OUTPUT(bboxes, TensorType({ DT_FLOAT })) | |||
.ATTR(seed, Int, 0) | |||
.ATTR(seed2, Int, 0) | |||
.ATTR(min_object_covered, Float, 0.1f) | |||
.ATTR(aspect_ratio_range, ListFloat, { 0.75f, 1.33f }) | |||
.ATTR(area_range, ListFloat, { 0.05f, 1.0f }) | |||
.ATTR(max_attempts, Int, 100) | |||
.ATTR(use_image_if_no_bounding_boxes, Bool, false) | |||
.OP_END_FACTORY_REG(SampleDistortedBoundingBox) | |||
/** | |||
*@brief Generate a single randomly distorted bounding box for an image . \n | |||
*@par Inputs: | |||
*Input images must be a 4-D tensor. Inputs include: | |||
*@li image_size: 1-D, containing [height, width, channels]. | |||
@@ -1424,11 +1480,11 @@ REG_OP(Resize) | |||
*@par Attributes: | |||
*@li channels: An optional int. Defaults to 0. Number of color channels for the decoded image. | |||
*@li ratio: An optional int. Defaults to 1. Downscaling ratio. | |||
*@li fancy_upscaling: An optional bool. Defaults to True. If true use a slower but nicer upscaling of the chroma planes | |||
*@li try_recover_truncated: An optional bool. Defaults to False. If true try to recover an image from truncated input. | |||
*@li acceptable_fraction: An optional float. Defaults to 1. The minimum required fraction of lines before a truncated input is accepted. | |||
*@li dct_method: An optional string. Defaults to "". string specifying a hint about the algorithm used for decompression. \n | |||
*@par Outputs: | |||
*image: A Tensor dtype of uint8. | |||
@@ -61,8 +61,8 @@ REG_OP(CholeskyGrad) | |||
*@par Inputs: | |||
*The input x has to be symmetric and positive definite.Inputs include: | |||
*x:A Tensor. Must be one of the following types: double, float32. Shape | |||
is [..., M, M] . \n | |||
*x:A Tensor. Must be one of the following types: double, float32, float16, | |||
complex64, complex128. Shape is [..., M, M] . \n | |||
*@par Outputs: | |||
*y:A Tensor. Has the same type as x . \n | |||
@@ -76,8 +76,10 @@ form square matrices. | |||
*/ | |||
REG_OP(Cholesky) | |||
.INPUT(x, TensorType({DT_FLOAT, DT_DOUBLE})) | |||
.OUTPUT(y, TensorType({DT_FLOAT, DT_DOUBLE})) | |||
.INPUT(x, TensorType({DT_FLOAT, DT_DOUBLE, \ | |||
DT_FLOAT16, DT_COMPLEX64, DT_COMPLEX128})) | |||
.OUTPUT(y, TensorType({DT_FLOAT, DT_DOUBLE, \ | |||
DT_FLOAT16, DT_COMPLEX64, DT_COMPLEX128})) | |||
.OP_END_FACTORY_REG(Cholesky) | |||
/** | |||
@@ -87,8 +89,8 @@ of one or more square matrices . \n | |||
*@par Inputs: | |||
*The input x is a tensor of shape [N, M, M] whose inner-most 2 dimensions | |||
form square matrices. Inputs include: | |||
*x:A Tensor. Must be one of the following types: double, float32. Shape is | |||
[..., M, M] . \n | |||
*x:A Tensor. Must be one of the following types: double, float32, | |||
complex64, complex128. Shape is [..., M, M] . \n | |||
*@par Outputs: | |||
*@li y:A Tensor. Has the same type as x. | |||
@@ -103,9 +105,9 @@ form square matrices. \n | |||
*/ | |||
REG_OP(LogMatrixDeterminant) | |||
.INPUT(x, TensorType({DT_FLOAT, DT_DOUBLE})) | |||
.OUTPUT(sign, TensorType({DT_FLOAT, DT_DOUBLE})) | |||
.OUTPUT(y, TensorType({DT_FLOAT, DT_DOUBLE})) | |||
.INPUT(x, TensorType({DT_FLOAT, DT_DOUBLE, DT_COMPLEX64, DT_COMPLEX128})) | |||
.OUTPUT(sign, TensorType({DT_FLOAT, DT_DOUBLE, DT_COMPLEX64, DT_COMPLEX128})) | |||
.OUTPUT(y, TensorType({DT_FLOAT, DT_DOUBLE, DT_COMPLEX64, DT_COMPLEX128})) | |||
.OP_END_FACTORY_REG(LogMatrixDeterminant) | |||
/** | |||
@@ -114,8 +116,8 @@ REG_OP(LogMatrixDeterminant) | |||
*@par Inputs: | |||
*The input x is a tensor of shape [N, M, M] whose inner-most 2 dimensions | |||
form square matrices. Inputs include: | |||
*x:A Tensor. Must be one of the following types: double, float32. Shape is | |||
[..., M, M] . \n | |||
*x:A Tensor. Must be one of the following types: double, float32, complex64, | |||
complex128. Shape is [..., M, M] . \n | |||
*@par Outputs: | |||
*y:A Tensor. Has the same type as x . \n | |||
@@ -129,8 +131,8 @@ form square matrices. | |||
*/ | |||
REG_OP(MatrixDeterminant) | |||
.INPUT(x, TensorType({DT_FLOAT, DT_DOUBLE})) | |||
.OUTPUT(y, TensorType({DT_FLOAT, DT_DOUBLE})) | |||
.INPUT(x, TensorType({DT_FLOAT, DT_DOUBLE, DT_COMPLEX64, DT_COMPLEX128})) | |||
.OUTPUT(y, TensorType({DT_FLOAT, DT_DOUBLE, DT_COMPLEX64, DT_COMPLEX128})) | |||
.OP_END_FACTORY_REG(MatrixDeterminant) | |||
/** | |||
@@ -140,8 +142,7 @@ their adjoints (conjugate transposes) . \n | |||
*@par Inputs: | |||
*The input x is a tensor of shape [..., M, M] whose inner-most 2 dimensions | |||
form square matrices. Inputs include: | |||
*x:A Tensor. Must be one of the following types: double, float. Shape is | |||
[..., M, M] . \n | |||
*x:A Tensor of input. Shape is [..., M, M] . \n | |||
*@par Attributes: | |||
*adjoint:An optional bool. Defaults to False.Boolean indicating whether to | |||
@@ -159,8 +160,10 @@ form square matrices. \n | |||
*/ | |||
REG_OP(MatrixInverse) | |||
.INPUT(x, TensorType({DT_FLOAT, DT_DOUBLE})) | |||
.OUTPUT(y, TensorType({DT_FLOAT, DT_DOUBLE})) | |||
.INPUT(x, TensorType({DT_FLOAT, DT_DOUBLE, DT_FLOAT16, \ | |||
DT_COMPLEX64, DT_COMPLEX128})) | |||
.OUTPUT(y, TensorType({DT_FLOAT, DT_DOUBLE, DT_FLOAT16, \ | |||
DT_COMPLEX64, DT_COMPLEX128})) | |||
.ATTR(adjoint, Bool, false) | |||
.OP_END_FACTORY_REG(MatrixInverse) | |||
@@ -169,8 +172,7 @@ REG_OP(MatrixInverse) | |||
*@par Inputs: | |||
*The input rhs must have the same type as matrix. Inputs include: | |||
*@li matrix:A Tensor. Must be one of the following types: double, float. | |||
Shape is [..., M, M]. | |||
*@li matrix:A Tensor of input. Shape is [..., M, M]. | |||
*@li rhs:A Tensor. Must have the same type as matrix. Shape is [..., M, K] . \n | |||
*@par Attributes: | |||
@@ -189,9 +191,9 @@ dimensions form square matrices. \n | |||
*/ | |||
REG_OP(MatrixSolve) | |||
.INPUT(matrix, TensorType({DT_FLOAT, DT_DOUBLE})) | |||
.INPUT(rhs, TensorType({DT_FLOAT, DT_DOUBLE})) | |||
.OUTPUT(y, TensorType({DT_FLOAT, DT_DOUBLE})) | |||
.INPUT(matrix, TensorType({DT_FLOAT, DT_DOUBLE, DT_COMPLEX64, DT_COMPLEX128})) | |||
.INPUT(rhs, TensorType({DT_FLOAT, DT_DOUBLE, DT_COMPLEX64, DT_COMPLEX128})) | |||
.OUTPUT(y, TensorType({DT_FLOAT, DT_DOUBLE, DT_COMPLEX64, DT_COMPLEX128})) | |||
.ATTR(adjoint, Bool, false) | |||
.OP_END_FACTORY_REG(MatrixSolve) | |||
@@ -221,8 +223,10 @@ dimensions form square matrices. \n | |||
*/ | |||
REG_OP(MatrixSolveLs) | |||
.INPUT(matrix, TensorType({DT_FLOAT, DT_DOUBLE})) | |||
.INPUT(rhs, TensorType({DT_FLOAT, DT_DOUBLE})) | |||
.INPUT(matrix, TensorType({DT_FLOAT, DT_DOUBLE, DT_FLOAT16, \ | |||
DT_COMPLEX64, DT_COMPLEX128})) | |||
.INPUT(rhs, TensorType({DT_FLOAT, DT_DOUBLE, DT_FLOAT16, \ | |||
DT_COMPLEX64, DT_COMPLEX128})) | |||
.INPUT(l2, TensorType({DT_DOUBLE})) | |||
.OUTPUT(y, TensorType({DT_FLOAT, DT_DOUBLE})) | |||
.ATTR(fast, Bool, true) | |||
@@ -234,8 +238,7 @@ matrices by backsubstitution . \n | |||
*@par Inputs: | |||
*The input rhs must have the same type as matrix. Inputs include: | |||
*@li matrix: A Tensor. Must be one of the following types: double, float. | |||
Shape is [..., M, M]. | |||
*@li matrix: A Tensor. Shape is [..., M, M]. | |||
*@li rhs:A Tensor. Must have the same type as matrix. Shape is [..., M, K] . \n | |||
*@par Attributes: | |||
@@ -256,9 +259,12 @@ dimensions form square matrices. \n | |||
*/ | |||
REG_OP(MatrixTriangularSolve) | |||
.INPUT(matrix, TensorType({DT_FLOAT, DT_DOUBLE})) | |||
.INPUT(rhs, TensorType({DT_FLOAT, DT_DOUBLE})) | |||
.OUTPUT(y, TensorType({DT_FLOAT, DT_DOUBLE})) | |||
.INPUT(matrix, TensorType({DT_FLOAT, DT_DOUBLE, DT_FLOAT16, \ | |||
DT_COMPLEX64, DT_COMPLEX128})) | |||
.INPUT(rhs, TensorType({DT_FLOAT, DT_DOUBLE, DT_FLOAT16, \ | |||
DT_COMPLEX64, DT_COMPLEX128})) | |||
.OUTPUT(y, TensorType({DT_FLOAT, DT_DOUBLE, DT_FLOAT16, \ | |||
DT_COMPLEX64, DT_COMPLEX128})) | |||
.ATTR(lower, Bool, true) | |||
.ATTR(adjoint, Bool, false) | |||
.OP_END_FACTORY_REG(MatrixTriangularSolve) | |||
@@ -268,8 +274,7 @@ REG_OP(MatrixTriangularSolve) | |||
*@par Inputs: | |||
*The input shape of x must be [..., M, N]. Inputs include: | |||
*x:A Tensor whose shape is [..., M, N]. Must be one of the following types: | |||
double, float . \n | |||
*x:A Tensor whose shape is [..., M, N]. \n | |||
*@par Attributes: | |||
*full_matrices: An optional bool. Defaults to False. If true, compute | |||
@@ -289,9 +294,12 @@ dimensions form matrices of size [M, N]. \n | |||
*/ | |||
REG_OP(Qr) | |||
.INPUT(x, TensorType({ DT_FLOAT16, DT_FLOAT, DT_DOUBLE })) | |||
.OUTPUT(q, TensorType({ DT_FLOAT16, DT_FLOAT, DT_DOUBLE })) | |||
.OUTPUT(r, TensorType({ DT_FLOAT16, DT_FLOAT, DT_DOUBLE })) | |||
.INPUT(x, TensorType({ DT_FLOAT16, DT_FLOAT, DT_DOUBLE, \ | |||
DT_COMPLEX64, DT_COMPLEX128 })) | |||
.OUTPUT(q, TensorType({ DT_FLOAT16, DT_FLOAT, DT_DOUBLE, \ | |||
DT_COMPLEX64, DT_COMPLEX128 })) | |||
.OUTPUT(r, TensorType({ DT_FLOAT16, DT_FLOAT, DT_DOUBLE, \ | |||
DT_COMPLEX64, DT_COMPLEX128 })) | |||
.ATTR(full_matrices, Bool, false) | |||
.OP_END_FACTORY_REG(Qr) | |||
@@ -384,8 +392,8 @@ of the rows encoded as a list of indices in `0..M-1`. Shape is `[..., M]` . \n | |||
*/ | |||
REG_OP(Lu) | |||
.INPUT(input, TensorType({DT_FLOAT, DT_DOUBLE})) | |||
.OUTPUT(lu, TensorType({DT_FLOAT, DT_DOUBLE})) | |||
.INPUT(input, TensorType({DT_FLOAT, DT_DOUBLE, DT_COMPLEX64, DT_COMPLEX128})) | |||
.OUTPUT(lu, TensorType({DT_FLOAT, DT_DOUBLE, DT_COMPLEX64, DT_COMPLEX128})) | |||
.OUTPUT(p, TensorType({DT_INT32, DT_INT64})) | |||
.REQUIRED_ATTR(output_idx_type, Type) | |||
.OP_END_FACTORY_REG(Lu) | |||
@@ -404,8 +412,8 @@ y: Shape is `[..., M, M]` . \n | |||
*/ | |||
REG_OP(MatrixSquareRoot) | |||
.INPUT(input, TensorType({DT_FLOAT, DT_DOUBLE})) | |||
.OUTPUT(y, TensorType({DT_FLOAT, DT_DOUBLE})) | |||
.INPUT(input, TensorType({DT_FLOAT, DT_DOUBLE, DT_COMPLEX64, DT_COMPLEX128})) | |||
.OUTPUT(y, TensorType({DT_FLOAT, DT_DOUBLE, DT_COMPLEX64, DT_COMPLEX128})) | |||
.OP_END_FACTORY_REG(MatrixSquareRoot) | |||
/** | |||
@@ -192,7 +192,7 @@ REG_OP(TensorListGetItem) | |||
.INPUT(element_shape, TensorType({DT_INT32})) | |||
.OUTPUT(item, TensorType({DT_FLOAT16,DT_FLOAT,DT_DOUBLE,DT_INT8, | |||
DT_INT16,DT_INT32,DT_INT64,DT_UINT8,DT_UINT16,DT_QINT8,DT_QUINT8, | |||
DT_QINT16,DT_QUINT16,DT_QINT32,DT_BOOL,DT_RESOURCE, | |||
DT_QINT16,DT_QUINT16,DT_QINT32,DT_BOOL, | |||
DT_STRING,DT_COMPLEX64,DT_COMPLEX128})) | |||
.ATTR(element_dtype, Type, DT_INT32) | |||
.OP_END_FACTORY_REG(TensorListGetItem) | |||
@@ -222,6 +222,24 @@ REG_OP(Bucketize) | |||
.REQUIRED_ATTR(boundaries, ListFloat) | |||
.OP_END_FACTORY_REG(Bucketize) | |||
/** | |||
*@brief Returns a new tensor with the truncated integer values of the elements of input. \n | |||
*@par Inputs: | |||
*One inputs, including: | |||
* @li input_x: A tensor. Must be one of the following types: float16, float32, int8, uint8, int32. \n | |||
*@par Outputs: | |||
*y: A tensor with the same type and shape of input_x \n | |||
*@par Third-party framework compatibility | |||
*Compatible with the Pytorch operator Trunc. \n | |||
*/ | |||
REG_OP(Trunc) | |||
.INPUT(input_x, TensorType({DT_FLOAT16,DT_FLOAT, DT_INT8, DT_INT32, DT_UINT8})) | |||
.OUTPUT(output_y, TensorType({DT_FLOAT16,DT_FLOAT, DT_INT8, DT_INT32, DT_UINT8})) | |||
.OP_END_FACTORY_REG(Trunc) | |||
/** | |||
*@brief Computes the sum along sparse segments of a tensor . \n | |||
@@ -645,6 +663,7 @@ REG_OP(NLLLoss) | |||
.OUTPUT(y, TensorType({DT_FLOAT})) | |||
.OUTPUT(total_weight, TensorType({DT_FLOAT})) | |||
.ATTR(reduction, String, "mean") | |||
.ATTR(ignore_index, Int, -100) | |||
.OP_END_FACTORY_REG(NLLLoss) | |||
/** | |||
@@ -674,6 +693,7 @@ REG_OP(NLLLossGrad) | |||
.INPUT(total_weight, TensorType({DT_FLOAT})) | |||
.OUTPUT(x_grad, TensorType({DT_FLOAT})) | |||
.ATTR(reduction, String, "mean") | |||
.ATTR(ignore_index, Int, -100) | |||
.OP_END_FACTORY_REG(NLLLossGrad) | |||
/** | |||
@@ -884,6 +904,54 @@ REG_OP(LpNorm) | |||
.ATTR(keepdim, Bool, false) | |||
.ATTR(epsilon, Float, 1e-12) | |||
.OP_END_FACTORY_REG(LpNorm) | |||
/** | |||
* @brief get complex. | |||
* @par Inputs: | |||
 * @li real: An ND tensor of type float32 or double. | |||
 * @li imag: An ND tensor of type float32 or double. \n | |||
 * | |||
 * @par Outputs: | |||
 * @li out: An ND tensor of type complex64 or complex128. \n | |||
*/ | |||
REG_OP(Complex) | |||
.INPUT(real, TensorType({DT_FLOAT, DT_DOUBLE})) | |||
.INPUT(imag, TensorType({DT_FLOAT, DT_DOUBLE})) | |||
.OUTPUT(out, TensorType({DT_COMPLEX64, DT_COMPLEX128})) | |||
.ATTR(Tout, Type, DT_COMPLEX64) | |||
.OP_END_FACTORY_REG(Complex) | |||
/** | |||
* @brief deal complex. | |||
* @par Inputs: | |||
* @li input: An ND tensor of type complex64, complex128 \n | |||
* | |||
* @par Outputs: | |||
* @li output: An ND tensor of type float32. double \n | |||
*/ | |||
REG_OP(Imag) | |||
.INPUT(input, TensorType({DT_COMPLEX64, DT_COMPLEX128})) | |||
.OUTPUT(output, TensorType({DT_FLOAT, DT_DOUBLE})) | |||
.ATTR(Tout, Type, DT_FLOAT) | |||
.OP_END_FACTORY_REG(Imag) | |||
/** | |||
* @brief deal complex. | |||
* @par Inputs: | |||
* @li input: An ND tensor of type complex64, complex128 \n | |||
* | |||
* @par Outputs: | |||
* @li output: An ND tensor of type float32. double \n | |||
*/ | |||
REG_OP(Angle) | |||
.INPUT(input, TensorType({DT_COMPLEX64, DT_COMPLEX128})) | |||
.OUTPUT(output, TensorType({DT_FLOAT, DT_DOUBLE})) | |||
.ATTR(Tout, Type, DT_FLOAT) | |||
.OP_END_FACTORY_REG(Angle) | |||
} // namespace ge | |||
#endif // OPS_BUILT_IN_OP_PROTO_INC_MATH_OPS_H_ |
@@ -1022,6 +1022,27 @@ REG_OP(IndexAdd) | |||
.ATTR(axis, Int, 0) | |||
.OP_END_FACTORY_REG(IndexAdd) | |||
/** | |||
*@brief: Returns the upper triangular part of a matrix (2-D tensor) or batch of matrices input \n | |||
*@par Inputs: | |||
* Two inputs, including: | |||
*@li x: A Tensor. Must be one of the following types: | |||
* float16, float32, double, int32, uint8, int16, int8, complex64, int64, | |||
* qint8, quint8, qint32, uint16, complex128, uint32, uint64. | |||
*@li diagonal: (int, optional) - the diagonal to consider. \n | |||
*@par Outputs: | |||
*y: A Tensor. Has the same type as "x" . \n | |||
*@par Third-party framework compatibility | |||
* Compatible with the Pytorch operator Triu. | |||
*/ | |||
REG_OP(Triu) | |||
.INPUT(x, TensorType::BasicType()) | |||
.ATTR(diagonal, Int, 0) | |||
.OUTPUT(y, TensorType::BasicType()) | |||
.OP_END_FACTORY_REG(Triu) | |||
} // namespace ge | |||
#endif // OPS_BUILT_IN_OP_PROTO_INC_MATRIX_CALCULATION_OPS_H_ |
@@ -144,6 +144,64 @@ REG_OP(BatchNorm) | |||
/** | |||
*@brief Performs batch normalization . \n | |||
*@par Inputs: | |||
* Five inputs, including: (NHWC, NCHW, or NC1HWC0 supported) | |||
*@li x: A 5D or 6D Tensor of type float16 or float32, with format NDHWC or NCDHW for 5D or NDC1HWC0 for 6D. | |||
*@li scale: A Tensor of type float32. Must be 1D if input "x" is with format NDHWC or NCDHW. Must be 6D | |||
if input "x" is with format NDC1HWC0. Specifies the scaling factor. | |||
*@li offset: A Tensor of type float32. Must be 1D if input "x" is with format NDHWC or NCDHW. Must be 6D | |||
if input "x" is with format NDC1HWC0. Specifies the offset. | |||
*@li mean: A Tensor of type float32. Must be 1D if input "x" is with format NDHWC or NCDHW. Must be 6D | |||
if input "x" is with format NDC1HWC0. Specifies the mean used for inference. Must be "None" if the | |||
operation is used for training. | |||
*@li variance: A Tensor of type float32. Must be 1D if input "x" is with format NDHWC or NCDHW. Must be | |||
6D if input "x" is with format NDC1HWC0. Specifies the variance used for inference. Must be "None" | |||
if the operation is used for training . \n | |||
*@par Attributes: | |||
*@li epsilon: An optional float32, specifying the small value added to variance to avoid dividing by zero. Defaults to "0.0001". | |||
*@li data_format: An optional string, specifying the format of "x". Defaults to "NHWC". | |||
*@li is_training: An optional bool, specifying if the operation is used for training or inference. Defaults to "True" . \n | |||
*@par Outputs: | |||
* Five outputs, including: (NHWC, NCHW, or NC1HWC0 supported) | |||
*@li y: A 3D or 6D Tensor of type float16 or float32 for the normalized "x", with format NDHWC or NCDHW for 4D or NDC1HWC0 for 6D. | |||
*@li batch_mean: A Tensor of type float32. Must be 3D if input "x" is with format NDHWC or NCDHW. Must be 6D | |||
if input "x" is with format NDC1HWC0. Specifies the mean of "x". | |||
*@li batch_variance: A Tensor of type float32. Must be 1D if input "x" is with format NDHWC or NCDHW. | |||
Must be 6D if input "x" is with format NDC1HWC0. Specifies the variance of "x". | |||
*@li reserve_space_1: An optional Tensor of type float32. Must be 1D if input "x" is with format NDHWC or NCDHW. | |||
Must be 6D if input "x" is with format NDC1HWC0. Specifies the mean of "x" for gradient computation. Pass "None" to skip this output. | |||
*@li reserve_space_2: An optional Tensor of type float32. Must be 1D if input "x" is with format NHWC or NCHW. | |||
Must be 6D if input "x" is with format NDC1HWC0. Specifies the variance of "x" for gradient computation. Pass "None" to skip this output . \n | |||
*@attention Constraints: | |||
*@li If the operation is used for inference and outputs "reserve_space_1" and "reserve_space_2" are available, | |||
then "reserve_space_1" has the same value as "mean" and "reserve_space_2" has the same value as "variance". | |||
*@li For Ascend 310, the result accuracy fails to reach 1‰ due to the square root instruction . \n | |||
*@par Third-party framework compatibility | |||
*@li Compatible with the TensorFlow operator fused_batch_norm. | |||
*@li Compatible with the TensorFlow operator fused_batch_norm_v2. | |||
*/ | |||
REG_OP(BatchNorm3D) | |||
.INPUT(x, TensorType({DT_FLOAT16,DT_FLOAT})) | |||
.INPUT(scale, TensorType({DT_FLOAT})) | |||
.INPUT(offset, TensorType({DT_FLOAT})) | |||
.OPTIONAL_INPUT(mean, TensorType({DT_FLOAT})) | |||
.OPTIONAL_INPUT(variance, TensorType({DT_FLOAT})) | |||
.OUTPUT(y, TensorType({DT_FLOAT16,DT_FLOAT})) | |||
.OUTPUT(batch_mean, TensorType({DT_FLOAT})) | |||
.OUTPUT(batch_variance, TensorType({DT_FLOAT})) | |||
.OUTPUT(reserve_space_1, TensorType({DT_FLOAT})) | |||
.OUTPUT(reserve_space_2, TensorType({DT_FLOAT})) | |||
.ATTR(epsilon, Float, 0.0001) | |||
.ATTR(data_format, String, "NCDHW") | |||
.ATTR(is_training, Bool, true) | |||
.OP_END_FACTORY_REG(BatchNorm3D) | |||
/** | |||
*@brief Performs batch normalization . \n | |||
*@par Inputs: | |||
* Five inputs, including: (NHWC or NCHW supported) | |||
*@li x: A 4D Tensor of type float16 or float32. | |||
@@ -242,6 +300,52 @@ REG_OP(BatchNormGrad) | |||
/**
*@brief Performs the backpropagation of BatchNorm3D . \n
*@par Inputs:
* Five inputs, including:
*@li y_backprop: A 5D or 6D Tensor of type float16 or float32, with format NDHWC, NCDHW, or NDC1HWC0, for the gradient.
*@li x: A 5D or 6D Tensor of type float16 or float32, with format NDHWC, NCDHW, or NDC1HWC0.
*@li scale: A 5D or 6D Tensor of type float32, with format NDHWC, NCDHW, or NDC1HWC0.
*@li reserve_space_1: A 5D or 6D Tensor of type float32, with format NDHWC, NCDHW, or NDC1HWC0. It is an output of BatchNorm3D.
*@li reserve_space_2: A 5D or 6D Tensor of type float32, with format NDHWC, NCDHW, or NDC1HWC0. It is an output of BatchNorm3D . \n
*@par Attributes:
*@li epsilon: An optional float32. Defaults to "0.0001". A small float number added to the variance of "x".
*@li data_format: An optional string. Defaults to "NCDHW".
*@li is_training: An optional bool. Defaults to "true". Specifies whether the operation is for training (default) or inference . \n
*@par Outputs:
*@li x_backprop: A Tensor of type float16 or float32, with format NDHWC, NCDHW, or NDC1HWC0, for the gradient of "x".
*@li scale_backprop: A Tensor of type float32, with format NDHWC, NCDHW, or NDC1HWC0, for the gradient of "scale".
*@li offset_backprop: A Tensor of type float32, with format NDHWC, NCDHW, or NDC1HWC0, for the gradient of "offset".
*@li reserve_space_4: A Tensor of type float32, with format NDHWC, NCDHW, or NDC1HWC0. Pass "None" to skip this output.
*@li reserve_space_5: A Tensor of type float32, with format NDHWC, NCDHW, or NDC1HWC0. Pass "None" to skip this output . \n
*@attention Constraints:
* The preceding layer of this operator must be operator BatchNorm3D . \n
*@see BatchNorm3D
*@par Third-party framework compatibility
* Compatible with the TensorFlow operators FusedBatchNormGradV2 and FusedBatchNorm3DGrad.
*/
REG_OP(BatchNorm3DGrad)
.INPUT(y_backprop, TensorType({DT_FLOAT16,DT_FLOAT}))
.INPUT(x, TensorType({DT_FLOAT16,DT_FLOAT}))
.INPUT(scale, TensorType({DT_FLOAT}))
.INPUT(reserve_space_1, TensorType({DT_FLOAT}))
.INPUT(reserve_space_2, TensorType({DT_FLOAT}))
.OUTPUT(x_backprop, TensorType({DT_FLOAT16,DT_FLOAT}))
.OUTPUT(scale_backprop, TensorType({DT_FLOAT}))
.OUTPUT(offset_backprop, TensorType({DT_FLOAT}))
.OUTPUT(reserve_space_4, TensorType({DT_FLOAT}))
.OUTPUT(reserve_space_5, TensorType({DT_FLOAT}))
.ATTR(epsilon, Float, 0.0001)
.ATTR(data_format, String, "NCDHW")
.ATTR(is_training, Bool, true)
.OP_END_FACTORY_REG(BatchNorm3DGrad)
/** | |||
*@brief Performs the backpropagation of BatchNorm . \n | |||
*@par Inputs: | |||
* Five inputs, including: | |||
*@li y_backprop: A 4D Tensor of type float16 or float32, with format NHWC or NCHW, for the gradient. | |||
@@ -1059,7 +1059,7 @@ REG_OP(DeformableConv2D) | |||
*@par Attributes: | |||
* @li groups: Number of blocked connections from input channels to output | |||
* channels. Reserved. | |||
* channels. | |||
* @li data_format: An optional string from: "NDHWC", "NCDHW". | |||
* Defaults to "NDHWC". Specify the data format of the input and output data. | |||
* @li dilations: A list of 5 integers. Specifies the dilation factor for each | |||
@@ -1119,7 +1119,7 @@ REG_OP(Conv3D) | |||
*@par Attributes: | |||
* Three attributes: | |||
* @li groups: Number of blocked connections from input channels to output | |||
* channels. Reserved. | |||
* channels. | |||
* @li data_format: An optional string from: "NDHWC", "NCDHW". | |||
* Defaults to "NDHWC". Specify the data format of the input and output data. | |||
* @li dilations: A tuple/list of 5 integers, The dilation factor for each | |||
@@ -1167,7 +1167,7 @@ REG_OP(Conv3DBackpropInput) | |||
*@par Attributes: | |||
* Three attributes: | |||
* @li groups: Number of blocked connections from input channels to output | |||
* channels. Reserved. | |||
* channels. | |||
* @li data_format: An optional string from: "NDHWC", "NCDHW". | |||
* Defaults to "NDHWC". Specify the data format of the input and output data. | |||
* @li dilations: A tuple/list of 5 integers, The dilation factor for each | |||
@@ -1267,7 +1267,7 @@ REG_OP(LSTM) | |||
* dimension of input. | |||
* The N, C and D dimensions must be 1. Has the same format as "x". | |||
* @li groups: Number of blocked connections from input channels to output | |||
* channels. Reserved. | |||
* channels. | |||
* @li data_format: An optional string from: "NDHWC", "NCDHW". | |||
* Defaults to "NDHWC". Specify the data format of the input and output data. | |||
@@ -1319,7 +1319,7 @@ REG_OP(Conv3DBackpropFilter) | |||
* dimension of input. | |||
* The N, C and D dimensions must be 1. Has the same format as "x". | |||
* @li groups: Number of blocked connections from input channels to output | |||
* channels. Reserved. | |||
* channels. | |||
* @li data_format: An optional string from: "NDHWC", "NCDHW". | |||
* Defaults to "NDHWC". Specify the data format of the input and output data. | |||
@@ -1369,7 +1369,7 @@ REG_OP(Conv3DBackpropFilterD) | |||
*@par Attributes: | |||
* Five attributes: | |||
* @li groups: Number of blocked connections from input channels to output | |||
* channels. Reserved. | |||
* channels. | |||
* @li dilations: A tuple/list of 5 integers, | |||
* The dilation factor for each dimension of input. | |||
* The N, C and D dimensions must be 1. Has the same format as "x". | |||
@@ -1422,7 +1422,7 @@ REG_OP(Conv3DTranspose) | |||
* dimension of input. | |||
* The N, C and D dimensions must be 1. Has the same format as "x". | |||
* @li groups: Number of blocked connections from input channels to output | |||
* channels. Reserved. | |||
* channels. | |||
* @li data_format: An optional string from: "NDHWC", "NCDHW". | |||
* Defaults to "NDHWC". Specify the data format of the input and output data. | |||
* @li output_padding: The size will be added in the output shape. | |||
@@ -1624,7 +1624,7 @@ REG_OP(Conv2DTransposeD) | |||
* of the input. | |||
* @li ksize: A tuple/list of 2 integers.kernel size. | |||
*@par Attributes: | |||
* Three attributes: | |||
* Four attributes: | |||
* @li dilations: A tuple/list of 4 integers, The dilation factor for each dimension | |||
* of input. Defaults to [1, 1, 1, 1] | |||
* @li data_format: An optional string from: "NCHW", "NHWC". Defaults to "NCHW". Specify the data format of the input x. | |||
@@ -968,8 +968,9 @@ REG_OP(SPP) | |||
* Three inputs, including: | |||
*@li x: An NC1HWC0 tensor of type float16 or float32, describing the feature | |||
* map. | |||
*@li rois: A tensor of type float16 or float32, with shape | |||
*@li rois: A tensor of type float16 or float32, with 3D shape | |||
* [batch, 5, roi_max_num], describing the ROIs.
* roi_max_num must be less than or equal to 6000 and must be divisible by 16.
*@li roi_actual_num: An optional tensor of type int32, with shape [batch, 8], specifying
* the number of ROIs per batch . \n | |||
@@ -1604,6 +1605,50 @@ REG_OP(NonMaxSuppressionV7) | |||
.ATTR(max_boxes_size, Int, 0) | |||
.OP_END_FACTORY_REG(NonMaxSuppressionV7) | |||
/**
*@brief Obtains the ROI feature matrix from the feature map list. It is a customized fused operator for mmdetection. \n
*@par Inputs:
* Two inputs, including:
*@li features: A 5HD Tensor list of type float32 or float16.
*@li rois: ROI position. A 2D Tensor of float32 or float16 with shape (N, 5). "N" indicates the number of ROIs;
* the 5 elements of each ROI are the index of the image the ROI belongs to, followed by the coordinates
* "x0", "y0", "x1", and "y1".
*@par Attributes:
*@li finest_scale: An optional attribute of type int, specifying the scale used to assign "rois" to feature levels.
* Defaults to "56".
*@li roi_scale_factor: An optional attribute of type float32, specifying the rescaling of "rois" coordinates.
* Defaults to "0".
*@li spatial_scale: An optional attribute of type list of float32, specifying the scaling ratio of "features"
* to the original image. Defaults to {1/4, 1/8, 1/16, 1/32}.
*@li pooled_height: An optional attribute of type int32, specifying the H dimension. Defaults to "7".
*@li pooled_width: An optional attribute of type int32, specifying the W dimension. Defaults to "7".
*@li sample_num: An optional attribute of type int32, specifying the horizontal and vertical sampling frequency
* of each output. If this attribute is set to "0", the sampling frequency is equal to the rounded up value of "rois",
* which is a floating point number. Defaults to "0".
*@li pool_mode: An optional attribute of type string to indicate the pooling mode. Defaults to "avg".
*@li aligned: An optional attribute of type bool, specifying whether to align to corners. Defaults to "true" . \n
*@par Outputs:
* y: Outputs the feature sample of each ROI position. The format is 5HD Tensor of type float32 or float16.
* The axis N is the number of input ROIs. Axes H, W, and C are consistent with the values of "pooled_height",
* "pooled_width", and "features", respectively.
*@par Third-party framework compatibility
*Compatible with mmdetection SingleRoIExtractor operator.
*/
REG_OP(RoiExtractor)
.DYNAMIC_INPUT(features, TensorType({DT_FLOAT16, DT_FLOAT}))
.INPUT(rois, TensorType({DT_FLOAT16, DT_FLOAT}))
.OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT}))
.ATTR(finest_scale, Int, 56)
.ATTR(roi_scale_factor, Float, 0)
.ATTR(spatial_scale, ListFloat, { 1.f/4, 1.f/8, 1.f/16, 1.f/32 })
.ATTR(pooled_height, Int, 7)
.ATTR(pooled_width, Int, 7)
.ATTR(sample_num, Int, 0)
.ATTR(pool_mode, String, "avg")
.ATTR(aligned, Bool, true)
.OP_END_FACTORY_REG(RoiExtractor)
} // namespace ge | |||
#endif // OPS_BUILT_IN_OP_PROTO_INC_NN_DETECT_OPS_H_ |
@@ -1233,6 +1233,47 @@ REG_OP(SigmoidCrossEntropyWithLogitsGradV2) | |||
.OUTPUT(gradient, TensorType({DT_FLOAT16, DT_FLOAT})) | |||
.ATTR(reduction, String, "mean") | |||
.OP_END_FACTORY_REG(SigmoidCrossEntropyWithLogitsGradV2) | |||
/**
* @brief Calculate the PoissonNllLoss function.
* target ~ Poisson(input); loss(input, target) = input - target * log(input) + log(target!) \n
* @par Inputs:
* Two inputs, including:
* @li input_x: A tensor. Must be one of the following types: float16, float32.
* @li target: A tensor. Must be one of the following types: float16, float32. \n
* @par Attributes:
* Four attributes, including:
* @li log_input: An optional bool. Defaults to "True".
* @li full: An optional bool. Defaults to "False".
* @li eps: An optional float. Defaults to "1e-8".
* @li reduction: An optional string. Defaults to "mean". \n
* @par Outputs:
* loss: A Tensor with the same element type as the two inputs. \n
* @par Third-party framework compatibility
* Compatible with the Pytorch operator PoissonNllLoss. \n
*/
REG_OP(PoissonNllLoss)
.INPUT(input_x, TensorType({DT_FLOAT16, DT_FLOAT}))
.INPUT(target, TensorType({DT_FLOAT16, DT_FLOAT}))
.OUTPUT(loss, TensorType({DT_FLOAT16, DT_FLOAT}))
.ATTR(log_input, Bool, true)
.ATTR(full, Bool, false)
.ATTR(eps, Float, 1e-8)
.ATTR(reduction, String, "mean")
.OP_END_FACTORY_REG(PoissonNllLoss)
} // namespace ge | |||
#endif // OPS_BUILT_IN_OP_PROTO_INC_NN_NORM_OPS_H_ |
@@ -20,7 +20,34 @@ | |||
*/ | |||
#ifndef OPS_BUILT_IN_OP_PROTO_INC_NN_OPS_H_ | |||
#define OPS_BUILT_IN_OP_PROTO_INC_NN_OPS_H_ | |||
#include "graph/operator_reg.h" | |||
#include "nn_pooling_ops.h" | |||
namespace ge { | |||
/**
* @brief Says whether the targets are in the top "k" predictions . \n
* @par Inputs:
* Three inputs, including:
* @li predictions: A 2D Tensor of type float32. A "batch_size * classes" tensor.
* @li targets: A 1D Tensor of type IndexNumberType. A batch_size tensor of class ids.
* @li k: A 1D Tensor of the same type as "targets".
* Specifies the number of top elements to look at for computing precision . \n
* @par Outputs:
* precision: A Tensor of type bool . \n
* @attention Constraints:
* @li "targets" must be a non-negative tensor.
* @par Third-party framework compatibility
* @li Compatible with the TensorFlow operator InTopKV2.
*/
REG_OP(InTopKV2)
.INPUT(predictions, TensorType({DT_FLOAT}))
.INPUT(targets, TensorType(IndexNumberType))
.INPUT(k, TensorType({IndexNumberType}))
.OUTPUT(precision, TensorType({DT_BOOL}))
.OP_END_FACTORY_REG(InTopKV2)
}// namespace ge | |||
#endif // OPS_BUILT_IN_OP_PROTO_INC_NN_OPS_H_ |
@@ -223,7 +223,29 @@ REG_OP(Relu6Grad) | |||
.INPUT(features, TensorType::RealNumberType()) | |||
.OUTPUT(backprops, TensorType::RealNumberType()) | |||
.OP_END_FACTORY_REG(Relu6Grad) | |||
/**
*@brief Calculate the elu_grad_v2 function.
* Computes the backward of the ELU activation: the gradient factor is 1
* where x > 0; otherwise it is elu(x) + alpha .
*@par Inputs:
*Two inputs, including:
* @li grads: A tensor. Must be one of the following types:
* float16, float32.
* @li activations: A tensor. Must be one of the following types:
* float16, float32.
*
*@par Attributes:
*@li alpha: An optional float scalar parameter. Defaults to "1.0".
*
*@par Outputs:
*y: A Tensor with the same type and shape as "grads".
*/
REG_OP(EluGradV2)
.INPUT(grads, TensorType({DT_FLOAT, DT_FLOAT16}))
.INPUT(activations, TensorType({DT_FLOAT, DT_FLOAT16}))
.OUTPUT(y, TensorType({DT_FLOAT, DT_FLOAT16}))
.ATTR(alpha, Float, 1.0)
.OP_END_FACTORY_REG(EluGradV2)
/** | |||
* @brief Compute sigmoid of "x" element-wise . \n | |||
@@ -842,6 +864,26 @@ REG_OP(SoftShrinkGrad) | |||
.OUTPUT(output_y, TensorType({DT_FLOAT16, DT_FLOAT})) | |||
.ATTR(lambd, Float, 0.5) | |||
.OP_END_FACTORY_REG(SoftShrinkGrad) | |||
/**
*@brief Calculate -ln(1+e^(-x)). \n
*@par Inputs:
*One input, including:
* @li x: A tensor. Must be one of the following types:
* float16, float32. \n
*@par Outputs:
*One output, including:
* @li y: A tensor with the same type and shape as "x". \n
*@par Third-party framework compatibility
*Compatible with the Pytorch operator LogSigmoid. \n
*/
REG_OP(LogSigmoid)
.INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT})) /* "input:x" */
.OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT})) /* "output:y" */
.OP_END_FACTORY_REG(LogSigmoid)
} // namespace ge | |||
#endif // OPS_BUILT_IN_OP_PROTO_INC_NONLINEAR_FUC_OPS_H_ |
@@ -37,7 +37,7 @@ namespace ge { | |||
*@attention Constraints: | |||
* This operator is a BatchNorm fusion operator for updating the moving | |||
* averages for training. | |||
* This operator is used in conjunction with BNTrainingUpdate. | |||
* This operator is used in conjunction with BNTrainingReduce. | |||
*/ | |||
REG_OP(BNTrainingReduce) | |||
.INPUT(x, TensorType({DT_FLOAT16,DT_FLOAT})) | |||
@@ -45,6 +45,27 @@ REG_OP(BNTrainingReduce) | |||
.OUTPUT(square_sum, TensorType({DT_FLOAT})) | |||
.OP_END_FACTORY_REG(BNTrainingReduce) | |||
/**
*@brief Performs reduced batch normalization (3-D variant) . \n
*@par Inputs:
*x: A 6D Tensor of type float16 or float32, with format NDC1HWC0 . \n
*@par Outputs:
*@li sum: A 3D Tensor of type float32 for SUM reduced "x".
*@li square_sum: A 3D Tensor of type float32 for SUMSQ reduced "x" . \n
*@attention Constraints:
* This operator is a BatchNorm fusion operator for updating the moving
* averages for training.
* This operator is used in conjunction with BN3DTrainingUpdate.
*/
REG_OP(BN3DTrainingReduce)
.INPUT(x, TensorType({DT_FLOAT16,DT_FLOAT}))
.OUTPUT(sum, TensorType({DT_FLOAT}))
.OUTPUT(square_sum, TensorType({DT_FLOAT}))
.OP_END_FACTORY_REG(BN3DTrainingReduce)
/** | |||
*@brief Performs the backpropagation of BatchNorm . \n | |||
@@ -88,6 +109,49 @@ REG_OP(BNTrainingReduceGrad) | |||
.ATTR(epsilon, Float, 0.0001) | |||
.OP_END_FACTORY_REG(BNTrainingReduceGrad) | |||
/**
*@brief Performs the backpropagation of BatchNorm (3-D variant) . \n
*@par Inputs:
* Seven inputs, including:
*@li grads: A 6D Tensor of type float16 or float32, with format NDC1HWC0, for
* the gradient.
*@li x: A 6D Tensor of type float16 or float32, with format NDC1HWC0.
*@li diff_scale: A 6D Tensor of type float32, with format NDC1HWC0,
* for the gradient of "scale".
*@li diff_offset: A 6D Tensor of type float32, with format NDC1HWC0,
* for the gradient of "offset".
*@li scale: A 6D Tensor of type float32, with format NDC1HWC0.
*@li batch_mean: A 6D Tensor of type float32, with format NDC1HWC0,
* for the mean of "x".
*@li batch_variance: A 6D Tensor of type float32, with format NDC1HWC0,
* for the variance of "x" . \n
*@par Attributes:
*epsilon: An optional float32. Defaults to "0.0001". A small float number
* added to the variance of "x" . \n
*@par Outputs:
*y: A Tensor of type float16 or float32, with format NDC1HWC0, for the
* gradient of "x" . \n
*@attention Constraints:
* The preceding layer of this operator must be BN3DTrainingUpdateGrad . \n
*@see BN3DTrainingUpdateGrad
*/
REG_OP(BN3DTrainingReduceGrad)
.INPUT(grads, TensorType({DT_FLOAT16,DT_FLOAT}))
.INPUT(x, TensorType({DT_FLOAT16,DT_FLOAT}))
.INPUT(diff_scale, TensorType({DT_FLOAT}))
.INPUT(diff_offset, TensorType({DT_FLOAT}))
.INPUT(scale, TensorType({DT_FLOAT}))
.INPUT(batch_mean, TensorType({DT_FLOAT}))
.INPUT(batch_variance, TensorType({DT_FLOAT}))
.OUTPUT(y, TensorType({DT_FLOAT16,DT_FLOAT}))
.ATTR(epsilon, Float, 0.0001)
.OP_END_FACTORY_REG(BN3DTrainingReduceGrad)
/** | |||
*@brief Performs reduced batch normalization . \n | |||
@@ -120,7 +184,7 @@ REG_OP(BNTrainingReduceGrad) | |||
*@attention Constraints: | |||
*@li This operator is a BatchNorm fusion operator for updating the moving | |||
averages for training. | |||
*This operator is used in conjunction with BNTrainingReduce. | |||
*This operator is used in conjunction with BNTrainingUpdate. | |||
*@li For Ascend 310, the result accuracy fails to reach 1‰ due to the square | |||
* root instruction. | |||
*/ | |||
@@ -141,6 +205,59 @@ REG_OP(BNTrainingUpdate) | |||
.OUTPUT(batch_variance, TensorType({DT_FLOAT})) | |||
.OP_END_FACTORY_REG(BNTrainingUpdate) | |||
/**
*@brief Performs reduced batch normalization (3-D variant) . \n
*@par Inputs:
* Seven inputs, including: (NDC1HWC0 supported)
*@li x: A 6D Tensor of type float16 or float32.
*@li sum: A 6D Tensor of type float32 for the output of operator
* BN3DTrainingReduce.
*@li square_sum: A 6D Tensor of type float32 for the output of operator
* BN3DTrainingReduce.
*@li scale: A 6D Tensor of type float32, for the scaling factor.
*@li offset: A 6D Tensor of type float32, for the scaling offset.
*@li mean: A 6D Tensor of type float32, for the updated mean.
*@li variance: A 6D Tensor of type float32, for the updated variance . \n
*@par Attributes:
*@li epsilon: A required float32, specifying the small value added to variance
* to avoid dividing by zero.
*@li factor: A required float32, specifying the weight for updating the mean
* and variance . \n
*@par Outputs:
* Five outputs, including: (NDC1HWC0 supported)
*@li y: A 6D Tensor of type float16 or float32, for normalized "x".
*@li mean: A 6D Tensor of type float32, for the updated mean.
*@li variance: A 6D Tensor of type float32, for the updated variance.
*@li batch_mean: A 6D Tensor of type float32, for the mean of "x".
*@li batch_variance: A 6D Tensor of type float32, for the variance of "x" . \n
*@attention Constraints:
*@li This operator is a BatchNorm fusion operator for updating the moving
averages for training.
*This operator is used in conjunction with BN3DTrainingReduce.
*@li For Ascend 310, the result accuracy fails to reach 1‰ due to the square
* root instruction.
*/
REG_OP(BN3DTrainingUpdate)
.INPUT(x, TensorType({DT_FLOAT16,DT_FLOAT}))
.INPUT(sum, TensorType({DT_FLOAT}))
.INPUT(square_sum, TensorType({DT_FLOAT}))
.INPUT(scale, TensorType({DT_FLOAT}))
.INPUT(offset, TensorType({DT_FLOAT}))
.INPUT(mean, TensorType({DT_FLOAT}))
.INPUT(variance, TensorType({DT_FLOAT}))
.REQUIRED_ATTR(factor, Float)
.REQUIRED_ATTR(epsilon, Float)
.OUTPUT(y, TensorType({DT_FLOAT16,DT_FLOAT}))
.OUTPUT(mean, TensorType({DT_FLOAT}))
.OUTPUT(variance, TensorType({DT_FLOAT}))
.OUTPUT(batch_mean, TensorType({DT_FLOAT}))
.OUTPUT(batch_variance, TensorType({DT_FLOAT}))
.OP_END_FACTORY_REG(BN3DTrainingUpdate)
/** | |||
*@brief Performs batch normalization for inference . \n | |||
@@ -284,6 +401,40 @@ REG_OP(BNTrainingUpdateGrad) | |||
.OUTPUT(diff_offset, TensorType({DT_FLOAT})) | |||
.OP_END_FACTORY_REG(BNTrainingUpdateGrad) | |||
/**
*@brief Performs the backpropagation of BatchNorm (3-D variant) . \n
*@par Inputs:
* Four inputs, including:
*@li grads: A 6D Tensor of type float16 or float32, with format NDC1HWC0,
* for the gradient.
*@li x: A 6D Tensor of type float16 or float32, with format NDC1HWC0.
*@li batch_mean: A 6D Tensor of type float32, with format NDC1HWC0,
* for the mean of "x".
*@li batch_variance: A 6D Tensor of type float32, with format NDC1HWC0,
* for the variance of "x" . \n
*@par Attributes:
*epsilon: An optional float32. Defaults to "0.0001". A small float number
* added to the variance of "x" . \n
*@par Outputs:
*@li diff_scale: A Tensor of type float32, with format NDC1HWC0,
* for the gradient of "scale".
*@li diff_offset: A Tensor of type float32, with format NDC1HWC0,
* for the gradient of "offset" . \n
*/
REG_OP(BN3DTrainingUpdateGrad)
.INPUT(grads, TensorType({DT_FLOAT16,DT_FLOAT}))
.INPUT(x, TensorType({DT_FLOAT16,DT_FLOAT}))
.INPUT(batch_mean, TensorType({DT_FLOAT}))
.INPUT(batch_variance, TensorType({DT_FLOAT}))
.ATTR(epsilon, Float, 0.0001)
.OUTPUT(diff_scale, TensorType({DT_FLOAT}))
.OUTPUT(diff_offset, TensorType({DT_FLOAT}))
.OP_END_FACTORY_REG(BN3DTrainingUpdateGrad)
/** | |||
*@brief Performs the backpropagation of BatchNorm for inference . \n | |||
@@ -857,8 +857,8 @@ REG_OP(SliceDV2) | |||
* @li sorted = true | |||
* @li It's unstable sorted indices on the platform of Ascend310 | |||
* @par Third-party framework compatibility | |||
* @li Compatible with the TensorFlow operator TopK. | |||
* @par Restrictions: | |||
* Warning: THIS FUNCTION IS DEPRECATED. Please use TopKV2 instead. | |||
*/ | |||
REG_OP(TopKD) | |||
.INPUT(x, TensorType::RealNumberType()) | |||
@@ -883,6 +883,44 @@ REG_OP(TopKD) | |||
* Number of top elements to look for along the last dimension (along each row | |||
* for matrices) . \n | |||
* @par Attributes: | |||
* @li sorted: An optional bool. Defaults to true. | |||
* If true, the resulting "k" elements will be sorted by the values in descending | |||
* order. | |||
* @li dim: An optional int. Defaults to -1. For reserved use. | |||
* @li largest: An optional bool. Defaults to true. For reserved use. \n | |||
* @par Outputs: | |||
* @li values: A Tensor, specifying the sorted data. Has the same type as | |||
* "input". | |||
* @li indices: A Tensor of type int32, specifying the indices of sorted data . \n | |||
* @see TopK() | |||
* @par Third-party framework compatibility | |||
* @li Compatible with the TensorFlow operator TopKV2. | |||
*/ | |||
// Finds the values and indices of the "k" largest elements along the last
// dimension; "k" is a runtime int32 tensor (see the doxygen block above).
REG_OP(TopKV2)
.INPUT(x, TensorType::RealNumberType())
.INPUT(k, TensorType({DT_INT32}))
.OUTPUT(values, TensorType::RealNumberType())
.OUTPUT(indices, TensorType({DT_INT32}))
.ATTR(sorted, Bool, true)
.ATTR(dim, Int, -1)
.ATTR(largest, Bool, true)
.OP_END_FACTORY_REG(TopKV2)
/** | |||
* @brief Finds values and indices of the "k" largest elements for the last | |||
* dimension . \n | |||
* @par Inputs: | |||
* Two inputs, including: | |||
* @li x: A 1D or higher tensor of type BasicType, with the last dimension | |||
* at least "k". | |||
* @li k: A 0D Tensor of type int32. | |||
* Number of top elements to look for along the last dimension (along each row | |||
* for matrices) . \n | |||
* @par Attributes: | |||
* @li sorted: An optional bool. Defaults to true. | |||
* If true, the resulting "k" elements will be sorted by the values in descending | |||
@@ -2103,6 +2141,34 @@ REG_OP(StridedSliceV2) | |||
.OUTPUT(y, TensorType::BasicType()) | |||
.OP_END_FACTORY_REG(StridedSliceV2) | |||
/**
*@brief Fills the elements of the input tensor with value val by selecting the indices in the order given in index. \n
*@par Inputs:
*Three inputs, including:
* @li x: A tensor. Must be one of the following types:
* float16, float32, int32.
*@li assist1: A tensor. Must be one of the following types:
* float16, float32, int32.
*@li assist2: A tensor. Must be one of the following types:
* float16, float32, int32.
* NOTE(review): the brief mentions "index" and "val", which are presumably
* encoded in "assist1"/"assist2" — confirm against the kernel implementation. \n
* @par Attributes:
* @li dim: A required int. Used to select the dimension of this tensor. \n
*@par Outputs:
*y: A Tensor with the same type and shape as "x". \n
*@par Third-party framework compatibility
*Compatible with the Pytorch operator IndexFill. \n
*/
REG_OP(IndexFillD)
.INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32}))
.INPUT(assist1, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32}))
.INPUT(assist2, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32}))
.OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32}))
.REQUIRED_ATTR(dim, Int)
.OP_END_FACTORY_REG(IndexFillD)
} // namespace ge | |||
#endif // OPS_BUILT_IN_OP_PROTO_INC_SELECTION_OPS_H_ |
@@ -383,11 +383,11 @@ REG_OP(SparseFillEmptyRowsGrad) | |||
REG_OP(SparseTensorDenseMatMul) | |||
.INPUT(x1_indices, TensorType({DT_INT32, DT_INT64})) | |||
.INPUT(x1_values, TensorType({DT_FLOAT, DT_DOUBLE, DT_INT32, \ | |||
DT_COMPLEXT64, DT_COMPLEX128, DT_FLOAT16})) | |||
DT_COMPLEXT64, DT_COMPLEX128, DT_FLOAT16, DT_INT64})) | |||
.INPUT(x1_shape, TensorType({DT_INT64})) | |||
.INPUT(x2, TensorType({DT_FLOAT, DT_DOUBLE, DT_INT32, DT_COMPLEXT64, \ | |||
.INPUT(x2, TensorType({DT_FLOAT, DT_DOUBLE, DT_INT64, DT_INT32, DT_COMPLEXT64, \ | |||
DT_COMPLEX128, DT_FLOAT16})) | |||
.OUTPUT(y, TensorType({DT_FLOAT, DT_DOUBLE, DT_INT32, DT_COMPLEXT64, \ | |||
.OUTPUT(y, TensorType({DT_FLOAT, DT_DOUBLE, DT_INT64, DT_INT32, DT_COMPLEXT64, \ | |||
DT_COMPLEX128, DT_FLOAT16})) | |||
.ATTR(adjoint_a, Bool, false) | |||
.ATTR(adjoint_b, Bool, false) | |||
@@ -26,6 +26,24 @@ | |||
namespace ge { | |||
/**
*@brief Computes the inverse 1-dimensional discrete Fourier transform over the
inner-most dimension of `x`. \n
*@par Inputs:
*@li x: A Tensor. Must be one of the following types: complex64, complex128. \n
*@par Outputs:
*@li y: A complex tensor with the same shape as `x`. The inner-most dimension
of `x` is replaced by its inverse 1D Fourier transform. \n
*@par Third-party framework compatibility
* Compatible with TensorFlow IFFT operator.
*/
REG_OP(IFFT)
.INPUT(x, TensorType({DT_COMPLEX64,DT_COMPLEX128}))
.OUTPUT(y, TensorType({DT_COMPLEX64,DT_COMPLEX128}))
.OP_END_FACTORY_REG(IFFT)
/** | |||
*@brief Real-valued fast Fourier transform . \n | |||
@@ -47,6 +65,84 @@ REG_OP(RFFT) | |||
.OUTPUT(y, TensorType({DT_COMPLEX64})) | |||
.OP_END_FACTORY_REG(RFFT) | |||
/**
*@brief Inverse real-valued fast Fourier transform . \n
*@par Inputs:
*@li x: A complex64 tensor.
*@li fft_length: An int32 tensor of shape [1]. The FFT length . \n
*@par Outputs:
*@li y: A float32 tensor of the same rank as `x`. The inner-most
dimension of `x` is replaced with the `fft_length` samples of its inverse
1D Fourier transform . \n
*@par Third-party framework compatibility
* Compatible with TensorFlow IRFFT operator.
*/
REG_OP(IRFFT)
.INPUT(x, TensorType({DT_COMPLEX64}))
.INPUT(fft_length, TensorType({DT_INT32}))
.OUTPUT(y, TensorType({DT_FLOAT}))
.OP_END_FACTORY_REG(IRFFT)
/**
*@brief 2D fast Fourier transform. \n
*@par Inputs:
*@li x: A Tensor. Must be one of the following types: complex64, complex128.
*@par Outputs:
*@li y: A complex tensor of the same shape as `x`. The inner-most 2
dimensions of `x` are replaced with their 2D Fourier transform.\n
*@par Third-party framework compatibility
* Compatible with TensorFlow FFT2D operator.
*/
REG_OP(FFT2D)
.INPUT(x, TensorType({DT_COMPLEX64, DT_COMPLEX128}))
.OUTPUT(y, TensorType({DT_COMPLEX64, DT_COMPLEX128}))
.OP_END_FACTORY_REG(FFT2D)
/**
*@brief Calculate the one-dimensional discrete Fourier transform on the
innermost dimension of the input. \n
*@par Inputs:
*@li x: A Tensor. Must be one of the following types: complex64, complex128. \n
*@par Outputs:
*@li y: A complex tensor with the same shape as the input. The innermost dimension
of the input is replaced by its 1-dimensional Fourier transform. \n
*@par Third-party framework compatibility
* Compatible with TensorFlow FFT operator.
*/
REG_OP(FFT)
.INPUT(x, TensorType({DT_COMPLEX64,DT_COMPLEX128}))
.OUTPUT(y, TensorType({DT_COMPLEX64,DT_COMPLEX128}))
.OP_END_FACTORY_REG(FFT)
/**
*@brief Calculate the inverse 2-dimensional discrete Fourier transform over the
inner-most 2 dimensions of the input. \n
*@par Inputs:
*@li x: A Tensor. Must be one of the following types: complex64, complex128. \n
*@par Outputs:
*@li y: A complex tensor with the same shape as the input. The inner-most 2
dimensions of the input are replaced by their inverse 2-dimensional Fourier
transform. \n
*@par Third-party framework compatibility
* Compatible with TensorFlow IFFT2D operator.
*/
REG_OP(IFFT2D)
.INPUT(x, TensorType({DT_COMPLEX64,DT_COMPLEX128}))
.OUTPUT(y, TensorType({DT_COMPLEX64,DT_COMPLEX128}))
.OP_END_FACTORY_REG(IFFT2D)
} // namespace ge | |||
#endif // OPS_BUILT_IN_OP_PROTO_INC_SPECTRAL_OPS_H_ |
@@ -62,8 +62,8 @@ REG_OP(Split) | |||
*Must be one of the following types: float16, float32, int32, int8, int16, int64, uint8, uint16, uint32, uint64 | |||
*@par Attributes: | |||
*@li split_dim: A required int8, int16, int32, or int64. Specifies the dimension along which to split. No default value. | |||
*@li num_split: A required int8, int16, int32, or int64. Specifies the number of output tensors. No default value . \n | |||
*@li split_dim: A required int32. Specifies the dimension along which to split. No default value. | |||
*@li num_split: A required int32. Specifies the number of output tensors. No default value . \n | |||
*@par Outputs: | |||
*y:Dynamic output. A list of output tensors. Has the same type and format as "x" . \n | |||
@@ -94,12 +94,12 @@ REG_OP(SplitD) | |||
*@par Inputs: | |||
* Three inputs, including: | |||
*@li x: An ND Tensor. | |||
*Must be one of the following types: | |||
*@li size_splits: A list of int8, int16, int32, or int64. Specifies a list containing the sizes of each output tensor along the split dimension. | |||
*@li split_dim: An int8, int16, int32, or int64. Specifies the dimension along which to split . \n | |||
*Must be one of the types:float16, float32, double, int64, int32, uint8, uint16, uint32, uint64, int8, int16, complex64, complex128, qint8, quint8, qint16, quint16, qint32. | |||
*@li size_splits: Must be one of the types:int32, int64. Specifies a list containing the sizes of each output tensor along the split dimension. | |||
*@li split_dim: Must be the following type:int32. Specifies the dimension along which to split . \n | |||
*@par Attributes: | |||
*num_split: A required int8, int16, int32, or int64. Specifies the number of output tensors. No default value . \n | |||
*num_split: A required int32. Specifies the number of output tensors. No default value . \n | |||
*@par Outputs: | |||
*y: Dynamic output.A list of output tensors. Has the same type and format as "x" . \n | |||
@@ -129,9 +129,9 @@ REG_OP(SplitV) | |||
*Must be one of the following types: float16, float32, int32, int8, int16, int64, uint8, uint16, uint32, uint64 | |||
*@par Attributes: | |||
*@li size_splits: A required list of int8, int16, int32, or int64. Specifies a list containing the sizes of each output tensor along the split dimension. | |||
*@li split_dim: A required int8, int16, int32, or int64. Specifies the dimension along which to split. No default value. | |||
*@li num_split: A required int8, int16, int32, or int64. Specifies the number of output tensors. No default value . \n | |||
*@li size_splits: A required list of int32. Specifies a list containing the sizes of each output tensor along the split dimension. | |||
*@li split_dim: A required int32. Specifies the dimension along which to split. No default value. | |||
*@li num_split: A required int32. Specifies the number of output tensors. No default value . \n | |||
*@par Outputs: | |||
*y: Dynamic output.A list of output tensors. Has the same type and format as "x" . \n | |||
@@ -488,7 +488,7 @@ include: | |||
*/ | |||
REG_OP(AsString) | |||
.INPUT(x, TensorType({DT_INT8, DT_INT16, DT_INT32, DT_INT64, DT_FLOAT, \ | |||
DT_DOUBLE, DT_BOOL})) | |||
DT_DOUBLE, DT_BOOL, DT_COMPLEX64, DT_COMPLEX128})) | |||
.OUTPUT(y, TensorType({DT_STRING})) | |||
.ATTR(precision, Int, -1) | |||
.ATTR(scientific, Bool, false) | |||
@@ -46,6 +46,12 @@ typedef enum tagRtChipType { | |||
CHIP_END, | |||
} rtChipType_t; | |||
typedef enum tagRtAicpuScheType { | |||
SCHEDULE_SOFTWARE = 0, /* Software Schedule */ | |||
SCHEDULE_SOFTWARE_OPT, | |||
SCHEDULE_HARDWARE, /* HWTS Schedule */ | |||
} rtAicpuScheType; | |||
typedef enum tagRtVersion { | |||
VER_BEGIN = 0, | |||
VER_NA = VER_BEGIN, | |||
@@ -184,6 +190,19 @@ RTS_API rtError_t rtMemGetL2Info(rtStream_t stream, void **ptr, uint32_t *size); | |||
*/ | |||
RTS_API rtError_t rtGetRuntimeVersion(uint32_t *runtimeVersion); | |||
/** | |||
* @ingroup | |||
* @brief get device feature ability by device id, such as task schedule ability. | |||
* @param [in] deviceId | |||
* @param [in] moduleType | |||
* @param [in] featureType | |||
* @param [out] value | |||
* @return RT_ERROR_NONE for ok | |||
* @return RT_ERROR_INVALID_VALUE for error input | |||
*/ | |||
RTS_API rtError_t rtGetDeviceCapability(int32_t deviceId, int32_t moduleType, int32_t featureType, int32_t *value); | |||
#if defined(__cplusplus) && !defined(COMPILE_OMG_PACKAGE) | |||
} | |||
#endif | |||
@@ -62,6 +62,11 @@ typedef enum tagRtFeatureType { | |||
FEATURE_TYPE_RSV | |||
} rtFeatureType_t; | |||
typedef enum tagRtDeviceFeatureType { | |||
FEATURE_TYPE_SCHE, | |||
FEATURE_TYPE_END, | |||
} rtDeviceFeatureType_t; | |||
typedef enum tagMemcpyInfo { | |||
MEMCPY_INFO_SUPPORT_ZEROCOPY = 0, | |||
MEMCPY_INFO_RSV | |||
@@ -191,6 +191,14 @@ typedef void (*rtCallback_t)(void *fnData); | |||
#define RT_FUSION_KERNEL_DUMPFLAG (0x04) | |||
#define RT_KERNEL_CUSTOM_AICPU (0x08) | |||
/** | |||
* @ingroup rt_kernel | |||
* @brief kernel mode | |||
**/ | |||
#define RT_DEFAULT_KERNEL_MODE (0x00) | |||
#define RT_NORMAL_KERNEL_MODE (0x01) | |||
#define RT_ALL_KERNEL_MODE (0x02) | |||
/** | |||
* @ingroup rt_kernel | |||
* @brief kernel L1 Fusion Dump bit flags | |||
@@ -207,6 +215,16 @@ typedef void (*rtCallback_t)(void *fnData); | |||
*/ | |||
RTS_API rtError_t rtDevBinaryRegister(const rtDevBinary_t *bin, void **handle); | |||
/** | |||
* @ingroup rt_kernel | |||
* @brief register device binary with all kernel | |||
* @param [in] bin device binary description | |||
* @param [out] handle device binary handle | |||
* @return RT_ERROR_NONE for ok | |||
* @return RT_ERROR_INVALID_VALUE for error input | |||
*/ | |||
RTS_API rtError_t rtRegisterAllKernel(const rtDevBinary_t *bin, void **handle); | |||
/** | |||
* @ingroup rt_kernel | |||
* @brief register fast memeory device binary | |||
@@ -314,6 +332,23 @@ RTS_API rtError_t rtKernelConfigDump(uint32_t kind, uint32_t dumpSizePerBlock, u | |||
RTS_API rtError_t rtKernelLaunch(const void *stubFunc, uint32_t blockDim, void *args, uint32_t argsSize, | |||
rtSmDesc_t *smDesc, rtStream_t stream); | |||
/** | |||
* @ingroup rt_kernel | |||
* @brief launch kernel with handle to device | |||
* @param [in] handle program | |||
* @param [in] devFunc device function description. | |||
* @param [in] blockDim block dimentions | |||
* @param [in] args argments address for kernel function | |||
* @param [in] argsSize argements size | |||
* @param [in] smDesc shared memory description | |||
* @param [in] stream associated stream | |||
* @param [in] kernelInfo kernel info | |||
* @return RT_ERROR_NONE for ok | |||
* @return RT_ERROR_INVALID_VALUE for error input | |||
*/ | |||
RTS_API rtError_t rtKernelLaunchWithHandle(void *handle, const void *devFunc, uint32_t blockDim, void *args, uint32_t argsSize, | |||
rtSmDesc_t *smDesc, rtStream_t stream_, const void *kernelInfo); | |||
/** | |||
* @ingroup rt_kernel | |||
* @brief launch kernel to device | |||
@@ -50,6 +50,7 @@ typedef enum tagModelTaskType { | |||
RT_MODEL_TASK_STREAM_LABEL_SWITCH_BY_INDEX, | |||
RT_MODEL_TASK_STREAM_LABEL_GOTO, | |||
RT_MODEL_TASK_MODEL_EXIT, | |||
RT_MODEL_TASK_ALL_KERNEL, | |||
} rtModelTaskType_t; | |||
typedef enum tagModelStreamType { | |||
@@ -127,6 +128,18 @@ typedef struct tagKernelTaskInfo { | |||
uint16_t *argsOffset; | |||
} rtKernelTaskInfo_t; | |||
typedef struct tagAllKernelTaskInfo { | |||
uint16_t blockDim; | |||
uint16_t argsCount; | |||
uint16_t argsSize; | |||
uint16_t reserved; | |||
void *devfunc; | |||
void *handle; | |||
uint8_t *smDesc; | |||
uint8_t *args; | |||
uint16_t *argsOffset; | |||
} rtAllKernelTaskInfo_t; | |||
typedef struct tagKernelTaskInfoEx { | |||
uint32_t flags; | |||
uint32_t argsSize; | |||
@@ -251,6 +264,7 @@ typedef struct tagTaskInfo { | |||
union { | |||
rtKernelTaskInfoEx_t kernelTaskEx; | |||
rtKernelTaskInfo_t kernelTask; | |||
rtAllKernelTaskInfo_t allKernelTask; | |||
rtEventTaskInfo_t eventTask; | |||
rtStreamSwitchTaskInfo_t streamSwitchTask; | |||
rtStreamActiveTaskInfo_t streamActiveTask; | |||
@@ -120,15 +120,15 @@ typedef struct tagKV { | |||
} KeyValue; | |||
typedef enum { | |||
APPLICATION = 0, | |||
SYSTEM | |||
APPLICATION = 0, | |||
SYSTEM | |||
} ProcessType; | |||
typedef struct { | |||
ProcessType type; | |||
unsigned int pid; | |||
unsigned int deviceId; | |||
char reserved[RESERVERD_LENGTH]; | |||
ProcessType type; | |||
unsigned int pid; | |||
unsigned int deviceId; | |||
char reserved[RESERVERD_LENGTH]; | |||
} LogAttr; | |||
/** | |||
@@ -381,13 +381,13 @@ DLL_EXPORT void DlogFlush(void); | |||
* @ingroup slog | |||
* @brief Internal log interface, other modules are not allowed to call this interface | |||
*/ | |||
void DlogErrorInner(int moduleId, const char *fmt, ...) __attribute__((format(printf, 2, 3))); | |||
void DlogWarnInner(int moduleId, const char *fmt, ...) __attribute__((format(printf, 2, 3))); | |||
void DlogInfoInner(int moduleId, const char *fmt, ...) __attribute__((format(printf, 2, 3))); | |||
void DlogDebugInner(int moduleId, const char *fmt, ...) __attribute__((format(printf, 2, 3))); | |||
void DlogEventInner(int moduleId, const char *fmt, ...) __attribute__((format(printf, 2, 3))); | |||
void DlogInner(int moduleId, int level, const char *fmt, ...) __attribute__((format(printf, 3, 4))); | |||
void DlogWithKVInner(int moduleId, int level, KeyValue *pstKVArray, int kvNum, const char *fmt, ...) __attribute__((format(printf, 5, 6))); | |||
void DlogErrorInner(int moduleId, const char *fmt, ...); | |||
void DlogWarnInner(int moduleId, const char *fmt, ...); | |||
void DlogInfoInner(int moduleId, const char *fmt, ...); | |||
void DlogDebugInner(int moduleId, const char *fmt, ...); | |||
void DlogEventInner(int moduleId, const char *fmt, ...); | |||
void DlogInner(int moduleId, int level, const char *fmt, ...); | |||
void DlogWithKVInner(int moduleId, int level, KeyValue *pstKVArray, int kvNum, const char *fmt, ...); | |||
#ifdef __cplusplus | |||
#ifndef LOG_CPP | |||
@@ -500,8 +500,8 @@ DLL_EXPORT void DlogFlushForC(void); | |||
* @ingroup slog | |||
* @brief Internal log interface, other modules are not allowed to call this interface | |||
*/ | |||
void DlogInnerForC(int moduleId, int level, const char *fmt, ...) __attribute__((format(printf, 3, 4))); | |||
void DlogWithKVInnerForC(int moduleId, int level, KeyValue *pstKVArray, int kvNum, const char *fmt, ...) __attribute__((format(printf, 5, 6))); | |||
void DlogInnerForC(int moduleId, int level, const char *fmt, ...); | |||
void DlogWithKVInnerForC(int moduleId, int level, KeyValue *pstKVArray, int kvNum, const char *fmt, ...); | |||
#ifdef __cplusplus | |||
} | |||
@@ -1,72 +1,137 @@ | |||
/** | |||
* @file tune_api.h | |||
* | |||
* Copyright (c) Huawei Technologies Co., Ltd. 2020-2020. All rights reserved.\n | |||
* | |||
* This program is distributed in the hope that it will be useful, | |||
* but WITHOUT ANY WARRANTY; without even the implied warranty of | |||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.\n | |||
* 描述:mstune调优接口头文件 | |||
*/ | |||
/** @defgroup mstune mstune调优接口 */ | |||
#ifndef TUNE_API_H | |||
#define TUNE_API_H | |||
#include <vector> | |||
#include <map> | |||
#include <string> | |||
#include "graph/graph.h" | |||
#include "ge/ge_api.h" | |||
/** | |||
* @ingroup mstune | |||
* | |||
* mstune status | |||
*/ | |||
enum MsTuneStatus { | |||
MSTUNE_SUCCESS, /** tune success */ | |||
MSTUNE_FAILED, /** tune failed */ | |||
}; | |||
// Option key: for train options sets | |||
const std::string MSTUNE_SELF_KEY = "mstune"; | |||
const std::string MSTUNE_GEINIT_KEY = "initialize"; | |||
const std::string MSTUNE_GESESS_KEY = "session"; | |||
/** | |||
* @ingroup mstune | |||
* @par 描述: 命令行调优 | |||
* | |||
* @attention 无 | |||
* @param option [IN] 调优参数 | |||
* @param msg [OUT] 调优异常下返回信息 | |||
* @retval #MSTUNE_SUCCESS 执行成功 | |||
* @retval #MSTUNE_FAILED 执行失败 | |||
* @par 依赖: | |||
* @li tune_api.cpp:该接口所属的开发包。 | |||
* @li tune_api.h:该接口声明所在的头文件。 | |||
* @see 无 | |||
* @since | |||
*/ | |||
MsTuneStatus MsTuning(const std::map<std::string, std::string> &option, std::string &msg); | |||
/** | |||
* @ingroup mstune | |||
* @par 描述: 梯度调优 | |||
* | |||
* @attention 无 | |||
* @param tuningGraph [IN] 调优图 | |||
* @param dependGraph [IN] 调优依赖图 | |||
* @param session [IN] ge连接会话 | |||
* @param option [IN] 参数集. 包含调优参数及ge参数 | |||
* @retval #MSTUNE_SUCCESS 执行成功 | |||
* @retval #MSTUNE_FAILED 执行失败 | |||
* @par 依赖: | |||
* @li tune_api.cpp:该接口所属的开发包。 | |||
* @li tune_api.h:该接口声明所在的头文件。 | |||
* @see 无 | |||
* @since | |||
*/ | |||
extern "C" MsTuneStatus MsTrainTuning(ge::Graph &tuningGraph, std::vector<ge::Graph> &dependGraph, | |||
ge::Session *session, const std::map<std::string, std::map<std::string, std::string>> &option); | |||
#endif | |||
/** | |||
* @file tune_api.h | |||
* | |||
* Copyright (c) Huawei Technologies Co., Ltd. 2020-2020. All rights reserved.\n | |||
* | |||
* This program is distributed in the hope that it will be useful, | |||
* but WITHOUT ANY WARRANTY; without even the implied warranty of | |||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.\n | |||
* 描述:mstune调优接口头文件 | |||
*/ | |||
/** @defgroup mstune mstune调优接口 */ | |||
#ifndef TUNE_API_H | |||
#define TUNE_API_H | |||
#include <vector> | |||
#include <map> | |||
#include <string> | |||
#include "graph/graph.h" | |||
#include "ge/ge_api.h" | |||
/** | |||
* @ingroup mstune | |||
* | |||
* mstune status | |||
*/ | |||
enum MsTuneStatus { | |||
MSTUNE_SUCCESS, /** tune success */ | |||
MSTUNE_FAILED, /** tune failed */ | |||
}; | |||
// Option key: for train options sets | |||
const std::string MSTUNE_SELF_KEY = "mstune"; | |||
const std::string MSTUNE_GEINIT_KEY = "initialize"; | |||
const std::string MSTUNE_GESESS_KEY = "session"; | |||
#ifdef __cplusplus | |||
extern "C" { | |||
#endif | |||
struct RunnerInitConfig { | |||
// onilne online | |||
std::string profPath; | |||
std::string parserPath; | |||
// ncs only | |||
std::vector<uint32_t> devList; | |||
}; | |||
struct RunnerOpInfo { | |||
std::string opName; | |||
uint64_t opCostTime; | |||
uint64_t aicoreCostTime; | |||
// gradient_split only | |||
std::string modelName; | |||
std::string opType; | |||
std::vector<uint64_t> start; | |||
std::vector<uint64_t> end; | |||
}; | |||
struct RunnerModelInfo { | |||
uint64_t totalCostTime; | |||
}; | |||
struct RunnerRunResult { | |||
std::vector<RunnerModelInfo> modelInfo; | |||
std::vector<RunnerOpInfo> opInfo; | |||
}; | |||
struct RunnerResult { | |||
uint64_t totalCostTime; | |||
std::map<std::string, uint64_t> opCostTime; | |||
std::map<std::string, uint64_t> aicoreCostTime; | |||
}; | |||
struct RunnerDataBuf { | |||
void *ptr = nullptr; | |||
size_t size = 0; | |||
}; | |||
struct AOEBufferData { | |||
std::shared_ptr<uint8_t> data = nullptr; | |||
uint64_t length; | |||
}; | |||
struct RunnerConfig { | |||
bool isProf; | |||
uint32_t loop; | |||
// offline only | |||
std::vector<RunnerDataBuf> input; | |||
std::vector<RunnerDataBuf> output; | |||
std::string modelPath; | |||
RunnerDataBuf modelData; | |||
// online only | |||
uint32_t devId; | |||
std::vector<std::vector<ge::Tensor>> inputs; | |||
std::vector<ge::Graph> dependGraph; // run graph (for training) | |||
}; | |||
#ifdef __cplusplus | |||
} | |||
#endif | |||
/** | |||
* @ingroup mstune | |||
* @par 描述: 命令行调优 | |||
* | |||
* @attention 无 | |||
* @param option [IN] 调优参数 | |||
* @param msg [OUT] 调优异常下返回信息 | |||
* @retval #MSTUNE_SUCCESS 执行成功 | |||
* @retval #MSTUNE_FAILED 执行失败 | |||
* @par 依赖: | |||
* @li tune_api.cpp:该接口所属的开发包。 | |||
* @li tune_api.h:该接口声明所在的头文件。 | |||
* @see 无 | |||
* @since | |||
*/ | |||
MsTuneStatus MsTuning(const std::map<std::string, std::string> &option, std::string &msg); | |||
/** | |||
* @ingroup mstune | |||
* @par 描述: 梯度调优 | |||
* | |||
* @attention 无 | |||
* @param tuningGraph [IN] 调优图 | |||
* @param dependGraph [IN] 调优依赖图 | |||
* @param session [IN] ge连接会话 | |||
* @param option [IN] 参数集. 包含调优参数及ge参数 | |||
* @retval #MSTUNE_SUCCESS 执行成功 | |||
* @retval #MSTUNE_FAILED 执行失败 | |||
* @par 依赖: | |||
* @li tune_api.cpp:该接口所属的开发包。 | |||
* @li tune_api.h:该接口声明所在的头文件。 | |||
* @see 无 | |||
* @since | |||
*/ | |||
extern "C" MsTuneStatus MsTrainTuning(ge::Graph &tuningGraph, std::vector<ge::Graph> &dependGraph, | |||
ge::Session *session, const std::map<std::string, std::map<std::string, std::string>> &option); | |||
#endif |