diff --git a/inc/external/acl/acl_base.h b/inc/external/acl/acl_base.h
index b3111860..839bd597 100644
--- a/inc/external/acl/acl_base.h
+++ b/inc/external/acl/acl_base.h
@@ -111,6 +111,7 @@ static const int ACL_ERROR_DUMP_NOT_RUN = 100045;
static const int ACL_ERROR_PROF_REPEAT_SUBSCRIBE = 148046;
static const int ACL_ERROR_PROF_API_CONFLICT = 148047;
static const int ACL_ERROR_INVALID_MAX_OPQUEUE_NUM_CONFIG = 148048;
+static const int ACL_ERROR_INVALID_OPP_PATH = 148049;
static const int ACL_ERROR_BAD_ALLOC = 200000;
static const int ACL_ERROR_API_NOT_SUPPORT = 200001;
diff --git a/inc/external/acl/error_codes/ge_error_codes.h b/inc/external/acl/error_codes/ge_error_codes.h
index 041fc7ae..b477a18c 100644
--- a/inc/external/acl/error_codes/ge_error_codes.h
+++ b/inc/external/acl/error_codes/ge_error_codes.h
@@ -17,6 +17,20 @@
#ifndef INC_EXTERNAL_GE_GE_ERROR_CODES_H_
#define INC_EXTERNAL_GE_GE_ERROR_CODES_H_
+#if defined(_MSC_VER)
+#ifdef FUNC_VISIBILITY
+#define GE_FUNC_VISIBILITY __declspec(dllexport)
+#else
+#define GE_FUNC_VISIBILITY
+#endif
+#else
+#ifdef FUNC_VISIBILITY
+#define GE_FUNC_VISIBILITY __attribute__((visibility("default")))
+#else
+#define GE_FUNC_VISIBILITY
+#endif
+#endif
+
#include <stddef.h>
#ifdef __cplusplus
diff --git a/inc/external/acl/error_codes/rt_error_codes.h b/inc/external/acl/error_codes/rt_error_codes.h
index d2373525..2109fb79 100644
--- a/inc/external/acl/error_codes/rt_error_codes.h
+++ b/inc/external/acl/error_codes/rt_error_codes.h
@@ -92,7 +92,8 @@ static const int32_t ACL_ERROR_RT_LABEL_CONTEXT = 507031; // label not
static const int32_t ACL_ERROR_RT_PROGRAM_USE_OUT = 507032; // program register num use out
static const int32_t ACL_ERROR_RT_DEV_SETUP_ERROR = 507033; // device setup error
-static const int32_t ACL_ERROR_RT_DRV_INTERNAL_ERROR = 507899; // drv internal error
+static const int32_t ACL_ERROR_RT_DRV_INTERNAL_ERROR = 507899; // drv internal error
+static const int32_t ACL_ERROR_RT_AICPU_INTERNAL_ERROR = 507900; // aicpu internal error
#ifdef __cplusplus
}
diff --git a/inc/external/runtime/rt_error_codes.h b/inc/external/runtime/rt_error_codes.h
index d2373525..2109fb79 100644
--- a/inc/external/runtime/rt_error_codes.h
+++ b/inc/external/runtime/rt_error_codes.h
@@ -92,7 +92,8 @@ static const int32_t ACL_ERROR_RT_LABEL_CONTEXT = 507031; // label not
static const int32_t ACL_ERROR_RT_PROGRAM_USE_OUT = 507032; // program register num use out
static const int32_t ACL_ERROR_RT_DEV_SETUP_ERROR = 507033; // device setup error
-static const int32_t ACL_ERROR_RT_DRV_INTERNAL_ERROR = 507899; // drv internal error
+static const int32_t ACL_ERROR_RT_DRV_INTERNAL_ERROR = 507899; // drv internal error
+static const int32_t ACL_ERROR_RT_AICPU_INTERNAL_ERROR = 507900; // aicpu internal error
#ifdef __cplusplus
}
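For illustration only, not part of the patch: a minimal sketch of how the GE_FUNC_VISIBILITY macro added to ge_error_codes.h is typically consumed. The function name below is hypothetical; when the library is built with FUNC_VISIBILITY defined (and, on GCC/Clang, with -fvisibility=hidden), decorated symbols are exported, otherwise the macro expands to nothing.

#include <cstddef>
#include "ge_error_codes.h"  // assumed include path for this sketch

// Hypothetical exported API: __declspec(dllexport) under MSVC,
// __attribute__((visibility("default"))) under GCC/Clang.
GE_FUNC_VISIBILITY std::size_t GetGeErrorCodeCount();

std::size_t GetGeErrorCodeCount() {
  return 0;  // placeholder body, illustration only
}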
diff --git a/third_party/fwkacllib/inc/ops/batch_ops.h b/third_party/fwkacllib/inc/ops/batch_ops.h
index a4786cd3..181bf694 100644
--- a/third_party/fwkacllib/inc/ops/batch_ops.h
+++ b/third_party/fwkacllib/inc/ops/batch_ops.h
@@ -107,11 +107,13 @@ across multiple sessions . \n
REG_OP(Unbatch)
    .INPUT(x_tensor, TensorType({DT_INT8, DT_UINT8, DT_INT16, DT_UINT16, \
-        DT_INT32, DT_INT64, DT_BOOL, DT_FLOAT, DT_DOUBLE}))
+        DT_INT32, DT_INT64, DT_BOOL, DT_FLOAT, DT_DOUBLE, DT_FLOAT16, \
+        DT_COMPLEX64, DT_COMPLEX128}))
    .INPUT(index, TensorType({DT_INT64}))
    .INPUT(id, TensorType({DT_INT64}))
    .OUTPUT(y_tensor, TensorType({DT_INT8, DT_UINT8, DT_INT16, DT_UINT16, \
-        DT_INT32, DT_INT64, DT_BOOL, DT_FLOAT, DT_DOUBLE}))
+        DT_INT32, DT_INT64, DT_BOOL, DT_FLOAT, DT_DOUBLE, DT_FLOAT16, \
+        DT_COMPLEX64, DT_COMPLEX128}))
    .REQUIRED_ATTR(timeout_micros, Int)
    .ATTR(container, String, "")
    .ATTR(shared_name, String, "")
@@ -146,13 +148,16 @@ across multiple sessions . \n
REG_OP(UnbatchGrad)
    .INPUT(x_input, TensorType({DT_INT8, DT_UINT8, DT_INT16, DT_UINT16, \
-        DT_INT32, DT_INT64, DT_BOOL, DT_FLOAT, DT_DOUBLE}))
+        DT_INT32, DT_INT64, DT_BOOL, DT_FLOAT, DT_DOUBLE, DT_FLOAT16, \
+        DT_COMPLEX64, DT_COMPLEX128}))
    .INPUT(index, TensorType({DT_INT64}))
    .INPUT(grad, TensorType({DT_INT8, DT_UINT8, DT_INT16, DT_UINT16, \
-        DT_INT32, DT_INT64, DT_BOOL, DT_FLOAT, DT_DOUBLE}))
+        DT_INT32, DT_INT64, DT_BOOL, DT_FLOAT, DT_DOUBLE, DT_FLOAT16, \
+        DT_COMPLEX64, DT_COMPLEX128}))
    .INPUT(id, TensorType({DT_INT64}))
    .OUTPUT(y_grad, TensorType({DT_INT8, DT_UINT8, DT_INT16, DT_UINT16, \
-        DT_INT32, DT_INT64, DT_BOOL, DT_FLOAT, DT_DOUBLE}))
+        DT_INT32, DT_INT64, DT_BOOL, DT_FLOAT, DT_DOUBLE, DT_FLOAT16, \
+        DT_COMPLEX64, DT_COMPLEX128}))
    .ATTR(container, String, "")
    .ATTR(shared_name, String, "")
    .OP_END_FACTORY_REG(UnbatchGrad)
diff --git a/third_party/fwkacllib/inc/ops/data_flow_ops.h b/third_party/fwkacllib/inc/ops/data_flow_ops.h
index 45303828..0043c027 100644
--- a/third_party/fwkacllib/inc/ops/data_flow_ops.h
+++ b/third_party/fwkacllib/inc/ops/data_flow_ops.h
@@ -1430,6 +1430,24 @@ REG_OP(OrderedMapClear)
    .ATTR(shared_name, String, "")
    .OP_END_FACTORY_REG(OrderedMapClear)
+/**
+*@brief FakeQueue: a fake queue resource handle, supporting the TF API FixedLengthRecordReader. \n
+
+*@par Inputs:
+*Including:
+* @li resource: A Tensor of type DT_RESOURCE.
+
+*@par Outputs:
+*handle: A Tensor of type DT_STRING ref. \n
+
+*@par Third-party framework compatibility
+*Compatible with the TensorFlow operator FakeQueue.
+*/
+REG_OP(FakeQueue)
+    .INPUT(resource, TensorType({DT_RESOURCE}))
+    .OUTPUT(handle, TensorType({DT_STRING}))
+    .OP_END_FACTORY_REG(FakeQueue)
+
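For illustration only, not part of the patch: every registration in these headers chains INPUT/OUTPUT/ATTR declarations onto the prototype created by REG_OP and closes with OP_END_FACTORY_REG. A hypothetical custom op would follow the same pattern:

#include "graph/operator_reg.h"

namespace ge {
// Hypothetical example op, shown only to illustrate the registration DSL.
REG_OP(MyScale)
    .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT}))   // required input tensor
    .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT}))  // output tensor
    .ATTR(scale, Float, 1.0)                        // optional attr with a default
    .REQUIRED_ATTR(axis, Int)                       // attr callers must set
    .OP_END_FACTORY_REG(MyScale)
}  // namespace ge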
/**
*@brief Returns the number of incomplete elements in the underlying container. \n
@@ -2258,6 +2276,7 @@ REG_OP(LruCache)
    .ATTR(shared_name, String, "LruCache")
    .ATTR(cache_size, Int, 100000)
    .ATTR(load_factor, Float, 1)
+   .REQUIRED_ATTR(dtype, Type)
    .OP_END_FACTORY_REG(LruCache)
@@ -2277,9 +2296,9 @@ REG_OP(CacheAdd)
    .INPUT(cache, TensorType({DT_RESOURCE}))
    .INPUT(ids, TensorType({DT_INT64, DT_INT32, DT_UINT64, DT_UINT32}))
    .OUTPUT(swap_in_id, TensorType({DT_INT64, DT_INT32, DT_UINT64, DT_UINT32}))
-   .OUTPUT(swap_in_idx, TensorType({DT_INT64}))
+   .OUTPUT(swap_in_idx, TensorType({DT_INT64, DT_INT32, DT_UINT64, DT_UINT32}))
    .OUTPUT(swap_out_id, TensorType({DT_INT64, DT_INT32, DT_UINT64, DT_UINT32}))
-   .OUTPUT(swap_out_idx, TensorType({DT_INT64}))
+   .OUTPUT(swap_out_idx, TensorType({DT_INT64, DT_INT32, DT_UINT64, DT_UINT32}))
    .OP_END_FACTORY_REG(CacheAdd)
@@ -2295,9 +2314,31 @@ REG_OP(CacheRemoteIndexToLocal)
    .INPUT(cache, TensorType({DT_RESOURCE}))
    .INPUT(ids, TensorType({DT_INT64, DT_INT32, DT_UINT64, DT_UINT32}))
-   .OUTPUT(local_idx, TensorType({DT_INT64}))
+   .OUTPUT(local_idx, TensorType({DT_INT64, DT_INT32, DT_UINT64, DT_UINT32}))
    .OP_END_FACTORY_REG(CacheRemoteIndexToLocal)
+/**
+*@brief CacheAllIndexToLocal, get all ids in cache.
+*@par Inputs:
+*cache: resource data
+*@par Outputs:
+*local_idx: ids in cache.
+*@par Restrictions:
+*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use.
+*/
+REG_OP(CacheAllIndexToLocal)
+    .INPUT(cache, TensorType({DT_RESOURCE}))
+    .OUTPUT(local_idx, TensorType({DT_INT64, DT_INT32, DT_UINT64, DT_UINT32}))
+    .REQUIRED_ATTR(dtype, Type)
+    .OP_END_FACTORY_REG(CacheAllIndexToLocal)
+
+/**
+*@brief Gets the next batch of tensors from the data pipeline, supporting dynamic tensor shapes.
+*@par Inputs:
+*x: The iterator resource to read from.
+*@par Outputs:
+*y: Dynamic output. The tensors of the next batch, with types "output_types" and shapes "output_shapes".
+*/
+REG_OP(DynamicGetNext)
+    .INPUT(x, TensorType::ALL())
+    .DYNAMIC_OUTPUT(y, TensorType::ALL())
+    .ATTR(output_types, ListType, {})
+    .ATTR(output_shapes, ListListInt, {{}, {}})
+    .ATTR(_dynamic_graph_execute_mode, String, "lazy_recompile")
+    .ATTR(_getnext_inputs_shape_range, String, "")
+    .OP_END_FACTORY_REG(DynamicGetNext)
} // namespace ge
#endif // OPS_BUILT_IN_OP_PROTO_INC_DATA_FLOW_OPS_H_
diff --git a/third_party/fwkacllib/inc/ops/elewise_calculation_ops.h b/third_party/fwkacllib/inc/ops/elewise_calculation_ops.h
index e65c7027..9f981d12 100644
--- a/third_party/fwkacllib/inc/ops/elewise_calculation_ops.h
+++ b/third_party/fwkacllib/inc/ops/elewise_calculation_ops.h
@@ -3627,6 +3627,35 @@ REG_OP(Lerp)
    .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT}))
    .OP_END_FACTORY_REG(Lerp)
+/**
+*@brief Returns the number of elements for which abs(x1 - x2) > atol + rtol * abs(x2), element-wise. \n
+
+*
+*@par Inputs:
+*@li x1: A tensor. Must be one of the following types: float32, int32, uint8, int8, float16
+*@li x2: A tensor of the same type as "x1".
+*
+*@par Attributes:
+* atol: Defaults to "1e-05".
+* rtol: Defaults to "1e-03".
+*
+*@par Outputs:
+* num: A tensor of type float32, holding the mismatch count.
+* diff: A tensor of type float16.
+*
+*@par Restrictions:
+*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use.
+*
+*/
+REG_OP(DataCompare)
+    .INPUT(x1, TensorType({ DT_FLOAT16, DT_FLOAT, DT_INT8, DT_UINT8, DT_INT32 }))
+    .INPUT(x2, TensorType({ DT_FLOAT16, DT_FLOAT, DT_INT8, DT_UINT8, DT_INT32 }))
+    .OUTPUT(num, TensorType({DT_FLOAT}))
+    .OUTPUT(diff, TensorType({DT_FLOAT16}))
+    .ATTR(atol, Float, 1e-5)
+    .ATTR(rtol, Float, 1e-3)
+    .OP_END_FACTORY_REG(DataCompare)
+
/**
*@brief Hardmax(element in input, axis) = 1 if the element is the first maximum value along the specified axis, 0
*otherwise. The input does not need to explicitly be a 2D vector. The "axis" attribute indicates the dimension along
@@ -3650,6 +3679,57 @@ REG_OP(HardMax)
    .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT}))
    .ATTR(axis, Int, -1)
    .OP_END_FACTORY_REG(HardMax)
+
+/**
+* @brief Computes the dot product (inner product) of two tensors. This function does not broadcast.
+
+* @par Inputs:
+* Two inputs, including:
+* @li input_x: A Tensor. The first tensor must be 1D. \n
+* @li input_y: A Tensor. The second tensor must be 1D. \n
+
+* @par Outputs:
+* @li output: A Tensor. The dot product of the two inputs; must be 1D. \n
+
+* @par Third-party framework compatibility
+* Compatible with the Pytorch dot operator. \n
+*/
+REG_OP(Dot)
+    .INPUT(input_x, TensorType({DT_FLOAT, DT_FLOAT16, DT_UINT8, DT_INT8, DT_INT32}))
+    .INPUT(input_y, TensorType({DT_FLOAT, DT_FLOAT16, DT_UINT8, DT_INT8, DT_INT32}))
+    .OUTPUT(output, TensorType({DT_FLOAT, DT_FLOAT16, DT_UINT8, DT_INT8, DT_INT32}))
+    .OP_END_FACTORY_REG(Dot)
+
+/**
+*@brief Returns a new tensor with boolean elements representing \n
+*if each element of input is "close" to the corresponding element of other \n
+
+*@par Inputs:
+*Two inputs, including:
+* @li x1: A tensor. Must be one of the following types:
+* float16, float32, int32. \n
+* @li x2: A tensor with the same type and shape as "x1". \n
+
+*@par Attributes:
+*@li rtol: An optional float. Defaults to 1e-05. \n
+*@li atol: An optional float. Defaults to 1e-08. \n
+*@li equal_nan: An optional bool. Defaults to false. \n
+
+*@par Outputs:
+*y: A bool Tensor with the same shape as "x1". \n
+
+*@par Third-party framework compatibility
+*Compatible with the Pytorch operator isclose. \n
+*/
+REG_OP(IsClose)
+    .INPUT(x1, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32}))
+    .INPUT(x2, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32}))
+    .OUTPUT(y, TensorType({DT_BOOL}))
+    .ATTR(rtol, Float, 1e-05)
+    .ATTR(atol, Float, 1e-08)
+    .ATTR(equal_nan, Bool, false)
+    .OP_END_FACTORY_REG(IsClose)
+
} // namespace ge
#endif // OPS_BUILT_IN_OP_PROTO_INC_ELEWISE_CALCULATION_OPS_H_
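For illustration only, not part of the patch: the tolerance rule shared by the DataCompare and IsClose ops above, shown on plain host data. DataCompare counts the elements where |x1 - x2| > atol + rtol * |x2|; IsClose reports per-element closeness using the complementary test.

#include <cmath>
#include <cstddef>
#include <vector>

std::size_t CountMismatches(const std::vector<float>& x1,
                            const std::vector<float>& x2,
                            float atol = 1e-5f, float rtol = 1e-3f) {
  std::size_t num = 0;
  for (std::size_t i = 0; i < x1.size(); ++i) {
    if (std::fabs(x1[i] - x2[i]) > atol + rtol * std::fabs(x2[i])) {
      ++num;  // element-wise tolerance exceeded
    }
  }
  return num;
}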
diff --git a/third_party/fwkacllib/inc/ops/hcom_ops.h b/third_party/fwkacllib/inc/ops/hcom_ops.h
index cb9fbe22..f4ded0cd 100644
--- a/third_party/fwkacllib/inc/ops/hcom_ops.h
+++ b/third_party/fwkacllib/inc/ops/hcom_ops.h
@@ -238,6 +238,15 @@ REG_OP(HcomRemoteRead)
    .REQUIRED_ATTR(dtype, Type)
    .OP_END_FACTORY_REG(HcomRemoteRead)
+/**
+ * @brief Performs a remote ref read of the input tensors.
+ * @par Inputs:
+ * remote: A tensor describing the remote memory address to read: u64 remoteId, u64 addrRemote, u64 length.
+ * cache_var: The local base address.
+ * local_offset: Skip step length.
+ * @par Outputs:
+ * cache_var: The local base address.
+ */
REG_OP(HcomRemoteRefRead)
    .INPUT(remote, TensorType({DT_UINT64}))
    .INPUT(cache_var, TensorType({DT_UINT64}))
@@ -258,6 +267,13 @@ REG_OP(HcomRemoteWrite)
    .INPUT(local, TensorType::ALL())
    .OP_END_FACTORY_REG(HcomRemoteWrite)
+/**
+ * @brief Performs a remote scatter write of the input tensors.
+ * @par Inputs:
+ * remote: A tensor describing the remote memory address to write: u64 remoteId, u64 addrRemote, u64 length.
+ * local: A Tensor whose element count is length / size_of(Type).
+ */
REG_OP(HcomRemoteScatterWrite)
    .INPUT(remote, TensorType({DT_INT64, DT_UINT64}))
    .INPUT(local, TensorType::ALL())
    .OP_END_FACTORY_REG(HcomRemoteScatterWrite)
diff --git a/third_party/fwkacllib/inc/ops/image_ops.h b/third_party/fwkacllib/inc/ops/image_ops.h
index d7f60346..4703705b 100644
--- a/third_party/fwkacllib/inc/ops/image_ops.h
+++ b/third_party/fwkacllib/inc/ops/image_ops.h
@@ -652,6 +652,62 @@ REG_OP(RGBToHSV)
/**
*@brief Generate a single randomly distorted bounding box for an image . \n
+*@par Inputs:
+*Input images must be a 4-D tensor. Inputs include:
+*@li image_size: 1-D, containing [height, width, channels].
+*@li bounding_boxes: 3-D with shape [batch, N, 4] describing the N bounding boxes associated with the image. \n
+
+*@par Attributes:
+*@li seed: If either seed or seed2 are set to non-zero, the random number generator is seeded by the given seed. Otherwise, it is seeded by a random seed.
+*@li seed2: A second seed to avoid seed collision.
+*@li min_object_covered: The cropped area of the image must contain at least this fraction of any bounding box supplied. The value of this parameter should be non-negative. In the case of 0, the cropped area does not need to overlap any of the bounding boxes supplied .
+*@li aspect_ratio_range: The cropped area of the image must have an aspect ratio = width / height within this range.
+*@li area_range: The cropped area of the image must contain a fraction of the supplied image within this range.
+*@li max_attempts: Number of attempts at generating a cropped region of the image of the specified constraints. After max_attempts failures, return the entire image.
+*@li use_image_if_no_bounding_boxes: Controls behavior if no bounding boxes supplied. If true, assume an implicit bounding box covering the whole input. If false, raise an error . \n
+
+*@par Outputs:
+*@li begin: 1-D, containing [offset_height, offset_width, 0].
+*@li size: 1-D, containing [target_height, target_width, -1].
+*@li bboxes: 3-D with shape [1, 1, 4] containing the distorted bounding box . \n
+
+*@attention Constraints:
+*Input images can be of different types but output images are always float . \n
+
+*@par Third-party framework compatibility
+*Compatible with tensorflow SampleDistortedBoundingBox operator.
+*/
+
+REG_OP(SampleDistortedBoundingBox)
+    .INPUT(image_size, TensorType({ DT_UINT8, DT_INT8, DT_INT16, \
+        DT_INT32, DT_INT64 }))
+    .INPUT(bounding_boxes, TensorType({ DT_FLOAT }))
+    .OUTPUT(begin, TensorType({ DT_UINT8, DT_INT8, DT_INT16, \
+        DT_INT32, DT_INT64 }))
+    .OUTPUT(size, TensorType({ DT_UINT8, DT_INT8, DT_INT16, \
+        DT_INT32, DT_INT64 }))
+    .OUTPUT(bboxes, TensorType({ DT_FLOAT }))
+    .ATTR(seed, Int, 0)
+    .ATTR(seed2, Int, 0)
+    .ATTR(min_object_covered, Float, 0.1f)
+    .ATTR(aspect_ratio_range, ListFloat, { 0.75f, 1.33f })
+    .ATTR(area_range, ListFloat, { 0.05f, 1.0f })
+    .ATTR(max_attempts, Int, 100)
+    .ATTR(use_image_if_no_bounding_boxes, Bool, false)
+    .OP_END_FACTORY_REG(SampleDistortedBoundingBox)
+
+/**
+*@brief Generate a single randomly distorted bounding box for an image . \n
+
*@par Inputs:
*Input images must be a 4-D tensor. Inputs include:
*@li image_size: 1-D, containing [height, width, channels].
*@li bounding_boxes: 3-D with shape [batch, N, 4] describing the N bounding boxes associated with the image. \n
@@ -1424,11 +1480,11 @@ REG_OP(Resize)
*@par Attributes:
*@li channels: An optional int. Defaults to 0. Number of color channels for the decoded image.
-*@li ratio: An optional int. Defaults to 1. Downscaling ratio.
+*@li ratio: An optional int. Defaults to 1. Downscaling ratio.
*@li fancy_upscaling: An optional bool.
Defaults to True. If true, use a slower but nicer upscaling of the chroma planes.
*@li try_recover_truncated: An optional bool. Defaults to False. If true, try to recover an image from truncated input.
*@li acceptable_fraction: An optional float. Defaults to 1. The minimum required fraction of lines before a truncated input is accepted.
-*@li dct_method: An optional string. Defaults to "". string specifying a hint about the algorithm used for decompression. \n
+*@li dct_method: An optional string. Defaults to "". A string specifying a hint about the algorithm used for decompression. \n
*@par Outputs:
*image: A Tensor dtype of uint8.
diff --git a/third_party/fwkacllib/inc/ops/linalg_ops.h b/third_party/fwkacllib/inc/ops/linalg_ops.h
index d8f45c5d..330fef2e 100644
--- a/third_party/fwkacllib/inc/ops/linalg_ops.h
+++ b/third_party/fwkacllib/inc/ops/linalg_ops.h
@@ -61,8 +61,8 @@ REG_OP(CholeskyGrad)
*@par Inputs:
*The input x has to be symmetric and positive definite. Inputs include:
-*x:A Tensor. Must be one of the following types: double, float32. Shape
-is [..., M, M] . \n
+*x:A Tensor. Must be one of the following types: double, float32, float16,
+complex64, complex128. Shape is [..., M, M] . \n
*@par Outputs:
*y:A Tensor. Has the same type as x . \n
@@ -76,8 +76,10 @@ form square matrices.
*/
REG_OP(Cholesky)
-    .INPUT(x, TensorType({DT_FLOAT, DT_DOUBLE}))
-    .OUTPUT(y, TensorType({DT_FLOAT, DT_DOUBLE}))
+    .INPUT(x, TensorType({DT_FLOAT, DT_DOUBLE, \
+        DT_FLOAT16, DT_COMPLEX64, DT_COMPLEX128}))
+    .OUTPUT(y, TensorType({DT_FLOAT, DT_DOUBLE, \
+        DT_FLOAT16, DT_COMPLEX64, DT_COMPLEX128}))
    .OP_END_FACTORY_REG(Cholesky)
/**
@@ -87,8 +89,8 @@ of one or more square matrices . \n
*@par Inputs:
*The input x is a tensor of shape [N, M, M] whose inner-most 2 dimensions
form square matrices. Inputs include:
-*x:A Tensor. Must be one of the following types: double, float32. Shape is
-[..., M, M] . \n
+*x:A Tensor. Must be one of the following types: double, float32,
+complex64, complex128. Shape is [..., M, M] . \n
*@par Outputs:
*@li y:A Tensor. Has the same type as x.
@@ -103,9 +105,9 @@ form square matrices. \n
*/
REG_OP(LogMatrixDeterminant)
-    .INPUT(x, TensorType({DT_FLOAT, DT_DOUBLE}))
-    .OUTPUT(sign, TensorType({DT_FLOAT, DT_DOUBLE}))
-    .OUTPUT(y, TensorType({DT_FLOAT, DT_DOUBLE}))
+    .INPUT(x, TensorType({DT_FLOAT, DT_DOUBLE, DT_COMPLEX64, DT_COMPLEX128}))
+    .OUTPUT(sign, TensorType({DT_FLOAT, DT_DOUBLE, DT_COMPLEX64, DT_COMPLEX128}))
+    .OUTPUT(y, TensorType({DT_FLOAT, DT_DOUBLE, DT_COMPLEX64, DT_COMPLEX128}))
    .OP_END_FACTORY_REG(LogMatrixDeterminant)
/**
@@ -114,8 +116,8 @@ REG_OP(LogMatrixDeterminant)
*@par Inputs:
*The input x is a tensor of shape [N, M, M] whose inner-most 2 dimensions
form square matrices. Inputs include:
-*x:A Tensor. Must be one of the following types: double, float32. Shape is
-[..., M, M] . \n
+*x:A Tensor. Must be one of the following types: double, float32, complex64,
+complex128. Shape is [..., M, M] . \n
*@par Outputs:
*y:A Tensor. Has the same type as x . \n
@@ -129,8 +131,8 @@ form square matrices.
*/
REG_OP(MatrixDeterminant)
-    .INPUT(x, TensorType({DT_FLOAT, DT_DOUBLE}))
-    .OUTPUT(y, TensorType({DT_FLOAT, DT_DOUBLE}))
+    .INPUT(x, TensorType({DT_FLOAT, DT_DOUBLE, DT_COMPLEX64, DT_COMPLEX128}))
+    .OUTPUT(y, TensorType({DT_FLOAT, DT_DOUBLE, DT_COMPLEX64, DT_COMPLEX128}))
    .OP_END_FACTORY_REG(MatrixDeterminant)
/**
@@ -140,8 +142,7 @@ their adjoints (conjugate transposes) . \n
*@par Inputs:
*The input x is a tensor of shape [..., M, M] whose inner-most 2 dimensions
form square matrices. Inputs include:
-*x:A Tensor. Must be one of the following types: double, float. Shape is
-[..., M, M] . \n
+*x:An input tensor. Shape is [..., M, M] . \n
*@par Attributes:
*adjoint:An optional bool. Defaults to False. Boolean indicating whether to
@@ -159,8 +160,10 @@ form square matrices. \n
*/
REG_OP(MatrixInverse)
-    .INPUT(x, TensorType({DT_FLOAT, DT_DOUBLE}))
-    .OUTPUT(y, TensorType({DT_FLOAT, DT_DOUBLE}))
+    .INPUT(x, TensorType({DT_FLOAT, DT_DOUBLE, DT_FLOAT16, \
+        DT_COMPLEX64, DT_COMPLEX128}))
+    .OUTPUT(y, TensorType({DT_FLOAT, DT_DOUBLE, DT_FLOAT16, \
+        DT_COMPLEX64, DT_COMPLEX128}))
    .ATTR(adjoint, Bool, false)
    .OP_END_FACTORY_REG(MatrixInverse)
@@ -169,8 +172,7 @@ REG_OP(MatrixInverse)
*@par Inputs:
*The input rhs must have the same type as matrix. Inputs include:
-*@li matrix:A Tensor. Must be one of the following types: double, float.
-Shape is [..., M, M].
+*@li matrix:An input tensor. Shape is [..., M, M].
*@li rhs:A Tensor. Must have the same type as matrix. Shape is [..., M, K] . \n
*@par Attributes:
@@ -189,9 +191,9 @@ dimensions form square matrices. \n
*/
REG_OP(MatrixSolve)
-    .INPUT(matrix, TensorType({DT_FLOAT, DT_DOUBLE}))
-    .INPUT(rhs, TensorType({DT_FLOAT, DT_DOUBLE}))
-    .OUTPUT(y, TensorType({DT_FLOAT, DT_DOUBLE}))
+    .INPUT(matrix, TensorType({DT_FLOAT, DT_DOUBLE, DT_COMPLEX64, DT_COMPLEX128}))
+    .INPUT(rhs, TensorType({DT_FLOAT, DT_DOUBLE, DT_COMPLEX64, DT_COMPLEX128}))
+    .OUTPUT(y, TensorType({DT_FLOAT, DT_DOUBLE, DT_COMPLEX64, DT_COMPLEX128}))
    .ATTR(adjoint, Bool, false)
    .OP_END_FACTORY_REG(MatrixSolve)
@@ -221,8 +223,10 @@ dimensions form square matrices. \n
*/
REG_OP(MatrixSolveLs)
-    .INPUT(matrix, TensorType({DT_FLOAT, DT_DOUBLE}))
-    .INPUT(rhs, TensorType({DT_FLOAT, DT_DOUBLE}))
+    .INPUT(matrix, TensorType({DT_FLOAT, DT_DOUBLE, DT_FLOAT16, \
+        DT_COMPLEX64, DT_COMPLEX128}))
+    .INPUT(rhs, TensorType({DT_FLOAT, DT_DOUBLE, DT_FLOAT16, \
+        DT_COMPLEX64, DT_COMPLEX128}))
    .INPUT(l2, TensorType({DT_DOUBLE}))
    .OUTPUT(y, TensorType({DT_FLOAT, DT_DOUBLE}))
    .ATTR(fast, Bool, true)
@@ -234,8 +238,7 @@ matrices by backsubstitution . \n
*@par Inputs:
*The input rhs must have the same type as matrix. Inputs include:
-*@li matrix: A Tensor. Must be one of the following types: double, float.
-Shape is [..., M, M].
+*@li matrix: A Tensor. Shape is [..., M, M].
*@li rhs:A Tensor. Must have the same type as matrix. Shape is [..., M, K] . \n
*@par Attributes:
@@ -256,9 +259,12 @@ dimensions form square matrices. \n
*/
REG_OP(MatrixTriangularSolve)
-    .INPUT(matrix, TensorType({DT_FLOAT, DT_DOUBLE}))
-    .INPUT(rhs, TensorType({DT_FLOAT, DT_DOUBLE}))
-    .OUTPUT(y, TensorType({DT_FLOAT, DT_DOUBLE}))
+    .INPUT(matrix, TensorType({DT_FLOAT, DT_DOUBLE, DT_FLOAT16, \
+        DT_COMPLEX64, DT_COMPLEX128}))
+    .INPUT(rhs, TensorType({DT_FLOAT, DT_DOUBLE, DT_FLOAT16, \
+        DT_COMPLEX64, DT_COMPLEX128}))
+    .OUTPUT(y, TensorType({DT_FLOAT, DT_DOUBLE, DT_FLOAT16, \
+        DT_COMPLEX64, DT_COMPLEX128}))
    .ATTR(lower, Bool, true)
    .ATTR(adjoint, Bool, false)
    .OP_END_FACTORY_REG(MatrixTriangularSolve)
@@ -268,8 +274,7 @@ REG_OP(MatrixTriangularSolve)
*@par Inputs:
*The input shape of x must be [..., M, N]. Inputs include:
-*x:A Tensor whose shape is [..., M, N]. Must be one of the following types:
-double, float . \n
+*x:A Tensor whose shape is [..., M, N]. \n
*@par Attributes:
*full_matrices: An optional bool. Defaults to False.
If true, compute
@@ -289,9 +294,12 @@ dimensions form matrices of size [M, N]. \n
*/
REG_OP(Qr)
-    .INPUT(x, TensorType({ DT_FLOAT16, DT_FLOAT, DT_DOUBLE }))
-    .OUTPUT(q, TensorType({ DT_FLOAT16, DT_FLOAT, DT_DOUBLE }))
-    .OUTPUT(r, TensorType({ DT_FLOAT16, DT_FLOAT, DT_DOUBLE }))
+    .INPUT(x, TensorType({ DT_FLOAT16, DT_FLOAT, DT_DOUBLE, \
+        DT_COMPLEX64, DT_COMPLEX128 }))
+    .OUTPUT(q, TensorType({ DT_FLOAT16, DT_FLOAT, DT_DOUBLE, \
+        DT_COMPLEX64, DT_COMPLEX128 }))
+    .OUTPUT(r, TensorType({ DT_FLOAT16, DT_FLOAT, DT_DOUBLE, \
+        DT_COMPLEX64, DT_COMPLEX128 }))
    .ATTR(full_matrices, Bool, false)
    .OP_END_FACTORY_REG(Qr)
@@ -384,8 +392,8 @@ of the rows encoded as a list of indices in `0..M-1`. Shape is `[..., M]` . \n
*/
REG_OP(Lu)
-    .INPUT(input, TensorType({DT_FLOAT, DT_DOUBLE}))
-    .OUTPUT(lu, TensorType({DT_FLOAT, DT_DOUBLE}))
+    .INPUT(input, TensorType({DT_FLOAT, DT_DOUBLE, DT_COMPLEX64, DT_COMPLEX128}))
+    .OUTPUT(lu, TensorType({DT_FLOAT, DT_DOUBLE, DT_COMPLEX64, DT_COMPLEX128}))
    .OUTPUT(p, TensorType({DT_INT32, DT_INT64}))
    .REQUIRED_ATTR(output_idx_type, Type)
    .OP_END_FACTORY_REG(Lu)
@@ -404,8 +412,8 @@ y: Shape is `[..., M, M]` . \n
*/
REG_OP(MatrixSquareRoot)
-    .INPUT(input, TensorType({DT_FLOAT, DT_DOUBLE}))
-    .OUTPUT(y, TensorType({DT_FLOAT, DT_DOUBLE}))
+    .INPUT(input, TensorType({DT_FLOAT, DT_DOUBLE, DT_COMPLEX64, DT_COMPLEX128}))
+    .OUTPUT(y, TensorType({DT_FLOAT, DT_DOUBLE, DT_COMPLEX64, DT_COMPLEX128}))
    .OP_END_FACTORY_REG(MatrixSquareRoot)
/**
diff --git a/third_party/fwkacllib/inc/ops/list_ops.h b/third_party/fwkacllib/inc/ops/list_ops.h
index 292b1dbe..33270ea8 100644
--- a/third_party/fwkacllib/inc/ops/list_ops.h
+++ b/third_party/fwkacllib/inc/ops/list_ops.h
@@ -192,7 +192,7 @@ REG_OP(TensorListGetItem)
    .INPUT(element_shape, TensorType({DT_INT32}))
    .OUTPUT(item, TensorType({DT_FLOAT16,DT_FLOAT,DT_DOUBLE,DT_INT8,
        DT_INT16,DT_INT32,DT_INT64,DT_UINT8,DT_UINT16,DT_QINT8,DT_QUINT8,
-       DT_QINT16,DT_QUINT16,DT_QINT32,DT_BOOL,DT_RESOURCE,
+       DT_QINT16,DT_QUINT16,DT_QINT32,DT_BOOL,
        DT_STRING,DT_COMPLEX64,DT_COMPLEX128}))
    .ATTR(element_dtype, Type, DT_INT32)
    .OP_END_FACTORY_REG(TensorListGetItem)
diff --git a/third_party/fwkacllib/inc/ops/math_ops.h b/third_party/fwkacllib/inc/ops/math_ops.h
index 4cbcc027..50d058ba 100644
--- a/third_party/fwkacllib/inc/ops/math_ops.h
+++ b/third_party/fwkacllib/inc/ops/math_ops.h
@@ -222,6 +222,24 @@ REG_OP(Bucketize)
    .REQUIRED_ATTR(boundaries, ListFloat)
    .OP_END_FACTORY_REG(Bucketize)
+/**
+*@brief Returns a new tensor with the truncated integer values of the elements of input. \n
+
+*@par Inputs:
+*One input, including:
+* @li input_x: A tensor. Must be one of the following types: float16, float32, int8, uint8, int32. \n
+
+*@par Outputs:
+*output_y: A tensor with the same type and shape as "input_x". \n
+
+*@par Third-party framework compatibility
+*Compatible with the Pytorch operator Trunc. \n
+*/
+REG_OP(Trunc)
+    .INPUT(input_x, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT8, DT_INT32, DT_UINT8}))
+    .OUTPUT(output_y, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT8, DT_INT32, DT_UINT8}))
+    .OP_END_FACTORY_REG(Trunc)
+
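For illustration only, not part of the patch: Trunc rounds each element toward zero, matching std::trunc semantics for the floating-point types listed above.

#include <cmath>
#include <cstdio>

int main() {
  const float xs[] = {1.7f, -1.7f, 0.4f, -0.4f};
  for (float x : xs) {
    std::printf("trunc(%.1f) = %.1f\n", x, std::truncf(x));  // 1, -1, 0, -0
  }
  return 0;
}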
/**
*@brief Computes the sum along sparse segments of a tensor . \n
@@ -645,6 +663,7 @@ REG_OP(NLLLoss)
    .OUTPUT(y, TensorType({DT_FLOAT}))
    .OUTPUT(total_weight, TensorType({DT_FLOAT}))
    .ATTR(reduction, String, "mean")
+   .ATTR(ignore_index, Int, -100)
    .OP_END_FACTORY_REG(NLLLoss)
/**
@@ -674,6 +693,7 @@ REG_OP(NLLLossGrad)
    .INPUT(total_weight, TensorType({DT_FLOAT}))
    .OUTPUT(x_grad, TensorType({DT_FLOAT}))
    .ATTR(reduction, String, "mean")
+   .ATTR(ignore_index, Int, -100)
    .OP_END_FACTORY_REG(NLLLossGrad)
/**
@@ -884,6 +904,54 @@ REG_OP(LpNorm)
    .ATTR(keepdim, Bool, false)
    .ATTR(epsilon, Float, 1e-12)
    .OP_END_FACTORY_REG(LpNorm)
+
+/**
+* @brief Returns a complex tensor built from two real tensors holding the real and imaginary parts.
+
+* @par Inputs:
+* @li real: An ND tensor of type float32 or double.
+* @li imag: An ND tensor of type float32 or double. \n
+*
+* @par Outputs:
+* @li out: An ND tensor of type complex64 or complex128. \n
+*/
+REG_OP(Complex)
+    .INPUT(real, TensorType({DT_FLOAT, DT_DOUBLE}))
+    .INPUT(imag, TensorType({DT_FLOAT, DT_DOUBLE}))
+    .OUTPUT(out, TensorType({DT_COMPLEX64, DT_COMPLEX128}))
+    .ATTR(Tout, Type, DT_COMPLEX64)
+    .OP_END_FACTORY_REG(Complex)
+
+/**
+* @brief Returns the imaginary part of a complex tensor.
+
+* @par Inputs:
+* @li input: An ND tensor of type complex64 or complex128. \n
+*
+* @par Outputs:
+* @li output: An ND tensor of type float32 or double. \n
+*/
+REG_OP(Imag)
+    .INPUT(input, TensorType({DT_COMPLEX64, DT_COMPLEX128}))
+    .OUTPUT(output, TensorType({DT_FLOAT, DT_DOUBLE}))
+    .ATTR(Tout, Type, DT_FLOAT)
+    .OP_END_FACTORY_REG(Imag)
+
+/**
+* @brief Returns the element-wise argument (angle) of a complex tensor.
+
+* @par Inputs:
+* @li input: An ND tensor of type complex64 or complex128. \n
+*
+* @par Outputs:
+* @li output: An ND tensor of type float32 or double. \n
+*/
+REG_OP(Angle)
+    .INPUT(input, TensorType({DT_COMPLEX64, DT_COMPLEX128}))
+    .OUTPUT(output, TensorType({DT_FLOAT, DT_DOUBLE}))
+    .ATTR(Tout, Type, DT_FLOAT)
+    .OP_END_FACTORY_REG(Angle)
+
} // namespace ge
#endif // OPS_BUILT_IN_OP_PROTO_INC_MATH_OPS_H_
diff --git a/third_party/fwkacllib/inc/ops/matrix_calculation_ops.h b/third_party/fwkacllib/inc/ops/matrix_calculation_ops.h
index 33b596d8..6bff7f82 100644
--- a/third_party/fwkacllib/inc/ops/matrix_calculation_ops.h
+++ b/third_party/fwkacllib/inc/ops/matrix_calculation_ops.h
@@ -1022,6 +1022,27 @@ REG_OP(IndexAdd)
    .ATTR(axis, Int, 0)
    .OP_END_FACTORY_REG(IndexAdd)
+/**
+*@brief Returns the upper triangular part of a matrix (2-D tensor) or batch of matrices "x". \n
+
+*@par Inputs:
+*One input, including:
+*@li x: A Tensor. Must be one of the following types:
+* float16, float32, double, int32, uint8, int16, int8, complex64, int64,
+* qint8, quint8, qint32, uint16, complex128, uint32, uint64.
+
+*@par Attributes:
+*@li diagonal: An optional int. The diagonal to consider. Defaults to 0. \n
+
+*@par Outputs:
+*y: A Tensor. Has the same type as "x" . \n
+
+*@par Third-party framework compatibility
+* Compatible with the Pytorch operator Triu.
+*/
+REG_OP(Triu)
+    .INPUT(x, TensorType::BasicType())
+    .ATTR(diagonal, Int, 0)
+    .OUTPUT(y, TensorType::BasicType())
+    .OP_END_FACTORY_REG(Triu)
} // namespace ge
#endif // OPS_BUILT_IN_OP_PROTO_INC_MATRIX_CALCULATION_OPS_H_
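For illustration only, not part of the patch: the Complex, Imag, and Angle ops registered in math_ops.h above mirror the std::complex accessors, as this small host-side sketch shows.

#include <complex>
#include <cstdio>

int main() {
  const std::complex<float> z(3.0f, 4.0f);   // Complex: built from real and imag parts
  std::printf("imag  = %f\n", z.imag());     // Imag: imaginary part -> 4.0
  std::printf("angle = %f\n", std::arg(z));  // Angle: argument in radians -> atan2(4, 3)
  return 0;
}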
diff --git a/third_party/fwkacllib/inc/ops/nn_batch_norm_ops.h b/third_party/fwkacllib/inc/ops/nn_batch_norm_ops.h
index a35cee03..ddd70bc8 100644
--- a/third_party/fwkacllib/inc/ops/nn_batch_norm_ops.h
+++ b/third_party/fwkacllib/inc/ops/nn_batch_norm_ops.h
@@ -144,6 +144,64 @@ REG_OP(BatchNorm)
/**
*@brief Performs batch normalization . \n
+*@par Inputs:
+* Five inputs, including: (NDHWC, NCDHW, or NDC1HWC0 supported)
+*@li x: A 5D or 6D Tensor of type float16 or float32, with format NDHWC or NCDHW for 5D or NDC1HWC0 for 6D.
+*@li scale: A Tensor of type float32. Must be 1D if input "x" is with format NDHWC or NCDHW. Must be 6D if input "x" is with format NDC1HWC0. Specifies the scaling factor.
+*@li offset: A Tensor of type float32. Must be 1D if input "x" is with format NDHWC or NCDHW. Must be 6D if input "x" is with format NDC1HWC0. Specifies the offset.
+*@li mean: A Tensor of type float32. Must be 1D if input "x" is with format NDHWC or NCDHW. Must be 6D if input "x" is with format NDC1HWC0. Specifies the mean used for inference. Must be "None" if the operation is used for training.
+*@li variance: A Tensor of type float32. Must be 1D if input "x" is with format NDHWC or NCDHW. Must be 6D if input "x" is with format NDC1HWC0. Specifies the variance used for inference. Must be "None" if the operation is used for training . \n
+
+*@par Attributes:
+*@li epsilon: An optional float32, specifying the small value added to variance to avoid dividing by zero. Defaults to "0.0001".
+*@li data_format: An optional string, specifying the format of "x". Defaults to "NCDHW".
+*@li is_training: An optional bool, specifying if the operation is used for training or inference. Defaults to "True" . \n
+
+*@par Outputs:
+* Five outputs, including: (NDHWC, NCDHW, or NDC1HWC0 supported)
+*@li y: A 5D or 6D Tensor of type float16 or float32 for the normalized "x", with format NDHWC or NCDHW for 5D or NDC1HWC0 for 6D.
+*@li batch_mean: A Tensor of type float32. Must be 1D if input "x" is with format NDHWC or NCDHW. Must be 6D if input "x" is with format NDC1HWC0. Specifies the mean of "x".
+*@li batch_variance: A Tensor of type float32. Must be 1D if input "x" is with format NDHWC or NCDHW. Must be 6D if input "x" is with format NDC1HWC0. Specifies the variance of "x".
+*@li reserve_space_1: An optional Tensor of type float32. Must be 1D if input "x" is with format NDHWC or NCDHW. Must be 6D if input "x" is with format NDC1HWC0. Specifies the mean of "x" for gradient computation. Pass "None" to skip this output.
+*@li reserve_space_2: An optional Tensor of type float32. Must be 1D if input "x" is with format NDHWC or NCDHW. Must be 6D if input "x" is with format NDC1HWC0. Specifies the variance of "x" for gradient computation. Pass "None" to skip this output . \n
+
+*@attention Constraints:
+*@li If the operation is used for inference and outputs "reserve_space_1" and "reserve_space_2" are available, then "reserve_space_1" has the same value as "mean" and "reserve_space_2" has the same value as "variance".
+*@li For Ascend 310, the result accuracy fails to reach 1‰ due to the square root instruction . \n
+
+*@par Third-party framework compatibility
+*@li Compatible with the TensorFlow operator fused_batch_norm.
+*@li Compatible with the TensorFlow operator fused_batch_norm_v2.
+*/
+REG_OP(BatchNorm3D)
+    .INPUT(x, TensorType({DT_FLOAT16,DT_FLOAT}))
+    .INPUT(scale, TensorType({DT_FLOAT}))
+    .INPUT(offset, TensorType({DT_FLOAT}))
+    .OPTIONAL_INPUT(mean, TensorType({DT_FLOAT}))
+    .OPTIONAL_INPUT(variance, TensorType({DT_FLOAT}))
+    .OUTPUT(y, TensorType({DT_FLOAT16,DT_FLOAT}))
+    .OUTPUT(batch_mean, TensorType({DT_FLOAT}))
+    .OUTPUT(batch_variance, TensorType({DT_FLOAT}))
+    .OUTPUT(reserve_space_1, TensorType({DT_FLOAT}))
+    .OUTPUT(reserve_space_2, TensorType({DT_FLOAT}))
+    .ATTR(epsilon, Float, 0.0001)
+    .ATTR(data_format, String, "NCDHW")
+    .ATTR(is_training, Bool, true)
+    .OP_END_FACTORY_REG(BatchNorm3D)
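For illustration only, not part of the patch: the per-channel normalization that BatchNorm/BatchNorm3D apply at inference time, shown on scalar host data.

#include <cmath>

float BatchNormInfer(float x, float mean, float variance,
                     float scale, float offset, float epsilon = 1e-4f) {
  // y = scale * (x - mean) / sqrt(variance + epsilon) + offset
  return scale * (x - mean) / std::sqrt(variance + epsilon) + offset;
}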
+/**
+*@brief Performs batch normalization . \n
+
*@par Inputs:
* Five inputs, including: (NHWC or NCHW supported)
*@li x: A 4D Tensor of type float16 or float32.
@@ -242,6 +300,52 @@ REG_OP(BatchNormGrad)
/**
*@brief Performs the backpropagation of BatchNorm . \n
+*@par Inputs:
+* Five inputs, including:
+*@li y_backprop: A 5D or 6D Tensor of type float16 or float32, with format NDHWC, NCDHW, or NDC1HWC0, for the gradient.
+*@li x: A 5D or 6D Tensor of type float16 or float32, with format NDHWC, NCDHW, or NDC1HWC0.
+*@li scale: A 5D or 6D Tensor of type float32, with format NDHWC, NCDHW, or NDC1HWC0.
+*@li reserve_space_1: A 5D or 6D Tensor of type float32, with format NDHWC, NCDHW, or NDC1HWC0. It is an output of BatchNorm.
+*@li reserve_space_2: A 5D or 6D Tensor of type float32, with format NDHWC, NCDHW, or NDC1HWC0. It is an output of BatchNorm . \n
+
+*@par Attributes:
+*@li epsilon: An optional float32. Defaults to "0.0001". A small float number added to the variance of "x".
+*@li data_format: An optional string. Defaults to "NCDHW".
+*@li is_training: An optional bool. Defaults to "true". Specifies the operation is for training (default) or inference . \n
+
+*@par Outputs:
+*@li x_backprop: A Tensor of type float16 or float32, with format NDHWC, NCDHW, or NDC1HWC0, for the offset of "x".
+*@li scale_backprop: A Tensor of type float32, with format NDHWC, NCDHW, or NDC1HWC0, for the offset of "scale".
+*@li offset_backprop: A Tensor of type float32, with format NDHWC, NCDHW, or NDC1HWC0, for the offset of "offset".
+*@li reserve_space_4: A Tensor of type float32, with format NDHWC, NCDHW, or NDC1HWC0. Pass "None" to skip this output.
+*@li reserve_space_5: A Tensor of type float32, with format NDHWC, NCDHW, or NDC1HWC0. Pass "None" to skip this output . \n
+
+*@attention Constraints:
+* The preceding layer of this operator must be operator BatchNorm3D . \n
+
+*@see BatchNorm3D
+*@par Third-party framework compatibility
+* Compatible with the TensorFlow operators FusedBatchNormGradV2 and FusedBatchNorm3DGrad.
+*/
+REG_OP(BatchNorm3DGrad)
+    .INPUT(y_backprop, TensorType({DT_FLOAT16,DT_FLOAT}))
+    .INPUT(x, TensorType({DT_FLOAT16,DT_FLOAT}))
+    .INPUT(scale, TensorType({DT_FLOAT}))
+    .INPUT(reserve_space_1, TensorType({DT_FLOAT}))
+    .INPUT(reserve_space_2, TensorType({DT_FLOAT}))
+    .OUTPUT(x_backprop, TensorType({DT_FLOAT16,DT_FLOAT}))
+    .OUTPUT(scale_backprop, TensorType({DT_FLOAT}))
+    .OUTPUT(offset_backprop, TensorType({DT_FLOAT}))
+    .OUTPUT(reserve_space_4, TensorType({DT_FLOAT}))
+    .OUTPUT(reserve_space_5, TensorType({DT_FLOAT}))
+    .ATTR(epsilon, Float, 0.0001)
+    .ATTR(data_format, String, "NCDHW")
+    .ATTR(is_training, Bool, true)
+    .OP_END_FACTORY_REG(BatchNorm3DGrad)
+
/**
*@brief Performs the backpropagation of BatchNorm . \n
*@par Inputs:
* Five inputs, including:
*@li y_backprop: A 4D Tensor of type float16 or float32, with format NHWC or NCHW, for the gradient.
diff --git a/third_party/fwkacllib/inc/ops/nn_calculation_ops.h b/third_party/fwkacllib/inc/ops/nn_calculation_ops.h
index c848668f..53922ee6 100644
--- a/third_party/fwkacllib/inc/ops/nn_calculation_ops.h
+++ b/third_party/fwkacllib/inc/ops/nn_calculation_ops.h
@@ -1059,7 +1059,7 @@ REG_OP(DeformableConv2D)
*@par Attributes:
* @li groups: Number of blocked connections from input channels to output
- * channels. Reserved.
+ * channels.
* @li data_format: An optional string from: "NDHWC", "NCDHW". Defaults to "NDHWC". Specify the data format of the input and output data.
* @li dilations: A list of 5 integers.
Specifies the dilation factor for each
@@ -1119,7 +1119,7 @@ REG_OP(Conv3D)
*@par Attributes:
* Three attributes:
* @li groups: Number of blocked connections from input channels to output
- * channels. Reserved.
+ * channels.
* @li data_format: An optional string from: "NDHWC", "NCDHW". Defaults to "NDHWC". Specify the data format of the input and output data.
* @li dilations: A tuple/list of 5 integers, The dilation factor for each
@@ -1167,7 +1167,7 @@ REG_OP(Conv3DBackpropInput)
*@par Attributes:
* Three attributes:
* @li groups: Number of blocked connections from input channels to output
- * channels. Reserved.
+ * channels.
* @li data_format: An optional string from: "NDHWC", "NCDHW". Defaults to "NDHWC". Specify the data format of the input and output data.
* @li dilations: A tuple/list of 5 integers, The dilation factor for each
@@ -1267,7 +1267,7 @@ REG_OP(LSTM)
* dimension of input.
* The N, C and D dimensions must be 1. Has the same format as "x".
* @li groups: Number of blocked connections from input channels to output
- * channels. Reserved.
+ * channels.
* @li data_format: An optional string from: "NDHWC", "NCDHW". Defaults to "NDHWC". Specify the data format of the input and output data.
@@ -1319,7 +1319,7 @@ REG_OP(Conv3DBackpropFilter)
* dimension of input.
* The N, C and D dimensions must be 1. Has the same format as "x".
* @li groups: Number of blocked connections from input channels to output
- * channels. Reserved.
+ * channels.
* @li data_format: An optional string from: "NDHWC", "NCDHW". Defaults to "NDHWC". Specify the data format of the input and output data.
@@ -1369,7 +1369,7 @@ REG_OP(Conv3DBackpropFilterD)
*@par Attributes:
* Five attributes:
* @li groups: Number of blocked connections from input channels to output
- * channels. Reserved.
+ * channels.
* @li dilations: A tuple/list of 5 integers,
* The dilation factor for each dimension of input.
* The N, C and D dimensions must be 1. Has the same format as "x".
@@ -1422,7 +1422,7 @@ REG_OP(Conv3DTranspose)
* dimension of input.
* The N, C and D dimensions must be 1. Has the same format as "x".
* @li groups: Number of blocked connections from input channels to output
- * channels. Reserved.
+ * channels.
* @li data_format: An optional string from: "NDHWC", "NCDHW". Defaults to "NDHWC". Specify the data format of the input and output data.
* @li output_padding: The size will be added in the output shape.
@@ -1624,7 +1624,7 @@ REG_OP(Conv2DTransposeD)
* of the input.
* @li ksize: A tuple/list of 2 integers. Kernel size.
*@par Attributes:
- * Three attributes:
+ * Four attributes:
* @li dilations: A tuple/list of 4 integers, The dilation factor for each dimension of input. Defaults to [1, 1, 1, 1]
* @li data_format: An optional string from: "NCHW", "NHWC". Defaults to "NCHW". Specify the data format of the input x.
diff --git a/third_party/fwkacllib/inc/ops/nn_detect_ops.h b/third_party/fwkacllib/inc/ops/nn_detect_ops.h
index 39b4b227..af59b4e2 100644
--- a/third_party/fwkacllib/inc/ops/nn_detect_ops.h
+++ b/third_party/fwkacllib/inc/ops/nn_detect_ops.h
@@ -968,8 +968,9 @@ REG_OP(SPP)
* Three inputs, including:
*@li x: An NC1HWC0 tensor of type float16 or float32, describing the feature
* map.
-*@li rois: A tensor of type float16 or float32, with shape
-* [batch, 5, roi_max_num], describing the RIOs.
+*@li rois: A tensor of type float16 or float32, with 3D shape
+* [batch, 5, roi_max_num], describing the ROIs.
+* roi_max_num must be less than or equal to 6000 and must be divisible by 16.
*@li roi_actual_num: An optional tensor of type int32, with shape [batch, 8], specifying
* the number of ROIs per batch . \n
@@ -1604,6 +1605,50 @@ REG_OP(NonMaxSuppressionV7)
    .ATTR(max_boxes_size, Int, 0)
    .OP_END_FACTORY_REG(NonMaxSuppressionV7)
+/**
+*@brief Obtains the ROI feature matrix from the feature map list. It is a customized fused operator for mmdetection. \n
+
+*@par Inputs:
+* Two inputs, including:
+*@li features: A 5HD Tensor list of type float32 or float16.
+*@li rois: ROI position. A 2D Tensor of float32 or float16 with shape (N, 5). "N" indicates the number of ROIs, the value "5" indicates the indexes of images where the ROIs are located, "x0", "y0", "x1", and "y1".
+
+*@par Attributes:
+*@li finest_scale: An optional attribute of type int, specifying the scale of calculate levels of "rois".
+*@li roi_scale_factor: An optional attribute of type float32, specifying the rescaling of "rois" coordinates.
+*@li spatial_scale: An optional attribute of type list float32, specifying the scaling ratio of "features" to the original image.
+*@li pooled_height: An optional attribute of type int32, specifying the H dimension.
+*@li pooled_width: An optional attribute of type int32, specifying the W dimension.
+*@li sample_num: An optional attribute of type int32, specifying the horizontal and vertical sampling frequency of each output. If this attribute is set to "0", the sampling frequency is equal to the rounded up value of "rois", which is a floating point number. Defaults to "0".
+*@li pool_mode: An optional attribute of type string to indicate pooling mode. Defaults to "avg" . \n
+*@li aligned: An optional attribute of type bool, specifying whether to align corners. Defaults to true . \n
+
+*@par Outputs:
+* output: Outputs the feature sample of each ROI position. The format is 5HD Tensor of type float32 or float16. The axis N is the number of input ROIs. Axes H, W, and C are consistent with the values of "pooled_height", "pooled_width", and "features", respectively.
+
+*@par Third-party framework compatibility
+*Compatible with mmdetection SingleRoIExtractor operator.
+*/
+REG_OP(RoiExtractor)
+    .DYNAMIC_INPUT(features, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .INPUT(rois, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .ATTR(finest_scale, Int, 56)
+    .ATTR(roi_scale_factor, Float, 0)
+    .ATTR(spatial_scale, ListFloat, { 1.f/4, 1.f/8, 1.f/16, 1.f/32 })
+    .ATTR(pooled_height, Int, 7)
+    .ATTR(pooled_width, Int, 7)
+    .ATTR(sample_num, Int, 0)
+    .ATTR(pool_mode, String, "avg")
+    .ATTR(aligned, Bool, true)
+    .OP_END_FACTORY_REG(RoiExtractor)
} // namespace ge
#endif // OPS_BUILT_IN_OP_PROTO_INC_NN_DETECT_OPS_H_
diff --git a/third_party/fwkacllib/inc/ops/nn_norm_ops.h b/third_party/fwkacllib/inc/ops/nn_norm_ops.h
index af223552..00e2020f 100644
--- a/third_party/fwkacllib/inc/ops/nn_norm_ops.h
+++ b/third_party/fwkacllib/inc/ops/nn_norm_ops.h
@@ -1233,6 +1233,47 @@ REG_OP(SigmoidCrossEntropyWithLogitsGradV2)
    .OUTPUT(gradient, TensorType({DT_FLOAT16, DT_FLOAT}))
    .ATTR(reduction, String, "mean")
    .OP_END_FACTORY_REG(SigmoidCrossEntropyWithLogitsGradV2)
+/**
+ * @brief Calculates the PoissonNllLoss function:
+ * target ~ Poisson(input); loss(input, target) = input - target * log(input) + log(target!) \n
+
+ * @par Inputs:
+ * Two inputs, including:
+ * @li input_x: A tensor. Must be one of the following types: float16, float32. \n
+ * @li target: A tensor. Must be one of the following types: float16, float32. \n
\n + + * @par Attributes: + * four Attributes, including: + * @li log_input: An optional bool. Defaults to "True" \n + * + * @par Attributes: + * @li full: An optional bool. Defaults to "False" \n + * + * @par Attributes: + * @li eps: An optional float. Defaults to "1e-8" \n + * + * @par Attributes: + * @li reduction: An optional string. Defaults to "mean" \n + + * @par Outputs: + * loss: A Tensor has same element type as two inputs. \n + + * @par Third-party framework compatibility + * Compatible with the Pytorch operator PoissonNllLoss. \n + */ +REG_OP(PoissonNllLoss) + .INPUT(input_x, TensorType({DT_FLOAT16, DT_FLOAT})) + .INPUT(target, TensorType({DT_FLOAT16, DT_FLOAT})) + .OUTPUT(loss, TensorType({DT_FLOAT16, DT_FLOAT})) + .ATTR(log_input, Bool, true) + .ATTR(full, Bool, false) + .ATTR(eps, Float, 1e-8) + .ATTR(reduction, String, "mean") + .OP_END_FACTORY_REG(PoissonNllLoss) } // namespace ge #endif // OPS_BUILT_IN_OP_PROTO_INC_NN_NORM_OPS_H_ diff --git a/third_party/fwkacllib/inc/ops/nn_ops.h b/third_party/fwkacllib/inc/ops/nn_ops.h index 16552eee..820aa00d 100644 --- a/third_party/fwkacllib/inc/ops/nn_ops.h +++ b/third_party/fwkacllib/inc/ops/nn_ops.h @@ -20,7 +20,34 @@ */ #ifndef OPS_BUILT_IN_OP_PROTO_INC_NN_OPS_H_ #define OPS_BUILT_IN_OP_PROTO_INC_NN_OPS_H_ - +#include "graph/operator_reg.h" #include "nn_pooling_ops.h" +namespace ge { +/** +* @brief Says whether the targets are in the top "k" predictions . \n + +* @par Inputs: +* Three inputs, including: +* @li predictions: A 2D Tensor of type float32. A "batch_size * classes" tensor. +* @li targets: A 1D Tensor of type IndexNumberType. A batch_size tensor of class ids. +* @li k: A 1D Tensor of the same type as "targets". +* Specifies the number of top elements to look at for computing precision . \n + +* @par Outputs: +* precision: A Tensor of type bool . \n + +* @attention Constraints: +* @li targets must be non-negative tensor. + +* @par Third-party framework compatibility +* @li Compatible with the TensorFlow operator InTopKV2. +*/ +REG_OP(InTopKV2) + .INPUT(predictions, TensorType({DT_FLOAT})) + .INPUT(targets, TensorType(IndexNumberType)) + .INPUT(k, TensorType({IndexNumberType})) + .OUTPUT(precision, TensorType({DT_BOOL})) + .OP_END_FACTORY_REG(InTopKV2) +}// namespace ge #endif // OPS_BUILT_IN_OP_PROTO_INC_NN_OPS_H_ diff --git a/third_party/fwkacllib/inc/ops/nonlinear_fuc_ops.h b/third_party/fwkacllib/inc/ops/nonlinear_fuc_ops.h index e0897280..a225bb5f 100644 --- a/third_party/fwkacllib/inc/ops/nonlinear_fuc_ops.h +++ b/third_party/fwkacllib/inc/ops/nonlinear_fuc_ops.h @@ -223,7 +223,29 @@ REG_OP(Relu6Grad) .INPUT(features, TensorType::RealNumberType()) .OUTPUT(backprops, TensorType::RealNumberType()) .OP_END_FACTORY_REG(Relu6Grad) - +/** +*@brief Calculate the elu_grad_v2 function. +*Applies the element-wise function: +* Computes the backward for the elu: if x>0, 1; otherwise elu() + alpha . +*@par Inputs: +*One inputs, including: +* @li grads: A tensor. Must be one of the following types: +* float16, float32. +* @li activations: A tensor. Must be one of the following types: +* float16, float32. +* +*@par Outputs: +*y: A Tensor with the same type and shape of grads's. 
diff --git a/third_party/fwkacllib/inc/ops/nn_ops.h b/third_party/fwkacllib/inc/ops/nn_ops.h
index 16552eee..820aa00d 100644
--- a/third_party/fwkacllib/inc/ops/nn_ops.h
+++ b/third_party/fwkacllib/inc/ops/nn_ops.h
@@ -20,7 +20,34 @@
 */
#ifndef OPS_BUILT_IN_OP_PROTO_INC_NN_OPS_H_
#define OPS_BUILT_IN_OP_PROTO_INC_NN_OPS_H_
-
+#include "graph/operator_reg.h"
#include "nn_pooling_ops.h"
+namespace ge {
+/**
+* @brief Says whether the targets are in the top "k" predictions . \n
+
+* @par Inputs:
+* Three inputs, including:
+* @li predictions: A 2D Tensor of type float32. A "batch_size * classes" tensor.
+* @li targets: A 1D Tensor of type IndexNumberType. A batch_size tensor of class ids.
+* @li k: A 1D Tensor of the same type as "targets". Specifies the number of top elements to look at for computing precision . \n
+
+* @par Outputs:
+* precision: A Tensor of type bool . \n
+
+* @attention Constraints:
+* @li targets must be a non-negative tensor.
+
+* @par Third-party framework compatibility
+* @li Compatible with the TensorFlow operator InTopKV2.
+*/
+REG_OP(InTopKV2)
+    .INPUT(predictions, TensorType({DT_FLOAT}))
+    .INPUT(targets, TensorType(IndexNumberType))
+    .INPUT(k, TensorType({IndexNumberType}))
+    .OUTPUT(precision, TensorType({DT_BOOL}))
+    .OP_END_FACTORY_REG(InTopKV2)
+}// namespace ge
#endif // OPS_BUILT_IN_OP_PROTO_INC_NN_OPS_H_
diff --git a/third_party/fwkacllib/inc/ops/nonlinear_fuc_ops.h b/third_party/fwkacllib/inc/ops/nonlinear_fuc_ops.h
index e0897280..a225bb5f 100644
--- a/third_party/fwkacllib/inc/ops/nonlinear_fuc_ops.h
+++ b/third_party/fwkacllib/inc/ops/nonlinear_fuc_ops.h
@@ -223,7 +223,29 @@ REG_OP(Relu6Grad)
    .INPUT(features, TensorType::RealNumberType())
    .OUTPUT(backprops, TensorType::RealNumberType())
    .OP_END_FACTORY_REG(Relu6Grad)
-
+/**
+*@brief Calculates the elu_grad_v2 function.
+*Computes the backward pass for ELU: the gradient of elu(x) is 1 if x > 0, and elu(x) + alpha otherwise.
+*@par Inputs:
+*Two inputs, including:
+* @li grads: A tensor. Must be one of the following types: float16, float32.
+* @li activations: A tensor. Must be one of the following types: float16, float32.
+*
+*@par Outputs:
+*y: A Tensor with the same type and shape as "grads".
+*
+*@par Attributes:
+*@li alpha: A scalar parameter. Defaults to 1.0.
+*/
+REG_OP(EluGradV2)
+    .INPUT(grads, TensorType({DT_FLOAT, DT_FLOAT16}))
+    .INPUT(activations, TensorType({DT_FLOAT, DT_FLOAT16}))
+    .OUTPUT(y, TensorType({DT_FLOAT, DT_FLOAT16}))
+    .ATTR(alpha, Float, 1.0)
+    .OP_END_FACTORY_REG(EluGradV2)
/**
* @brief Compute sigmoid of "x" element-wise . \n
@@ -842,6 +864,26 @@ REG_OP(SoftShrinkGrad)
    .OUTPUT(output_y, TensorType({DT_FLOAT16, DT_FLOAT}))
    .ATTR(lambd, Float, 0.5)
    .OP_END_FACTORY_REG(SoftShrinkGrad)
+
+/**
+*@brief Calculates -ln(1+e^(-x)). \n
+
+*@par Inputs:
+*One input, including:
+* @li x: A tensor. Must be one of the following types: float16, float32. \n
+
+*@par Outputs:
+*One output, including:
+* @li y: A tensor with the same type and shape as "x". \n
+
+*@par Third-party framework compatibility
+*Compatible with the Pytorch operator LogSigmoid. \n
+*/
+REG_OP(LogSigmoid)
+    .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT}))   /* "input:x" */
+    .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT}))  /* "output:y" */
+    .OP_END_FACTORY_REG(LogSigmoid)
} // namespace ge
#endif // OPS_BUILT_IN_OP_PROTO_INC_NONLINEAR_FUC_OPS_H_
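For illustration only, not part of the patch: the element-wise rule implemented by EluGradV2 as documented above, plus the numerically stable form of LogSigmoid, -log(1 + exp(-x)).

#include <cmath>

float EluGradV2(float grad, float activation, float alpha = 1.0f) {
  // For x > 0 the activation is positive and d/dx elu(x) = 1;
  // otherwise elu(x) = alpha * (exp(x) - 1), so d/dx elu(x) = elu(x) + alpha.
  return activation > 0.0f ? grad : grad * (activation + alpha);
}

float LogSigmoid(float x) {
  // -log1p(exp(-x)), computed stably for large |x|.
  return x >= 0.0f ? -std::log1p(std::exp(-x)) : x - std::log1p(std::exp(x));
}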
diff --git a/third_party/fwkacllib/inc/ops/reduce_ops.h b/third_party/fwkacllib/inc/ops/reduce_ops.h
index 0b114134..5b97d226 100644
--- a/third_party/fwkacllib/inc/ops/reduce_ops.h
+++ b/third_party/fwkacllib/inc/ops/reduce_ops.h
@@ -37,7 +37,7 @@ namespace ge {
*@attention Constraints:
* This operator is a BatchNorm fusion operator for updating the moving averages for training.
-* This operator is used in conjunction with BNTrainingUpdate.
+* This operator is used in conjunction with BNTrainingReduce.
*/
REG_OP(BNTrainingReduce)
    .INPUT(x, TensorType({DT_FLOAT16,DT_FLOAT}))
    .OUTPUT(sum, TensorType({DT_FLOAT}))
    .OUTPUT(square_sum, TensorType({DT_FLOAT}))
    .OP_END_FACTORY_REG(BNTrainingReduce)
+/**
+*@brief Performs reduced batch normalization . \n
+
+*@par Inputs:
+*x: A 6D Tensor of type float16 or float32, with format NDC1HWC0 . \n
+
+*@par Outputs:
+*@li sum: A 3D Tensor of type float32 for SUM reduced "x".
+*@li square_sum: A 3D Tensor of type float32 for SUMSQ reduced "x" . \n
+
+*@attention Constraints:
+* This operator is a BatchNorm fusion operator for updating the moving averages for training.
+* This operator is used in conjunction with BN3DTrainingUpdate.
+*/
+REG_OP(BN3DTrainingReduce)
+    .INPUT(x, TensorType({DT_FLOAT16,DT_FLOAT}))
+    .OUTPUT(sum, TensorType({DT_FLOAT}))
+    .OUTPUT(square_sum, TensorType({DT_FLOAT}))
+    .OP_END_FACTORY_REG(BN3DTrainingReduce)
+
/**
*@brief Performs the backpropagation of BatchNorm . \n
@@ -88,6 +109,49 @@ REG_OP(BNTrainingReduceGrad)
    .ATTR(epsilon, Float, 0.0001)
    .OP_END_FACTORY_REG(BNTrainingReduceGrad)
+/**
+*@brief Performs the backpropagation of BatchNorm . \n
+
+*@par Inputs:
+* Seven inputs, including:
+*@li grads: A 6D Tensor of type float16 or float32, with format NDC1HWC0, for the gradient.
+*@li x: A 6D Tensor of type float16 or float32, with format NDC1HWC0.
+*@li diff_scale: A 6D Tensor of type float32, with format NDC1HWC0, for the mean of "x".
+*@li diff_offset: A 6D Tensor of type float32, with format NDC1HWC0, for the variance of "x".
+*@li scale: A 6D Tensor of type float32, with format NDC1HWC0.
+*@li batch_mean: A 6D Tensor of type float32, with format NDC1HWC0, for the mean of "x".
+*@li batch_variance: A 6D Tensor of type float32, with format NDC1HWC0, for the variance of "x" . \n
+
+*@par Attributes:
+*epsilon: An optional float32. Defaults to "0.0001". A small float number added to the variance of "x" . \n
+
+*@par Outputs:
+*y: A Tensor of type float16 or float32, with format NDC1HWC0, for the offset of "x" . \n
+
+*@attention Constraints:
+* The preceding layer of this operator must be BN3DTrainingUpdateGrad . \n
+
+*@see BN3DTrainingUpdateGrad
+*/
+REG_OP(BN3DTrainingReduceGrad)
+    .INPUT(grads, TensorType({DT_FLOAT16,DT_FLOAT}))
+    .INPUT(x, TensorType({DT_FLOAT16,DT_FLOAT}))
+    .INPUT(diff_scale, TensorType({DT_FLOAT}))
+    .INPUT(diff_offset, TensorType({DT_FLOAT}))
+    .INPUT(scale, TensorType({DT_FLOAT}))
+    .INPUT(batch_mean, TensorType({DT_FLOAT}))
+    .INPUT(batch_variance, TensorType({DT_FLOAT}))
+    .OUTPUT(y, TensorType({DT_FLOAT16,DT_FLOAT}))
+    .ATTR(epsilon, Float, 0.0001)
+    .OP_END_FACTORY_REG(BN3DTrainingReduceGrad)
+
/**
*@brief Performs reduced batch normalization . \n
@@ -120,7 +184,7 @@ REG_OP(BNTrainingReduceGrad)
*@attention Constraints:
*@li This operator is a BatchNorm fusion operator for updating the moving averages for training.
-*This operator is used in conjunction with BNTrainingReduce.
+*This operator is used in conjunction with BNTrainingUpdate.
*@li For Ascend 310, the result accuracy fails to reach 1‰ due to the square root instruction.
*/
REG_OP(BNTrainingUpdate)
    .INPUT(x, TensorType({DT_FLOAT16,DT_FLOAT}))
@@ -141,6 +205,59 @@ REG_OP(BNTrainingUpdate)
    .OUTPUT(batch_variance, TensorType({DT_FLOAT}))
    .OP_END_FACTORY_REG(BNTrainingUpdate)
+/**
+*@brief Performs reduced batch normalization . \n
+
+*@par Inputs:
+* Seven inputs, including: (NDC1HWC0 supported)
+*@li x: A 6D Tensor of type float16 or float32.
+*@li sum: A 6D Tensor of type float32 for the output of operator BN3DTrainingReduce.
+*@li square_sum: A 6D Tensor of type float32 for the output of operator BN3DTrainingReduce.
+*@li scale: A 6D Tensor of type float32, for the scaling factor.
+*@li offset: A 6D Tensor of type float32, for the scaling offset.
+*@li mean: A 6D Tensor of type float32, for the updated mean.
+*@li variance: A 6D Tensor of type float32, for the updated variance . \n
+
+*@par Attributes:
+*@li epsilon: A required float32, specifying the small value added to variance to avoid dividing by zero.
+*@li factor: A required float32, specifying the weight for updating the mean and variance . \n
+
+*@par Outputs:
+* Five outputs, including: (NDC1HWC0 supported)
+*@li y: A 6D Tensor of type float16 or float32, for normalized "x".
+*@li mean: A 6D Tensor of type float32, for the updated mean.
+*@li variance: A 6D Tensor of type float32, for the updated variance.
+*@li batch_mean: A 6D Tensor of type float32, for the mean of "x".
+*@li batch_variance: A 6D Tensor of type float32, for the variance of "x" . \n
+
+*@attention Constraints:
+*@li This operator is a BatchNorm fusion operator for updating the moving averages for training.
+*This operator is used in conjunction with BN3DTrainingReduce.
+*@li For Ascend 310, the result accuracy fails to reach 1‰ due to the square root instruction.
+*/
+REG_OP(BN3DTrainingUpdate)
+    .INPUT(x, TensorType({DT_FLOAT16,DT_FLOAT}))
+    .INPUT(sum, TensorType({DT_FLOAT}))
+    .INPUT(square_sum, TensorType({DT_FLOAT}))
+    .INPUT(scale, TensorType({DT_FLOAT}))
+    .INPUT(offset, TensorType({DT_FLOAT}))
+    .INPUT(mean, TensorType({DT_FLOAT}))
+    .INPUT(variance, TensorType({DT_FLOAT}))
+    .REQUIRED_ATTR(factor, Float)
+    .REQUIRED_ATTR(epsilon, Float)
+    .OUTPUT(y, TensorType({DT_FLOAT16,DT_FLOAT}))
+    .OUTPUT(mean, TensorType({DT_FLOAT}))
+    .OUTPUT(variance, TensorType({DT_FLOAT}))
+    .OUTPUT(batch_mean, TensorType({DT_FLOAT}))
+    .OUTPUT(batch_variance, TensorType({DT_FLOAT}))
+    .OP_END_FACTORY_REG(BN3DTrainingUpdate)
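For illustration only, not part of the patch, and assuming the usual moving-average update for the "factor" attribute: how the reduce/update pair derives batch statistics. The reduce op emits per-channel sum and square_sum; the update op then forms mean/variance and blends them into the running statistics.

#include <cmath>

struct BnStats { float mean; float variance; };

BnStats UpdateStats(float sum, float square_sum, float count,
                    float& running_mean, float& running_var, float factor) {
  const float mean = sum / count;
  const float variance = square_sum / count - mean * mean;  // E[x^2] - E[x]^2
  running_mean = factor * mean + (1.0f - factor) * running_mean;
  running_var  = factor * variance + (1.0f - factor) * running_var;
  return {mean, variance};
}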
+
/**
*@brief Performs batch normalization for inference . \n
@@ -284,6 +401,40 @@ REG_OP(BNTrainingUpdateGrad)
    .OUTPUT(diff_scale, TensorType({DT_FLOAT}))
    .OUTPUT(diff_offset, TensorType({DT_FLOAT}))
    .OP_END_FACTORY_REG(BNTrainingUpdateGrad)
+/**
+*@brief Performs the backpropagation of BatchNorm . \n
+
+*@par Inputs:
+* Four inputs, including:
+*@li grads: A 6D Tensor of type float16 or float32, with format NDC1HWC0, for the gradient.
+*@li x: A 6D Tensor of type float16 or float32, with format NDC1HWC0.
+*@li batch_mean: A 6D Tensor of type float32, with format NDC1HWC0, for the mean of "x".
+*@li batch_variance: A 6D Tensor of type float32, with format NDC1HWC0, for the variance of "x" . \n
+
+*@par Attributes:
+*epsilon: An optional float32. Defaults to "0.0001". A small float number added to the variance of "x" . \n
+
+*@par Outputs:
+*@li diff_scale: A Tensor of type float32, with format NDC1HWC0, for the offset of "scale".
+*@li diff_offset: A Tensor of type float32, with format NDC1HWC0, for the offset of "offset" . \n
+
+*/
+REG_OP(BN3DTrainingUpdateGrad)
+    .INPUT(grads, TensorType({DT_FLOAT16,DT_FLOAT}))
+    .INPUT(x, TensorType({DT_FLOAT16,DT_FLOAT}))
+    .INPUT(batch_mean, TensorType({DT_FLOAT}))
+    .INPUT(batch_variance, TensorType({DT_FLOAT}))
+    .ATTR(epsilon, Float, 0.0001)
+    .OUTPUT(diff_scale, TensorType({DT_FLOAT}))
+    .OUTPUT(diff_offset, TensorType({DT_FLOAT}))
+    .OP_END_FACTORY_REG(BN3DTrainingUpdateGrad)
+
/**
*@brief Performs the backpropagation of BatchNorm for inference . \n
diff --git a/third_party/fwkacllib/inc/ops/selection_ops.h b/third_party/fwkacllib/inc/ops/selection_ops.h
index dee9e0f7..33980d43 100644
--- a/third_party/fwkacllib/inc/ops/selection_ops.h
+++ b/third_party/fwkacllib/inc/ops/selection_ops.h
@@ -857,8 +857,8 @@ REG_OP(SliceDV2)
* @li sorted = true
* @li It's unstable sorted indices on the platform of Ascend310
-* @par Third-party framework compatibility
-* @li Compatible with the TensorFlow operator TopK.
+* @par Restrictions:
+* Warning: THIS FUNCTION IS DEPRECATED. Please use TopKV2 instead.
*/
REG_OP(TopKD)
    .INPUT(x, TensorType::RealNumberType())
@@ -883,6 +883,44 @@ REG_OP(TopKD)
* Number of top elements to look for along the last dimension (along each row for matrices) . \n
+* @par Attributes:
+* @li sorted: An optional bool. Defaults to true. If true, the resulting "k" elements will be sorted by the values in descending order.
+* @li dim: An optional int. Defaults to -1. Reserved for future use.
+* @li largest: An optional bool. Defaults to true. Reserved for future use. \n
+
+* @par Outputs:
+* @li values: A Tensor, specifying the sorted data. Has the same type as "input".
+* @li indices: A Tensor of type int32, specifying the indices of sorted data . \n
+
+* @see TopK()
+* @par Third-party framework compatibility
+* @li Compatible with the TensorFlow operator TopKV2.
+*/
+REG_OP(TopKV2)
+    .INPUT(x, TensorType::RealNumberType())
+    .INPUT(k, TensorType({DT_INT32}))
+    .OUTPUT(values, TensorType::RealNumberType())
+    .OUTPUT(indices, TensorType({DT_INT32}))
+    .ATTR(sorted, Bool, true)
+    .ATTR(dim, Int, -1)
+    .ATTR(largest, Bool, true)
+    .OP_END_FACTORY_REG(TopKV2)
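For illustration only, not part of the patch: the values/indices contract of TopKV2 on a single row, using std::partial_sort over indices (assumes k <= row.size()).

#include <algorithm>
#include <numeric>
#include <vector>

void TopK(const std::vector<float>& row, int k,
          std::vector<float>& values, std::vector<int>& indices) {
  std::vector<int> order(row.size());
  std::iota(order.begin(), order.end(), 0);
  // Sort the first k index positions by descending value.
  std::partial_sort(order.begin(), order.begin() + k, order.end(),
                    [&](int a, int b) { return row[a] > row[b]; });
  indices.assign(order.begin(), order.begin() + k);
  values.clear();
  for (int i = 0; i < k; ++i) values.push_back(row[indices[i]]);
}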
+
+/**
+* @brief Finds values and indices of the "k" largest elements for the last dimension . \n
+
+* @par Inputs:
+* Two inputs, including:
+* @li x: A 1D or higher tensor of type BasicType, with the last dimension at least "k".
+* @li k: A 0D Tensor of type int32. Number of top elements to look for along the last dimension (along each row for matrices) . \n
+
* @par Attributes:
* @li sorted: An optional bool. Defaults to true. If true, the resulting "k" elements will be sorted by the values in descending order.
@@ -2103,6 +2141,34 @@ REG_OP(StridedSliceV2)
    .OUTPUT(y, TensorType::BasicType())
    .OP_END_FACTORY_REG(StridedSliceV2)
+/**
+*@brief Fills the elements of the input tensor with value "val" by selecting the indices in the order given in "index". \n
+
+*@par Inputs:
+*Three inputs, including:
+* @li x: A tensor. Must be one of the following types: float16, float32, int32. \n
+*@li assist1: A tensor. Must be one of the following types: float16, float32, int32. \n
+*@li assist2: A tensor. Must be one of the following types: float16, float32, int32. \n
+
+* @par Attributes:
+* @li dim: A required int. Used to select the dimension of this tensor. \n
+
+*@par Outputs:
+*y: A Tensor with the same type and shape as "x". \n
+
+*@par Third-party framework compatibility
+*Compatible with the Pytorch operator IndexFill. \n
+*/
+REG_OP(IndexFillD)
+    .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32}))
+    .INPUT(assist1, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32}))
+    .INPUT(assist2, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32}))
+    .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32}))
+    .REQUIRED_ATTR(dim, Int)
+    .OP_END_FACTORY_REG(IndexFillD)
} // namespace ge
#endif // OPS_BUILT_IN_OP_PROTO_INC_SELECTION_OPS_H_
diff --git a/third_party/fwkacllib/inc/ops/sparse_ops.h b/third_party/fwkacllib/inc/ops/sparse_ops.h
index 09d8ced9..a1fc9ee6 100644
--- a/third_party/fwkacllib/inc/ops/sparse_ops.h
+++ b/third_party/fwkacllib/inc/ops/sparse_ops.h
@@ -383,11 +383,11 @@ REG_OP(SparseFillEmptyRowsGrad)
REG_OP(SparseTensorDenseMatMul)
    .INPUT(x1_indices, TensorType({DT_INT32, DT_INT64}))
    .INPUT(x1_values, TensorType({DT_FLOAT, DT_DOUBLE, DT_INT32, \
-        DT_COMPLEXT64, DT_COMPLEX128, DT_FLOAT16}))
+        DT_COMPLEXT64, DT_COMPLEX128, DT_FLOAT16, DT_INT64}))
    .INPUT(x1_shape, TensorType({DT_INT64}))
-    .INPUT(x2, TensorType({DT_FLOAT, DT_DOUBLE, DT_INT32, DT_COMPLEXT64, \
+    .INPUT(x2, TensorType({DT_FLOAT, DT_DOUBLE, DT_INT64, DT_INT32, DT_COMPLEXT64, \
        DT_COMPLEX128, DT_FLOAT16}))
-    .OUTPUT(y, TensorType({DT_FLOAT, DT_DOUBLE, DT_INT32, DT_COMPLEXT64, \
+    .OUTPUT(y, TensorType({DT_FLOAT, DT_DOUBLE, DT_INT64, DT_INT32, DT_COMPLEXT64, \
        DT_COMPLEX128, DT_FLOAT16}))
    .ATTR(adjoint_a, Bool, false)
    .ATTR(adjoint_b, Bool, false)
diff --git a/third_party/fwkacllib/inc/ops/spectral_ops.h b/third_party/fwkacllib/inc/ops/spectral_ops.h
index be3d7d00..82accc73 100644
--- a/third_party/fwkacllib/inc/ops/spectral_ops.h
+++ b/third_party/fwkacllib/inc/ops/spectral_ops.h
@@ -26,6 +26,24 @@
namespace ge {
+/**
+*@brief Computes the inverse 1-dimensional discrete Fourier transform over the inner-most dimension of `x`. \n
+
+*@par Inputs:
+*@li x: A Tensor. Must be one of the following types: complex64, complex128. \n
diff --git a/third_party/fwkacllib/inc/ops/sparse_ops.h b/third_party/fwkacllib/inc/ops/sparse_ops.h
index 09d8ced9..a1fc9ee6 100644
--- a/third_party/fwkacllib/inc/ops/sparse_ops.h
+++ b/third_party/fwkacllib/inc/ops/sparse_ops.h
@@ -383,11 +383,11 @@ REG_OP(SparseFillEmptyRowsGrad)
 REG_OP(SparseTensorDenseMatMul)
     .INPUT(x1_indices, TensorType({DT_INT32, DT_INT64}))
     .INPUT(x1_values, TensorType({DT_FLOAT, DT_DOUBLE, DT_INT32, \
-        DT_COMPLEXT64, DT_COMPLEX128, DT_FLOAT16}))
+        DT_COMPLEXT64, DT_COMPLEX128, DT_FLOAT16, DT_INT64}))
     .INPUT(x1_shape, TensorType({DT_INT64}))
-    .INPUT(x2, TensorType({DT_FLOAT, DT_DOUBLE, DT_INT32, DT_COMPLEXT64, \
+    .INPUT(x2, TensorType({DT_FLOAT, DT_DOUBLE, DT_INT64, DT_INT32, DT_COMPLEXT64, \
         DT_COMPLEX128, DT_FLOAT16}))
-    .OUTPUT(y, TensorType({DT_FLOAT, DT_DOUBLE, DT_INT32, DT_COMPLEXT64, \
+    .OUTPUT(y, TensorType({DT_FLOAT, DT_DOUBLE, DT_INT64, DT_INT32, DT_COMPLEXT64, \
         DT_COMPLEX128, DT_FLOAT16}))
     .ATTR(adjoint_a, Bool, false)
     .ATTR(adjoint_b, Bool, false)
diff --git a/third_party/fwkacllib/inc/ops/spectral_ops.h b/third_party/fwkacllib/inc/ops/spectral_ops.h
index be3d7d00..82accc73 100644
--- a/third_party/fwkacllib/inc/ops/spectral_ops.h
+++ b/third_party/fwkacllib/inc/ops/spectral_ops.h
@@ -26,6 +26,24 @@
 namespace ge {

+/**
+*@brief Computes the inverse 1-dimensional discrete Fourier transform over the
+inner-most dimension of `x`. \n
+
+*@par Inputs:
+*@li x: A Tensor. Must be one of the following types: complex64, complex128. \n
+
+*@par Outputs:
+*@li y: A complex tensor of the same rank as `x`. \n
+
+*@par Third-party framework compatibility
+* Compatible with TensorFlow IFFT operator.
+*/
+REG_OP(IFFT)
+    .INPUT(x, TensorType({DT_COMPLEX64,DT_COMPLEX128}))
+    .OUTPUT(y, TensorType({DT_COMPLEX64,DT_COMPLEX128}))
+    .OP_END_FACTORY_REG(IFFT)
+
 /**
 *@brief Real-valued fast Fourier transform . \n

@@ -47,6 +65,84 @@ REG_OP(RFFT)
     .OUTPUT(y, TensorType({DT_COMPLEX64}))
     .OP_END_FACTORY_REG(RFFT)

+/**
+*@brief Inverse real-valued fast Fourier transform . \n
+
+*@par Inputs:
+*@li x: A complex64 tensor.
+*@li fft_length: An int32 tensor of shape [1]. The FFT length . \n
+
+*@par Outputs:
+*@li y: A float32 tensor of the same rank as `input`. The inner-most
+  dimension of `input` is replaced with the `fft_length` samples of its inverse
+  1D Fourier transform . \n
+
+*@par Third-party framework compatibility
+* Compatible with TensorFlow IRFFT operator.
+*/
+REG_OP(IRFFT)
+    .INPUT(x, TensorType({DT_COMPLEX64}))
+    .INPUT(fft_length, TensorType({DT_INT32}))
+    .OUTPUT(y, TensorType({DT_FLOAT}))
+    .OP_END_FACTORY_REG(IRFFT)
+
+
+/**
+*@brief 2D fast Fourier transform. \n
+
+*@par Inputs:
+*@li x: A Tensor. Must be one of the following types: complex64, complex128. \n
+
+*@par Outputs:
+*@li y: A complex tensor of the same shape as `input`. The inner-most 2
+  dimensions of `input` are replaced with their 2D Fourier transform. \n
+
+*@par Third-party framework compatibility
+* Compatible with TensorFlow FFT2D operator.
+*/
+REG_OP(FFT2D)
+    .INPUT(x, TensorType({DT_COMPLEX64, DT_COMPLEX128}))
+    .OUTPUT(y, TensorType({DT_COMPLEX64, DT_COMPLEX128}))
+    .OP_END_FACTORY_REG(FFT2D)
+
+/**
+*@brief Calculates the one-dimensional discrete Fourier transform on the
+innermost dimension of the input. \n
+
+*@par Inputs:
+*@li x: A Tensor. Must be one of the following types: complex64, complex128. \n
+
+*@par Outputs:
+*@li y: A complex tensor with the same shape as the input. The innermost
+dimension of the input is replaced by its 1-dimensional Fourier transform. \n
+
+*@par Third-party framework compatibility
+* Compatible with TensorFlow FFT operator.
+*/
+REG_OP(FFT)
+    .INPUT(x, TensorType({DT_COMPLEX64,DT_COMPLEX128}))
+    .OUTPUT(y, TensorType({DT_COMPLEX64,DT_COMPLEX128}))
+    .OP_END_FACTORY_REG(FFT)
+
+/**
+*@brief Calculates the inverse 2-dimensional discrete Fourier transform on the
+innermost dimensions of the input. \n
+
+*@par Inputs:
+*@li x: A Tensor. Must be one of the following types: complex64, complex128. \n
+
+*@par Outputs:
+*@li y: A complex tensor with the same shape as the input. The innermost 2
+dimensions of the input are replaced by their inverse 2-dimensional Fourier
+transform. \n
+
+*@par Third-party framework compatibility
+* Compatible with TensorFlow IFFT2D operator.
+*/
+REG_OP(IFFT2D)
+    .INPUT(x, TensorType({DT_COMPLEX64,DT_COMPLEX128}))
+    .OUTPUT(y, TensorType({DT_COMPLEX64,DT_COMPLEX128}))
+    .OP_END_FACTORY_REG(IFFT2D)
+
 } // namespace ge

 #endif // OPS_BUILT_IN_OP_PROTO_INC_SPECTRAL_OPS_H_
\ No newline at end of file
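Note: as a reading aid for the FFT/IFFT family above, the 1-D transform pair can be written as a naive O(n^2) DFT; `inverse = false` matches FFT and `inverse = true` matches IFFT with the conventional 1/n scaling. Illustrative host code only, the registered ops are device-side FFT implementations:

    #include <cmath>
    #include <complex>
    #include <cstddef>
    #include <vector>

    std::vector<std::complex<float>> Dft1D(const std::vector<std::complex<float>> &in,
                                           bool inverse) {
      const std::size_t n = in.size();
      const float sign = inverse ? 1.0f : -1.0f;  // forward uses e^(-2*pi*i*k*t/n)
      std::vector<std::complex<float>> out(n);
      for (std::size_t k = 0; k < n; ++k) {
        std::complex<float> acc(0.0f, 0.0f);
        for (std::size_t t = 0; t < n; ++t) {
          const float angle = sign * 2.0f * 3.1415926535f * k * t / n;
          acc += in[t] * std::complex<float>(std::cos(angle), std::sin(angle));
        }
        out[k] = inverse ? acc / static_cast<float>(n) : acc;  // 1/n on inverse
      }
      return out;
    }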
diff --git a/third_party/fwkacllib/inc/ops/split_combination_ops.h b/third_party/fwkacllib/inc/ops/split_combination_ops.h
index f1a93fa6..af2c37bc 100644
--- a/third_party/fwkacllib/inc/ops/split_combination_ops.h
+++ b/third_party/fwkacllib/inc/ops/split_combination_ops.h
@@ -62,8 +62,8 @@ REG_OP(Split)
 *Must be one of the following types: float16, float32, int32, int8, int16, int64, uint8, uint16, uint32, uint64

 *@par Attributes:
-*@li split_dim: A required int8, int16, int32, or int64. Specifies the dimension along which to split. No default value.
-*@li num_split: A required int8, int16, int32, or int64. Specifies the number of output tensors. No default value . \n
+*@li split_dim: A required int32. Specifies the dimension along which to split. No default value.
+*@li num_split: A required int32. Specifies the number of output tensors. No default value . \n

 *@par Outputs:
 *y:Dynamic output. A list of output tensors. Has the same type and format as "x" . \n
@@ -94,12 +94,12 @@ REG_OP(SplitD)
 *@par Inputs:
 * Three inputs, including:
 *@li x: An ND Tensor.
-*Must be one of the following types:
-*@li size_splits: A list of int8, int16, int32, or int64. Specifies a list containing the sizes of each output tensor along the split dimension.
-*@li split_dim: An int8, int16, int32, or int64. Specifies the dimension along which to split . \n
+*Must be one of the following types: float16, float32, double, int64, int32, uint8, uint16, uint32, uint64, int8, int16, complex64, complex128, qint8, quint8, qint16, quint16, qint32.
+*@li size_splits: Must be one of the following types: int32, int64. Specifies a list containing the sizes of each output tensor along the split dimension.
+*@li split_dim: Must be of type int32. Specifies the dimension along which to split . \n

 *@par Attributes:
-*num_split: A required int8, int16, int32, or int64. Specifies the number of output tensors. No default value . \n
+*num_split: A required int32. Specifies the number of output tensors. No default value . \n

 *@par Outputs:
 *y: Dynamic output.A list of output tensors. Has the same type and format as "x" . \n
@@ -129,9 +129,9 @@ REG_OP(SplitV)
 *Must be one of the following types: float16, float32, int32, int8, int16, int64, uint8, uint16, uint32, uint64

 *@par Attributes:
-*@li size_splits: A required list of int8, int16, int32, or int64. Specifies a list containing the sizes of each output tensor along the split dimension.
-*@li split_dim: A required int8, int16, int32, or int64. Specifies the dimension along which to split. No default value.
-*@li num_split: A required int8, int16, int32, or int64. Specifies the number of output tensors. No default value . \n
+*@li size_splits: A required list of int32. Specifies a list containing the sizes of each output tensor along the split dimension.
+*@li split_dim: A required int32. Specifies the dimension along which to split. No default value.
+*@li num_split: A required int32. Specifies the number of output tensors. No default value . \n

 *@par Outputs:
 *y: Dynamic output.A list of output tensors. Has the same type and format as "x" . \n
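Note: the size_splits contract documented above is easiest to see in one dimension: output i takes the next size_splits[i] elements along split_dim. A host-side sketch for the 1-D case (illustrative helper name, no validation that the sizes sum to the input length):

    #include <cstddef>
    #include <cstdint>
    #include <vector>

    std::vector<std::vector<float>> SplitVRef(const std::vector<float> &x,
                                              const std::vector<int32_t> &size_splits) {
      std::vector<std::vector<float>> outputs;
      std::size_t offset = 0;
      for (const int32_t len : size_splits) {
        // Copy the next `len` elements into output i.
        outputs.emplace_back(x.begin() + offset, x.begin() + offset + len);
        offset += static_cast<std::size_t>(len);
      }
      return outputs;
    }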
diff --git a/third_party/fwkacllib/inc/ops/string_ops.h b/third_party/fwkacllib/inc/ops/string_ops.h
index ec84cc83..29aec302 100644
--- a/third_party/fwkacllib/inc/ops/string_ops.h
+++ b/third_party/fwkacllib/inc/ops/string_ops.h
@@ -488,7 +488,7 @@ include:
 */
 REG_OP(AsString)
     .INPUT(x, TensorType({DT_INT8, DT_INT16, DT_INT32, DT_INT64, DT_FLOAT, \
-        DT_DOUBLE, DT_BOOL}))
+        DT_DOUBLE, DT_BOOL, DT_COMPLEX64, DT_COMPLEX128}))
     .OUTPUT(y, TensorType({DT_STRING}))
     .ATTR(precision, Int, -1)
     .ATTR(scientific, Bool, false)
diff --git a/third_party/fwkacllib/inc/runtime/config.h b/third_party/fwkacllib/inc/runtime/config.h
index ee104693..fed7341a 100644
--- a/third_party/fwkacllib/inc/runtime/config.h
+++ b/third_party/fwkacllib/inc/runtime/config.h
@@ -46,6 +46,12 @@ typedef enum tagRtChipType {
     CHIP_END,
 } rtChipType_t;

+typedef enum tagRtAicpuScheType {
+    SCHEDULE_SOFTWARE = 0, /* Software Schedule */
+    SCHEDULE_SOFTWARE_OPT,
+    SCHEDULE_HARDWARE, /* HWTS Schedule */
+} rtAicpuScheType;
+
 typedef enum tagRtVersion {
     VER_BEGIN = 0,
     VER_NA = VER_BEGIN,
@@ -184,6 +190,19 @@ RTS_API rtError_t rtMemGetL2Info(rtStream_t stream, void **ptr, uint32_t *size);
  */
 RTS_API rtError_t rtGetRuntimeVersion(uint32_t *runtimeVersion);

+
+/**
+ * @ingroup
+ * @brief get device feature ability by device id, such as task schedule ability
+ * @param [in] deviceId
+ * @param [in] moduleType
+ * @param [in] featureType
+ * @param [out] value
+ * @return RT_ERROR_NONE for ok
+ * @return RT_ERROR_INVALID_VALUE for error input
+ */
+RTS_API rtError_t rtGetDeviceCapability(int32_t deviceId, int32_t moduleType, int32_t featureType, int32_t *value);
+
 #if defined(__cplusplus) && !defined(COMPILE_OMG_PACKAGE)
 }
 #endif
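Note: a minimal call sketch for the new capability query, combined with the rtDeviceFeatureType_t added in dev.h below and the rtAicpuScheType values above. The moduleType constant is a placeholder, since valid module types are not defined anywhere in this diff:

    #include "runtime/config.h"
    #include "runtime/dev.h"

    // Returns true when the device reports HWTS (hardware) AICPU scheduling.
    bool DeviceUsesHwtsSchedule(int32_t deviceId) {
      const int32_t kModuleType = 0;  // placeholder; not specified in this diff
      int32_t value = 0;
      rtError_t ret = rtGetDeviceCapability(deviceId, kModuleType,
                                            FEATURE_TYPE_SCHE, &value);
      if (ret != RT_ERROR_NONE) {
        return false;  // query failed or not supported on this device
      }
      return value == SCHEDULE_HARDWARE;
    }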
diff --git a/third_party/fwkacllib/inc/runtime/dev.h b/third_party/fwkacllib/inc/runtime/dev.h
index 49f6a3f6..018f4e6c 100644
--- a/third_party/fwkacllib/inc/runtime/dev.h
+++ b/third_party/fwkacllib/inc/runtime/dev.h
@@ -62,6 +62,11 @@ typedef enum tagRtFeatureType {
     FEATURE_TYPE_RSV
 } rtFeatureType_t;

+typedef enum tagRtDeviceFeatureType {
+    FEATURE_TYPE_SCHE,
+    FEATURE_TYPE_END,
+} rtDeviceFeatureType_t;
+
 typedef enum tagMemcpyInfo {
     MEMCPY_INFO_SUPPORT_ZEROCOPY = 0,
     MEMCPY_INFO_RSV
diff --git a/third_party/fwkacllib/inc/runtime/kernel.h b/third_party/fwkacllib/inc/runtime/kernel.h
index dc16ca58..0ec1a163 100644
--- a/third_party/fwkacllib/inc/runtime/kernel.h
+++ b/third_party/fwkacllib/inc/runtime/kernel.h
@@ -191,6 +191,14 @@ typedef void (*rtCallback_t)(void *fnData);
 #define RT_FUSION_KERNEL_DUMPFLAG (0x04)
 #define RT_KERNEL_CUSTOM_AICPU (0x08)

+/**
+ * @ingroup rt_kernel
+ * @brief kernel mode
+**/
+#define RT_DEFAULT_KERNEL_MODE (0x00)
+#define RT_NORMAL_KERNEL_MODE (0x01)
+#define RT_ALL_KERNEL_MODE (0x02)
+
 /**
  * @ingroup rt_kernel
  * @brief kernel L1 Fusion Dump bit flags
@@ -207,6 +215,16 @@ typedef void (*rtCallback_t)(void *fnData);
  */
 RTS_API rtError_t rtDevBinaryRegister(const rtDevBinary_t *bin, void **handle);

+/**
+ * @ingroup rt_kernel
+ * @brief register device binary with all kernels
+ * @param [in] bin device binary description
+ * @param [out] handle device binary handle
+ * @return RT_ERROR_NONE for ok
+ * @return RT_ERROR_INVALID_VALUE for error input
+ */
+RTS_API rtError_t rtRegisterAllKernel(const rtDevBinary_t *bin, void **handle);
+
 /**
  * @ingroup rt_kernel
  * @brief register fast memeory device binary
@@ -314,6 +332,23 @@ RTS_API rtError_t rtKernelConfigDump(uint32_t kind, uint32_t dumpSizePerBlock, u
 RTS_API rtError_t rtKernelLaunch(const void *stubFunc, uint32_t blockDim, void *args, uint32_t argsSize,
                                  rtSmDesc_t *smDesc, rtStream_t stream);

+/**
+ * @ingroup rt_kernel
+ * @brief launch kernel with handle to device
+ * @param [in] handle program handle
+ * @param [in] devFunc device function description
+ * @param [in] blockDim block dimensions
+ * @param [in] args arguments address for the kernel function
+ * @param [in] argsSize arguments size
+ * @param [in] smDesc shared memory description
+ * @param [in] stream associated stream
+ * @param [in] kernelInfo kernel info
+ * @return RT_ERROR_NONE for ok
+ * @return RT_ERROR_INVALID_VALUE for error input
+ */
+RTS_API rtError_t rtKernelLaunchWithHandle(void *handle, const void *devFunc, uint32_t blockDim, void *args, uint32_t argsSize,
+                                           rtSmDesc_t *smDesc, rtStream_t stream_, const void *kernelInfo);
+
 /**
  * @ingroup rt_kernel
  * @brief launch kernel to device
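Note: the intended pairing of the two new kernel.h entry points appears to be "register once, then launch by handle". A sketch under that assumption (error paths trimmed; devFunc and kernelInfo are whatever the loaded binary defines):

    #include "runtime/kernel.h"

    rtError_t LaunchByHandle(const rtDevBinary_t *bin, const void *devFunc,
                             void *args, uint32_t argsSize, rtStream_t stream) {
      void *handle = nullptr;
      rtError_t ret = rtRegisterAllKernel(bin, &handle);  // register whole binary
      if (ret != RT_ERROR_NONE) {
        return ret;
      }
      const uint32_t blockDim = 1;  // single block, for the sketch only
      return rtKernelLaunchWithHandle(handle, devFunc, blockDim, args, argsSize,
                                      nullptr /* smDesc */, stream,
                                      nullptr /* kernelInfo */);
    }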
diff --git a/third_party/fwkacllib/inc/runtime/rt_model.h b/third_party/fwkacllib/inc/runtime/rt_model.h
index 482486a8..e6d849c8 100644
--- a/third_party/fwkacllib/inc/runtime/rt_model.h
+++ b/third_party/fwkacllib/inc/runtime/rt_model.h
@@ -50,6 +50,7 @@ typedef enum tagModelTaskType {
     RT_MODEL_TASK_STREAM_LABEL_SWITCH_BY_INDEX,
     RT_MODEL_TASK_STREAM_LABEL_GOTO,
     RT_MODEL_TASK_MODEL_EXIT,
+    RT_MODEL_TASK_ALL_KERNEL,
 } rtModelTaskType_t;

 typedef enum tagModelStreamType {
@@ -127,6 +128,18 @@ typedef struct tagKernelTaskInfo {
     uint16_t *argsOffset;
 } rtKernelTaskInfo_t;

+typedef struct tagAllKernelTaskInfo {
+    uint16_t blockDim;
+    uint16_t argsCount;
+    uint16_t argsSize;
+    uint16_t reserved;
+    void *devfunc;
+    void *handle;
+    uint8_t *smDesc;
+    uint8_t *args;
+    uint16_t *argsOffset;
+} rtAllKernelTaskInfo_t;
+
 typedef struct tagKernelTaskInfoEx {
     uint32_t flags;
     uint32_t argsSize;
@@ -251,6 +264,7 @@ typedef struct tagTaskInfo {
     union {
         rtKernelTaskInfoEx_t kernelTaskEx;
         rtKernelTaskInfo_t kernelTask;
+        rtAllKernelTaskInfo_t allKernelTask;
         rtEventTaskInfo_t eventTask;
         rtStreamSwitchTaskInfo_t streamSwitchTask;
         rtStreamActiveTaskInfo_t streamActiveTask;
diff --git a/third_party/fwkacllib/inc/toolchain/slog.h b/third_party/fwkacllib/inc/toolchain/slog.h
index ba286d02..7c4f7be2 100644
--- a/third_party/fwkacllib/inc/toolchain/slog.h
+++ b/third_party/fwkacllib/inc/toolchain/slog.h
@@ -120,15 +120,15 @@ typedef struct tagKV {
 } KeyValue;

 typedef enum {
-    APPLICATION = 0,
-    SYSTEM
+  APPLICATION = 0,
+  SYSTEM
 } ProcessType;

 typedef struct {
-    ProcessType type;
-    unsigned int pid;
-    unsigned int deviceId;
-    char reserved[RESERVERD_LENGTH];
+  ProcessType type;
+  unsigned int pid;
+  unsigned int deviceId;
+  char reserved[RESERVERD_LENGTH];
 } LogAttr;

 /**
@@ -381,13 +381,13 @@ DLL_EXPORT void DlogFlush(void);
  * @ingroup slog
  * @brief Internal log interface, other modules are not allowed to call this interface
  */
-void DlogErrorInner(int moduleId, const char *fmt, ...) __attribute__((format(printf, 2, 3)));
-void DlogWarnInner(int moduleId, const char *fmt, ...) __attribute__((format(printf, 2, 3)));
-void DlogInfoInner(int moduleId, const char *fmt, ...) __attribute__((format(printf, 2, 3)));
-void DlogDebugInner(int moduleId, const char *fmt, ...) __attribute__((format(printf, 2, 3)));
-void DlogEventInner(int moduleId, const char *fmt, ...) __attribute__((format(printf, 2, 3)));
-void DlogInner(int moduleId, int level, const char *fmt, ...) __attribute__((format(printf, 3, 4)));
-void DlogWithKVInner(int moduleId, int level, KeyValue *pstKVArray, int kvNum, const char *fmt, ...)
-    __attribute__((format(printf, 5, 6)));
+void DlogErrorInner(int moduleId, const char *fmt, ...);
+void DlogWarnInner(int moduleId, const char *fmt, ...);
+void DlogInfoInner(int moduleId, const char *fmt, ...);
+void DlogDebugInner(int moduleId, const char *fmt, ...);
+void DlogEventInner(int moduleId, const char *fmt, ...);
+void DlogInner(int moduleId, int level, const char *fmt, ...);
+void DlogWithKVInner(int moduleId, int level, KeyValue *pstKVArray, int kvNum, const char *fmt, ...);

 #ifdef __cplusplus
 #ifndef LOG_CPP
@@ -500,8 +500,8 @@ DLL_EXPORT void DlogFlushForC(void);
  * @ingroup slog
  * @brief Internal log interface, other modules are not allowed to call this interface
  */
-void DlogInnerForC(int moduleId, int level, const char *fmt, ...) __attribute__((format(printf, 3, 4)));
-void DlogWithKVInnerForC(int moduleId, int level, KeyValue *pstKVArray, int kvNum, const char *fmt, ...) __attribute__((format(printf, 5, 6)));
+void DlogInnerForC(int moduleId, int level, const char *fmt, ...);
+void DlogWithKVInnerForC(int moduleId, int level, KeyValue *pstKVArray, int kvNum, const char *fmt, ...);

 #ifdef __cplusplus
 }
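Note: removing __attribute__((format(...))) loses printf-style argument checking on GCC/Clang. If the removal is for MSVC portability, the checking can be kept behind a guard; a sketch with a hypothetical macro name:

    /* Keeps printf-format checking on GCC/Clang, compiles away elsewhere. */
    #if defined(__GNUC__)
    #define SLOG_PRINTF_FORMAT(fmtIdx, vaIdx) __attribute__((format(printf, fmtIdx, vaIdx)))
    #else
    #define SLOG_PRINTF_FORMAT(fmtIdx, vaIdx)
    #endif

    void DlogErrorInner(int moduleId, const char *fmt, ...) SLOG_PRINTF_FORMAT(2, 3);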
diff --git a/third_party/fwkacllib/inc/toolchain/tuning_tool/tune_api.h b/third_party/fwkacllib/inc/toolchain/tuning_tool/tune_api.h
index 6208f462..e436dafd 100644
--- a/third_party/fwkacllib/inc/toolchain/tuning_tool/tune_api.h
+++ b/third_party/fwkacllib/inc/toolchain/tuning_tool/tune_api.h
@@ -1,72 +1,137 @@
-/**
- * @file tune_api.h
- *
- * Copyright (c) Huawei Technologies Co., Ltd. 2020-2020. All rights reserved.\n
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.\n
- * Description: mstune tuning API header file
- */
-/** @defgroup mstune mstune tuning API */
-#ifndef TUNE_API_H
-#define TUNE_API_H
-#include <vector>
-#include <map>
-#include <string>
-#include "graph/graph.h"
-#include "ge/ge_api.h"
-
-/**
- * @ingroup mstune
- *
- * mstune status
- */
-enum MsTuneStatus {
-  MSTUNE_SUCCESS, /** tune success */
-  MSTUNE_FAILED,  /** tune failed */
-};
-
-// Option key: for train options sets
-const std::string MSTUNE_SELF_KEY = "mstune";
-const std::string MSTUNE_GEINIT_KEY = "initialize";
-const std::string MSTUNE_GESESS_KEY = "session";
-
-/**
- * @ingroup mstune
- * @par Description: command-line tuning
- *
- * @attention None
- * @param option [IN] tuning options
- * @param msg [OUT] message returned when tuning fails
- * @retval #MSTUNE_SUCCESS execution succeeded
- * @retval #MSTUNE_FAILED execution failed
- * @par Dependency:
- * @li tune_api.cpp: the development package this API belongs to.
- * @li tune_api.h: the header file in which this API is declared.
- * @see None
- * @since
- */
-MsTuneStatus MsTuning(const std::map<std::string, std::string> &option, std::string &msg);
-
-/**
- * @ingroup mstune
- * @par Description: gradient tuning
- *
- * @attention None
- * @param tuningGraph [IN] graph to tune
- * @param dependGraph [IN] dependent graphs of the tuning graph
- * @param session [IN] GE session handle
- * @param option [IN] option set, containing tuning options and GE options
- * @retval #MSTUNE_SUCCESS execution succeeded
- * @retval #MSTUNE_FAILED execution failed
- * @par Dependency:
- * @li tune_api.cpp: the development package this API belongs to.
- * @li tune_api.h: the header file in which this API is declared.
- * @see None
- * @since
- */
-extern "C" MsTuneStatus MsTrainTuning(ge::Graph &tuningGraph, std::vector<ge::Graph> &dependGraph,
-    ge::Session *session, const std::map<std::string, std::map<std::string, std::string>> &option);
-
-#endif
+/**
+ * @file tune_api.h
+ *
+ * Copyright (c) Huawei Technologies Co., Ltd. 2020-2020. All rights reserved.\n
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.\n
+ * Description: mstune tuning API header file
+ */
+/** @defgroup mstune mstune tuning API */
+#ifndef TUNE_API_H
+#define TUNE_API_H
+#include <vector>
+#include <map>
+#include <string>
+#include "graph/graph.h"
+#include "ge/ge_api.h"
+
+/**
+ * @ingroup mstune
+ *
+ * mstune status
+ */
+enum MsTuneStatus {
+  MSTUNE_SUCCESS, /** tune success */
+  MSTUNE_FAILED,  /** tune failed */
+};
+
+// Option key: for train options sets
+const std::string MSTUNE_SELF_KEY = "mstune";
+const std::string MSTUNE_GEINIT_KEY = "initialize";
+const std::string MSTUNE_GESESS_KEY = "session";
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+struct RunnerInitConfig {
+  // online only
+  std::string profPath;
+  std::string parserPath;
+  // ncs only
+  std::vector<uint32_t> devList;
+};
+
+struct RunnerOpInfo {
+  std::string opName;
+  uint64_t opCostTime;
+  uint64_t aicoreCostTime;
+  // gradient_split only
+  std::string modelName;
+  std::string opType;
+  std::vector<uint64_t> start;
+  std::vector<uint64_t> end;
+};
+
+struct RunnerModelInfo {
+  uint64_t totalCostTime;
+};
+
+struct RunnerRunResult {
+  std::vector<RunnerModelInfo> modelInfo;
+  std::vector<RunnerOpInfo> opInfo;
+};
+
+struct RunnerResult {
+  uint64_t totalCostTime;
+  std::map<std::string, uint64_t> opCostTime;
+  std::map<std::string, uint64_t> aicoreCostTime;
+};
+
+struct RunnerDataBuf {
+  void *ptr = nullptr;
+  size_t size = 0;
+};
+
+struct AOEBufferData {
+  std::shared_ptr<uint8_t> data = nullptr;
+  uint64_t length;
+};
+
+struct RunnerConfig {
+  bool isProf;
+  uint32_t loop;
+  // offline only
+  std::vector<RunnerDataBuf> input;
+  std::vector<RunnerDataBuf> output;
+  std::string modelPath;
+  RunnerDataBuf modelData;
+  // online only
+  uint32_t devId;
+  std::vector<std::vector<ge::Tensor>> inputs;
+  std::vector<ge::Graph> dependGraph; // run graph (for training)
+};
+#ifdef __cplusplus
+}
+#endif
+
+/**
+ * @ingroup mstune
+ * @par Description: command-line tuning
+ *
+ * @attention None
+ * @param option [IN] tuning options
+ * @param msg [OUT] message returned when tuning fails
+ * @retval #MSTUNE_SUCCESS execution succeeded
+ * @retval #MSTUNE_FAILED execution failed
+ * @par Dependency:
+ * @li tune_api.cpp: the development package this API belongs to.
+ * @li tune_api.h: the header file in which this API is declared.
+ * @see None
+ * @since
+ */
+MsTuneStatus MsTuning(const std::map<std::string, std::string> &option, std::string &msg);
+
+/**
+ * @ingroup mstune
+ * @par Description: gradient tuning
+ *
+ * @attention None
+ * @param tuningGraph [IN] graph to tune
+ * @param dependGraph [IN] dependent graphs of the tuning graph
+ * @param session [IN] GE session handle
+ * @param option [IN] option set, containing tuning options and GE options
+ * @retval #MSTUNE_SUCCESS execution succeeded
+ * @retval #MSTUNE_FAILED execution failed
+ * @par Dependency:
+ * @li tune_api.cpp: the development package this API belongs to.
+ * @li tune_api.h: the header file in which this API is declared.
+ * @see None
+ * @since
+ */
+extern "C" MsTuneStatus MsTrainTuning(ge::Graph &tuningGraph, std::vector<ge::Graph> &dependGraph,
+    ge::Session *session, const std::map<std::string, std::map<std::string, std::string>> &option);
+
+#endif
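Note: a minimal call sketch for the command-line tuning entry point declared above, assuming "toolchain/tuning_tool/tune_api.h" resolves on the include path; the option map contents are tool-specific and left to the caller:

    #include <iostream>
    #include <map>
    #include <string>
    #include "toolchain/tuning_tool/tune_api.h"

    int RunMsTuning(const std::map<std::string, std::string> &options) {
      std::string msg;
      if (MsTuning(options, msg) != MSTUNE_SUCCESS) {
        std::cerr << "tuning failed: " << msg << std::endl;  // msg set on failure
        return -1;
      }
      return 0;
    }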