Merge pull request !2108 from yanghaoran/releasetags/v1.8.0^2
@@ -134,6 +134,7 @@ static const int ACL_ERROR_DRV_FAILURE = 500004; | |||
static const int ACL_ERROR_PROFILING_FAILURE = 500005; | |||
#define ACL_TENSOR_SHAPE_RANGE_NUM 2 | |||
#define ACL_TENSOR_VALUE_RANGE_NUM 2 | |||
#define ACL_UNKNOWN_RANK 0xFFFFFFFFFFFFFFFE | |||
typedef enum { | |||
@@ -336,6 +337,19 @@ ACL_FUNC_VISIBILITY void aclDestroyTensorDesc(const aclTensorDesc *desc); | |||
ACL_FUNC_VISIBILITY aclError aclSetTensorShapeRange(aclTensorDesc *desc, size_t dimsCount, | |||
int64_t dimsRange[][ACL_TENSOR_SHAPE_RANGE_NUM]); | |||
/** | |||
* @ingroup AscendCL | |||
* @brief set value range for aclTensorDesc | |||
* | |||
* @param desc [OUT] pointer to the data of aclTensorDesc | |||
* @param valueCount [IN] the number of value | |||
* @param valueRange [IN] the range of value | |||
* | |||
* @retval ACL_SUCCESS The function is successfully executed. | |||
* @retval OtherValues Failure | |||
*/ | |||
ACL_FUNC_VISIBILITY aclError aclSetTensorValueRange(aclTensorDesc *desc, size_t valueCount, | |||
int64_t valueRange[][ACL_TENSOR_VALUE_RANGE_NUM]); | |||
/** | |||
* @ingroup AscendCL | |||
* @brief get data type specified by the tensor description | |||
@@ -41,6 +41,8 @@ typedef enum { | |||
typedef enum aclCompileFlag { ACL_OP_COMPILE_DEFAULT, ACL_OP_COMPILE_FUZZ } aclOpCompileFlag; | |||
typedef struct aclGraphDumpOption aclGraphDumpOption; | |||
/** | |||
* @ingroup AscendCL | |||
* @brief compile op | |||
@@ -114,6 +116,55 @@ ACL_FUNC_VISIBILITY aclError aclSetCompileopt(aclCompileOpt opt, const char *val | |||
*/ | |||
ACL_FUNC_VISIBILITY aclError aclopSetCompileFlag(aclOpCompileFlag flag); | |||
/** | |||
* @ingroup AscendCL | |||
* @brief generate graph and dump | |||
* | |||
* @param opType [IN] op type | |||
* @param numInputs [IN] number of inputs | |||
* @param inputDesc [IN] pointer to array of input tensor descriptions | |||
* @param inputs [IN] pointer to array of input buffers | |||
* @param numOutputs [IN] number of outputs | |||
* @param outputDesc [IN] pointer to array of output tensor descriptions | |||
* @param outputs [IN] pointer to array of outputs buffers | |||
* @param attr [IN] pointer to instance of aclopAttr. | |||
* may pass nullptr if the op has no attribute | |||
* @param engineType [IN] engine type | |||
* @param graphDumpPath [IN] dump path, if the suffix is ".txt", it means file path, else it means directory path | |||
* @param graphDumpOpt [IN] dump option, nullptr is supported | |||
* | |||
* @retval ACL_SUCCESS The function is successfully executed. | |||
* @retval OtherValues Failure | |||
*/ | |||
ACL_FUNC_VISIBILITY aclError aclGenGraphAndDumpForOp( | |||
const char *opType, int numInputs, const aclTensorDesc *const inputDesc[], const aclDataBuffer *const inputs[], | |||
int numOutputs, const aclTensorDesc *const outputDesc[], aclDataBuffer *const outputs[], const aclopAttr *attr, | |||
aclopEngineType engineType, const char *graphDumpPath, const aclGraphDumpOption *graphDumpOpt); | |||
/** | |||
* @ingroup AscendCL | |||
* @brief Create the graph dump option | |||
* | |||
* @retval null for failed | |||
* @retval OtherValues success | |||
* | |||
* @see aclDestroyGraphDumpOpt | |||
*/ | |||
ACL_FUNC_VISIBILITY aclGraphDumpOption *aclCreateGraphDumpOpt(); | |||
/** | |||
* @ingroup AscendCL | |||
* @brief Destroy graph dump option | |||
* | |||
* @param graphDumpOpt [IN] pointer to the graph dump option | |||
* | |||
* @retval ACL_SUCCESS The function is successfully executed. | |||
* @retval OtherValues Failure | |||
* | |||
* @see aclCreateGraphDumpOpt | |||
*/ | |||
ACL_FUNC_VISIBILITY aclError aclDestroyGraphDumpOpt(const aclGraphDumpOption *graphDumpOpt); | |||
#ifdef __cplusplus | |||
} | |||
#endif | |||
@@ -37,6 +37,7 @@ extern "C" { | |||
#define ACL_PROF_HCCL_TRACE 0x0020ULL | |||
#define ACL_PROF_TRAINING_TRACE 0x0040ULL | |||
#define ACL_PROF_MSPROFTX 0x0080ULL | |||
#define ACL_PROF_RUNTIME_API 0x0100ULL | |||
/** | |||
* @deprecated please use aclprofGetOpTypeLen and aclprofGetOpTNameLen instead | |||
@@ -367,6 +368,79 @@ MSVP_PROF_API aclprofStepInfo *aclprofCreateStepInfo(); | |||
*/ | |||
MSVP_PROF_API void aclprofDestroyStepInfo(aclprofStepInfo *stepinfo); | |||
/** | |||
* @ingroup AscendCL | |||
* @brief create pointer to aclprofstamp | |||
* | |||
* | |||
* @retval aclprofStamp pointer | |||
*/ | |||
MSVP_PROF_API void *aclprofCreateStamp(); | |||
/** | |||
* @ingroup AscendCL | |||
* @brief destory stamp pointer | |||
* | |||
* | |||
* @retval void | |||
*/ | |||
MSVP_PROF_API void aclprofDestroyStamp(void *stamp); | |||
/** | |||
* @ingroup AscendCL | |||
* @brief Record push timestamp | |||
* | |||
* @retval ACL_SUCCESS The function is successfully executed. | |||
* @retval OtherValues Failure | |||
*/ | |||
MSVP_PROF_API aclError aclprofPush(void *stamp); | |||
/** | |||
* @ingroup AscendCL | |||
* @brief Record pop timestamp | |||
* | |||
* | |||
* @retval ACL_SUCCESS The function is successfully executed. | |||
* @retval OtherValues Failure | |||
*/ | |||
MSVP_PROF_API aclError aclprofPop(); | |||
/** | |||
* @ingroup AscendCL | |||
* @brief Record range start timestamp | |||
* | |||
* @retval ACL_SUCCESS The function is successfully executed. | |||
* @retval OtherValues Failure | |||
*/ | |||
MSVP_PROF_API aclError aclprofRangeStart(void *stamp, uint32_t *rangeId); | |||
/** | |||
* @ingroup AscendCL | |||
* @brief Record range end timestamp | |||
* | |||
* @retval ACL_SUCCESS The function is successfully executed. | |||
* @retval OtherValues Failure | |||
*/ | |||
MSVP_PROF_API aclError aclprofRangeStop(uint32_t rangeId); | |||
/** | |||
* @ingroup AscendCL | |||
* @brief set message to stamp | |||
* | |||
* | |||
* @retval void | |||
*/ | |||
MSVP_PROF_API aclError aclprofSetStampTraceMessage(void *stamp, const char *msg, uint32_t msgLen); | |||
/** | |||
* @ingroup AscendCL | |||
* @brief Record mark timestamp | |||
* | |||
* @retval ACL_SUCCESS The function is successfully executed. | |||
* @retval OtherValues Failure | |||
*/ | |||
MSVP_PROF_API aclError aclprofMark(void *stamp); | |||
#ifdef __cplusplus | |||
} | |||
#endif | |||
@@ -44,6 +44,11 @@ typedef enum aclrtEventStatus { | |||
ACL_EVENT_STATUS_RESERVED = 2, | |||
} aclrtEventStatus; | |||
typedef enum aclrtEventRecordedStatus { | |||
ACL_EVENT_RECORDED_STATUS_NOT_READY = 0, | |||
ACL_EVENT_RECORDED_STATUS_COMPLETE = 1, | |||
} aclrtEventRecordedStatus; | |||
typedef enum aclrtEventWaitStatus { | |||
ACL_EVENT_WAIT_STATUS_COMPLETE = 0, | |||
ACL_EVENT_WAIT_STATUS_NOT_READY = 1, | |||
@@ -503,8 +508,21 @@ ACL_FUNC_VISIBILITY aclError aclrtResetEvent(aclrtEvent event, aclrtStream strea | |||
* @retval ACL_SUCCESS The function is successfully executed. | |||
* @retval OtherValues Failure | |||
*/ | |||
ACL_DEPRECATED_MESSAGE("aclrtQueryEvent is deprecated, use aclrtQueryEventStatus instead") | |||
ACL_FUNC_VISIBILITY aclError aclrtQueryEvent(aclrtEvent event, aclrtEventStatus *status); | |||
/** | |||
* @ingroup AscendCL | |||
* @brief Queries an event's status | |||
* | |||
* @param event [IN] event to query | |||
* @param status [OUT] event recorded status | |||
* | |||
* @retval ACL_SUCCESS The function is successfully executed. | |||
* @retval OtherValues Failure | |||
*/ | |||
ACL_FUNC_VISIBILITY aclError aclrtQueryEventStatus(aclrtEvent event, aclrtEventRecordedStatus *status); | |||
/** | |||
* @ingroup AscendCL | |||
* @brief Queries an event's wait-status | |||
@@ -32,42 +32,43 @@ | |||
#endif | |||
#include <stddef.h> | |||
#include <stdint.h> | |||
#ifdef __cplusplus | |||
extern "C" { | |||
#endif | |||
static const uint32_t ACL_ERROR_GE_PARAM_INVALID = 145000; | |||
static const uint32_t ACL_ERROR_GE_EXEC_NOT_INIT = 145001; | |||
static const uint32_t ACL_ERROR_GE_EXEC_MODEL_PATH_INVALID = 145002; | |||
static const uint32_t ACL_ERROR_GE_EXEC_MODEL_ID_INVALID = 145003; | |||
static const uint32_t ACL_ERROR_GE_EXEC_MODEL_DATA_SIZE_INVALID = 145006; | |||
static const uint32_t ACL_ERROR_GE_EXEC_MODEL_ADDR_INVALID = 145007; | |||
static const uint32_t ACL_ERROR_GE_EXEC_MODEL_QUEUE_ID_INVALID = 145008; | |||
static const uint32_t ACL_ERROR_GE_EXEC_LOAD_MODEL_REPEATED = 145009; | |||
static const uint32_t ACL_ERROR_GE_DYNAMIC_INPUT_ADDR_INVALID = 145011; | |||
static const uint32_t ACL_ERROR_GE_DYNAMIC_INPUT_LENGTH_INVALID = 145012; | |||
static const uint32_t ACL_ERROR_GE_DYNAMIC_BATCH_SIZE_INVALID = 145013; | |||
static const uint32_t ACL_ERROR_GE_AIPP_BATCH_EMPTY = 145014; | |||
static const uint32_t ACL_ERROR_GE_AIPP_NOT_EXIST = 145015; | |||
static const uint32_t ACL_ERROR_GE_AIPP_MODE_INVALID = 145016; | |||
static const uint32_t ACL_ERROR_GE_OP_TASK_TYPE_INVALID = 145017; | |||
static const uint32_t ACL_ERROR_GE_OP_KERNEL_TYPE_INVALID = 145018; | |||
static const uint32_t ACL_ERROR_GE_PLGMGR_PATH_INVALID = 145019; | |||
static const uint32_t ACL_ERROR_GE_FORMAT_INVALID = 145020; | |||
static const uint32_t ACL_ERROR_GE_SHAPE_INVALID = 145021; | |||
static const uint32_t ACL_ERROR_GE_DATATYPE_INVALID = 145022; | |||
static const uint32_t ACL_ERROR_GE_MEMORY_ALLOCATION = 245000; | |||
static const uint32_t ACL_ERROR_GE_MEMORY_OPERATE_FAILED = 245001; | |||
static const uint32_t ACL_ERROR_GE_INTERNAL_ERROR = 545000; | |||
static const uint32_t ACL_ERROR_GE_LOAD_MODEL = 545001; | |||
static const uint32_t ACL_ERROR_GE_EXEC_LOAD_MODEL_PARTITION_FAILED = 545002; | |||
static const uint32_t ACL_ERROR_GE_EXEC_LOAD_WEIGHT_PARTITION_FAILED = 545003; | |||
static const uint32_t ACL_ERROR_GE_EXEC_LOAD_TASK_PARTITION_FAILED = 545004; | |||
static const uint32_t ACL_ERROR_GE_EXEC_LOAD_KERNEL_PARTITION_FAILED = 545005; | |||
static const uint32_t ACL_ERROR_GE_EXEC_RELEASE_MODEL_DATA = 545006; | |||
static const uint32_t ACL_ERROR_GE_COMMAND_HANDLE = 545007; | |||
static const uint32_t ACL_ERROR_GE_GET_TENSOR_INFO = 545008; | |||
static const uint32_t ACL_ERROR_GE_UNLOAD_MODEL = 545009; | |||
static const uint32_t ACL_ERROR_GE_PARAM_INVALID = 145000U; | |||
static const uint32_t ACL_ERROR_GE_EXEC_NOT_INIT = 145001U; | |||
static const uint32_t ACL_ERROR_GE_EXEC_MODEL_PATH_INVALID = 145002U; | |||
static const uint32_t ACL_ERROR_GE_EXEC_MODEL_ID_INVALID = 145003U; | |||
static const uint32_t ACL_ERROR_GE_EXEC_MODEL_DATA_SIZE_INVALID = 145006U; | |||
static const uint32_t ACL_ERROR_GE_EXEC_MODEL_ADDR_INVALID = 145007U; | |||
static const uint32_t ACL_ERROR_GE_EXEC_MODEL_QUEUE_ID_INVALID = 145008U; | |||
static const uint32_t ACL_ERROR_GE_EXEC_LOAD_MODEL_REPEATED = 145009U; | |||
static const uint32_t ACL_ERROR_GE_DYNAMIC_INPUT_ADDR_INVALID = 145011U; | |||
static const uint32_t ACL_ERROR_GE_DYNAMIC_INPUT_LENGTH_INVALID = 145012U; | |||
static const uint32_t ACL_ERROR_GE_DYNAMIC_BATCH_SIZE_INVALID = 145013U; | |||
static const uint32_t ACL_ERROR_GE_AIPP_BATCH_EMPTY = 145014U; | |||
static const uint32_t ACL_ERROR_GE_AIPP_NOT_EXIST = 145015U; | |||
static const uint32_t ACL_ERROR_GE_AIPP_MODE_INVALID = 145016U; | |||
static const uint32_t ACL_ERROR_GE_OP_TASK_TYPE_INVALID = 145017U; | |||
static const uint32_t ACL_ERROR_GE_OP_KERNEL_TYPE_INVALID = 145018U; | |||
static const uint32_t ACL_ERROR_GE_PLGMGR_PATH_INVALID = 145019U; | |||
static const uint32_t ACL_ERROR_GE_FORMAT_INVALID = 145020U; | |||
static const uint32_t ACL_ERROR_GE_SHAPE_INVALID = 145021U; | |||
static const uint32_t ACL_ERROR_GE_DATATYPE_INVALID = 145022U; | |||
static const uint32_t ACL_ERROR_GE_MEMORY_ALLOCATION = 245000U; | |||
static const uint32_t ACL_ERROR_GE_MEMORY_OPERATE_FAILED = 245001U; | |||
static const uint32_t ACL_ERROR_GE_INTERNAL_ERROR = 545000U; | |||
static const uint32_t ACL_ERROR_GE_LOAD_MODEL = 545001U; | |||
static const uint32_t ACL_ERROR_GE_EXEC_LOAD_MODEL_PARTITION_FAILED = 545002U; | |||
static const uint32_t ACL_ERROR_GE_EXEC_LOAD_WEIGHT_PARTITION_FAILED = 545003U; | |||
static const uint32_t ACL_ERROR_GE_EXEC_LOAD_TASK_PARTITION_FAILED = 545004U; | |||
static const uint32_t ACL_ERROR_GE_EXEC_LOAD_KERNEL_PARTITION_FAILED = 545005U; | |||
static const uint32_t ACL_ERROR_GE_EXEC_RELEASE_MODEL_DATA = 545006U; | |||
static const uint32_t ACL_ERROR_GE_COMMAND_HANDLE = 545007U; | |||
static const uint32_t ACL_ERROR_GE_GET_TENSOR_INFO = 545008U; | |||
static const uint32_t ACL_ERROR_GE_UNLOAD_MODEL = 545009U; | |||
#ifdef __cplusplus | |||
} // namespace ge | |||
@@ -44,6 +44,7 @@ static const int32_t ACL_ERROR_RT_STREAM_NO_CB_REG = 107015; // callbac | |||
static const int32_t ACL_ERROR_RT_INVALID_MEMORY_TYPE = 107016; // invalid memory type | |||
static const int32_t ACL_ERROR_RT_INVALID_HANDLE = 107017; // invalid handle | |||
static const int32_t ACL_ERROR_RT_INVALID_MALLOC_TYPE = 107018; // invalid malloc type | |||
static const int32_t ACL_ERROR_RT_WAIT_TIMEOUT = 107019; // wait timeout | |||
static const int32_t ACL_ERROR_RT_FEATURE_NOT_SUPPORT = 207000; // feature not support | |||
static const int32_t ACL_ERROR_RT_MEMORY_ALLOCATION = 207001; // memory allocation error | |||
@@ -61,6 +62,7 @@ static const int32_t ACL_ERROR_RT_OVER_LIMIT = 207012; // over limit | |||
static const int32_t ACL_ERROR_RT_QUEUE_EMPTY = 207013; // queue is empty | |||
static const int32_t ACL_ERROR_RT_QUEUE_FULL = 207014; // queue is full | |||
static const int32_t ACL_ERROR_RT_REPEATED_INIT = 207015; // repeated init | |||
static const int32_t ACL_ERROR_RT_AIVEC_OVER_FLOW = 207016; // aivec over flow | |||
static const int32_t ACL_ERROR_RT_INTERNAL_ERROR = 507000; // runtime internal error | |||
static const int32_t ACL_ERROR_RT_TS_ERROR = 507001; // ts internel error | |||
@@ -99,6 +101,11 @@ static const int32_t ACL_ERROR_RT_DEV_SETUP_ERROR = 507033; // devic | |||
static const int32_t ACL_ERROR_RT_VECTOR_CORE_TIMEOUT = 507034; // vector core timeout | |||
static const int32_t ACL_ERROR_RT_VECTOR_CORE_EXCEPTION = 507035; // vector core exception | |||
static const int32_t ACL_ERROR_RT_VECTOR_CORE_TRAP_EXCEPTION = 507036; // vector core trap exception | |||
static const int32_t ACL_ERROR_RT_CDQ_BATCH_ABNORMAL = 507037; // cdq alloc batch abnormal | |||
static const int32_t ACL_ERROR_RT_DIE_MODE_CHANGE_ERROR = 507038; // can not change die mode | |||
static const int32_t ACL_ERROR_RT_DIE_SET_ERROR = 507039; // single die mode can not set die | |||
static const int32_t ACL_ERROR_RT_INVALID_DIEID = 507040; // invalid die id | |||
static const int32_t ACL_ERROR_RT_DIE_MODE_NOT_SET = 507041; // die mode not set | |||
static const int32_t ACL_ERROR_RT_DRV_INTERNAL_ERROR = 507899; // drv internal error | |||
static const int32_t ACL_ERROR_RT_AICPU_INTERNAL_ERROR = 507900; // aicpu internal error | |||
@@ -107,5 +114,4 @@ static const int32_t ACL_ERROR_RT_SOCKET_CLOSE = 507901; // hdc disconn | |||
#ifdef __cplusplus | |||
} | |||
#endif | |||
#endif // __INC_EXTERNEL_RT_ERROR_CODES_H__ |
@@ -158,7 +158,13 @@ enum acldvppJpegFormat { | |||
ACL_JPEG_CSS_UNKNOWN = 1000 | |||
}; | |||
enum acldvppChannelDescParamType { ACL_DVPP_CSC_MATRIX_UINT32 = 0, ACL_DVPP_MODE_UINT32, ACL_DVPP_CHANNEL_ID_UINT64 }; | |||
enum acldvppChannelDescParamType { | |||
ACL_DVPP_CSC_MATRIX_UINT32 = 0, | |||
ACL_DVPP_MODE_UINT32, | |||
ACL_DVPP_CHANNEL_ID_UINT64, | |||
ACL_DVPP_CHANNEL_HEIGHT_UINT32, | |||
ACL_DVPP_CHANNEL_WIDTH_UINT32 | |||
}; | |||
enum aclvdecChannelDescParamType { | |||
ACL_VDEC_CSC_MATRIX_UINT32 = 0, | |||
@@ -20,15 +20,27 @@ | |||
#include <map> | |||
#include <string> | |||
#include "ge_error_codes.h" | |||
#include "graph/types.h" | |||
#include "ge_api_types.h" | |||
namespace ge { | |||
#ifdef __GNUC__ | |||
#define ATTRIBUTED_DEPRECATED(replacement) __attribute__((deprecated("Please use " #replacement " instead."))) | |||
#else | |||
#define ATTRIBUTED_DEPRECATED(replacement) __declspec(deprecated("Please use " #replacement " instead.")) | |||
#endif | |||
// Code compose(4 byte), runtime: 2 bit, type: 2 bit, level: 3 bit, sysid: 8 bit, modid: 5 bit, value: 12 bit | |||
#define GE_ERRORNO(runtime, type, level, sysid, modid, name, value, desc) \ | |||
constexpr ge::Status name = (static_cast<uint32_t>(0xFFU & (static_cast<uint32_t>(runtime))) << 30U) | \ | |||
(static_cast<uint32_t>(0xFFU & (static_cast<uint32_t>(type))) << 28U) | \ | |||
(static_cast<uint32_t>(0xFFU & (static_cast<uint32_t>(level))) << 25U) | \ | |||
(static_cast<uint32_t>(0xFFU & (static_cast<uint32_t>(sysid))) << 17U) | \ | |||
(static_cast<uint32_t>(0xFFU & (static_cast<uint32_t>(modid))) << 12U) | \ | |||
(static_cast<uint32_t>(0x0FFFU) & (static_cast<uint32_t>(value))); \ | |||
const ErrorNoRegisterar g_errorno_##name((name), (desc)); | |||
#define GE_ERRORNO_EXTERNAL(name, desc) const ErrorNoRegisterar g_errorno_##name((name), (desc)); | |||
namespace ge { | |||
class GE_FUNC_VISIBILITY StatusFactory { | |||
public: | |||
static StatusFactory *Instance() { | |||
@@ -56,7 +68,7 @@ class GE_FUNC_VISIBILITY StatusFactory { | |||
} | |||
std::string GetErrDesc(const uint32_t err) { | |||
const auto iter_find = err_desc_.find(err); | |||
const std::map<uint32_t, std::string>::const_iterator iter_find = err_desc_.find(err); | |||
if (iter_find == err_desc_.end()) { | |||
return ""; | |||
} | |||
@@ -82,59 +94,10 @@ class GE_FUNC_VISIBILITY ErrorNoRegisterar { | |||
~ErrorNoRegisterar() {} | |||
}; | |||
// Code compose(4 byte), runtime: 2 bit, type: 2 bit, level: 3 bit, sysid: 8 bit, modid: 5 bit, value: 12 bit | |||
#define GE_ERRORNO(runtime, type, level, sysid, modid, name, value, desc) \ | |||
constexpr ge::Status name = (static_cast<uint32_t>(0xFFU & (static_cast<uint32_t>(runtime))) << 30U) | \ | |||
(static_cast<uint32_t>(0xFFU & (static_cast<uint32_t>(type))) << 28U) | \ | |||
(static_cast<uint32_t>(0xFFU & (static_cast<uint32_t>(level))) << 25U) | \ | |||
(static_cast<uint32_t>(0xFFU & (static_cast<uint32_t>(sysid))) << 17U) | \ | |||
(static_cast<uint32_t>(0xFFU & (static_cast<uint32_t>(modid))) << 12U) | \ | |||
(static_cast<uint32_t>(0x0FFFU) & (static_cast<uint32_t>(value))); \ | |||
const ErrorNoRegisterar g_##name##_errorno(name, desc); | |||
#define GE_ERRORNO_EXTERNAL(name, desc) const ErrorNoRegisterar g_##name##_errorno(name, desc); | |||
using Status = uint32_t; | |||
// General error code | |||
GE_ERRORNO(0, 0, 0, 0, 0, SUCCESS, 0, "success"); | |||
GE_ERRORNO(0b11, 0b11, 0b111, 0xFFU, 0b11111, FAILED, 0xFFFU, "failed"); /*lint !e401*/ | |||
GE_ERRORNO_EXTERNAL(ACL_ERROR_GE_PARAM_INVALID, "Parameter invalid."); | |||
GE_ERRORNO_EXTERNAL(ACL_ERROR_GE_EXEC_NOT_INIT, "GE executor not initialized yet."); | |||
GE_ERRORNO_EXTERNAL(ACL_ERROR_GE_EXEC_MODEL_PATH_INVALID, "Model file path invalid."); | |||
GE_ERRORNO_EXTERNAL(ACL_ERROR_GE_EXEC_MODEL_ID_INVALID, "Model id invalid."); | |||
GE_ERRORNO_EXTERNAL(ACL_ERROR_GE_EXEC_MODEL_DATA_SIZE_INVALID, "Data size of model invalid."); | |||
GE_ERRORNO_EXTERNAL(ACL_ERROR_GE_EXEC_MODEL_ADDR_INVALID, "Model addr invalid."); | |||
GE_ERRORNO_EXTERNAL(ACL_ERROR_GE_EXEC_MODEL_QUEUE_ID_INVALID, "Queue id of model invalid."); | |||
GE_ERRORNO_EXTERNAL(ACL_ERROR_GE_EXEC_LOAD_MODEL_REPEATED, "The model loaded repeatedly."); | |||
GE_ERRORNO_EXTERNAL(ACL_ERROR_GE_DYNAMIC_INPUT_ADDR_INVALID, "Dynamic input addr invalid."); | |||
GE_ERRORNO_EXTERNAL(ACL_ERROR_GE_DYNAMIC_INPUT_LENGTH_INVALID, "Dynamic input size invalid."); | |||
GE_ERRORNO_EXTERNAL(ACL_ERROR_GE_DYNAMIC_BATCH_SIZE_INVALID, "Dynamic batch size invalid."); | |||
GE_ERRORNO_EXTERNAL(ACL_ERROR_GE_AIPP_BATCH_EMPTY, "AIPP batch parameter empty."); | |||
GE_ERRORNO_EXTERNAL(ACL_ERROR_GE_AIPP_NOT_EXIST, "AIPP parameter not exist."); | |||
GE_ERRORNO_EXTERNAL(ACL_ERROR_GE_AIPP_MODE_INVALID, "AIPP mode invalid."); | |||
GE_ERRORNO_EXTERNAL(ACL_ERROR_GE_OP_TASK_TYPE_INVALID, "Task type invalid."); | |||
GE_ERRORNO_EXTERNAL(ACL_ERROR_GE_OP_KERNEL_TYPE_INVALID, "Kernel type invalid."); | |||
GE_ERRORNO_EXTERNAL(ACL_ERROR_GE_PLGMGR_PATH_INVALID, "Plugin path is invalid."); | |||
GE_ERRORNO_EXTERNAL(ACL_ERROR_GE_FORMAT_INVALID, "Format is invalid."); | |||
GE_ERRORNO_EXTERNAL(ACL_ERROR_GE_SHAPE_INVALID, "Shape is invalid."); | |||
GE_ERRORNO_EXTERNAL(ACL_ERROR_GE_DATATYPE_INVALID, "Datatype is invalid."); | |||
GE_ERRORNO_EXTERNAL(ACL_ERROR_GE_MEMORY_ALLOCATION, "Memory allocation error."); | |||
GE_ERRORNO_EXTERNAL(ACL_ERROR_GE_MEMORY_OPERATE_FAILED, "Failed to operate memory."); | |||
GE_ERRORNO_EXTERNAL(ACL_ERROR_GE_INTERNAL_ERROR, "Internal error."); | |||
GE_ERRORNO_EXTERNAL(ACL_ERROR_GE_LOAD_MODEL, "Load model error."); | |||
GE_ERRORNO_EXTERNAL(ACL_ERROR_GE_EXEC_LOAD_MODEL_PARTITION_FAILED, "Failed to load model partition."); | |||
GE_ERRORNO_EXTERNAL(ACL_ERROR_GE_EXEC_LOAD_WEIGHT_PARTITION_FAILED, "Failed to load weight partition."); | |||
GE_ERRORNO_EXTERNAL(ACL_ERROR_GE_EXEC_LOAD_TASK_PARTITION_FAILED, "Failed to load task partition."); | |||
GE_ERRORNO_EXTERNAL(ACL_ERROR_GE_EXEC_LOAD_KERNEL_PARTITION_FAILED, "Failed to load op kernel partition."); | |||
GE_ERRORNO_EXTERNAL(ACL_ERROR_GE_EXEC_RELEASE_MODEL_DATA, "Failed to release the model data."); | |||
GE_ERRORNO_EXTERNAL(ACL_ERROR_GE_COMMAND_HANDLE, "Command handle error."); | |||
GE_ERRORNO_EXTERNAL(ACL_ERROR_GE_GET_TENSOR_INFO, "Get tensor info error."); | |||
GE_ERRORNO_EXTERNAL(ACL_ERROR_GE_UNLOAD_MODEL, "Load model error."); | |||
} // namespace ge | |||
#endif // INC_EXTERNAL_GE_GE_API_ERROR_CODES_H_ |
@@ -28,96 +28,98 @@ | |||
namespace ge { | |||
// Option key: graph run mode | |||
const char *const OPTION_GRAPH_RUN_MODE = "ge.graphRunMode"; | |||
const char *const OPTION_DEVICE_TYPE = "ge.deviceType"; | |||
const char_t *const OPTION_GRAPH_RUN_MODE = "ge.graphRunMode"; | |||
const char_t *const OPTION_DEVICE_TYPE = "ge.deviceType"; | |||
// Option key: ome init | |||
const char *const OPTION_EXEC_SESSION_ID = "ge.exec.sessionId"; | |||
const char *const OPTION_EXEC_DEVICE_ID = "ge.exec.deviceId"; | |||
const char *const OPTION_EXEC_JOB_ID = "ge.exec.jobId"; | |||
const char *const OPTION_EXEC_IS_USEHCOM = "ge.exec.isUseHcom"; | |||
const char *const OPTION_EXEC_IS_USEHVD = "ge.exec.isUseHvd"; | |||
const char *const OPTION_EXEC_RANK_ID = "ge.exec.rankId"; | |||
const char *const OPTION_EXEC_POD_NAME = "ge.exec.podName"; | |||
const char *const OPTION_EXEC_DEPLOY_MODE = "ge.exec.deployMode"; | |||
const char *const OPTION_EXEC_RANK_TABLE_FILE = "ge.exec.rankTableFile"; | |||
const char *const GE_AICPU_FLAG = "ge.aicpuFlag"; | |||
const char *const OPTION_EXEC_EXTERN_PLUGIN_PATH = "ge.soLoadPath"; | |||
const char_t *const OPTION_EXEC_SESSION_ID = "ge.exec.sessionId"; | |||
const char_t *const OPTION_EXEC_DEVICE_ID = "ge.exec.deviceId"; | |||
const char_t *const OPTION_EXEC_JOB_ID = "ge.exec.jobId"; | |||
const char_t *const OPTION_EXEC_IS_USEHCOM = "ge.exec.isUseHcom"; | |||
const char_t *const OPTION_EXEC_IS_USEHVD = "ge.exec.isUseHvd"; | |||
const char_t *const OPTION_EXEC_RANK_ID = "ge.exec.rankId"; | |||
const char_t *const OPTION_EXEC_POD_NAME = "ge.exec.podName"; | |||
const char_t *const OPTION_EXEC_DEPLOY_MODE = "ge.exec.deployMode"; | |||
const char_t *const OPTION_EXEC_RANK_TABLE_FILE = "ge.exec.rankTableFile"; | |||
const char_t *const GE_AICPU_FLAG = "ge.aicpuFlag"; | |||
const char_t *const OPTION_EXEC_EXTERN_PLUGIN_PATH = "ge.soLoadPath"; | |||
// Dump flag and para | |||
const char *const OPTION_EXEC_ENABLE_DUMP = "ge.exec.enableDump"; | |||
const char *const OPTION_EXEC_DUMP_PATH = "ge.exec.dumpPath"; | |||
const char *const OPTION_EXEC_DUMP_STEP = "ge.exec.dumpStep"; | |||
const char *const OPTION_EXEC_DUMP_MODE = "ge.exec.dumpMode"; | |||
const char *const OPTION_EXEC_ENABLE_DUMP_DEBUG = "ge.exec.enableDumpDebug"; | |||
const char *const OPTION_EXEC_DUMP_DEBUG_MODE = "ge.exec.dumpDebugMode"; | |||
const char *const OPTION_EXEC_ENABLE_INCRE_BUILD = "ge.exec.enableIncreBuild"; | |||
const char *const OPTION_EXEC_INCRE_BUILD_CACHE_PATH = "ge.exec.increBuildCachePath"; | |||
const char *const OPTION_EXEC_ENABLE_EXCEPTION_DUMP = "ge.exec.enable_exception_dump"; | |||
const char *const OPTION_EXEC_ENABLE_SCOPE_FUSION_PASSES = "ge.exec.enableScopeFusionPasses"; | |||
const char *const OPTION_EXEC_PROFILING_FPPONIT_OPTIONS = "ge.exec.profilingFpPointOptions"; | |||
const char *const OPTION_EXEC_PROFILING_BPPONIT_OPTIONS = "ge.exec.profilingBpPointOptions"; | |||
const char_t *const OPTION_EXEC_ENABLE_DUMP = "ge.exec.enableDump"; | |||
const char_t *const OPTION_EXEC_DUMP_PATH = "ge.exec.dumpPath"; | |||
const char_t *const OPTION_EXEC_DUMP_STEP = "ge.exec.dumpStep"; | |||
const char_t *const OPTION_EXEC_DUMP_MODE = "ge.exec.dumpMode"; | |||
const char_t *const OPTION_EXEC_ENABLE_DUMP_DEBUG = "ge.exec.enableDumpDebug"; | |||
const char_t *const OPTION_EXEC_DUMP_DEBUG_MODE = "ge.exec.dumpDebugMode"; | |||
const char_t *const OPTION_EXEC_ENABLE_INCRE_BUILD = "ge.exec.enableIncreBuild"; | |||
const char_t *const OPTION_EXEC_INCRE_BUILD_CACHE_PATH = "ge.exec.increBuildCachePath"; | |||
const char_t *const OPTION_EXEC_ENABLE_EXCEPTION_DUMP = "ge.exec.enable_exception_dump"; | |||
const char_t *const OPTION_EXEC_ENABLE_SCOPE_FUSION_PASSES = "ge.exec.enableScopeFusionPasses"; | |||
const char_t *const OPTION_EXEC_PROFILING_FPPONIT_OPTIONS = "ge.exec.profilingFpPointOptions"; | |||
const char_t *const OPTION_EXEC_PROFILING_BPPONIT_OPTIONS = "ge.exec.profilingBpPointOptions"; | |||
// profiling flag | |||
const char *const OPTION_EXEC_PROFILING_MODE = "ge.exec.profilingMode"; | |||
const char *const OPTION_EXEC_PROFILING_OPTIONS = "ge.exec.profilingOptions"; | |||
const char_t *const OPTION_EXEC_PROFILING_MODE = "ge.exec.profilingMode"; | |||
const char_t *const OPTION_EXEC_PROFILING_OPTIONS = "ge.exec.profilingOptions"; | |||
// Hccl flag, if ge.exec.hcclFlag =1, it means load plugin for opskernel, else:ge.exec.hcclFlag =0 | |||
const char *const OPTION_EXEC_HCCL_FLAG = "ge.exec.hcclFlag"; | |||
const char *const OPTION_EXEC_ATOMIC_FLAG = "ge.exec.enable_atomic"; | |||
const char *const OPTION_EXEC_DISABLE_REUSED_MEMORY = "ge.exec.disableReuseMemory"; | |||
const char *const OPTION_EXEC_ENABLE_TAILING_OPTIMIZATION = "ge.exec.isTailingOptimization"; | |||
const char_t *const OPTION_EXEC_HCCL_FLAG = "ge.exec.hcclFlag"; | |||
const char_t *const OPTION_EXEC_ATOMIC_FLAG = "ge.exec.enable_atomic"; | |||
const char_t *const OPTION_EXEC_DISABLE_REUSED_MEMORY = "ge.exec.disableReuseMemory"; | |||
const char_t *const OPTION_EXEC_ENABLE_TAILING_OPTIMIZATION = "ge.exec.isTailingOptimization"; | |||
// Dynamic input flag. ge.exec.dynamicInput=1, means enable dynaimc input, | |||
// ge.exec.dynamicGraphExecuteMode, dynamic_execute[default] | |||
const char *const OPTION_EXEC_DYNAMIC_INPUT = "ge.exec.dynamicInput"; | |||
const char *const OPTION_EXEC_DYNAMIC_EXECUTE_MODE = "ge.exec.dynamicGraphExecuteMode"; | |||
const char *const OPTION_EXEC_DATA_INPUTS_SHAPE_RANGE = "ge.exec.dataInputsShapeRange"; | |||
const char *const OPTION_EXEC_ENABLE_COPY_OUTPUT_ADDR = "ge.exec.enableCopyOutputAddr"; | |||
const char_t *const OPTION_EXEC_DYNAMIC_INPUT = "ge.exec.dynamicInput"; | |||
const char_t *const OPTION_EXEC_DYNAMIC_EXECUTE_MODE = "ge.exec.dynamicGraphExecuteMode"; | |||
const char_t *const OPTION_EXEC_DATA_INPUTS_SHAPE_RANGE = "ge.exec.dataInputsShapeRange"; | |||
const char_t *const OPTION_EXEC_ENABLE_COPY_OUTPUT_ADDR = "ge.exec.enableCopyOutputAddr"; | |||
// Option key: memory init | |||
const char *const GRAPH_MEMORY_MAX_SIZE = "ge.graphMemoryMaxSize"; | |||
const char *const VARIABLE_MEMORY_MAX_SIZE = "ge.variableMemoryMaxSize"; | |||
const char_t *const GRAPH_MEMORY_MAX_SIZE = "ge.graphMemoryMaxSize"; | |||
const char_t *const VARIABLE_MEMORY_MAX_SIZE = "ge.variableMemoryMaxSize"; | |||
const char_t *const OPTION_EXEC_REUSE_ZERO_COPY_MEMORY = "ge.exec.reuseZeroCopyMemory"; | |||
namespace configure_option { | |||
const char *const STREAM_NUM = "ge.streamNum"; | |||
const char *const HEAD_STREAM = "ge.headStream"; | |||
const char *const PERF_LEVEL = "ge.perfLevel"; | |||
const char *const ENCRYPT_MODE = "ge.encryptMode"; | |||
const char *const EK_FILE = "ge.ekFile"; | |||
const char *const CERT_FILE = "ge.certFile"; | |||
const char *const HW_KEY_FILE = "ge.hwKeyFile"; | |||
const char *const PRIVATE_KEY_FILE = "ge.privateKeyFile"; | |||
const char *const FRAMEWORK_TYPE = "ge.frameworkType"; | |||
const char *const CALIBRATION_CONF_FILE = "ge.calibrationConfFile"; | |||
const char *const INSERT_OP_FILE = "ge.insertOpFile"; | |||
const char *const OUTPUT_NODE_NAME = "ge.outputNodeName"; | |||
const char *const COMPRESS_FLAG = "ge.compressFlag"; | |||
const char *const PRECISION_MODE = "ge.exec.precision_mode"; | |||
const char *const SINGLE_OP_FLAG = "ge.exec.single_op"; | |||
const char *const TRAIN_FLAG = "ge.trainFlag"; | |||
const char *const RUN_FLAG = "ge.runFlag"; | |||
const char *const LOCAL_FMKOP_FLAG = "ge.enabledLocalFmkop"; | |||
const char *const TBE_PLUGIN_PATH_FLAG = "ge.TBE_plugin_path"; | |||
const char *const DDK_VERSION_FLAG = "ge.DDK_version"; | |||
const char *const GE_FE_FLAG = "ge.feFlag"; | |||
const char *const STREAM_MAX_PARALLEL_NUM = "ge.streamMaxParallelNum"; | |||
const char *const OUTPUT_DATATYPE = "ge.outputDatatype"; | |||
const char *const OP_SELECT_IMPL_MODE = "ge.opSelectImplmode"; | |||
const char *const OPTYPELIST_FOR_IMPLMODE = "ge.optypelistForImplmode"; | |||
const char *const HCOM_PARALLEL = "ge.hcomParallel"; | |||
const char *const AUTO_TUNE_MODE = "ge.autoTuneMode"; | |||
const char *const SOC_VERSION = "ge.socVersion"; | |||
const char *const CORE_TYPE = "ge.engineType"; | |||
const char *const AICORE_NUM = "ge.aicoreNum"; | |||
const char *const L1_FUSION = "ge.l1Fusion"; | |||
const char *const BUFFER_OPTIMIZE = "ge.bufferOptimize"; | |||
const char *const ENABLE_SMALL_CHANNEL = "ge.enableSmallChannel"; | |||
const char *const ENABLE_COMPRESS_WEIGHT = "ge.enableCompressWeight"; | |||
const char *const FUSION_SWITCH_FILE = "ge.fusionSwitchFile"; | |||
const char *const SAVE_ORIGINAL_MODEL = "ge.saveOriginalModel"; | |||
const char *const ORIGINAL_MODEL_FILE = "ge.originalModelFile"; | |||
const char *const INPUT_FP16_NODES = "ge.INPUT_NODES_SET_FP16"; | |||
const char *const OP_DEBUG_LEVEL = "ge.opDebugLevel"; | |||
const char *const PERFORMANCE_MODE = "ge.performance_mode"; | |||
const char *const SHAPE_GENERALIZED_BUILD_MODE = "ge.shape_generalized_build_mode"; | |||
const char *const MODIFY_MIXLIST = "ge.exec.modify_mixlist"; | |||
const char *const OP_PRECISION_MODE = "ge.exec.op_precision_mode"; | |||
const char_t *const STREAM_NUM = "ge.streamNum"; | |||
const char_t *const HEAD_STREAM = "ge.headStream"; | |||
const char_t *const PERF_LEVEL = "ge.perfLevel"; | |||
const char_t *const ENCRYPT_MODE = "ge.encryptMode"; | |||
const char_t *const EK_FILE = "ge.ekFile"; | |||
const char_t *const CERT_FILE = "ge.certFile"; | |||
const char_t *const HW_KEY_FILE = "ge.hwKeyFile"; | |||
const char_t *const PRIVATE_KEY_FILE = "ge.privateKeyFile"; | |||
const char_t *const FRAMEWORK_TYPE = "ge.frameworkType"; | |||
const char_t *const CALIBRATION_CONF_FILE = "ge.calibrationConfFile"; | |||
const char_t *const INSERT_OP_FILE = "ge.insertOpFile"; | |||
const char_t *const OUTPUT_NODE_NAME = "ge.outputNodeName"; | |||
const char_t *const COMPRESS_FLAG = "ge.compressFlag"; | |||
const char_t *const PRECISION_MODE = "ge.exec.precision_mode"; | |||
const char_t *const SINGLE_OP_FLAG = "ge.exec.single_op"; | |||
const char_t *const TRAIN_FLAG = "ge.trainFlag"; | |||
const char_t *const RUN_FLAG = "ge.runFlag"; | |||
const char_t *const LOCAL_FMKOP_FLAG = "ge.enabledLocalFmkop"; | |||
const char_t *const TBE_PLUGIN_PATH_FLAG = "ge.TBE_plugin_path"; | |||
const char_t *const DDK_VERSION_FLAG = "ge.DDK_version"; | |||
const char_t *const GE_FE_FLAG = "ge.feFlag"; | |||
const char_t *const STREAM_MAX_PARALLEL_NUM = "ge.streamMaxParallelNum"; | |||
const char_t *const OUTPUT_DATATYPE = "ge.outputDatatype"; | |||
const char_t *const OP_SELECT_IMPL_MODE = "ge.opSelectImplmode"; | |||
const char_t *const OPTYPELIST_FOR_IMPLMODE = "ge.optypelistForImplmode"; | |||
const char_t *const HCOM_PARALLEL = "ge.hcomParallel"; | |||
const char_t *const AUTO_TUNE_MODE = "ge.autoTuneMode"; | |||
const char_t *const SOC_VERSION = "ge.socVersion"; | |||
const char_t *const CORE_TYPE = "ge.engineType"; | |||
const char_t *const AICORE_NUM = "ge.aicoreNum"; | |||
const char_t *const L1_FUSION = "ge.l1Fusion"; | |||
const char_t *const BUFFER_OPTIMIZE = "ge.bufferOptimize"; | |||
const char_t *const ENABLE_SMALL_CHANNEL = "ge.enableSmallChannel"; | |||
const char_t *const ENABLE_COMPRESS_WEIGHT = "ge.enableCompressWeight"; | |||
const char_t *const FUSION_SWITCH_FILE = "ge.fusionSwitchFile"; | |||
const char_t *const SAVE_ORIGINAL_MODEL = "ge.saveOriginalModel"; | |||
const char_t *const ORIGINAL_MODEL_FILE = "ge.originalModelFile"; | |||
const char_t *const INPUT_FP16_NODES = "ge.INPUT_NODES_SET_FP16"; | |||
const char_t *const OP_DEBUG_LEVEL = "ge.opDebugLevel"; | |||
const char_t *const PERFORMANCE_MODE = "ge.performance_mode"; | |||
const char_t *const SHAPE_GENERALIZED_BUILD_MODE = "ge.shape_generalized_build_mode"; | |||
const char_t *const MODIFY_MIXLIST = "ge.exec.modify_mixlist"; | |||
const char_t *const OP_PRECISION_MODE = "ge.exec.op_precision_mode"; | |||
} // namespace configure_option | |||
// Configure stream num by Session constructor options param, | |||
// its value should be int32_t type, default value is "1" | |||
@@ -227,7 +229,7 @@ const std::string OPTYPELIST_FOR_IMPLMODE = "ge.optypelistForImplmode"; | |||
const std::string HCOM_PARALLEL = "ge.hcomParallel"; | |||
// configure whether to use dynamic batch size | |||
const char *const kDynamicBatchSize = "ge.dynamicBatchSize"; | |||
const char_t *const kDynamicBatchSize = "ge.dynamicBatchSize"; | |||
// configure threshold of fusion data size for communication op | |||
const std::string FUSION_TENSOR_SIZE = "ge.fusionTensorSize"; | |||
@@ -236,10 +238,10 @@ const std::string INPUT_SHAPE = "ge.inputShape"; | |||
const std::string DYNAMIC_NODE_TYPE = "ge.dynamicNodeType"; | |||
// configure whether to use dynamic image size | |||
const char *const kDynamicImageSize = "ge.dynamicImageSize"; | |||
const char_t *const kDynamicImageSize = "ge.dynamicImageSize"; | |||
// Configure whether to use dynamic dims | |||
const char *const kDynamicDims = "ge.dynamicDims"; | |||
const char_t *const kDynamicDims = "ge.dynamicDims"; | |||
// Configure auto tune mode, this option only take effect while AUTO_TUNE_FLAG is Y, | |||
// example: GA|RL, support configure multiple, split by | | |||
@@ -275,29 +277,29 @@ const std::string SAVE_ORIGINAL_MODEL = "ge.saveOriginalModel"; | |||
// Save original model file name | |||
const std::string ORIGINAL_MODEL_FILE = "ge.originalModelFile"; | |||
const char *const OPTION_GE_MAX_DUMP_FILE_NUM = "ge.maxDumpFileNum"; | |||
const char *const OPTION_GE_MAX_DUMP_FILE_SIZE = "ge.maxDumpFileSize"; | |||
const char *const OPTION_GE_MAX_DUMP_OP_NUM = "ge.maxDumpOpNum"; | |||
const char_t *const OPTION_GE_MAX_DUMP_FILE_NUM = "ge.maxDumpFileNum"; | |||
const char_t *const OPTION_GE_MAX_DUMP_FILE_SIZE = "ge.maxDumpFileSize"; | |||
const char_t *const OPTION_GE_MAX_DUMP_OP_NUM = "ge.maxDumpOpNum"; | |||
// Configure for print op pass | |||
// Its value should be "0" or "1", default value is "1" | |||
const char *const ENABLE_PRINT_OP_PASS = "ge.enablePrintOpPass"; | |||
const char_t *const ENABLE_PRINT_OP_PASS = "ge.enablePrintOpPass"; | |||
// Configure operator compilation path | |||
// Its value should be file path, default value is "./" | |||
const char *const DEBUG_DIR = "ge.debugDir"; | |||
const char_t *const DEBUG_DIR = "ge.debugDir"; | |||
// Configure operator compiler cache path | |||
// Its value should be file path, default value is "./" | |||
const char *const OP_COMPILER_CACHE_DIR = "ge.op_compiler_cache_dir"; | |||
const char_t *const OP_COMPILER_CACHE_DIR = "ge.op_compiler_cache_dir"; | |||
// Configure operator compiler cache mode | |||
// Its value should be "disable", "enable" or "force", default value is "disable" | |||
const char *const OP_COMPILER_CACHE_MODE = "ge.op_compiler_cache_mode"; | |||
const char_t *const OP_COMPILER_CACHE_MODE = "ge.op_compiler_cache_mode"; | |||
// Configure whether to use single stream. | |||
// Its value should be "true" or "false", default value is "false" | |||
const char *const ENABLE_SINGLE_STREAM = "ge.enableSingleStream"; | |||
const char_t *const ENABLE_SINGLE_STREAM = "ge.enableSingleStream"; | |||
// Configure input fp16 nodes | |||
const std::string INPUT_FP16_NODES = "ge.INPUT_NODES_SET_FP16"; | |||
@@ -322,7 +324,7 @@ const std::string OP_BANK_UPDATE_FLAG = "ge.op_bank_update"; | |||
const std::string HCOM_MULTI_MODE = "ge.hcomMultiMode"; | |||
// atc and ir option | |||
const char *const INPUT_SHAPE_RANGE = "input_shape_range"; | |||
const char_t *const INPUT_SHAPE_RANGE = "input_shape_range"; | |||
// Configure express high compile performance or high execute performance | |||
// normal: no need to compile, used saved .o files directly | |||
@@ -338,7 +340,11 @@ const std::string MODIFY_MIXLIST = "ge.exec.modify_mixlist"; | |||
const std::string OP_PRECISION_MODE = "ge.exec.op_precision_mode"; | |||
const char *const FILE_CONSTANT_PATH = "ge.exec.value_bins"; | |||
const std::string OP_WAIT_TIMEOUT = "ge.exec.opWaitTimeout"; | |||
const std::string OP_EXECUTE_TIMEOUT = "ge.exec.opExecuteTimeout"; | |||
const char_t *const FILE_CONSTANT_PATH = "ge.exec.value_bins"; | |||
// Graph run mode | |||
enum GraphRunMode { PREDICTION = 0, TRAIN }; | |||
@@ -378,49 +384,49 @@ using RunAsyncCallback = std::function<void(Status, std::vector<ge::Tensor> &)>; | |||
// for ir build | |||
namespace ir_option { | |||
static const char *const INPUT_FORMAT = "input_format"; | |||
static const char *const INPUT_SHAPE = "input_shape"; | |||
static const char *const INPUT_SHAPE_RANGE = ge::INPUT_SHAPE_RANGE; | |||
static const char *const OP_NAME_MAP = "op_name_map"; | |||
static const char *const IS_DYNAMIC_INPUT = "is_dynamic_input"; | |||
static const char *const IS_INPUT_ADJUST_HW_LAYOUT = "is_input_adjust_hw_layout"; | |||
static const char *const IS_OUTPUT_ADJUST_HW_LAYOUT = "is_output_adjust_hw_layout"; | |||
static const char *const ENABLE_SCOPE_FUSION_PASSES = "enable_scope_fusion_passes"; | |||
static const char *const OUTPUT = "output"; | |||
static const char *const DYNAMIC_BATCH_SIZE = kDynamicBatchSize; | |||
static const char *const DYNAMIC_IMAGE_SIZE = kDynamicImageSize; | |||
static const char *const DYNAMIC_DIMS = kDynamicDims; | |||
static const char *const INSERT_OP_FILE = ge::INSERT_OP_FILE.c_str(); | |||
static const char *const PRECISION_MODE = ge::PRECISION_MODE.c_str(); | |||
static const char *const TUNE_DEVICE_IDS = ge::TUNE_DEVICE_IDS.c_str(); | |||
static const char *const EXEC_DISABLE_REUSED_MEMORY = ge::OPTION_EXEC_DISABLE_REUSED_MEMORY; | |||
static const char *const AUTO_TUNE_MODE = ge::AUTO_TUNE_MODE.c_str(); | |||
static const char *const CORE_TYPE = ge::CORE_TYPE.c_str(); | |||
static const char *const SOC_VERSION = ge::SOC_VERSION.c_str(); | |||
static const char *const ENABLE_SINGLE_STREAM = ge::ENABLE_SINGLE_STREAM; | |||
static const char *const AICORE_NUM = ge::AICORE_NUM.c_str(); | |||
static const char *const FUSION_SWITCH_FILE = ge::FUSION_SWITCH_FILE.c_str(); | |||
static const char *const ENABLE_SMALL_CHANNEL = ge::ENABLE_SMALL_CHANNEL.c_str(); | |||
static const char *const OP_SELECT_IMPL_MODE = ge::OP_SELECT_IMPL_MODE.c_str(); | |||
static const char *const OUTPUT_TYPE = ge::OUTPUT_DATATYPE.c_str(); | |||
static const char *const BUFFER_OPTIMIZE = ge::BUFFER_OPTIMIZE.c_str(); | |||
static const char *const ENABLE_COMPRESS_WEIGHT = ge::ENABLE_COMPRESS_WEIGHT.c_str(); | |||
static const char *const COMPRESS_WEIGHT_CONF = "compress_weight_conf"; | |||
static const char *const OUT_NODES = ge::OUTPUT_NODE_NAME.c_str(); | |||
static const char *const INPUT_FP16_NODES = ge::INPUT_FP16_NODES.c_str(); | |||
static const char *const LOG_LEVEL = "log"; | |||
static const char *const OPTYPELIST_FOR_IMPLMODE = ge::OPTYPELIST_FOR_IMPLMODE.c_str(); | |||
static const char *const DEBUG_DIR = ge::DEBUG_DIR; | |||
static const char *const OP_COMPILER_CACHE_DIR = ge::OP_COMPILER_CACHE_DIR; | |||
static const char *const OP_COMPILER_CACHE_MODE = ge::OP_COMPILER_CACHE_MODE; | |||
static const char *const MDL_BANK_PATH = ge::MDL_BANK_PATH_FLAG.c_str(); | |||
static const char *const OP_BANK_PATH = ge::OP_BANK_PATH_FLAG.c_str(); | |||
static const char *const OP_BANK_UPDATE = ge::OP_BANK_UPDATE_FLAG.c_str(); | |||
static const char *const OP_DEBUG_LEVEL = ge::OP_DEBUG_LEVEL.c_str(); | |||
static const char *const PERFORMANCE_MODE = ge::PERFORMANCE_MODE.c_str(); | |||
static const char *const SHAPE_GENERALIZED_BUILD_MODE = ge::SHAPE_GENERALIZED_BUILD_MODE.c_str(); | |||
static const char *const MODIFY_MIXLIST = ge::MODIFY_MIXLIST.c_str(); | |||
static const char *const OP_PRECISION_MODE = ge::OP_PRECISION_MODE.c_str(); | |||
static const char_t *const INPUT_FORMAT = "input_format"; | |||
static const char_t *const INPUT_SHAPE = "input_shape"; | |||
static const char_t *const INPUT_SHAPE_RANGE = ge::INPUT_SHAPE_RANGE; | |||
static const char_t *const OP_NAME_MAP = "op_name_map"; | |||
static const char_t *const IS_DYNAMIC_INPUT = "is_dynamic_input"; | |||
static const char_t *const IS_INPUT_ADJUST_HW_LAYOUT = "is_input_adjust_hw_layout"; | |||
static const char_t *const IS_OUTPUT_ADJUST_HW_LAYOUT = "is_output_adjust_hw_layout"; | |||
static const char_t *const ENABLE_SCOPE_FUSION_PASSES = "enable_scope_fusion_passes"; | |||
static const char_t *const OUTPUT = "output"; | |||
static const char_t *const DYNAMIC_BATCH_SIZE = kDynamicBatchSize; | |||
static const char_t *const DYNAMIC_IMAGE_SIZE = kDynamicImageSize; | |||
static const char_t *const DYNAMIC_DIMS = kDynamicDims; | |||
static const char_t *const INSERT_OP_FILE = ge::INSERT_OP_FILE.c_str(); | |||
static const char_t *const PRECISION_MODE = ge::PRECISION_MODE.c_str(); | |||
static const char_t *const TUNE_DEVICE_IDS = ge::TUNE_DEVICE_IDS.c_str(); | |||
static const char_t *const EXEC_DISABLE_REUSED_MEMORY = ge::OPTION_EXEC_DISABLE_REUSED_MEMORY; | |||
static const char_t *const AUTO_TUNE_MODE = ge::AUTO_TUNE_MODE.c_str(); | |||
static const char_t *const CORE_TYPE = ge::CORE_TYPE.c_str(); | |||
static const char_t *const SOC_VERSION = ge::SOC_VERSION.c_str(); | |||
static const char_t *const ENABLE_SINGLE_STREAM = ge::ENABLE_SINGLE_STREAM; | |||
static const char_t *const AICORE_NUM = ge::AICORE_NUM.c_str(); | |||
static const char_t *const FUSION_SWITCH_FILE = ge::FUSION_SWITCH_FILE.c_str(); | |||
static const char_t *const ENABLE_SMALL_CHANNEL = ge::ENABLE_SMALL_CHANNEL.c_str(); | |||
static const char_t *const OP_SELECT_IMPL_MODE = ge::OP_SELECT_IMPL_MODE.c_str(); | |||
static const char_t *const OUTPUT_TYPE = ge::OUTPUT_DATATYPE.c_str(); | |||
static const char_t *const BUFFER_OPTIMIZE = ge::BUFFER_OPTIMIZE.c_str(); | |||
static const char_t *const ENABLE_COMPRESS_WEIGHT = ge::ENABLE_COMPRESS_WEIGHT.c_str(); | |||
static const char_t *const COMPRESS_WEIGHT_CONF = "compress_weight_conf"; | |||
static const char_t *const OUT_NODES = ge::OUTPUT_NODE_NAME.c_str(); | |||
static const char_t *const INPUT_FP16_NODES = ge::INPUT_FP16_NODES.c_str(); | |||
static const char_t *const LOG_LEVEL = "log"; | |||
static const char_t *const OPTYPELIST_FOR_IMPLMODE = ge::OPTYPELIST_FOR_IMPLMODE.c_str(); | |||
static const char_t *const DEBUG_DIR = ge::DEBUG_DIR; | |||
static const char_t *const OP_COMPILER_CACHE_DIR = ge::OP_COMPILER_CACHE_DIR; | |||
static const char_t *const OP_COMPILER_CACHE_MODE = ge::OP_COMPILER_CACHE_MODE; | |||
static const char_t *const MDL_BANK_PATH = ge::MDL_BANK_PATH_FLAG.c_str(); | |||
static const char_t *const OP_BANK_PATH = ge::OP_BANK_PATH_FLAG.c_str(); | |||
static const char_t *const OP_BANK_UPDATE = ge::OP_BANK_UPDATE_FLAG.c_str(); | |||
static const char_t *const OP_DEBUG_LEVEL = ge::OP_DEBUG_LEVEL.c_str(); | |||
static const char_t *const PERFORMANCE_MODE = ge::PERFORMANCE_MODE.c_str(); | |||
static const char_t *const SHAPE_GENERALIZED_BUILD_MODE = ge::SHAPE_GENERALIZED_BUILD_MODE.c_str(); | |||
static const char_t *const MODIFY_MIXLIST = ge::MODIFY_MIXLIST.c_str(); | |||
static const char_t *const OP_PRECISION_MODE = ge::OP_PRECISION_MODE.c_str(); | |||
// for interface: aclgrphBuildModel | |||
#ifdef __GNUC__ | |||
@@ -98,10 +98,10 @@ GE_FUNC_VISIBILITY graphStatus aclgrphBuildModel(const ge::Graph &graph, | |||
* @retval GRAPH_SUCCESS The function is successfully executed. | |||
* @retval OtherValues Failure | |||
*/ | |||
ATTRIBUTED_DEPRECATED(GE_FUNC_VISIBILITY graphStatus aclgrphSaveModel(const char *, const ModelBufferData &)) | |||
ATTRIBUTED_DEPRECATED(GE_FUNC_VISIBILITY graphStatus aclgrphSaveModel(const char_t *, const ModelBufferData &)) | |||
GE_FUNC_VISIBILITY graphStatus aclgrphSaveModel(const std::string &output_file, const ModelBufferData &model); | |||
GE_FUNC_VISIBILITY graphStatus aclgrphSaveModel(const char *output_file, const ModelBufferData &model); | |||
GE_FUNC_VISIBILITY graphStatus aclgrphSaveModel(const char_t *output_file, const ModelBufferData &model); | |||
/** | |||
* @ingroup AscendCL | |||
@@ -126,7 +126,7 @@ GE_FUNC_VISIBILITY graphStatus aclgrphGetIRVersion(int32_t *major_version, int32 | |||
* @retval GRAPH_SUCCESS The function is successfully executed. | |||
* @retval OtherValues Failure | |||
*/ | |||
GE_FUNC_VISIBILITY graphStatus aclgrphDumpGraph(const ge::Graph &graph, const char *file, const size_t len); | |||
GE_FUNC_VISIBILITY graphStatus aclgrphDumpGraph(const ge::Graph &graph, const char_t *file, const size_t len); | |||
/** | |||
* @ingroup AscendCL | |||
@@ -150,7 +150,7 @@ GE_FUNC_VISIBILITY graphStatus aclgrphGenerateForOp(const AscendString &op_type, | |||
* @param cfg_path [IN] the config file path | |||
* @return graphStatus | |||
*/ | |||
GE_FUNC_VISIBILITY graphStatus aclgrphSetOpAttr(Graph &graph, aclgrphAttrType attr_type, const char *cfg_path); | |||
GE_FUNC_VISIBILITY graphStatus aclgrphSetOpAttr(Graph &graph, aclgrphAttrType attr_type, const char_t *cfg_path); | |||
}; // namespace ge | |||
#endif // INC_EXTERNAL_GE_IR_BUILD_H_ |
@@ -62,6 +62,7 @@ static const int32_t ACL_ERROR_RT_OVER_LIMIT = 207012; // over limit | |||
static const int32_t ACL_ERROR_RT_QUEUE_EMPTY = 207013; // queue is empty | |||
static const int32_t ACL_ERROR_RT_QUEUE_FULL = 207014; // queue is full | |||
static const int32_t ACL_ERROR_RT_REPEATED_INIT = 207015; // repeated init | |||
static const int32_t ACL_ERROR_RT_AIVEC_OVER_FLOW = 207016; // aivec over flow | |||
static const int32_t ACL_ERROR_RT_INTERNAL_ERROR = 507000; // runtime internal error | |||
static const int32_t ACL_ERROR_RT_TS_ERROR = 507001; // ts internel error | |||
@@ -113,5 +114,4 @@ static const int32_t ACL_ERROR_RT_SOCKET_CLOSE = 507901; // hdc disconn | |||
#ifdef __cplusplus | |||
} | |||
#endif | |||
#endif // __INC_EXTERNEL_RT_ERROR_CODES_H__ |
@@ -40,7 +40,7 @@ enum TraceStatus { TRACE_INIT = 0, TRACE_RUNNING, TRACE_WAITING, TRACE_STOP }; | |||
class GE_FUNC_VISIBILITY GeLog { | |||
public: | |||
static const uint64_t GetTid() { | |||
static uint64_t GetTid() { | |||
#ifdef __GNUC__ | |||
const uint64_t tid = static_cast<uint64_t>(syscall(__NR_gettid)); | |||
#else | |||
@@ -56,11 +56,11 @@ inline bool IsLogEnable(const int32_t module_name, const int32_t log_level) { | |||
return (enable == 1); | |||
} | |||
#define GELOGE(ERROR_CODE, fmt, ...) \ | |||
do { \ | |||
dlog_error(GE_MODULE_NAME, "%lu %s: ErrorNo: %u(%s) %s" fmt, GeLog::GetTid(), &__FUNCTION__[0], ERROR_CODE, \ | |||
((GE_GET_ERRORNO_STR(ERROR_CODE)).c_str()), ErrorManager::GetInstance().GetLogHeader().c_str(), \ | |||
##__VA_ARGS__); \ | |||
#define GELOGE(ERROR_CODE, fmt, ...) \ | |||
do { \ | |||
dlog_error(GE_MODULE_NAME, "%lu %s: ErrorNo: %u(%s) %s" fmt, GeLog::GetTid(), &__FUNCTION__[0], (ERROR_CODE), \ | |||
((GE_GET_ERRORNO_STR(ERROR_CODE)).c_str()), ErrorManager::GetInstance().GetLogHeader().c_str(), \ | |||
##__VA_ARGS__); \ | |||
} while (false) | |||
#define GELOGW(fmt, ...) \ | |||
@@ -91,7 +91,7 @@ inline bool IsLogEnable(const int32_t module_name, const int32_t log_level) { | |||
#define GELOGT(VALUE, fmt, ...) \ | |||
do { \ | |||
TraceStatus stat = VALUE; \ | |||
TraceStatus stat = (VALUE); \ | |||
const char_t *const TraceStatStr[] = {"INIT", "RUNNING", "WAITING", "STOP"}; \ | |||
const int32_t idx = static_cast<int32_t>(stat); \ | |||
char_t *k = const_cast<char_t *>("status"); \ | |||
@@ -102,7 +102,7 @@ inline bool IsLogEnable(const int32_t module_name, const int32_t log_level) { | |||
#define GE_LOG_ERROR(MOD_NAME, ERROR_CODE, fmt, ...) \ | |||
do { \ | |||
dlog_error(MOD_NAME, "%lu %s: ErrorNo: %u(%s) %s" fmt, GeLog::GetTid(), &__FUNCTION__[0], ERROR_CODE, \ | |||
dlog_error((MOD_NAME), "%lu %s: ErrorNo: %u(%s) %s" fmt, GeLog::GetTid(), &__FUNCTION__[0], (ERROR_CODE), \ | |||
((GE_GET_ERRORNO_STR(ERROR_CODE)).c_str()), ErrorManager::GetInstance().GetLogHeader().c_str(), \ | |||
##__VA_ARGS__); \ | |||
} while (false) | |||
@@ -69,7 +69,7 @@ | |||
do { \ | |||
const ge::Status _chk_status = (expr); \ | |||
if (_chk_status != ge::SUCCESS) { \ | |||
GELOGE((ge::FAILED), __VA_ARGS__); \ | |||
GELOGE(_chk_status, __VA_ARGS__); \ | |||
} \ | |||
} while (false) | |||
@@ -213,9 +213,9 @@ | |||
// If expr is not RT_ERROR_NONE, print the log | |||
#define GE_CHK_RT(expr) \ | |||
do { \ | |||
const rtError_t _rt_ret = (expr); \ | |||
if (_rt_ret != RT_ERROR_NONE) { \ | |||
GELOGE(ge::RT_FAILED, "Call rt api failed, ret: 0x%X", _rt_ret); \ | |||
const rtError_t _rt_err = (expr); \ | |||
if (_rt_err != RT_ERROR_NONE) { \ | |||
GELOGE(ge::RT_FAILED, "Call rt api failed, ret: 0x%X", _rt_err); \ | |||
} \ | |||
} while (false) | |||
@@ -278,7 +278,7 @@ | |||
return (_status); \ | |||
} \ | |||
} while (false) | |||
namespace ge { | |||
template <typename T> | |||
GE_FUNC_VISIBILITY std::string FmtToStr(const T &t) { | |||
std::string fmt; | |||
@@ -287,5 +287,5 @@ GE_FUNC_VISIBILITY std::string FmtToStr(const T &t) { | |||
fmt = st.str(); | |||
return fmt; | |||
} | |||
} // namespace ge | |||
#endif // INC_FRAMEWORK_COMMON_DEBUG_LOG_H_ |
@@ -27,11 +27,6 @@ | |||
#include "graph/ge_tensor.h" | |||
namespace ge { | |||
extern const int64_t kBlockSize; | |||
extern const std::string kBinFileValues; | |||
extern const std::string kBinIdValue; | |||
extern const std::string kBinFilePathValue; | |||
struct FileConstantInfo { | |||
std::string value_bin_file_id; | |||
std::string value_bin_file_path; | |||
@@ -47,14 +42,11 @@ void from_json(const nlohmann::json &j, OptionInfo &option_info); | |||
Status GetFilePathFromOption(std::map<std::string, std::string> &file_id_and_path_map); | |||
Status CopyOneWeightFromFile(const void *curr_dev_ptr, const std::string &value, const size_t file_constant_size, | |||
Status CopyOneWeightFromFile(const void *const curr_dev_ptr, const std::string &value, const size_t file_constant_size, | |||
size_t &left_size); | |||
Status GetFilePath(const OpDescPtr &op_desc, const std::map<std::string, std::string> &file_id_and_path_map, | |||
std::string &file_path); | |||
Status GetFileConstantElementTotalSize(const GeShape &shape, const DataType data_type, int64_t &mem_size, | |||
const Format format = FORMAT_ND); | |||
} // namespace ge | |||
#endif // INC_FRAMEWORK_COMMON_FILE_CONSTANT_UTIL_H |
@@ -44,7 +44,7 @@ | |||
// Each module uses the following four macros to define error codes: | |||
#define DECLARE_ERRORNO_OMG(name, value) DECLARE_ERRORNO(SYSID_FWK, MODID_OMG, (name), (value)) | |||
#define DECLARE_ERRORNO_OME(name, value) DECLARE_ERRORNO(SYSID_FWK, MODID_OME, (name), (value)) | |||
#define DECLARE_ERRORNO_CALIBRATION(name, value) DECLARE_ERRORNO(SYSID_FWK, MODID_CALIBRATION, name, value) | |||
#define DECLARE_ERRORNO_CALIBRATION(name, value) DECLARE_ERRORNO(SYSID_FWK, MODID_CALIBRATION, (name), (value)) | |||
#define DEF_ERRORNO(name, desc) const ErrorNoRegisterar g_##name##_errorno((name), (desc)); | |||
@@ -74,7 +74,7 @@ class GE_FUNC_VISIBILITY StatusFactory { | |||
class GE_FUNC_VISIBILITY ErrorNoRegisterar { | |||
public: | |||
ErrorNoRegisterar(uint32_t err, const std::string &desc) { | |||
ErrorNoRegisterar(const uint32_t err, const std::string &desc) { | |||
StatusFactory::Instance()->RegisterErrorNo(err, desc); | |||
} | |||
~ErrorNoRegisterar() {} | |||
@@ -22,17 +22,57 @@ | |||
#include <string> | |||
#include "ge/ge_api_error_codes.h" | |||
// Each module defines error codes using the following macros, name can not be modified to (name) | |||
#define GE_ERRORNO_COMMON(name, value, desc) \ | |||
GE_ERRORNO(ge::InnLogRuntime::RT_HOST, ge::InnErrorCodeType::ERROR_CODE, ge::InnErrorLevel::COMMON_LEVEL, \ | |||
ge::InnSystemIdType::SYSID_GE, ge::InnSubModuleId::COMMON_MODULE, name, (value), (desc)) | |||
#define GE_ERRORNO_CLIENT(name, value, desc) \ | |||
GE_ERRORNO(ge::InnLogRuntime::RT_HOST, ge::InnErrorCodeType::ERROR_CODE, ge::InnErrorLevel::COMMON_LEVEL, \ | |||
ge::InnSystemIdType::SYSID_GE, ge::InnSubModuleId::CLIENT_MODULE, name, (value), (desc)) | |||
#define GE_ERRORNO_INIT(name, value, desc) \ | |||
GE_ERRORNO(ge::InnLogRuntime::RT_HOST, ge::InnErrorCodeType::ERROR_CODE, ge::InnErrorLevel::COMMON_LEVEL, \ | |||
ge::InnSystemIdType::SYSID_GE, ge::InnSubModuleId::INIT_MODULE, name, (value), (desc)) | |||
#define GE_ERRORNO_SESSION(name, value, desc) \ | |||
GE_ERRORNO(ge::InnLogRuntime::RT_HOST, ge::InnErrorCodeType::ERROR_CODE, ge::InnErrorLevel::COMMON_LEVEL, \ | |||
ge::InnSystemIdType::SYSID_GE, ge::InnSubModuleId::SESSION_MODULE, name, (value), (desc)) | |||
#define GE_ERRORNO_GRAPH(name, value, desc) \ | |||
GE_ERRORNO(ge::InnLogRuntime::RT_HOST, ge::InnErrorCodeType::ERROR_CODE, ge::InnErrorLevel::COMMON_LEVEL, \ | |||
ge::InnSystemIdType::SYSID_GE, ge::InnSubModuleId::GRAPH_MODULE, name, (value), (desc)) | |||
#define GE_ERRORNO_ENGINE(name, value, desc) \ | |||
GE_ERRORNO(ge::InnLogRuntime::RT_HOST, ge::InnErrorCodeType::ERROR_CODE, ge::InnErrorLevel::COMMON_LEVEL, \ | |||
ge::InnSystemIdType::SYSID_GE, ge::InnSubModuleId::ENGINE_MODULE, name, (value), (desc)) | |||
#define GE_ERRORNO_OPS(name, value, desc) \ | |||
GE_ERRORNO(ge::InnLogRuntime::RT_HOST, ge::InnErrorCodeType::ERROR_CODE, ge::InnErrorLevel::COMMON_LEVEL, \ | |||
ge::InnSystemIdType::SYSID_GE, ge::InnSubModuleId::OPS_MODULE, name, (value), (desc)) | |||
#define GE_ERRORNO_PLUGIN(name, value, desc) \ | |||
GE_ERRORNO(ge::InnLogRuntime::RT_HOST, ge::InnErrorCodeType::ERROR_CODE, ge::InnErrorLevel::COMMON_LEVEL, \ | |||
ge::InnSystemIdType::SYSID_GE, ge::InnSubModuleId::PLUGIN_MODULE, name, (value), (desc)) | |||
#define GE_ERRORNO_RUNTIME(name, value, desc) \ | |||
GE_ERRORNO(ge::InnLogRuntime::RT_HOST, ge::InnErrorCodeType::ERROR_CODE, ge::InnErrorLevel::COMMON_LEVEL, \ | |||
ge::InnSystemIdType::SYSID_GE, ge::InnSubModuleId::RUNTIME_MODULE, name, (value), (desc)) | |||
#define GE_ERRORNO_EXECUTOR(name, value, desc) \ | |||
GE_ERRORNO(ge::InnLogRuntime::RT_DEVICE, ge::InnErrorCodeType::ERROR_CODE, ge::InnErrorLevel::COMMON_LEVEL, \ | |||
ge::InnSystemIdType::SYSID_GE, ge::InnSubModuleId::EXECUTOR_MODULE, name, (value), (desc)) | |||
#define GE_ERRORNO_GENERATOR(name, value, desc) \ | |||
GE_ERRORNO(ge::InnLogRuntime::RT_HOST, ge::InnErrorCodeType::ERROR_CODE, ge::InnErrorLevel::COMMON_LEVEL, \ | |||
ge::InnSystemIdType::SYSID_GE, ge::InnSubModuleId::GENERATOR_MODULE, name, (value), (desc)) | |||
// Get error code description | |||
#define GE_GET_ERRORNO_STR(value) ge::StatusFactory::Instance()->GetErrDesc(value) | |||
#define RT_ERROR_TO_GE_STATUS(RT_ERROR) static_cast<Status>(RT_ERROR) | |||
namespace ge { | |||
// System ID | |||
enum SystemIdType { SYSID_GE = 8 }; | |||
enum class InnSystemIdType { SYSID_GE = 8 }; | |||
// Runtime location | |||
enum LogRuntime { | |||
enum class InnLogRuntime { | |||
RT_HOST = 0b01, | |||
RT_DEVICE = 0b10, | |||
}; | |||
// Sub model | |||
enum SubModuleId { | |||
enum class InnSubModuleId { | |||
COMMON_MODULE = 0, | |||
CLIENT_MODULE = 1, | |||
INIT_MODULE = 2, | |||
@@ -47,13 +87,13 @@ enum SubModuleId { | |||
}; | |||
// Error code type | |||
enum ErrorCodeType { | |||
enum class InnErrorCodeType { | |||
ERROR_CODE = 0b01, | |||
EXCEPTION_CODE = 0b10, | |||
}; | |||
// Error level | |||
enum ErrorLevel { | |||
enum class InnErrorLevel { | |||
COMMON_LEVEL = 0b000, | |||
SUGGESTION_LEVEL = 0b001, | |||
MINOR_LEVEL = 0b010, | |||
@@ -61,33 +101,6 @@ enum ErrorLevel { | |||
CRITICAL_LEVEL = 0b100, | |||
}; | |||
// Each module defines error codes using the following macros, name can not be modified to (name) | |||
#define GE_ERRORNO_COMMON(name, value, desc) \ | |||
GE_ERRORNO(RT_HOST, ERROR_CODE, COMMON_LEVEL, SYSID_GE, COMMON_MODULE, name, (value), (desc)) | |||
#define GE_ERRORNO_CLIENT(name, value, desc) \ | |||
GE_ERRORNO(RT_HOST, ERROR_CODE, COMMON_LEVEL, SYSID_GE, CLIENT_MODULE, name, (value), (desc)) | |||
#define GE_ERRORNO_INIT(name, value, desc) \ | |||
GE_ERRORNO(RT_HOST, ERROR_CODE, COMMON_LEVEL, SYSID_GE, INIT_MODULE, name, (value), (desc)) | |||
#define GE_ERRORNO_SESSION(name, value, desc) \ | |||
GE_ERRORNO(RT_HOST, ERROR_CODE, COMMON_LEVEL, SYSID_GE, SESSION_MODULE, name, (value), (desc)) | |||
#define GE_ERRORNO_GRAPH(name, value, desc) \ | |||
GE_ERRORNO(RT_HOST, ERROR_CODE, COMMON_LEVEL, SYSID_GE, GRAPH_MODULE, name, (value), (desc)) | |||
#define GE_ERRORNO_ENGINE(name, value, desc) \ | |||
GE_ERRORNO(RT_HOST, ERROR_CODE, COMMON_LEVEL, SYSID_GE, ENGINE_MODULE, name, (value), (desc)) | |||
#define GE_ERRORNO_OPS(name, value, desc) \ | |||
GE_ERRORNO(RT_HOST, ERROR_CODE, COMMON_LEVEL, SYSID_GE, OPS_MODULE, name, (value), (desc)) | |||
#define GE_ERRORNO_PLUGIN(name, value, desc) \ | |||
GE_ERRORNO(RT_HOST, ERROR_CODE, COMMON_LEVEL, SYSID_GE, PLUGIN_MODULE, name, (value), (desc)) | |||
#define GE_ERRORNO_RUNTIME(name, value, desc) \ | |||
GE_ERRORNO(RT_HOST, ERROR_CODE, COMMON_LEVEL, SYSID_GE, RUNTIME_MODULE, name, (value), (desc)) | |||
#define GE_ERRORNO_EXECUTOR(name, value, desc) \ | |||
GE_ERRORNO(RT_DEVICE, ERROR_CODE, COMMON_LEVEL, SYSID_GE, EXECUTOR_MODULE, name, (value), (desc)) | |||
#define GE_ERRORNO_GENERATOR(name, value, desc) \ | |||
GE_ERRORNO(RT_HOST, ERROR_CODE, COMMON_LEVEL, SYSID_GE, GENERATOR_MODULE, name, (value), (desc)) | |||
// Get error code description | |||
#define GE_GET_ERRORNO_STR(value) ge::StatusFactory::Instance()->GetErrDesc(value) | |||
// Common module error code definition | |||
GE_ERRORNO_COMMON(MEMALLOC_FAILED, 0, "Failed to allocate memory!"); // 1343225856 | |||
GE_ERRORNO_COMMON(PARAM_INVALID, 1, "Parameter's invalid!"); // 1343225857 | |||
@@ -313,10 +326,6 @@ GE_ERRORNO_GENERATOR(GE_GENERATOR_GRAPH_MANAGER_BUILD_GRAPH_FAILED, 3, "Graph ma | |||
GE_ERRORNO_GENERATOR(GE_GENERATOR_GRAPH_MANAGER_FINALIZE_FAILED, 4, "Graph manager finalize failed."); | |||
GE_ERRORNO_GENERATOR(GE_GENERATOR_GRAPH_MANAGER_SAVE_MODEL_FAILED, 5, "Graph manager save model failed."); | |||
static inline Status TransRtErrorCode(const int32_t error_code) { | |||
return static_cast<Status>(error_code); | |||
} | |||
#define RT_ERROR_TO_GE_STATUS(RT_ERROR) TransRtErrorCode(RT_ERROR) | |||
} // namespace ge | |||
#endif // INC_FRAMEWORK_COMMON_GE_INNER_ERROR_CODES_H_ |
@@ -40,13 +40,13 @@ enum FrameworkType { | |||
CAFFE = 0, | |||
MINDSPORE = 1, | |||
TENSORFLOW = 3, | |||
ANDROID_NN, | |||
ONNX, | |||
ANDROID_NN = 4, | |||
ONNX = 5, | |||
}; | |||
enum class GraphStage : int64_t { GRAPH_STAGE_FUZZ = 0, GRAPH_STAGE_RESERVED }; | |||
const char *const kGraphDumpStage = "DumpStage"; | |||
const char_t *const kGraphDumpStage = "DumpStage"; | |||
const std::map<std::string, std::string> kFwkTypeToStr = { | |||
{"0", "Caffe"}, {"1", "MindSpore"}, {"3", "TensorFlow"}, {"4", "Android_NN"}, {"5", "Onnx"}}; | |||
@@ -70,21 +70,42 @@ const std::string kTaskTypeAicore = "AI_CORE"; | |||
const std::string kTaskTypeAicpu = "AI_CPU"; | |||
const std::string kTaskTypeInvalid = "TASK_TYPE_INVALID"; | |||
const std::string kTaskTypeFftsPlus = "FFTS_PLUS"; | |||
const std::string kEngineNameVectorCore = "VectorEngine"; | |||
const std::string kEngineNameHccl = "ops_kernel_info_hccl"; | |||
const std::string kEngineNameRts = "DNN_VM_RTS_OP_STORE"; | |||
const std::string kEngineNameHostCpu = "DNN_VM_HOST_CPU_OP_STORE"; | |||
const std::string kEngineNameGeLocal = "DNN_VM_GE_LOCAL_OP_STORE"; | |||
const std::string kEngineNameAiCpu = "aicpu_ascend_kernel"; | |||
const std::string kEngineNameAiCpuTf = "aicpu_tf_kernel"; | |||
const std::string kEngineNameAiCore = "AIcoreEngine"; | |||
const std::string kAtomicOpType = "DynamicAtomicAddrClean"; | |||
const std::string kShapeTypeStatic = "static"; | |||
const std::string kShapeTypeDynamic = "dynamic"; | |||
constexpr uint64_t kInferSessionId = 0U; | |||
constexpr uint64_t kReleaseFlag = 1U; | |||
constexpr uint32_t kInvalidModelId = 0xFFFFFFFFU; | |||
constexpr size_t kNumTaskWithAtomicAddrCleanTask = 2U; | |||
// dynamic execute mode | |||
const char_t *const kLazyRecompile = "lazy_recompile"; | |||
constexpr size_t kMaxHostMemInputLen = 64U; | |||
// Data cache, including data address and length | |||
struct DataBuffer { | |||
public: | |||
void *data; // Data address | |||
uint64_t length; // Data length | |||
bool isDataSupportMemShare = false; | |||
uint32_t placement = 0U; | |||
DataBuffer(void *data_in, uint64_t data_len, bool is_support_mem_share, uint32_t placement = 0U) | |||
: data(data_in), length(data_len), isDataSupportMemShare(is_support_mem_share), placement(placement) {} | |||
DataBuffer() : data(nullptr), length(0UL), isDataSupportMemShare(false) {} | |||
DataBuffer(void *const data_in, const uint64_t data_len, const bool is_support_mem_share = false, | |||
const uint32_t data_placement = 0U) | |||
: data(data_in), length(data_len), isDataSupportMemShare(is_support_mem_share), placement(data_placement) {} | |||
DataBuffer() : data(nullptr), length(0UL), isDataSupportMemShare(false), placement(0U) {} | |||
}; | |||
/// | |||
@@ -232,6 +253,9 @@ struct ModelInfo { | |||
class GE_FUNC_VISIBILITY ModelListener { | |||
public: | |||
virtual ~ModelListener() {} | |||
ModelListener() = default; | |||
ModelListener(const ModelListener &) = delete; | |||
ModelListener &operator=(const ModelListener &) = delete; | |||
/// | |||
/// @brief Asynchronous callback interface | |||
/// @param [in] model_id Model ID of the callback | |||
@@ -241,7 +265,9 @@ class GE_FUNC_VISIBILITY ModelListener { | |||
virtual Status OnComputeDone(uint32_t model_id, uint32_t data_index, uint32_t result_code, | |||
std::vector<ge::Tensor> &outputs) = 0; | |||
virtual void SetCallback(const RunAsyncCallback &callback){}; | |||
virtual void SetCallback(const RunAsyncCallback &callback) { | |||
(void)callback; | |||
} | |||
virtual uint32_t GetResultCode() { | |||
return 0U; | |||
@@ -34,12 +34,13 @@ class GE_FUNC_VISIBILITY ModelHelper { | |||
~ModelHelper(); | |||
Status SaveToOmModel(const GeModelPtr &ge_model, const SaveParam &save_param, const std::string &output_file, | |||
ge::ModelBufferData &model); | |||
ge::ModelBufferData &model) const; | |||
Status SaveToOmRootModel(const GeRootModelPtr &ge_root_model, const SaveParam &save_param, | |||
const std::string &output_file, ModelBufferData &model, const bool is_unknown_shape); | |||
Status SaveOriginalGraphToOmModel(const ge::Graph &graph, const std::string &output_file); | |||
Status LoadModel(const ge::ModelData &model_data); | |||
Status LoadRootModel(const ge::ModelData &model_data); | |||
static void SetModelToGeModel(GeModelPtr &ge_model, Model &model); | |||
GeModelPtr GetGeModel(); | |||
GeRootModelPtr GetGeRootModel(); | |||
@@ -67,7 +68,6 @@ class GE_FUNC_VISIBILITY ModelHelper { | |||
Status GenerateGeModel(OmFileLoadHelper &om_load_helper); | |||
Status GenerateGeRootModel(OmFileLoadHelper &om_load_helper); | |||
Status LoadModelData(OmFileLoadHelper &om_load_helper); | |||
void SetModelToGeModel(GeModelPtr &ge_model, Model &model) const; | |||
Status LoadModelData(OmFileLoadHelper &om_load_helper, GeModelPtr &cur_model, const size_t mode_index) const; | |||
Status LoadWeights(OmFileLoadHelper &om_load_helper); | |||
Status LoadWeights(OmFileLoadHelper &om_load_helper, GeModelPtr &cur_model, const size_t mode_index) const; | |||
@@ -21,25 +21,20 @@ | |||
#include <vector> | |||
#include "external/ge/ge_ir_build.h" | |||
#include "framework/common/fmk_types.h" | |||
#include "framework/common/types.h" | |||
#include "framework/common/ge_types.h" | |||
using ProcParam = struct PROC_PARAM; | |||
using std::string; | |||
using std::vector; | |||
namespace ge { | |||
struct ModelPartition { | |||
ModelPartitionType type; | |||
uint8_t *data = 0; | |||
uint32_t size = 0; | |||
const uint8_t *data = nullptr; | |||
uint32_t size = 0U; | |||
}; | |||
struct OmFileContext { | |||
std::vector<ModelPartition> partition_datas_; | |||
std::vector<char> partition_table_; | |||
uint32_t model_data_len_ = 0; | |||
std::vector<char_t> partition_table_; | |||
uint32_t model_data_len_ = 0U; | |||
}; | |||
struct SaveParam { | |||
@@ -55,13 +50,13 @@ class GE_FUNC_VISIBILITY OmFileLoadHelper { | |||
public: | |||
Status Init(const ge::ModelData &model); | |||
Status Init(uint8_t *model_data, const uint32_t model_data_size); | |||
Status Init(uint8_t *const model_data, const uint32_t model_data_size); | |||
Status Init(uint8_t *model_data, const uint32_t model_data_size, uint32_t model_num); | |||
Status Init(uint8_t *const model_data, const uint32_t model_data_size, const uint32_t model_num); | |||
Status GetModelPartition(ModelPartitionType type, ModelPartition &partition); | |||
Status GetModelPartition(const ModelPartitionType type, ModelPartition &partition); | |||
Status GetModelPartition(ModelPartitionType type, ModelPartition &partition, size_t model_index); | |||
Status GetModelPartition(const ModelPartitionType type, ModelPartition &partition, const size_t model_index); | |||
OmFileContext context_; | |||
@@ -70,9 +65,9 @@ class GE_FUNC_VISIBILITY OmFileLoadHelper { | |||
private: | |||
Status CheckModelValid(const ge::ModelData &model) const; | |||
Status LoadModelPartitionTable(uint8_t *model_data, const uint32_t model_data_size); | |||
Status LoadModelPartitionTable(uint8_t *const model_data, const uint32_t model_data_size); | |||
Status LoadModelPartitionTable(uint8_t *model_data, const uint32_t model_data_size, uint32_t model_num); | |||
Status LoadModelPartitionTable(uint8_t *const model_data, const uint32_t model_data_size, const uint32_t model_num); | |||
bool is_inited_{false}; | |||
}; | |||
@@ -89,25 +84,24 @@ class GE_FUNC_VISIBILITY OmFileSaveHelper { | |||
ModelPartitionTable *GetPartitionTable(); | |||
Status AddPartition(ModelPartition &partition); | |||
Status AddPartition(ModelPartition &partition, size_t cur_index); | |||
Status AddPartition(const ModelPartition &partition); | |||
const std::vector<ModelPartition> &GetModelPartitions() const; | |||
Status AddPartition(const ModelPartition &partition, const size_t cur_index); | |||
Status SaveModel(const SaveParam &save_param, const char *target_file, ge::ModelBufferData &model, | |||
bool is_offline = true); | |||
Status SaveModel(const SaveParam &save_param, const char_t *const output_file, ge::ModelBufferData &model, | |||
const bool is_offline = true); | |||
Status SaveModelToFile(const char *output_file, ge::ModelBufferData &model, bool is_offline = true); | |||
Status SaveModelToFile(const char_t *const output_file, ge::ModelBufferData &model, const bool is_offline = true); | |||
std::vector<OmFileContext> model_contexts_; | |||
ModelFileHeader model_header_; | |||
OmFileContext context_; | |||
ModelPartitionTable *GetPartitionTable(size_t cur_ctx_index); | |||
ModelPartitionTable *GetPartitionTable(const size_t cur_ctx_index); | |||
Status SaveRootModel(const SaveParam &save_param, const char *output_file, ModelBufferData &model, bool is_offline); | |||
Status SaveRootModel(const SaveParam &save_param, const char_t *const output_file, ModelBufferData &model, | |||
const bool is_offline); | |||
}; | |||
} // namespace ge | |||
#endif // INC_FRAMEWORK_COMMON_HELPER_OM_FILE_HELPER_H_ |
@@ -28,97 +28,13 @@ | |||
#include "framework/common/util.h" | |||
#include "graph/compute_graph.h" | |||
using std::vector; | |||
namespace ge { | |||
// Size of RC memory alignment, 2M | |||
constexpr size_t ALIGN_SIZE = 2097152; | |||
constexpr uint32_t RC_VALUE_DEFAULT = 1; | |||
constexpr uint32_t RC_VALUE_MAX = 32; | |||
// RC data type classification | |||
enum RCType { | |||
RC_DEFAULT, // Such as temporary workspace memory of operator, variable (including global and local variable) | |||
RC_HCOM, // Output of gradient aggregation, RC value should be set to 0 | |||
RC_L2LOSS, // Parameter of L2 loss operator, RC value should be set to 0 | |||
RC_INPUTOUTPUT, // Input and output tensor of operator, RC value is returned by FE calculation | |||
RC_WEIGHTS, // The weight, fp16, RC value used by FP/BP operator should be set to 1 or the actual access numbers | |||
RC_DW, // The gradient data DW and RC value output by BP operator | |||
// should be set to 1 or the actual access numbers | |||
RC_ARGS // Args of FlowTable, actual access numbers | |||
}; | |||
enum MemType { INPUT_TENSOR, OUTPUT_TENSOR, WEIGHT, WORKSPACE }; | |||
// Memory usage information < node, type, number > | |||
struct NodeInfo { | |||
std::string nodeName; | |||
MemType memType; | |||
size_t index; | |||
}; | |||
// Memory block RC value | |||
struct RCMemoryBlock { | |||
RCType type; // RC type | |||
size_t blockSize; // memory block size | |||
size_t headOffset; // Start offset from base address | |||
size_t tailOffset; // End offset from base address | |||
uint32_t rcCount; // RC value | |||
NodeInfo nodeInfo; // Input and output indexes of node objects to which RC belongs | |||
}; | |||
// L2Cache optimizer | |||
class GE_FUNC_VISIBILITY L2CacheOptimize { | |||
public: | |||
explicit L2CacheOptimize(ge::ComputeGraphPtr &graph); | |||
~L2CacheOptimize(); | |||
// Collect the information L2Cache Memory optimization | |||
Status Gath(); | |||
private: | |||
ge::ComputeGraphPtr graph_; | |||
// Save RC block information list | |||
std::vector<RCMemoryBlock> weightRCs; | |||
std::vector<RCMemoryBlock> opRCs; | |||
// Extract RC information generated by FE from compiled graph | |||
void RetirveRCinfo(); | |||
// Take the maximum common divisor of RC values for the duplicate address | |||
void Merge(std::vector<RCMemoryBlock> &blocks); | |||
// The RC information is aligned with the 2m address | |||
void Align(std::vector<RCMemoryBlock> &blocks); | |||
// Weight of l2loss operator, output of gradient aggregation output, RC value set to 0 | |||
void HandleOutputZeroRC(RCType type, ge::NodePtr node, std::vector<int64_t> &outputList, | |||
std::vector<RCMemoryBlock> &blocks); | |||
// Processing operator input Tensor's RC | |||
void HandOPInput(ge::NodePtr node, std::vector<int64_t> &inputList, std::vector<RCMemoryBlock> &blocks); | |||
// Processing operator output Tensor's RC | |||
void HandOPoutput(ge::NodePtr node, std::vector<int64_t> &outputList, std::vector<RCMemoryBlock> &blocks); | |||
constexpr size_t ALIGN_SIZE = 2097152U; | |||
// maximum common divisor | |||
uint32_t Measure(uint32_t x, uint32_t y) { | |||
if ((x == 0) || (y == 0)) return RC_VALUE_DEFAULT; | |||
uint32_t z = y; | |||
while (x % y != 0) { | |||
z = x % y; | |||
x = y; | |||
y = z; | |||
} | |||
return z; | |||
} | |||
constexpr uint32_t RC_VALUE_DEFAULT = 1U; | |||
constexpr uint32_t RC_VALUE_MAX = 32U; | |||
bool Contain(const RCMemoryBlock &l_block, const RCMemoryBlock &r_block); | |||
bool Cross(const RCMemoryBlock &l_block, const RCMemoryBlock &r_block); | |||
bool Connect(const RCMemoryBlock &l_block, const RCMemoryBlock &r_block); | |||
}; | |||
} // namespace ge | |||
#endif // INC_FRAMEWORK_COMMON_L2_CACHE_OPTIMIZE_H_ |
@@ -34,143 +34,11 @@ | |||
#include <google/protobuf/map.h> | |||
#include <unordered_map> | |||
#include <string> | |||
#include "external/graph/types.h" | |||
#include "graph/debug/ge_attr_define.h" | |||
#include "proto/om.pb.h" | |||
using domi::AttrDef; | |||
using domi::AttrDef_ListValue; | |||
using domi::ModelDef; | |||
using domi::NamedAttrs; | |||
using domi::OpDef; | |||
namespace ge { | |||
using AttrDefMap = ::google::protobuf::Map<::std::string, ::domi::AttrDef>; | |||
using AttrDefPair = ::google::protobuf::MapPair<std::string, domi::AttrDef>; | |||
GE_FUNC_VISIBILITY void AddOpAttr(const std::string &key, AttrDef &attr, OpDef *opdef); | |||
// DEFINE_ADD_ATTR_VALUE | |||
GE_FUNC_VISIBILITY void AddOpAttr(const std::string &key, const std::string &value, AttrDefMap *attrs); | |||
GE_FUNC_VISIBILITY void AddOpAttr(const std::string &key, const char *value, AttrDefMap *attrs); | |||
GE_FUNC_VISIBILITY void AddOpAttr(const char *key, const char *value, AttrDefMap *attrs); | |||
GE_FUNC_VISIBILITY void AddOpAttr(const std::string &key, const uint32_t value, AttrDefMap *attrs); | |||
GE_FUNC_VISIBILITY void AddOpAttr(const std::string &key, const int32_t value, AttrDefMap *attrs); | |||
GE_FUNC_VISIBILITY void AddOpAttr(const std::string &key, const int64_t value, AttrDefMap *attrs); | |||
GE_FUNC_VISIBILITY void AddOpAttr(const std::string &key, const float value, AttrDefMap *attrs); | |||
GE_FUNC_VISIBILITY void AddOpAttr(const std::string &key, const double value, AttrDefMap *attrs); | |||
GE_FUNC_VISIBILITY void AddOpAttr(const std::string &key, const bool value, AttrDefMap *attrs); | |||
GE_FUNC_VISIBILITY void AddOpAttr(const std::string &key, const AttrDef_ListValue &value, AttrDefMap *attrs); | |||
// DEFINE_ADD_ATTR_VALUE | |||
GE_FUNC_VISIBILITY void AddOpAttr(const std::string &key, const std::string &value, OpDef *opdef); | |||
GE_FUNC_VISIBILITY void AddOpAttr(const std::string &key, const char *value, OpDef *opdef); | |||
GE_FUNC_VISIBILITY void AddOpAttr(const char *key, const char *value, OpDef *opdef); | |||
GE_FUNC_VISIBILITY void AddOpAttr(const std::string &key, const uint32_t value, OpDef *opdef); | |||
GE_FUNC_VISIBILITY void AddOpAttr(const std::string &key, const int32_t value, OpDef *opdef); | |||
GE_FUNC_VISIBILITY void AddOpAttr(const std::string &key, const int64_t value, OpDef *opdef); | |||
GE_FUNC_VISIBILITY void AddOpAttr(const std::string &key, const float value, OpDef *opdef); | |||
GE_FUNC_VISIBILITY void AddOpAttr(const std::string &key, const double value, OpDef *opdef); | |||
GE_FUNC_VISIBILITY void AddOpAttr(const std::string &key, const bool value, OpDef *opdef); | |||
GE_FUNC_VISIBILITY void AddOpAttr(const std::string &key, const AttrDef_ListValue &value, OpDef *opdef); | |||
GE_FUNC_VISIBILITY void AddOpBytesAttr(const std::string &key, const void *value, size_t size, OpDef *opdef); | |||
// DEFINE_ADD_ATTR_VALUE_LIST | |||
GE_FUNC_VISIBILITY void AddOpAttrList(const std::string &key, const double value, AttrDefMap *attrs); | |||
GE_FUNC_VISIBILITY void AddOpAttrList(const std::string &key, const float value, AttrDefMap *attrs); | |||
GE_FUNC_VISIBILITY void AddOpAttrList(const std::string &key, const uint32_t value, AttrDefMap *attrs); | |||
GE_FUNC_VISIBILITY void AddOpAttrList(const std::string &key, const int32_t value, AttrDefMap *attrs); | |||
GE_FUNC_VISIBILITY void AddOpAttrList(const std::string &key, const std::string value, AttrDefMap *attrs); | |||
GE_FUNC_VISIBILITY void AddOpAttrList(const std::string &key, const double value, OpDef *opdef); | |||
GE_FUNC_VISIBILITY void AddOpAttrList(const std::string &key, const float value, OpDef *opdef); | |||
GE_FUNC_VISIBILITY void AddOpAttrList(const std::string &key, const uint32_t value, OpDef *opdef); | |||
GE_FUNC_VISIBILITY void AddOpAttrList(const std::string &key, const int32_t value, OpDef *opdef); | |||
GE_FUNC_VISIBILITY void AddOpAttrList(const std::string &key, const bool value, OpDef *opdef); | |||
GE_FUNC_VISIBILITY void AddOpAttrList(const std::string &key, const int64_t value, OpDef *opdef); | |||
GE_FUNC_VISIBILITY void AddOpAttrList(const std::string &key, const std::string &value, OpDef *opdef); | |||
GE_FUNC_VISIBILITY bool GetOpAttr(const std::string &key, std::string *value, const OpDef *opdef); | |||
GE_FUNC_VISIBILITY bool GetOpAttr(const std::string &key, int32_t *value, const OpDef *opdef); | |||
GE_FUNC_VISIBILITY bool GetOpAttr(const std::string &key, int64_t *value, const OpDef *opdef); | |||
GE_FUNC_VISIBILITY bool GetOpAttr(const std::string &key, uint32_t *value, const OpDef *opdef); | |||
GE_FUNC_VISIBILITY bool GetOpAttr(const std::string &key, float *value, const OpDef *opdef); | |||
GE_FUNC_VISIBILITY bool GetOpAttr(const std::string &key, double *value, const OpDef *opdef); | |||
GE_FUNC_VISIBILITY bool GetOpAttr(const std::string &key, bool *value, const OpDef *opdef); | |||
GE_FUNC_VISIBILITY bool GetOpAttr(const std::string &key, AttrDef_ListValue *value, const OpDef *opdef); | |||
GE_FUNC_VISIBILITY uint32_t GetOpAttrListSize(const std::string &key, std::string value, const OpDef *opdef); | |||
GE_FUNC_VISIBILITY uint32_t GetOpAttrListSize(const std::string &key, int32_t value, const OpDef *opdef); | |||
GE_FUNC_VISIBILITY uint32_t GetOpAttrListSize(const std::string &key, int64_t value, const OpDef *opdef); | |||
GE_FUNC_VISIBILITY uint32_t GetOpAttrListSize(const std::string &key, uint32_t value, const OpDef *opdef); | |||
GE_FUNC_VISIBILITY uint32_t GetOpAttrListSize(const std::string &key, float value, const OpDef *opdef); | |||
GE_FUNC_VISIBILITY uint32_t GetOpAttrListSize(const std::string &key, double value, const OpDef *opdef); | |||
GE_FUNC_VISIBILITY uint32_t GetOpAttrListSize(const std::string &key, bool value, const OpDef *opdef); | |||
GE_FUNC_VISIBILITY bool GetBytesAttr(const std::string &key, std::string *value, const OpDef *opdef); | |||
GE_FUNC_VISIBILITY bool GetBytesAttr(const std::string &key, std::string *value, const ModelDef *model_def); | |||
GE_FUNC_VISIBILITY void AddModelAttr(const std::string &key, const std::string &value, ModelDef *model_def); | |||
GE_FUNC_VISIBILITY void AddModelAttr(const std::string &key, const char *value, ModelDef *model_def); | |||
GE_FUNC_VISIBILITY void AddModelAttr(const char *key, const char *value, ModelDef *model_def); | |||
GE_FUNC_VISIBILITY void AddModelAttr(const std::string &key, const uint32_t value, ModelDef *model_def); | |||
GE_FUNC_VISIBILITY void AddModelAttr(const std::string &key, const int32_t value, ModelDef *model_def); | |||
GE_FUNC_VISIBILITY void AddModelAttr(const std::string &key, const int64_t value, ModelDef *model_def); | |||
GE_FUNC_VISIBILITY void AddModelAttr(const std::string &key, const float value, ModelDef *model_def); | |||
GE_FUNC_VISIBILITY void AddModelAttr(const std::string &key, const double value, ModelDef *model_def); | |||
GE_FUNC_VISIBILITY void AddModelAttr(const std::string &key, const bool value, ModelDef *model_def); | |||
GE_FUNC_VISIBILITY void AddModelAttr(const std::string &key, const void *value, size_t size, ModelDef *model_def); | |||
GE_FUNC_VISIBILITY void AddModelAttr(const std::string &key, const AttrDef_ListValue &value, ModelDef *model_def); | |||
GE_FUNC_VISIBILITY void AddModelAttrList(const std::string &key, const double value, ModelDef *model_def); | |||
GE_FUNC_VISIBILITY void AddModelAttrList(const std::string &key, const float value, ModelDef *model_def); | |||
GE_FUNC_VISIBILITY void AddModelAttrList(const std::string &key, const uint32_t value, ModelDef *model_def); | |||
GE_FUNC_VISIBILITY void AddModelAttrList(const std::string &key, const int32_t value, ModelDef *model_def); | |||
GE_FUNC_VISIBILITY void AddModelAttrList(const std::string &key, const std::string &value, ModelDef *model_def); | |||
GE_FUNC_VISIBILITY bool GetModelAttr(const std::string &key, std::string *value, const ModelDef *model_def); | |||
GE_FUNC_VISIBILITY bool GetModelAttr(const std::string &key, int32_t *value, const ModelDef *model_def); | |||
GE_FUNC_VISIBILITY bool GetModelAttr(const std::string &key, int64_t *value, const ModelDef *model_def); | |||
GE_FUNC_VISIBILITY bool GetModelAttr(const std::string &key, uint32_t *value, const ModelDef *model_def); | |||
GE_FUNC_VISIBILITY bool GetModelAttr(const std::string &key, float *value, const ModelDef *model_def); | |||
GE_FUNC_VISIBILITY bool GetModelAttr(const std::string &key, double *value, const ModelDef *model_def); | |||
GE_FUNC_VISIBILITY bool GetModelAttr(const std::string &key, bool *value, const ModelDef *model_def); | |||
GE_FUNC_VISIBILITY bool GetModelAttr(const std::string &key, AttrDef_ListValue *value, const ModelDef *model_def); | |||
GE_FUNC_VISIBILITY bool HasOpAttr(const OpDef *opdef, const std::string &attr_name); | |||
GE_FUNC_VISIBILITY void SetAttrDef(const std::string &value, AttrDef *out); | |||
GE_FUNC_VISIBILITY void SetAttrDef(const char *value, AttrDef *out); | |||
GE_FUNC_VISIBILITY void SetAttrDef(const uint32_t value, AttrDef *out); | |||
GE_FUNC_VISIBILITY void SetAttrDef(const int32_t value, AttrDef *out); | |||
GE_FUNC_VISIBILITY void SetAttrDef(const float value, AttrDef *out); | |||
GE_FUNC_VISIBILITY void SetAttrDef(const double value, AttrDef *out); | |||
GE_FUNC_VISIBILITY void SetAttrDef(const bool value, AttrDef *out); | |||
GE_FUNC_VISIBILITY void SetAttrList(const std::string &value, AttrDef *out); | |||
GE_FUNC_VISIBILITY void SetAttrList(const bool value, AttrDef *out); | |||
GE_FUNC_VISIBILITY void SetAttrList(const float value, AttrDef *out); | |||
GE_FUNC_VISIBILITY void SetAttrList(const double value, AttrDef *out); | |||
GE_FUNC_VISIBILITY void SetAttrList(const uint32_t value, AttrDef *out); | |||
GE_FUNC_VISIBILITY bool GetAttrDefValue(const std::string &key, std::string *value, const AttrDefMap &attr); | |||
GE_FUNC_VISIBILITY bool GetAttrDefValue(const std::string &key, int32_t *value, const AttrDefMap &attr); | |||
GE_FUNC_VISIBILITY bool GetAttrDefValue(const std::string &key, int64_t *value, const AttrDefMap &attr); | |||
GE_FUNC_VISIBILITY bool GetAttrDefValue(const std::string &key, uint32_t *value, const AttrDefMap &attr); | |||
GE_FUNC_VISIBILITY bool GetAttrDefValue(const std::string &key, float *value, const AttrDefMap &attr); | |||
GE_FUNC_VISIBILITY bool GetAttrDefValue(const std::string &key, double *value, const AttrDefMap &attr); | |||
GE_FUNC_VISIBILITY bool GetAttrDefValue(const std::string &key, bool *value, const AttrDefMap &attr); | |||
GE_FUNC_VISIBILITY bool GetAttrDefValue(const std::string &key, AttrDef_ListValue *value, const AttrDefMap &attr); | |||
GE_FUNC_VISIBILITY bool GetAttrDefValue(const std::string &key, NamedAttrs *&value, AttrDefMap *attr); | |||
GE_FUNC_VISIBILITY bool GetAttrDefValue(const std::string &key, const NamedAttrs *&value, const AttrDefMap &attr); | |||
GE_FUNC_VISIBILITY bool GetAttrDefListValue(const std::string &key, int32_t idx, int32_t *value, | |||
const AttrDefMap &attr); | |||
GE_FUNC_VISIBILITY bool GetAttrDefListValue(const std::string &key, int32_t idx, uint32_t *value, | |||
const AttrDefMap &attr); | |||
GE_FUNC_VISIBILITY bool GetAttrDefListValue(const std::string &key, int32_t idx, float *value, const AttrDefMap &attr); | |||
GE_FUNC_VISIBILITY bool GetAttrDefListValue(const std::string &key, int32_t idx, double *value, const AttrDefMap &attr); | |||
} // namespace ge | |||
#endif // INC_FRAMEWORK_COMMON_OP_ATTR_VALUE_UTIL_H_ | |||
GE_FUNC_VISIBILITY void SetAttrDef(const std::string &value, domi::AttrDef *const out); | |||
} | |||
#endif // INC_FRAMEWORK_COMMON_OP_ATTR_VALUE_UTIL_H_ |
@@ -31,18 +31,16 @@ | |||
#include "proto/insert_op.pb.h" | |||
namespace ge { | |||
using domi::Status; | |||
// Add Sub Mul | |||
GE_FUNC_VISIBILITY extern const uint32_t ADD_INPUT_NUM; | |||
GE_FUNC_VISIBILITY extern const uint32_t SUB_INPUT_NUM; | |||
GE_FUNC_VISIBILITY extern const uint32_t MUL_INPUT_NUM; | |||
// Permute | |||
GE_FUNC_VISIBILITY extern const int32_t PERMUTE_ORDER_NUM; | |||
// Ssd PriroBox | |||
GE_FUNC_VISIBILITY extern const double SSD_PRIORBOX_ASPECT_RATIO_VALUE; | |||
GE_FUNC_VISIBILITY extern const float64_t SSD_PRIORBOX_ASPECT_RATIO_VALUE; | |||
GE_FUNC_VISIBILITY extern const uint32_t STRIDEDSLICE_INPUT_NUM; | |||
@@ -55,8 +53,8 @@ GE_FUNC_VISIBILITY extern const uint32_t SWITCH_DATA_INPUT; | |||
GE_FUNC_VISIBILITY extern const uint32_t SWITCH_PRED_INPUT; | |||
// Merge | |||
GE_FUNC_VISIBILITY extern const uint32_t MERGE_DATA_OUTPUT; | |||
GE_FUNC_VISIBILITY extern const uint32_t MERGE_INDEX_OUTPUT; | |||
GE_FUNC_VISIBILITY extern const int32_t MERGE_DATA_OUTPUT; | |||
GE_FUNC_VISIBILITY extern const int32_t MERGE_INDEX_OUTPUT; | |||
// FunctionOp | |||
GE_FUNC_VISIBILITY extern const uint32_t IF_COND_INPUT; | |||
@@ -66,86 +64,35 @@ GE_FUNC_VISIBILITY extern const uint32_t FOR_DELTA_INPUT; | |||
GE_FUNC_VISIBILITY extern const uint32_t FOR_DATA_INPUT; | |||
GE_FUNC_VISIBILITY extern const int32_t NORMAL_TENSOR_SIZE; | |||
/*lint -e148*/ | |||
class GE_FUNC_VISIBILITY OpUtils { | |||
public: | |||
/// | |||
/// @ingroup domi_ome | |||
/// @brief Check whether check_value is in [min_enum_value, max_enum_value] | |||
/// @return true Within | |||
/// @return false out of range | |||
// | |||
static inline bool CheckEnumValid(int32_t check_value, int32_t min_enum_value, int32_t max_enum_value) { | |||
return check_value < min_enum_value ? false : (check_value >= max_enum_value ? false : true); | |||
} | |||
/// | |||
/// @ingroup domi_omg | |||
/// @brief Determine whether to manually calculate the tensor size based on the values of format and dim | |||
/// @param [in] format, Format information of the tensor | |||
/// @param [in] real_dim_cnt, Tensor dim | |||
/// @return true Manually calculate the size based on dim and datatype | |||
/// @return false skip | |||
/// | |||
static bool IsComputDimsSize(const int32_t format, const uint32_t real_dim_cnt); | |||
/// | |||
/// @brief Extract AIPP parameters from AttrDefMap and splice them | |||
/// @param [in] aipp_attr attr of operator | |||
/// @param [out] aipp_params aipp parameters | |||
/// @return enum of tagCCAippInputFormat | |||
/// | |||
static Status ConvertAippParams(const NamedAttrs &aipp_attr, domi::AippOpParams *aipp_params); | |||
static Status TransferDim(const std::vector<int64_t> &dim, std::vector<int64_t> &dim_vector); | |||
static Status ConvertAippParams(const GeAttrValue::NamedAttrs &aipp_attr, domi::AippOpParams &aipp_params); | |||
template <typename T> | |||
static void SliceData(const std::vector<char *> &input, int64_t chunk_size, std::vector<char *> &output, | |||
int64_t begin, int64_t out_dim, int64_t stride); | |||
static void SliceData(const std::vector<char_t *> &input, const int64_t chunk_size, std::vector<char_t *> &output, | |||
const int64_t begin, const int64_t out_dim, const int64_t stride); | |||
template <typename T> | |||
static Status SetDataByDataType(size_t out_size, const std::vector<char *> &chunk_input, | |||
const std::vector<char *> &chunk_output, GeTensor *output); | |||
static Status SetDataByDataType(const size_t out_size, const std::vector<char_t *> &chunk_input, | |||
const std::vector<char_t *> &chunk_output, GeTensor *const output); | |||
template <typename T> | |||
static Status SetOutputSliceDataByDataType(void *data, int64_t data_size, const std::vector<int64_t> &input_dims, | |||
const std::vector<int64_t> &begin, const std::vector<int64_t> &output_dims, | |||
ge::GeTensor *output, const std::vector<int64_t> &stride); | |||
static Status SetOutputSliceData(void *data, int64_t data_size, int32_t data_type, | |||
static Status SetOutputSliceDataByDataType(void *const data, const int64_t data_size, | |||
const std::vector<int64_t> &input_dims, const std::vector<int64_t> &begin, | |||
const std::vector<int64_t> &output_dims, ge::GeTensor *const output, | |||
const std::vector<int64_t> &stride); | |||
static Status SetOutputSliceData(void *const data, const int64_t data_size, const int32_t data_type, | |||
const std::vector<int64_t> &input_dims, const std::vector<int64_t> &begin, | |||
const std::vector<int64_t> &output_dims, ge::GeTensor *const output, | |||
const std::vector<int64_t> &output_dims, GeTensor *const output, | |||
const std::vector<int64_t> &stride); | |||
/// | |||
/// @ingroup domi_omg | |||
/// @brief Convert the convolutional weight data from [h, w, c, k] to [k, c, h, w] | |||
/// @param [in] input Weight data in HWCK format | |||
/// @param [in] H value of H dimension | |||
/// @param [in] W value of W dimension | |||
/// @param [in] C value of C dimension | |||
/// @param [in] K value of K dimension | |||
/// @param [out] output Data pointer after conversion. The format is KCHW. | |||
/// | |||
static void TransDataHWCK2KCHW(const void *input, int64_t h, int64_t w, int64_t c, int64_t k, void **output); | |||
/// | |||
/// @ingroup domi_omg | |||
/// @brief Converts the convolutional weight data from [k, c, h, w] to [h, w, c, k]. | |||
/// @param [in] input Weight data in HWCK format | |||
/// @param [in] K value of K dimension | |||
/// @param [in] C value of C dimension | |||
/// @param [in] H value of H dimension | |||
/// @param [in] W value of W dimension | |||
/// @param [out] output Data pointer after conversion. The format is HWCK | |||
/// | |||
static void TransDataKCHW2HWCK(const void *input, int64_t k, int64_t c, int64_t h, int64_t w, void *output); | |||
static std::vector<ConstGeTensorPtr> GetWeights(const ge::Node &node); | |||
static std::vector<ConstGeTensorPtr> GetWeights(ge::ConstNodePtr node); | |||
static std::vector<GeTensorPtr> MutableWeights(const ge::Node &node); | |||
static std::vector<GeTensorPtr> MutableWeights(const ge::NodePtr node); | |||
static Status SetWeights(ge::Node &node, const std::vector<ge::GeTensorPtr> &weights); | |||
static Status SetWeights(const ge::NodePtr node, const std::vector<ge::GeTensorPtr> &weights); | |||
static Status GetShapeDataFromConstTensor(const ConstGeTensorPtr &tensor, const DataType type, | |||
std::vector<int64_t> &dims); | |||
private: | |||
static uint32_t GetRealDimCnt(const GeTensorDesc &tensor_desc); | |||
}; | |||
/*lint +e148*/ | |||
} // namespace ge | |||
#endif // INC_FRAMEWORK_COMMON_OP_GE_OP_UTILS_H_ |
@@ -20,6 +20,8 @@ | |||
#include <set> | |||
#include <string> | |||
#include "graph/types.h" | |||
namespace ge { | |||
class GE_FUNC_VISIBILITY OpTypeContainer { | |||
public: | |||
@@ -30,12 +32,11 @@ class GE_FUNC_VISIBILITY OpTypeContainer { | |||
~OpTypeContainer() = default; | |||
void Register(const std::string &op_type) { | |||
op_type_list_.insert(op_type); | |||
static_cast<void>(op_type_list_.insert(op_type)); | |||
} | |||
bool IsExisting(const std::string &op_type) { | |||
auto iter_find = op_type_list_.find(op_type); | |||
return iter_find != op_type_list_.end(); | |||
return op_type_list_.find(op_type) != op_type_list_.end(); | |||
} | |||
protected: | |||
@@ -47,20 +48,19 @@ class GE_FUNC_VISIBILITY OpTypeContainer { | |||
class GE_FUNC_VISIBILITY OpTypeRegistrar { | |||
public: | |||
explicit OpTypeRegistrar(const std::string &op_type) { | |||
explicit OpTypeRegistrar(const std::string &op_type) noexcept { | |||
OpTypeContainer::Instance()->Register(op_type); | |||
} | |||
~OpTypeRegistrar() {} | |||
}; | |||
} // namespace ge | |||
#define REGISTER_OPTYPE_DECLARE(var_name, str_name) \ | |||
FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char *var_name; | |||
FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const char_t *var_name; | |||
#define REGISTER_OPTYPE_DEFINE(var_name, str_name) \ | |||
const char *var_name = str_name; \ | |||
const OpTypeRegistrar g_##var_name##_reg(str_name); | |||
#define IS_OPTYPE_EXISTING(str_name) (OpTypeContainer::Instance()->IsExisting(str_name)) | |||
} // namespace ge | |||
const char_t *var_name = str_name; \ | |||
const ge::OpTypeRegistrar g_##var_name##_reg(str_name); | |||
#define IS_OPTYPE_EXISTING(str_name) (ge::OpTypeContainer::Instance()->IsExisting(str_name)) | |||
#endif // INC_FRAMEWORK_COMMON_OP_TYPES_H_ |
@@ -24,10 +24,8 @@ | |||
/// @brief Output the profiling data of single operator in Pytorch, and does not support multithreading | |||
/// @return Status result | |||
/// | |||
GE_FUNC_VISIBILITY ge::Status ProfSetStepInfo(uint64_t index_id, uint16_t tag_id, rtStream_t stream); | |||
GE_FUNC_VISIBILITY ge::Status ProfSetStepInfo(const uint64_t index_id, const uint16_t tag_id, rtStream_t const stream); | |||
GE_FUNC_VISIBILITY ge::Status ProfGetDeviceFormGraphId(uint32_t graph_id, uint32_t &device_id); | |||
GE_FUNC_VISIBILITY void ProfSetGraphIdToDeviceMap(uint32_t graph_id, uint32_t &device_id); | |||
GE_FUNC_VISIBILITY ge::Status ProfGetDeviceFormGraphId(const uint32_t graph_id, uint32_t &device_id); | |||
#endif // INC_FRAMEWORK_COMMON_GE_PROFILING_H_ |
@@ -0,0 +1,173 @@ | |||
/** | |||
* Copyright 2021 Huawei Technologies Co., Ltd | |||
* | |||
* Licensed under the Apache License, Version 2.0 (the "License"); | |||
* you may not use this file except in compliance with the License. | |||
* You may obtain a copy of the License at | |||
* | |||
* http://www.apache.org/licenses/LICENSE-2.0 | |||
* | |||
* Unless required by applicable law or agreed to in writing, software | |||
* distributed under the License is distributed on an "AS IS" BASIS, | |||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
* See the License for the specific language governing permissions and | |||
* limitations under the License. | |||
*/ | |||
#ifndef AIR_CXX_PROFILING_DEFINITIONS_H | |||
#define AIR_CXX_PROFILING_DEFINITIONS_H | |||
#include <string> | |||
#include <iostream> | |||
#include <mutex> | |||
#include <unordered_map> | |||
#include "graph/profiler.h" | |||
#include "external/ge/ge_api_types.h" | |||
#include "toolchain/prof_callback.h" | |||
namespace ge { | |||
namespace profiling { | |||
enum { | |||
kAclCompileAndExecute, | |||
kAclMatchOpModel, | |||
kAclMatchStaticOpModel, | |||
kAclMatchDynamicOpModel, | |||
kAclExecuteAsync, | |||
kAclLoadSingleOp, | |||
kAclBuildOpModel, | |||
kInferShape, | |||
kTiling, | |||
kUpdateShape, | |||
kConstPrepare, | |||
kInitHybridExecuteArgs, | |||
kInitInferShapeContext, | |||
kDestroyInferShapeContext, | |||
kResetSubgraphExecutor, | |||
kCommitInferShapeTask, | |||
kDeviceToHost, | |||
kPrepareTask, | |||
kLaunchTask, | |||
kCommitTilingTask, | |||
kAtomic, | |||
kKernelLaunchPrepare, | |||
kRtKernelLaunch, | |||
kOpExecute, | |||
kAllocMem, | |||
kCopyH2D, | |||
// Add new definitions here | |||
kProfilingIndexEnd | |||
}; | |||
// Sentinel hash id meaning "no hash registered yet".
constexpr uint64_t kInvalidHashId = 0UL;
// Process-wide profiling context: owns the Profiler instance, the string->index
// registry used to turn event names into cheap integer ids, and the enable flag.
class ProfilingContext {
 public:
  static bool IsDumpToStdEnabled();
  static ProfilingContext &GetInstance();
  ProfilingContext();
  ~ProfilingContext();
  /*
   * An alternative design is for `IsEnabled` to check only whether profiler_ is a null
   * pointer, without a separate enabled flag — that would save one flag. But it would
   * also mean profiler_ must stay null whenever profiling is disabled. For performance,
   * the profiling mechanism calls `RegisterString` at compile/load time to register
   * strings with profiler_, so that execution only uses the pre-registered indices.
   * Hence this scenario exists: profiling is not enabled at compile time (compilation
   * takes long, and profiling it would not reflect execution-time cost), so the string
   * registration done at compile time takes effect anyway. If profiling is then turned
   * on dynamically at execution time, the registered strings must still be available —
   * which is why the flag and the profiler_ pointer are kept separate.
   */
  bool IsEnabled() const noexcept {
    return enabled_ && profiler_ != nullptr;
  }
  void SetEnable() noexcept {
    enabled_ = true;
  }
  void SetDisable() noexcept {
    enabled_ = false;
  }
  // Records an event with an explicit timestamp; no-op when profiling is disabled.
  void RecordCurrentThread(const int64_t element, const int64_t event, const EventType et,
                           const std::chrono::time_point<std::chrono::system_clock> time_point) {
    if (IsEnabled()) {
      profiler_->RecordCurrentThread(element, event, et, time_point);
    }
  }
  // Convenience overload stamped with the current time.
  void RecordCurrentThread(const int64_t element, const int64_t event, const EventType et) {
    RecordCurrentThread(element, event, et, std::chrono::system_clock::now());
  }
  const Profiler *GetProfiler() const {
    return profiler_.get();
  }
  // Dumps collected records to the stream; writes a skip notice when disabled.
  void Dump(std::ostream &out_stream) const {
    if (IsEnabled()) {
      profiler_->Dump(out_stream);
    } else {
      out_stream << "Profiling not enable, skip to dump" << std::endl;
    }
  }
  void DumpToStdOut() const {
    Dump(std::cout);
  }
  // Clears collected records; keeps string registrations (see comment above).
  void Reset() {
    if (IsEnabled()) {
      profiler_->Reset();
    }
  }
  int64_t RegisterString(const std::string &str);
  int64_t RegisterStringHash(const uint64_t hash_id, const std::string &str);
  void UpdateElementHashId(const MsprofReporterCallback reporter_callback);
  static Status QueryHashId(const MsprofReporterCallback reporter_callback, const std::string &src_str,
                            uint64_t &hash_id);
  size_t GetRegisterStringNum() const {
    return strings_to_index_.size();
  }
  void Init();

 private:
  void UpdateHashByStr(const std::string &str, const uint64_t hash);

 private:
  bool inited_;
  bool enabled_;
  int64_t str_index_;                                        // next index to hand out in RegisterString
  std::unordered_map<std::string, int64_t> strings_to_index_;
  std::mutex strings_to_index_mutex_;                        // guards strings_to_index_
  std::unique_ptr<Profiler> profiler_;
};
class ScopeProfiler { | |||
public: | |||
ScopeProfiler(const int64_t element, const int64_t event) : element_(element), event_(event) { | |||
if (ProfilingContext::GetInstance().IsEnabled()) { | |||
start_trace_ = std::chrono::system_clock::now(); | |||
} | |||
} | |||
~ScopeProfiler() { | |||
if (ProfilingContext::GetInstance().IsEnabled()) { | |||
ProfilingContext::GetInstance().RecordCurrentThread(element_, event_, EventType::kEventStart, start_trace_); | |||
ProfilingContext::GetInstance().RecordCurrentThread(element_, event_, EventType::kEventEnd); | |||
} | |||
} | |||
void SetElement(const int64_t element) { | |||
element_ = element; | |||
} | |||
private: | |||
std::chrono::time_point<std::chrono::system_clock> start_trace_; | |||
int64_t element_; | |||
int64_t event_; | |||
}; | |||
} // namespace profiling | |||
} // namespace ge | |||
// Records a start event for (element, event) on the current thread.
#define PROFILING_START(element, event) \
  ge::profiling::ProfilingContext::GetInstance().RecordCurrentThread((element), (event), \
                                                                     ge::profiling::EventType::kEventStart)
// Records the matching end event for (element, event) on the current thread.
#define PROFILING_END(element, event) \
  ge::profiling::ProfilingContext::GetInstance().RecordCurrentThread((element), (event), \
                                                                     ge::profiling::EventType::kEventEnd)
// Declares a local ScopeProfiler named `profiler`: start is recorded at this
// statement, end when the enclosing scope exits.
#define PROFILING_SCOPE(element, event) ge::profiling::ScopeProfiler profiler((element), (event))
// Rebinds the element of the `profiler` declared by PROFILING_SCOPE in this scope.
#define PROFILING_SCOPE_ELEMENT(element) profiler.SetElement((element))
#endif  // AIR_CXX_PROFILING_DEFINITIONS_H
@@ -25,9 +25,9 @@ | |||
/// MAKE_GUARD([&] { Release Resource 1 }) | |||
/// Acquire Resource 2 | |||
// MAKE_GUARD([&] { Release Resource 2 }) | |||
#define GE_MAKE_GUARD(var, callback) const ScopeGuard const_guard_##var(callback) | |||
#define GE_MAKE_GUARD(var, callback) const ::ge::ScopeGuard const_guard_##var(callback) | |||
#define GE_DISMISSABLE_GUARD(var, callback) ScopeGuard make_guard_##var(callback) | |||
#define GE_DISMISSABLE_GUARD(var, callback) ::ge::ScopeGuard make_guard_##var(callback) | |||
#define GE_DISMISS_GUARD(var) make_guard_##var.Dismiss() | |||
namespace ge { | |||
@@ -44,7 +44,7 @@ class GE_FUNC_VISIBILITY ScopeGuard { | |||
if (on_exit_scope_ != nullptr) { | |||
try { | |||
on_exit_scope_(); | |||
} catch (std::bad_function_call &e) { | |||
} catch (std::bad_function_call &) { | |||
} catch (...) { | |||
} | |||
} | |||
@@ -39,13 +39,14 @@ | |||
#include <sstream> | |||
#include <string> | |||
#include <vector> | |||
#include "graph/types.h" | |||
namespace ge { | |||
class GE_FUNC_VISIBILITY StringUtils { | |||
public: | |||
static std::string &Ltrim(std::string &s) { | |||
#if __cplusplus >= 201103L | |||
(void)s.erase(s.begin(), std::find_if(s.begin(), s.end(), [](int32_t c) { return std::isspace(c) == 0; })); | |||
(void)s.erase(s.begin(), std::find_if(s.begin(), s.end(), [](const int32_t c) { return std::isspace(c) == 0; })); | |||
#else | |||
(void)s.erase(s.begin(), std::find_if(s.begin(), s.end(), std::not1(std::ptr_fun<int32_t, int32_t>(std::isspace)))); | |||
#endif | |||
@@ -54,7 +55,8 @@ class GE_FUNC_VISIBILITY StringUtils { | |||
// lint -esym(551,*) | |||
static std::string &Rtrim(std::string &s) { /*lint !e618*/ | |||
#if __cplusplus >= 201103L | |||
(void)s.erase(std::find_if(s.rbegin(), s.rend(), [](int32_t c) { return std::isspace(c) == 0; }).base(), s.end()); | |||
(void)s.erase(std::find_if(s.rbegin(), s.rend(), [](const int32_t c) { return std::isspace(c) == 0; }).base(), | |||
s.end()); | |||
#else | |||
(void)s.erase(std::find_if(s.rbegin(), s.rend(), std::not1(std::ptr_fun<int32_t, int32_t>(std::isspace))).base(), | |||
s.end()); | |||
@@ -79,7 +81,7 @@ class GE_FUNC_VISIBILITY StringUtils { | |||
/// @param [in] delim separator | |||
/// @return string array after segmentation | |||
/// | |||
static std::vector<std::string, std::allocator<std::string>> Split(const std::string &str, char delim) { | |||
static std::vector<std::string, std::allocator<std::string>> Split(const std::string &str, const char_t delim) { | |||
std::vector<std::string, std::allocator<std::string>> elems; | |||
if (str.empty()) { | |||
@@ -94,8 +96,8 @@ class GE_FUNC_VISIBILITY StringUtils { | |||
elems.push_back(item); | |||
} | |||
auto str_size = str.size(); | |||
if ((str_size > 0) && (str[str_size - 1] == delim)) { | |||
const auto str_size = str.size(); | |||
if ((str_size > 0U) && (str[str_size - 1U] == delim)) { | |||
elems.emplace_back(""); | |||
} | |||
@@ -107,13 +109,13 @@ class GE_FUNC_VISIBILITY StringUtils { | |||
/// @param [in] s path name | |||
/// @return file name | |||
/// | |||
static std::string GetFileName(std::string &s) { | |||
static std::string GetFileName(const std::string &s) { | |||
if (s.empty()) { | |||
return ""; | |||
} | |||
std::vector<std::string> files = StringUtils::Split(s, '/'); | |||
const std::vector<std::string> files = StringUtils::Split(s, '/'); | |||
return files.empty() ? "" : files[files.size() - 1]; | |||
return files.empty() ? "" : files[files.size() - 1U]; | |||
} | |||
/// | |||
/// @ingroup domi_common | |||
@@ -125,12 +127,13 @@ class GE_FUNC_VISIBILITY StringUtils { | |||
/// @return string after replacement | |||
/// | |||
static std::string ReplaceAll(std::string str, const std::string &old_value, const std::string &new_value) { | |||
std::string::size_type cur_pos = 0; | |||
std::string::size_type old_length = old_value.length(); | |||
std::string::size_type new_length = new_value.length(); | |||
std::string::size_type cur_pos = 0U; | |||
const std::string::size_type old_length = old_value.length(); | |||
const std::string::size_type new_length = new_value.length(); | |||
// cycle replace | |||
for (; cur_pos != std::string::npos; cur_pos += new_length) { | |||
if ((cur_pos = str.find(old_value, cur_pos)) != std::string::npos) { | |||
cur_pos = str.find(old_value, cur_pos); | |||
if (cur_pos != std::string::npos) { | |||
(void)str.replace(cur_pos, old_length, new_value); | |||
} else { | |||
break; | |||
@@ -148,7 +151,7 @@ class GE_FUNC_VISIBILITY StringUtils { | |||
/// @return if the value is a prefix, true is returned. Otherwise, false is returned | |||
/// | |||
static bool StartWith(const std::string &str, const std::string str_x) { | |||
return ((str.size() >= str_x.size()) && (str.compare(0, str_x.size(), str_x) == 0)); | |||
return ((str.size() >= str_x.size()) && (str.compare(0U, str_x.size(), str_x) == 0)); | |||
} | |||
/// | |||
@@ -159,14 +162,14 @@ class GE_FUNC_VISIBILITY StringUtils { | |||
/// @param [in] ... format Filling Content | |||
/// @return formatted string | |||
/// | |||
static std::string FormatString(const char *format, ...) { | |||
const uint32_t MAX_BUFFER_LEN = 1024; // the stack memory plint check result must be less than 1024 | |||
static std::string FormatString(const char_t *const format, ...) { | |||
const uint32_t MAX_BUFFER_LEN = 1024U; // the stack memory plint check result must be less than 1024 | |||
va_list args; | |||
va_start(args, format); | |||
char buffer[MAX_BUFFER_LEN] = {0}; | |||
int32_t ret = vsnprintf_s(buffer, MAX_BUFFER_LEN, MAX_BUFFER_LEN - 1, format, args); | |||
char_t buffer[MAX_BUFFER_LEN] = {}; | |||
const int32_t ret = vsnprintf_s(&buffer[0], MAX_BUFFER_LEN, MAX_BUFFER_LEN - 1U, format, args); | |||
va_end(args); | |||
return ret > 0 ? buffer : ""; | |||
return (ret > 0) ? buffer : ""; | |||
} | |||
}; | |||
} // namespace ge | |||
@@ -23,7 +23,7 @@ namespace ge { | |||
const int32_t CC_FUSION_OP_MAX = 32; | |||
typedef enum tagCcStatus { | |||
enum class ccStatus_t { | |||
CC_STATUS_SUCCESS = 0, /**< succ */ | |||
CC_STATUS_NOT_INITIALIZED = 1, /**< not init */ | |||
CC_STATUS_ALLOC_FAILED = 2, /**< alloc mem failed */ | |||
@@ -33,10 +33,10 @@ typedef enum tagCcStatus { | |||
CC_STATUS_RUNTIME_ERROR = 6, /**< runtime error */ | |||
CC_STATUS_NOT_SUPPORTED = 7, /**< unsupport error */ | |||
CC_STATUS_INVALID_VALUE = 7, /**< invalid value error for blas*/ | |||
CC_STATUS_RESERVED /**< just for check */ | |||
} ccStatus_t; | |||
CC_STATUS_RESERVED = 8, /**< just for check */ | |||
}; | |||
typedef enum tagccKernelType { | |||
enum class ccKernelType { | |||
CCE_AI_CORE = 0, /* cce aicore */ | |||
CCE_AI_CPU = 1, /* cce aicpu */ | |||
TE = 2, /* te operator*/ | |||
@@ -47,9 +47,9 @@ typedef enum tagccKernelType { | |||
CUST_AI_CPU = 7, /* custom aicpu*/ | |||
HOST_CPU = 8, /* host cpu */ | |||
INVALID = 10000 /* unknown kernel type */ | |||
} ccKernelType; | |||
}; | |||
typedef struct tagOpContext { | |||
using ccOpContext = struct tagOpContext { | |||
ccKernelType kernelType; | |||
uint32_t opId; | |||
uint32_t kernelFuncId; | |||
@@ -66,7 +66,28 @@ typedef struct tagOpContext { | |||
uint64_t genVariableBaseAddr; | |||
uint64_t genVariableBaseSize; | |||
uint64_t l2ctrlSize; | |||
} ccOpContext; | |||
} // namespace ge | |||
}; | |||
// Tensor layout formats accepted for AICPU tensors (see ccAICPUTensor below).
enum class tagOpTensorFormat { OP_TENSOR_FORMAT_NC1HWC0 = 0, OP_TENSOR_FORMAT_ND, OP_TENSOR_FORMAT_RESERVED };
// Element data types accepted for AICPU tensors (see ccAICPUTensor below).
enum class tagOpDataType {
  OP_DATA_FLOAT = 0,            /**< float type */
  OP_DATA_HALF,                 /**< fp16 type */
  OP_DATA_INT8,                 /**< int8 type */
  OP_DATA_INT32,                /**< int32 type */
  OP_DATA_UINT8,                /**< uint8 type */
  OP_DATA_HALF_UINT16_PROPOSAL, /**< mixed type for proposal */
  OP_DATA_RESERVED              /**< sentinel, not a real data type */
};
// AICPU Tensor: layout descriptor passed to AICPU kernels.
// NOTE(review): this is a wire/ABI struct — field order and sizes must not change.
using ccAICPUTensor = struct tagOpTensor {
  // real dim info
  tagOpTensorFormat format;  // tensor layout, see tagOpTensorFormat
  tagOpDataType data_type;   // element type, see tagOpDataType
  int32_t dim_cnt;           // number of valid entries in dim[]
  int32_t mm;                // NOTE(review): purpose not evident from this header — confirm with consumers
  int32_t dim[8];            // dimension sizes; up to 8 dims, only dim_cnt entries are meaningful
};
} // namespace ge | |||
#endif // INC_FRAMEWORK_COMMON_TASKDOWN_COMMON_H_ |
@@ -19,7 +19,6 @@ | |||
#include <climits> | |||
#include <cstdint> | |||
#include <algorithm> | |||
#include <map> | |||
#include <memory> | |||
#include <string> | |||
@@ -44,32 +43,17 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const std::string OP_DEB | |||
FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const std::string OP_DEBUG_ALL; | |||
// Profile-related constants | |||
FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const std::string OME_PROFILE; | |||
FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const std::string CCE_PROFILE; | |||
FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const std::string RTS_PROFILE; | |||
FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const std::string PROFILE_MODEL_ID; | |||
FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const std::string MODEL_ATTR_TASKS; | |||
FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const std::string MODEL_ATTR_TASK_GEN_BASE_ADDR; | |||
FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const std::string MODEL_ATTR_TASK_GEN_WEIGHT_ADDR; | |||
FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const std::string MODEL_ATTR_FUSION_MODEL_DEF; | |||
FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const uint64_t ALLOC_MEMORY_MAX_SIZE; // Max size of 8 GB. | |||
template <typename K, typename V> | |||
static std::pair<V, K> flip_pair(const std::pair<K, V> &p) { | |||
return std::pair<V, K>(p.second, p.first); | |||
} | |||
template <typename K, typename V> | |||
static std::map<V, K> flip_map(std::map<K, V> src) { | |||
std::map<V, K> dst; | |||
std::transform(src.begin(), src.end(), std::inserter(dst, dst.begin()), flip_pair<K, V>); | |||
return dst; | |||
} | |||
REGISTER_OPTYPE_DECLARE(DATA, "Data"); | |||
REGISTER_OPTYPE_DECLARE(AIPPDATA, "AippData"); | |||
REGISTER_OPTYPE_DECLARE(QUEUE_DATA, "QueueData"); | |||
REGISTER_OPTYPE_DECLARE(CONVOLUTION, "Convolution"); | |||
REGISTER_OPTYPE_DECLARE(CORRELATION, "Correlation"); | |||
REGISTER_OPTYPE_DECLARE(CORRELATIONV2, "Correlation_V2"); | |||
@@ -140,6 +124,8 @@ REGISTER_OPTYPE_DECLARE(SQUEEZE, "Squeeze"); | |||
REGISTER_OPTYPE_DECLARE(UNSQUEEZE, "Unsqueeze"); | |||
REGISTER_OPTYPE_DECLARE(SQUEEZEV2, "SqueezeV2"); | |||
REGISTER_OPTYPE_DECLARE(UNSQUEEZEV2, "UnsqueezeV2"); | |||
REGISTER_OPTYPE_DECLARE(SQUEEZEV3, "SqueezeV3"); | |||
REGISTER_OPTYPE_DECLARE(UNSQUEEZEV3, "UnsqueezeV3"); | |||
REGISTER_OPTYPE_DECLARE(STRIDEDSLICE, "StridedSlice"); | |||
REGISTER_OPTYPE_DECLARE(RANGE, "Range"); | |||
REGISTER_OPTYPE_DECLARE(RPNPROPOSALS, "GenerateRpnProposals"); | |||
@@ -438,6 +424,7 @@ REGISTER_OPTYPE_DECLARE(MODELEXIT, "ModelExit"); | |||
REGISTER_OPTYPE_DECLARE(SEND, "Send"); | |||
REGISTER_OPTYPE_DECLARE(RECV, "Recv"); | |||
REGISTER_OPTYPE_DECLARE(ENDOFSEQUENCE, "EndOfSequence"); | |||
REGISTER_OPTYPE_DECLARE(STARTOFSEQUENCE, "StartOfSequence"); | |||
REGISTER_OPTYPE_DECLARE(LABELSET, "LabelSet"); | |||
REGISTER_OPTYPE_DECLARE(LABELGOTO, "LabelGoto"); | |||
@@ -461,8 +448,6 @@ REGISTER_OPTYPE_DECLARE(ELU_GRAD, "EluGrad"); | |||
REGISTER_OPTYPE_DECLARE(ADD_V2, "AddV2"); | |||
REGISTER_OPTYPE_DECLARE(DATAFORMATDIMMAP, "DataFormatDimMap"); | |||
REGISTER_OPTYPE_DECLARE(DATAFORMATVECPERMUTE, "DataFormatVecPermute"); | |||
REGISTER_OPTYPE_DECLARE(BESSELI0e, "BesselI0e"); | |||
REGISTER_OPTYPE_DECLARE(BESSELI1e, "BesselI1e"); | |||
REGISTER_OPTYPE_DECLARE(DEQUANTIZE, "Dequantize"); | |||
REGISTER_OPTYPE_DECLARE(APPLYADADELTA, "ApplyAdadelta"); | |||
REGISTER_OPTYPE_DECLARE(APPLYADAGRAD, "ApplyAdagrad"); | |||
@@ -516,29 +501,11 @@ REGISTER_OPTYPE_DECLARE(GETDYNAMICDIMS, "GetDynamicDims"); | |||
// profiling training trace node | |||
REGISTER_OPTYPE_DECLARE(PROFILINGTRAININGTRACE, "ProfilingTrainingTrace"); | |||
enum InputMode { INPUT = 0, CONST_INPUT }; | |||
// Definition of the processing status enum of the process module | |||
enum ModelProcessState { | |||
INIT_STATE = 0, // init status | |||
WAIT_EVENT_STATE, // Wait for the event status | |||
IND_RSLT_STATE, // The model execution result is being output to the high level | |||
STOPPED_STATE, // Model execution completed. The model enters this state after Model Manager::Stop | |||
RESERVED_STATE, // reserved | |||
}; | |||
// Indicates the enun definition of the execution mode of the access module | |||
enum SysMode { | |||
INFERENCE = 0, // Normal, that is, Inference mode | |||
DEBUG, // Debug mode | |||
TIME, // Model execution time mode, including the execution time of each OP | |||
STOP, // STOP mode | |||
RESET, // RESET mode | |||
PERFORMANCE, // Impact of enabling the performance model: 1. The input data of the model is considered ready and does | |||
// not need to be converted | |||
ANDROID_DEBUG, // Exports Android platform computing data | |||
RESERVED, // reserved | |||
}; | |||
// Stack series | |||
REGISTER_OPTYPE_DECLARE(STACK, "Stack"); | |||
REGISTER_OPTYPE_DECLARE(STACKPUSH, "StackPush"); | |||
REGISTER_OPTYPE_DECLARE(STACKPOP, "StackPop"); | |||
REGISTER_OPTYPE_DECLARE(STACKCLOSE, "StackClose"); | |||
// @brief encryption type of the model file | |||
enum ModelEncryptType { | |||
@@ -577,22 +544,22 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const uint32_t MODEL_FIL | |||
/// | |||
/// @brief model name length | |||
/// | |||
static constexpr uint32_t MODEL_NAME_LENGTH = 32; | |||
constexpr uint32_t MODEL_NAME_LENGTH = 32U; | |||
/// | |||
/// @brief length of user-defined information | |||
/// | |||
static constexpr uint32_t USER_DEFINE_INFO_LENGTH = 32; | |||
constexpr uint32_t USER_DEFINE_INFO_LENGTH = 32U; | |||
/// | |||
/// @brief length of the model file signature | |||
/// | |||
static constexpr uint32_t MODEL_FILE_CHECKSUM_LENGTH = 64; | |||
constexpr uint32_t MODEL_FILE_CHECKSUM_LENGTH = 64U; | |||
/// | |||
/// @brief length of the reserved field in the model file header | |||
/// | |||
static constexpr uint32_t MODEL_FILE_RESERVED_LENGTH = 75; | |||
constexpr uint32_t MODEL_FILE_RESERVED_LENGTH = 75U; | |||
// DATA node type | |||
FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const std::string DATA_TYPE; | |||
@@ -617,7 +584,7 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const std::string OP_TYP | |||
FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const std::string OP_CONF_DELIMITER; | |||
// dim default size value | |||
static const int32_t DIM_DEFAULT_SIZE = 4; | |||
constexpr int32_t DIM_DEFAULT_SIZE = 4; | |||
// dim extension default value | |||
FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const int32_t DIM_DEFAULT_VALUE; | |||
@@ -650,34 +617,35 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const uint32_t STREAM_SW | |||
FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY extern const std::string NODE_NAME_GLOBAL_STEP; | |||
static const uint32_t PLATFORM_VERSION_LEN = 20; | |||
constexpr uint32_t PLATFORM_VERSION_LEN = 20U; | |||
// Definition of the file header of the model file | |||
struct ModelFileHeader { | |||
uint32_t magic = MODEL_FILE_MAGIC_NUM; // magic number of DOMI | |||
uint32_t headsize = MODEL_FILE_HEAD_LEN; // length of the model header. The value is fixed at 256 | |||
uint32_t version = MODEL_VERSION; // version 1.0 | |||
uint8_t checksum[MODEL_FILE_CHECKSUM_LENGTH] = {0}; // signature | |||
uint32_t length = 0; // Ciphertext length. In the non-encryption model, the length is the plaintext length. | |||
uint8_t is_encrypt = ModelEncryptType::UNENCRYPTED; // whether encrypted 0:not encrypt, 1:encrypt | |||
uint8_t is_checksum = ModelCheckType::CHECK; // whether to check the checksum | |||
uint8_t modeltype = 0; // 0:IR model 1:standard model 2: OM Tiny model | |||
uint8_t genmode = 0; // 0:offline generate 1:online generate | |||
uint8_t name[MODEL_NAME_LENGTH] = {0}; // Model name, which contains 32 characters | |||
uint32_t ops = 0; // Computing power (Kops) | |||
uint8_t userdefineinfo[USER_DEFINE_INFO_LENGTH] = {0}; // User-defined information. The value contains 32 characters | |||
uint32_t om_ir_version = 0; | |||
uint32_t model_num = 0; | |||
uint8_t platform_version[PLATFORM_VERSION_LEN] = {0}; | |||
uint8_t platform_type = {0}; | |||
uint8_t reserved[MODEL_FILE_RESERVED_LENGTH] = {0}; // Reserved field 75 | |||
uint32_t magic = MODEL_FILE_MAGIC_NUM; // magic number of DOMI | |||
uint32_t headsize = MODEL_FILE_HEAD_LEN; // length of the model header. The value is fixed at 256 | |||
uint32_t version = MODEL_VERSION; // version 1.0 | |||
uint8_t checksum[MODEL_FILE_CHECKSUM_LENGTH] = {0U}; // signature | |||
uint32_t length = 0U; // Ciphertext length. In the non-encryption model, the length is the plaintext length. | |||
uint8_t is_encrypt = | |||
static_cast<uint8_t>(ModelEncryptType::UNENCRYPTED); // whether encrypted 0:not encrypt, 1:encrypt | |||
uint8_t is_checksum = static_cast<uint8_t>(ModelCheckType::CHECK); // whether to check the checksum | |||
uint8_t modeltype = 0U; // 0:IR model 1:standard model 2: OM Tiny model | |||
uint8_t genmode = 0U; // 0:offline generate 1:online generate | |||
uint8_t name[MODEL_NAME_LENGTH] = {0U}; // Model name, which contains 32 characters | |||
uint32_t ops = 0U; // Computing power (Kops) | |||
uint8_t userdefineinfo[USER_DEFINE_INFO_LENGTH] = {0U}; // User-defined information. The value contains 32 characters | |||
uint32_t om_ir_version = 0U; | |||
uint32_t model_num = 0U; | |||
uint8_t platform_version[PLATFORM_VERSION_LEN] = {0U}; | |||
uint8_t platform_type = {0U}; | |||
uint8_t reserved[MODEL_FILE_RESERVED_LENGTH] = {0U}; // Reserved field 75 | |||
}; | |||
static constexpr uint8_t TARGET_TYPE_LTTE_8BIT = 0; | |||
static constexpr uint8_t TARGET_TYPE_MINI_8BIT = 1; | |||
constexpr uint8_t TARGET_TYPE_LTTE_8BIT = 0U; | |||
constexpr uint8_t TARGET_TYPE_MINI_8BIT = 1U; | |||
// number of partitions in the current model | |||
static constexpr uint32_t PARTITION_SIZE = 5; | |||
constexpr uint32_t PARTITION_SIZE = 5U; | |||
enum ModelPartitionType { MODEL_DEF = 0, WEIGHTS_DATA, TASK_INFO, TBE_KERNELS, CUST_AICPU_KERNELS }; | |||
@@ -692,22 +660,9 @@ struct ModelPartitionTable { | |||
ModelPartitionMemInfo partition[0]; | |||
}; | |||
#define SIZE_OF_MODEL_PARTITION_TABLE(table) (sizeof(ModelPartitionTable) + sizeof(ModelPartitionMemInfo) * (table).num) | |||
// Filter format | |||
typedef enum tagDomiFilterFormat { | |||
DOMI_FILTER_KCHW, // KCHW | |||
DOMI_FILTER_HWCK, // HWCK | |||
DOMI_FILTER_RESERVED | |||
} domiFilterFormat_t; | |||
// Const data trans type | |||
typedef enum tagDomiConstDataTransType { | |||
DOMI_CONST_DATA_NOT_CHANGE = 0, // No action is required | |||
DOMI_CONST_DATA_TRANS_MATMUL, // The const input to MatMul and needs to be transposed | |||
DOMI_CONST_DATA_RESERVED | |||
} domiConstDataTransType_t; | |||
// Returns the total byte size of a ModelPartitionTable including its trailing
// flexible array of `table.num` ModelPartitionMemInfo entries.
// The cast widens before the multiply to avoid 32-bit overflow for large num.
inline uint64_t SizeOfModelPartitionTable(const ModelPartitionTable &table) {
  return sizeof(ModelPartitionTable) + (sizeof(ModelPartitionMemInfo) * static_cast<uint64_t>(table.num));
}
// mode of activation | |||
typedef enum tagDomiActivationMode { | |||
DOMI_ACTIVATION_SIGMOID = 0, // sigmoid | |||
@@ -727,190 +682,6 @@ typedef enum tagDomiActivationMode { | |||
DOMI_ACTIVATION_RESERVED | |||
} domiActivationMode_t; | |||
// mode of batchnorm | |||
typedef enum tagDomiBatchNormMode { | |||
DOMI_BATCHNORM_PER_ACTIVATION = 0, // bnScale, bnBias tensor dims are 1xCxHxW | |||
DOMI_BATCHNORM_SPATIAL, // bnScale, bnBias tensor dims are 1xCx1x1 | |||
DOMI_BATCHNORM_RESERVED | |||
} domiBatchNormMode_t; | |||
// eltwise mode | |||
typedef enum tagDomiEltwiseMode { | |||
DOMI_ELTWISE_PROD = 0, // prod | |||
DOMI_ELTWISE_SUM, // sum | |||
DOMI_ELTWISE_MAX, // max | |||
DOMI_ELTWISE_RESERVED | |||
} domiEltwiseMode_t; | |||
// mode of padding | |||
typedef enum tagDomiPaddingMode { | |||
DOMI_PADDING_CEIL = 0, // Default padding mode | |||
DOMI_PADDING_DIRECTASSIGN, // Default padding mode: NOTSET | |||
DOMI_PADDING_VALID, // VALID padding mode | |||
DOMI_PADDING_SAME, // Padding values of 0 are always used | |||
DOMI_PADDING_CEIL_NEW, // Padding values of 0 are always used | |||
DOMI_PADDING_VALID_NEW, // Padding values of 0 are always used | |||
DOMI_PADDING_SAME_NEW, // Padding values of 0 are always used | |||
DOMI_PADDING_RESERVED | |||
} domiPaddingMode_t; | |||
// algorithm of convolution forward | |||
typedef enum tagDomiConvolutionFwdAlgo { | |||
DOMI_CONVOLUTION_FWD_ALGO_GEMM = 0, // matrix gemm algo | |||
DOMI_CONVOLUTION_FWD_ALGO_WINOGRAD, // Winograd Transform algo | |||
DOMI_CONVOLUTION_FWD_ALGO_GEMM_ACCU_FLOAT32, // accumulate in L0c with FP32 | |||
DOMI_CONVOLUTION_FWD_ALGO_RESERVED | |||
} domiConvolutionFwdAlgo_t; | |||
typedef enum tagDomiFullConnectFwdAlgo { | |||
DOMI_FULLCONNECT_FWD_ALGO_HALF = 0, // accumulate in L0c with FP16 | |||
DOMI_FULLCONNECT_FWD_ALGO_FLOAT32 // accumulate in L0c with FP32 | |||
} domiFullConnectFwdAlgo_t; | |||
typedef enum tagDomiPooingFwdAlgo { | |||
DOMI_POOLING_FWD_ALGO_HALF = 0, // accumulate in L0c with FP16 | |||
DOMI_POOLING_FWD_ALGO_FLOAT32 // accumulate in L0c with FP32 | |||
} domiPooingFwdAlgo_t; | |||
// mode of convolution | |||
typedef enum tagDomiConvolutionMode { | |||
DOMI_CONV_CONVOLUTION = 0, // math convolution | |||
DOMI_CONV_CROSS_CORRELATION, // cross-correlation convolution | |||
DOMI_CONV_DECONVOLUTION, // deconvolution, also named transposed convolution | |||
DOMI_CONV_MODE_DEPTHWISE, // depthwise convolution | |||
DOMI_CONV_MODE_RESERVED | |||
} domiConvolutionMode_t; | |||
// softmax mode | |||
typedef enum tagDomiSoftmaxMode { | |||
DOMI_SOFTMAX_MODE_INSTANCE = 0, // compute the softmax over all C, H, W for each N | |||
DOMI_SOFTMAX_MODE_CHANNEL, // compute the softmax over all C for each H, W, N | |||
DOMI_SOFTMAX_MODE_HEIGHT, // compute the softmax over all H for each N, C, W | |||
DOMI_SOFTMAX_MODE_WIDTH, // compute the softmax over all W for each N, C, H | |||
DOMI_SOFTMAX_MODE_RESERVED | |||
} domiSoftmaxMode_t; | |||
// softmax algorithm | |||
typedef enum tagDomiSoftmaxAlgo { | |||
DOMI_SOFTMAX_FAST = 0, // straightforward implementation | |||
DOMI_SOFTMAX_ACCURATE, // subtract max from every point to avoid overflow | |||
DOMI_SOFTMAX_LOG, // perform the Log softmax operation to avoid overflow | |||
DOMI_SOFTMAX_ACCURATE_FP32, | |||
DOMI_SOFTMAX_RESERVED | |||
} domiSoftmaxAlgo_t; | |||
// algorithm of convolution backward | |||
typedef enum tagDomiConvolutionBwdAlgo { | |||
DOMI_CONVOLUTION_BWD_ALGO_GEMM = 0, // matrix gemm algo | |||
DOMI_CONVOLUTION_BWD_ALGO_WINOGRAD, // Winograd Transform algo | |||
DOMI_CONVOLUTION_BWD_ALGO_RESERVED | |||
} domiConvolutionBwdAlgo_t; | |||
// mode of pooling | |||
typedef enum tagDomiPoolingMode { | |||
DOMI_POOLING_MAX = 0, // max pooling | |||
DOMI_POOLING_AVG, // average pooling | |||
DOMI_POOLING_L2, // L2 pooling | |||
DOMI_POOLING_RESERVED | |||
} domiPoolingMode_t; | |||
// propagate Nan | |||
typedef enum tagDomiNanPropagation { | |||
DOMI_NAN_NOT_PROPAGATE = 0, // Nan numbers are not propagated | |||
DOMI_NAN_PROPAGATE, // Nan numbers are propagated | |||
DOMI_NAN_PROPAGATE_RESERVED | |||
} domiNanPropagation_t; | |||
// mode of cropandresize | |||
typedef enum tagDomiCropAndResizeMode { | |||
DOMI_RESIZE_METHOD_BILINEAR = 0, // resize bilinear | |||
DOMI_RESIZE_METHOD_NEAREST, // resize nearest | |||
DOMI_RESIZE_RESERVED | |||
} domiCropAndResizeMode_t; | |||
// yolo version | |||
typedef enum tagDomiYoloVersion { DOMI_YOLO_V2 = 1, DOMI_YOLO_V3, DOMI_YOLO_TRSERVED } domiYoloVersion_t; | |||
typedef enum tagDomiRNNScopePassType { | |||
DOMI_STATIC_BIDIRECTIONAL_RNN_GENERAL_PASS = 0, | |||
DOMI_DYNAMIC_BIDIRECTIONAL_RNN_GENERAL_PASS, | |||
DOMI_DYNAMIC_BIDIRECTIONAL_RNN_BIDAF_PASS | |||
} domiRNNScopePassType; | |||
// RNNDataLayout | |||
typedef enum tagDomiRNNDataLayout { | |||
DOMI_RNN_ND_TBX = 0, // data[max_time,batch_size,Xt] | |||
DOMI_RNN_ND_BTX, // data[batch_size,max_time,Xt] | |||
DOMI_RNN_5D_TX1BX, // data[max_time,Xt,1,batch_size,Xt] | |||
DOMI_RNN_5D_BX1TX, // dataa[batch_size,Xt,1,max_time,Xt] | |||
DOMI_RNN_4DTBX1, | |||
DOMI_ENN_DL_RESERVED | |||
} domiRNNDataLayout_t; | |||
// RNNInputMode | |||
typedef enum tagDomiRNNInputMode { DOMI_RNN_LINEAR_INPUT = 0, DOMI_RNN_SKIP_INPUT } domiRNNInputMode_t; | |||
// RNNDirectionMode | |||
typedef enum tagDomiRNNDirectionMode { DOMI_RNN_UNIDIRECTIONAL = 0, DOMI_RNN_BIDIRECTIONAL } domiDirectionMode_t; | |||
typedef enum tagDomiPoolingCeilMode { DOMI_POOLING_FLOOR = 0, DOMI_POOLING_CEIL } domiPoolingCeilMode_t; | |||
// RNNMode | |||
typedef enum tagDomiRNNActivationMode { | |||
DOMI_RNN_ACTIVATION_SIGMOID = 0, // sigmoid | |||
DOMI_RNN_ACTIVATION_TANH, // tanh | |||
DOMI_RNN_ACTIVATION_RELU, // ReLU | |||
DOMI_RNN_ACTIVATION_RELU1, // ReLU1 | |||
DOMI_RNN_ACTIVATION_RELU6, // ReLU6 | |||
DOMI_RNN_ACTIVATION_RESERVED | |||
} domiRNNActivationMode_t; | |||
typedef enum tagDomiRNNLSTMOutMode { | |||
DOMI_RNN_LSTM_OUT_SEPARATE = 0, | |||
DOMI_RNN_LSTM_OUT_CONCAT, | |||
DOMI_RNN_LSTM_OUT_RESERVED | |||
} domiRNNLSTMOutPutMode_t; | |||
typedef enum tagDomiRNNLSTMStateOutMode { | |||
DOMI_RNN_LSTM_STATE_OUT_SEPARATE = 0, | |||
DOMI_RNN_LSTM_STATE_OUT_CONCAT_ALL, | |||
DOMI_RNN_LSTM_STATE_OUT_RESERVED | |||
} domiRNNLSTMStateOutMode_t; | |||
typedef enum tagDomiRNNMode { | |||
DOMI_RNN_RELU = 0, | |||
DOMI_RNN_TANH, | |||
DOMI_LSTM, | |||
DOMI_GRU, | |||
DOMI_RNN_MODE_RESERVED | |||
} domiRNNMode_t; | |||
typedef enum tagDomiResizeBilinearMode { | |||
DOMI_RESIZE_OUTPUT_DIM_BY_ZOOM_FACTOR = 0, // Output dimension specified by zoom factor | |||
DOMI_RESIZE_OUTPUT_DIM_BY_SHRINK_FACTOR, // specified by shrink factor | |||
DOMI_RESIZE_OUTPUT_DIM_EXPLICIT, // specified explicitly | |||
DOMI_RESIZE_OUTPUT_DIM_RESERVED | |||
} domiResizeOutputDimMode_t; | |||
#pragma pack(1) // single-byte alignment | |||
// DUMP file struct: fixed-size header at the start of a dump file.
// Packed to 1-byte alignment by the surrounding #pragma pack(1).
struct FileHeader {
  int32_t Version;         // version
  int32_t Output_Offset;   // output offset address
  char Reserved[24] = {0}; // 24 bytes reserved
};
// Per-execution basic profiling record written after the dump FileHeader.
// Packed to 1-byte alignment by the surrounding #pragma pack(1).
struct BasicInfo {
  struct FileHeader header; // file header
  int32_t stream_id;        // stream id
  uint64_t start_time;      // start time
  uint64_t end_time;        // end time
  uint32_t input_size;      // input memory size
  uint32_t output_size;     // output memory size
  uint32_t weight_size;     // weight memory size
  uint32_t workspace_size;  // workspace
  uint32_t total_size;      // total memory size
};
#pragma pack() // Cancels single-byte alignment | |||
enum class MemorySizeCalcType { NORMAL = 0, ALWAYS_EMPTY }; | |||
} // namespace ge | |||
@@ -14,8 +14,8 @@ | |||
* limitations under the License. | |||
*/ | |||
#ifndef INC_FRAMEWORK_COMMON_UTIL_H_ | |||
#define INC_FRAMEWORK_COMMON_UTIL_H_ | |||
#ifndef AIR_INC_FRAMEWORK_COMMON_UTIL_H_ | |||
#define AIR_INC_FRAMEWORK_COMMON_UTIL_H_ | |||
#include <climits> | |||
#include <cmath> | |||
@@ -24,13 +24,16 @@ | |||
#include <vector> | |||
#include <google/protobuf/text_format.h> | |||
#include "external/graph/types.h" | |||
#include "external/register/register.h" | |||
#include "framework/common/debug/log.h" | |||
#include "framework/common/scope_guard.h" | |||
#include "framework/common/ge_inner_error_codes.h" | |||
#include "graph/detail/attributes_holder.h" | |||
#define GE_CHECK_POSITIVE_SIZE_RANGE(size) \ | |||
do { \ | |||
if (size <= 0) { \ | |||
if ((size) <= 0) { \ | |||
GELOGE(ge::FAILED, "param[%s] is not a positive number", #size); \ | |||
return PARAM_INVALID; \ | |||
} \ | |||
@@ -46,15 +49,19 @@ | |||
// new ge marco | |||
// Encapsulate common resource releases | |||
#define GE_MAKE_GUARD_RTMEM(var) \ | |||
GE_MAKE_GUARD(var, [&] { \ | |||
if (var) GE_CHK_RT(rtFreeHost(var)); \ | |||
}); | |||
#define GE_MAKE_GUARD_RTMEM(var) \ | |||
GE_MAKE_GUARD(var, [&] { \ | |||
if ((var) != nullptr) { \ | |||
GE_CHK_RT(rtFreeHost(var)); \ | |||
} \ | |||
}) | |||
#define GE_MAKE_GUARD_RTSTREAM(var) \ | |||
GE_MAKE_GUARD(var, [&] { \ | |||
if (var) GE_CHK_RT(rtStreamDestroy(var)); \ | |||
}); | |||
#define GE_MAKE_GUARD_RTSTREAM(var) \ | |||
GE_MAKE_GUARD(var, [&] { \ | |||
if ((var) != nullptr) { \ | |||
GE_CHK_RT(rtStreamDestroy(var)); \ | |||
} \ | |||
}) | |||
// For propagating errors when calling a function. | |||
#define GE_RETURN_IF_ERROR(expr) \ | |||
@@ -115,7 +122,7 @@ | |||
// Check if the parameter is null. If yes, return PARAM_INVALID and record the error | |||
#define GE_CHECK_NOTNULL(val) \ | |||
do { \ | |||
if (val == nullptr) { \ | |||
if ((val) == nullptr) { \ | |||
REPORT_INNER_ERROR("E19999", "Param:%s is nullptr, check invalid", #val); \ | |||
GELOGE(ge::FAILED, "[Check][Param:%s]null is invalid.", #val); \ | |||
return ge::PARAM_INVALID; \ | |||
@@ -125,7 +132,7 @@ | |||
// Check if the parameter is null. If yes, just return and record the error | |||
#define GE_CHECK_NOTNULL_JUST_RETURN(val) \ | |||
do { \ | |||
if (val == nullptr) { \ | |||
if ((val) == nullptr) { \ | |||
GELOGE(ge::FAILED, "param[%s] must not be null.", #val); \ | |||
return; \ | |||
} \ | |||
@@ -134,7 +141,7 @@ | |||
// Check whether the parameter is null. If so, execute the exec_expr expression and record the error log | |||
#define GE_CHECK_NOTNULL_EXEC(val, exec_expr) \ | |||
do { \ | |||
if (val == nullptr) { \ | |||
if ((val) == nullptr) { \ | |||
GELOGE(ge::FAILED, "param[%s] must not be null.", #val); \ | |||
exec_expr; \ | |||
} \ | |||
@@ -143,7 +150,7 @@ | |||
// Check whether the parameter is null. If yes, return directly and record the error log | |||
#define GE_RT_VOID_CHECK_NOTNULL(val) \ | |||
do { \ | |||
if (val == nullptr) { \ | |||
if ((val) == nullptr) { \ | |||
GELOGE(ge::FAILED, "param[%s] must not be null.", #val); \ | |||
return; \ | |||
} \ | |||
@@ -152,7 +159,7 @@ | |||
// Check if the parameter is null. If yes, return false and record the error log | |||
#define GE_RT_FALSE_CHECK_NOTNULL(val) \ | |||
do { \ | |||
if (val == nullptr) { \ | |||
if ((val) == nullptr) { \ | |||
GELOGE(ge::FAILED, "param[%s] must not be null.", #val); \ | |||
return false; \ | |||
} \ | |||
@@ -161,7 +168,7 @@ | |||
// Check if the parameter is out of bounds | |||
#define GE_CHECK_SIZE(size) \ | |||
do { \ | |||
if (size == 0) { \ | |||
if ((size) == 0U) { \ | |||
GELOGE(ge::FAILED, "param[%s] is out of range", #size); \ | |||
return ge::PARAM_INVALID; \ | |||
} \ | |||
@@ -170,7 +177,7 @@ | |||
// Check if the value on the left is greater than or equal to the value on the right | |||
#define GE_CHECK_GE(lhs, rhs) \ | |||
do { \ | |||
if (lhs < rhs) { \ | |||
if ((lhs) < (rhs)) { \ | |||
GELOGE(ge::FAILED, "param[%s] is less than[%s]", #lhs, #rhs); \ | |||
return ge::PARAM_INVALID; \ | |||
} \ | |||
@@ -179,7 +186,7 @@ | |||
// Check if the value on the left is less than or equal to the value on the right | |||
#define GE_CHECK_LE(lhs, rhs) \ | |||
do { \ | |||
if (lhs > rhs) { \ | |||
if ((lhs) > (rhs)) { \ | |||
GELOGE(ge::FAILED, "param[%s] is greater than[%s]", #lhs, #rhs); \ | |||
return ge::PARAM_INVALID; \ | |||
} \ | |||
@@ -187,102 +194,37 @@ | |||
#define GE_DELETE_NEW_SINGLE(var) \ | |||
do { \ | |||
if (var != nullptr) { \ | |||
delete var; \ | |||
var = nullptr; \ | |||
if ((var) != nullptr) { \ | |||
delete (var); \ | |||
(var) = nullptr; \ | |||
} \ | |||
} while (false) | |||
#define GE_DELETE_NEW_ARRAY(var) \ | |||
do { \ | |||
if (var != nullptr) { \ | |||
delete[] var; \ | |||
var = nullptr; \ | |||
if ((var) != nullptr) { \ | |||
delete[](var); \ | |||
(var) = nullptr; \ | |||
} \ | |||
} while (false) | |||
#define GE_FREE_RT_LOG(addr) \ | |||
do { \ | |||
if (addr != nullptr) { \ | |||
if ((addr) != nullptr) { \ | |||
const rtError_t error = rtFree(addr); \ | |||
if (error != RT_ERROR_NONE) { \ | |||
GELOGE(RT_FAILED, "Call rtFree failed, error: %#x", error); \ | |||
} \ | |||
addr = nullptr; \ | |||
(addr) = nullptr; \ | |||
} \ | |||
} while (false) | |||
namespace ge { | |||
/** | |||
* @ingroup domi_common | |||
* @brief version of om.proto file | |||
*/ | |||
static constexpr int32_t OM_PROTO_VERSION = 2; | |||
/** | |||
* Finding an Integer Ceiling Value Without Precision Loss | |||
*/ | |||
#define CEIL(N, n) (((N) + (n)-1) / (n)) | |||
namespace ge { | |||
using google::protobuf::Message; | |||
/// | |||
/// @ingroup domi_common | |||
/// @brief Reads the proto structure from an array. | |||
/// @param [in] data proto data to be read | |||
/// @param [in] size proto data size | |||
/// @param [out] proto Memory for storing the proto file | |||
/// @return true success | |||
/// @return false fail | |||
/// | |||
GE_FUNC_VISIBILITY bool ReadProtoFromArray(const void *data, int32_t size, Message *proto); | |||
/// | |||
/// @ingroup domi_proto | |||
/// @brief Reads the proto file in the text format. | |||
/// @param [in] file path of proto file | |||
/// @param [out] message Memory for storing the proto file | |||
/// @return true success | |||
/// @return false fail | |||
/// | |||
GE_FUNC_VISIBILITY bool ReadProtoFromText(const char *file, google::protobuf::Message *message); | |||
/// | |||
/// @ingroup: domi_common | |||
/// @brief: get length of file | |||
/// @param [in] input_file: path of file | |||
/// @return long: File length. If the file length fails to be obtained, the value -1 is returned. | |||
/// | |||
GE_FUNC_VISIBILITY extern long GetFileLength(const std::string &input_file); | |||
/// | |||
/// @ingroup domi_common | |||
/// @brief Reads all data from a binary file. | |||
/// @param [in] file_name path of file | |||
/// @param [out] buffer Output memory address, which needs to be released by the caller. | |||
/// @param [out] length Output memory size | |||
/// @return false fail | |||
/// @return true success | |||
/// | |||
GE_FUNC_VISIBILITY bool ReadBytesFromBinaryFile(const char *const file_name, char **buffer, int32_t &length); | |||
GE_FUNC_VISIBILITY bool ReadBytesFromBinaryFile(const char *file_name, std::vector<char> &buffer); | |||
/// | |||
/// @ingroup domi_common | |||
/// @brief Recursively Creating a Directory | |||
/// @param [in] directory_path Path, which can be a multi-level directory. | |||
/// @return 0 success | |||
/// @return -1 fail | |||
/// | |||
GE_FUNC_VISIBILITY extern int32_t CreateDirectory(const std::string &directory_path); | |||
/// | |||
/// @ingroup domi_common | |||
/// @brief Obtains the current time string. | |||
/// @return Time character string in the format : %Y%m%d%H%M%S, eg: 20171011083555 | |||
/// | |||
GE_FUNC_VISIBILITY std::string CurrentTimeInStr(); | |||
constexpr int32_t OM_PROTO_VERSION = 2; | |||
/// | |||
/// @ingroup domi_common | |||
@@ -294,7 +236,7 @@ template <typename T> | |||
GE_FUNC_VISIBILITY std::string ToString(std::vector<T> &v) { | |||
std::stringstream ss; | |||
ss << "["; | |||
for (T x : v) { | |||
for (const T x : v) { | |||
ss << x; | |||
ss << ", "; | |||
} | |||
@@ -314,7 +256,7 @@ template <typename T> | |||
GE_FUNC_VISIBILITY std::string ToString(const google::protobuf::RepeatedField<T> &rpd_field) { | |||
std::stringstream ss; | |||
ss << "["; | |||
for (T x : rpd_field) { | |||
for (const T x : rpd_field) { | |||
ss << x; | |||
ss << ", "; | |||
} | |||
@@ -343,6 +285,63 @@ GE_FUNC_VISIBILITY std::string ToString(const google::protobuf::RepeatedPtrField | |||
return str_ret; | |||
} | |||
/// | |||
/// @ingroup domi_common | |||
/// @brief Reads the proto structure from an array. | |||
/// @param [in] data proto data to be read | |||
/// @param [in] size proto data size | |||
/// @param [out] proto Memory for storing the proto file | |||
/// @return true success | |||
/// @return false fail | |||
/// | |||
GE_FUNC_VISIBILITY bool ReadProtoFromArray(const void *const data, const int32_t size, | |||
google::protobuf::Message *const proto); | |||
/// | |||
/// @ingroup domi_proto | |||
/// @brief Reads the proto file in the text format. | |||
/// @param [in] file path of proto file | |||
/// @param [out] message Memory for storing the proto file | |||
/// @return true success | |||
/// @return false fail | |||
/// | |||
GE_FUNC_VISIBILITY bool ReadProtoFromText(const char_t *const file, google::protobuf::Message *const message); | |||
/// | |||
/// @ingroup: domi_common | |||
/// @brief: get length of file | |||
/// @param [in] input_file: path of file | |||
/// @return int64_t: File length. If the file length fails to be obtained, the value -1 is returned. | |||
/// | |||
GE_FUNC_VISIBILITY extern int64_t GetFileLength(const std::string &input_file); | |||
/// | |||
/// @ingroup domi_common | |||
/// @brief Reads all data from a binary file. | |||
/// @param [in] file_name path of file | |||
/// @param [out] buffer Output memory address, which needs to be released by the caller. | |||
/// @param [out] length Output memory size | |||
/// @return false fail | |||
/// @return true success | |||
/// | |||
GE_FUNC_VISIBILITY bool ReadBytesFromBinaryFile(const char_t *const file_name, char_t **const buffer, int32_t &length); | |||
/// | |||
/// @ingroup domi_common | |||
/// @brief Recursively Creating a Directory | |||
/// @param [in] directory_path Path, which can be a multi-level directory. | |||
/// @return 0 success | |||
/// @return -1 fail | |||
/// | |||
GE_FUNC_VISIBILITY extern int32_t CreateDirectory(const std::string &directory_path); | |||
/// | |||
/// @ingroup domi_common | |||
/// @brief Obtains the current time string. | |||
/// @return Time character string in the format : %Y%m%d%H%M%S, eg: 20171011083555 | |||
/// | |||
GE_FUNC_VISIBILITY std::string CurrentTimeInStr(); | |||
/// | |||
/// @ingroup domi_common | |||
/// @brief Obtains the absolute time (timestamp) of the current system. | |||
@@ -366,7 +365,7 @@ GE_FUNC_VISIBILITY uint32_t GetCurrentSecondTimestap(); | |||
/// @param [in] b | |||
/// @return false: true: The result is within the normal int64 range. | |||
/// | |||
GE_FUNC_VISIBILITY bool CheckInt64MulOverflow(int64_t a, int64_t b); | |||
GE_FUNC_VISIBILITY bool CheckInt64MulOverflow(const int64_t a, const int64_t b); | |||
/// | |||
/// @ingroup domi_common | |||
@@ -374,7 +373,7 @@ GE_FUNC_VISIBILITY bool CheckInt64MulOverflow(int64_t a, int64_t b); | |||
/// @param [in] path of input file | |||
/// @param [out] Absolute path of a file. If the absolute path cannot be obtained, an empty string is returned | |||
/// | |||
GE_FUNC_VISIBILITY std::string RealPath(const char *path); | |||
GE_FUNC_VISIBILITY std::string RealPath(const char_t *path); | |||
/// | |||
/// @ingroup domi_common | |||
@@ -401,17 +400,9 @@ GE_FUNC_VISIBILITY bool CheckOutputPathValid(const std::string &file_path, const | |||
/// @param [in] str file path | |||
/// @param [out] result | |||
/// | |||
GE_FUNC_VISIBILITY bool ValidateStr(const std::string &str, const std::string &mode); | |||
GE_FUNC_VISIBILITY bool ValidateStr(const std::string &file_path, const std::string &mode); | |||
/// | |||
/// @ingroup domi_common | |||
/// @brief Check path invalid | |||
/// @param [in] path, path to be checked | |||
/// @param [in] length, length of path | |||
/// @return 0 success | |||
/// @return -1 fail | |||
/// | |||
GE_FUNC_VISIBILITY Status CheckPath(const char *path, size_t length); | |||
GE_FUNC_VISIBILITY Status ConvertToInt32(const std::string &str, int32_t &val); | |||
} // namespace ge | |||
#endif // INC_FRAMEWORK_COMMON_UTIL_H_ | |||
#endif // AIR_INC_FRAMEWORK_COMMON_UTIL_H_ |
@@ -26,11 +26,11 @@ | |||
#include "graph/types.h" | |||
namespace ge { | |||
enum PriorityEnum { | |||
enum class PriorityEnum { | |||
COST_0 = 0, | |||
COST_1, | |||
COST_2, | |||
COST_3, | |||
COST_1 = 1, | |||
COST_2 = 2, | |||
COST_3 = 3, | |||
COST_9 = 9, | |||
COST_10 = 10, | |||
}; | |||
@@ -38,7 +38,7 @@ enum PriorityEnum { | |||
struct DNNEngineAttribute { | |||
std::string engine_name; | |||
std::vector<std::string> mem_type; | |||
uint32_t compute_cost; | |||
PriorityEnum compute_cost; | |||
enum RuntimeType runtime_type; // HOST, DEVICE | |||
// If engine input format must be specific, set this attribute, else set FORMAT_RESERVED | |||
Format engine_input_format; | |||
@@ -53,10 +53,11 @@ class GE_FUNC_VISIBILITY DNNEngine { | |||
engine_attribute_ = attrs; | |||
} | |||
virtual ~DNNEngine() = default; | |||
Status Initialize(const std::map<std::string, std::string> &options) { | |||
Status Initialize(const std::map<std::string, std::string> &options) const { | |||
(void)options; | |||
return SUCCESS; | |||
} | |||
Status Finalize() { | |||
Status Finalize() const { | |||
return SUCCESS; | |||
} | |||
void GetAttributes(DNNEngineAttribute &attr) const { | |||
@@ -32,18 +32,19 @@ | |||
namespace ge { | |||
class SingleOp; | |||
class DynamicSingleOp; | |||
class GeRootModel; | |||
struct RunModelData { | |||
uint32_t index; // Data index | |||
uint32_t modelId; | |||
std::vector<DataBuffer> blobs; // All input/output data buffer | |||
uint32_t timestamp; // Data creation time | |||
uint32_t timeout; // Processing timeout | |||
uint64_t request_id = 0; // Request ID | |||
uint64_t dynamic_batch_size = 0; // Dynamic batch size scene, set dynamic size, not supported by default:0 | |||
uint64_t dynamic_image_height = 0; // Dynamic image size scene, set image height, not supported by default:0 | |||
uint64_t dynamic_image_width = 0; // Dynamic image size scene, set image width, not supported by default:0 | |||
std::vector<uint64_t> dynamic_dims; // Dynamic dims scene, set dynamic dims, not supported by default:empty | |||
std::vector<DataBuffer> blobs; // All input/output data buffer | |||
uint32_t timestamp; // Data creation time | |||
uint32_t timeout; // Processing timeout | |||
uint64_t request_id = 0UL; // Request ID | |||
uint64_t dynamic_batch_size = 0UL; // Dynamic batch size scene, set dynamic size, not supported by default:0 | |||
uint64_t dynamic_image_height = 0UL; // Dynamic image size scene, set image height, not supported by default:0 | |||
uint64_t dynamic_image_width = 0UL; // Dynamic image size scene, set image width, not supported by default:0 | |||
std::vector<uint64_t> dynamic_dims; // Dynamic dims scene, set dynamic dims, not supported by default:empty | |||
}; | |||
class GE_FUNC_VISIBILITY GeExecutor { | |||
@@ -69,11 +70,11 @@ class GE_FUNC_VISIBILITY GeExecutor { | |||
/// | |||
static Status FinalizeEx(); | |||
Status UnloadModel(uint32_t modelId); | |||
Status UnloadModel(const uint32_t model_id); | |||
// Get input and output descriptor | |||
Status GetModelDescInfo(uint32_t model_id, std::vector<TensorDesc> &input_desc, std::vector<TensorDesc> &output_desc, | |||
bool new_model_desc = false); | |||
Status GetModelDescInfo(const uint32_t model_id, std::vector<TensorDesc> &input_desc, | |||
std::vector<TensorDesc> &output_desc, const bool new_model_desc = false); | |||
/// | |||
/// @ingroup ge | |||
@@ -84,7 +85,8 @@ class GE_FUNC_VISIBILITY GeExecutor { | |||
/// @param [in] batch_size: batch size entered by user in dynamic multi-batch scenario | |||
/// @return execute result | |||
/// | |||
Status SetDynamicBatchSize(uint32_t model_id, void *dynamic_input_addr, uint64_t length, uint64_t batch_size); | |||
Status SetDynamicBatchSize(const uint32_t model_id, void *const dynamic_input_addr, const uint64_t length, | |||
const uint64_t batch_size); | |||
/// | |||
/// @ingroup ge | |||
@@ -96,8 +98,8 @@ class GE_FUNC_VISIBILITY GeExecutor { | |||
/// @param [in] image_width: image width entered by user in dynamic multi-resolution scenario | |||
/// @return execute result | |||
/// | |||
Status SetDynamicImageSize(uint32_t model_id, void *dynamic_input_addr, uint64_t length, uint64_t image_height, | |||
uint64_t image_width); | |||
Status SetDynamicImageSize(const uint32_t model_id, void *const dynamic_input_addr, const uint64_t length, | |||
const uint64_t image_height, const uint64_t image_width); | |||
/// | |||
/// @ingroup ge | |||
@@ -109,7 +111,7 @@ class GE_FUNC_VISIBILITY GeExecutor { | |||
/// @param [in] dynamic_dims: array of dynamic dimensions | |||
/// @return execute result | |||
/// | |||
Status SetDynamicDims(uint32_t model_id, void *dynamic_input_addr, uint64_t length, | |||
Status SetDynamicDims(const uint32_t model_id, void *const dynamic_input_addr, const uint64_t length, | |||
const std::vector<uint64_t> &dynamic_dims); | |||
/// | |||
@@ -120,7 +122,7 @@ class GE_FUNC_VISIBILITY GeExecutor { | |||
/// @param [out] cur_dynamic_dims: current dynamic dims | |||
/// @return execute result | |||
/// | |||
Status GetCurDynamicDims(uint32_t model_id, const std::vector<uint64_t> &dynamic_dims, | |||
Status GetCurDynamicDims(const uint32_t model_id, const std::vector<uint64_t> &dynamic_dims, | |||
std::vector<uint64_t> &cur_dynamic_dims); | |||
/// | |||
@@ -131,7 +133,8 @@ class GE_FUNC_VISIBILITY GeExecutor { | |||
/// @param [out] dynamic_type | |||
/// @return execute result | |||
/// | |||
Status GetDynamicBatchInfo(uint32_t model_id, std::vector<std::vector<int64_t>> &batch_info, int32_t &dynamic_type); | |||
Status GetDynamicBatchInfo(const uint32_t model_id, std::vector<std::vector<int64_t>> &batch_info, | |||
int32_t &dynamic_type); | |||
/// | |||
/// @ingroup ge | |||
@@ -140,7 +143,7 @@ class GE_FUNC_VISIBILITY GeExecutor { | |||
/// @param [out] batch_info | |||
/// @return execute result | |||
/// | |||
Status GetCombinedDynamicDims(uint32_t model_id, std::vector<std::vector<int64_t>> &batch_info); | |||
Status GetCombinedDynamicDims(const uint32_t model_id, std::vector<std::vector<int64_t>> &batch_info); | |||
/// | |||
/// @ingroup ge | |||
@@ -149,7 +152,7 @@ class GE_FUNC_VISIBILITY GeExecutor { | |||
/// @param [out] user_designate_shape_order | |||
/// @return execute result | |||
/// | |||
Status GetUserDesignateShapeOrder(uint32_t model_id, std::vector<std::string> &user_designate_shape_order); | |||
Status GetUserDesignateShapeOrder(const uint32_t model_id, std::vector<std::string> &user_designate_shape_order); | |||
Status GetCurShape(const uint32_t model_id, std::vector<int64_t> &batch_info, int32_t &dynamic_type); | |||
@@ -163,18 +166,18 @@ class GE_FUNC_VISIBILITY GeExecutor { | |||
/// @param [in] aippParms: kAippDynamicPara by user in dynamic aipp | |||
/// @return execute result | |||
/// | |||
Status SetDynamicAippData(uint32_t model_id, void *dynamic_input_addr, uint64_t length, | |||
Status SetDynamicAippData(const uint32_t model_id, void *const dynamic_input_addr, const uint64_t length, | |||
const std::vector<kAippDynamicBatchPara> &aipp_batch_para, | |||
const kAippDynamicPara &aippParms); | |||
const kAippDynamicPara &aipp_parms); | |||
Status GetAIPPInfo(uint32_t model_id, uint32_t index, AippConfigInfo &aipp_info); | |||
Status GetAIPPInfo(const uint32_t model_id, const uint32_t index, AippConfigInfo &aipp_info); | |||
Status GetOpAttr(uint32_t model_id, const std::string &op_name, const std::string &attr_name, | |||
Status GetOpAttr(const uint32_t model_id, const std::string &op_name, const std::string &attr_name, | |||
std::string &attr_value); | |||
Status GetModelAttr(uint32_t model_id, std::vector<std::string> &dynamic_output_shape_info); | |||
Status GetModelAttr(const uint32_t model_id, std::vector<std::string> &dynamic_output_shape_info); | |||
Status GetAippType(uint32_t model_id, uint32_t index, InputAippType &type, size_t &aipp_index); | |||
Status GetAippType(const uint32_t model_id, const uint32_t index, InputAippType &type, size_t &aipp_index); | |||
Status CommandHandle(const Command &command); | |||
@@ -188,7 +191,7 @@ class GE_FUNC_VISIBILITY GeExecutor { | |||
/// @return SUCCESS | |||
/// @return FAILED | |||
/// | |||
Status GetMaxUsedMemory(uint32_t model_id, uint32_t &max_size); | |||
Status GetMaxUsedMemory(const uint32_t model_id, uint32_t &max_size); | |||
/// | |||
/// @ingroup ge | |||
@@ -210,8 +213,8 @@ class GE_FUNC_VISIBILITY GeExecutor { | |||
/// @param [out] uint32_t &model_id: Corresponding identification after model loading | |||
/// @return SUCCESS handle successfully / others handle failed | |||
/// | |||
Status LoadModelFromData(uint32_t &model_id, const ModelData &model_data, void *dev_ptr, size_t mem_size, | |||
void *weight_ptr, size_t weight_size); | |||
Status LoadModelFromData(uint32_t &model_id, const ModelData &model_data, void *const dev_ptr, const size_t mem_size, | |||
void *const weight_ptr, const size_t weight_size); | |||
/// | |||
/// @ingroup ge | |||
@@ -225,6 +228,18 @@ class GE_FUNC_VISIBILITY GeExecutor { | |||
Status LoadModelWithQ(uint32_t &model_id, const ModelData &model_data, const std::vector<uint32_t> &input_queue_ids, | |||
const std::vector<uint32_t> &output_queue_ids); | |||
/// | |||
/// @ingroup ge | |||
/// @brief Load task list from ModelData with queue. | |||
/// @param [out] model_id: model id allocate from manager. | |||
/// @param [in] root_model: Instance of GeRootModel. | |||
/// @param [in] input_queue_ids: input queue ids create from user. | |||
/// @param [in] output_queue_ids: input queue ids create from user. | |||
/// @return: 0 for success / others for fail | |||
/// | |||
Status LoadModelWithQ(uint32_t &model_id, const std::shared_ptr<GeRootModel> &root_model, | |||
const std::vector<uint32_t> &input_queue_ids, const std::vector<uint32_t> &output_queue_ids); | |||
/// | |||
/// @ingroup ge | |||
/// @brief Synchronous execution of offline model(Do not create thread) | |||
@@ -235,8 +250,17 @@ class GE_FUNC_VISIBILITY GeExecutor { | |||
/// @param [out] domi::OutputData *output_data: Model output data | |||
/// @return SUCCESS handle successfully / others handle failed | |||
/// | |||
Status ExecModel(uint32_t model_id, void *stream, const RunModelData &input_data, RunModelData &output_data, | |||
bool async_mode = false); | |||
Status ExecModel(const uint32_t model_id, void *const stream, const RunModelData &input_data, | |||
RunModelData &output_data, const bool async_mode = false); | |||
/// | |||
/// @ingroup ge | |||
/// @brief Load task list from root_model without input queue or output queue. | |||
/// @param [out] model_id: model id allocate from manager. | |||
/// @param [in] root_model: Instance of GeRootModel. | |||
/// @return: 0 for success / others for fail | |||
/// | |||
Status LoadModelWithoutQ(uint32_t &model_id, const std::shared_ptr<GeRootModel> &root_model) const; | |||
/// | |||
/// @ingroup ge | |||
@@ -250,9 +274,9 @@ class GE_FUNC_VISIBILITY GeExecutor { | |||
/// @param [out] std::vector<GeTensorDesc> &output_desc: description of model output data | |||
/// @return SUCCESS handle successfully / others handle failed | |||
/// | |||
Status ExecModel(uint32_t model_id, void *stream, const RunModelData &run_input_data, | |||
Status ExecModel(const uint32_t model_id, void *const stream, const RunModelData &run_input_data, | |||
const std::vector<GeTensorDesc> &input_desc, RunModelData &run_output_data, | |||
std::vector<GeTensorDesc> &output_desc, bool async_mode = false); | |||
std::vector<GeTensorDesc> &output_desc, const bool async_mode = false); | |||
/// | |||
/// @ingroup ge | |||
@@ -273,36 +297,38 @@ class GE_FUNC_VISIBILITY GeExecutor { | |||
/// @param [out] size_t &weight_size Weight memory space size | |||
/// @return SUCCESS handle successfully / others handle failed | |||
/// | |||
Status GetMemAndWeightSize(const void *model_data, size_t model_size, size_t &mem_size, size_t &weight_size); | |||
Status GetMemAndWeightSize(const void *const model_data, const size_t model_size, size_t &mem_size, | |||
size_t &weight_size); | |||
static Status LoadSingleOp(const std::string &modelName, const ModelData &modelData, void *stream, | |||
SingleOp **single_op); | |||
static Status LoadSingleOp(const std::string &model_name, const ModelData &model_data, void *const stream, | |||
SingleOp **const single_op); | |||
static Status LoadSingleOpV2(const std::string &modelName, const ModelData &modelData, void *stream, | |||
SingleOp **single_op, const uint64_t model_id); | |||
static Status LoadSingleOpV2(const std::string &model_name, const ModelData &model_data, void *const stream, | |||
SingleOp **const single_op, const uint64_t model_id); | |||
static Status ExecuteAsync(SingleOp *executor, const std::vector<DataBuffer> &inputs, | |||
static Status ExecuteAsync(SingleOp *const executor, const std::vector<DataBuffer> &inputs, | |||
std::vector<DataBuffer> &outputs); | |||
static Status LoadDynamicSingleOp(const std::string &model_name, const ModelData &modelData, void *stream, | |||
DynamicSingleOp **single_op); | |||
static Status LoadDynamicSingleOp(const std::string &model_name, const ModelData &model_data, void *const stream, | |||
DynamicSingleOp **const single_op); | |||
static Status LoadDynamicSingleOpV2(const std::string &model_name, const ModelData &modelData, void *stream, | |||
DynamicSingleOp **single_op, const uint64_t model_id); | |||
static Status LoadDynamicSingleOpV2(const std::string &model_name, const ModelData &model_data, void *const stream, | |||
DynamicSingleOp **const single_op, const uint64_t model_id); | |||
static Status ExecuteAsync(DynamicSingleOp *executor, const std::vector<GeTensorDesc> &input_desc, | |||
static Status ExecuteAsync(DynamicSingleOp *const executor, const std::vector<GeTensorDesc> &input_desc, | |||
const std::vector<DataBuffer> &inputs, std::vector<GeTensorDesc> &output_desc, | |||
std::vector<DataBuffer> &outputs); | |||
static Status ReleaseSingleOpResource(void *stream); | |||
static Status ReleaseSingleOpResource(void *const stream); | |||
static Status GetDeviceIdByModelId(uint32_t model_id, uint32_t &device_id); | |||
static Status GetDeviceIdByModelId(const uint32_t model_id, uint32_t &device_id); | |||
Status GetBatchInfoSize(uint32_t model_id, size_t &shape_count); | |||
Status GetOrigInputInfo(uint32_t model_id, uint32_t index, OriginInputInfo &orig_input_info); | |||
Status GetAllAippInputOutputDims(uint32_t model_id, uint32_t index, std::vector<InputOutputDims> &input_dims, | |||
std::vector<InputOutputDims> &output_dims); | |||
Status GetOpDescInfo(uint32_t device_id, uint32_t stream_id, uint32_t task_id, OpDescInfo &op_desc_info); | |||
Status GetBatchInfoSize(const uint32_t model_id, size_t &shape_count); | |||
Status GetOrigInputInfo(const uint32_t model_id, const uint32_t index, OriginInputInfo &orig_input_info); | |||
Status GetAllAippInputOutputDims(const uint32_t model_id, const uint32_t index, | |||
std::vector<InputOutputDims> &input_dims, std::vector<InputOutputDims> &output_dims); | |||
Status GetOpDescInfo(const uint32_t device_id, const uint32_t stream_id, const uint32_t task_id, | |||
OpDescInfo &op_desc_info); | |||
private: | |||
static std::atomic_bool is_inited_; | |||
@@ -31,6 +31,8 @@ | |||
#include "framework/omg/omg_inner_types.h" | |||
namespace ge { | |||
const std::string kAttrSupportDynamicShape = "support_dynamicshape"; | |||
class GeRootModel; | |||
class GE_FUNC_VISIBILITY GeGenerator { | |||
public: | |||
@@ -103,8 +105,8 @@ class GE_FUNC_VISIBILITY GeGenerator { | |||
/// @param [in] graph_name: graph name. | |||
/// @param [out] graph: graph of single op. | |||
/// @return SUCCESS or FAILED | |||
Status BuildSingleOpGraph(OpDescPtr &op_desc, const InOutTensorRef &inputs_outputs, std::string graph_name, | |||
Graph &graph, std::vector<std::pair<std::string, std::string>> &inputs_name_type); | |||
Status BuildSingleOpGraph(const OpDescPtr &op_desc, const InOutTensorRef &inputs_outputs, std::string graph_name, | |||
Graph &graph, std::vector<std::pair<std::string, std::string>> &inputs_name_type) const; | |||
Status BuildOriginalGraphInfo(OpDescPtr &op_desc, const std::vector<GeTensor> &inputs, | |||
const std::vector<GeTensor> &outputs, const std::string &model_file_name, | |||
bool is_offline, int32_t compile_flag, GraphStage graph_stage, Graph &graph, | |||
@@ -116,20 +118,20 @@ class GE_FUNC_VISIBILITY GeGenerator { | |||
ge::ModelBufferData &model, bool is_offline = true); | |||
Status BuildSingleOp(OpDescPtr &op_desc, const std::vector<GeTensor> &inputs, const std::vector<GeTensor> &outputs, | |||
const std::string &model_file_name, OpEngineType engine_type, ModelBufferData &model_buff, | |||
ComputeGraphPtr &compute_graph, bool is_offline = true, int32_t compile_flag = 0, | |||
ComputeGraphPtr &comp_graph, bool is_offline = true, int32_t compile_flag = 0, | |||
GraphStage graph_stage = GraphStage::GRAPH_STAGE_RESERVED); | |||
bool CheckNoAicore(const ComputeGraphPtr &graph); | |||
void RemoveConst(const std::vector<GeTensor> &inputs, std::vector<GeTensor> &outputs); | |||
Status CheckForSingleOp(OpDescPtr &op_desc, const std::vector<GeTensor> &inputs, | |||
const std::vector<GeTensor> &outputs); | |||
Status InferFormatForSingleOp(OpDescPtr &op_desc, Graph &graph); | |||
void RemoveConst(const std::vector<GeTensor> &inputs, std::vector<GeTensor> &outputs) const; | |||
Status CheckForSingleOp(const OpDescPtr &op_desc, const std::vector<GeTensor> &inputs, | |||
const std::vector<GeTensor> &outputs) const; | |||
Status InferFormatForSingleOp(const OpDescPtr &op_desc, const Graph &graph) const; | |||
using GeRootModelPtr = std::shared_ptr<ge::GeRootModel>; | |||
Status SetModelNameForDump(const GeRootModelPtr &ge_root_model); | |||
Status CreateGeneralizedBuildAttrs(const GeRootModelPtr &ge_root_model, const std::vector<GeTensor> &inputs, | |||
const std::vector<GeTensor> &outputs, | |||
const std::vector<std::pair<std::string, std::string>> &inputs_name_type, | |||
std::vector<ge::NamedAttrs> &generalized_build_attrs); | |||
std::vector<ge::NamedAttrs> &generalized_build_attrs) const; | |||
class Impl; | |||
@@ -17,11 +17,7 @@ | |||
#ifndef INC_FRAMEWORK_MEMORY_MEMORY_API_H_ | |||
#define INC_FRAMEWORK_MEMORY_MEMORY_API_H_ | |||
#include <string> | |||
#include <vector> | |||
#include "external/ge/ge_api_error_codes.h" | |||
#include "graph/types.h" | |||
#include "runtime/mem.h" | |||
namespace ge { | |||
@@ -33,7 +33,7 @@ class GE_FUNC_VISIBILITY MemoryAssigner { | |||
MemoryAssigner &operator=(const MemoryAssigner &) = delete; | |||
Status AssignMemory(bool is_loop_graph, std::map<uint64_t, size_t> &mem_offset, size_t &zero_copy_mem_size); | |||
Status AssignMemory(std::map<uint64_t, size_t> &mem_offset, size_t &zero_copy_mem_size); | |||
private: | |||
ge::ComputeGraphPtr compute_graph_; | |||
@@ -64,7 +64,7 @@ GE_FUNC_VISIBILITY Status InitDomiOmgContext(const std::string &input_shape, con | |||
GE_FUNC_VISIBILITY Status ParseGraph(ge::Graph &graph, const std::map<std::string, std::string> &atc_params, | |||
const char *model_file, const char *weights_file, domi::FrameworkType type, | |||
const char *op_conf = nullptr, const char *target = nullptr, | |||
RunMode run_mode = GEN_OM_MODEL, bool is_dynamic_input = false); | |||
RunMode run_mode = RunMode::GEN_OM_MODEL, bool is_dynamic_input = false); | |||
/** | |||
* @ingroup domi_omg | |||
@@ -89,15 +89,15 @@ GE_FUNC_VISIBILITY Status ConvertPbtxtToJson(const char *model_file, const char | |||
GE_FUNC_VISIBILITY Status ConvertFwkModelToJson(domi::FrameworkType framework, const char *model_file, | |||
const char *json_file); | |||
GE_FUNC_VISIBILITY void GetGroupName(ge::proto::ModelDef &model); | |||
GE_FUNC_VISIBILITY void GetGroupName(ge::proto::ModelDef &model_def); | |||
GE_FUNC_VISIBILITY void FindParserSo(const std::string &path, std::vector<std::string> &fileList, | |||
GE_FUNC_VISIBILITY void FindParserSo(const std::string &path, std::vector<std::string> &file_list, | |||
std::string &caffe_parser_path); | |||
GE_FUNC_VISIBILITY Status DumpInfershapeJson(const ge::Graph &graph, const char *json_file); | |||
GE_FUNC_VISIBILITY Status SetOutputNodeInfo(ge::Graph &graph, const std::string &output_type, | |||
const std::string &output_format); | |||
const std::string &output); | |||
GE_FUNC_VISIBILITY Status GetOutputLeaf(ge::NodePtr node, | |||
std::vector<std::pair<ge::NodePtr, int32_t>> &output_nodes_info); | |||
@@ -31,12 +31,7 @@ | |||
using domi::DOMI_TENSOR_ND; | |||
using domi::DOMI_TENSOR_RESERVED; | |||
using domi::domiTensorFormat_t; | |||
using domi::FRAMEWORK_RESERVED; | |||
using domi::FrameworkType; | |||
using std::map; | |||
using std::string; | |||
using std::unordered_map; | |||
using std::vector; | |||
namespace ge { | |||
/** | |||
@@ -51,36 +46,13 @@ enum RunMode { | |||
DISPLAY_OM_INFO = 6 // display model info | |||
}; | |||
/// | |||
/// @ingroup domi_omg | |||
/// @brief high-precision mode | |||
/// | |||
enum HighPrecisionMode { | |||
// the FP16 high-precision function is disabled in common mode | |||
HIGH_PRECISION_DEFAULT = 0, | |||
// high-precision mode, enabling FP16 high-precision mode (Convolution/FullConnect/AvgPooling are involved) | |||
HIGH_PRECISION_FP16 = 1 | |||
}; | |||
/// | |||
/// @ingroup domi_omg | |||
/// @brief description buffer data | |||
/// | |||
struct OMGBufferData { | |||
void *data; | |||
uint32_t length; | |||
}; | |||
struct OmgContext { | |||
OmgContext() { | |||
format = DOMI_TENSOR_ND; | |||
} | |||
domiTensorFormat_t format; | |||
OmgContext() : format(domi::DOMI_TENSOR_ND) {} | |||
domi::domiTensorFormat_t format; | |||
// format of the input specified by the command line | |||
std::unordered_map<std::string, domiTensorFormat_t> input_nodes_format_map; | |||
std::vector<domiTensorFormat_t> output_formats; | |||
std::unordered_map<std::string, domi::domiTensorFormat_t> input_nodes_format_map; | |||
std::vector<domi::domiTensorFormat_t> output_formats; | |||
// user-designate input dims | |||
std::vector<std::pair<std::string, std::vector<int64_t>>> user_input_dims; | |||
@@ -107,9 +79,9 @@ struct OmgContext { | |||
// net data nodes tensor names(caffe or onnx) | |||
std::vector<std::string> data_tensor_names; | |||
// preferential format used by the entire network | |||
domiTensorFormat_t net_format = DOMI_TENSOR_RESERVED; | |||
domi::domiTensorFormat_t net_format = domi::DOMI_TENSOR_RESERVED; | |||
domi::FrameworkType type = domi::FRAMEWORK_RESERVED; | |||
RunMode run_mode = ONLY_PRE_CHECK; | |||
RunMode run_mode = RunMode::ONLY_PRE_CHECK; | |||
bool train_flag = false; | |||
std::string output_type; | |||
@@ -108,6 +108,8 @@ class GE_FUNC_VISIBILITY ModelParser { | |||
* @return Others failed | |||
*/ | |||
virtual domi::Status ToJson(const char *model_file, const char *json_file) { | |||
(void)model_file; | |||
(void)json_file; | |||
return domi::SUCCESS; | |||
} | |||
@@ -130,6 +132,8 @@ class GE_FUNC_VISIBILITY ModelParser { | |||
* @return Others failed | |||
*/ | |||
virtual domi::Status ParseProto(const std::string &serialized_proto, ge::ComputeGraphPtr &graph) { | |||
(void)serialized_proto; | |||
(void)graph; | |||
return UNSUPPORTED; | |||
} | |||
@@ -144,6 +148,9 @@ class GE_FUNC_VISIBILITY ModelParser { | |||
*/ | |||
virtual domi::Status ParseProtoWithSubgraph(const std::string &serialized_proto, GetGraphCallbackV2 callback, | |||
ge::ComputeGraphPtr &graph) { | |||
(void)serialized_proto; | |||
(void)callback; | |||
(void)graph; | |||
return UNSUPPORTED; | |||
} | |||
}; | |||
@@ -50,7 +50,7 @@ class GE_FUNC_VISIBILITY OpParser { | |||
* @return SUCCESS | |||
* @return FAILED | |||
*/ | |||
virtual domi::Status ParseParams(const Message *op_src, ge::OpDescPtr &op_desc) = 0; | |||
virtual domi::Status ParseParams(const google::protobuf::Message *op_src, ge::OpDescPtr &op_desc) = 0; | |||
/** | |||
* @ingroup domi_omg | |||
@@ -60,7 +60,7 @@ class GE_FUNC_VISIBILITY OpParser { | |||
* @return SUCCESS | |||
* @return FAILED | |||
*/ | |||
virtual domi::Status ParseParams(const Message *op_src, ge::Operator &op_dest) = 0; | |||
virtual domi::Status ParseParams(const google::protobuf::Message *op_src, ge::Operator &op_dest) = 0; | |||
/** | |||
* @ingroup domi_omg | |||
@@ -70,7 +70,7 @@ class GE_FUNC_VISIBILITY OpParser { | |||
* @return SUCCESS | |||
* @return FAILED | |||
*/ | |||
virtual domi::Status ParseWeights(const Message *op_src, ge::NodePtr &node) = 0; | |||
virtual domi::Status ParseWeights(const google::protobuf::Message *op_src, ge::NodePtr &node) = 0; | |||
/** | |||
* @ingroup domi_omg | |||
@@ -80,7 +80,7 @@ class GE_FUNC_VISIBILITY OpParser { | |||
* @return SUCCESS | |||
* @return FAILED | |||
*/ | |||
virtual domi::Status GetFormat(const Message *op_src, domi::domiTensorFormat_t &format) { | |||
virtual domi::Status GetFormat(const google::protobuf::Message *op_src, domi::domiTensorFormat_t &format) { | |||
(void)op_src; | |||
// Indicates that the op does not provide a value for format | |||
format = domi::DOMI_TENSOR_RESERVED; | |||
@@ -24,13 +24,11 @@ | |||
#include "framework/omg/omg_inner_types.h" | |||
#include "framework/omg/parser/parser_types.h" | |||
using Status = domi::Status; | |||
namespace domi { | |||
class WeightsParser; | |||
class ModelParser; | |||
typedef std::shared_ptr<ModelParser> (*MODEL_PARSER_CREATOR_FUN)(void); | |||
using MODEL_PARSER_CREATOR_FUN = std::shared_ptr<ModelParser> (*)(void); | |||
// Create modelparser for different frameworks | |||
class GE_FUNC_VISIBILITY ModelParserFactory { | |||
@@ -82,7 +80,7 @@ class GE_FUNC_VISIBILITY ModelParserRegisterar { | |||
} \ | |||
ModelParserRegisterar g_##type##_Model_Parser_Creator(type, Creator_##type##_Model_Parser) | |||
typedef std::shared_ptr<WeightsParser> (*WEIGHTS_PARSER_CREATOR_FUN)(void); | |||
using WEIGHTS_PARSER_CREATOR_FUN = std::shared_ptr<WeightsParser> (*)(void); | |||
// Create weightsparser for different frameworks | |||
class GE_FUNC_VISIBILITY WeightsParserFactory { | |||
@@ -29,8 +29,8 @@ | |||
namespace ge { | |||
struct ParserContext { | |||
// format of the input specified by the command line | |||
std::unordered_map<std::string, domiTensorFormat_t> input_nodes_format_map; | |||
std::vector<domiTensorFormat_t> output_formats; | |||
std::unordered_map<std::string, domi::domiTensorFormat_t> input_nodes_format_map; | |||
std::vector<domi::domiTensorFormat_t> output_formats; | |||
// user-designate input dims | |||
std::vector<std::pair<std::string, std::vector<int64_t>>> user_input_dims; | |||
std::map<std::string, std::vector<int64_t>> input_dims; | |||
@@ -58,7 +58,7 @@ struct ParserContext { | |||
bool train_flag = false; | |||
domi::domiTensorFormat_t format = domi::DOMI_TENSOR_ND; | |||
domi::FrameworkType type = domi::FRAMEWORK_RESERVED; | |||
RunMode run_mode = GEN_OM_MODEL; | |||
RunMode run_mode = RunMode::GEN_OM_MODEL; | |||
// save caffe custom proto path, used by caffe parse | |||
std::string custom_proto_path; | |||
// save caffe proto path, used by caffe parse | |||
@@ -19,8 +19,6 @@ | |||
#include <memory> | |||
#include <set> | |||
#include <string> | |||
#include <vector> | |||
#include "framework/common/debug/log.h" | |||
#include "framework/common/string_util.h" | |||
@@ -34,7 +32,7 @@ class GE_FUNC_VISIBILITY PlatformVersionManager { | |||
static Status GetPlatformVersion(std::string &ver) { | |||
ver = "1.11.z"; | |||
const std::vector<std::string> version_splits = StringUtils::Split(ver, '.'); | |||
GE_IF_BOOL_EXEC(version_splits.size() < 3, GELOGW("Read platform version error!"); return FAILED;); | |||
GE_IF_BOOL_EXEC(version_splits.size() < 3U, GELOGW("Read platform version error!"); return FAILED;); | |||
GELOGI("Read current platform version: %s.", ver.c_str()); | |||
return SUCCESS; | |||
@@ -1 +1 @@ | |||
Subproject commit 1d99928bfcb02e45acc7db73e3ee57304ff1131a | |||
Subproject commit 0a2335712484f85cd44a0f2402eac6932b22b40a |
@@ -1,57 +1,57 @@ | |||
/** | |||
* Copyright 2019-2020 Huawei Technologies Co., Ltd | |||
* | |||
* Licensed under the Apache License, Version 2.0 (the "License"); | |||
* you may not use this file except in compliance with the License. | |||
* You may obtain a copy of the License at | |||
* | |||
* http://www.apache.org/licenses/LICENSE-2.0 | |||
* | |||
* Unless required by applicable law or agreed to in writing, software | |||
* distributed under the License is distributed on an "AS IS" BASIS, | |||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
* See the License for the specific language governing permissions and | |||
* limitations under the License. | |||
*/ | |||
#ifndef AICPU_OP_TYPE_LIST_H_ | |||
#define AICPU_OP_TYPE_LIST_H_ | |||
extern "C" { | |||
enum OpKernelType { | |||
TF_KERNEL, | |||
CPU_KERNEL | |||
}; | |||
enum ReturnCode { | |||
OP_TYPE_NOT_SUPPORT, | |||
FORMAT_NOT_SUPPORT, | |||
DTYPE_NOT_SUPPORT | |||
}; | |||
#pragma pack(push, 1) | |||
//One byte alignment | |||
struct SysOpInfo { | |||
uint64_t opLen; | |||
uint64_t opType; | |||
OpKernelType kernelsType; | |||
}; | |||
struct SysOpCheckInfo { | |||
uint64_t opListNum; | |||
uint64_t offSetLen; | |||
uint64_t sysOpInfoList; | |||
uint64_t opParamInfoList; | |||
}; | |||
struct SysOpCheckResp { | |||
uint64_t opListNum; | |||
bool isWithoutJson; | |||
uint64_t returnCodeList; | |||
uint64_t sysOpInfoList; | |||
uint64_t opParamInfoList; | |||
}; | |||
#pragma pack(pop) | |||
} | |||
#endif // AICPU_OP_TYPE_LIST_H_ | |||
/** | |||
* Copyright (c) Huawei Technologies Co., Ltd. 2019-2020. All rights reserved. | |||
* | |||
* Licensed under the Apache License, Version 2.0 (the "License"); | |||
* you may not use this file except in compliance with the License. | |||
* You may obtain a copy of the License at | |||
* | |||
* http://www.apache.org/licenses/LICENSE-2.0 | |||
* | |||
* Unless required by applicable law or agreed to in writing, software | |||
* distributed under the License is distributed on an "AS IS" BASIS, | |||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
* See the License for the specific language governing permissions and | |||
* limitations under the License. | |||
*/ | |||
#ifndef AICPU_OP_TYPE_LIST_H_ | |||
#define AICPU_OP_TYPE_LIST_H_ | |||
extern "C" { | |||
enum OpKernelType { | |||
TF_KERNEL, | |||
CPU_KERNEL | |||
}; | |||
enum ReturnCode { | |||
OP_TYPE_NOT_SUPPORT, | |||
FORMAT_NOT_SUPPORT, | |||
DTYPE_NOT_SUPPORT | |||
}; | |||
#pragma pack(push, 1) | |||
// One byte alignment | |||
struct SysOpInfo { | |||
uint64_t opLen; | |||
uint64_t opType; | |||
OpKernelType kernelsType; | |||
}; | |||
struct SysOpCheckInfo { | |||
uint64_t opListNum; | |||
uint64_t offSetLen; | |||
uint64_t sysOpInfoList; | |||
uint64_t opParamInfoList; | |||
}; | |||
struct SysOpCheckResp { | |||
uint64_t opListNum; | |||
bool isWithoutJson; | |||
uint64_t returnCodeList; | |||
uint64_t sysOpInfoList; | |||
uint64_t opParamInfoList; | |||
}; | |||
#pragma pack(pop) | |||
} | |||
#endif // AICPU_OP_TYPE_LIST_H_ |
@@ -29,6 +29,53 @@ struct AicpuParamHead | |||
uint32_t extInfoLength; // extInfo struct Length | |||
uint64_t extInfoAddr; // extInfo address | |||
}; | |||
enum class AicpuConfigMsgType { | |||
AICPU_CONFIG_MSG_TYPE_BUF_FREE = 0, /* free buf */ | |||
AICPU_CONFIG_MSG_TYPE_BUF_RESET = 1, /* reset buf */ | |||
AICPU_CONFIG_MSG_TYPE_BUF_SET_ADDR = 2, /* set buf addr to aicpu */ | |||
}; | |||
enum class AicpuErrMsgType { | |||
ERR_MSG_TYPE_NULL = 0, | |||
ERR_MSG_TYPE_AICORE = 1, | |||
ERR_MSG_TYPE_AICPU = 2, | |||
}; | |||
typedef struct tagAicpuConfigMsg { | |||
uint8_t msgType; | |||
uint8_t reserved1; | |||
uint16_t bufLen; | |||
uint32_t offset; | |||
uint64_t bufAddr; | |||
uint32_t tsId; | |||
uint32_t reserved2; | |||
} AicpuConfigMsg; | |||
typedef struct tagAicoreErrMsgInfo { | |||
uint8_t errType; | |||
uint8_t version; | |||
uint8_t reserved1[2]; /* reserved1, 4 byte alignment */ | |||
uint32_t errorCode; | |||
uint32_t modelId; | |||
uint32_t taskId; | |||
uint32_t streamId; | |||
uint64_t transactionId; | |||
uint8_t reserved2[228]; /* the total byte is 256, reserved2 len = 256 - other lens */ | |||
} AicoreErrMsgInfo; | |||
typedef struct tagAicpuErrMsgInfo { | |||
uint8_t errType; | |||
uint8_t version; | |||
uint8_t reserved1[2]; /* reserved1, 4 byte alignment */ | |||
uint32_t errorCode; | |||
uint32_t modelId; | |||
uint32_t streamId; | |||
uint64_t transactionId; | |||
char opName[64]; /* op name str */ | |||
char errDesc[128]; /* err msg desc info */ | |||
uint8_t reserved2[40]; /* the total byte is 256, reserved2 len = 256 - other lens */ | |||
} AicpuErrMsgInfo; | |||
#pragma pack(pop) | |||
} // namespace aicpu | |||
@@ -13,7 +13,6 @@ | |||
* See the License for the specific language governing permissions and | |||
* limitations under the License. | |||
*/ | |||
#ifndef AICPU_ENGINE_H__ | |||
#define AICPU_ENGINE_H__ | |||
@@ -21,7 +21,7 @@ | |||
namespace aicpu { | |||
namespace FWKAdapter { | |||
using char_t = char; | |||
// API RETURN CODE | |||
enum FWKAdptAPIRetCode { | |||
FWK_ADPT_SUCCESS = 0, // success | |||
@@ -63,6 +63,8 @@ enum FWKTaskExtInfoType { | |||
FWK_ADPT_EXT_BITMAP, | |||
FWK_ADPT_EXT_TOPIC_TYPE, | |||
FWK_ADPT_EXT_ASYNCWAIT, | |||
FWK_ADPT_EXT_UNKNOWN_SHAPE_INPUT_INDEX, | |||
FWK_ADPT_EXT_UNKNOWN_SHAPE_OUTPUT_INDEX, | |||
FWK_ADPT_EXT_INVALID | |||
}; | |||
@@ -113,7 +115,7 @@ struct StrFWKKernel { | |||
typedef StrFWKKernel FWKOperateParam; | |||
// Extent info ShapeAndType | |||
const uint32_t kMaxShapeDims = 8; | |||
const uint32_t kMaxShapeDims = 8U; | |||
#pragma pack(push, 1) | |||
struct ShapeAndType { | |||
int32_t type; | |||
@@ -122,13 +124,13 @@ struct ShapeAndType { | |||
#pragma pack(pop) | |||
// Extend info structure for extInfoAddr | |||
const uint32_t kExtInfoHeadSize = 8; | |||
const uint32_t kExtInfoHeadSize = 8U; | |||
#pragma pack(push, 1) | |||
struct ExtInfo { | |||
int32_t infoType; // extend type | |||
uint32_t infoLen; // length for infoMsg | |||
char infoMsg[0]; // extend value | |||
char_t infoMsg[0]; // extend value | |||
}; | |||
#pragma pack(pop) | |||
@@ -143,9 +145,9 @@ struct ResultSummary { | |||
#pragma pack(push, 1) | |||
struct AsyncWait { | |||
uint8_t waitType; // wait type, FWK_ADPT_WAIT_TYPE_EVENT: event wait | |||
uint32_t waitId; // wait id, GE refresh | |||
uint32_t timeOut; // reserved | |||
uint8_t waitType; // wait type, FWk_ADPT_WAIT_TPYE_EVENT: event wait | |||
uint32_t waitId; // wait id, GE refresh | |||
uint32_t timeOut; // reserved | |||
uint64_t reserved; | |||
}; | |||
#pragma pack(pop) | |||
@@ -94,13 +94,13 @@ enum HcclEventType { | |||
HCCL_EVENT_RESERVED /**< reserved */ | |||
}; | |||
const u32 TAG_MAX_LEN = 127; // 脳卯麓贸碌脛tag 鲁陇露脠 | |||
const u32 TAG_MAX_LEN = 127; // 最大的tag 长度 | |||
using TagAttr = struct TagAttrDef { | |||
char name[TAG_MAX_LEN + 1]; // tag卤锚脢露 | |||
// tag卤锚脢露碌脛陆脫脢脮脢媒戮脻拢卢碌梅脫脙脮脽脢脟路帽禄谩脰梅露炉碌梅脫脙陆脫脢脮陆脫驴脷拢卢0 = 路帽, 1 = 禄谩(脭陇脕么拢卢脭脻虏禄脰搂鲁脰)隆拢 | |||
// 露脭脫脷activeRecv = 0拢卢碌卤陆脫脢脮虏脿脢脮碌陆脢媒戮脻禄貌脮脽路垄脣脥脟毛脟贸脢卤拢卢脰梅露炉脥篓脰陋碌梅脫脙脮脽隆拢 | |||
char name[TAG_MAX_LEN + 1]; // tag标识 | |||
// tag标识的接收数据,调用者是否会主动调用接收接口,0 = 否, 1 = 会(预留,暂不支持)。 | |||
// 对于activeRecv = 0,当接收侧收到数据或者发送请求时,主动通知调用者。 | |||
uint32_t activeRecv; | |||
uint32_t sendCredit; // 脜盲脰脙赂脙tag脭脢脨铆inflight碌脛send赂枚脢媒 | |||
uint32_t sendCredit; // 配置该tag允许inflight的send个数 | |||
uint32_t eventId; | |||
}; | |||
@@ -188,6 +188,15 @@ struct HcomGatherAllToAllVParams { | |||
const char *group; // not used now | |||
}; | |||
typedef enum workMode { | |||
HCCL_MODE_NORMAL = 0, // 不支持任何Probe any,仅支持精确的probe | |||
HCCL_MODE_ANY = 1 // 仅支持ANY_SOURCE + ANY_TAG的probe | |||
} WorkMode; | |||
typedef struct tagCommAttr { | |||
WorkMode mode; // 通信域内的probe工作模式 | |||
uint32_t deviceId = 0; | |||
} CommAttr; | |||
#ifdef __cplusplus | |||
} | |||
#endif // __cplusplus | |||
@@ -126,72 +126,6 @@ extern HcclResult HcomSetGradFusionByIndex(const char *group, u32 segmentNum, co | |||
* @return HcclResult | |||
*/ | |||
extern HcclResult HcomSetGradFusionBySize(const char *group, u32 segmentNum, const float *sizeList); | |||
/** | |||
* @brief Initialize hcom executor. | |||
* | |||
* @param void | |||
* @return HcclResult | |||
*/ | |||
HcclResult HcomExecInitialize(); | |||
/** | |||
* @brief Finalize hcom executor. | |||
* | |||
* @param void | |||
* @return HcclResult | |||
*/ | |||
HcclResult HcomExecFinalize(); | |||
/** | |||
* @brief Put collective communication operation into hcom executor. | |||
* | |||
* @param opInfo information about collective communication operation. | |||
* @param callback callback after collective communication operation. | |||
* @return HcclResult | |||
*/ | |||
HcclResult HcomExecEnqueueOperation(HcomOperation opInfo, std::function<void(HcclResult status)> callback); | |||
/** | |||
* @brief Put remote access operation into hcom executor. | |||
* | |||
* @param remoteAccessType operation type (read or write). | |||
* @param addrInfos address information about collective communication operation. | |||
* @param callback callback after collective communication operation. | |||
* @return HcclResult | |||
*/ | |||
HcclResult HcomExecEnqueueRemoteAccess(const std::string& remoteAccessType, | |||
const std::vector<HcomRemoteAccessAddrInfo>& addrInfos, | |||
std::function<void(HcclResult status)> callback); | |||
/** | |||
* @brief Put alltoallv communication operation into hcom executor. | |||
* | |||
* @param params information about alltoallv communication operation. | |||
* @param callback callback after collective communication operation. | |||
* @return HcclResult | |||
*/ | |||
HcclResult HcomExecEnqueueAllToAllV(HcomAllToAllVParams params, std::function<void(HcclResult status)> callback); | |||
/** | |||
* @brief Put agther alltoallv communication operation into hcom executor. | |||
* | |||
* @param params information about agther alltoallv communication operation. | |||
* @param callback callback after collective communication operation. | |||
* @return HcclResult | |||
*/ | |||
HcclResult HcomExecEnqueueGatherAllToAllV(HcomGatherAllToAllVParams params, | |||
std::function<void(HcclResult status)> callback); | |||
/** | |||
* @brief Register memories and init resources for remote access. | |||
* | |||
* @param addrList memory addresses for remote access. | |||
* @param count number of remote memory addresses. | |||
* @return HcclResult | |||
*/ | |||
extern HcclResult HcomRegRemoteAccessMem(const MemRegisterAddr* addrList, u32 count); | |||
#ifdef __cplusplus | |||
} | |||
#endif // __cplusplus | |||
@@ -1,18 +1,12 @@ | |||
/** | |||
* Copyright 2019-2020 Huawei Technologies Co., Ltd | |||
* | |||
* Licensed under the Apache License, Version 2.0 (the "License"); | |||
* you may not use this file except in compliance with the License. | |||
* You may obtain a copy of the License at | |||
* | |||
* http://www.apache.org/licenses/LICENSE-2.0 | |||
* | |||
* Unless required by applicable law or agreed to in writing, software | |||
* distributed under the License is distributed on an "AS IS" BASIS, | |||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
* See the License for the specific language governing permissions and | |||
* limitations under the License. | |||
*/ | |||
/* | |||
* @file mmpa_api.h | |||
* | |||
* Copyright (C) Huawei Technologies Co., Ltd. 2019-2021. All Rights Reserved. | |||
* | |||
* This program is distributed in the hope that it will be useful, | |||
* but WITHOUT ANY WARRANTY; without even the implied warranty of | |||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. | |||
*/ | |||
#ifndef _MMPA_API_H_ | |||
#define _MMPA_API_H_ | |||
@@ -1,18 +1,12 @@ | |||
/** | |||
* Copyright 2019-2020 Huawei Technologies Co., Ltd | |||
* | |||
* Licensed under the Apache License, Version 2.0 (the "License"); | |||
* you may not use this file except in compliance with the License. | |||
* You may obtain a copy of the License at | |||
* | |||
* http://www.apache.org/licenses/LICENSE-2.0 | |||
* | |||
* Unless required by applicable law or agreed to in writing, software | |||
* distributed under the License is distributed on an "AS IS" BASIS, | |||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
* See the License for the specific language governing permissions and | |||
* limitations under the License. | |||
*/ | |||
/* | |||
* @file mmpa_linux.h | |||
* | |||
* Copyright (C) Huawei Technologies Co., Ltd. 2019-2021. All Rights Reserved. | |||
* | |||
* This program is distributed in the hope that it will be useful, | |||
* but WITHOUT ANY WARRANTY; without even the implied warranty of | |||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. | |||
*/ | |||
#ifndef MMPA_LINUX_MMPA_LINUX_H | |||
#define MMPA_LINUX_MMPA_LINUX_H | |||
@@ -79,6 +79,9 @@ typedef long LONG; | |||
#define MMPA_THREAD_SCHED_OTHER SCHED_OTHER | |||
#define MMPA_THREAD_MIN_STACK_SIZE PTHREAD_STACK_MIN | |||
#define MMPA_PATH_SEPARATOR_STR "/" | |||
#define MMPA_PATH_SEPARATOR_CHAR '/' | |||
#define MM_MUTEX_INITIALIZER PTHREAD_MUTEX_INITIALIZER | |||
#define MMPA_MAX_NI 19 | |||
@@ -1,83 +1,86 @@ | |||
/** | |||
* Copyright 2019-2020 Huawei Technologies Co., Ltd | |||
* | |||
* Licensed under the Apache License, Version 2.0 (the "License"); | |||
* you may not use this file except in compliance with the License. | |||
* You may obtain a copy of the License at | |||
* | |||
* http://www.apache.org/licenses/LICENSE-2.0 | |||
* | |||
* Unless required by applicable law or agreed to in writing, software | |||
* distributed under the License is distributed on an "AS IS" BASIS, | |||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
* See the License for the specific language governing permissions and | |||
* limitations under the License. | |||
*/ | |||
#ifndef MMPA_TYPEDEF_WIN_H | |||
#define MMPA_TYPEDEF_WIN_H | |||
#ifdef __cplusplus | |||
#if __cplusplus | |||
extern "C" { | |||
#endif // __cpluscplus | |||
#endif // __cpluscplus | |||
#ifndef FALSE | |||
#define FALSE 0 | |||
#endif | |||
#ifndef TRUE | |||
#define TRUE 1 | |||
#endif | |||
#define EN_OK 0 | |||
#define EN_ERR 1 | |||
#define EN_ERROR (-1) | |||
#define EN_INVALID_PARAM (-2) | |||
#define EN_TIMEOUT (-3) | |||
#define HANDLE_INVALID_VALUE (-1) | |||
#define INVALID_SOCKET_HANDLE INVALID_SOCKET | |||
#define MMPA_MEM_MAX_LEN (0x7fffffff) | |||
#define MMPA_PROCESS_ERROR (0x7fffffff) | |||
#define MMPA_ONE_THOUSAND 1000 | |||
#define MMPA_COMPUTER_BEGIN_YEAR 1900 | |||
#define SUMMER_TIME_OR_NOT (-1) | |||
#define MMPA_ZERO 0 | |||
#define MMPA_VALUE_ONE 1 | |||
#define MMPA_SOCKET_MAIN_EDITION 2 | |||
#define MMPA_SOCKET_SECOND_EDITION 0 | |||
#define MMPA_PIPE_BUF_SIZE 1024 | |||
#define MMPA_MAX_SCANDIR_COUNT 1024 | |||
#define MAX_IOVEC_SIZE 32 | |||
#define MMPA_PIPE_COUNT 2 | |||
#define MMPA_THREADNAME_SIZE 16 | |||
#define MMPA_MIN_OS_NAME_SIZE (MAX_COMPUTERNAME_LENGTH + 1) | |||
#define MMPA_MIN_OS_VERSION_SIZE 64 | |||
#define MMPA_MAX_NI 19 | |||
#define MMPA_MIDDLE_NI 5 | |||
#define MMPA_LOW_NI (-5) | |||
#define MMPA_MIN_NI (-20) | |||
#define MMPA_MAX_FILE 128 | |||
#define MMPA_MAX_THREAD_PIO 99 | |||
#define MMPA_MIDDLE_THREAD_PIO 66 | |||
#define MMPA_LOW_THREAD_PIO 33 | |||
#define MMPA_MIN_THREAD_PIO 1 | |||
#define MMPA_THREAD_SCHED_RR 0 | |||
#define MMPA_THREAD_SCHED_FIFO 0 | |||
#define MMPA_THREAD_SCHED_OTHER 0 | |||
#define MMPA_THREAD_MIN_STACK_SIZE 0 | |||
#define MM_MUTEX_INITIALIZER NULL | |||
#ifdef __cplusplus | |||
#if __cplusplus | |||
} | |||
#endif // __cpluscplus | |||
#endif // __cpluscplus | |||
#endif // _MMPA_TYPEDEF_WIN_H_ | |||
/** | |||
* Copyright 2019-2020 Huawei Technologies Co., Ltd | |||
* | |||
* Licensed under the Apache License, Version 2.0 (the "License"); | |||
* you may not use this file except in compliance with the License. | |||
* You may obtain a copy of the License at | |||
* | |||
* http://www.apache.org/licenses/LICENSE-2.0 | |||
* | |||
* Unless required by applicable law or agreed to in writing, software | |||
* distributed under the License is distributed on an "AS IS" BASIS, | |||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
* See the License for the specific language governing permissions and | |||
* limitations under the License. | |||
*/ | |||
#ifndef MMPA_TYPEDEF_WIN_H | |||
#define MMPA_TYPEDEF_WIN_H | |||
#ifdef __cplusplus | |||
#if __cplusplus | |||
extern "C" { | |||
#endif // __cpluscplus | |||
#endif // __cpluscplus | |||
#ifndef FALSE | |||
#define FALSE 0 | |||
#endif | |||
#ifndef TRUE | |||
#define TRUE 1 | |||
#endif | |||
#define EN_OK 0 | |||
#define EN_ERR 1 | |||
#define EN_ERROR (-1) | |||
#define EN_INVALID_PARAM (-2) | |||
#define EN_TIMEOUT (-3) | |||
#define HANDLE_INVALID_VALUE (-1) | |||
#define INVALID_SOCKET_HANDLE INVALID_SOCKET | |||
#define MMPA_MEM_MAX_LEN (0x7fffffff) | |||
#define MMPA_PROCESS_ERROR (0x7fffffff) | |||
#define MMPA_ONE_THOUSAND 1000 | |||
#define MMPA_COMPUTER_BEGIN_YEAR 1900 | |||
#define SUMMER_TIME_OR_NOT (-1) | |||
#define MMPA_ZERO 0 | |||
#define MMPA_VALUE_ONE 1 | |||
#define MMPA_SOCKET_MAIN_EDITION 2 | |||
#define MMPA_SOCKET_SECOND_EDITION 0 | |||
#define MMPA_PIPE_BUF_SIZE 1024 | |||
#define MMPA_MAX_SCANDIR_COUNT 1024 | |||
#define MAX_IOVEC_SIZE 32 | |||
#define MMPA_PIPE_COUNT 2 | |||
#define MMPA_THREADNAME_SIZE 16 | |||
#define MMPA_MIN_OS_NAME_SIZE (MAX_COMPUTERNAME_LENGTH + 1) | |||
#define MMPA_MIN_OS_VERSION_SIZE 64 | |||
#define MMPA_MAX_NI 19 | |||
#define MMPA_MIDDLE_NI 5 | |||
#define MMPA_LOW_NI (-5) | |||
#define MMPA_MIN_NI (-20) | |||
#define MMPA_MAX_FILE 128 | |||
#define MMPA_PATH_SEPARATOR_STR "\\" | |||
#define MMPA_PATH_SEPARATOR_CHAR '\\' | |||
#define MMPA_MAX_THREAD_PIO 99 | |||
#define MMPA_MIDDLE_THREAD_PIO 66 | |||
#define MMPA_LOW_THREAD_PIO 33 | |||
#define MMPA_MIN_THREAD_PIO 1 | |||
#define MMPA_THREAD_SCHED_RR 0 | |||
#define MMPA_THREAD_SCHED_FIFO 0 | |||
#define MMPA_THREAD_SCHED_OTHER 0 | |||
#define MMPA_THREAD_MIN_STACK_SIZE 0 | |||
#define MM_MUTEX_INITIALIZER NULL | |||
#ifdef __cplusplus | |||
#if __cplusplus | |||
} | |||
#endif // __cpluscplus | |||
#endif // __cpluscplus | |||
#endif // _MMPA_TYPEDEF_WIN_H_ |
@@ -1,18 +1,12 @@ | |||
/** | |||
* Copyright 2019-2020 Huawei Technologies Co., Ltd | |||
* | |||
* Licensed under the Apache License, Version 2.0 (the "License"); | |||
* you may not use this file except in compliance with the License. | |||
* You may obtain a copy of the License at | |||
* | |||
* http://www.apache.org/licenses/LICENSE-2.0 | |||
* | |||
* Unless required by applicable law or agreed to in writing, software | |||
* distributed under the License is distributed on an "AS IS" BASIS, | |||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
* See the License for the specific language governing permissions and | |||
* limitations under the License. | |||
*/ | |||
/* | |||
* @file mmpa_win.h | |||
* | |||
* Copyright (C) Huawei Technologies Co., Ltd. 2019-2021. All Rights Reserved. | |||
* | |||
* This program is distributed in the hope that it will be useful, | |||
* but WITHOUT ANY WARRANTY; without even the implied warranty of | |||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. | |||
*/ | |||
#ifndef MMPA_WIN_MMPA_WIN_H | |||
#define MMPA_WIN_MMPA_WIN_H | |||
@@ -0,0 +1,65 @@ | |||
approvers: | |||
- gegenhua | |||
- qiaohairong | |||
reviewers: | |||
- chuqingxi | |||
- wang-jintang | |||
- luanma_bl | |||
- chen-kang30 | |||
- li-xulong | |||
- Allan_Yu | |||
- minshen | |||
- pan-jixing | |||
- yl_wang | |||
- lijie176 | |||
- mabing726 | |||
- miao-fangzheng | |||
- huang-qiang002 | |||
- su-yueming | |||
- chenpeng-hw | |||
- wang_jianle | |||
- luanma_bl | |||
- LDLD0524 | |||
- wywismygod2020 | |||
- lipeiyang3699 | |||
- koala-zhang | |||
- zhu-jingjing | |||
- zhaozhihui5 | |||
- simbaliuxx | |||
- lyxyz | |||
- zhou-qilong | |||
- block0219 | |||
- hanfuwei | |||
- xchu42 | |||
- sheng-nan | |||
- yangjing88 | |||
- alexlak | |||
- xig514 | |||
- jellylj | |||
- brightlyking | |||
- liuzhenyuhw | |||
- djh602 | |||
- wangjiangben_hw | |||
- li1jie | |||
- clinglai | |||
- liujun2014 | |||
- soupkey | |||
- wu-shengji | |||
- cimeng | |||
- ccl_ligang | |||
- xiaozhedeng | |||
- granpad7 | |||
- tc1qaz | |||
- Ronnie_zheng | |||
- xiexianhu | |||
- zhouyujoe | |||
- zhaoping12 | |||
- tanshengshun | |||
- fanqirui | |||
- xu-binglin | |||
- yangyang016 | |||
- zhangzhongzt | |||
- gegenhua | |||
- qiaohairong | |||
options: | |||
no_parent_owners: true |
@@ -745,6 +745,28 @@ REG_OP(UnsqueezeV2) | |||
.ATTR(axis, ListInt, {}) | |||
.OP_END_FACTORY_REG(UnsqueezeV2) | |||
/** | |||
*@brief Inserts a dimension of 1 into a tensor's shape. Only the tensor shape | |||
is changed, but the data is not changed. \n | |||
*@par Inputs: | |||
*x: A tensor. | |||
*axes: A list of int64, which indicates the dimensions to be inserted. \n | |||
*@par Outputs: | |||
*y: Reshape tensor with same data as input. \n | |||
*@par Third-party framework compatibility | |||
*Compatible with the Onnx operator Unsqueeze in V13. \n | |||
*/ | |||
REG_OP(UnsqueezeV3) | |||
.INPUT(x, TensorType::ALL()) | |||
.INPUT(axes, ListInt) | |||
.OUTPUT(y, TensorType::ALL()) | |||
.OP_END_FACTORY_REG(UnsqueezeV3) | |||
/** | |||
*@brief Reshapes a tensor. Only the tensor shape is changed, without changing the data. \n | |||
@@ -821,6 +843,28 @@ REG_OP(SqueezeV2) | |||
.ATTR(axis, ListInt, {}) | |||
.OP_END_FACTORY_REG(SqueezeV2) | |||
/** | |||
*@brief Removes dimensions of size 1 from the shape of a tensor according to axes. \n | |||
*@par Inputs: | |||
*x: A tensor. | |||
*axes: An optional list of int64. If not specified, squeezes all dimensions of | |||
size 1. If specified, only squeezes the dimensions listed. It is an error to | |||
squeeze a dimension that is not 1. \n | |||
*@par Outputs: | |||
*y: Reshape tensor with same data as input. \n | |||
*@par Third-party framework compatibility | |||
*Compatible with the onnx operator Squeeze in V13. \n | |||
*/ | |||
REG_OP(SqueezeV3) | |||
.INPUT(x, TensorType::ALL()) | |||
.OPTIONAL_INPUT(axes, ListInt) | |||
.OUTPUT(y, TensorType::ALL()) | |||
.OP_END_FACTORY_REG(SqueezeV3) | |||
/** | |||
*@brief Returns an integer representing the rank of input tensor. The rank of a tensor is the number of indices required to uniquely select each element of the tensor, that is, the dimension size of the tensor. \n | |||
@@ -1273,7 +1317,7 @@ REG_OP(SortV2) | |||
* @par Inputs: | |||
* One inputs, including: | |||
* @li x: A Tensor. Must be one of the following types: | |||
* float16, float32, int32, int8 ,uint8. \n | |||
* float16, float32, int32, int8, uint8, bool. \n | |||
* @li shape: A Tensor to specify the shape that the input tensor expanded to. \n | |||
* @par Outputs: | |||
@@ -1284,9 +1328,9 @@ REG_OP(SortV2) | |||
*/ | |||
REG_OP(Expand) | |||
.INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32, DT_INT8, DT_UINT8})) | |||
.INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32,DT_INT64, DT_INT8, DT_UINT8, DT_BOOL})) | |||
.INPUT(shape, TensorType({DT_INT16, DT_INT32, DT_INT64})) | |||
.OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32, DT_INT8, DT_UINT8})) | |||
.OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32,DT_INT64, DT_INT8, DT_UINT8, DT_BOOL})) | |||
.OP_END_FACTORY_REG(Expand) | |||
/** | |||
@@ -1342,13 +1386,37 @@ REG_OP(NonZeroWithValue) | |||
.ATTR(dtype, Type, DT_INT32) | |||
.OP_END_FACTORY_REG(NonZeroWithValue) | |||
/** | |||
*@Returns a tensor with updated shape from NonZeroWithValue. \n | |||
*@par Inputs: | |||
*value: A Tensor. The output of NonZeroWithValue. \n | |||
*index: A Tensor. The output of NonZeroWithValue. \n | |||
*count: A Tensor. The type is INT32, means count for non_zero ele in input. \n | |||
* out_value: A Tensor. Has the same type as "value" . \n | |||
* out_index: A Tensor. Has the same type as "index". \n | |||
*/ | |||
REG_OP(NonZeroWithValueShape) | |||
.INPUT(value, TensorType({DT_DOUBLE, DT_FLOAT, DT_FLOAT16, DT_INT8, DT_UINT8, DT_INT16, | |||
DT_UINT16, DT_INT32, DT_UINT32, DT_INT64, DT_UINT64, DT_BOOL})) | |||
.INPUT(index, TensorType({DT_INT32})) | |||
.INPUT(count, TensorType({DT_INT32})) | |||
.OUTPUT(out_value, TensorType({DT_DOUBLE, DT_FLOAT, DT_FLOAT16, DT_INT8, DT_UINT8, DT_INT16, | |||
DT_UINT16, DT_INT32, DT_UINT32, DT_INT64, DT_UINT64, DT_BOOL})) | |||
.OUTPUT(out_index, TensorType({DT_INT32})) | |||
.OP_END_FACTORY_REG(NonZeroWithValueShape) | |||
/** | |||
* @brief Expand the input tensor to a compatible shape. \n | |||
* @par Inputs: | |||
* One inputs, including: | |||
* x: A Tensor. Must be one of the following types: | |||
* float16, float32, int32, int8 ,uint8. \n | |||
* float16, float32, int32, int8, uint8, bool. \n | |||
* @par Attributes: | |||
* shape: A required listInt to specify the shape that the input tensor expanded to. \n | |||
@@ -1362,8 +1430,8 @@ REG_OP(NonZeroWithValue) | |||
*/ | |||
REG_OP(ExpandD) | |||
.INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32, DT_INT8, DT_UINT8})) | |||
.OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32, DT_INT8, DT_UINT8})) | |||
.INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32, DT_INT8, DT_UINT8, DT_BOOL})) | |||
.OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32, DT_INT8, DT_UINT8, DT_BOOL})) | |||
.REQUIRED_ATTR(shape, ListInt) | |||
.OP_END_FACTORY_REG(ExpandD) | |||
@@ -1404,6 +1472,43 @@ REG_OP(UpdateTensorDesc) | |||
DT_INT64, DT_UINT64, DT_INT16, DT_UINT16, DT_DOUBLE})) | |||
.REQUIRED_ATTR(shape, ListInt) | |||
.OP_END_FACTORY_REG(UpdateTensorDesc) | |||
/** | |||
*@brief Queue data for other operators. \n | |||
*@par Attributes: | |||
*index: Index of the input tensor. The data type must be int32 or int64. | |||
Assume that net has three data nodes, one should be set 0, another should | |||
be set 1, and the left should be set 2. \n | |||
*queue_name: queue name | |||
*output_types: types of outputs data | |||
*output_shapes: shapes of outputs data | |||
*@par Outputs: | |||
*y: A DT_UINT8 tensor. \n | |||
*/ | |||
REG_OP(QueueData) | |||
.OUTPUT(y, TensorType({DT_UINT8})) | |||
.ATTR(index, Int, 0) | |||
.ATTR(queue_name, String, "") | |||
.ATTR(output_types, ListType, {}) | |||
.ATTR(output_shapes, ListListInt, {{}, {}}) | |||
.OP_END_FACTORY_REG(QueueData) | |||
/** | |||
* @brief Ensures that the tensor's shape matches the expected shape. \n | |||
* @par Inputs: | |||
* x: A Tensor. \n | |||
* @par Attributes: | |||
* shape: The shape that needs to be checked \n | |||
* @par Outputs: | |||
* y: A tensor. \n | |||
*/ | |||
REG_OP(EnsureShape) | |||
.INPUT(input, TensorType({DT_INT8,DT_UINT8,DT_INT16,DT_UINT16,DT_INT32,DT_INT64,DT_FLOAT16, \ | |||
DT_FLOAT,DT_DOUBLE})) | |||
.OUTPUT(output, TensorType({DT_INT8,DT_UINT8,DT_INT16,DT_UINT16,DT_INT32,DT_INT64,DT_FLOAT16, \ | |||
DT_FLOAT,DT_DOUBLE})) | |||
.REQUIRED_ATTR(shape, ListInt) | |||
.OP_END_FACTORY_REG(EnsureShape) | |||
} // namespace ge | |||
#endif // OPS_BUILT_IN_OP_PROTO_INC_ARRAY_OPS_H_ |
@@ -146,7 +146,7 @@ REG_OP(CTCBeamSearchDecoder) | |||
*@li log_probs: Tensor of size (T, N, C), where T =input length, N =batch size, | |||
and C = number of classes (including blank). | |||
It represent the logarithmized probabilities of the outputs. | |||
*@li targets: Tensor of size (N, S), where S= max target length. | |||
*@li targets: Tensor of size (N, S) or sum(target_lengths), where S = max target length. | |||
It represent the target sequences. | |||
*@li input_lengths: Tuple or tensor of size (N). It represent the lengths of the inputs. | |||
*@li target_lengths: Tuple or tensor of size (N). It represent lengths of the targets. | |||
@@ -159,11 +159,12 @@ REG_OP(CTCBeamSearchDecoder) | |||
*@li blank : Blank label. Default 0. | |||
*@li reduction: Specifies the reduction to apply to the output. Default: 'mean'. | |||
*@li zero_infinity : Whether to zero infinite losses and the associated gradients. | |||
*@li label_max : The max length of targets. | |||
*@par Third-party framework compatibility | |||
* Compatible with Pytorch CTCLoss operator. | |||
*@par Restrictions: | |||
*@attention Constraints: | |||
*The limit of Label’s length is 1K. | |||
*/ | |||
REG_OP(CTCLossV2) | |||
@@ -176,6 +177,7 @@ REG_OP(CTCLossV2) | |||
.ATTR(blank, Int, 0) | |||
.ATTR(reduction, String, "mean") | |||
.ATTR(zero_infinity, Bool, false) | |||
.ATTR(label_max, Int, 0) | |||
.OP_END_FACTORY_REG(CTCLossV2) | |||
/** | |||
@@ -186,7 +188,7 @@ REG_OP(CTCLossV2) | |||
*@li log_probs: Tensor of size (T, N, C), where T =input length, N =batch size, | |||
and C = number of classes (including blank). | |||
It represent the logarithmized probabilities of the outputs. | |||
*@li targets: Tensor of size (N, S), where S= max target length. | |||
*@li targets: Tensor of size (N, S) or sum(target_lengths), where S = max target length. | |||
It represent the target sequences. | |||
*@li input_lengths: Tuple or tensor of size (N). It represent the lengths of the inputs. | |||
*@li target_lengths: Tuple or tensor of size (N). It represent lengths of the targets. | |||
@@ -200,11 +202,12 @@ REG_OP(CTCLossV2) | |||
*@li blank : Blank label. Default 0. | |||
*@li reduction: Specifies the reduction to apply to the output. Default: 'mean'. | |||
*@li zero_infinity : Whether to zero infinite losses and the associated gradients. | |||
*@li label_max : The max length of targets. | |||
*@par Third-party framework compatibility | |||
* Compatible with Pytorch CTCLoss operator. | |||
*@par Restrictions: | |||
*@attention Constraints: | |||
*The limit of Label’s length is 1K. | |||
*/ | |||
REG_OP(CTCLossV2Grad) | |||
@@ -219,6 +222,7 @@ REG_OP(CTCLossV2Grad) | |||
.ATTR(blank, Int, 0) | |||
.ATTR(reduction, String, "mean") | |||
.ATTR(zero_infinity, Bool, false) | |||
.ATTR(label_max, Int, 0) | |||
.OP_END_FACTORY_REG(CTCLossV2Grad) | |||
} // namespace ge | |||
@@ -2398,6 +2398,32 @@ REG_OP(DynamicGetNext) | |||
.ATTR(_getnext_inputs_shape_range, String, "") | |||
.OP_END_FACTORY_REG(DynamicGetNext) | |||
/** | |||
* @brief DynamicGetNextV2, dynamic get next data | |||
* @par Inputs: | |||
*x: the iterator, all types are available | |||
* @par Outputs: | |||
* y: the data in iterator, all types are available | |||
* @par Attributes: | |||
* output_types: types of all outputs | |||
* output_shapes: shapes of all outputs | |||
*_dynamic_graph_execute_mode: dynamic graph execution mode, | |||
value is one of lazy_recompile and dynamic_execute | |||
*_getnext_inputs_shape_range: shape ranges of outputs, | |||
it works where _dynamic_graph_execute_mode is dynamic_execute | |||
*@par Restrictions: | |||
*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. | |||
*/ | |||
REG_OP(DynamicGetNextV2) | |||
.DYNAMIC_OUTPUT(y, TensorType::ALL()) | |||
.ATTR(output_types, ListType, {}) | |||
.ATTR(channel_name, String, "") | |||
.ATTR(output_shapes, ListListInt, {{}, {}}) | |||
.ATTR(_dynamic_graph_execute_mode, String, "lazy_recompile") | |||
.ATTR(_getnext_inputs_shape_range, String, "") | |||
.OP_END_FACTORY_REG(DynamicGetNextV2) | |||
/** | |||
*@brief AdpGetNext | |||
*@par Outputs: | |||
@@ -2433,5 +2459,24 @@ REG_OP(GetNextV2) | |||
.ATTR(output_shapes, ListListInt, {{}, {}}) | |||
.ATTR(channel_name, String, "") | |||
.OP_END_FACTORY_REG(GetNextV2) | |||
/** | |||
*@brief GetNextFromQueue | |||
*@par Inputs: | |||
*x: the data, only support uint8 | |||
*@par Outputs: | |||
*y: the data in iterator, all types are available | |||
*@par Attributes: | |||
*output_types: types of all outputs | |||
*output_shapes: shapes of all outputs | |||
*@par Restrictions: | |||
*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. | |||
*/ | |||
REG_OP(GetNextFromQueue) | |||
.INPUT(x, TensorType({DT_UINT8})) | |||
.DYNAMIC_OUTPUT(y, TensorType::ALL()) | |||
.ATTR(output_types, ListType, {}) | |||
.ATTR(output_shapes, ListListInt, {{}, {}}) | |||
.OP_END_FACTORY_REG(GetNextFromQueue) | |||
} // namespace ge | |||
#endif // OPS_BUILT_IN_OP_PROTO_INC_DATA_FLOW_OPS_H_ |
@@ -1,5 +1,5 @@ | |||
/** | |||
* Copyright 2021 Huawei Technologies Co., Ltd | |||
* Copyright (c) Huawei Technologies Co., Ltd. 2021. All rights reserved. | |||
* | |||
* Licensed under the Apache License, Version 2.0 (the "License"); | |||
* you may not use this file except in compliance with the License. | |||
@@ -24,6 +24,87 @@ | |||
#include "graph/operator_reg.h" | |||
namespace ge { | |||
/** | |||
* @brief Calculate TabulateFusion. \n | |||
* | |||
* @par Inputs: | |||
* Five inputs, including: | |||
* @li table: A Tensor. Must be one of the following types: float16, float32, float64. | |||
* @li table_info: A Tensor. Must be one of the following types: float16, float32, float64. | |||
* @li em_x: A Tensor. Must be one of the following types: float16, float32, float64. | |||
* @li em: A Tensor. Must be one of the following types: float16, float32, float64. \n | |||
* | |||
* @par Outputs: | |||
* descriptor: A Tensor. Must be one of the following types: float16, float32, float64. \n | |||
* | |||
* @par Attributes: | |||
* Three attributes, including: | |||
* @li last_layer_size: int value. | |||
* @li split_count: int value. | |||
* @li split_index: int value. \n | |||
* | |||
* @par Restrictions: | |||
* Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. | |||
*/ | |||
REG_OP(TabulateFusion) | |||
.INPUT(table, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE})) | |||
.INPUT(table_info, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE})) | |||
.INPUT(em_x, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE})) | |||
.INPUT(em, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE})) | |||
.OUTPUT(descriptor, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE})) | |||
.REQUIRED_ATTR(last_layer_size, Int) | |||
.ATTR(split_count, Int, 1) | |||
.ATTR(split_index, Int, 0) | |||
.OP_END_FACTORY_REG(TabulateFusion) | |||
/** | |||
* @brief Calculate ProdEnvMatA. \n | |||
* | |||
* @par Inputs: | |||
* @li coord: A Tensor. Must be one of the following types: float32, float64. | |||
* @li type: A Tensor. Must be one of the following types: int32. | |||
* @li natoms: A Tensor. Must be one of the following types: int32. | |||
* @li box: A Tensor. Must be one of the following types: float32, float64. | |||
* @li mesh: A Tensor. Must be one of the following types: int32. | |||
* @li davg: A Tensor. Must be one of the following types: float32, float64. | |||
* @li dstd: A Tensor. Must be one of the following types: float32, float64. | |||
* | |||
* @par Outputs: | |||
* descrpt: A Tensor. Must be one of the following types: float32, float64. | |||
* descrpt_deriv: A Tensor. Must be one of the following types: float32, float64. | |||
* rij: A Tensor. Must be one of the following types: float32, float64. | |||
* nlist: A Tensor. Must be one of the following types: int32. \n | |||
* | |||
* @par Attributes: | |||
* @li rcut_a: A Float. | |||
* @li rcut_r: A Float. | |||
* @li rcut_r_smth: A Float. | |||
* @li sel_a: A ListInt. | |||
* @li split_count: A Int. | |||
* @li split_index: A Int.\n | |||
* | |||
*/ | |||
REG_OP(ProdEnvMatA) | |||
.INPUT(coord, TensorType({DT_FLOAT, DT_DOUBLE})) | |||
.INPUT(type, TensorType({DT_INT32})) | |||
.INPUT(natoms, TensorType({DT_INT32})) | |||
.INPUT(box, TensorType({DT_FLOAT, DT_DOUBLE})) | |||
.INPUT(mesh, TensorType({DT_INT32})) | |||
.INPUT(davg, TensorType({DT_FLOAT, DT_DOUBLE})) | |||
.INPUT(dstd, TensorType({DT_FLOAT, DT_DOUBLE})) | |||
.OUTPUT(descrpt, TensorType({DT_FLOAT, DT_DOUBLE})) | |||
.OUTPUT(descrpt_deriv, TensorType({DT_FLOAT, DT_DOUBLE})) | |||
.OUTPUT(rij, TensorType({DT_FLOAT, DT_DOUBLE})) | |||
.OUTPUT(nlist, TensorType({DT_INT32})) | |||
.ATTR(rcut_a, Float, 1.0) | |||
.ATTR(rcut_r, Float, 1.0) | |||
.ATTR(rcut_r_smth, Float, 1.0) | |||
.ATTR(sel_a, ListInt, {}) | |||
.ATTR(sel_r, ListInt, {}) | |||
.ATTR(split_count, Int, 1) | |||
.ATTR(split_index, Int, 0) | |||
.OP_END_FACTORY_REG(ProdEnvMatA) | |||
/** | |||
* @brief Calculate ProdForceSeA. \n | |||
* | |||
@@ -53,7 +134,80 @@ REG_OP(ProdForceSeA) | |||
.OUTPUT(atom_force, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE})) | |||
.REQUIRED_ATTR(n_a_sel, Int) | |||
.REQUIRED_ATTR(n_r_sel, Int) | |||
.ATTR(split_count, Int, 1) | |||
.ATTR(split_index, Int, 0) | |||
.OP_END_FACTORY_REG(ProdForceSeA) | |||
/** | |||
* @brief Calculate ProdVirialSeA. \n | |||
* | |||
* @par Inputs: | |||
* Five inputs, including: | |||
* @li net_deriv: A Tensor. Must be one of the following types: float16, float32, float64. | |||
* @li in_deriv: A Tensor. Must be one of the following types: float16, float32, float64. | |||
* @li rij: A Tensor. Must be one of the following types: float16, float32, float64. | |||
* @li nlist: A Tensor. dtype is int32. | |||
* @li natoms: A Tensor. dtype is int32. \n | |||
* | |||
* @par Outputs: | |||
* Two outputs, including: | |||
* @li virial: A Tensor. Must be one of the following types: float16, float32, float64. | |||
* @li atom_virial: A Tensor. Must be one of the following types: float16, float32, float64. \n | |||
* | |||
* @par Attributes: | |||
* Two attributes, including: | |||
* @li n_a_sel: Int value. | |||
* @li n_r_sel: Int value. | |||
* @li split_count: Int value. | |||
* @li split_index: Int value. \n | |||
*/ | |||
REG_OP(ProdVirialSeA) | |||
.INPUT(net_deriv, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE})) | |||
.INPUT(in_deriv, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE})) | |||
.INPUT(rij, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE})) | |||
.INPUT(nlist, TensorType({DT_INT32})) | |||
.INPUT(natoms, TensorType({DT_INT32})) | |||
.OUTPUT(virial, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE})) | |||
.OUTPUT(atom_virial, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE})) | |||
.REQUIRED_ATTR(n_a_sel, Int) | |||
.REQUIRED_ATTR(n_r_sel, Int) | |||
.ATTR(split_count, Int, 1) | |||
.ATTR(split_index, Int, 0) | |||
.OP_END_FACTORY_REG(ProdVirialSeA) | |||
/** | |||
* @brief Calculate TabulateFusionGrad. \n | |||
* | |||
* @par Inputs: | |||
* Six inputs, including: | |||
* @li table: A Tensor. Must be one of the following types: float16, float32, float64. | |||
* @li table_info: A Tensor. Must be one of the following types: float16, float32, float64. | |||
* @li em_x: A Tensor. Must be one of the following types: float16, float32, float64. | |||
* @li em: A Tensor. Must be one of the following types: float16, float32, float64. | |||
* @li dy: A Tensor. Must be one of the following types: float16, float32, float64. | |||
* @li descriptor: A Tensor. Must be one of the following types: float16, float32, float64. \n | |||
* | |||
* @par Outputs: | |||
* @li dy_dem_x: A Tensor. Must be one of the following types: float16, float32, float64. | |||
* @li dy_dem: A Tensor. Must be one of the following types: float16, float32, float64. \n | |||
* | |||
* @par Attributes: | |||
* Two attributes, including: | |||
* @li split_count: A Scalar. | |||
* @li split_index: A Scalar. \n | |||
*/ | |||
REG_OP(TabulateFusionGrad) | |||
.INPUT(table, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE})) | |||
.INPUT(table_info, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE})) | |||
.INPUT(em_x, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE})) | |||
.INPUT(em, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE})) | |||
.INPUT(dy, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE})) | |||
.INPUT(descriptor, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE})) | |||
.OUTPUT(dy_dem_x, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE})) | |||
.OUTPUT(dy_dem, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE})) | |||
.ATTR(split_count, Int, 1) | |||
.ATTR(split_index, Int, 0) | |||
.OP_END_FACTORY_REG(TabulateFusionGrad) | |||
} // namespace ge | |||
#endif // OPS_BUILT_IN_OP_PROTO_INC_DEEP_MD_H_ |
@@ -331,7 +331,7 @@ REG_OP(Sub) | |||
*@par Inputs: | |||
*One input, including: \n | |||
*x: A Tensor. Must be one of the following types: float16, float32, double, int32, int64. \n | |||
*x: A Tensor. Must be one of the following types: float16, float32, double, int8, int16, int32, int64. \n | |||
*@par Outputs: | |||
*y: A Tensor. Has the same type as "x". \n | |||
@@ -340,8 +340,10 @@ REG_OP(Sub) | |||
*Compatible with the TensorFlow operator Abs. | |||
*/ | |||
REG_OP(Abs) | |||
.INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE, DT_INT32, DT_INT64})) | |||
.OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE, DT_INT32, DT_INT64})) | |||
.INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE, DT_INT8, DT_INT16, | |||
DT_INT32, DT_INT64})) | |||
.OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE, DT_INT8, DT_INT16, | |||
DT_INT32, DT_INT64})) | |||
.OP_END_FACTORY_REG(Abs) | |||
/** | |||
@@ -3821,6 +3823,10 @@ REG_OP(CosineSimilarity) | |||
* @li max_grad_norm: A Tensor. Datatype is same as exp_avg. Shape (1, ).\n | |||
* @li global_grad_norm: A Tensor. Datatype is same as exp_avg. Shape (1, ).\n | |||
* @li weight_decay: A Tensor. Datatype is same as exp_avg. Shape (1, ).\n | |||
* @li step_size: A Optional Tensor. Datatype is same as exp_avg. Shape (1, ).\n | |||
* @par Attributes: | |||
* @li adam_mode: An optional string. Defaults to "adam". \n | |||
*@par Outputs: | |||
*three inputs, including: | |||
@@ -3840,9 +3846,11 @@ REG_OP(ApplyAdamV2) | |||
.INPUT(max_grad_norm, TensorType({ DT_FLOAT, DT_FLOAT16 })) | |||
.INPUT(global_grad_norm, TensorType({ DT_FLOAT, DT_FLOAT16 })) | |||
.INPUT(weight_decay, TensorType({ DT_FLOAT, DT_FLOAT16 })) | |||
.OPTIONAL_INPUT(step_size, TensorType({ DT_FLOAT, DT_FLOAT16 })) | |||
.OUTPUT(var, TensorType({ DT_FLOAT, DT_FLOAT16 })) | |||
.OUTPUT(m, TensorType({ DT_FLOAT, DT_FLOAT16 })) | |||
.OUTPUT(v, TensorType({ DT_FLOAT, DT_FLOAT16 })) | |||
.ATTR(adam_mode, String, "adam") | |||
.OP_END_FACTORY_REG(ApplyAdamV2) | |||
} // namespace ge | |||
@@ -132,7 +132,7 @@ nearest neighbor sampling to a common output size specified by crop_size . \n | |||
*@li x:A Tensor. Must be one of the following types:uint8, uint16, int8, | |||
int16, int32, int64, float16, float, double. A 4-D tensor of shape | |||
[batch, image_height, image_width, depth]. The format must be NHWC. | |||
*@li boxes: A Tensor of type float. A 2-D tensor of shape [num_boxes, 4]. | |||
*@li boxes: A Tensor. Must be one of the following types: float16, float. A 2-D tensor of shape [num_boxes, 4]. | |||
*@li box_index: A Tensor of type int32. A 1-D tensor of shape [num_boxes] with | |||
int32 values in [0, batch). | |||
*@li crop_size: A Tensor of type int32. A 1-D tensor of 2 elements, crop_size | |||
@@ -146,7 +146,7 @@ extrapolation, when applicable. | |||
NearestNeighbor . \n | |||
*@par Outputs: | |||
*y:A Tensor of type float. The format must be NHWC. \n | |||
*y: A Tensor. Must be one of the following types: float16, float. The format must be NHWC. \n | |||
*@attention Constraints: | |||
*Input images must be a 4-D tensor . \n | |||
@@ -158,10 +158,10 @@ NearestNeighbor . \n | |||
REG_OP(CropAndResize) | |||
.INPUT(x, TensorType({DT_UINT8, DT_UINT16, DT_INT8, \ | |||
DT_INT16, DT_INT32, DT_INT64, DT_FLOAT16, DT_FLOAT, DT_DOUBLE})) | |||
.INPUT(boxes, TensorType({DT_FLOAT})) | |||
.INPUT(boxes, TensorType({DT_FLOAT16, DT_FLOAT})) | |||
.INPUT(box_index, TensorType({DT_INT32})) | |||
.INPUT(crop_size, TensorType({DT_INT32})) | |||
.OUTPUT(y, TensorType({DT_FLOAT})) | |||
.OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT})) | |||
.ATTR(extrapolation_value, Float, 0) | |||
.ATTR(method, String, "bilinear") | |||
.OP_END_FACTORY_REG(CropAndResize) | |||
@@ -175,7 +175,7 @@ REG_OP(CropAndResize) | |||
*Input images must be a 5HD tensor. Inputs include: | |||
*@li x:A Tensor. Must be one of the following types:float16, float. A 5HD tensor of shape | |||
* [batch, C1, image_height, image_width, C0]. | |||
*@li boxes: A Tensor of type float. A 2-D tensor of shape [num_boxes, 4]. | |||
*@li boxes: A Tensor. Must be one of the following types: float16, float. A 2-D tensor of shape [num_boxes, 4]. | |||
*@li box_index: A Tensor of type int32. A 1-D tensor of shape [num_boxes] with int32 values in [0, batch) . \n | |||
*@par Attributes: | |||
@@ -184,7 +184,7 @@ REG_OP(CropAndResize) | |||
*@li method: An optional string from: '"bilinear"'. Defaults to "bilinear" . \n | |||
*@par Outputs: | |||
*y:A Tensor of type float . \n | |||
*y: A Tensor. Must be one of the following types: float16, float. \n | |||
*@attention Constraints: | |||
*Input images must be a 5HD tensor . \n | |||
@@ -197,9 +197,9 @@ REG_OP(CropAndResize) | |||
*/ | |||
REG_OP(CropAndResizeD) | |||
.INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT})) | |||
.INPUT(boxes, TensorType({DT_FLOAT})) | |||
.INPUT(boxes, TensorType({DT_FLOAT16, DT_FLOAT})) | |||
.INPUT(box_index, TensorType({DT_INT32})) | |||
.OUTPUT(y, TensorType({DT_FLOAT})) | |||
.OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT})) | |||
.REQUIRED_ATTR(crop_size, ListInt) | |||
.ATTR(extrapolation_value, Float, 0) | |||
.ATTR(method, String, "bilinear") | |||
@@ -888,10 +888,10 @@ Defaults to false . \n | |||
*@li half_pixel_centers: An optional bool. Defaults to False . \n | |||
*@par Outputs: | |||
*y: 4-D with shape [batch, new_height, new_width, channels] . \n | |||
*y: A Tensor with the same type and format as input "images" . \n | |||
*@par Third-party framework compatibility | |||
*Compatible with tensorflow ResizeNearestNeighborV2 operator. | |||
*Compatible with tensorflow ResizeNearestNeighbor operator. | |||
*/ | |||
REG_OP(ResizeNearestNeighborV2) | |||
@@ -378,7 +378,7 @@ to each component of an element of this dataset. | |||
REG_OP(GetNext) | |||
.DYNAMIC_OUTPUT(y, TensorType({DT_INT8, DT_UINT8, DT_INT16, DT_UINT16, DT_INT32, DT_INT64, DT_UINT32, DT_UINT64, | |||
DT_FLOAT16, DT_FLOAT, DT_DOUBLE, DT_BOOL})) | |||
.ATTR(output_types, ListInt, {}) | |||
.ATTR(output_types, ListType, {}) | |||
.ATTR(output_shapes, ListListInt, {}) | |||
.ATTR(output_num, Int, 1) | |||
.ATTR(channel_name, String, "") | |||
@@ -213,9 +213,9 @@ REG_OP(GEMM) | |||
*/ | |||
REG_OP(BatchMatMul) | |||
.INPUT(x1, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT32})) | |||
.INPUT(x2, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT32})) | |||
.OUTPUT(y, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT32})) | |||
.INPUT(x1, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT32, DT_BF16})) | |||
.INPUT(x2, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT32, DT_BF16})) | |||
.OUTPUT(y, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT32, DT_BF16})) | |||
.ATTR(adj_x1, Bool, false) | |||
.ATTR(adj_x2, Bool, false) | |||
.OP_END_FACTORY_REG(BatchMatMul) | |||
@@ -246,11 +246,11 @@ REG_OP(BatchMatMul) | |||
*/ | |||
REG_OP(BatchMatMulV2) | |||
.INPUT(x1, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT32, DT_INT8, DT_INT4})) | |||
.INPUT(x2, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT32, DT_INT8, DT_INT4})) | |||
.OPTIONAL_INPUT(bias, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT32})) | |||
.INPUT(x1, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT32, DT_INT8, DT_INT4, DT_BF16})) | |||
.INPUT(x2, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT32, DT_INT8, DT_INT4, DT_BF16})) | |||
.OPTIONAL_INPUT(bias, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT32, DT_BF16})) | |||
.OPTIONAL_INPUT(offset_w, TensorType({DT_INT8, DT_INT4})) | |||
.OUTPUT(y, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT32})) | |||
.OUTPUT(y, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT32, DT_BF16})) | |||
.ATTR(adj_x1, Bool, false) | |||
.ATTR(adj_x2, Bool, false) | |||
.ATTR(offset_x, Int, 0) | |||
@@ -505,17 +505,17 @@ REG_OP(ScatterElements) | |||
* Three inputs, including: | |||
*@li var: An ND Tensor . | |||
*Must be one of the following types: float16, float32, int32, int8, uint8 | |||
*@li indices: An ND Tensor of type int32 or int64 | |||
*Must be one of the following types: float16, float, int32, int8, uint8 | |||
*@li indices: An ND Tensor . \n | |||
*@li updates: An Tensor. format:NCHW, NHWC . | |||
*Must be one of the following types: int32 or int64 | |||
*@li updates: An ND Tensor . | |||
*Must be one of the following types: float16, float32, int32, int8, uint8 | |||
*Must be one of the following types: float16, float, int32, int8, uint8 | |||
*@par Attributes: | |||
* use_locking: An optional bool. Defaults to "False". If "True", the operation | |||
* will be protected by a lock . \n | |||
*use_locking: An optional bool. Defaults to "False". If "True", | |||
* the operation will be protected by a lock . \n | |||
*@par Outputs: | |||
*var: A Tensor. Has the same type and format as input "var" . \n | |||
@@ -792,13 +792,13 @@ REG_OP(DiagPart) | |||
* Four inputs, including: | |||
*@li x: A Tensor of type float16, int8. | |||
*@li w: A weight matrix of type float16, int8. | |||
*@li b: A Tensor of type float16, int32, float32. | |||
*@li offset_w: A Tensor of type int8 . \n | |||
*@li b: An optional Tensor of type float16, int32, float32. | |||
*@li offset_w: An optional Tensor of type int8. Reserved. Only None Supported. \n | |||
*@par Attributes: | |||
*@li num_output: Reserved. | |||
*@li num_output: Required. An int, output neuron number. Reserved. | |||
*@li transpose: A bool, specifying weight whether to transpose input w, either "true" or "false". Defaults to "false". | |||
*@li axis: Optional. A int, 1 or 2, specifying which dimension the input "K" starts from. Defaults to 1. | |||
*@li axis: Optional. An int, 1 or 2, specifying which dimension the input "K" starts from. Defaults to 1. | |||
* The product of the subsequent dimensions starting form first dimension or the second dimension is "K". | |||
*@li offset_x: An optional integer for quantized FullyConnection. | |||
*The negative offset added to the input image for int8 type. Ensure offset_x within the | |||
@@ -814,11 +814,11 @@ REG_OP(DiagPart) | |||
* Yes | |||
*/ | |||
REG_OP(FullyConnection) | |||
.INPUT(x, TensorType({DT_FLOAT16, DT_INT8, DT_INT4})) | |||
.INPUT(w, TensorType({DT_FLOAT16, DT_INT8, DT_INT4})) | |||
.OPTIONAL_INPUT(b, TensorType({DT_FLOAT16, DT_INT32,DT_FLOAT32})) | |||
.INPUT(x, TensorType({DT_FLOAT16, DT_INT8, DT_INT4, DT_FLOAT32, DT_BF16})) | |||
.INPUT(w, TensorType({DT_FLOAT16, DT_INT8, DT_INT4, DT_FLOAT32, DT_BF16})) | |||
.OPTIONAL_INPUT(b, TensorType({DT_FLOAT16, DT_INT32,DT_FLOAT32, DT_BF16})) | |||
.OPTIONAL_INPUT(offset_w, TensorType({DT_INT8, DT_INT4})) | |||
.OUTPUT(y, TensorType({DT_FLOAT16, DT_INT32,DT_FLOAT32})) | |||
.OUTPUT(y, TensorType({DT_FLOAT16, DT_INT32,DT_FLOAT32, DT_BF16})) | |||
.REQUIRED_ATTR(num_output, Int) | |||
.ATTR(transpose, Bool, false) | |||
.ATTR(axis, Int, 1) | |||
@@ -1360,6 +1360,45 @@ REG_OP(FillDiagonal) | |||
.ATTR(wrap, Bool, false) | |||
.OP_END_FACTORY_REG(FillDiagonal) | |||
/** | |||
*@brief: Returns the sum of the elements of the diagonal of the input 2-D matrix. \n | |||
*@par Inputs: | |||
*x: A Tensor. Must be one of the following types: | |||
* float16, float. \n | |||
*@par Outputs: | |||
*y: A Tensor. Has the same type as "x" . \n | |||
*@par Third-party framework compatibility | |||
* Compatible with the Pytorch operator Trace. | |||
*/ | |||
REG_OP(Trace) | |||
.INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT})) | |||
.OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT})) | |||
.OP_END_FACTORY_REG(Trace) | |||
/** | |||
*@brief Computes the generalized inverse of any matrix. \n | |||
*@par Inputs: | |||
* @li x: input matrix. Must be one of the following types: | |||
* double, float. \n | |||
*@par Attributes: | |||
* @li rcond: An optional float >= 0 or inf. Defaults to 1e-15. \n | |||
*@par Outputs: | |||
* y: A Tensor with the same type and shape of x's transpose. \n | |||
*/ | |||
REG_OP(Pinverse) | |||
.INPUT(x, TensorType({ DT_FLOAT, DT_DOUBLE })) | |||
.OUTPUT(y, TensorType({ DT_FLOAT, DT_DOUBLE })) | |||
.ATTR(rcond, Float, 1e-15) | |||
.OP_END_FACTORY_REG(Pinverse) | |||
} // namespace ge | |||
#endif // OPS_BUILT_IN_OP_PROTO_INC_MATRIX_CALCULATION_OPS_H_ |
@@ -142,6 +142,74 @@ REG_OP(BatchNorm) | |||
.ATTR(is_training, Bool, true) | |||
.OP_END_FACTORY_REG(BatchNorm) | |||
/** | |||
* @brief After the mean and reciprocal of standard deviation(invert_std) are separately calculated on each device, | |||
* the mean and reciprocal of standard deviation(invert_std) data on each device are normalized, | |||
* a total mean and reciprocal of standard deviation(invert_std) are returned, and running_var are updated. | |||
* @par Inputs: | |||
* include: | |||
* @li mean_all: A Tensor. The mean of each device. Must be one of the following types: float16, float32. | |||
* @li invert_std_all: A Tensor. Reciprocal of the variances of each device. Must be one of the following types: float16, float32. | |||
* @li count_all: A Tensor. Number of data for each device. Must be one of the following types: float16, float32. | |||
* @li mean_broadcast: A Tensor. The overall average and broadcast. Must be one of the following types: float16, float32. | |||
* @li count_sum: A Tensor. General statistics. Must be one of the following types: float16, float32. | |||
* @li running_var: A Tensor. Runtime variance. Must be one of the following types: float16, float32. \n | |||
* @par Attributes: | |||
* Two Attributes, including: | |||
* @li momentum: An optional float. Defaults to 0.1. \n | |||
* @li epsilon: An optional float. Defaults to 0.001. \n | |||
* @par Outputs: | |||
* include: | |||
* @li invert_std: A Tensor. It's inverse of total variance. | |||
* @li running_var_update: A Tensor. It's moving variance of each device after the update. \n | |||
* @par Third-party framework compatibility | |||
* ReduceMeanWithCount and SyncBatchNormGatherStatsWithCounts and SyncBNTrainingUpdate | |||
* compatible with the Pytorch operator BatchNormGatherStatsWithCounts. | |||
*/ | |||
REG_OP(SyncBatchNormGatherStatsWithCounts) | |||
.INPUT(mean_all, TensorType({DT_FLOAT, DT_FLOAT16})) | |||
.INPUT(invert_std_all, TensorType({DT_FLOAT, DT_FLOAT16})) | |||
.INPUT(count_all, TensorType({DT_FLOAT, DT_FLOAT16})) | |||
.INPUT(mean_broadcast, TensorType({DT_FLOAT, DT_FLOAT16})) | |||
.INPUT(count_sum, TensorType({DT_FLOAT, DT_FLOAT16})) | |||
.INPUT(running_var, TensorType({DT_FLOAT, DT_FLOAT16})) | |||
.OUTPUT(invert_std, TensorType({DT_FLOAT, DT_FLOAT16})) | |||
.OUTPUT(running_var_update, TensorType({DT_FLOAT, DT_FLOAT16})) | |||
.ATTR(momentum, Float, 0.1) | |||
.ATTR(epsilon, Float, 0.001) | |||
.OP_END_FACTORY_REG(SyncBatchNormGatherStatsWithCounts) | |||
/** | |||
* @brief update running_mean. | |||
* @par Inputs: | |||
* include: | |||
* @li mean: A Tensor. The mean of each device. Must be one of the following types: float16, float32. | |||
* @li running_mean: A Tensor. Runtime Mean. Must be one of the following types: float16, float32. \n | |||
* @par Attributes: | |||
* One Attribute, including: | |||
* @li momentum: An optional float. Defaults to 0.1. \n | |||
* @par Outputs: | |||
* include: | |||
* @li running_mean_update: A Tensor. It's moving mean of each device after the update. \n | |||
* @par Third-party framework compatibility | |||
* ReduceMeanWithCount and SyncBatchNormGatherStatsWithCounts and SyncBNTrainingUpdate | |||
* compatible with the Pytorch operator BatchNormGatherStatsWithCounts. | |||
*/ | |||
REG_OP(SyncBNTrainingUpdate) | |||
.INPUT(mean, TensorType({DT_FLOAT, DT_FLOAT16})) | |||
.INPUT(running_mean, TensorType({DT_FLOAT, DT_FLOAT16})) | |||
.OUTPUT(running_mean_update, TensorType({DT_FLOAT, DT_FLOAT16})) | |||
.ATTR(momentum, Float, 0.1) | |||
.OP_END_FACTORY_REG(SyncBNTrainingUpdate) | |||
/** | |||
*@brief part of SyncBatchNormBackward . \n | |||
@@ -134,9 +134,9 @@ REG_OP(DepthwiseConv2DBackpropFilter) | |||
* instead. | |||
*/ | |||
REG_OP(DepthwiseConv2DBackpropFilterD) | |||
.INPUT(input, TensorType({float16})) | |||
.INPUT(out_backprop, TensorType({float16})) | |||
.OUTPUT(filter_grad, TensorType({float32})) | |||
.INPUT(input, TensorType({DT_FLOAT16, DT_FLOAT32, DT_BF16})) | |||
.INPUT(out_backprop, TensorType({DT_FLOAT16, DT_FLOAT32, DT_BF16})) | |||
.OUTPUT(filter_grad, TensorType({DT_FLOAT32})) | |||
.REQUIRED_ATTR(filter_size, ListInt) | |||
.REQUIRED_ATTR(strides, ListInt) | |||
.ATTR(dilations, ListInt, {1, 1, 1, 1}) | |||
@@ -764,7 +764,7 @@ REG_OP(Conv2DBackpropFilterD) | |||
| | float32 | float32 | float32 | float32 |\n | |||
| | int8 | int8 | int32 | int32 |\n | |||
| Format | NCHW | NCHW | ND | NCHW |\n | |||
| | NHWC | HWCN | | NHWC |\n | |||
| | NHWC | HWCN | ND | NHWC |\n | |||
*\n | |||
* For float32 type, the actual calculation on the chip is based on | |||
* float16. | |||
@@ -1650,5 +1650,43 @@ REG_OP(Dilation) | |||
.ATTR(padding_value, Float, 0.0) | |||
.OP_END_FACTORY_REG(Dilation) | |||
/** | |||
*@brief Computes the post-cube processing output with the expected input | |||
*@par Inputs: | |||
* Ten inputs: | |||
* x1: A Tensor of type float16, bfloat16, float32, int32 | |||
* x2: A Tensor of type float16, int8, int4 | |||
* quant_scale_0: A Tensor of type uint64 | |||
* relu_weight_0: A Tensor of type float32 | |||
* clip_value_0: A Tensor of type float16, int8, int4 | |||
* quant_scale_1: A Tensor of type uint64 | |||
* relu_weight_1: A Tensor of type float32 | |||
* clip_value_1: A Tensor of type float16 | |||
* anti_quant_scale: A Tensor of type float16 | |||
* anti_quant_offset: A Tensor of type int8, int4 | |||
*@par Attributes: | |||
* @li fusion_op_list: A list of String. | |||
* @li unit_list: A list of String | |||
* @li eltwise_mode: An optional string from "ADD", "SUB" and "". | |||
*@par Outputs: | |||
* output: A Tensor. A Tensor of type float16, bfloat16, float32, int32, int8, int4. | |||
*/ | |||
REG_OP(FixPipe) | |||
.INPUT(x1, TensorType({DT_FLOAT16, DT_BF16, DT_FLOAT, DT_INT32})) | |||
.OPTIONAL_INPUT(x2, TensorType({DT_FLOAT16, DT_INT8, DT_INT4})) | |||
.OPTIONAL_INPUT(quant_scale_0, TensorType({DT_UINT64})) | |||
.OPTIONAL_INPUT(relu_weight_0, TensorType({DT_FLOAT})) | |||
.OPTIONAL_INPUT(clip_value_0, TensorType({DT_FLOAT16, DT_INT8, DT_INT4})) | |||
.OPTIONAL_INPUT(quant_scale_1, TensorType({DT_UINT64})) | |||
.OPTIONAL_INPUT(relu_weight_1, TensorType({DT_FLOAT})) | |||
.OPTIONAL_INPUT(clip_value_1, TensorType({DT_FLOAT16})) | |||
.OPTIONAL_INPUT(anti_quant_scale, TensorType({DT_FLOAT16})) | |||
.OPTIONAL_INPUT(anti_quant_offset, TensorType({DT_INT8, DT_INT4})) | |||
.OUTPUT(output, TensorType({DT_FLOAT16, DT_BF16, DT_FLOAT, DT_INT32, DT_INT8, DT_INT4})) | |||
.REQUIRED_ATTR(fusion_op_list, ListString) | |||
.REQUIRED_ATTR(unit_list, ListString) | |||
.ATTR(eltwise_mode, String, "") | |||
.OP_END_FACTORY_REG(FixPipe) | |||
} // namespace ge | |||
#endif // OPS_BUILT_IN_OP_PROTO_INC_NN_CALCULATION_OPS_H_ |
@@ -1179,6 +1179,8 @@ REG_OP(SPP) | |||
* the index of the input feature map, "x1", "y1", "x2", or "y2" must be | |||
* greater than or equal to "0.0". | |||
* roi_max_num must be less than or equal to 6000 and must be divided by 16. | |||
* The input data of the rois cannot exceed the width and height range of the x, | |||
* otherwise, the accuracy of the output result may not be as expected. | |||
*@li roi_actual_num: An optional tensor of type int32, with shape [batch, 8], specifying | |||
* the number of ROIs per batch . \n | |||
@@ -2076,7 +2078,7 @@ REG_OP(GIoUGrad) | |||
* trans: An optional attr, true for 'xyxyt', false for 'xywht'. | |||
*@par Outputs: | |||
* overlaps: A 3D Tensor of type float16 or float32 with shape [B, N, K]. | |||
* overlaps: A 3D Tensor of type float32 with shape [B, N, K]. | |||
*@attention Constraints: | |||
* In each batch, the invalid box cannot appear before the valid box. | |||
@@ -2087,6 +2089,100 @@ REG_OP(RotatedOverlaps) | |||
.OUTPUT(overlaps, TensorType({DT_FLOAT})) | |||
.ATTR(trans, Bool, false) | |||
.OP_END_FACTORY_REG(RotatedOverlaps) | |||
/** | |||
*@brief RotatedIou . \n | |||
*@par Inputs: | |||
*@li boxes : data of grad increment, a 3D Tensor of type float32 with | |||
* shape (B, 5, N). "N" indicates the number of boxes, and the value | |||
* "5" refers to [x1, y1, x2, y2, theta] or [x, y, w, h, theta]. | |||
*@li query_boxes: Bounding boxes, a 3D Tensor of type float32 with | |||
* shape (B, 5, K). "K" indicates the number of boxes, and the value | |||
* "5" refers to [x1, y1, x2, y2, theta] or [x, y, w, h, theta]. | |||
*@par Attributes: | |||
*@li trans: An optional attr, true for 'xyxyt', false for 'xywht'. | |||
*@li mode: An optional attr, a character string with the value range of ['iou', 'iof'], | |||
* only support 'iou' now. | |||
*@li is_cross: Cross calculation when it is True, and one-to-one calculation when it is False. | |||
*@li v_threshold: An optional attr, provide condition relaxation for intersection calculation. | |||
*@li e_threshold: An optional attr, provide condition relaxation for intersection calculation. | |||
*@par Outputs: | |||
* iou: A 3D Tensor of float32 with shape [B, N, K]. | |||
*@attention Constraints: | |||
* In each batch, the invalid box cannot appear before the valid box. | |||
*/ | |||
REG_OP(RotatedIou) | |||
.INPUT(boxes, TensorType({DT_FLOAT})) | |||
.INPUT(query_boxes, TensorType({DT_FLOAT})) | |||
.OUTPUT(iou, TensorType({DT_FLOAT})) | |||
.ATTR(trans, Bool, false) | |||
.ATTR(mode, String, "iou") | |||
.ATTR(is_cross, Bool, true) | |||
.ATTR(v_threshold, Float, 0) | |||
.ATTR(e_threshold, Float, 0) | |||
.OP_END_FACTORY_REG(RotatedIou) | |||
/** | |||
*@brief RotatedBoxEncode. \n | |||
*@par Inputs: | |||
* Two inputs, including: | |||
*@li anchor_box: A 3D Tensor of float32 (float16) with shape (B, 5, N). | |||
* "B" indicates the number of batch size | |||
* "N" indicates the number of bounding boxes, and the value "5" refers to | |||
* "x0", "x1", "y0", "y1" and "angle". | |||
*@li gt_box: A 3D Tensor of float32 (float16) with shape (B, 5, N). | |||
* "B" indicates the number of batch size | |||
* "N" indicates the number of bounding boxes, and the value "5" refers to | |||
* "x0", "x1", "y0", "y1" and "angle". \n | |||
*@par Attributes: | |||
*@li weight: A float list for "x0", "x1", "y0", "y1" and "angle", | |||
* defaults to [1.0, 1.0, 1.0, 1.0, 1.0]. | |||
*@par Outputs: | |||
*@li y: A 3D Tensor of type float32 (float16) with shape (B, 5, N), | |||
* specifying the variations between all anchor boxes and ground truth boxes. | |||
*/ | |||
REG_OP(RotatedBoxEncode) | |||
.INPUT(anchor_box, TensorType({DT_FLOAT16, DT_FLOAT})) | |||
.INPUT(gt_box, TensorType({DT_FLOAT16, DT_FLOAT})) | |||
.OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT})) | |||
.ATTR(weight, ListFloat, {1.0, 1.0, 1.0, 1.0, 1.0}) | |||
.OP_END_FACTORY_REG(RotatedBoxEncode) | |||
/** | |||
*@brief RotatedBoxDecode. \n | |||
*@par Inputs: | |||
* Two inputs, including: | |||
*@li anchor_box: A 3D Tensor of float32 (float16) with shape (B, 5, N). | |||
* "B" indicates the number of batch size | |||
* "N" indicates the number of bounding boxes, and the value "5" refers to | |||
* "x0", "x1", "y0", "y1" and "angle". | |||
*@li deltas: A 3D Tensor of float32 (float16) with shape (B, 5, N). | |||
* "B" indicates the number of batch size | |||
* "N" indicates the number of bounding boxes, and the value "5" refers to | |||
* "x0", "x1", "y0", "y1" and "angle". \n | |||
*@par Attributes: | |||
*@li weight: A float list for "x0", "x1", "y0", "y1" and "angle", | |||
* defaults to [1.0, 1.0, 1.0, 1.0, 1.0]. | |||
*@par Outputs: | |||
*@li y: A 3D Tensor of type float32 (float16) with shape (B, 5, N), | |||
* specifying the boxes decoded from "anchor_box" and "deltas". | |||
*/ | |||
REG_OP(RotatedBoxDecode) | |||
.INPUT(anchor_box, TensorType({DT_FLOAT16, DT_FLOAT})) | |||
.INPUT(deltas, TensorType({DT_FLOAT16, DT_FLOAT})) | |||
.OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT})) | |||
.ATTR(weight, ListFloat, {1.0, 1.0, 1.0, 1.0, 1.0}) | |||
.OP_END_FACTORY_REG(RotatedBoxDecode) | |||
} // namespace ge | |||
#endif // OPS_BUILT_IN_OP_PROTO_INC_NN_DETECT_OPS_H_ | |||
@@ -1487,25 +1487,51 @@ REG_OP(Roll) | |||
.OP_END_FACTORY_REG(Roll) | |||
/** | |||
*@brief Calculate the loss. Creates a criterion that optimizes a two-class classification | |||
logistic loss between input_x and input_y (containing 1 or -1). \n | |||
* @brief Roll the tensor along the given dimension(s). | |||
*@par Inputs: | |||
*Tow inputs, including: | |||
* @par Inputs: | |||
* One input, including: | |||
* x: A tensor | |||
* @par Attributes: | |||
* @li shift: The number of places by which the elements of the tensor are shifted. \n | |||
* @li axes: Axis along which to roll. \n | |||
* @par Outputs: | |||
* y: A Tensor with the same type and shape of x's. \n | |||
* @par Third-party framework compatibility | |||
* Compatible with the Pytorch operator Roll. \n | |||
*/ | |||
REG_OP(RollV2) | |||
.INPUT(input, TensorType({DT_INT8,DT_UINT8,DT_INT16,DT_UINT16,DT_INT32,DT_INT64,DT_FLOAT16, \ | |||
DT_FLOAT,DT_DOUBLE})) | |||
.INPUT(shift, TensorType({DT_INT32,DT_INT64})) | |||
.INPUT(axes, TensorType({DT_INT32,DT_INT64})) | |||
.OUTPUT(output, TensorType({DT_INT8,DT_UINT8,DT_INT16,DT_UINT16,DT_INT32,DT_INT64,DT_FLOAT16, \ | |||
DT_FLOAT,DT_DOUBLE})) | |||
.OP_END_FACTORY_REG(RollV2) | |||
/** | |||
* @brief Calculate the loss. Creates a criterion that optimizes a two-class classification | |||
* logistic loss between input_x and input_y (containing 1 or -1). \n | |||
* @par Inputs: | |||
* Two inputs, including: | |||
* @li input_x: A tensor. Must be one of the following types: | |||
* float16, float32. \n | |||
* @li input_y: A tensor. Must be one of the following types: | |||
* float16, float32. \n | |||
*@par Attributes: | |||
*reduction: An optional string.Defaults to "mean". \n | |||
* @par Attributes: | |||
* reduction: An optional string. Defaults to "mean". \n | |||
*@par Outputs: | |||
*output_z: while reduction == "none", A Tensor with the same type and shape of input_x's. \n | |||
* @par Outputs: | |||
* output_z: while reduction == "none", A Tensor with the same type and shape of input_x's. \n | |||
* while reduction == "sum" or "mean", A Tensor with the same type of input_x , shape of which is (1,) | |||
*@par Third-party framework compatibility | |||
*Compatible with the Pytorch operator SoftMarginLoss. \n | |||
* @par Third-party framework compatibility | |||
* Compatible with the Pytorch operator SoftMarginLoss. \n | |||
*/ | |||
REG_OP(SoftMarginLoss) | |||
.INPUT(input_x, TensorType({DT_FLOAT, DT_FLOAT16})) | |||
@@ -1624,18 +1650,18 @@ REG_OP(MultilabelMarginLoss) | |||
.OP_END_FACTORY_REG(MultilabelMarginLoss) | |||
/** | |||
*@brief Performs batch normalization . \n | |||
*@par Inputs: | |||
* @brief Performs batch normalization . \n | |||
* @par Inputs: | |||
* Two inputs | |||
*@li input_x: A Tensor. Support float32. shape (n, c, d). | |||
*@li seq_len: A Tensor. Each batch normalize data num. Support Int32. Shape (n, ). \n | |||
*@par Attributes: | |||
*@li normalize_type: Str. Support "per_feature" or "all_features". | |||
*@li epsilon: An optional float32, specifying the small value added to | |||
variance to avoid dividing by zero. Defaults to "0.00001" . \n | |||
*@par Outputs: | |||
* @li input_x: A Tensor. Support float32. shape (n, c, d). | |||
* @li seq_len: A Tensor. Each batch normalize data num. Support Int32. Shape (n, ). \n | |||
* @par Attributes: | |||
* @li normalize_type: Str. Support "per_feature" or "all_features". | |||
* @li epsilon: An optional float32, specifying the small value added to | |||
* variance to avoid dividing by zero. Defaults to "0.00001" . \n | |||
* @par Outputs: | |||
* One output | |||
*@li output_y: A Tensor for the normalized "x".Support float32. shape (n, c, d).\n | |||
* @li output_y: A Tensor for the normalized "x".Support float32. shape (n, c, d).\n | |||
*/ | |||
REG_OP(NormalizeBatch) | |||
.INPUT(input_x, TensorType({ DT_FLOAT })) | |||
@@ -1644,6 +1670,36 @@ REG_OP(NormalizeBatch) | |||
.REQUIRED_ATTR(normalize_type, String) | |||
.ATTR(epsilon, Float, 0.00001) | |||
.OP_END_FACTORY_REG(NormalizeBatch) | |||
/** | |||
*@brief GroupNorm and Relu operator | |||
* calculating: x, gamma, beta | |||
* y = relu(gamma*((x - mean) / np.sqrt(variance + 0.001)) + beta) | |||
* @par Inputs: | |||
* Three inputs, including: | |||
* @li x: A Tensor. Must be one of the following types: float16, float32. | |||
* @li gamma: A Tensor. Must be one of the following types: float16, float32. | |||
* @li beta: A Tensor. Must be one of the following types: float16, float32 . \n | |||
* @par Attributes: | |||
* @li num_groups: A required attribute, the type is int32. | |||
* @li eps: An optional attribute, the type is float32. Defaults to 0.00001. \n | |||
* @par Outputs: | |||
* One output, including: | |||
* @li y: A Tensor. Must be one of the following types: float16, float32. | |||
* @par Restrictions: | |||
* Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. | |||
*/ | |||
REG_OP(GroupNormRelu) | |||
.INPUT(x, TensorType({DT_FLOAT, DT_FLOAT16})) | |||
.INPUT(gamma, TensorType({DT_FLOAT, DT_FLOAT16})) | |||
.INPUT(beta, TensorType({DT_FLOAT, DT_FLOAT16})) | |||
.OUTPUT(y, TensorType({DT_FLOAT, DT_FLOAT16})) | |||
.REQUIRED_ATTR(num_groups, Int) | |||
.ATTR(eps, Float, 0.00001) | |||
.OP_END_FACTORY_REG(GroupNormRelu) | |||
} // namespace ge | |||
#endif // OPS_BUILT_IN_OP_PROTO_INC_NN_NORM_OPS_H_ |
@@ -1747,7 +1747,8 @@ included in the sample.\n | |||
*@par Third-party framework compatibility | |||
*Compatible with the Pytorch operator SubSample. | |||
*@par Restrictions: | |||
*@attention Constraints: | |||
*Warning: This operator can be integrated only by MaskRcnn. Please do not use it directly. | |||
*/ | |||
REG_OP(SubSample) | |||
@@ -1776,7 +1777,8 @@ included in the sample.\n | |||
*@par Third-party framework compatibility | |||
*Compatible with the Pytorch operator SubSampleLabels. | |||
*@par Restrictions: | |||
*@attention Constraints: | |||
*Warning: This operator can be integrated only by MaskRcnn. Please do not use it directly. | |||
*/ | |||
REG_OP(SubSampleLabels) | |||
@@ -25,7 +25,8 @@ | |||
namespace ge { | |||
/** | |||
*@brief Computes the for the gelu of "x" . \n | |||
*@brief The GELU activation function is x*Φ(x), | |||
* where Φ(x) the standard Gaussian cumulative distribution function. \n | |||
*@par Inputs: | |||
*One input, including: | |||
@@ -144,7 +145,7 @@ REG_OP(GeluGrad) | |||
.OP_END_FACTORY_REG(GeluGrad) | |||
/** | |||
*@brief Computes the for the fast_gelu of "x" . \n | |||
*@brief The FastGelu activation function is x*e^(0.851*x)*(x-|x|)/(1+e^(-1.702|x|)). \n | |||
*@par Inputs: | |||
*One input, including: | |||
@@ -159,7 +160,23 @@ REG_OP(FastGelu) | |||
.INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT})) | |||
.OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT})) | |||
.OP_END_FACTORY_REG(FastGelu) | |||
/** | |||
*@brief The FastGeluV2 activation function is x*(sgn(x)*[(a/2)*(clip(|x|,max=-b)+b)^2+0.5]+0.5), | |||
* where sgn(x) function is (x+0.000000000001)/|(x+0.000000000001)|. \n | |||
*@par Inputs: | |||
*One input, including: | |||
*x: A Tensor. Must be one of the following types: float16, float32 | |||
*@par Outputs: | |||
*y: A Tensor. Has the same type as "x". | |||
*@par Third-party framework compatibility | |||
*Compatible with the TensorFlow operator FastGeluV2 | |||
*/ | |||
REG_OP(FastGeluV2) | |||
.INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT})) | |||
.OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT})) | |||
.OP_END_FACTORY_REG(FastGeluV2) | |||
/** | |||
*@brief Computes the gradient for the fast_gelu of "x" . \n | |||
@@ -623,9 +640,7 @@ REG_OP(Elu) | |||
*x: A float16, float32, for the input data type . \n | |||
*@par Attributes: | |||
*@li alpha1: A float32. Defines at which negative value the ELU saturates. Defaults to "1.0" . | |||
*@li alpha2: A float32. Defines at which negative value the ELU saturates. Defaults to "1.0" . | |||
*@li alpha3: A float32. Defines at which positive value the ELU saturates. Defaults to "1.0" . \n | |||
*li alpha: A float32. Defines at which negative value the ELU saturates. Defaults to "1.0" . | |||
*@par Outputs: | |||
*y: A float16, float32, for the normalized result . \n | |||
@@ -641,9 +656,7 @@ REG_OP(Elu) | |||
REG_OP(Celu) | |||
.INPUT(x, TensorType({DT_FLOAT,DT_FLOAT16})) | |||
.OUTPUT(y, TensorType({DT_FLOAT,DT_FLOAT16})) | |||
.ATTR(alpha1, Float, 1.0) | |||
.ATTR(alpha2, Float, 1.0) | |||
.ATTR(alpha3, Float, 1.0) | |||
.ATTR(alpha, Float, 1.0) | |||
.OP_END_FACTORY_REG(Celu) | |||
/** | |||
@@ -117,6 +117,33 @@ REG_OP(NPUGetFloatStatus) | |||
.INPUT(addr, TensorType{DT_FLOAT}) | |||
.OUTPUT(data, TensorType({DT_FLOAT})) | |||
.OP_END_FACTORY_REG(NPUGetFloatStatus) | |||
/** | |||
*@brief Set the value of global workspace to 0. \n | |||
*@par Restrictions: | |||
*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. | |||
*/ | |||
REG_OP(NPUClearFloatStatusV2) | |||
.OP_END_FACTORY_REG(NPUClearFloatStatusV2) | |||
/** | |||
*@brief Set the value of global workspace to 0. \n | |||
*@par Inputs: | |||
*addr: A nested structure of Tensors of type float32 . \n | |||
*@par Outputs: | |||
*data: A Tensor of type float32. | |||
*@par Restrictions: | |||
*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. | |||
*/ | |||
REG_OP(NPUGetFloatStatusV2) | |||
.DYNAMIC_INPUT(addr, TensorType{DT_FLOAT}) | |||
.OUTPUT(data, TensorType({DT_FLOAT})) | |||
.OP_END_FACTORY_REG(NPUGetFloatStatusV2) | |||
} // namespace ge | |||
#endif // OPS_BUILT_IN_OP_PROTO_INC_NPU_LOSS_SCALE_OPS_H_ |
@@ -81,6 +81,7 @@ REG_OP(OCRRecognitionPreHandle) | |||
.OUTPUT(imgs, TensorType({DT_UINT8})) | |||
.OUTPUT(imgs_relation, TensorType({DT_INT32})) | |||
.OUTPUT(imgs_lang, TensorType({DT_INT32})) | |||
.OUTPUT(imgs_piece_fillers, TensorType({DT_INT32})) | |||
.ATTR(batch_size, Int, 8) | |||
.ATTR(data_format, String, "NHWC") | |||
.ATTR(pad_mode, String, "REPLICATE") | |||
@@ -59,6 +59,65 @@ REG_OP(Multinomial) | |||
.ATTR(seed2, Int, 0) | |||
.OP_END_FACTORY_REG(Multinomial) | |||
/** | |||
*@brief Creates a multinomial distribution. \n | |||
*@par Inputs: | |||
*Inputs include: | |||
* @li q: A Tensor. Must be one of the following types: float, double. | |||
1-D Tensor with shape [num_classes]. | |||
* @li j: A Tensor. Must be one of the following types: int64. | |||
1-D Tensor with shape [num_classes]. | |||
* @li num_samples: A Tensor of type int32. 0-D. Number of independent samples to draw for each row slice . \n | |||
*@par Attributes: | |||
*@li output_dtype: An optional type from: int32, int64. Defaults to int64. | |||
*@li seed: An optional int. Defaults to 0. | |||
*@li seed2: An optional int. Defaults to 0. \n | |||
*@par Outputs: | |||
*y: A Tensor of type int32 or int64. \n | |||
*@attention Constraints: | |||
*The implementation for MultinomialAliasDraw on Ascend uses AICPU, with bad performance. | |||
*@par Third-party framework compatibility | |||
*@li compatible with torch _multinomial_alias_draw operator. | |||
*/ | |||
REG_OP(MultinomialAliasDraw) | |||
.INPUT(q, TensorType({DT_FLOAT, DT_DOUBLE})) | |||
.INPUT(j, TensorType({DT_INT64})) | |||
.OUTPUT(y, TensorType({DT_INT64})) | |||
.REQUIRED_ATTR(num_samples, Int) | |||
.ATTR(seed, Int, 0) | |||
.OP_END_FACTORY_REG(MultinomialAliasDraw) | |||
/** | |||
*@brief Prepares for MultinomialAliasDraw to create a multinomial distribution. \n | |||
*@par Inputs: | |||
*Inputs include: | |||
* @li probs: A Tensor. Must be one of the following types: float, double. | |||
1-D Tensor with shape [num_classes]. \n | |||
*@par Outputs: | |||
*j: A Tensor. Must be one of the following types: int64. | |||
1-D Tensor with shape [num_classes]. | |||
*q: A Tensor. Must be one of the following types: float, double. | |||
1-D Tensor with shape [num_classes]. \n | |||
*@attention Constraints: | |||
*The implementation for MultinomialAliasSetup on Ascend uses AICPU, with bad performance. | |||
*@par Third-party framework compatibility | |||
*@li compatible with torch _multinomial_alias_setup operator. | |||
*/ | |||
REG_OP(MultinomialAliasSetup) | |||
.INPUT(probs, TensorType({DT_FLOAT, DT_DOUBLE})) | |||
.OUTPUT(j, TensorType({DT_INT64})) | |||
.OUTPUT(q, TensorType({DT_FLOAT, DT_DOUBLE})) | |||
.OP_END_FACTORY_REG(MultinomialAliasSetup) | |||
/** | |||
*@brief Outputs random values from a normal distribution . \n | |||
@@ -173,6 +232,27 @@ REG_OP(Randperm) | |||
.ATTR(dtype, Type, DT_INT64) | |||
.OP_END_FACTORY_REG(Randperm) | |||
/** | |||
*@brief Fills a tensor with elements drawn from the poisson distribution. \n | |||
*@par Inputs: | |||
*x: A Tensor. Must be one of the following types: float16, float. \n | |||
*@par Attributes: | |||
*@li seed: An optional int. Defaults to 0. \n | |||
*@par Outputs: | |||
*y: A Tensor list with same type as "x" . \n | |||
*@par Third-party framework compatibility | |||
* Compatible with the Pytorch operator Poisson. | |||
*/ | |||
REG_OP(Poisson) | |||
.INPUT(x, TensorType({ DT_FLOAT16,DT_FLOAT })) | |||
.OUTPUT(y, TensorType({ DT_FLOAT16,DT_FLOAT })) | |||
.ATTR(seed, Int, 0) | |||
.OP_END_FACTORY_REG(Poisson) | |||
/** | |||
*@brief Outputs random values from the Poisson distribution(s) described by rate . \n | |||
@@ -446,6 +526,34 @@ REG_OP(DropOutGenMaskV3) | |||
.ATTR(seed2, Int, 0) | |||
.OP_END_FACTORY_REG(DropOutGenMaskV3) | |||
/** | |||
*@brief Generate stateless random bit mask for dropout . \n | |||
*@par Inputs: | |||
include: | |||
*@li shape:The shape of the output tensor. | |||
*@li prob:0-D. Number of bit 1 . \n | |||
*@li seed:If either seed or seed2 are set to be non-zero, the random number | |||
*generator is seeded by the given seed. Otherwise, it is seeded by a random seed. | |||
*@li seed1:A second seed to avoid seed collision . \n | |||
*@par Outputs: | |||
*y:Output (1-D) random number using uint data format . \n | |||
*@attention Constraints: | |||
*The output is aligned with 128 bits | |||
*@see StatelessDropOutGenMask() | |||
*/ | |||
REG_OP(StatelessDropOutGenMask) | |||
.INPUT(shape, TensorType({ DT_INT32, DT_INT64 })) | |||
.INPUT(prob, TensorType({ DT_FLOAT16, DT_FLOAT })) | |||
.INPUT(seed, TensorType({ DT_INT32, DT_INT64 })) | |||
.INPUT(seed1, TensorType({ DT_INT32, DT_INT64 })) | |||
.OUTPUT(y, TensorType({ DT_UINT8 })) | |||
.OP_END_FACTORY_REG(StatelessDropOutGenMask) | |||
/** | |||
*@brief Generates values in an interval . \n | |||
@@ -698,11 +806,62 @@ REG_OP(Uniform) | |||
*@attention Constraints: | |||
* Compatible with the Caffe operator ContinuationIndicator. | |||
*/ | |||
REG_OP(ContinuationIndicator) | |||
.REQUIRED_ATTR(time_step, Int) | |||
.REQUIRED_ATTR(batch_size, Int) | |||
.OUTPUT(y, TensorType({DT_FLOAT})) | |||
.OP_END_FACTORY_REG(ContinuationIndicator) | |||
/** | |||
*@brief Outputs random values from the Exponential distribution(s) described by rate . \n | |||
*@par Inputs: | |||
*Inputs include: | |||
* @li x: A Tensor. Must be one of the following types: half, float32, float64. \n | |||
*@par Attributes: | |||
*@li lambda: An optional float. Defaults to 1. | |||
*@li seed: An optional int. Defaults to 0. The random number generator is seeded by the given seed. | |||
Otherwise, it is seeded by a random seed. \n | |||
*@par Outputs: | |||
*y: A Tensor of type dtype float16, float, double. \n | |||
*@attention Constraints: | |||
*The implementation for Exponential on Ascend uses AICPU, with bad performance. | |||
*@par Third-party framework compatibility | |||
*@li compatible with tensorflow Exponential operator. | |||
*/ | |||
REG_OP(Exponential) | |||
.INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE})) | |||
.OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE})) | |||
.ATTR(lambda, Float, 1) | |||
.ATTR(seed, Int, 0) | |||
.OP_END_FACTORY_REG(Exponential) | |||
/** | |||
*@brief Fills a tensor with elements drawn from the geometric distribution. \n | |||
*@par Inputs: | |||
*x: A Tensor. Must be one of the following types: float16, float. \n | |||
*@par Attributes: | |||
*@li p: The probability of experimental success in Bernoulli's experiment. | |||
*@li seed: An optional int. Defaults to 0. \n | |||
*@par Outputs: | |||
*y: A Tensor list with same type as "x" . \n | |||
*@par Third-party framework compatibility | |||
* Compatible with the Pytorch operator Geometric. | |||
*/ | |||
REG_OP(Geometric) | |||
.INPUT(x, TensorType({ DT_FLOAT16,DT_FLOAT })) | |||
.OUTPUT(y, TensorType({ DT_FLOAT16,DT_FLOAT })) | |||
.REQUIRED_ATTR(p, Float) | |||
.ATTR(seed, Int, 0) | |||
.OP_END_FACTORY_REG(Geometric) | |||
} // namespace ge | |||
#endif // OPS_BUILT_IN_OP_PROTO_INC_RANDOM_OPS_H_ |
@@ -0,0 +1,139 @@ | |||
/** | |||
* Copyright (c) Huawei Technologies Co., Ltd. 2022. All rights reserved. | |||
* | |||
* Licensed under the Apache License, Version 2.0 (the "License"); | |||
* you may not use this file except in compliance with the License. | |||
* You may obtain a copy of the License at | |||
* | |||
* http://www.apache.org/licenses/LICENSE-2.0 | |||
* | |||
* Unless required by applicable law or agreed to in writing, software | |||
* distributed under the License is distributed on an "AS IS" BASIS, | |||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
* See the License for the specific language governing permissions and | |||
* limitations under the License. | |||
*/ | |||
/*! | |||
* \file randomdsa_ops.h | |||
* \brief | |||
*/ | |||
#ifndef OPS_BUILT_IN_OP_PROTO_INC_RANDOMDSA_OPS_H_ | |||
#define OPS_BUILT_IN_OP_PROTO_INC_RANDOMDSA_OPS_H_ | |||
#include <vector> | |||
#include "graph/operator_reg.h" | |||
#include "graph/operator.h" | |||
namespace ge { | |||
/** | |||
* @brief Generate DSA random bit mask for dropout. \n | |||
* @par Inputs: | |||
include: | |||
* @li count:The shape of the input tensor. | |||
* @li seed:If seed is set to be non-zero, the random number | |||
* generator is seeded by the given seed. Otherwise, it is seeded by a random seed | |||
* @li dropout:0-D. Number of bit 1 . \n | |||
* @par Attributes: | |||
* @li random_algorithm:The default value is "Philox". \n | |||
* @par Outputs: | |||
* out:Output (1-D) random number using uint data format . \n | |||
* @see DSAGenBitMask() | |||
*/ | |||
REG_OP(DSAGenBitMask) | |||
.INPUT(count, TensorType({DT_INT64})) | |||
.INPUT(seed, TensorType({DT_UINT64})) | |||
.INPUT(dropout, TensorType({DT_FLOAT16, DT_FLOAT, DT_BF16})) | |||
.OUTPUT(out, TensorType({DT_UINT8})) | |||
.ATTR(random_algorithm, String, "Philox") | |||
.OP_END_FACTORY_REG(DSAGenBitMask) | |||
/** | |||
* @brief Generate DSA truncatenormal data in random. \n | |||
* @par Inputs: | |||
include: | |||
* @li count: The shape of the input tensor. | |||
* @li seed: If seed is set to be non-zero, the random number | |||
* generator is seeded by the given seed. Otherwise, it is seeded by a random seed | |||
* @li mean: A Tensor. Must be one of the following types: float16, float32, double | |||
* @li stdev: A Tensor. Must be one of the following types: float16, float32, double. \n | |||
* @par Attributes: | |||
* @li random_algorithm:The default value is "Philox". \n | |||
* @par Outputs: | |||
* out:Output (1-D) random number using float and bf data format . \n | |||
* @see DSARandomTruncatedNormal() | |||
*/ | |||
REG_OP(DSARandomTruncatedNormal) | |||
.INPUT(count, TensorType({DT_INT64})) | |||
.INPUT(seed, TensorType({DT_UINT64})) | |||
.INPUT(mean, TensorType({DT_FLOAT16, DT_FLOAT, DT_BF16})) | |||
.INPUT(stdev, TensorType({DT_FLOAT16, DT_FLOAT, DT_BF16})) | |||
.OUTPUT(out, TensorType({DT_FLOAT16, DT_FLOAT32, DT_BF16})) | |||
.ATTR(random_algorithm, String, "Philox") | |||
.OP_END_FACTORY_REG(DSARandomTruncatedNormal) | |||
/** | |||
* @brief Generate DSA normal data in random. \n | |||
* @par Inputs: | |||
include: | |||
* @li count: The shape of the input tensor. | |||
* @li seed: If seed is set to be non-zero, the random number | |||
* generator is seeded by the given seed. Otherwise, it is seeded by a random seed | |||
* @li mean: A Tensor. Must be one of the following types: float16, float32, double | |||
* @li stdev: A Tensor. Must be one of the following types: float16, float32, double. \n | |||
* @par Attributes: | |||
* @li random_algorithm:The default value is "Philox". \n | |||
* @par Outputs: | |||
* out:Output (1-D) random number using float and bf data format . \n | |||
* @see DSARandomNormal() | |||
*/ | |||
REG_OP(DSARandomNormal) | |||
.INPUT(count, TensorType({DT_INT64})) | |||
.INPUT(seed, TensorType({DT_UINT64})) | |||
.INPUT(mean, TensorType({DT_FLOAT16, DT_FLOAT, DT_BF16})) | |||
.INPUT(stdev, TensorType({DT_FLOAT16, DT_FLOAT, DT_BF16})) | |||
.OUTPUT(out, TensorType({DT_FLOAT16, DT_FLOAT32, DT_BF16})) | |||
.ATTR(random_algorithm, String, "Philox") | |||
.OP_END_FACTORY_REG(DSARandomNormal) | |||
/** | |||
* @brief Generate DSA uniform data in random. \n | |||
* @par Inputs: | |||
include: | |||
* @li count: The shape of the input tensor. | |||
* @li seed: If seed is set to be non-zero, the random number | |||
* generator is seeded by the given seed. Otherwise, it is seeded by a random seed | |||
* @li low: A Tensor. Must be one of the following types: int, float, bf | |||
* @li high: A Tensor. Must be one of the following types: int, float, bf. \n | |||
* @par Attributes: | |||
* @li random_algorithm:The default value is "Philox". \n | |||
* @par Outputs: | |||
* out:Output (1-D) random number using float int and bf data format . \n | |||
* @see DSARandomUniform() | |||
*/ | |||
REG_OP(DSARandomUniform) | |||
.INPUT(count, TensorType({DT_INT64})) | |||
.INPUT(seed, TensorType({DT_UINT64})) | |||
.INPUT(low, TensorType({DT_BF16, DT_FLOAT16, DT_FLOAT, DT_INT32, DT_INT64, DT_UINT32, DT_UINT64})) | |||
.INPUT(high, TensorType({DT_BF16, DT_FLOAT16, DT_FLOAT, DT_INT32, DT_INT64, DT_UINT32, DT_UINT64})) | |||
.OUTPUT(out, TensorType({DT_BF16, DT_FLOAT16, DT_FLOAT, DT_INT32, DT_INT64, DT_UINT32, DT_UINT64})) | |||
.ATTR(random_algorithm, String, "Philox") | |||
.OP_END_FACTORY_REG(DSARandomUniform) | |||
} | |||
#endif // OPS_BUILT_IN_OP_PROTO_INC_RANDOMDSA_OPS_H |
@@ -515,6 +515,34 @@ REG_OP(ReduceSumD) | |||
.ATTR(keep_dims, Bool, false) | |||
.OP_END_FACTORY_REG(ReduceSumD) | |||
/** | |||
*@brief Calculate the total mean based on the mean of each device . \n | |||
*@par Inputs: | |||
* Three inputs, including: | |||
*@li x: A Tensor. Must be one of the following types: float16, float32 . | |||
*@li count: A Tensor. Must be one of the following types: float16, float32 . | |||
*@li count_sum: A Tensor. Must be one of the following types: float16, float32 . \n | |||
*@par Attributes: | |||
*@li axes: A required 1D list or tuple of int32 or int64. Specifies the dimensions to reduce. | |||
*@li keepdims: An optional bool. If "true", retains reduced dimensions with length 1. Defaults to "false" . \n | |||
*@par Outputs: | |||
*y: The reduced tensor. Has the same type and format as input "x" . \n | |||
*@par Third-party framework compatibility | |||
* Compatible with the TensorFlow operator Sum. | |||
*/ | |||
REG_OP(ReduceMeanWithCount) | |||
.INPUT(x, TensorType({DT_FLOAT, DT_FLOAT16})) | |||
.INPUT(count, TensorType({DT_FLOAT, DT_FLOAT16})) | |||
.INPUT(count_sum, TensorType({DT_FLOAT, DT_FLOAT16})) | |||
.OUTPUT(y, TensorType({DT_FLOAT, DT_FLOAT16})) | |||
.REQUIRED_ATTR(axes, ListInt) | |||
.ATTR(keep_dims, Bool, false) | |||
.OP_END_FACTORY_REG(ReduceMeanWithCount) | |||
/** | |||
*@brief Calculates the "logical sum" of elements of a tensor in a dimension . \n | |||
@@ -1326,6 +1354,101 @@ REG_OP(ReduceMeanVariance) | |||
.ATTR(axes, ListInt, {}) | |||
.ATTR(keep_dims, Bool, true) | |||
.OP_END_FACTORY_REG(ReduceMeanVariance) | |||
/** | |||
* @brief Calculates the standard deviation or the variance of Tensors with the average value. | |||
* @par Inputs: | |||
* Two inputs, including: | |||
* @li x: A Tensor. Must be one of the following types: float16, float32. \n | |||
* @li mean: A Tensor. It's the mean of X. Has the same shape and type as "x" \n | |||
* @par Attributes: | |||
* Four Attributes, including: | |||
* @li dim: An listint. \n | |||
* @li if_std: An optional bool. Defaults to "False" | |||
* If "True", Calculate the standard deviation | |||
* If "False", Calculate the variance | |||
* @li unbiased: An optional bool. Defaults to "True". | |||
* If "True", Use Bessel Correction. | |||
* If "False", Do not use Bessel Correction. \n | |||
* @li keepdim: An optional bool. Defaults to "False". | |||
* If "True", Keep the original tensor dimension. | |||
* If "False", Do not keep the original tensor dimension. \n | |||
* @par Outputs: | |||
* @li output_var: A Tensor. It's the standard deviation or the variance of X. Has the same type as "x". | |||
* @par Third-party framework compatibility | |||
* Compatible with the Pytorch operator Var_mean. | |||
*/ | |||
REG_OP(ReduceStdV2Update) | |||
.INPUT(x, TensorType({DT_FLOAT,DT_FLOAT16})) | |||
.INPUT(mean, TensorType({DT_FLOAT,DT_FLOAT16})) | |||
.OUTPUT(output_var, TensorType({DT_FLOAT,DT_FLOAT16})) | |||
.REQUIRED_ATTR(dim, ListInt) | |||
.ATTR(if_std, Bool, false) | |||
.ATTR(unbiased, Bool, true) | |||
.ATTR(keepdim, Bool, false) | |||
.OP_END_FACTORY_REG(ReduceStdV2Update) | |||
/** | |||
*@brief Computes the log and sum and exp of elements across dimensions of a tensor. | |||
* Reduces "x" along the dimensions given in "axes". | |||
* Unless "keep_dims" is true, the rank of the tensor is reduced by 1 for each | |||
* entry in "axes". If "keep_dims" is true, the reduced dimensions | |||
* are retained with length 1. | |||
* | |||
*@par Inputs: | |||
* Two inputs, including: | |||
*@li x: A Tensor. Must be one of the following types: | |||
* float32, float16, int32, int64, uint32, uint64, double | |||
*@li axes: A 1D list or tuple of int32 or int64. Specifies the dimensions to reduce . \n | |||
* | |||
*@par Attributes: | |||
*keep_dims: An optional bool. If "true", retains reduced dimensions with length 1. Defaults to "false" . \n | |||
* | |||
*@par Outputs: | |||
*y: The reduced tensor. Has the same type and format as input "x" . \n | |||
* | |||
*@par Third-party framework compatibility | |||
* Compatible with the Onnx operator ReduceLogSumExp. | |||
*/ | |||
REG_OP(ReduceLogSumExp) | |||
.INPUT(x, TensorType::NumberType()) | |||
.INPUT(axes, TensorType::IndexNumberType()) | |||
.OUTPUT(y, TensorType::NumberType()) | |||
.ATTR(keep_dims, Bool, false) | |||
.OP_END_FACTORY_REG(ReduceLogSumExp) | |||
/** | |||
*@brief Computes the log and sum of elements across dimensions of a tensor. | |||
* Reduces "x" along the dimensions given in "axes". | |||
* Unless "keep_dims" is true, the rank of the tensor is reduced by 1 for each | |||
* entry in "axes". If "keep_dims" is true, the reduced dimensions | |||
* are retained with length 1. | |||
* | |||
*@par Inputs: | |||
* Two inputs, including: | |||
*@li x: A Tensor. Must be one of the following types: | |||
* float32, float16, int32, int64, uint32, uint64, double | |||
*@li axes: A 1D list or tuple of int32 or int64. Specifies the dimensions to reduce . \n | |||
* | |||
*@par Attributes: | |||
*keep_dims: An optional bool. If "true", retains reduced dimensions with length 1. Defaults to "false" . \n | |||
* | |||
*@par Outputs: | |||
*y: The reduced tensor. Has the same type and format as input "x" . \n | |||
* | |||
*@par Third-party framework compatibility | |||
* Compatible with the Onnx operator ReduceLogSum. | |||
*/ | |||
REG_OP(ReduceLogSum) | |||
.INPUT(x, TensorType::NumberType()) | |||
.INPUT(axes, TensorType::IndexNumberType()) | |||
.OUTPUT(y, TensorType::NumberType()) | |||
.ATTR(keep_dims, Bool, false) | |||
.OP_END_FACTORY_REG(ReduceLogSum) | |||
} //namespace ge | |||
#endif // OPS_BUILT_IN_OP_PROTO_INC_REDUCE_OPS_H_ |
@@ -1280,6 +1280,44 @@ REG_OP(EmbeddingBag) | |||
.ATTR(sparse, Bool, false) | |||
.ATTR(include_last_offset, Bool, false) | |||
.OP_END_FACTORY_REG(EmbeddingBag) | |||
/** | |||
* @brief:LSTMP calculation | |||
* @par Inputs: | |||
* eight inputs: | |||
* @li x:A required Tensor(seq, batch, dim). Must be one of the following types: float16, float32. | |||
* @li real_mask:A optional Tensor(seq, batch). Must be one of the following types: float16, float32. | |||
* @li init_h:A optional Tensor(batch, state). Must be one of the following types: float16, float32. | |||
* @li init_c:A optional Tensor(batch, hidden). Must be one of the following types: float16, float32. | |||
* @li wx:A required Tensor(4*hidden, dim). Must be one of the following types: float16, float32. | |||
* @li wr:A required Tensor(4*hidden, state). Must be one of the following types: float16, float32. | |||
* @li bias:A optional Tensor(hidden). Must be one of the following types: float16, float32. The format must be ND. | |||
* @li project: A optional Tensor. Must be one of the following types: float16, float32. | |||
* | |||
* @par Outputs: | |||
*three outputs: | |||
*@li y:A Tensor. Must be one of the following types: float16, float32. | |||
*@li output_h:A Tensor. Must be one of the following types: float16, float32. | |||
*@li output_c:A Tensor. Must be one of the following types: float16, float32. | |||
* | |||
*@par Attributes: | |||
*time_major:An bool identifying the time major in the op. Default to false. | |||
* @par Restrictions: | |||
* Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. | |||
*/ | |||
REG_OP(LSTMP) | |||
.INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT})) | |||
.INPUT(wx, TensorType({DT_FLOAT16, DT_FLOAT})) | |||
.INPUT(bias, TensorType({DT_FLOAT16, DT_FLOAT})) | |||
.INPUT(wr, TensorType({DT_FLOAT16, DT_FLOAT})) | |||
.INPUT(project, TensorType({DT_FLOAT16, DT_FLOAT})) | |||
.OPTIONAL_INPUT(real_mask, TensorType({DT_FLOAT16, DT_FLOAT})) | |||
.OPTIONAL_INPUT(init_h, TensorType({DT_FLOAT16, DT_FLOAT})) | |||
.OPTIONAL_INPUT(init_c, TensorType({DT_FLOAT16, DT_FLOAT})) | |||
.OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT})) | |||
.OUTPUT(output_h, TensorType({DT_FLOAT16, DT_FLOAT})) | |||
.OUTPUT(output_c, TensorType({DT_FLOAT16, DT_FLOAT})) | |||
.ATTR(time_major, Bool, false) | |||
.OP_END_FACTORY_REG(LSTMP) | |||
} // namespace ge | |||
#endif // OPS_BUILT_IN_OP_PROTO_INC_RNN_H_ |
@@ -259,13 +259,39 @@ REG_OP(GatherV2D) | |||
*@par Third-party framework compatibility | |||
*Compatible with the PyTorch operator Gather. | |||
*/ | |||
REG_OP(GatherElements) | |||
.INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32, DT_INT64})) | |||
.INPUT(x, TensorType({DT_FLOAT16,DT_FLOAT,DT_INT8,DT_INT16,DT_INT32, | |||
DT_INT64,DT_UINT8,DT_UINT16,DT_UINT32,DT_UINT64})) | |||
.INPUT(index, TensorType({DT_INT32,DT_INT64})) | |||
.OUTPUT(y, TensorType({DT_FLOAT16,DT_FLOAT,DT_INT8,DT_INT16,DT_INT32, | |||
DT_INT64,DT_UINT8,DT_UINT16,DT_UINT32,DT_UINT64})) | |||
.ATTR(dim, Int, 0) | |||
.OP_END_FACTORY_REG(GatherElements) | |||
/** | |||
*@Gathers values along an axis specified by dim . \n | |||
*@par Inputs: | |||
*@li x: A Tensor. Must be one of the following types: float32, float64, int32, uint8, int16, int8, | |||
* int64, uint16, float16, uint32, uint64, bool. | |||
*@li dim: A Tensor. Must be one of the following types: int32, int64. | |||
*@li index: A Tensor. Must be one of the following types: int32, int64 . \n | |||
*@par Outputs: | |||
* y: A Tensor. Has the same type as "x" . \n | |||
*@par Third-party framework compatibility | |||
*Compatible with the PyTorch operator Gather. | |||
*/ | |||
REG_OP(GatherD) | |||
.INPUT(x, TensorType({DT_INT8, DT_UINT8, DT_INT16, DT_UINT16, DT_INT32, DT_UINT32 | |||
DT_INT64, DT_UINT64, DT_BOOL, DT_FLOAT16, DT_FLOAT, DT_DOUBLE})) | |||
.INPUT(dim, TensorType({DT_INT32, DT_INT64})) | |||
.INPUT(index, TensorType({DT_INT32, DT_INT64})) | |||
.OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32, DT_INT64})) | |||
.ATTR(dim, Int, 0) | |||
.OP_END_FACTORY_REG(GatherElements) | |||
.OP_END_FACTORY_REG(GatherD) | |||
/** | |||
*@brief Extracts a strided slice of a tensor. Roughly speaking, this op | |||
@@ -360,9 +386,9 @@ REG_OP(StridedSlice) | |||
* Warning: THIS FUNCTION IS DEPRECATED. Please use StridedSlice instead. | |||
*/ | |||
REG_OP(StridedSliceD) | |||
.INPUT(x, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT32, DT_UINT8, DT_INT8, | |||
.INPUT(x, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT32, DT_INT64, DT_UINT8, DT_INT8, | |||
DT_BOOL})) | |||
.OUTPUT(y, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT32, DT_UINT8, DT_INT8, | |||
.OUTPUT(y, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT32, DT_INT64, DT_UINT8, DT_INT8, | |||
DT_BOOL})) | |||
.REQUIRED_ATTR(begin, ListInt) | |||
.REQUIRED_ATTR(end, ListInt) | |||
@@ -700,6 +726,27 @@ REG_OP(SegmentMax) | |||
.OUTPUT(y, TensorType::RealNumberType()) | |||
.OP_END_FACTORY_REG(SegmentMax) | |||
/** | |||
*@brief Computes the sum along segments of a tensor . \n | |||
*@par Inputs: | |||
*Two inputs, including: | |||
* @li x: A Tensor of type NumberType. | |||
* @li segment_ids: A Tensor of type IndexNumberType, whose shape is a prefix | |||
* of "x.shape". | |||
*@par Outputs: | |||
*y: A Tensor of type NumberType . \n | |||
*@par Third-party framework compatibility | |||
* Compatible with the TensorFlow operator SegmentSum. | |||
*/ | |||
REG_OP(SegmentSum) | |||
.INPUT(x, TensorType::NumberType()) | |||
.INPUT(segment_ids, TensorType::IndexNumberType()) | |||
.OUTPUT(y, TensorType::NumberType()) | |||
.OP_END_FACTORY_REG(SegmentSum) | |||
/** | |||
*@brief: Computes the maximum along segments of a tensor. | |||
*Computes a tensor such that output[i]=(data[i]) where max is over j | |||
@@ -929,6 +976,49 @@ REG_OP(TopKD) | |||
* @brief Finds values and indices of the "k" largest elements for the last | |||
* dimension . \n | |||
* @par Inputs: | |||
* Two inputs, including: | |||
* @li x: A 1D or higher tensor of type BasicType, with the last dimension | |||
* at least "k". | |||
* @li k: A 0D Tensor of type int32. | |||
* Number of top elements to look for along the last dimension (along each row | |||
* for matrices) . | |||
* @li assist_seq: A 1D tensor of type float16. | |||
* with size of 2N, which "N" is the last dimension. | |||
* The first N numbers is indices, and the next N numbers is deviation of casting | |||
* int32 to float16. \n | |||
* @par Attributes: | |||
* @li sorted: An optional bool. Defaults to true. | |||
* If true, the resulting "k" elements will be sorted by the values in descending | |||
* order. | |||
* @li dim: An optional int. Defaults to -1. For reserved use. | |||
* @li largest: An optional bool. Defaults to true. For reserved use. \n | |||
* @par Outputs: | |||
* @li values: A Tensor, specifying the sorted data. Has the same type as | |||
* "input". | |||
* @li indices: A Tensor of type int32, specifying the indices of sorted data . \n | |||
* @see TopK() | |||
* @par Third-party framework compatibility | |||
* @li Compatible with the TensorFlow operator TopKV2. | |||
*/ | |||
REG_OP(TopKV2D) | |||
.INPUT(x, TensorType::RealNumberType()) | |||
.INPUT(k, TensorType({DT_INT32})) | |||
.INPUT(assist_seq, TensorType({DT_FLOAT16})) | |||
.OUTPUT(values, TensorType::RealNumberType()) | |||
.OUTPUT(indices, TensorType({DT_INT32})) | |||
.ATTR(sorted, Bool, true) | |||
.ATTR(dim, Int, -1) | |||
.ATTR(largest, Bool, true) | |||
.OP_END_FACTORY_REG(TopKV2D) | |||
/** | |||
* @brief Finds values and indices of the "k" largest elements for the last | |||
* dimension . \n | |||
* @par Inputs: | |||
* Two inputs, including: | |||
* @li x: A 1D or higher tensor of type BasicType, with the last dimension | |||
@@ -2340,7 +2430,7 @@ REG_OP(AddRowRanges) | |||
*@par Outputs: | |||
*y: A ND Tensor of float32/float16/int32/int8 with shapes 1-D (D,), 2-D(N, D), 3-D(N, C, D) | |||
* @par Restrictions: | |||
*@attention Constraints: | |||
* Warning: input shape's length must not be bigger than 1024 * 1024 * 1024. | |||
*/ | |||
REG_OP(MaskedFillRange) | |||
@@ -2442,6 +2532,34 @@ REG_OP(StridedSliceV3) | |||
.OPTIONAL_INPUT(strides, TensorType::IndexNumberType()) | |||
.OUTPUT(y, TensorType::BasicType()) | |||
.OP_END_FACTORY_REG(StridedSliceV3) | |||
/** | |||
*@brief MovingSumWithSigmoid. | |||
*@par Inputs: | |||
*Four inputs, including: | |||
* @li alpha: A Tensor. Must be one of the following types: float32, float16. | |||
* @li energy: A Tensor. Must be one of the following types: float32, float16. | |||
* @li beam_size: A Tensor of type int32. | |||
* @li frame_size: A Tensor of type int32. \n | |||
*@par Outputs: | |||
* y: A Tensor. Has the same type as "alpha". \n | |||
* | |||
* @par Attributes: | |||
* window_size: A int. | |||
* | |||
* @par Restrictions: | |||
* Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. | |||
*/ | |||
REG_OP(MovingSumWithSigmoid) | |||
.INPUT(alpha, TensorType({DT_FLOAT16, DT_FLOAT})) | |||
.INPUT(energy, TensorType({DT_FLOAT16, DT_FLOAT})) | |||
.INPUT(beam_size, TensorType({DT_INT32})) | |||
.INPUT(frame_size, TensorType({DT_INT32})) | |||
.OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT})) | |||
.REQUIRED_ATTR(window_size, Int) | |||
.OP_END_FACTORY_REG(MovingSumWithSigmoid) | |||
} // namespace ge | |||
#endif // OPS_BUILT_IN_OP_PROTO_INC_SELECTION_OPS_H_ |
@@ -381,6 +381,30 @@ REG_OP(ConcatOffsetD) | |||
.REQUIRED_ATTR(concat_dim, Int) | |||
.REQUIRED_ATTR(N, Int) | |||
.OP_END_FACTORY_REG(ConcatOffsetD) | |||
/** | |||
*@brief Compute combinations of length of the given tensor. \n | |||
*@par Inputs: | |||
*x: A list of 1D Tensor objects. \n | |||
*@par Attributes: | |||
*@li r: An optional int indicates number of elements to combine. Defaults to 2. | |||
*@li with_replacement: An optional bool indicates whether to allow duplication | |||
*in combination. Defaults to "False". \n | |||
*@par Outputs: | |||
*y: A Tensor list with same type as "x" . \n | |||
*@par Third-party framework compatibility | |||
*@ Compatible with the Pytorch operator Combinations. | |||
*/ | |||
REG_OP(Combinations) | |||
.INPUT(x, TensorType::ALL()) | |||
.OUTPUT(y, TensorType::ALL()) | |||
.ATTR(r, Int, 2) | |||
.ATTR(with_replacement, Bool, false) | |||
.OP_END_FACTORY_REG(Combinations) | |||
} // namespace ge | |||
#endif // OPS_BUILT_IN_OP_PROTO_INC_SPLIT_COMBINATION_OPS_H_ |
@@ -1,5 +1,5 @@ | |||
/** | |||
* Copyright 2021 Huawei Technologies Co., Ltd | |||
* Copyright (c) Huawei Technologies Co., Ltd. 2021. All rights reserved. | |||
* | |||
* Licensed under the Apache License, Version 2.0 (the "License"); | |||
* you may not use this file except in compliance with the License. | |||
@@ -154,43 +154,6 @@ REG_OP(CalcBucketsLimitAndOffset) | |||
.OUTPUT(buckets_offset, TensorType({DT_INT32, DT_INT64})) | |||
.REQUIRED_ATTR(total_limit, Int) | |||
.OP_END_FACTORY_REG(CalcBucketsLimitAndOffset) | |||
/** | |||
* @brief Calculate ProdVirialSeA. \n | |||
* | |||
* @par Inputs: | |||
* Five inputs, including: | |||
* @li net_deriv: A Tensor. Must be one of the following types: float16, float32, float64. | |||
* @li in_deriv: A Tensor. Must be one of the following types: float16, float32, float64. | |||
* @li rij: A Tensor. Must be one of the following types: float16, float32, float64. | |||
* @li nlist: A Tensor. dtype is int32. | |||
* @li natoms: A Tensor. dtype is int32. \n | |||
* | |||
* @par Outputs: | |||
* Two outputs, including: | |||
* @li virial: A Tensor. Must be one of the following types: float16, float32, float64. | |||
* @li atom_virial: A Tensor. Must be one of the following types: float16, float32, float64. \n | |||
* | |||
* @par Attributes: | |||
* Two attributes, including: | |||
* @li n_a_sel: A Scalar. | |||
* @li n_r_sel: A Scalar. \n | |||
* | |||
* @par Restrictions: | |||
* Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. | |||
*/ | |||
REG_OP(ProdVirialSeA) | |||
.INPUT(net_deriv, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE})) | |||
.INPUT(in_deriv, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE})) | |||
.INPUT(rij, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE})) | |||
.INPUT(nlist, TensorType({DT_INT32})) | |||
.INPUT(natoms, TensorType({DT_INT32})) | |||
.OUTPUT(virial, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE})) | |||
.OUTPUT(atom_virial, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE})) | |||
.REQUIRED_ATTR(n_a_sel, Int) | |||
.REQUIRED_ATTR(n_r_sel, Int) | |||
.ATTR(nall, Int, 28328) | |||
.OP_END_FACTORY_REG(ProdVirialSeA) | |||
} // namespace ge | |||
#endif // OPS_BUILT_IN_OP_PROTO_INC_VECTOR_SEARCH_H_ |
@@ -98,11 +98,11 @@ typedef struct rtExceptionInfo { | |||
uint32_t tid; | |||
uint32_t deviceid; | |||
uint32_t retcode; | |||
} rtExceptionInfo; | |||
} rtExceptionInfo_t; | |||
typedef void (*rtErrorCallback)(rtExceptionType); | |||
typedef void (*rtTaskFailCallback)(rtExceptionInfo *exceptionInfo); | |||
typedef void (*rtTaskFailCallback)(rtExceptionInfo_t *exceptionInfo); | |||
typedef void (*rtDeviceStateCallback)(uint32_t devId, bool isOpen); | |||
@@ -429,6 +429,15 @@ RTS_API rtError_t rtLabelCreateExV2(rtLabel_t *lbl, rtModel_t mdl, rtStream_t st | |||
* @return RT_ERROR_INVALID_VALUE for input null ptr | |||
*/ | |||
RTS_API rtError_t rtGetTaskIdAndStreamID(uint32_t *taskId, uint32_t *streamId); | |||
/** | |||
* @ingroup dvrt_base | |||
* @brief get max model num | |||
* @param [out] max model num | |||
* @param [in] null | |||
* @return RT_ERROR_NONE for ok | |||
*/ | |||
RTS_API rtError_t rtGetMaxModelNum(uint32_t *maxModelCount); | |||
#if defined(__cplusplus) | |||
} | |||
#endif | |||
@@ -22,7 +22,8 @@ typedef enum tagRtArchType { | |||
ARCH_BEGIN = 0, | |||
ARCH_V100 = ARCH_BEGIN, | |||
ARCH_V200 = 1, | |||
ARCH_END = 2, | |||
ARCH_V300 = 2, | |||
ARCH_END = 3, | |||
} rtArchType_t; | |||
typedef enum tagRtChipType { | |||
@@ -34,7 +35,8 @@ typedef enum tagRtChipType { | |||
CHIP_DC = 4, | |||
CHIP_CLOUD_V2 = 5, | |||
CHIP_NO_DEVICE = 6, | |||
CHIP_END = 7, | |||
CHIP_MINI_V3 = 7, | |||
CHIP_END = 8, | |||
} rtChipType_t; | |||
typedef enum tagRtAicpuScheType { | |||
@@ -74,7 +76,8 @@ typedef enum tagRtPlatformType { | |||
PLATFORM_DC = 5, | |||
PLATFORM_CLOUD_V2 = 6, | |||
PLATFORM_LHISI_SD3403 = 7, | |||
PLATFORM_END = 8, | |||
PLATFORM_MINI_V3 = 8, | |||
PLATFORM_END = 9, | |||
} rtPlatformType_t; | |||
typedef enum tagRtCubeFracMKNFp16 { | |||
@@ -140,6 +143,12 @@ typedef enum tagRTTaskTimeoutType { | |||
RT_TIMEOUT_TYPE_OP_EXECUTE, | |||
} rtTaskTimeoutType_t; | |||
typedef enum tagRtFloatOverflowMode { | |||
RT_OVERFLOW_MODE_SATURATION = 0, | |||
RT_OVERFLOW_MODE_INFNAN, | |||
RT_OVERFLOW_MODE_UNDEF, | |||
} rtFloatOverflowMode_t; | |||
/** | |||
* @ingroup | |||
* @brief get AI core count | |||
@@ -180,6 +189,15 @@ RTS_API rtError_t rtGetAiCoreMemoryRates(rtAiCoreMemoryRates_t *aiCoreMemoryRate | |||
*/ | |||
RTS_API rtError_t rtGetMemoryConfig(rtMemoryConfig_t *memoryConfig); | |||
/** | |||
* @ingroup | |||
* @brief get float overflow mode | |||
* @param [out] floatOverflowMode | |||
* @return RT_ERROR_NONE for ok | |||
* @return RT_ERROR_INVALID_VALUE for error input | |||
*/ | |||
RTS_API rtError_t rtGetFloatOverflowMode(rtFloatOverflowMode_t * const floatOverflowMode); | |||
/** | |||
* @ingroup | |||
* @brief get l2 buffer Info,virtual baseaddr,Size | |||
@@ -140,7 +140,7 @@ RTS_API rtError_t rtSetGroup(int32_t groupId); | |||
* @param [in] groupid count | |||
* @return RT_ERROR_NONE for ok, errno for failed | |||
*/ | |||
RTS_API rtError_t rtGetGroupInfo(int32_t groupId, rtGroupInfo_t *groupInfo, uint32_t count); | |||
RTS_API rtError_t rtGetGroupInfo(int32_t groupId, rtGroupInfo_t *groupInfo, uint32_t cnt); | |||
/** | |||
* @ingroup | |||
@@ -94,11 +94,11 @@ typedef enum tagGetDevMsgType { | |||
/** | |||
* @ingroup dvrt_dev | |||
* @brief get total device number. | |||
* @param [in|out] count the device number | |||
* @param [in|out] cnt the device number | |||
* @return RT_ERROR_NONE for ok | |||
* @return RT_ERROR_INVALID_VALUE for error input | |||
*/ | |||
RTS_API rtError_t rtGetDeviceCount(int32_t *count); | |||
RTS_API rtError_t rtGetDeviceCount(int32_t *cnt); | |||
/** | |||
* @ingroup dvrt_dev | |||
* @brief get device ids | |||
@@ -338,7 +338,7 @@ RTS_API rtError_t rtSetTSDevice(uint32_t tsId); | |||
* @return RT_ERROR_NONE for ok | |||
* @return RT_ERROR_DRV_ERR for can not get run mode | |||
*/ | |||
RTS_API rtError_t rtGetRunMode(rtRunMode *mode); | |||
RTS_API rtError_t rtGetRunMode(rtRunMode *runMode); | |||
/** | |||
* @ingroup dvrt_dev | |||
@@ -23,11 +23,11 @@ typedef enum dvfsProfileMode { | |||
/** | |||
* @ingroup dvrt_dvfsprofile | |||
* @brief Set the performance mode of the device | |||
* @param [in] mode dvfsProfileMode | |||
* @param [in] profMode dvfsProfileMode | |||
* @return RT_ERROR_NONE for ok | |||
* @return RT_ERROR_INVALID_VALUE for error input | |||
*/ | |||
RTS_API rtError_t rtSetDvfsProfile(DvfsProfileMode mode); | |||
RTS_API rtError_t rtSetDvfsProfile(DvfsProfileMode profMode); | |||
/** | |||
* @ingroup dvrt_dvfsprofile | |||
@@ -19,6 +19,11 @@ typedef enum rtEventWaitStatus { | |||
EVENT_STATUS_MAX = 2, | |||
} rtEventWaitStatus_t; | |||
typedef enum rtEventStatus { | |||
RT_EVENT_INIT = 0, | |||
RT_EVENT_RECORDED = 1, | |||
} rtEventStatus_t; | |||
/** | |||
* @ingroup event_flags | |||
* @brief event op bit flags | |||
@@ -115,6 +120,16 @@ RTS_API rtError_t rtEventQuery(rtEvent_t evt); | |||
*/ | |||
RTS_API rtError_t rtEventQueryWaitStatus(rtEvent_t evt, rtEventWaitStatus_t *status); | |||
/** | |||
* @ingroup dvrt_event | |||
* @brief Queries an event's status | |||
* @param [in] evt event to query | |||
* @param [in out] rtEventStatus_t status | |||
* @return RT_EVENT_RECORDED for recorded | |||
* @return RT_EVENT_INIT for not recorded | |||
*/ | |||
RTS_API rtError_t rtEventQueryStatus(rtEvent_t evt, rtEventStatus_t *status); | |||
/** | |||
* @ingroup dvrt_event | |||
* @brief computes the elapsed time between events. | |||
@@ -287,13 +287,13 @@ RTS_API rtError_t rtDependencyRegister(void *mHandle, void *sHandle); | |||
* @param [in] binHandle device binary handle | |||
* @param [in] stubFunc stub function | |||
* @param [in] stubName stub function name | |||
* @param [in] devFunc device function description. symbol name or address | |||
* offset, depending binary type. | |||
* @param [in] kernelInfoExt kernel Info extension. device function description or tiling key, | |||
* depending static shape or dynmaic shape. | |||
* @return RT_ERROR_NONE for ok | |||
* @return RT_ERROR_INVALID_VALUE for error input | |||
*/ | |||
RTS_API rtError_t rtFunctionRegister(void *binHandle, const void *stubFunc, const char_t *stubName, | |||
const void *devFunc, uint32_t funcMode); | |||
const void *kernelInfoExt, uint32_t funcMode); | |||
/** | |||
* @ingroup rt_kernel | |||
@@ -354,7 +354,8 @@ RTS_API rtError_t rtKernelLaunch(const void *stubFunc, uint32_t blockDim, void * | |||
* @ingroup rt_kernel | |||
* @brief launch kernel with handle to device | |||
* @param [in] hdl program | |||
* @param [in] devFunc device function description. | |||
* @param [in] kernelInfoExt kernel Info extension. device function description or tiling key, | |||
* depending static shape or dynmaic shape. | |||
* @param [in] blockDim block dimentions | |||
* @param [in] args argments address for kernel function | |||
* @param [in] argsSize argements size | |||
@@ -364,7 +365,7 @@ RTS_API rtError_t rtKernelLaunch(const void *stubFunc, uint32_t blockDim, void * | |||
* @return RT_ERROR_NONE for ok | |||
* @return RT_ERROR_INVALID_VALUE for error input | |||
*/ | |||
RTS_API rtError_t rtKernelLaunchWithHandle(void *hdl, const void *devFunc, uint32_t blockDim, | |||
RTS_API rtError_t rtKernelLaunchWithHandle(void *hdl, const void *kernelInfoExt, uint32_t blockDim, | |||
void *args, uint32_t argsSize, rtSmDesc_t *smDesc, rtStream_t stream_, | |||
const void *kernelInfo); | |||
@@ -497,6 +498,28 @@ RTS_API rtError_t rtDumpAddrSet(rtModel_t mdl, void *addr, uint32_t dumpSize, ui | |||
*/ | |||
RTS_API rtError_t rtDatadumpInfoLoad(const void *dumpInfo, uint32_t length); | |||
/** | |||
* @ingroup rt_kernel | |||
* @brief launch npu get float status task | |||
* @param [in] outputAddr pointer to op output addr | |||
* @param [in] outputSize op output size | |||
* @param [in] checkMode check mode | |||
* @param [in] stm associated stream | |||
* @return RT_ERROR_NONE for ok | |||
* @return RT_ERROR_INVALID_VALUE for error input | |||
*/ | |||
RTS_API rtError_t rtNpuGetFloatStatus(void *outputAddr, uint64_t outputSize, uint32_t checkMode, rtStream_t stm); | |||
/** | |||
* @ingroup rt_kernel | |||
* @brief launch npu clear float status task | |||
* @param [in] checkMode check mode | |||
* @param [in] stm associated stream | |||
* @return RT_ERROR_NONE for ok | |||
* @return RT_ERROR_INVALID_VALUE for error input | |||
*/ | |||
RTS_API rtError_t rtNpuClearFloatStatus(uint32_t checkMode, rtStream_t stm); | |||
#ifndef __CLANG_CCE_RUNTIME_H__ | |||
#define __CLANG_CCE_RUNTIME_H__ | |||
/** | |||
@@ -519,13 +542,13 @@ RTS_API rtError_t rtConfigureCall(uint32_t numBlocks, rtSmDesc_t *smDesc, rtStre | |||
/** | |||
* @ingroup rt_kernel | |||
* @brief setup argment for next rtLaunch in current thread | |||
* @param [in] arg argment address for kernel function | |||
* @param [in] args argment address for kernel function | |||
* @param [in] size argment size | |||
* @param [in] offset argment table offset | |||
* @return RT_ERROR_NONE for ok | |||
* @return RT_ERROR_INVALID_VALUE for error input | |||
*/ | |||
RTS_API rtError_t rtSetupArgument(const void *arg, uint32_t size, uint32_t offset); | |||
RTS_API rtError_t rtSetupArgument(const void *args, uint32_t size, uint32_t offset); | |||
/** | |||
* @ingroup rt_kernel | |||
@@ -544,11 +567,11 @@ RTS_API rtError_t rtLaunch(const void *stubFunc); | |||
* @param [in] ptr host memory | |||
* @param [in] size host memory size | |||
* @param [in] flag reserved. set to 0 | |||
* @param [out] arg returned arg. used for next kernel's arg. | |||
* @param [out] args returned arg. used for next kernel's arg. | |||
* @return RT_ERROR_NONE for ok | |||
* @return RT_ERROR_INVALID_VALUE for error input | |||
*/ | |||
RTS_API rtError_t rtKernelConfigTransArg(const void *ptr, uint64_t size, uint32_t flag, void **arg); | |||
RTS_API rtError_t rtKernelConfigTransArg(const void *ptr, uint64_t size, uint32_t flag, void **args); | |||
/** | |||
* @ingroup rt_kernel | |||
@@ -675,7 +698,8 @@ RTS_API rtError_t rtKernelLaunchWithTiling(const void *stubFunc, uint32_t blockD | |||
* @ingroup rt_kernel | |||
* @brief launch kernel with handle and tiling data to device | |||
* @param [in] hdl program | |||
* @param [in] devFunc device function description. | |||
* @param [in] kernelInfoExt kernel Info extension. device function description or tiling key, | |||
* depending static shape or dynmaic shape. | |||
* @param [in] blockDim block dimentions | |||
* @param [in] argsInfo argments info address for kernel function | |||
* @param [in] smDesc shared memory description | |||
@@ -684,7 +708,7 @@ RTS_API rtError_t rtKernelLaunchWithTiling(const void *stubFunc, uint32_t blockD | |||
* @return RT_ERROR_NONE for ok | |||
* @return RT_ERROR_INVALID_VALUE for error input | |||
*/ | |||
RTS_API rtError_t rtKernelLaunchWithHandleAndTiling(void *hdl, const void *devFunc, uint32_t blockDim, | |||
RTS_API rtError_t rtKernelLaunchWithHandleAndTiling(void *hdl, const void *kernelInfoExt, uint32_t blockDim, | |||
rtArgsWithTiling_t *argsInfo, rtSmDesc_t *smDesc, rtStream_t stm, const void* kernelInfo); | |||
#if defined(__cplusplus) | |||
@@ -341,6 +341,20 @@ RTS_API rtError_t rtInvalidCache(void *base, size_t len); | |||
*/ | |||
RTS_API rtError_t rtMemcpy(void *dst, uint64_t destMax, const void *src, uint64_t cnt, rtMemcpyKind_t kind); | |||
/** | |||
* @ingroup dvrt_mem | |||
* @brief host task memcpy | |||
* @param [in] dst destination address pointer | |||
* @param [in] destMax length of destination address memory | |||
* @param [in] src source address pointer | |||
* @param [in] cnt the number of byte to copy | |||
* @param [in] kind memcpy type | |||
* @param [in] stm task stream | |||
* @return RT_ERROR_NONE for ok, errno for failed | |||
*/ | |||
RTS_API rtError_t rtMemcpyHostTask(void * const dst, const uint64_t destMax, const void * const src, | |||
const uint64_t cnt, rtMemcpyKind_t kind, rtStream_t stm); | |||
/** | |||
* @ingroup dvrt_mem | |||
* @brief asynchronized memcpy | |||
@@ -424,6 +438,16 @@ RTS_API rtError_t rtAiCoreMemorySizes(rtAiCoreMemorySize_t *aiCoreMemorySize); | |||
*/ | |||
RTS_API rtError_t rtSetAiCoreMemorySizes(rtAiCoreMemorySize_t *aiCoreMemorySize); | |||
/** | |||
* @ingroup dvrt_mem | |||
* @brief Specifies how memory is use | |||
* @param [in] devPtr memory pointer | |||
* @param [in] count memory count | |||
* @param [in] advise reserved, set to 1 | |||
* @return RT_ERROR_NONE for ok | |||
* @return others for error | |||
*/ | |||
RTS_API rtError_t rtMemAdvise(void *devPtr, uint64_t count, uint32_t advise); | |||
/** | |||
* @ingroup dvrt_mem | |||
* @brief set memory with uint32_t value | |||
@@ -28,6 +28,16 @@ extern "C" { | |||
*/ | |||
RTS_API rtError_t rtSetTaskTag(const char_t *taskTag); | |||
/** | |||
* @brief set aicpu device attribute. | |||
* it is used for aicpu device to be aware of enviroment config | |||
* @param [in] key attrubute key. | |||
* @param [in] val attrubute value. | |||
* @return RT_ERROR_NONE for ok | |||
* @return other failed | |||
*/ | |||
RTS_API rtError_t rtSetAicpuAttr(const char_t *key, const char_t *val); | |||
#if defined(__cplusplus) | |||
} | |||
#endif | |||
@@ -25,7 +25,8 @@ typedef struct tagFftsPlusTaskInfo { | |||
#pragma pack(pop) | |||
RTS_API rtError_t rtGetAddrAndPrefCntWithHandle(void *hdl, const void *devFunc, void **addr, uint32_t *prefetchCnt); | |||
RTS_API rtError_t rtGetAddrAndPrefCntWithHandle(void *hdl, const void *kernelInfoExt, void **addr, | |||
uint32_t *prefetchCnt); | |||
RTS_API rtError_t rtFftsPlusTaskLaunch(rtFftsPlusTaskInfo_t *fftsPlusTaskInfo, rtStream_t stm); | |||
@@ -184,6 +184,13 @@ typedef enum rtGroupType { | |||
RT_GRP_TYPE_BIND_DP_CPU_EXCLUSIVE /* Bound to a AICPU, intra-group threads are mutex awakened */ | |||
} rtGroupType_t; | |||
typedef struct tagInitFlowGwInfo { | |||
const char_t *groupName; | |||
uint64_t schedPolicy; | |||
uint64_t reschedInterval; | |||
char_t rsv[128]; | |||
} rtInitFlowGwInfo_t; | |||
/** | |||
* @ingroup rt_mem_queue | |||
* @brief init queue schedule | |||
@@ -193,6 +200,15 @@ typedef enum rtGroupType { | |||
*/ | |||
RTS_API rtError_t rtMemQueueInitQS(int32_t devId, const char_t *grpName); | |||
/** | |||
* @ingroup rt_mem_queue | |||
* @brief init flow gateway | |||
* @param [in] devId the logical device id | |||
* @param [in] initInfo Initialization parameters | |||
* @return RT_ERROR_NONE for ok | |||
*/ | |||
RTS_API rtError_t rtMemQueueInitFlowGw(int32_t devId, const rtInitFlowGwInfo_t * const initInfo); | |||
/** | |||
* @ingroup rt_mem_queue | |||
* @brief create mbuf queue | |||
@@ -222,24 +238,24 @@ RTS_API rtError_t rtMemQueueInit(int32_t devId); | |||
/** | |||
* @ingroup rt_mem_queue | |||
* @brief enqueu mbuf | |||
* @brief enqueue memBuf | |||
* @param [in] devId the logical device id | |||
* @param [in] qid queue id | |||
* @param [in] mbuf enqueue mbuf | |||
* @param [in] memBuf enqueue memBuf | |||
* @return RT_ERROR_NONE for ok | |||
*/ | |||
RTS_API rtError_t rtMemQueueEnQueue(int32_t devId, uint32_t qid, void *mbuf); | |||
RTS_API rtError_t rtMemQueueEnQueue(int32_t devId, uint32_t qid, void *memBuf); | |||
/** | |||
* @ingroup rt_mem_queue | |||
* @brief enqueu mbuf | |||
* @brief dequeue memBuf | |||
* @param [in] devId the logical device id | |||
* @param [in] qid queue id | |||
* @param [out] mbuf dequeue mbuf | |||
* @param [out] memBuf dequeue memBuf | |||
* @return RT_ERROR_NONE for ok | |||
*/ | |||
RTS_API rtError_t rtMemQueueDeQueue(int32_t devId, uint32_t qid, void **mbuf); | |||
RTS_API rtError_t rtMemQueueDeQueue(int32_t devId, uint32_t qid, void **memBuf); | |||
/** | |||
* @ingroup rt_mem_queue | |||
@@ -350,47 +366,56 @@ RTS_API rtError_t rtMbufInit(rtMemBuffCfg_t *cfg); | |||
/** | |||
* @ingroup rt_mem_queue | |||
* @brief alloc buff | |||
* @param [out] buff: buff addr alloced | |||
* @param [out] memBuf: buff addr alloced | |||
* @param [in] size: The amount of memory space requested | |||
* @return RT_ERROR_NONE for ok | |||
*/ | |||
RTS_API rtError_t rtMbufAlloc(rtMbufPtr_t *mbuf, uint64_t size); | |||
RTS_API rtError_t rtMbufAlloc(rtMbufPtr_t *memBuf, uint64_t size); | |||
/** | |||
* @ingroup rt_mem_queue | |||
* @brief free buff | |||
* @param [in] buff: buff addr to be freed | |||
* @param [in] memBuf: buff addr to be freed | |||
* @return RT_ERROR_NONE for ok | |||
*/ | |||
RTS_API rtError_t rtMbufFree(rtMbufPtr_t mbuf); | |||
RTS_API rtError_t rtMbufFree(rtMbufPtr_t memBuf); | |||
/** | |||
* @ingroup rt_mem_queue | |||
* @brief set Data len of Mbuf | |||
* @param [in] memBuf: Mbuf addr | |||
* @param [in] len: data len | |||
* @return RT_ERROR_NONE for success, others for fail | |||
*/ | |||
RTS_API rtError_t rtMbufSetDataLen(rtMbufPtr_t memBuf, uint64_t len); | |||
/** | |||
* @ingroup rt_mem_queue | |||
* @brief get Data addr of Mbuf | |||
* @param [in] mbuf: Mbuf addr | |||
* @param [in] memBuf: Mbuf addr | |||
* @param [out] buf: Mbuf data addr | |||
* @return RT_ERROR_NONE for ok | |||
*/ | |||
RTS_API rtError_t rtMbufGetBuffAddr(rtMbufPtr_t mbuf, void **buf); | |||
RTS_API rtError_t rtMbufGetBuffAddr(rtMbufPtr_t memBuf, void **buf); | |||
/** | |||
* @ingroup rt_mem_queue | |||
* @brief get total Buffer size of Mbuf | |||
* @param [in] mbuf: Mbuf addr | |||
* @param [in] memBuf: Mbuf addr | |||
* @param [out] totalSize: total buffer size of Mbuf | |||
* @return RT_ERROR_NONE for ok | |||
*/ | |||
RTS_API rtError_t rtMbufGetBuffSize(rtMbufPtr_t mbuf, uint64_t *totalSize); | |||
RTS_API rtError_t rtMbufGetBuffSize(rtMbufPtr_t memBuf, uint64_t *totalSize); | |||
/** | |||
* @ingroup rt_mem_queue | |||
* @brief Get the address and length of its user_data from the specified Mbuf | |||
* @param [in] mbuf: Mbuf addr | |||
* @param [in] memBuf: Mbuf addr | |||
* @param [out] priv: address of its user_data | |||
* @param [out] size: length of its user_data | |||
* @return RT_ERROR_NONE for ok | |||
*/ | |||
RTS_API rtError_t rtMbufGetPrivInfo (rtMbufPtr_t mbuf, void **priv, uint64_t *size); | |||
RTS_API rtError_t rtMbufGetPrivInfo(rtMbufPtr_t memBuf, void **priv, uint64_t *size); | |||
// mem group | |||
typedef struct { | |||
@@ -573,6 +598,14 @@ RTS_API rtError_t rtQueueSubF2NFEvent(int32_t devId, uint32_t qId, uint32_t grou | |||
*/ | |||
RTS_API rtError_t rtQueueSubscribe(int32_t devId, uint32_t qId, uint32_t groupId, int32_t type); | |||
/** | |||
* @ingroup rtBufEventTrigger | |||
* @brief buf event trigger | |||
* @param [in] name, group name | |||
* @return 0 for success, others for fail | |||
*/ | |||
RTS_API rtError_t rtBufEventTrigger(const char_t *name); | |||
#if defined(__cplusplus) | |||
} | |||
#endif | |||
@@ -44,6 +44,11 @@ typedef enum tagModelTaskType { | |||
RT_MODEL_TASK_PROFILER_TRACE_EX, | |||
RT_MODEL_TASK_FFTS_TASK, | |||
RT_MODEL_TASK_FFTS_PLUS_TASK, | |||
RT_MODEL_TASK_DSA_TASK, | |||
RT_MODEL_TASK_CMO, | |||
RT_MODEL_TASK_BARRIER, | |||
RT_MODEL_TASK_NPU_GET_FLOAT_STATUS, | |||
RT_MODEL_TASK_NPU_CLEAR_FLOAT_STATUS, | |||
} rtModelTaskType_t; | |||
typedef enum tagModelStreamType { | |||
@@ -115,9 +120,9 @@ typedef struct tagKernelTaskInfo { | |||
uint16_t argsCount; | |||
uint16_t argsSize; | |||
uint16_t reserved; | |||
char_t *stubFunc; | |||
const char_t *stubFunc; | |||
uint8_t *smDesc; | |||
uint8_t *args; | |||
const uint8_t *args; | |||
uint16_t *argsOffset; | |||
} rtKernelTaskInfo_t; | |||
@@ -126,17 +131,17 @@ typedef struct tagAllKernelTaskInfo { | |||
uint16_t argsCount; | |||
uint16_t argsSize; | |||
uint16_t reserved; | |||
void *devfunc; | |||
const void *kernelInfoExt; | |||
void *handle; | |||
uint8_t *smDesc; | |||
uint8_t *args; | |||
const uint8_t *args; | |||
uint16_t *argsOffset; | |||
} rtAllKernelTaskInfo_t; | |||
typedef struct tagKernelTaskInfoEx { | |||
uint32_t flags; | |||
uint32_t argsSize; | |||
void *args; | |||
const void *args; | |||
uint32_t reserved[6]; | |||
} rtKernelTaskInfoEx_t; | |||
@@ -198,9 +203,9 @@ typedef struct tagProfilerTraceExTaskInfo { | |||
} rtProfilerTraceEx_t; | |||
typedef struct tagrtMemcpyAsyncTaskInfo { | |||
void *dst; | |||
const void *dst; | |||
uint64_t destMax; | |||
void *src; | |||
const void *src; | |||
uint64_t count; | |||
uint32_t kind; | |||
uint32_t reserved; | |||
@@ -212,9 +217,9 @@ typedef struct tagrtNotifyTaskInfo { | |||
} rtNotifyTaskInfo_t; | |||
typedef struct tagrtReduceAsyncTaskInfo { | |||
void *dst; | |||
const void *dst; | |||
uint64_t destMax; | |||
void *src; | |||
const void *src; | |||
uint64_t count; | |||
uint32_t kind; | |||
uint32_t type; | |||
@@ -481,6 +486,16 @@ RTS_API rtError_t rtDebugRegister(rtModel_t mdl, uint32_t flag, const void *addr | |||
*/ | |||
RTS_API rtError_t rtDebugUnRegister(rtModel_t mdl); | |||
/** | |||
* @ingroup rt_model | |||
* @brief set model group id | |||
* @param [in] mdl model | |||
* @param [in] schGrpId groupId (0,4) 0:default invalid value 1-4 valid value Maximum support 4 groups | |||
* @return RT_ERROR_NONE for ok | |||
* @return RT_ERROR_INVALID_VALUE for error input | |||
*/ | |||
RTS_API rtError_t rtModelSetSchGroupId(rtModel_t mdl, const int16_t schGrpId); | |||
#if defined(__cplusplus) | |||
} | |||
#endif | |||
@@ -7,7 +7,7 @@ | |||
#define CCE_RUNTIME_RT_STARS_H | |||
#include "base.h" | |||
#include "rt_stars_define.h" | |||
#if defined(__cplusplus) | |||
extern "C" { | |||
#endif | |||
@@ -80,6 +80,25 @@ RTS_API rtError_t rtCdqEnQueue(const char_t *queName, uint32_t cdqeIndex, void * | |||
RTS_API rtError_t rtCdqEnQueuePtrMode(const char_t *queName, uint32_t cdqeIndex, const void *ptrAddr, | |||
rtStream_t stm); | |||
/** | |||
* @ingroup rt_stars | |||
* @brief launch common cmo task on the stream. | |||
* @param [in] taskInfo cmo task info | |||
* @param [in] stm launch task on the stream | |||
* @param [in] flag flag | |||
* @return RT_ERROR_NONE for ok, others failed | |||
*/ | |||
RTS_API rtError_t rtCmoTaskLaunch(rtCmoTaskInfo_t *taskInfo, rtStream_t stm, uint32_t flag); | |||
/** | |||
* @ingroup rt_stars | |||
* @brief launch barrier cmo task on the stream. | |||
* @param [in] taskInfo barrier task info | |||
* @param [in] stm launch task on the stream | |||
* @param [in] flag flag | |||
* @return RT_ERROR_NONE for ok, others failed | |||
*/ | |||
RTS_API rtError_t rtBarrierTaskLaunch(rtBarrierTaskInfo_t *taskInfo, rtStream_t stm, uint32_t flag); | |||
#if defined(__cplusplus) | |||
} | |||
@@ -32,6 +32,37 @@ typedef struct tagStarsSqeHeader { | |||
uint16_t taskId; | |||
} rtStarsSqeHeader_t; | |||
typedef struct tagStarsDsaSqe { | |||
// 0-7 bytes | |||
rtStarsSqeHeader_t sqeHeader; | |||
// 8-11 bytes | |||
uint32_t start : 1; | |||
uint32_t functionType : 3; | |||
uint32_t dataType : 3; | |||
uint32_t algoType : 3; | |||
uint32_t paramVldBitmap : 5; | |||
uint32_t paramAddrValBitmap : 7; | |||
uint32_t reserved0 : 10; | |||
// 12-15 bytes | |||
uint16_t sqeIndex; | |||
uint8_t kernelCredit; | |||
uint8_t reserved1; | |||
// 16-31 bytes | |||
uint32_t dsaCfgResultAddrLow; | |||
uint32_t dsaCfgResultAddrHigh; | |||
uint32_t dsaCfgStateAddrLow; | |||
uint32_t dsaCfgStateAddrHigh; | |||
// 32-47 bytes | |||
uint32_t dsaCfgParamAddrLow; | |||
uint32_t dsaCfgParamAddrHigh; | |||
uint32_t dsaCfgSeedLow; | |||
uint32_t dsaCfgSeedHigh; | |||
// 48-63 bytes | |||
uint32_t dsaCfgNumberLow; | |||
uint32_t dsaCfgNumberHigh; | |||
uint32_t reserved2[2]; | |||
} rtStarsDsaSqe_t; | |||
// ffts+ type | |||
typedef enum tagFftsPlusType { | |||
RT_FFTS_PLUS_TYPE_RES1 = 2, // Reserved | |||
@@ -83,6 +114,33 @@ typedef struct tagFftsPlusSqe { | |||
uint32_t reserved16[4]; | |||
} rtFftsPlusSqe_t; | |||
typedef struct tagCmoTaskInfo { | |||
uint8_t qos; | |||
uint8_t partId; | |||
uint8_t pmg; | |||
uint8_t reserved; | |||
uint16_t cmoType; | |||
uint16_t opCode; | |||
uint16_t numInner; | |||
uint16_t numOuter; | |||
uint32_t logicId; | |||
uint32_t lengthInner; | |||
uint64_t sourceAddr; | |||
uint32_t striderOuter; | |||
uint32_t striderInner; | |||
} rtCmoTaskInfo_t; | |||
typedef struct tagBarrierCmoInfo { | |||
uint16_t cmoType; // 0 is barrier, 1 is invalid, Prefetch is 2, Write_back is 3, FE/GE only use invalid type. | |||
uint32_t logicId; | |||
} rtBarrierCmoInfo_t; | |||
#define RT_CMO_MAX_BARRIER_NUM 6U // 6U is max support | |||
typedef struct tagBarrierTaskInfo { | |||
uint8_t logicIdNum; | |||
rtBarrierCmoInfo_t cmoInfo[RT_CMO_MAX_BARRIER_NUM]; | |||
} rtBarrierTaskInfo_t; | |||
#pragma pack(pop) | |||
#if defined(__cplusplus) && !defined(COMPILE_OMG_PACKAGE) | |||
@@ -1,21 +1,14 @@ | |||
/** | |||
* Copyright 2019-2020 Huawei Technologies Co., Ltd | |||
* | |||
* Licensed under the Apache License, Version 2.0 (the "License"); | |||
* you may not use this file except in compliance with the License. | |||
* You may obtain a copy of the License at | |||
* | |||
* http://www.apache.org/licenses/LICENSE-2.0 | |||
* | |||
* Unless required by applicable law or agreed to in writing, software | |||
* distributed under the License is distributed on an "AS IS" BASIS, | |||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
* See the License for the specific language governing permissions and | |||
* limitations under the License. | |||
*/ | |||
* @file data_common.h | |||
* | |||
* Copyright (C) Huawei Technologies Co., Ltd. 2019-2020. All Rights Reserved. | |||
* | |||
* This program is used to data structure | |||
*/ | |||
#ifndef HOST_INNER_INC_DATA_COMMON_H_ | |||
#define HOST_INNER_INC_DATA_COMMON_H_ | |||
#include <string> | |||
namespace tdt { | |||
#ifndef TDT_DATA_TYPE | |||
@@ -1,18 +1,10 @@ | |||
/** | |||
* Copyright 2019-2020 Huawei Technologies Co., Ltd | |||
* | |||
* Licensed under the Apache License, Version 2.0 (the "License"); | |||
* you may not use this file except in compliance with the License. | |||
* You may obtain a copy of the License at | |||
* | |||
* http://www.apache.org/licenses/LICENSE-2.0 | |||
* | |||
* Unless required by applicable law or agreed to in writing, software | |||
* distributed under the License is distributed on an "AS IS" BASIS, | |||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
* See the License for the specific language governing permissions and | |||
* limitations under the License. | |||
*/ | |||
* @file index_transform.h | |||
* | |||
* Copyright (C) Huawei Technologies Co., Ltd. 2018-2019. All Rights Reserved. | |||
* | |||
* This program is used to get logical device id by phy device id. | |||
*/ | |||
#ifndef INC_TDT_INDEX_TRANSFORM_H | |||
#define INC_TDT_INDEX_TRANSFORM_H | |||
@@ -1,18 +1,10 @@ | |||
/** | |||
* Copyright 2019-2020 Huawei Technologies Co., Ltd | |||
* | |||
* Licensed under the Apache License, Version 2.0 (the "License"); | |||
* you may not use this file except in compliance with the License. | |||
* You may obtain a copy of the License at | |||
* | |||
* http://www.apache.org/licenses/LICENSE-2.0 | |||
* | |||
* Unless required by applicable law or agreed to in writing, software | |||
* distributed under the License is distributed on an "AS IS" BASIS, | |||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
* See the License for the specific language governing permissions and | |||
* limitations under the License. | |||
*/ | |||
/** | |||
* @file status.h | |||
* | |||
* Copyright (C) Huawei Technologies Co., Ltd. 2019-2020. All Rights Reserved. | |||
* | |||
* This program is used to describe status | |||
*/ | |||
#ifndef INC_TDT_STATUS_H_ | |||
#define INC_TDT_STATUS_H_ | |||
@@ -1,18 +1,10 @@ | |||
/** | |||
* Copyright 2019-2020 Huawei Technologies Co., Ltd | |||
* | |||
* Licensed under the Apache License, Version 2.0 (the "License"); | |||
* you may not use this file except in compliance with the License. | |||
* You may obtain a copy of the License at | |||
* | |||
* http://www.apache.org/licenses/LICENSE-2.0 | |||
* | |||
* Unless required by applicable law or agreed to in writing, software | |||
* distributed under the License is distributed on an "AS IS" BASIS, | |||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
* See the License for the specific language governing permissions and | |||
* limitations under the License. | |||
*/ | |||
* @file tdt_host_interface.h | |||
* | |||
* Copyright (C) Huawei Technologies Co., Ltd. 2019-2020. All Rights Reserved. | |||
* | |||
* This program is used to host server | |||
*/ | |||
#ifndef HOST_INNER_INC_TDT_HOST_INTERFACE_H_ | |||
#define HOST_INNER_INC_TDT_HOST_INTERFACE_H_ | |||
@@ -1,5 +1,5 @@ | |||
/** | |||
* Copyright 2019-2020 Huawei Technologies Co., Ltd | |||
* Copyright (c) Hisilicon Technologies Co., Ltd. 2018-2021. All rights reserved. | |||
* | |||
* Licensed under the Apache License, Version 2.0 (the "License"); | |||
* you may not use this file except in compliance with the License. | |||
@@ -14,17 +14,22 @@ | |||
* limitations under the License. | |||
*/ | |||
#ifndef TDT_HOST_INNER_INC_TSD_CLIENT_H_ | |||
#define TDT_HOST_INNER_INC_TSD_CLIENT_H_ | |||
#ifndef TDT_HOST_INNER_INC_TSD_CLIENT_H | |||
#define TDT_HOST_INNER_INC_TSD_CLIENT_H | |||
#include <condition_variable> | |||
#include <map> | |||
#include <memory> | |||
#include <mutex> | |||
#include "tdt/status.h" | |||
#include "tdt/data_common.h" | |||
#include "tsd/status.h" | |||
#include "toolchain/prof_callback.h" | |||
#ifdef WIN_TSD | |||
#define TDT_LIB_EXPORT __declspec(dllexport) | |||
#else | |||
#define TDT_LIB_EXPORT __attribute__((visibility("default"))) | |||
#endif | |||
#ifdef __cplusplus | |||
extern "C" { | |||
#endif // __cplusplus | |||
@@ -50,7 +55,51 @@ extern "C" { | |||
* @li tsd_client.h: Header file where the interface declaration is located. | |||
* @li data_common.h: Header file where 'TDT_StatusT' defined | |||
*/ | |||
TDT_LIB_EXPORT TDT_StatusT TsdOpen(const uint32_t logicDeviceId, const uint32_t rankSize); | |||
TDT_LIB_EXPORT uint32_t TsdOpen(const uint32_t logicDeviceId, const uint32_t rankSize); | |||
/** | |||
* @ingroup Open | |||
* @brief Used for the Framework process to communicate with the TSDDaemon process in 1981, | |||
* and notify TSD to complete the initialization of other processes | |||
* | |||
* @par Function | |||
* Used for the Framework process to communicate with the TSDDaemon process, | |||
* and notify TSD to complete the initialization of other processes | |||
* | |||
* @param logicDeviceId [IN] type #unsigned int. Logic device ID | |||
* @param rankSize [IN] type #unsigned int. The rankSize of the training. | |||
* The default value is 1. When rankSize is greater than 1, | |||
* HCCP will be pulled to perform set communication related operations. | |||
* @param deviceMode [IN] type unsigned int. The device running mode of aicpuSd, | |||
* it include chipMode and DieMode | |||
* @retval TDT_OK Success | |||
* @retval OtherValues Failure | |||
* | |||
* @par Dependency | |||
* @li data_common.h: Header file where 'TDT_StatusT' defined | |||
*/ | |||
TDT_LIB_EXPORT uint32_t TsdOpenEx(const uint32_t logicDeviceId, const uint32_t rankSize, const uint32_t deviceMode); | |||
/** | |||
* @ingroup InitialQs | |||
* @brief Used for the Framework process to communicate with the TSDDaemon process, | |||
* and notify TSD to complete the initialization of QS processes | |||
* | |||
* @par Function | |||
* Used for the Framework process to communicate with the TSDDaemon process, | |||
* and notify TSD to complete the initialization of other processes | |||
* | |||
* @param logicDeviceId [IN] type #unsigned int. Logic device ID | |||
* @param groupName [IN] type #char pointer. qs group name send by host process | |||
* @retval TDT_OK Success | |||
* @retval OtherValues Failure | |||
* | |||
* @par Dependency | |||
* @li libtsdclient.so: Library to which the interface belongs. | |||
* @li tsd_client.h: Header file where the interface declaration is located. | |||
* @li data_common.h: Header file where 'TDT_StatusT' defined | |||
*/ | |||
TDT_LIB_EXPORT uint32_t TsdInitQs(const uint32_t logicDeviceId, const char_t * const groupName = nullptr); | |||
/** | |||
* @ingroup Close | |||
@@ -64,11 +113,12 @@ TDT_LIB_EXPORT TDT_StatusT TsdOpen(const uint32_t logicDeviceId, const uint32_t | |||
* @retval OtherValues Failure | |||
* | |||
* @par Dependency | |||
* @li libtsdclient.so: Library to which the interface belongs. | |||
* @li tsd_client.h: Header file where the interface declaration is located. | |||
* @li data_common.h: Header file where 'TDT_StatusT' defined | |||
*/ | |||
TDT_LIB_EXPORT TDT_StatusT TsdClose(const uint32_t logicDeviceId); | |||
TDT_LIB_EXPORT uint32_t TsdClose(const uint32_t logicDeviceId); | |||
/** | |||
* @ingroup UpdateProfilingMode | |||
@@ -86,7 +136,7 @@ TDT_LIB_EXPORT TDT_StatusT TsdClose(const uint32_t logicDeviceId); | |||
* @li tsd_client.h: Header file where the interface declaration is located. | |||
* @li data_common.h: Header file where 'TDT_StatusT' defined | |||
*/ | |||
TDT_LIB_EXPORT TDT_StatusT UpdateProfilingMode(const uint32_t logicDeviceId, const uint32_t flag); | |||
TDT_LIB_EXPORT uint32_t UpdateProfilingMode(const uint32_t logicDeviceId, const uint32_t flag); | |||
/** | |||
* @ingroup TsdSetMsprofReporterCallback | |||
@@ -105,9 +155,22 @@ TDT_LIB_EXPORT TDT_StatusT UpdateProfilingMode(const uint32_t logicDeviceId, con | |||
* @li data_common.h: Header file where 'TDT_StatusT' defined | |||
* @li prof_callback.h: Headerfile where 'MsprofReporterCallback' defined | |||
*/ | |||
TDT_LIB_EXPORT TDT_StatusT TsdSetMsprofReporterCallback(MsprofReporterCallback callback); | |||
TDT_LIB_EXPORT uint32_t TsdSetMsprofReporterCallback(const MsprofReporterCallback callback); | |||
/** | |||
* @ingroup TsdSetAttr | |||
* @brief used to set tsd attr | |||
* | |||
* @par key | |||
* key set for tsd attr,now only support RunMode | |||
* | |||
* @par value | |||
* value set to run correspond mode, PROCESS_MODE or THREAD_MODE | |||
* @retval TDT_OK Success | |||
* @retval OtherValues Failure | |||
*/ | |||
TDT_LIB_EXPORT uint32_t TsdSetAttr(const char * const attrKey, const char * const attrValue); | |||
#ifdef __cplusplus | |||
} | |||
#endif // __cplusplus | |||
#endif // TDT_HOST_INNER_INC_TSD_CLIENT_H_ | |||
#endif // TDT_HOST_INNER_INC_TSD_CLIENT_H |
@@ -1,17 +1,8 @@ | |||
/** | |||
* Copyright 2019-2020 Huawei Technologies Co., Ltd | |||
* | |||
* Licensed under the Apache License, Version 2.0 (the "License"); | |||
* you may not use this file except in compliance with the License. | |||
* You may obtain a copy of the License at | |||
* | |||
* http://www.apache.org/licenses/LICENSE-2.0 | |||
* | |||
* Unless required by applicable law or agreed to in writing, software | |||
* distributed under the License is distributed on an "AS IS" BASIS, | |||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
* See the License for the specific language governing permissions and | |||
* limitations under the License. | |||
/* | |||
* Copyright (c) Huawei Technologies Co., Ltd. 2019-2021. All rights reserved. | |||
* Description: handle perf data | |||
* Author: xp | |||
* Create: 2019-10-13 | |||
*/ | |||
#ifndef MSPROFILER_API_PROF_ACL_API_H_ | |||
@@ -25,6 +16,8 @@ | |||
#define PROF_L2CACHE 0x00000010ULL | |||
#define PROF_HCCL_TRACE 0x00000020ULL | |||
#define PROF_TRAINING_TRACE 0x00000040ULL | |||
#define PROF_MSPROFTX 0x00000080ULL | |||
#define PROF_RUNTIME_API 0x00000100ULL | |||
// system profilinig switch | |||
#define PROF_CPU 0x00010000ULL | |||
@@ -36,17 +29,18 @@ | |||
#define PROF_AIVECTORCORE_SAMPLE 0x00400000ULL | |||
#define PROF_MODEL_EXECUTE 0x0000001000000ULL | |||
#define PROF_RUNTIME_API 0x0000002000000ULL | |||
#define PROF_RUNTIME_TRACE 0x0000004000000ULL | |||
#define PROF_SCHEDULE_TIMELINE 0x0000008000000ULL | |||
#define PROF_SCHEDULE_TRACE 0x0000010000000ULL | |||
#define PROF_AIVECTORCORE_METRICS 0x0000020000000ULL | |||
#define PROF_SUBTASK_TIME 0x0000040000000ULL | |||
#define PROF_TASK_TRACE 0x0000005000062ULL | |||
#define PROF_OP_DETAIL 0x0000080000000ULL | |||
#define PROF_MODEL_LOAD 0x8000000000000000ULL | |||
#define PROF_TASK_TRACE (PROF_MODEL_EXECUTE | PROF_RUNTIME_TRACE | PROF_TRAINING_TRACE | \ | |||
PROF_HCCL_TRACE | PROF_TASK_TIME) | |||
// DataTypeConfig MASK | |||
#define PROF_ACL_API_MASK 0x00000001ULL | |||
#define PROF_TASK_TIME_MASK 0x00000002ULL | |||
@@ -55,6 +49,8 @@ | |||
#define PROF_L2CACHE_MASK 0x00000010ULL | |||
#define PROF_HCCL_TRACE_MASK 0x00000020ULL | |||
#define PROF_TRAINING_TRACE_MASK 0x00000040ULL | |||
#define PROF_MSPROFTX_MASK 0x00000080ULL | |||
#define PROF_RUNTIME_API_MASK 0x00000100ULL | |||
// system profilinig mask | |||
#define PROF_CPU_MASK 0x00010000ULL | |||
@@ -66,12 +62,12 @@ | |||
#define PROF_AIVECTORCORE_SAMPLE_MASK 0x00400000ULL | |||
#define PROF_MODEL_EXECUTE_MASK 0x0000001000000ULL | |||
#define PROF_RUNTIME_API_MASK 0x0000002000000ULL | |||
#define PROF_RUNTIME_TRACE_MASK 0x0000004000000ULL | |||
#define PROF_SCHEDULE_TIMELINE_MASK 0x0000008000000ULL | |||
#define PROF_SCHEDULE_TRACE_MASK 0x0000010000000ULL | |||
#define PROF_AIVECTORCORE_METRICS_MASK 0x0000020000000ULL | |||
#define PROF_SUBTASK_TIME_MASK 0x0000040000000ULL | |||
#define PROF_OP_DETAIL_MASK 0x0000080000000ULL | |||
#define PROF_MODEL_LOAD_MASK 0x8000000000000000ULL | |||
@@ -104,7 +100,7 @@ extern "C" { | |||
MSVP_PROF_API uint64_t ProfGetOpExecutionTime(const void *data, uint32_t len, uint32_t index); | |||
typedef int32_t Status; | |||
typedef int32_t Status; | |||
typedef struct aclprofSubscribeConfig aclprofSubscribeConfig; | |||
/// | |||
/// @ingroup AscendCL | |||
@@ -135,6 +131,33 @@ MSVP_PROF_API Status aclgrphProfGraphUnSubscribe(const uint32_t graphId); | |||
* @retval 0 for failed | |||
*/ | |||
MSVP_PROF_API size_t aclprofGetGraphId(const void *opInfo, size_t opInfoLen, uint32_t index); | |||
/** | |||
* @ingroup AscendCL | |||
* @brief set stamp pay load | |||
* | |||
* | |||
* @retval void | |||
*/ | |||
MSVP_PROF_API int aclprofSetStampPayload(void *stamp, const int32_t type, void *value); | |||
/** | |||
* @ingroup AscendCL | |||
* @brief set category and name | |||
* | |||
* | |||
* @retval void | |||
*/ | |||
MSVP_PROF_API int aclprofSetCategoryName(uint32_t category, const char *categoryName); | |||
/** | |||
* @ingroup AscendCL | |||
* @brief set category to stamp | |||
* | |||
* | |||
* @retval void | |||
*/ | |||
MSVP_PROF_API int aclprofSetStampCategory(void *stamp, uint32_t category); | |||
#ifdef __cplusplus | |||
} | |||
#endif | |||
@@ -1,17 +1,8 @@ | |||
/** | |||
* Copyright 2019-2020 Huawei Technologies Co., Ltd | |||
* | |||
* Licensed under the Apache License, Version 2.0 (the "License"); | |||
* you may not use this file except in compliance with the License. | |||
* You may obtain a copy of the License at | |||
* | |||
* http://www.apache.org/licenses/LICENSE-2.0 | |||
* | |||
* Unless required by applicable law or agreed to in writing, software | |||
* distributed under the License is distributed on an "AS IS" BASIS, | |||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
* See the License for the specific language governing permissions and | |||
* limitations under the License. | |||
/* | |||
* Copyright (c) Huawei Technologies Co., Ltd. 2019-2021. All rights reserved. | |||
* Description: handle perf data | |||
* Author: xp | |||
* Create: 2019-10-13 | |||
*/ | |||
#ifndef MSPROFILER_PROF_CALLBACK_H_ | |||
@@ -76,7 +67,8 @@ enum MsprofReporterModuleId { | |||
MSPROF_MODULE_HCCL, // HCCL | |||
MSPROF_MODULE_ACL, // AclModule | |||
MSPROF_MODULE_FRAMEWORK, // Framework | |||
MSPROF_MODULE_RUNTIME // runtime | |||
MSPROF_MODULE_RUNTIME, // runtime | |||
MSPROF_MODULE_MSPROF // msprofTx | |||
}; | |||
/** | |||
@@ -119,7 +111,7 @@ struct MsprofGeOptions { | |||
*/ | |||
enum MsprofCtrlCallbackType { | |||
MSPROF_CTRL_INIT_ACL_ENV = 0, // start profiling with acl env | |||
MSPROF_CTRL_INIT_ACL_JSON, // start profiling with acl.json | |||
MSPROF_CTRL_INIT_ACL_JSON,   // start profiling with acl.json | |||
MSPROF_CTRL_INIT_GE_OPTIONS, // start profiling with ge env and options | |||
MSPROF_CTRL_FINALIZE, // stop profiling | |||
MSPROF_CTRL_INIT_DYNA = 0xFF, // start profiling for dynamic profiling | |||
@@ -0,0 +1,449 @@ | |||
/* | |||
* Copyright (c) Huawei Technologies Co., Ltd. 2019-2021. All rights reserved. | |||
* Description: handle perf data | |||
* Author: Huawei Technologies Co., Ltd. | |||
* Create: 2019-10-13 | |||
*/ | |||
#ifndef MSPROFILER_PROF_COMMON_H_ | |||
#define MSPROFILER_PROF_COMMON_H_ | |||
#ifdef __cplusplus | |||
extern "C" { | |||
#endif // __cplusplus | |||
#include <stdint.h> | |||
#define MSPROF_DATA_HEAD_MAGIC_NUM 0x5a5a | |||
enum MsprofDataTag { | |||
MSPROF_ACL_DATA_TAG = 0, //acl data tag, range: 0~19 | |||
MSPROF_GE_DATA_TAG_MODEL_LOAD = 20, //ge data tag, range: 20~39 | |||
MSPROF_GE_DATA_TAG_FUSION = 21, | |||
MSPROF_GE_DATA_TAG_INFER = 22, | |||
MSPROF_GE_DATA_TAG_TASK = 23, | |||
MSPROF_GE_DATA_TAG_TENSOR = 24, | |||
MSPROF_GE_DATA_TAG_STEP = 25, | |||
MSPROF_GE_DATA_TAG_ID_MAP = 26, | |||
MSPROF_GE_DATA_TAG_HOST_SCH = 27, | |||
MSPROF_RUNTIME_DATA_TAG_API = 40, //runtime data tag, range: 40~59 | |||
MSPROF_RUNTIME_DATA_TAG_TRACK = 41, | |||
MSPROF_AICPU_DATA_TAG = 60, //aicpu data tag, range: 60~79 | |||
MSPROF_HCCL_DATA_TAG = 80, //hccl data tag, range: 80~99 | |||
MSPROF_DP_DATA_TAG = 100, //dp data tag, range: 100~119 | |||
MSPROF_MSPROFTX_DATA_TAG = 120, //msproftx data tag, range: 120~139 | |||
MSPROF_DATA_TAG_MAX = 65536, //data tag value type is uint16_t | |||
}; | |||
/** | |||
* @brief struct of mixed data | |||
*/ | |||
#define MSPROF_MIX_DATA_RESERVE_BYTES 7 | |||
#define MSPROF_MIX_DATA_STRING_LEN 120 | |||
enum MsprofMixDataType { | |||
MSPROF_MIX_DATA_HASH_ID = 0, | |||
MSPROF_MIX_DATA_STRING, | |||
}; | |||
struct MsprofMixData { | |||
uint8_t type; // MsprofMixDataType | |||
uint8_t rsv[MSPROF_MIX_DATA_RESERVE_BYTES]; | |||
union { | |||
uint64_t hashId; | |||
char dataStr[MSPROF_MIX_DATA_STRING_LEN]; | |||
} data; | |||
}; | |||
/** | |||
* @brief profiling command info | |||
*/ | |||
#define MSPROF_MAX_DEV_NUM 64 | |||
struct MsprofCommandHandle { | |||
uint64_t profSwitch; | |||
uint64_t profSwitchHi; | |||
uint32_t devNums; | |||
uint32_t devIdList[MSPROF_MAX_DEV_NUM]; | |||
uint32_t modelId; | |||
uint32_t type; | |||
}; | |||
/** | |||
* @brief struct of data reported by acl | |||
*/ | |||
#define MSPROF_ACL_DATA_RESERVE_BYTES 32 | |||
#define MSPROF_ACL_API_NAME_LEN 64 | |||
enum MsprofAclApiType { | |||
MSPROF_ACL_API_TYPE_OP = 1, | |||
MSPROF_ACL_API_TYPE_MODEL, | |||
MSPROF_ACL_API_TYPE_RUNTIME, | |||
MSPROF_ACL_API_TYPE_OTHERS, | |||
}; | |||
struct MsprofAclProfData { | |||
uint16_t magicNumber = MSPROF_DATA_HEAD_MAGIC_NUM; | |||
uint16_t dataTag = MSPROF_ACL_DATA_TAG; | |||
uint32_t apiType; // enum MsprofAclApiType | |||
uint64_t beginTime; | |||
uint64_t endTime; | |||
uint32_t processId; | |||
uint32_t threadId; | |||
char apiName[MSPROF_ACL_API_NAME_LEN]; | |||
uint8_t reserve[MSPROF_ACL_DATA_RESERVE_BYTES]; | |||
}; | |||
/** | |||
* @brief struct of data reported by GE | |||
*/ | |||
#define MSPROF_GE_MODELLOAD_DATA_RESERVE_BYTES 104 | |||
struct MsprofGeProfModelLoadData { | |||
uint16_t magicNumber = MSPROF_DATA_HEAD_MAGIC_NUM; | |||
uint16_t dataTag = MSPROF_GE_DATA_TAG_MODEL_LOAD; | |||
uint32_t modelId; | |||
MsprofMixData modelName; | |||
uint64_t startTime; | |||
uint64_t endTime; | |||
uint8_t reserve[MSPROF_GE_MODELLOAD_DATA_RESERVE_BYTES]; | |||
}; | |||
#define MSPROF_GE_FUSION_DATA_RESERVE_BYTES 8 | |||
#define MSPROF_GE_FUSION_OP_NUM 8 | |||
struct MsprofGeProfFusionData { | |||
uint16_t magicNumber = MSPROF_DATA_HEAD_MAGIC_NUM; | |||
uint16_t dataTag = MSPROF_GE_DATA_TAG_FUSION; | |||
uint32_t modelId; | |||
MsprofMixData fusionName; | |||
uint64_t inputMemSize; | |||
uint64_t outputMemSize; | |||
uint64_t weightMemSize; | |||
uint64_t workspaceMemSize; | |||
uint64_t totalMemSize; | |||
uint64_t fusionOpNum; | |||
uint64_t fusionOp[MSPROF_GE_FUSION_OP_NUM]; | |||
uint8_t reserve[MSPROF_GE_FUSION_DATA_RESERVE_BYTES]; | |||
}; | |||
#define MSPROF_GE_INFER_DATA_RESERVE_BYTES 64 | |||
struct MsprofGeProfInferData { | |||
uint16_t magicNumber = MSPROF_DATA_HEAD_MAGIC_NUM; | |||
uint16_t dataTag = MSPROF_GE_DATA_TAG_INFER; | |||
uint32_t modelId; | |||
MsprofMixData modelName; | |||
uint32_t requestId; | |||
uint32_t threadId; | |||
uint64_t inputDataStartTime; | |||
uint64_t inputDataEndTime; | |||
uint64_t inferStartTime; | |||
uint64_t inferEndTime; | |||
uint64_t outputDataStartTime; | |||
uint64_t outputDataEndTime; | |||
uint8_t reserve[MSPROF_GE_INFER_DATA_RESERVE_BYTES]; | |||
}; | |||
#define MSPROF_GE_TASK_DATA_RESERVE_BYTES 16 | |||
#define MSPROF_GE_OP_TYPE_LEN 56 | |||
enum MsprofGeTaskType { | |||
MSPROF_GE_TASK_TYPE_AI_CORE = 0, | |||
MSPROF_GE_TASK_TYPE_AI_CPU, | |||
MSPROF_GE_TASK_TYPE_AIV, | |||
}; | |||
enum MsprofGeShapeType { | |||
MSPROF_GE_SHAPE_TYPE_STATIC = 0, | |||
MSPROF_GE_SHAPE_TYPE_DYNAMIC, | |||
}; | |||
struct MsprofGeOpType { | |||
uint8_t type; // MsprofMixDataType | |||
uint8_t rsv[MSPROF_MIX_DATA_RESERVE_BYTES]; | |||
union { | |||
uint64_t hashId; | |||
char dataStr[MSPROF_GE_OP_TYPE_LEN]; | |||
} data; | |||
}; | |||
struct MsprofGeProfTaskData { | |||
uint16_t magicNumber = MSPROF_DATA_HEAD_MAGIC_NUM; | |||
uint16_t dataTag = MSPROF_GE_DATA_TAG_TASK; | |||
uint32_t taskType; // MsprofGeTaskType | |||
MsprofMixData opName; | |||
MsprofGeOpType opType; | |||
uint64_t curIterNum; | |||
uint64_t timeStamp; | |||
uint32_t shapeType; // MsprofGeShapeType | |||
uint32_t blockDims; | |||
uint32_t modelId; | |||
uint32_t streamId; | |||
uint32_t taskId; | |||
uint32_t threadId; | |||
uint8_t reserve[MSPROF_GE_TASK_DATA_RESERVE_BYTES]; | |||
}; | |||
#define MSPROF_GE_TENSOR_DATA_RESERVE_BYTES 8 | |||
#define MSPROF_GE_TENSOR_DATA_SHAPE_LEN 8 | |||
#define MSPROF_GE_TENSOR_DATA_NUM 5 | |||
enum MsprofGeTensorType { | |||
MSPROF_GE_TENSOR_TYPE_INPUT = 0, | |||
MSPROF_GE_TENSOR_TYPE_OUTPUT, | |||
}; | |||
struct MsprofGeTensorData { | |||
uint32_t tensorType; // MsprofGeTensorType | |||
uint32_t format; | |||
uint32_t dataType; | |||
uint32_t shape[MSPROF_GE_TENSOR_DATA_SHAPE_LEN]; | |||
}; | |||
struct MsprofGeProfTensorData { | |||
uint16_t magicNumber = MSPROF_DATA_HEAD_MAGIC_NUM; | |||
uint16_t dataTag = MSPROF_GE_DATA_TAG_TENSOR; | |||
uint32_t modelId; | |||
uint64_t curIterNum; | |||
uint32_t streamId; | |||
uint32_t taskId; | |||
uint32_t tensorNum; | |||
MsprofGeTensorData tensorData[MSPROF_GE_TENSOR_DATA_NUM]; | |||
uint8_t reserve[MSPROF_GE_TENSOR_DATA_RESERVE_BYTES]; | |||
}; | |||
#define MSPROF_GE_STEP_DATA_RESERVE_BYTES 27 | |||
enum MsprofGeStepTag { | |||
MSPROF_GE_STEP_TAG_BEGIN = 0, | |||
MSPROF_GE_STEP_TAG_END, | |||
}; | |||
struct MsprofGeProfStepData { | |||
uint16_t magicNumber = MSPROF_DATA_HEAD_MAGIC_NUM; | |||
uint16_t dataTag = MSPROF_GE_DATA_TAG_STEP; | |||
uint32_t modelId; | |||
uint32_t streamId; | |||
uint32_t taskId; | |||
uint64_t timeStamp; | |||
uint64_t curIterNum; | |||
uint32_t threadId; | |||
uint8_t tag; // MsprofGeStepTag | |||
uint8_t reserve[MSPROF_GE_STEP_DATA_RESERVE_BYTES]; | |||
}; | |||
#define MSPROF_GE_ID_MAP_DATA_RESERVE_BYTES 6 | |||
struct MsprofGeProfIdMapData { | |||
uint16_t magicNumber = MSPROF_DATA_HEAD_MAGIC_NUM; | |||
uint16_t dataTag = MSPROF_GE_DATA_TAG_ID_MAP; | |||
uint32_t graphId; | |||
uint32_t modelId; | |||
uint32_t sessionId; | |||
uint64_t timeStamp; | |||
uint16_t mode; | |||
uint8_t reserve[MSPROF_GE_ID_MAP_DATA_RESERVE_BYTES]; | |||
}; | |||
#define MSPROF_GE_HOST_SCH_DATA_RESERVE_BYTES 24 | |||
struct MsprofGeProfHostSchData { | |||
uint16_t magicNumber = MSPROF_DATA_HEAD_MAGIC_NUM; | |||
uint16_t dataTag = MSPROF_GE_DATA_TAG_HOST_SCH; | |||
uint32_t threadId; // record in start event | |||
uint64_t element; | |||
uint64_t event; | |||
uint64_t startTime; // record in start event | |||
uint64_t endTime; // record in end event | |||
uint8_t reserve[MSPROF_GE_HOST_SCH_DATA_RESERVE_BYTES]; | |||
}; | |||
/** | |||
* @brief struct of data reported by RunTime | |||
*/ | |||
#define MSPROF_RUNTIME_API_DATA_RESERVE_BYTES 106 | |||
#define MSPROF_RUNTIME_TASK_ID_NUM 10 | |||
#define MSPROF_RUNTIME_API_NAME_LEN 64 | |||
struct MsprofRuntimeProfApiData { | |||
uint16_t magicNumber = MSPROF_DATA_HEAD_MAGIC_NUM; | |||
uint16_t dataTag = MSPROF_RUNTIME_DATA_TAG_API; | |||
uint32_t threadId; | |||
uint64_t entryTime; | |||
uint64_t exitTime; | |||
uint64_t dataSize; | |||
uint8_t apiName[MSPROF_RUNTIME_API_NAME_LEN]; | |||
uint32_t retCode; | |||
uint32_t streamId; | |||
uint32_t taskNum; | |||
uint32_t taskId[MSPROF_RUNTIME_TASK_ID_NUM]; | |||
uint16_t memcpyDirection; | |||
uint8_t reserve[MSPROF_RUNTIME_API_DATA_RESERVE_BYTES]; | |||
}; | |||
#define MSPROF_RUNTIME_TRACK_DATA_RESERVE_BYTES 10 | |||
#define MSPROF_RUNTIME_TRACK_TASK_TYPE_LEN 32 | |||
struct MsprofRuntimeProfTrackData { | |||
uint16_t magicNumber = MSPROF_DATA_HEAD_MAGIC_NUM; | |||
uint16_t dataTag = MSPROF_RUNTIME_DATA_TAG_TRACK; | |||
uint32_t threadId; | |||
uint64_t timeStamp; | |||
char taskType[MSPROF_RUNTIME_TRACK_TASK_TYPE_LEN]; | |||
uint32_t taskId; | |||
uint16_t streamId; | |||
uint8_t reserve[MSPROF_RUNTIME_TRACK_DATA_RESERVE_BYTES]; | |||
}; | |||
/** | |||
* @brief struct of data reported by RunTime | |||
*/ | |||
#define MSPROF_AICPU_DATA_RESERVE_BYTES 9 | |||
struct MsprofAicpuProfData { | |||
uint16_t magicNumber = MSPROF_DATA_HEAD_MAGIC_NUM; | |||
uint16_t dataTag = MSPROF_AICPU_DATA_TAG; | |||
uint16_t streamId; | |||
uint16_t taskId; | |||
uint64_t runStartTime; | |||
uint64_t runStartTick; | |||
uint64_t computeStartTime; | |||
uint64_t memcpyStartTime; | |||
uint64_t memcpyEndTime; | |||
uint64_t runEndTime; | |||
uint64_t runEndTick; | |||
uint32_t threadId; | |||
uint32_t deviceId; | |||
uint64_t submitTick; | |||
uint64_t scheduleTick; | |||
uint64_t tickBeforeRun; | |||
uint64_t tickAfterRun; | |||
uint32_t kernelType; | |||
uint32_t dispatchTime; | |||
uint32_t totalTime; | |||
uint16_t fftsThreadId; | |||
uint8_t version; | |||
uint8_t reserve[MSPROF_AICPU_DATA_RESERVE_BYTES]; | |||
}; | |||
/** | |||
* @brief struct of data reported by DP | |||
*/ | |||
#define MSPROF_DP_DATA_RESERVE_BYTES 16 | |||
#define MSPROF_DP_DATA_ACTION_LEN 16 | |||
#define MSPROF_DP_DATA_SOURCE_LEN 64 | |||
struct MsprofDpProfData { | |||
uint16_t magicNumber = MSPROF_DATA_HEAD_MAGIC_NUM; | |||
uint16_t dataTag = MSPROF_DP_DATA_TAG; | |||
uint32_t rsv; // Ensure 8-byte alignment | |||
uint64_t timeStamp; | |||
char action[MSPROF_DP_DATA_ACTION_LEN]; | |||
char source[MSPROF_DP_DATA_SOURCE_LEN]; | |||
uint64_t index; | |||
uint64_t size; | |||
uint8_t reserve[MSPROF_DP_DATA_RESERVE_BYTES]; | |||
}; | |||
/** | |||
* @brief struct of data reported by HCCL | |||
*/ | |||
#pragma pack(4) | |||
struct MsprofHcclProfNotify { | |||
uint32_t taskID; | |||
uint64_t notifyID; | |||
uint32_t stage; | |||
uint32_t remoteRank; | |||
uint32_t transportType; | |||
uint32_t role; // role {0: dst, 1:src} | |||
double durationEstimated; | |||
}; | |||
/**
 * @brief HCCL profiling payload for a reduce task (packed to 4 bytes by the
 *        surrounding #pragma pack(4); layout is part of the binary protocol).
 */
struct MsprofHcclProfReduce {
    uint32_t taskID;         // task identifier of the reduce operation
    uint64_t src;            // source buffer address
    uint64_t dst;            // destination buffer address
    uint64_t size;           // number of bytes reduced
    uint32_t op;             // {0: sum, 1: mul, 2: max, 3: min}
    uint32_t dataType;       // data type {0: INT8, 1: INT16, 2: INT32, 3: FP16, 4:FP32, 5:INT64, 6:UINT64}
    uint32_t linkType;       // link type {0: 'OnChip', 1: 'HCCS', 2: 'PCIe', 3: 'RoCE'}
    uint32_t remoteRank;     // peer rank involved in the reduce
    uint32_t transportType;  // transport type {0: SDMA, 1: RDMA, 2:LOCAL} -- NOTE(review): RDMA/Memcpy structs list the opposite 0/1 mapping; confirm which is authoritative
    uint32_t role;           // role {0: dst, 1:src}
    double durationEstimated;  // estimated duration (time unit not specified here -- confirm)
};
/**
 * @brief HCCL profiling payload for an RDMA task (packed to 4 bytes by the
 *        surrounding #pragma pack(4); layout is part of the binary protocol).
 */
struct MsprofHcclProfRDMA {
    uint32_t taskID;         // task identifier of the RDMA operation
    uint64_t src;            // source buffer address
    uint64_t dst;            // destination buffer address
    uint64_t size;           // number of bytes transferred
    uint64_t notifyID;       // notify primitive paired with this transfer
    uint32_t linkType;       // link type {0: 'OnChip', 1: 'HCCS', 2: 'PCIe', 3: 'RoCE'}
    uint32_t remoteRank;     // peer rank of the transfer
    uint32_t transportType;  // transport type {0: RDMA, 1:SDMA, 2:LOCAL} -- NOTE(review): MsprofHcclProfReduce lists the opposite 0/1 mapping; confirm which is authoritative
    uint32_t role;           // role {0: dst, 1:src}
    uint32_t type;           // RDMA type {0: RDMASendNotify, 1:RDMASendPayload}
    double durationEstimated;  // estimated duration (time unit not specified here -- confirm)
};
/**
 * @brief HCCL profiling payload for a memcpy task (packed to 4 bytes by the
 *        surrounding #pragma pack(4); layout is part of the binary protocol).
 */
struct MsprofHcclProfMemcpy {
    uint32_t taskID;         // task identifier of the memcpy operation
    uint64_t src;            // source buffer address
    uint64_t dst;            // destination buffer address
    uint64_t size;           // number of bytes copied
    uint64_t notifyID;       // notify primitive paired with this copy
    uint32_t linkType;       // link type {0: 'OnChip', 1: 'HCCS', 2: 'PCIe', 3: 'RoCE'}
    uint32_t remoteRank;     // peer rank of the copy
    uint32_t transportType;  // transport type {0: RDMA, 1:SDMA, 2:LOCAL} -- NOTE(review): MsprofHcclProfReduce lists the opposite 0/1 mapping; confirm which is authoritative
    uint32_t role;           // role {0: dst, 1:src}
    double durationEstimated;  // estimated duration (time unit not specified here -- confirm)
};
/**
 * @brief HCCL profiling payload describing the rank topology of a stage step.
 */
struct MsprofHcclProfStageStep {
    uint32_t rank;      // rank id of the reporting process
    uint32_t rankSize;  // total number of ranks in the communication group
};
/**
 * @brief HCCL profiling payload carrying collective-operation identification flags.
 */
struct MsprofHcclProfFlag {
    uint64_t cclTag;        // tag identifying the collective operation (presumably a hashed/encoded string -- confirm)
    uint64_t groupName;     // communication group identifier (presumably a hashed/encoded string -- confirm)
    uint32_t localRank;     // rank id of the reporting process within the group
    uint32_t workFlowMode;  // workflow mode of the collective (value meanings not defined here)
};
/** | |||
* @name MsprofHcclProfData | |||
* @brief struct of data reported by hccl | |||
*/ | |||
struct MsprofHcclProfData { | |||
uint16_t magicNumber = MSPROF_DATA_HEAD_MAGIC_NUM; | |||
uint16_t dataTag = MSPROF_HCCL_DATA_TAG; | |||
uint32_t planeID; | |||
uint32_t deviceID; | |||
uint32_t streamID; | |||
double ts; | |||
char name[16]; | |||
union { | |||
MsprofHcclProfNotify notify; | |||
MsprofHcclProfReduce reduce; | |||
MsprofHcclProfStageStep stageStep; | |||
MsprofHcclProfMemcpy forMemcpy; | |||
MsprofHcclProfRDMA RDMA; | |||
MsprofHcclProfFlag flag; | |||
} args; | |||
}; | |||
#pragma pack() | |||
/**
 * @name MsprofStampInfo
 * @brief Record reported by msproftx for user-defined markers/ranges
 *        (start/end timestamps plus an optional typed payload and message).
 *        Binary wire/ABI format: member order, types and reserve padding
 *        must not change. Note: unlike the HCCL structs above, this one is
 *        outside the #pragma pack(4) region, so natural alignment applies.
 */
struct MsprofStampInfo {
    uint16_t magicNumber;  // common msprof record magic (set by reporter, no default here)
    uint16_t dataTag;      // identifies this record type to the parser
    uint32_t processId;    // reporting process id
    uint32_t threadId;     // reporting thread id
    uint32_t category;     // marker category
    uint32_t eventType;    // event type of the marker (value meanings not defined here)
    int32_t payloadType;   // selects which PayloadValue member is valid
    union PayloadValue     // payload info for marker; interpretation governed by payloadType
    {
        uint64_t ullValue;
        int64_t llValue;
        double dValue;
        uint32_t uiValue[2];
        int32_t iValue[2];
        float fValue[2];
    } payload;
    uint64_t startTime;    // range start timestamp (unit/epoch not defined here -- confirm)
    uint64_t endTime;      // range end timestamp; presumably equals startTime for point markers -- confirm
    int32_t messageType;   // how `message` should be interpreted (value meanings not defined here)
    char message[128];     // user-supplied marker text, fixed 128-byte buffer
    uint8_t reserve0[4];   // reserved; keeps record size/alignment stable
    uint8_t reserve1[72];  // reserved for future fields
};
#ifdef __cplusplus | |||
} | |||
#endif | |||
#endif // MSPROFILER_PROF_COMMON_H_ |