From 3f3c41fd041d21803384c8d373bc5c851b016ccc Mon Sep 17 00:00:00 2001 From: yanghaoran Date: Mon, 13 Apr 2020 19:56:30 +0800 Subject: [PATCH] Update GraphEngine to synchronize with latest Ascend driver software suite 13 Apr 2020 --- inc/common/opskernel/ops_kernel_info_types.h | 6 +- inc/external/ge/ge_api_types.h | 2 +- inc/framework/common/debug/ge_log.h | 2 - inc/framework/common/ge_inner_error_codes.h | 1 - inc/framework/common/helper/om_file_helper.h | 1 - inc/graph/debug/ge_attr_define.h | 1 - inc/graph/model.h | 3 - src/common/graph/ge_attr_define.cc | 2 +- src/common/graph/ge_attr_value.cc | 10 +- src/common/graph/model_serialize.cc | 4 +- src/common/graph/op_desc.cc | 1 - src/common/graph/operator.cc | 2 +- src/common/graph/opsproto/opsproto_manager.cc | 2 - src/common/graph/utils/op_desc_utils.cc | 12 +- src/common/graph/utils/tensor_utils.cc | 24 +- src/ge/common/op/attr_define.cc | 2 +- .../graph/build/memory/block_mem_assigner.cc | 31 +- .../graph/build/memory/graph_mem_assigner.cc | 12 +- src/ge/graph/build/task_generator.cc | 44 ++- src/ge/graph/build/task_generator.h | 2 + src/ge/graph/load/graph_loader.cc | 13 +- src/ge/graph/load/graph_loader.h | 2 + .../load/new_model_manager/model_manager.cc | 76 ++++- .../load/new_model_manager/model_manager.h | 8 + .../task_info/kernel_ex_task_info.cc | 10 +- src/ge/graph/load/output/output.cc | 7 +- src/ge/graph/manager/graph_manager.cc | 72 +++-- src/ge/graph/optimize/graph_optimize.h | 6 +- src/ge/graph/passes/assert_pass.h | 6 +- src/ge/graph/passes/atomic_addr_clean_pass.cc | 4 +- src/ge/graph/passes/compile_nodes_pass.cc | 3 + src/ge/graph/passes/net_output_pass.cc | 12 +- src/ge/graph/passes/variable_op_pass.cc | 8 +- .../graph/passes/variable_prepare_op_pass.cc | 6 +- src/ge/graph/preprocess/graph_preprocess.h | 2 - src/ge/model/ge_model.h | 8 +- src/ge/session/inner_session.h | 3 +- tests/ut/common/graph/CMakeLists.txt | 3 - tests/ut/ge/CMakeLists.txt | 3 - .../fwkacllib/inc/cce/fwk_adpt_struct.h | 7 +- third_party/fwkacllib/inc/hccl/base.h | 10 - third_party/fwkacllib/inc/mmpa/mmpa_api.h | 4 +- third_party/fwkacllib/inc/ops/all_ops.h | 3 +- .../fwkacllib/inc/ops/atomic_addr_clean_ops.h | 28 -- .../inc/ops/elewise_calculation_ops.h | 52 +--- third_party/fwkacllib/inc/ops/math_ops.h | 77 +++++ .../inc/ops/matrix_calculation_ops.h | 108 ------- .../fwkacllib/inc/ops/nn_calculation_ops.h | 26 ++ third_party/fwkacllib/inc/ops/nn_detect_ops.h | 100 +++++++ third_party/fwkacllib/inc/ops/nn_norm_ops.h | 250 ++++++++++++++++ third_party/fwkacllib/inc/ops/nn_other_ops.h | 268 ------------------ .../fwkacllib/inc/ops/nn_training_ops.h | 238 ++++++++++++++++ .../fwkacllib/inc/ops/npu_loss_scale_ops.h | 24 ++ third_party/fwkacllib/inc/ops/reduce_ops.h | 31 ++ .../inc/ops/{basic_lstm_cell.h => rnn.h} | 6 +- third_party/fwkacllib/inc/ops/rpn_ops.h | 26 ++ third_party/fwkacllib/inc/runtime/event.h | 22 -- third_party/fwkacllib/inc/runtime/kernel.h | 2 - third_party/fwkacllib/inc/runtime/mem.h | 2 - third_party/fwkacllib/inc/runtime/rt_model.h | 3 +- third_party/fwkacllib/inc/tdt/data_common.h | 1 + 61 files changed, 1075 insertions(+), 629 deletions(-) delete mode 100644 third_party/fwkacllib/inc/ops/atomic_addr_clean_ops.h delete mode 100644 third_party/fwkacllib/inc/ops/nn_other_ops.h rename third_party/fwkacllib/inc/ops/{basic_lstm_cell.h => rnn.h} (98%) diff --git a/inc/common/opskernel/ops_kernel_info_types.h b/inc/common/opskernel/ops_kernel_info_types.h index 19a738de..7ebf463d 100644 --- 
a/inc/common/opskernel/ops_kernel_info_types.h +++ b/inc/common/opskernel/ops_kernel_info_types.h @@ -26,7 +26,6 @@ using std::string; namespace ge { -/*lint -e148*/ struct RunContext { rtModel_t model; rtStream_t stream; @@ -40,8 +39,6 @@ struct RunContext { std::vector graphEventList; // all events of graph, order by ge event id(0,1,...) }; -/*lint +e148*/ - struct Task { uint32_t id; uint16_t type; @@ -50,8 +47,7 @@ struct Task { }; struct OpInfo { - string engine; // which engin - /*lint -e148*/ + string engine; // which engin string opKernelLib; // which opsKernelStore int computeCost; // compute cost bool flagPartial; // whether to support is related to shape diff --git a/inc/external/ge/ge_api_types.h b/inc/external/ge/ge_api_types.h index 7aad64b6..a47c5889 100644 --- a/inc/external/ge/ge_api_types.h +++ b/inc/external/ge/ge_api_types.h @@ -98,7 +98,7 @@ const std::string OUTPUT_NODE_NAME = "ge.outputNodeName"; // its value should be "0" or "1", default value is "0" const std::string COMPRESS_FLAG = "ge.compressFlag"; -const std::string ATUO_PRECISION_FLAG = "ge.exec.auto_mix_precision"; +const std::string PRECISION_MODE = "ge.exec.precision_mode"; // Configure single op flag for FE // its value should be "0" or "1", default value is "0" diff --git a/inc/framework/common/debug/ge_log.h b/inc/framework/common/debug/ge_log.h index bdc865de..1556fd07 100644 --- a/inc/framework/common/debug/ge_log.h +++ b/inc/framework/common/debug/ge_log.h @@ -44,8 +44,6 @@ inline bool IsLogEnable(int module_name, int log_level) noexcept { return false; } -/*lint --emacro((773),GE_TIMESTAMP_START)*/ -/*lint -esym(773,GE_TIMESTAMP_START)*/ #define GE_TIMESTAMP_START(stage) uint64_t startUsec_##stage = ge::GetCurrentTimestap() #define GE_TIMESTAMP_END(stage, stage_name) \ diff --git a/inc/framework/common/ge_inner_error_codes.h b/inc/framework/common/ge_inner_error_codes.h index c6bfc576..f01ede03 100644 --- a/inc/framework/common/ge_inner_error_codes.h +++ b/inc/framework/common/ge_inner_error_codes.h @@ -14,7 +14,6 @@ * limitations under the License. 
*/ -/*lint -e* */ #ifndef INC_FRAMEWORK_COMMON_GE_INNER_ERROR_CODES_H_ #define INC_FRAMEWORK_COMMON_GE_INNER_ERROR_CODES_H_ diff --git a/inc/framework/common/helper/om_file_helper.h b/inc/framework/common/helper/om_file_helper.h index 1341243b..daabd118 100644 --- a/inc/framework/common/helper/om_file_helper.h +++ b/inc/framework/common/helper/om_file_helper.h @@ -88,5 +88,4 @@ class OmFileSaveHelper { OmFileContext context_; }; } // namespace ge -/*lint +e148*/ #endif // INC_FRAMEWORK_COMMON_HELPER_OM_FILE_HELPER_H_ diff --git a/inc/graph/debug/ge_attr_define.h b/inc/graph/debug/ge_attr_define.h index e35c77f9..6476d07f 100644 --- a/inc/graph/debug/ge_attr_define.h +++ b/inc/graph/debug/ge_attr_define.h @@ -774,4 +774,3 @@ GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string DYNAMIC_ } // namespace ge #endif // INC_GRAPH_DEBUG_GE_ATTR_DEFINE_H_ -/*lint +e618*/ diff --git a/inc/graph/model.h b/inc/graph/model.h index 8e33b119..02510d8f 100644 --- a/inc/graph/model.h +++ b/inc/graph/model.h @@ -31,8 +31,6 @@ using std::map; using std::string; using std::vector; -/*lint -e148*/ - class GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY Model : public AttrHolder { public: Model(); @@ -91,7 +89,6 @@ class GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY Model : public AttrHolder { std::string platform_version_{""}; Graph graph_; }; -/*lint +e148*/ } // namespace ge using ModelPtr = std::shared_ptr; diff --git a/src/common/graph/ge_attr_define.cc b/src/common/graph/ge_attr_define.cc index 67587632..55113191 100644 --- a/src/common/graph/ge_attr_define.cc +++ b/src/common/graph/ge_attr_define.cc @@ -124,7 +124,7 @@ const std::string ATTR_NAME_BROACAST_REAL_DIM_CNT = "broacast_real_dim_cnt"; const std::string ATTR_NAME_DIM_ALIGN = "dim_align"; const std::string ATTR_NAME_FRAMEWORK_ORIGINAL_TYPE = "original_type"; -const std::string ATTR_NAME_SESSION_GRAPH_ID = "session_graph_id"; +const std::string ATTR_NAME_SESSION_GRAPH_ID = "_session_graph_id"; const std::string ATTR_NAME_AUTOMIC_ADD_START = "automic_add_addr_start"; const std::string ATTR_NAME_AUTOMIC_ADD_MEM_SIZE = "automic_add_mem_size"; diff --git a/src/common/graph/ge_attr_value.cc b/src/common/graph/ge_attr_value.cc index 8be42429..8eb91606 100644 --- a/src/common/graph/ge_attr_value.cc +++ b/src/common/graph/ge_attr_value.cc @@ -34,7 +34,7 @@ namespace ge { GeAttrValue::NamedAttrs::NamedAttrs() { named_attrs_.InitDefault(); } GeAttrValue::NamedAttrs::NamedAttrs(const ProtoMsgOwner &owner, proto::NamedAttrs *proto_msg) - : named_attrs_(owner, proto_msg) {} // lint !e1744 + : named_attrs_(owner, proto_msg) {} void GeAttrValue::NamedAttrs::SetName(const std::string &name) { auto proto_msg = named_attrs_.GetProtoMsg(); @@ -239,7 +239,7 @@ ATTR_VALUE_SET_GET_IMP(GeAttrValue::STR) ATTR_VALUE_SET_GET_IMP(vector) ATTR_VALUE_SET_GET_IMP(GeAttrValue::INT) ATTR_VALUE_SET_GET_IMP(vector) -ATTR_VALUE_SET_GET_IMP(GeAttrValue::FLOAT) // lint !e524 +ATTR_VALUE_SET_GET_IMP(GeAttrValue::FLOAT) ATTR_VALUE_SET_GET_IMP(vector) ATTR_VALUE_SET_GET_IMP(GeAttrValue::BOOL) ATTR_VALUE_SET_GET_IMP(vector) @@ -253,11 +253,9 @@ ATTR_VALUE_SET_GET_IMP(GeAttrValue::BYTES) ATTR_VALUE_SET_GET_IMP(vector) ATTR_VALUE_SET_GET_IMP(GeAttrValue::NAMED_ATTRS) ATTR_VALUE_SET_GET_IMP(vector) -/*lint -e665*/ ATTR_VALUE_SET_GET_IMP(vector>) -/*lint +e665*/ -ATTR_VALUE_SET_GET_IMP(vector) // lint !e665 -ATTR_VALUE_SET_GET_IMP(GeAttrValue::DATA_TYPE) // lint !e665 +ATTR_VALUE_SET_GET_IMP(vector) +ATTR_VALUE_SET_GET_IMP(GeAttrValue::DATA_TYPE) #undef 
ATTR_VALUE_SET_GET_IMP diff --git a/src/common/graph/model_serialize.cc b/src/common/graph/model_serialize.cc index ebb61589..f92ebf1e 100644 --- a/src/common/graph/model_serialize.cc +++ b/src/common/graph/model_serialize.cc @@ -265,13 +265,13 @@ bool ModelSerializeImp::HandleNodeNameRef() { item.dst_node_name.c_str(), item.dst_in_index); return false; } - GE_CHK_BOOL_ONLY_LOG((src_anchor->LinkTo(dst_anchor) == GRAPH_SUCCESS), " linkTo failed."); // lint !e737 + GE_CHK_BOOL_ONLY_LOG((src_anchor->LinkTo(dst_anchor) == GRAPH_SUCCESS), " linkTo failed."); } else { // Control edge auto src_anchor = src_node_it->second->GetOutControlAnchor(); auto dst_anchor = item.dst_node->GetInControlAnchor(); if (src_anchor != nullptr && dst_anchor != nullptr) { - GE_CHK_BOOL_ONLY_LOG((src_anchor->LinkTo(dst_anchor) == GRAPH_SUCCESS), " linkTo failed."); // lint !e737 + GE_CHK_BOOL_ONLY_LOG((src_anchor->LinkTo(dst_anchor) == GRAPH_SUCCESS), " linkTo failed."); } } } diff --git a/src/common/graph/op_desc.cc b/src/common/graph/op_desc.cc index 22120a37..e6184ed3 100644 --- a/src/common/graph/op_desc.cc +++ b/src/common/graph/op_desc.cc @@ -32,7 +32,6 @@ using std::shared_ptr; using std::string; using std::vector; -/*lint -save -e521 -e681 -e732 -e737*/ namespace ge { const std::string ATTR_NAME_ID = "id"; diff --git a/src/common/graph/operator.cc b/src/common/graph/operator.cc index 1d8db14e..b40938a8 100644 --- a/src/common/graph/operator.cc +++ b/src/common/graph/operator.cc @@ -421,7 +421,7 @@ GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY Operator OpDescUtils::CreateOpera return Operator("default"); } OperatorKeeper::GetInstance().CheckInOperator(operator_impl_ptr); - return operator_impl_ptr->ToOperator(); // lint !e514 + return operator_impl_ptr->ToOperator(); } GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY OpDescPtr OpDescUtils::GetOpDescFromOperator(const Operator &oprt) { diff --git a/src/common/graph/opsproto/opsproto_manager.cc b/src/common/graph/opsproto/opsproto_manager.cc index 2c9bf7d1..a5bdb4c5 100644 --- a/src/common/graph/opsproto/opsproto_manager.cc +++ b/src/common/graph/opsproto/opsproto_manager.cc @@ -33,9 +33,7 @@ OpsProtoManager *OpsProtoManager::Instance() { } bool OpsProtoManager::Initialize(const std::map &options) { - /*lint -e1561*/ auto proto_iter = options.find("ge.opsProtoLibPath"); - /*lint +e1561*/ if (proto_iter == options.end()) { GELOGW("ge.opsProtoLibPath option not set, return."); return false; diff --git a/src/common/graph/utils/op_desc_utils.cc b/src/common/graph/utils/op_desc_utils.cc index 57f7cdee..f2214350 100644 --- a/src/common/graph/utils/op_desc_utils.cc +++ b/src/common/graph/utils/op_desc_utils.cc @@ -30,7 +30,6 @@ using std::vector; -/*lint -e512 -e737 -e752*/ namespace ge { const char OP_DESC_QUANT_PARAMS[] = "quantize_factor"; static const int CONST_OP_NORMAL_WEIGHT_SIZE = 1; @@ -135,11 +134,11 @@ graphStatus OpDescUtils::GetQuantizeFactorParams(const OpDesc &op_desc, Quantize GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY graphStatus OpDescUtils::SetQuantizeFactorParams(const OpDescPtr &op_desc, const QuantizeFactorParams &quant) { GE_CHK_BOOL_EXEC_INFO(op_desc != nullptr, return GRAPH_FAILED, "op_desc is nullptr"); - return op_desc->SetAttr(OP_DESC_QUANT_PARAMS, GeAttrValue::CreateFrom(quant)); // lint !e732 + return op_desc->SetAttr(OP_DESC_QUANT_PARAMS, GeAttrValue::CreateFrom(quant)); } graphStatus OpDescUtils::SetQuantizeFactorParams(OpDesc &op_desc, const QuantizeFactorParams &quant) { - return op_desc.SetAttr(OP_DESC_QUANT_PARAMS, 
GeAttrValue::CreateFrom(quant)); // lint !e732 + return op_desc.SetAttr(OP_DESC_QUANT_PARAMS, GeAttrValue::CreateFrom(quant)); } GeTensorPtr OpDescUtils::MutableWeights(OpDesc &op_desc) { @@ -164,7 +163,7 @@ graphStatus OpDescUtils::SetWeights(OpDesc &op_desc, const GeTensorPtr weight) { GELOGE(GRAPH_FAILED, "weight is null"); return GRAPH_FAILED; } - return AttrUtils::SetTensor(&op_desc, ATTR_NAME_WEIGHTS, weight) ? GRAPH_SUCCESS : GRAPH_FAILED; // lint !e737 + return AttrUtils::SetTensor(&op_desc, ATTR_NAME_WEIGHTS, weight) ? GRAPH_SUCCESS : GRAPH_FAILED; } graphStatus OpDescUtils::SetWeights(OpDescPtr op_desc, const GeTensorPtr weight) { @@ -230,7 +229,7 @@ size_t OpDescUtils::GetNonConstInputsSize(const ge::Node &node) { continue; } } - return input_num; // lint !e712 + return input_num; } else { GE_IF_BOOL_EXEC( node.GetInDataNodes().size() < GetConstInputs(node).size(), @@ -335,7 +334,7 @@ bool OpDescUtils::IsNonConstInput(const ge::Node &node, const size_t index) { bool ret = false; if (index < node.GetAllInDataAnchors().size()) { if (NodeUtils::IsAnchorStatusSet(node)) { - ret = (ge::AnchorUtils::GetStatus(node.GetInDataAnchor(static_cast(index))) == ANCHOR_DATA); // lint !e712 + ret = (ge::AnchorUtils::GetStatus(node.GetInDataAnchor(static_cast(index))) == ANCHOR_DATA); } else { for (const auto &anchor : node.GetAllInDataAnchors()) { if (anchor->GetIdx() != static_cast(index)) { @@ -574,4 +573,3 @@ GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY graphStatus OpDescUtils::ClearWei return GRAPH_SUCCESS; } } // namespace ge -/*lint +e512 +e737 +e752*/ diff --git a/src/common/graph/utils/tensor_utils.cc b/src/common/graph/utils/tensor_utils.cc index 390fed46..819f5d58 100644 --- a/src/common/graph/utils/tensor_utils.cc +++ b/src/common/graph/utils/tensor_utils.cc @@ -286,10 +286,10 @@ static graphStatus CalcTensorElementCnt(const std::vector &dims, Format const string type_str = TypeUtils::DataTypeToSerialString(data_type); if (graph_status == GRAPH_SUCCESS) { - GELOGI( - "CalcTensorElementCnt end, format=%d(%s)," - " data_type=%d(%s), element_cnt=%ld.", - format, format_str.c_str(), data_type, type_str.c_str(), element_cnt); + GELOGD( + "CalcTensorElementCnt end, format=%d(%s)," + " data_type=%d(%s), element_cnt=%ld.", + format, format_str.c_str(), data_type, type_str.c_str(), element_cnt); } else { GELOGE(GRAPH_FAILED, "CalcTensorElementCnt failed, format=%d(%s), data_type=%d(%s).", format, format_str.c_str(), data_type, type_str.c_str()); @@ -329,10 +329,10 @@ GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY graphStatus TensorUtils::CalcTens // Support unknown shape if (element_cnt < 0) { mem_size = kMemSizeUnknownShape; - GELOGI( - "element_cnt is unknown. " - "format=%d(%s), data_type=%d(%s), mem_size=%ld", - format, format_str.c_str(), data_type, type_str.c_str(), mem_size); + GELOGD( + "element_cnt is unknown. 
" + "format=%d(%s), data_type=%d(%s), mem_size=%ld", + format, format_str.c_str(), data_type, type_str.c_str(), mem_size); return GRAPH_SUCCESS; } auto type_size_int64 = static_cast(type_size); @@ -343,10 +343,10 @@ GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY graphStatus TensorUtils::CalcTens } mem_size = element_cnt * type_size_int64; - GELOGI( - "CalcTensorMemSize end, " - "format=%d(%s), data_type=%d(%s), mem_size=%ld", - format, format_str.c_str(), data_type, type_str.c_str(), mem_size); + GELOGD( + "CalcTensorMemSize end, " + "format=%d(%s), data_type=%d(%s), mem_size=%ld", + format, format_str.c_str(), data_type, type_str.c_str(), mem_size); return GRAPH_SUCCESS; } diff --git a/src/ge/common/op/attr_define.cc b/src/ge/common/op/attr_define.cc index bd9731ac..a2f703ed 100644 --- a/src/ge/common/op/attr_define.cc +++ b/src/ge/common/op/attr_define.cc @@ -108,7 +108,7 @@ const std::string ATTR_NAME_NAN_OPT = "nan_opt"; const std::string ATTR_NAME_AIPP = "aipp"; const std::string NEW_AIPP_CONV_OP = "new_conv_op_for_aipp"; -const std::string ATTR_NAME_SESSION_GRAPH_ID = "session_graph_id"; +const std::string ATTR_NAME_SESSION_GRAPH_ID = "_session_graph_id"; const std::string ATTR_NAME_MULTISHAPE_BATCHLIST = "multi_shape_batchlist"; const std::string ATTR_NAME_MULTISHAPE_BATCHLIST_SIZE = "multi_shape_batchlist_size"; diff --git a/src/ge/graph/build/memory/block_mem_assigner.cc b/src/ge/graph/build/memory/block_mem_assigner.cc index 42e03839..70b36ad1 100644 --- a/src/ge/graph/build/memory/block_mem_assigner.cc +++ b/src/ge/graph/build/memory/block_mem_assigner.cc @@ -402,6 +402,31 @@ bool IsOutputBlock(const ge::InDataAnchorPtr &in_data_anchor) { return false; } +// current node's output uses previous node's output memory +bool IsReferencePreviousNodeOutputMemory(const ge::NodePtr &node, uint32_t output_index) { + // Get the reference type of the node, default is false + bool is_ref = false; + // If GetBool fail, is_ref is false. 
+ auto op_desc = node->GetOpDesc(); + if (op_desc == nullptr) { + return false; + } + (void)ge::AttrUtils::GetBool(op_desc, ATTR_NAME_REFERENCE, is_ref); + if (!is_ref) { + return false; + } + const string &output_name = op_desc->GetOutputNameByIndex(output_index); + for (const auto &input_name : op_desc->GetAllInputNames()) { + if (!input_name.empty() && output_name == input_name) { + int input_index = op_desc->GetInputIndexByName(input_name); + GELOGI("Reference memory:name[%s] output[%s][%u] ref to input[%s][%d] ", op_desc->GetName().c_str(), + output_name.c_str(), output_index, input_name.c_str(), input_index); + return true; + } + } + return false; +} + void BlockMemAssigner::ReleaseMemory(MemoryBlock *to_release, vector &reusable_memory) { GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(to_release == nullptr, return, "Input parameter to_release is null."); GE_CHK_TRUE_EXEC_INFO(to_release->ref_count_ <= 0, return, "Release memory"); @@ -489,7 +514,7 @@ void BlockMemAssigner::AssignMemoryWithReuse(vector &ranges) { if (output_op_desc != nullptr) { GE_IF_BOOL_EXEC(ge::TensorUtils::GetSize(*output_op_desc, size) != SUCCESS, GELOGI("Get size failed")); } - if ((size == 0) || CheckIsZeroMemNodeType(n->GetType())) { + if ((size == 0) || CheckIsZeroMemNodeType(n->GetType()) || IsReferencePreviousNodeOutputMemory(n, i)) { zero_memory_list_.emplace_back(n, kOutput, i); continue; } @@ -607,11 +632,11 @@ void BlockMemAssigner::MergeDynamicBatchBlocks() { std::sort(it->second.begin(), it->second.end(), CompareBlockMaxSize); } if (it_max != dynamic_batch_blocks.end()) { - GELOGI("MergeDynamicBatch %s block counts %zu", it_max->first.c_str(), it_max->second.size()); + GELOGD("MergeDynamicBatch %s block counts %zu", it_max->first.c_str(), it_max->second.size()); } for (it = dynamic_batch_blocks.begin(); it != dynamic_batch_blocks.end(); ++it) { if (it != it_max) { - GELOGI("MergeDynamicBatch from %s to %s", it->first.c_str(), it_max->first.c_str()); + GELOGD("MergeDynamicBatch from %s to %s", it->first.c_str(), it_max->first.c_str()); MergeBlocks(it_max->second, it->second); } } diff --git a/src/ge/graph/build/memory/graph_mem_assigner.cc b/src/ge/graph/build/memory/graph_mem_assigner.cc index 815c8e39..f3f8494d 100644 --- a/src/ge/graph/build/memory/graph_mem_assigner.cc +++ b/src/ge/graph/build/memory/graph_mem_assigner.cc @@ -296,7 +296,7 @@ Status GraphMemoryAssigner::ReAssignVirtualConcatMemory() { } output_list.at(0) = memory_offset_[0].mem_offset_; n->GetOpDesc()->SetOutputOffset(output_list); - GELOGI("Set Concat %s output offset to %zu.", n->GetOpDesc()->GetName().c_str(), memory_offset_[0].mem_offset_); + GELOGD("Set Concat %s output offset to %zu.", n->GetOpDesc()->GetName().c_str(), memory_offset_[0].mem_offset_); size_t extra_memory_size = 0; for (const auto &in_data_anchor : n->GetAllInDataAnchors()) { @@ -401,7 +401,7 @@ Status GraphMemoryAssigner::ReAssignMergeMemory() { data_output_offset = output_list[index]; max_output_size = tmp_output_size; } - GELOGI("merge=%s, input=%s, size=%ld, offset=%ld, max_size=%ld", n->GetName().c_str(), + GELOGD("merge=%s, input=%s, size=%ld, offset=%ld, max_size=%ld", n->GetName().c_str(), src_node->GetName().c_str(), tmp_output_size, data_output_offset, max_output_size); } @@ -541,7 +541,7 @@ Status GraphMemoryAssigner::AssignReferenceMemory(const ge::NodePtr &node) { GE_CHECK_NOTNULL(peer_out_op_desc); output_list[out_data_anchor->GetIdx()] = peer_out_op_desc->GetOutputOffset()[peer_out_anchor_index]; } else { - GELOGI("Reference output : origin %s name[%s] 
output[%d] offset is [%ld] stream_id[%ld]", + GELOGD("Reference output : origin %s name[%s] output[%d] offset is [%ld] stream_id[%ld]", node->GetOwnerComputeGraph()->GetName().c_str(), out_op_desc->GetName().c_str(), out_data_anchor->GetIdx(), output_list[out_data_anchor->GetIdx()], out_op_desc->GetStreamId()); } @@ -576,7 +576,7 @@ bool GraphMemoryAssigner::CheckInputIsSupportAtomic(const ge::NodePtr &node) { Status GraphMemoryAssigner::AssignAtomicOutputMemory(const ge::NodePtr &node) { auto op_desc = node->GetOpDesc(); GE_IF_BOOL_EXEC(op_desc == nullptr, GELOGE(ge::FAILED, "op_desc is null."); return ge::FAILED); - GELOGI("Begin to assign atomic output memory, node = %s.", op_desc->GetName().c_str()); + GELOGD("Begin to assign atomic output memory, node = %s.", op_desc->GetName().c_str()); vector atomic_output_index; // If GetListInt fail, atomic_output_index is empty. @@ -620,7 +620,7 @@ Status GraphMemoryAssigner::AssignAtomicOutputMemory(const ge::NodePtr &node) { // If you have already assigned an atomic address, skip it, and you don't need to reassign it. if (is_assigned_mem) { - GELOGI( + GELOGD( "[IMAS]Atomic output : we have assigned atomic memory as the input of next node in " "ReAssignContinuousMemory function."); continue; @@ -822,7 +822,7 @@ Status GraphMemoryAssigner::SetLoopGraphAtomicAttr(const ge::NodePtr &node, int6 continue; } - GELOGI("SetLoopGraphAtomicAttr, node is %s, op type is %s.", peer_out_node_desc->GetName().c_str(), + GELOGD("SetLoopGraphAtomicAttr, node is %s, op type is %s.", peer_out_node_desc->GetName().c_str(), peer_out_node_desc->GetType().c_str()); if (peer_out_node_desc->GetType() == ATOMICADDRCLEAN) { diff --git a/src/ge/graph/build/task_generator.cc b/src/ge/graph/build/task_generator.cc index 2b9e30af..a192c0d2 100644 --- a/src/ge/graph/build/task_generator.cc +++ b/src/ge/graph/build/task_generator.cc @@ -398,23 +398,26 @@ Status TaskGenerator::FindProfilingTaskIndex(const ComputeGraphPtr &graph, Profi if (op_kernel_lib_name.empty()) { continue; } - if (op_desc->GetName() == bp_point_str) { - last_bp = current_idx; - GELOGI("Last bp name %s, idx %u", op_desc->GetName().c_str(), last_bp); - } + if (op_desc->GetType() == NETOUTPUT) { iter_end = current_idx; GELOGI("Iter end name %s, idx %u", op_desc->GetName().c_str(), iter_end); } - if (op_desc->GetName() == fp_point_str) { - first_fp = current_idx; - GELOGI("First fp name %s, idx %u", op_desc->GetName().c_str(), first_fp); - } if (op_desc->GetType() == HCOMALLREDUCE) { ar_ppoint.emplace_back(current_idx); GELOGI("Allreduce name %s, idx %u", op_desc->GetName().c_str(), current_idx); } + + if (first_fp == 0 && IsProfPoint(op_desc, fp_point_str)) { + first_fp = current_idx; + GELOGI("First fp name %s, idx %u", op_desc->GetName().c_str(), first_fp); + } + + if (IsProfPoint(op_desc, bp_point_str)) { + last_bp = current_idx; + GELOGI("Last bp name %s, idx %u", op_desc->GetName().c_str(), last_bp); + } } ppoint.fp_index = first_fp; ppoint.bp_index = last_bp; @@ -526,4 +529,29 @@ Status TaskGenerator::InsertProfilingTaskAfter(const OpDescPtr &op_desc, const P } return SUCCESS; } + +bool TaskGenerator::IsProfPoint(const OpDescPtr &op, const std::string &name) { + if (op == nullptr) { + return false; + } + + if (op->GetName() == name) { + return true; + } + + std::vector original_op_names; + bool ret = AttrUtils::GetListStr(op, ge::ATTR_NAME_DATA_DUMP_ORIGIN_OP_NAMES, original_op_names); + if (!ret) { + return false; + } + + for (auto &origin_name : original_op_names) { + if (origin_name == name) { + 
return true; + } + } + + return false; +} + } // namespace ge diff --git a/src/ge/graph/build/task_generator.h b/src/ge/graph/build/task_generator.h index ed89df47..ae7f6885 100644 --- a/src/ge/graph/build/task_generator.h +++ b/src/ge/graph/build/task_generator.h @@ -99,6 +99,8 @@ class TaskGenerator { std::vector &ar_ppoint, uint32_t node_index, std::vector &task_def_list); + static bool IsProfPoint(const OpDescPtr &op, const std::string &name); + uint8_t *var_mem_base_ = nullptr; uint64_t var_mem_size_ = 0; }; diff --git a/src/ge/graph/load/graph_loader.cc b/src/ge/graph/load/graph_loader.cc index 2cf03022..c58cdcb9 100644 --- a/src/ge/graph/load/graph_loader.cc +++ b/src/ge/graph/load/graph_loader.cc @@ -336,7 +336,7 @@ Status GraphLoader::LoadModelFromData(uint32_t &model_id, const ModelData &model auto model_manager = ModelManager::GetInstance(); GE_CHECK_NOTNULL(model_manager); Status ret = - model_manager->LoadModelOffline(model_id, model_data, nullptr, dev_ptr, memsize, weight_ptr, weightsize); + model_manager->LoadModelOffline(model_id, model_data, nullptr, dev_ptr, memsize, weight_ptr, weightsize); if (ret != SUCCESS) { GELOGE(ret, "Load model failed, model_id:%u.", model_id); return ret; @@ -428,4 +428,15 @@ Status GraphLoader::GetMemoryInfo(int64_t &free) { GELOGI("GetMemoryInfo free[%zu], total[%zu], return free[%ld]", free_mem, total_mem, free); return SUCCESS; } + +Status GraphLoader::DestroyAicpuKernel(uint64_t session_id, uint32_t model_id) { + auto model_manager = ModelManager::GetInstance(); + GE_CHECK_NOTNULL(model_manager); + Status ret = model_manager->DestroyAicpuKernel(session_id, model_id); + if (ret != SUCCESS) { + GELOGE(ret, "Destroy aicpu kernel failed."); + return ret; + } + return SUCCESS; +} } // namespace ge diff --git a/src/ge/graph/load/graph_loader.h b/src/ge/graph/load/graph_loader.h index d0620ce7..946e39ec 100644 --- a/src/ge/graph/load/graph_loader.h +++ b/src/ge/graph/load/graph_loader.h @@ -73,6 +73,8 @@ class GraphLoader { static Status ExecuteModel(uint32_t model_id, rtStream_t stream, bool async_mode, const InputData &input_data, OutputData &output_data); + static Status DestroyAicpuKernel(uint64_t session_id, uint32_t model_id); + private: static Status LoadModelOnline(uint32_t &model_id, std::shared_ptr &model, const std::shared_ptr &listener); diff --git a/src/ge/graph/load/new_model_manager/model_manager.cc b/src/ge/graph/load/new_model_manager/model_manager.cc index 0c4fe294..70828916 100644 --- a/src/ge/graph/load/new_model_manager/model_manager.cc +++ b/src/ge/graph/load/new_model_manager/model_manager.cc @@ -18,7 +18,6 @@ #include -#include "cce/aicpu_engine_struct.h" #include "common/l2_cache_optimize.h" #include "common/profiling/profiling_manager.h" #include "common/properties_manager.h" @@ -41,17 +40,43 @@ std::shared_ptr ModelManager::GetInstance() { ModelManager::ModelManager() { max_model_id_ = 0; } -static Status KernelLaunchEx(aicpu::FWKAdapter::FWKOperateType opType, uint64_t session_id) { +Status ModelManager::KernelLaunchEx(aicpu::FWKAdapter::FWKOperateType op_type, uint64_t session_id, uint32_t model_id) { STR_FWK_OP_KERNEL param_base = {}; void *devicebase = nullptr; + void *aicpu_kernel_addr = nullptr; const uint32_t kKernelType = 0; param_base.fwkKernelType = kKernelType; - param_base.fwkKernelBase.fwk_kernel.opType = opType; + param_base.fwkKernelBase.fwk_kernel.opType = op_type; param_base.fwkKernelBase.fwk_kernel.sessionID = session_id; + if (op_type == aicpu::FWKAdapter::FWKOperateType::FWK_ADPT_KERNEL_DESTROY) { + 
std::vector v_aicpu_kernel; + std::string model_key = std::to_string(session_id) + "_" + std::to_string(model_id); + auto iter = model_aicpu_kernel_.find(model_key); + if (iter != model_aicpu_kernel_.end()) { + GELOGD("kernel destroy session_id %lu, model_id %u.", session_id, model_id); + v_aicpu_kernel = model_aicpu_kernel_.at(model_key); + // Insert size of aicpu kernel vector in the first element + v_aicpu_kernel.insert(v_aicpu_kernel.begin(), v_aicpu_kernel.size()); + + auto kernel_size = sizeof(uint64_t) * (v_aicpu_kernel.size()); + rtError_t rt_ret = rtMalloc(&aicpu_kernel_addr, kernel_size, RT_MEMORY_HBM); + GE_IF_BOOL_EXEC(rt_ret != RT_ERROR_NONE, GELOGE(RT_FAILED, "rtMalloc error, ret: 0x%X", rt_ret); + return RT_FAILED;) + + rt_ret = rtMemcpy(aicpu_kernel_addr, kernel_size, v_aicpu_kernel.data(), kernel_size, RT_MEMCPY_HOST_TO_DEVICE); + GE_IF_BOOL_EXEC(rt_ret != RT_ERROR_NONE, GELOGE(rt_ret, "rtMemcpy to input_output_addr_ error: 0x%X", rt_ret); + GE_CHK_RT(rtFree(aicpu_kernel_addr)); return FAILED;) + uint64_t kernel_id_addr = static_cast(reinterpret_cast(aicpu_kernel_addr)); + param_base.fwkKernelBase.fwk_kernel.kernelID = kernel_id_addr; + // Remove model key from map + model_aicpu_kernel_.erase(iter); + } + } rtError_t rt_ret = rtMalloc(&(devicebase), sizeof(STR_FWK_OP_KERNEL), RT_MEMORY_HBM); if (rt_ret != RT_ERROR_NONE) { GELOGE(rt_ret, "malloc device memory failed."); + GE_IF_BOOL_EXEC(aicpu_kernel_addr != nullptr, GE_CHK_RT(rtFree(aicpu_kernel_addr))); return FAILED; } @@ -59,6 +84,7 @@ static Status KernelLaunchEx(aicpu::FWKAdapter::FWKOperateType opType, uint64_t rtMemcpy(devicebase, sizeof(STR_FWK_OP_KERNEL), ¶m_base, sizeof(STR_FWK_OP_KERNEL), RT_MEMCPY_HOST_TO_DEVICE); if (rt_ret != RT_ERROR_NONE) { GELOGE(rt_ret, "memory copy to device failed."); + GE_IF_BOOL_EXEC(aicpu_kernel_addr != nullptr, GE_CHK_RT(rtFree(aicpu_kernel_addr))); GE_CHK_RT(rtFree(devicebase)); return FAILED; } @@ -67,6 +93,7 @@ static Status KernelLaunchEx(aicpu::FWKAdapter::FWKOperateType opType, uint64_t rt_ret = rtStreamCreate(&stream, 0); if (rt_ret != RT_ERROR_NONE) { GELOGE(rt_ret, "create stream failed."); + GE_IF_BOOL_EXEC(aicpu_kernel_addr != nullptr, GE_CHK_RT(rtFree(aicpu_kernel_addr))); GE_CHK_RT(rtFree(devicebase)); return FAILED; } @@ -74,6 +101,7 @@ static Status KernelLaunchEx(aicpu::FWKAdapter::FWKOperateType opType, uint64_t rt_ret = rtKernelLaunchEx(devicebase, sizeof(STR_FWK_OP_KERNEL), 0, stream); if (rt_ret != RT_ERROR_NONE) { GELOGE(rt_ret, "rtKernelLaunchEx failed."); + GE_IF_BOOL_EXEC(aicpu_kernel_addr != nullptr, GE_CHK_RT(rtFree(aicpu_kernel_addr))); GE_CHK_RT(rtFree(devicebase)); GE_CHK_RT(rtStreamDestroy(stream)); return FAILED; @@ -81,11 +109,20 @@ static Status KernelLaunchEx(aicpu::FWKAdapter::FWKOperateType opType, uint64_t rt_ret = rtStreamSynchronize(stream); if (rt_ret != RT_ERROR_NONE) { GELOGE(rt_ret, "rtStreamSynchronize failed."); + GE_IF_BOOL_EXEC(aicpu_kernel_addr != nullptr, GE_CHK_RT(rtFree(aicpu_kernel_addr))); GE_CHK_RT(rtFree(devicebase)); GE_CHK_RT(rtStreamDestroy(stream)); return FAILED; } - + if (aicpu_kernel_addr != nullptr) { + rt_ret = rtFree(aicpu_kernel_addr); + if (rt_ret != RT_ERROR_NONE) { + GELOGE(rt_ret, "free memory failed."); + GE_CHK_RT(rtFree(devicebase)); + GE_CHK_RT(rtStreamDestroy(stream)); + return FAILED; + } + } rt_ret = rtFree(devicebase); if (rt_ret != RT_ERROR_NONE) { GELOGE(rt_ret, "free memory failed."); @@ -107,7 +144,7 @@ void ModelManager::DestroyAicpuSession(uint64_t session_id) { GELOGI("The session: %lu not 
created.", session_id); return; } else { - Status ret = KernelLaunchEx(aicpu::FWKAdapter::FWKOperateType::FWK_ADPT_SESSION_DESTROY, session_id); + Status ret = KernelLaunchEx(aicpu::FWKAdapter::FWKOperateType::FWK_ADPT_SESSION_DESTROY, session_id, 0); if (ret != SUCCESS) { GELOGW("The session: %lu destroy failed.", session_id); } else { @@ -117,9 +154,36 @@ void ModelManager::DestroyAicpuSession(uint64_t session_id) { } } +ge::Status ModelManager::DestroyAicpuKernel(uint64_t session_id, uint32_t model_id) { + GELOGD("destroy aicpu kernel in session_id %lu, model_id %u.", session_id, model_id); + std::lock_guard lock(sess_ids_mutex_); + std::string model_key = std::to_string(session_id) + "_" + std::to_string(model_id); + if (model_aicpu_kernel_.find(model_key) != model_aicpu_kernel_.end()) { + Status ret = KernelLaunchEx(aicpu::FWKAdapter::FWKOperateType::FWK_ADPT_KERNEL_DESTROY, session_id, model_id); + if (ret != SUCCESS) { + GELOGE(FAILED, "Destroy aicpu kernel failed."); + return FAILED; + } + } + return SUCCESS; +} + +ge::Status ModelManager::CreateAicpuKernel(uint64_t session_id, uint32_t model_id, uint64_t kernel_id) { + std::vector v_aicpu_kernel; + std::lock_guard lock(sess_ids_mutex_); + std::string model_key = std::to_string(session_id) + "_" + std::to_string(model_id); + if (model_aicpu_kernel_.find(model_key) != model_aicpu_kernel_.end()) { + v_aicpu_kernel = model_aicpu_kernel_.at(model_key); + } + v_aicpu_kernel.push_back(kernel_id); + model_aicpu_kernel_[model_key] = v_aicpu_kernel; + return SUCCESS; +} + ModelManager::~ModelManager() { std::lock_guard lock(map_mutex_); model_map_.clear(); + model_aicpu_kernel_.clear(); GE_IF_BOOL_EXEC(device_count > 0, GE_CHK_RT(rtDeviceReset(0))); } @@ -687,7 +751,7 @@ Status ModelManager::CreateAicpuSession(uint64_t session_id) { auto it = sess_ids_.find(session_id); // never been created by any model if (it == sess_ids_.end()) { - Status ret = KernelLaunchEx(aicpu::FWKAdapter::FWKOperateType::FWK_ADPT_SESSION_CREATE, session_id); + Status ret = KernelLaunchEx(aicpu::FWKAdapter::FWKOperateType::FWK_ADPT_SESSION_CREATE, session_id, 0); if (ret == SUCCESS) { (void)sess_ids_.insert(session_id); GELOGI("The session: %lu create success.", session_id); diff --git a/src/ge/graph/load/new_model_manager/model_manager.h b/src/ge/graph/load/new_model_manager/model_manager.h index 08607926..c2d98d10 100644 --- a/src/ge/graph/load/new_model_manager/model_manager.h +++ b/src/ge/graph/load/new_model_manager/model_manager.h @@ -24,6 +24,7 @@ #include #include #include +#include "cce/aicpu_engine_struct.h" #include "common/types.h" #include "common/ge_types.h" #include "common/ge_inner_error_codes.h" @@ -199,12 +200,18 @@ class FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY ModelManager { /// std::shared_ptr GetModel(uint32_t id); + ge::Status KernelLaunchEx(aicpu::FWKAdapter::FWKOperateType op_type, uint64_t session_id, uint32_t model_id); + ge::Status CreateAicpuSession(uint64_t session_id); static ge::Status GetModelMemAndWeightSize(const ModelData &model, size_t &mem_size, size_t &weight_size); void DestroyAicpuSession(uint64_t session_id); + ge::Status DestroyAicpuKernel(uint64_t session_id, uint32_t model_id); + + ge::Status CreateAicpuKernel(uint64_t session_id, uint32_t model_id, uint64_t kernel_id); + private: /// /// @ingroup domi_ome @@ -233,6 +240,7 @@ class FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY ModelManager { void GenModelId(uint32_t *id); std::map> model_map_; + std::map> model_aicpu_kernel_; std::vector free_model_id_; 
uint32_t max_model_id_; std::mutex map_mutex_; diff --git a/src/ge/graph/load/new_model_manager/task_info/kernel_ex_task_info.cc b/src/ge/graph/load/new_model_manager/task_info/kernel_ex_task_info.cc index 32a9da8b..cfee3610 100644 --- a/src/ge/graph/load/new_model_manager/task_info/kernel_ex_task_info.cc +++ b/src/ge/graph/load/new_model_manager/task_info/kernel_ex_task_info.cc @@ -120,9 +120,13 @@ Status KernelExTaskInfo::Init(const domi::TaskDef &task_def, DavinciModel *davin GELOGI("session_id: %lu", session_id); GE_CHECK_NOTNULL(ModelManager::GetInstance()); GE_IF_BOOL_EXEC(ModelManager::GetInstance()->CreateAicpuSession(session_id) != SUCCESS, - GELOGE(ret, "CreateAicpuSession error."); - return ret;) - + GELOGE(FAILED, "CreateAicpuSession error."); + return FAILED;) + // 4.1 Collect aicpu kernel + uint64_t kernel_id = fwk_op_kernel.fwkKernelBase.fwk_kernel.kernelID; + GE_IF_BOOL_EXEC(ModelManager::GetInstance()->CreateAicpuKernel(session_id, davinci_model->Id(), kernel_id) != SUCCESS, + GELOGE(FAILED, "CreateAicpuKernel error."); + return FAILED;) // 5. Return result rtError_t rt_ret = rtMalloc(&kernel_buf_, sizeof(STR_FWK_OP_KERNEL), RT_MEMORY_HBM); GE_IF_BOOL_EXEC(rt_ret != RT_ERROR_NONE, GELOGE(rt_ret, "rtMalloc error: 0x%X", rt_ret); return FAILED;) diff --git a/src/ge/graph/load/output/output.cc b/src/ge/graph/load/output/output.cc index bbc2bf4e..8351715c 100644 --- a/src/ge/graph/load/output/output.cc +++ b/src/ge/graph/load/output/output.cc @@ -84,8 +84,6 @@ Status Output::Init() { } } - GELOGI("Init output:%lu, %lu, %lu", input_num_, v_input_size_.size(), v_input_data_addr_.size()); - return SUCCESS; } @@ -125,11 +123,10 @@ Status Output::SetDataBuf(DataBuffer &data_buf, uint32_t &data_count, size_t i, } if (data_buf.isDataSupportMemShare && support_mem_share) { - GELOGI("No need to copy input data, user's output data buffer can be shared."); + GELOGD("No need to copy input data, user's output data buffer can be shared."); } else { // Copy result to Databuf uint32_t size = v_input_size_[i]; - GELOGI("Tensor data size before: %u", size); graphStatus graph_status = TensorUtils::GetTensorSizeInBytes(*tensor_desc, size); if (graph_status != ge::GRAPH_SUCCESS) { @@ -142,7 +139,7 @@ Status Output::SetDataBuf(DataBuffer &data_buf, uint32_t &data_count, size_t i, GELOGE(rt_ret, "rtmemcpy error"); return FAILED; } - GELOGI("Tensor data size: %u data_buflength: %u", size, data_buf.length); + GELOGD("Tensor data size: %u data_buflength: %u", size, data_buf.length); } ++data_count; diff --git a/src/ge/graph/manager/graph_manager.cc b/src/ge/graph/manager/graph_manager.cc index ff238fb3..9796b2ac 100644 --- a/src/ge/graph/manager/graph_manager.cc +++ b/src/ge/graph/manager/graph_manager.cc @@ -34,6 +34,7 @@ #include "framework/common/ge_inner_error_codes.h" #include "framework/common/ge_types.h" #include "graph/common/transop_util.h" +#include "graph/debug/ge_attr_define.h" #include "graph/ge_context.h" #include "graph/ge_global_options.h" #include "graph/ge_local_context.h" @@ -1078,28 +1079,60 @@ Status GraphManager::CheckpointHandle(const GraphId &graph_id, const std::vector GELOGI("[GraphManager] CheckpointHandle, outputsSize=%zu.", outputs.size()); std::vector outputs_desc = graph_executor_.GetOutputsDesc(); GELOGI("[GraphManager] CheckpointHandle, outputsDescSize=%zu.", outputs_desc.size()); + // find graph + GraphNodePtr graph_node = nullptr; + Status ret = GetGraphNode(graph_id, graph_node); + if (ret != SUCCESS) { + GELOGE(ret, "[CheckpointHandle] graph not exist, graph_id = 
%u.", graph_id); + return ret; + } + ComputeGraphPtr compute_graph_ptr = GraphUtils::GetComputeGraph(*(graph_node->GetGraph())); std::map save_results; - for (size_t i = 0; i < outputs_desc.size(); ++i) { - std::string desc_name = outputs_desc.at(i).name; - auto index = desc_name.find_last_of("_"); - if (index != std::string::npos) { - desc_name = desc_name.substr(0, index); - index = desc_name.find_first_of("_"); - if (index != std::string::npos) { - desc_name = desc_name.substr(index + 1); - index = desc_name.find_first_of("_"); - if (index != std::string::npos) { - desc_name = desc_name.substr(index + 1); + NodePtr netoutput = nullptr; + for (const auto &node : compute_graph_ptr->GetDirectNode()) { + if (node->GetType() == kNetOutput) { + netoutput = node; + break; + } + } + if (netoutput == nullptr) { + GELOGE(FAILED, "Netoutput is null."); + return FAILED; + } + for (const auto &in : netoutput->GetAllInDataAnchors()) { + std::string desc_name; + auto out_anchor = in->GetPeerOutAnchor(); + if (out_anchor == nullptr) { + GELOGE(FAILED, "out_anchor is null."); + return FAILED; + } + ge::NodePtr peer_node = out_anchor->GetOwnerNode(); + // find the variable node in graph + while (peer_node != nullptr && peer_node->GetType() != kVariable) { + if (peer_node->GetAllInDataAnchors().size() != 1) { + GELOGE(FAILED, "More than one prior nodes of peer_node %s in checkpoint Graph.", peer_node->GetName().c_str()); + return FAILED; + } + auto peer_node_in = peer_node->GetAllInDataAnchors().at(0); + auto peer_node_out_anchor = peer_node_in->GetPeerOutAnchor(); + if (peer_node_out_anchor != nullptr) { + peer_node = peer_node_out_anchor->GetOwnerNode(); + if (peer_node->GetType() == kVariable) { + break; } } } - index = desc_name.find("_trans"); - if (index != std::string::npos) { - desc_name = desc_name.substr(0, index); + if (peer_node == nullptr) { + GELOGE(FAILED, "No variable op found in one branch, checkpoint graph illegal."); + return FAILED; } - + desc_name = peer_node->GetName(); GELOGI("[GraphManager] CheckpointHandle, descName=%s.", desc_name.c_str()); - save_results.emplace(desc_name, TensorAdapter::AsTensor(outputs.at(i))); + if (in->GetIdx() >= static_cast(outputs.size())) { + GELOGE(FAILED, "variable index out of range."); + return FAILED; + } + save_results.emplace(desc_name, TensorAdapter::AsTensor(outputs.at(in->GetIdx()))); } if (!save_results.empty()) { @@ -1447,6 +1480,8 @@ Status GraphManager::CheckAndReleaseMemory(const GeModelPtr &ge_model, const Gra int64_t memory_size = ret ? value : 0; ret = ge::AttrUtils::GetInt(ge_model, ATTR_MODEL_WEIGHT_SIZE, value); int64_t weight_size = ret ? value : 0; + ret = ge::AttrUtils::GetInt(ge_model, MODEL_ATTR_SESSION_ID, value); + uint64_t session_id = ret ? 
value : 0; int64_t free_memory = 0; Status result = GraphLoader::GetMemoryInfo(free_memory); @@ -1494,6 +1529,11 @@ Status GraphManager::CheckAndReleaseMemory(const GeModelPtr &ge_model, const Gra if (result != SUCCESS) { GELOGW("[GraphManager:] unload model failed, modelId=%u, graphId=%u.", model_id, graph_id); } + result = GraphLoader::DestroyAicpuKernel(session_id, model_id); + if (result != SUCCESS) { + GELOGW("[GraphManager:] destroy aicpu kernel failed when dynamic memory, modelId=%u, graphId=%u.", model_id, + graph_id); + } rt_ret = rtDeviceReset(GetContext().DeviceId()); if (rt_ret != RT_ERROR_NONE) { GELOGE(RT_FAILED, "[GraphManager:] rtDeviceReset failed, modelId=%u, graphId=%u.", model_id, graph_id); diff --git a/src/ge/graph/optimize/graph_optimize.h b/src/ge/graph/optimize/graph_optimize.h index 26b65b46..ceb50e3f 100644 --- a/src/ge/graph/optimize/graph_optimize.h +++ b/src/ge/graph/optimize/graph_optimize.h @@ -33,7 +33,6 @@ #include "graph/manager/graph_manager_utils.h" #include "omg/omg_inner_types.h" -/*lint -e148*/ namespace ge { using ComputeGraphPtr = std::shared_ptr; using GraphOptimizerPtr = std::shared_ptr; @@ -56,7 +55,7 @@ class GraphOptimize { const std::map> &GetSummaryOutputIndexes() const { return summary_output_indexes_; - } // lint !e1073 + } void ClearSummaryOutputIndexes() { summary_output_indexes_.clear(); } @@ -81,6 +80,5 @@ class GraphOptimize { std::map> summary_output_indexes_ = {}; std::string func_bin_path_; }; -}; // namespace ge -/*lint +e148*/ +}; // namespace ge #endif // GE_GRAPH_OPTIMIZE_GRAPH_OPTIMIZE_H_ diff --git a/src/ge/graph/passes/assert_pass.h b/src/ge/graph/passes/assert_pass.h index f8e35b32..79955348 100644 --- a/src/ge/graph/passes/assert_pass.h +++ b/src/ge/graph/passes/assert_pass.h @@ -24,7 +24,7 @@ namespace ge { class AssertPass : public BaseNodePass { public: - Status Run(NodePtr& node) override; /*lint !e148*/ + Status Run(NodePtr& node) override; private: /// @@ -33,7 +33,7 @@ class AssertPass : public BaseNodePass { /// @param nodes_unused nodes to be deleted /// @return void /// - void CollectUnusedNode(const NodePtr& assert_node, std::vector& nodes_unused); /*lint !e148*/ + void CollectUnusedNode(const NodePtr& assert_node, std::vector& nodes_unused); /// /// remove unused nodes from graph @@ -41,7 +41,7 @@ class AssertPass : public BaseNodePass { /// @param nodes_unused nodes to be deleted /// @return Status /// - Status RemoveUnusedNode(std::vector& nodes_unused); /*lint !e148*/ + Status RemoveUnusedNode(std::vector& nodes_unused); }; } // namespace ge #endif // GE_GRAPH_PASSES_ASSERT_PASS_H_ diff --git a/src/ge/graph/passes/atomic_addr_clean_pass.cc b/src/ge/graph/passes/atomic_addr_clean_pass.cc index 6ca1b98b..39f6a6d9 100644 --- a/src/ge/graph/passes/atomic_addr_clean_pass.cc +++ b/src/ge/graph/passes/atomic_addr_clean_pass.cc @@ -170,7 +170,7 @@ Status AtomicAddrCleanPass::LinkToAtomicNode(const NodePtr &atomic_node, NodePtr atomic_node->GetName().c_str()); return INTERNAL_ERROR; } - GELOGI("Graph add cleanAddrNode op out ctrl edge, dst node: %s.", atomic_node->GetName().c_str()); + GELOGD("Graph add cleanAddrNode op out ctrl edge, dst node: %s.", atomic_node->GetName().c_str()); std::string stream_label; if (is_loop_graph && AttrUtils::GetStr(atomic_node->GetOpDesc(), ATTR_NAME_STREAM_LABEL, stream_label)) { if (!AttrUtils::SetStr(atomic_clean_node->GetOpDesc(), ATTR_NAME_STREAM_LABEL, stream_label)) { @@ -228,7 +228,7 @@ bool AtomicAddrCleanPass::IsAtomicOp(const NodePtr &node) { if (ret != GRAPH_SUCCESS) { 
GELOGW("set attr ATOMIC_ATTR_IS_ATOMIC_NODE fail."); } - GELOGI("Recognized atomic op %s from FE engine.", op_desc->GetName().c_str()); + GELOGD("Recognized atomic op %s from FE engine.", op_desc->GetName().c_str()); return true; } } // namespace ge diff --git a/src/ge/graph/passes/compile_nodes_pass.cc b/src/ge/graph/passes/compile_nodes_pass.cc index 35d17523..8f7438ea 100644 --- a/src/ge/graph/passes/compile_nodes_pass.cc +++ b/src/ge/graph/passes/compile_nodes_pass.cc @@ -25,6 +25,8 @@ #include "graph/debug/ge_attr_define.h" #include "graph/op_desc.h" +using domi::ImplyType; + namespace { const char *const kAICPUEngineName = "DNN_VM_AICPU"; const char *const kAICPUKernelLibName = "aicpu_kernel"; @@ -142,6 +144,7 @@ graphStatus CompileNodesPass::CompileNodes(const std::shared_ptr instance // this node will go to aicpu engine ,no need compile node->GetOpDesc()->SetOpEngineName(kAICPUEngineName); node->GetOpDesc()->SetOpKernelLibName(kAICPUKernelLibName); + AttrUtils::SetInt(node->GetOpDesc(), ATTR_NAME_IMPLY_TYPE, static_cast(ImplyType::AI_CPU)); } continue; } diff --git a/src/ge/graph/passes/net_output_pass.cc b/src/ge/graph/passes/net_output_pass.cc index 63db7ce3..7caf4990 100644 --- a/src/ge/graph/passes/net_output_pass.cc +++ b/src/ge/graph/passes/net_output_pass.cc @@ -169,7 +169,6 @@ Status NetOutputPass::RemoveUnusedNode(const ge::ComputeGraphPtr &graph) { GELOGE(INTERNAL_ERROR, "Remove node failed, node name:%s.", node->GetName().c_str()); return INTERNAL_ERROR; } - GELOGI("Net output pass remove node:%s.", node->GetName().c_str()); } return SUCCESS; } @@ -209,7 +208,7 @@ Status NetOutputPass::UpdateNetOutputDesc(const ge::NodePtr &net_output) { GELOGE(INTERNAL_ERROR, "Update output desc failed, index:%u.", index); return INTERNAL_ERROR; } - GELOGI("Update desc, format:%s, data type:%s, index:%u.", + GELOGD("Update desc, format:%s, data type:%s, index:%u.", TypeUtils::FormatToSerialString(output_in_desc.GetFormat()).c_str(), TypeUtils::DataTypeToSerialString(output_in_desc.GetDataType()).c_str(), index); } @@ -234,7 +233,7 @@ Status NetOutputPass::AddCtrlEdgeForTargets(const ge::NodePtr &net_out_node) { net_out_node->GetName().c_str(), node->GetName().c_str()); return INTERNAL_ERROR; } - GELOGI("Add ctrl edge to netoutput node[%s] for target node [%s] success!", net_out_node->GetName().c_str(), + GELOGD("Add ctrl edge to netoutput node[%s] for target node [%s] success!", net_out_node->GetName().c_str(), node->GetName().c_str()); } return SUCCESS; @@ -265,7 +264,7 @@ Status NetOutputPass::AddEdgesForNetOutput(const ge::ComputeGraphPtr &graph, con item.second, net_input_index); return INTERNAL_ERROR; } - GELOGI("AddEdge to output node, src name:%s, src index:%d, dst index:%d.", src_node->GetName().c_str(), item.second, + GELOGD("AddEdge to output node, src name:%s, src index:%d, dst index:%d.", src_node->GetName().c_str(), item.second, net_input_index); net_input_index++; } @@ -417,7 +416,7 @@ Status NetOutputPass::AddCtrlEdgesBetweenLeafAndNetOutput(const ge::ComputeGraph node->GetOutDataNodesSize() == 0 && node->GetOutControlNodes().size() == 0) { GE_CHK_STATUS_RET(GraphUtils::AddEdge(node->GetOutControlAnchor(), net_out_node->GetInControlAnchor()), "add edge failed"); - GELOGI("Add ctrl edge success. src name :%s, dst name :%s", node->GetName().c_str(), + GELOGD("Add ctrl edge success. 
src name :%s, dst name :%s", node->GetName().c_str(), net_out_node->GetName().c_str()); } } @@ -488,11 +487,10 @@ Status NetOutputPass::Run(ge::ComputeGraphPtr graph) { auto it = targets_.find(src_node); if (it != targets_.end()) { iter = output_nodes_info.erase(iter); - GELOGI("node [%s] is in processed targets, do not add inout for netoutput!", src_node->GetName().c_str()); + GELOGD("node [%s] is in processed targets, do not add inout for netoutput!", src_node->GetName().c_str()); continue; } AddInOutForNetOutputOp(graph, net_output_desc, src_node, src_index); - GELOGI("Add output node:%s, index:%d.", src_node->GetName().c_str(), src_index); is_input_const.push_back(PassUtils::IsConstant(src_node)); ++iter; } diff --git a/src/ge/graph/passes/variable_op_pass.cc b/src/ge/graph/passes/variable_op_pass.cc index 755de62d..26bb453a 100644 --- a/src/ge/graph/passes/variable_op_pass.cc +++ b/src/ge/graph/passes/variable_op_pass.cc @@ -160,7 +160,7 @@ Status VariableOpPass::Run(ge::ComputeGraphPtr graph) { auto start_iter = fusion_road.begin(); auto end_iter = fusion_road.rbegin(); - GELOGI( + GELOGD( "Trans variable data for %s from format %s to %s, shape %s to %s " "data-type %s to %s, path len %zu success", node->GetName().c_str(), TypeUtils::FormatToSerialString(start_iter->input.GetFormat()).c_str(), @@ -197,7 +197,7 @@ Status VariableOpPass::DealFusion(const ge::NodePtr &var_node) { GELOGD("Begin to fusion var %s with trans", var_node->GetName().c_str()); auto graph = var_node->GetOwnerComputeGraph(); for (auto &trans_node : var_node->GetOutDataNodes()) { - GELOGI("Remove node %s type %s when fusion with variable %s", trans_node->GetName().c_str(), + GELOGD("Remove node %s type %s when fusion with variable %s", trans_node->GetName().c_str(), trans_node->GetType().c_str(), var_node->GetName().c_str()); if (GraphUtils::IsolateNode(trans_node, {0}) != SUCCESS) { @@ -218,7 +218,7 @@ Status VariableOpPass::DealFusion(const ge::NodePtr &var_node) { for (auto ref_node : iterator->second) { GE_CHECK_NOTNULL(ref_node); for (auto &trans_node : ref_node->GetInDataNodes()) { - GELOGI("Remove node %s type %s when fusion with variable %s", trans_node->GetName().c_str(), + GELOGD("Remove node %s type %s when fusion with variable %s", trans_node->GetName().c_str(), trans_node->GetType().c_str(), var_node->GetName().c_str()); if (trans_node->GetOutDataNodes().size() > 1) { GELOGD( @@ -578,7 +578,7 @@ Status VariableOpPass::RenewVarDesc(ge::ComputeGraphPtr &graph) { (node->GetType() == VARIABLE) || (node->GetType() == VARIABLEV2) || (node->GetType() == VARHANDLEOP); if (is_var_node) { if (!ge::VarManager::Instance(graph->GetSessionID())->IsVarExist(node->GetName())) { - GELOGI("var manager does not exist var node[%s]", node->GetName().c_str()); + GELOGD("var manager does not exist var node[%s]", node->GetName().c_str()); continue; } GELOGD("var manager exist var node[%s], graph name[%s]", node->GetName().c_str(), graph->GetName().c_str()); diff --git a/src/ge/graph/passes/variable_prepare_op_pass.cc b/src/ge/graph/passes/variable_prepare_op_pass.cc index c4dca7dc..ca9e1c0a 100644 --- a/src/ge/graph/passes/variable_prepare_op_pass.cc +++ b/src/ge/graph/passes/variable_prepare_op_pass.cc @@ -190,7 +190,7 @@ ge::NodePtr VariablePrepareOpPass::CreatVariableRef(ge::NodePtr &final_writable_ GELOGE(FAILED, "parameter ptr is null."); return nullptr; } - GELOGI("Create VarRef Op: final_writable_node: [%s] var_node: [%s]>>>>", final_writable_node->GetName().c_str(), + GELOGD("Create VarRef Op: final_writable_node: 
[%s] var_node: [%s]>>>>", final_writable_node->GetName().c_str(), var_node->GetName().c_str()); static uint32_t var_ref_count = 0; @@ -220,7 +220,7 @@ ge::NodePtr VariablePrepareOpPass::CreatVariableRef(ge::NodePtr &final_writable_ bool is_set_str = ge::AttrUtils::SetStr(var_ref_op_desc, REF_VAR_SRC_VAR_NAME, var_op_desc->GetName()); if (is_set_str) { - GELOGI("Set node [%s] REF_VAR_SRC_VAR_NAME [%s]", var_ref_node->GetName().c_str(), var_op_desc->GetName().c_str()); + GELOGD("Set node [%s] REF_VAR_SRC_VAR_NAME [%s]", var_ref_node->GetName().c_str(), var_op_desc->GetName().c_str()); } return var_ref_node; } @@ -229,7 +229,7 @@ int VariablePrepareOpPass::GetWritableNodeOutIndex(const NodePtr &node, int inpu if (node == nullptr) { return -1; } - GELOGI("get writable node and input index %s:%d", node->GetName().c_str(), input_index); + GELOGD("get writable node and input index %s:%d", node->GetName().c_str(), input_index); auto node_type = node->GetType(); if (node_type == ASSIGN) { if (UpdateAssignOpDesc(node) != SUCCESS) { diff --git a/src/ge/graph/preprocess/graph_preprocess.h b/src/ge/graph/preprocess/graph_preprocess.h index ee1eb0bc..c53edf43 100644 --- a/src/ge/graph/preprocess/graph_preprocess.h +++ b/src/ge/graph/preprocess/graph_preprocess.h @@ -37,7 +37,6 @@ #include "omg/omg_inner_types.h" #include "runtime/context.h" -/*lint -e148*/ namespace ge { class GraphPrepare { public: @@ -73,5 +72,4 @@ class GraphPrepare { GraphManagerOptions options_; }; } // namespace ge -/*lint +e148*/ #endif // GE_GRAPH_PREPROCESS_GRAPH_PREPROCESS_H_ diff --git a/src/ge/model/ge_model.h b/src/ge/model/ge_model.h index ab779d03..6305211a 100644 --- a/src/ge/model/ge_model.h +++ b/src/ge/model/ge_model.h @@ -74,12 +74,12 @@ class GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY GeModel : public AttrHolder private: void Init(); - ProtoAttrMapHelper attrs_; /*lint !e148*/ + ProtoAttrMapHelper attrs_; - Graph graph_; /*lint !e148*/ - std::shared_ptr task_; /*lint !e148*/ + Graph graph_; + std::shared_ptr task_; TBEKernelStore tbe_kernal_store_; - Buffer weights_buffer_; /*lint !e148*/ + Buffer weights_buffer_; std::string name_; uint32_t version_ = {0}; diff --git a/src/ge/session/inner_session.h b/src/ge/session/inner_session.h index d79a2eac..b35d01e6 100644 --- a/src/ge/session/inner_session.h +++ b/src/ge/session/inner_session.h @@ -35,8 +35,7 @@ class InnerSession { Status AddGraph(uint32_t graph_id, const Graph &graph); - Status AddGraph(uint32_t graph_id, const Graph &graph, - const std::map &options); /*lint !e148*/ + Status AddGraph(uint32_t graph_id, const Graph &graph, const std::map &options); Status RunGraph(uint32_t graph_id, const std::vector &inputs, std::vector &outputs); diff --git a/tests/ut/common/graph/CMakeLists.txt b/tests/ut/common/graph/CMakeLists.txt index 674e2c1f..cda1f1e1 100644 --- a/tests/ut/common/graph/CMakeLists.txt +++ b/tests/ut/common/graph/CMakeLists.txt @@ -35,11 +35,8 @@ include_directories(${GE_SOURCE_DIR}/inc/external) include_directories(${GE_SOURCE_DIR}/inc/external/graph) include_directories(${GE_SOURCE_DIR}/inc/graph) include_directories(${GE_SOURCE_DIR}/inc/common) -include_directories(${GE_SOURCE_DIR}/inc/ops) include_directories(${GE_SOURCE_DIR}/third_party/securec/include) -include_directories(${GE_SOURCE_DIR}/third_party/fwkacllib/inc/ops) include_directories(${GE_SOURCE_DIR}/third_party/fwkacllib/inc) -include_directories(/usr/local/HiAI/opp/op_proto/built-in/inc) include_directories(${CMAKE_BINARY_DIR}) include_directories(${CMAKE_BINARY_DIR}/proto/ge) 
diff --git a/tests/ut/ge/CMakeLists.txt b/tests/ut/ge/CMakeLists.txt index 69b93905..8ac71939 100755 --- a/tests/ut/ge/CMakeLists.txt +++ b/tests/ut/ge/CMakeLists.txt @@ -43,13 +43,10 @@ include_directories(${GE_SOURCE_DIR}/inc/external/graph) include_directories(${GE_SOURCE_DIR}/inc/graph) include_directories(${GE_SOURCE_DIR}/inc/framework) include_directories(${GE_SOURCE_DIR}/inc/common) -include_directories(${GE_SOURCE_DIR}/inc/ops) include_directories(${GE_SOURCE_DIR}/third_party/securec/include) -include_directories(${GE_SOURCE_DIR}/third_party/fwkacllib/inc/ops) include_directories(${GE_SOURCE_DIR}/third_party/fwkacllib/inc) include_directories(${GE_SOURCE_DIR}/third_party/fwkacllib/inc/cce) include_directories(${GE_SOURCE_DIR}/tests/ut/ge) -include_directories(/usr/local/HiAI/opp/op_proto/built-in/inc) include_directories(${CMAKE_BINARY_DIR}) include_directories(${CMAKE_BINARY_DIR}/proto/ge) diff --git a/third_party/fwkacllib/inc/cce/fwk_adpt_struct.h b/third_party/fwkacllib/inc/cce/fwk_adpt_struct.h index ff0f10e3..3075f795 100644 --- a/third_party/fwkacllib/inc/cce/fwk_adpt_struct.h +++ b/third_party/fwkacllib/inc/cce/fwk_adpt_struct.h @@ -44,9 +44,10 @@ enum FWKAdptAPIRetCode { // Notice: add new operate type need check with OMM, and make sure append to the end line. enum FWKOperateType { FWK_ADPT_SESSION_CREATE = 0, - FWK_ADPT_KERNEL_RUN = 1, - FWK_ADPT_SESSION_DESTROY = 2, - FWK_ADPT_SINGLE_OP_RUN = 3 + FWK_ADPT_KERNEL_RUN, + FWK_ADPT_KERNEL_DESTROY, + FWK_ADPT_SESSION_DESTROY, + FWK_ADPT_SINGLE_OP_RUN }; // API Parameter Structure diff --git a/third_party/fwkacllib/inc/hccl/base.h b/third_party/fwkacllib/inc/hccl/base.h index c2d22630..4ca597bb 100644 --- a/third_party/fwkacllib/inc/hccl/base.h +++ b/third_party/fwkacllib/inc/hccl/base.h @@ -63,9 +63,6 @@ typedef enum tagHcclResult { HCCL_E_RESERVED /**< reserved */ } hcclResult_t; -/* handle to communicator */ -typedef void *hcclComm_t; - /** * @brief HCCL Reduction opperation */ @@ -88,13 +85,6 @@ typedef enum tagHcclDataType { HCCL_DATA_TYPE_RESERVED /**< reserved */ } hcclDataType_t; -const s32 HCCL_TAG_ANY = -1; -const u32 BASE_UNIQUE_ID_BYTES = 27; -#define HCCL_UNIQUE_ID_BYTES (BASE_UNIQUE_ID_BYTES + 5 + 16 + 128) -typedef struct { - char internal[HCCL_UNIQUE_ID_BYTES]; -} hcclUniqueId; - const u32 HCCL_MAX_SEGMENT_NUM = 8; // The max number of gradient segments. 
/** diff --git a/third_party/fwkacllib/inc/mmpa/mmpa_api.h b/third_party/fwkacllib/inc/mmpa/mmpa_api.h index ce1c9720..f1e30538 100644 --- a/third_party/fwkacllib/inc/mmpa/mmpa_api.h +++ b/third_party/fwkacllib/inc/mmpa/mmpa_api.h @@ -20,7 +20,7 @@ #define LINUX 0 #define WIN 1 -#if(OS_TYPE == LINUX) //lint !e553 +#if(OS_TYPE == LINUX) #ifndef _GNU_SOURCE #define _GNU_SOURCE @@ -84,7 +84,7 @@ #endif -#if(OS_TYPE == WIN) //lint !e553 +#if(OS_TYPE == WIN) #include #include #include "Windows.h" diff --git a/third_party/fwkacllib/inc/ops/all_ops.h b/third_party/fwkacllib/inc/ops/all_ops.h index d6350322..f572f298 100644 --- a/third_party/fwkacllib/inc/ops/all_ops.h +++ b/third_party/fwkacllib/inc/ops/all_ops.h @@ -40,7 +40,6 @@ #include "nn_detect_ops.h" #include "nn_norm_ops.h" #include "nn_ops.h" -#include "nn_other_ops.h" #include "nn_pooling_ops.h" #include "nn_training_ops.h" #include "nonlinear_fuc_ops.h" @@ -62,5 +61,5 @@ #include "outfeed_ops.h" #include "stateless_random_ops.h" #include "dvpp_ops.h" -#include "basic_lstm_cell.h" +#include "rnn.h" #endif // BUILT_IN_OP_PROTO_INC_ALL_OPS_H_ diff --git a/third_party/fwkacllib/inc/ops/atomic_addr_clean_ops.h b/third_party/fwkacllib/inc/ops/atomic_addr_clean_ops.h deleted file mode 100644 index 1cd5dc3a..00000000 --- a/third_party/fwkacllib/inc/ops/atomic_addr_clean_ops.h +++ /dev/null @@ -1,28 +0,0 @@ -/** - * Copyright 2019-2020 Huawei Technologies Co., Ltd - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - - #ifndef GE_OP_ATOMICADDRCLEAN_H - #define GE_OP_ATOMICADDRCLEAN_H - - #include "../../../inc/external/graph/operator_reg.h" - -namespace ge{ -REG_OP(AtomicAddrClean) - .ATTR(automic_add_mem_size, ListInt, {}) - .OP_END() -} // namespace ge - - #endif // GE_OP_ATOMICADDRCLEAN_H diff --git a/third_party/fwkacllib/inc/ops/elewise_calculation_ops.h b/third_party/fwkacllib/inc/ops/elewise_calculation_ops.h index ba043d5a..f92f42eb 100644 --- a/third_party/fwkacllib/inc/ops/elewise_calculation_ops.h +++ b/third_party/fwkacllib/inc/ops/elewise_calculation_ops.h @@ -2543,41 +2543,6 @@ REG_OP(Bias) .ATTR(bias_from_blob, Bool, true) .OP_END_FACTORY_REG(Bias) -/** -* @brief Computes the gradient for Local Response Normalization. - -* @par Inputs: -* @li grads: A 4D Tensor of type float16 or float32. -* @li x: A 4D Tensor of type float16 or float32. -* @li y: A 4D Tensor of type float16 or float32. - -* @par Attributes: -* @li depth_radius: An optional int, specifying the half-width of the -* normalization window. Defaults to "5". -* @li bias: An optional float32. An offset, usually > 0 to avoid dividing by 0. -* Defaults to "1". -* @li alpha: An optional float32. A scaling factor, usually positive. -* Defaults to "1". -* @li beta: An optional float32. An exponent. Defaults to "0.5". - -* @par Outputs: -* z: A Tensor. Has the same type and shape as "grads". - -* @attention Constraints: -* "x" and "y" must have the same shape and type as "grads". 
-*/ - -REG_OP(LRNGrad) - .INPUT(grads, TensorType({DT_FLOAT16,DT_FLOAT})) - .INPUT(x, TensorType({DT_FLOAT16,DT_FLOAT})) - .INPUT(y, TensorType({DT_FLOAT16,DT_FLOAT})) - .OUTPUT(z, TensorType({DT_FLOAT16,DT_FLOAT})) - .ATTR(depth_radius, Int, 5) - .ATTR(bias, Float, 1.0) - .ATTR(alpha, Float, 1.0) - .ATTR(beta, Float, 0.5) - .OP_END_FACTORY_REG(LRNGrad) - REG_OP(ConfusionMulGrad) .INPUT(input0, TensorType({DT_FLOAT16,DT_FLOAT})) .INPUT(input1, TensorType({DT_FLOAT16,DT_FLOAT})) @@ -2588,16 +2553,13 @@ REG_OP(ConfusionMulGrad) .ATTR(keep_dims, Bool, false) .OP_END_FACTORY_REG(ConfusionMulGrad) -REG_OP(LRN) - .INPUT(x, TensorType({DT_FLOAT16,DT_FLOAT})) - .OUTPUT(y, TensorType({DT_FLOAT16,DT_FLOAT})) - .ATTR(depth_radius, Int, 5) - .ATTR(bias, Float, 1.0) - .ATTR(alpha, Float, 1.0) - .ATTR(beta, Float, 0.5) - .ATTR(norm_region, String, "ACROSS_CHANNELS") - .OP_END_FACTORY_REG(LRN) - +REG_OP(FusedMulAddNL2loss) + .INPUT(x1, TensorType::NumberType()) + .INPUT(x2, TensorType::NumberType()) + .INPUT(x3, TensorType::NumberType()) + .OUTPUT(y1, TensorType::NumberType()) + .OUTPUT(y2, TensorType::NumberType()) + .OP_END_FACTORY_REG(FusedMulAddNL2loss) } // namespace ge #endif // GE_OP_ELEWISE_CALCULATION_OPS_H diff --git a/third_party/fwkacllib/inc/ops/math_ops.h b/third_party/fwkacllib/inc/ops/math_ops.h index f311f292..358d5341 100644 --- a/third_party/fwkacllib/inc/ops/math_ops.h +++ b/third_party/fwkacllib/inc/ops/math_ops.h @@ -110,6 +110,83 @@ REG_OP(GetNext) .ATTR(output_num, Int, 1) .ATTR(channel_name, String, "") .OP_END_FACTORY_REG(GetNext) +/** +*@brief: Computes the Gauss error function of `x` element-wise. + +*@par Inputs:\n +*x: A Tensor of type float16 or float32. + +*@par Outputs: +*y: A Tensor. Has the same type as "x". +*/ +REG_OP(Erf) + .INPUT(x, TensorType::FloatingDataType()) + .OUTPUT(y, TensorType::FloatingDataType()) + .OP_END_FACTORY_REG(Erf) + +/** +*@brief: Computes the Gauss complementary error function of "x" element-wise. + +*@par Inputs:\n +*x: A Tensor of type float16 or float32. + +*@par Outputs: +*y: A Tensor. Has the same type as "x". +*/ +REG_OP(Erfc) + .INPUT(x, TensorType::FloatingDataType()) + .OUTPUT(y, TensorType::FloatingDataType()) + .OP_END_FACTORY_REG(Erfc) + +/** +*@brief This operation returns a rank 1 histogram counting the number of entries in `values` \n +* that fell into every bin.The bins are equal width and determined by the arguments \n +* 'value_range' and 'nbins'. \n + +*@par Inputs: +*Three inputs, including: \n +*@li x: A Tensor of type float32,float16,int32. +*@li range: A Tensor of type float32,float16,int32. +*@li nbins: A Tensor of type int32. + +*@par Attributes: +* dtype: An optional attribute. Defaults to "int32". + +*@par Outputs: +*y: A Tensor. A Tensor of type int32. +*/ +REG_OP(HistogramFixedWidth) + .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32})) + .INPUT(range, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32})) + .INPUT(nbins, TensorType({DT_INT32})) + .OUTPUT(y, TensorType({DT_INT32})) + .ATTR(dtype, String, "int32") + .OP_END_FACTORY_REG(HistogramFixedWidth) + +/** +*@brief This operation returns a rank 1 histogram counting the number of entries in `values` \n +* that fell into every bin.The bins are equal width and determined by the arguments \n +* 'value_range' and 'nbins'. \n + +*@par Inputs: +*Two inputs, including: \n +*@li x: A Tensor of type float32,float16,int32. +*@li range: A Tensor of type float32,float16,int32. + +*@par Attributes: +*@li dtype: An optional attribute. Defaults to "int32". 
+*@li nbins: A required attribute,the type is int32. + +*@par Outputs: +*y: A Tensor. A Tensor of type int32. +*/ +REG_OP(HistogramFixedWidthD) + .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32})) + .INPUT(range, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32})) + .OUTPUT(y, TensorType({DT_INT32})) + .REQUIRED_ATTR(nbins, Int) + .ATTR(dtype, String, "int32") + .OP_END_FACTORY_REG(HistogramFixedWidthD) } // namespace ge #endif // GE_OP_MATH_OPS_H_ diff --git a/third_party/fwkacllib/inc/ops/matrix_calculation_ops.h b/third_party/fwkacllib/inc/ops/matrix_calculation_ops.h index e11922c0..f5045786 100644 --- a/third_party/fwkacllib/inc/ops/matrix_calculation_ops.h +++ b/third_party/fwkacllib/inc/ops/matrix_calculation_ops.h @@ -287,26 +287,6 @@ REG_OP(ScatterMax) .ATTR(use_locking, Bool, false) .OP_END_FACTORY_REG(ScatterMax) -REG_OP(SparseApplyAdagrad) - .INPUT(var, TensorType({DT_FLOAT})) - .INPUT(accum, TensorType({DT_FLOAT})) - .INPUT(lr, TensorType({DT_FLOAT})) - .INPUT(grad, TensorType({DT_FLOAT})) - .INPUT(indices, TensorType({DT_INT32})) - .OUTPUT(var, TensorType({DT_FLOAT})) - .ATTR(use_locking, Bool, false) - .OP_END_FACTORY_REG(SparseApplyAdagrad) - -REG_OP(SparseApplyAdagradD) - .INPUT(var, TensorType({DT_FLOAT})) - .INPUT(accum, TensorType({DT_FLOAT})) - .INPUT(grad, TensorType({DT_FLOAT})) - .INPUT(indices, TensorType({DT_INT32})) - .OUTPUT(var, TensorType({DT_FLOAT})) - .REQUIRED_ATTR(lr, Float) - .ATTR(use_locking, Bool, false) - .OP_END_FACTORY_REG(SparseApplyAdagradD) - REG_OP(ScatterUpdate) .INPUT(var, TensorType({DT_FLOAT16,DT_FLOAT,DT_INT8,DT_UINT8})) .INPUT(indices, TensorType({DT_INT32})) @@ -315,94 +295,6 @@ REG_OP(ScatterUpdate) .ATTR(use_locking, Bool, false) .OP_END_FACTORY_REG(ScatterUpdate) -/** -* @brief Update relevant entries in '*var' according to the Ftrl-proximal scheme. -* That is for rows we have grad for, we update var, accum and linear - -* @par Inputs: -* Ten inputs, including: -* @li var: A mutable Tensor. Must be of type TensorType::NumberType(). -* Should be a Variable Tensor. -* @li accum: A mutable Tensor of the same type as "var". -* Should be a Variable Tensor. -* @li linear: A mutable Tensor of the same type as "var". -* Should be a Variable Tensor. -* @li grad: A Tensor of the same type as "var", for the gradient. -* @li indices: A vector of indices into the first dimension of var and accum. -* @li lr: A Tensor of the same type as "var", for the scaling factor. Must be a scalar. -* @li l1: A Tensor of the same type as "var", for L1 regulariation. Must be a scalar. -* @li l2: A Tensor of the same type as "var", for L2 regulariation. Must be a scalar. -* @li l2_shrinkage: A Tensor of the same type as "var", L2 shrinkage regulariation. Must be a scalar. -* @li lr_power: A Tensor of the same type as "var", for the scaling factor. Must be a scalar. - -* @par Attributes: -* use_locking: An optional bool. Defaults to "False". -* If "True", updating of the "var" and "accum" tensors will be -* rotected by a lock; otherwise the behavior is undefined, -* but may exhibit less contention. - -* @par Outputs: -* var: A Tensor. Has the same type and format as input "var". 
-*/ -REG_OP(SparseApplyFtrlV2) - .INPUT(var, TensorType({DT_FLOAT})) - .INPUT(accum, TensorType({DT_FLOAT})) - .INPUT(linear, TensorType({DT_FLOAT})) - .INPUT(grad, TensorType({DT_FLOAT})) - .INPUT(indices, TensorType({DT_INT32})) - .INPUT(lr, TensorType({DT_FLOAT})) - .INPUT(l1, TensorType({DT_FLOAT})) - .INPUT(l2, TensorType({DT_FLOAT})) - .INPUT(l2_shrinkage, TensorType({DT_FLOAT})) - .INPUT(lr_power, TensorType({DT_FLOAT})) - .OUTPUT(var, TensorType({DT_FLOAT})) - .ATTR(use_locking, Bool, false) - .OP_END_FACTORY_REG(SparseApplyFtrlV2) - -/** -* @brief Update relevant entries in '*var' according to the Ftrl-proximal scheme. -* That is for rows we have grad for, we update var, accum and linear - -* @par Inputs: -* Ten inputs, including: -* @li var: A mutable Tensor. Must be of type TensorType::NumberType(). -* Should be a Variable Tensor. -* @li accum: A mutable Tensor of the same type as "var". -* Should be a Variable Tensor. -* @li linear: A mutable Tensor of the same type as "var". -* Should be a Variable Tensor. -* @li grad: A Tensor of the same type as "var", for the gradient. -* @li indices: A vector of indices into the first dimension of var and accum. - -* @par Attributes: -* @li lr: A Tensor of the same type as "var", for the scaling factor. Must be a scalar. -* @li l1: A Tensor of the same type as "var", for L1 regulariation. Must be a scalar. -* @li l2: A Tensor of the same type as "var", for L2 regulariation. Must be a scalar. -* @li l2_shrinkage: A Tensor of the same type as "var", L2 shrinkage regulariation. Must be a scalar. -* @li lr_power: A Tensor of the same type as "var", for the scaling factor. Must be a scalar. -* @li use_locking: An optional bool. Defaults to "False". -* If "True", updating of the "var" and "accum" tensors will be -* rotected by a lock; otherwise the behavior is undefined, -* but may exhibit less contention. - -* @par Outputs: -* var: A Tensor. Has the same type and format as input "var". -*/ -REG_OP(SparseApplyFtrlV2D) - .INPUT(var, TensorType({DT_FLOAT})) - .INPUT(accum, TensorType({DT_FLOAT})) - .INPUT(linear, TensorType({DT_FLOAT})) - .INPUT(grad, TensorType({DT_FLOAT})) - .INPUT(indices, TensorType({DT_INT32})) - .OUTPUT(var, TensorType({DT_FLOAT})) - .REQUIRED_ATTR(lr, Float) - .REQUIRED_ATTR(l1, Float) - .REQUIRED_ATTR(l2, Float) - .REQUIRED_ATTR(l2_shrinkage, Float) - .REQUIRED_ATTR(lr_power, Float) - .ATTR(use_locking, Bool, false) - .OP_END_FACTORY_REG(SparseApplyFtrlV2D) - } // namespace ge #endif // GE_OP_MATRIX_CALCULATION_OPS_H diff --git a/third_party/fwkacllib/inc/ops/nn_calculation_ops.h b/third_party/fwkacllib/inc/ops/nn_calculation_ops.h index 5cf56464..175e6e2a 100644 --- a/third_party/fwkacllib/inc/ops/nn_calculation_ops.h +++ b/third_party/fwkacllib/inc/ops/nn_calculation_ops.h @@ -407,6 +407,32 @@ REG_OP(Conv2DBackpropInputD) .ATTR(dilations, ListInt, {1, 1, 1, 1}) .OP_END_FACTORY_REG(Conv2DBackpropInputD) +/** +*@brief Computes the Deconvolution with respect to the input. +*@par Inputs: + * Two inputs: + * @li x: A Tensor. Must have the same type as "filter". 4D with shape\n + * [batch, out_height, out_width, out_channels]\n + * or [batch, out_channels, out_height, out_width]. Gradients with respect\n + * to the output of the convolution. + * @li filter: A Tensor of type float16. + * 4D with shape [filter_height, filter_width, in_channels, out_channels],\n + * or [out_channels, filter_height, filter_width, in_channels], \n + * or [out_channels, in_channel, filter_height, filter_width]. 
+ * One optional input: + * @li bias: An optional tensor of type int8 +*@par Attributes: + * Three attributes: + * @li strides: A tuple or list of 2 integers. The stride of the sliding window\n + * for H/W dimension. + * @li pads: A tuple or list of 4 integers. The [top, bottom, left, right] \n + * padding on the feature map + * @li dilations: A tuple or list of 4 integers. The dilation factor for each\n + * dimension of input. Must be [1, 1, 1, 1]. +*@par Outputs: + * y: A Tensor. Has the same type as "filter". 4D tensor with shape\n + * [batch, height, width, channels] or [batch, channels, height, width]. +*/ REG_OP(Deconvolution) .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE})) .INPUT(filter, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE})) diff --git a/third_party/fwkacllib/inc/ops/nn_detect_ops.h b/third_party/fwkacllib/inc/ops/nn_detect_ops.h index 0a432efe..39dd23b1 100644 --- a/third_party/fwkacllib/inc/ops/nn_detect_ops.h +++ b/third_party/fwkacllib/inc/ops/nn_detect_ops.h @@ -22,6 +22,23 @@ namespace ge { +/** +*@brief Generates bounding boxes based on "rois" and "deltas". It is a customized FasterRcnn operator. + +*@par Inputs: +* Two inputs, including: \n +*@li rois: Region of interests (ROIs) generated by the region proposal network (RPN). A 2D Tensor of type float 32 with shape (N, 4). "N" indicates the number of ROIs, and the value "4" refers to "x0", "x1", "y0", and "y1". +*@li deltas: Absolute variation between the ROIs generated by the RPN and ground truth boxes. A 2D Tensor of type float32 with shape (N, 4). "N" indicates the number of errors, and 4 indicates "dx", "dy", "dw", and "dh". + +*@par Attributes: +*@li means: An index of type int. Defaults to [0,0,0,0]. "deltas" = "deltas" x "stds" + "means". +*@li stds: An index of type int. Defaults to [0,0,0,0]. "deltas" = "deltas" x "stds" + "means". +*@li max_shape: Shape [h, w], specifying the size of the image transferred to the network. Used to ensure that the bbox shape after conversion does not exceed "max_shape". +*@li wh_ratio_clip: Defaults to "16/1000". The values of "dw" and "dh" fall within (-wh_ratio_clip, wh_ratio_clip). + +*@par Outputs: +*bboxes: Bboxes generated based on "rois" and "deltas". Have the same format and type as "rois". +*/ REG_OP(BoundingBoxDecode) .INPUT(rois, TensorType({DT_FLOAT16, DT_FLOAT})) .INPUT(deltas, TensorType({DT_FLOAT16, DT_FLOAT})) @@ -32,6 +49,21 @@ REG_OP(BoundingBoxDecode) .ATTR(wh_ratio_clip, Float, 0.016) .OP_END_FACTORY_REG(BoundingBoxDecode) +/** +*@brief Computes the coordinate variations between bboxes and ground truth boxes. It is a customized FasterRcnn operator. + +*@par Inputs: +* Two inputs, including: \n +*@li anchor_box: Anchor boxes. A 2D Tensor of float32 with shape (N, 4). "N" indicates the number of bounding boxes, and the value "4" refers to "x0", "x1", "y0", and "y1". +*@li ground_truth_box: Ground truth boxes. A 2D Tensor of float32 with shape (N, 4). "N" indicates the number of bounding boxes, and the value "4" refers to "x0", "x1", "y0", and "y1". + +*@par Attributes: +*@li means: An index of type int. Defaults to [0,0,0,0]. "deltas" = "deltas" x "stds" + "means". +*@li stds: An index of type int. Defaults to [0,0,0,0]. "deltas" = "deltas" x "stds" + "means". + +*@par Outputs: +*delats: A 2D Tensor of type float32 with shape (N, 4), specifying the variations between all anchor boxes and ground truth boxes. 
+*/ REG_OP(BoundingBoxEncode) .INPUT(anchor_box, TensorType({DT_FLOAT16, DT_FLOAT})) .INPUT(ground_truth_box, TensorType({DT_FLOAT16, DT_FLOAT})) @@ -40,12 +72,43 @@ REG_OP(BoundingBoxEncode) .ATTR(stds, ListFloat, {1.0, 1.0, 1.0, 1.0}) .OP_END_FACTORY_REG(BoundingBoxEncode) +/** +*@brief Judges whether the bounding box is valid. It is a customized FasterRcnn operator. + +*@par Inputs: +* Two inputs, including: \n +*@li bbox_tensor: Bounding box. A 2D Tensor of type float16 with shape (N, 4). "N" indicates the number of bounding boxes, the value "4" indicates "x0", "x1", "y0", and "y1". +*@li img_metas: Valid boundary value of the image. A 1D Tensor of type float16 with shape (16,) + +*@par Outputs: +*valid_tensor: A bool with shape (N, 1), specifying whether an input anchor is in an image. "1" indicates valid, while "0" indicates invalid. + +*@attention Constraints: +* 16 "img_metas" are input. The first three numbers (height, width, ratio) are valid, specifying the valid boundary (heights x ratio, weights x ratio). +*/ REG_OP(CheckValid) .INPUT(bbox_tensor, TensorType({DT_FLOAT16})) .INPUT(img_metas, TensorType({DT_FLOAT16})) .OUTPUT(valid_tensor, TensorType({DT_INT8})) .OP_END_FACTORY_REG(CheckValid) +/** +*@brief Computes the intersection over union (iou) or the intersection over foreground (iof) based on the ground-truth and predicted regions. + +*@par Inputs: +* Two inputs, including: \n +*@li bboxes: Bounding boxes, a 2D Tensor of type float16 with shape (N, 4). "N" indicates the number of bounding boxes, and the value "4" refers to "x0", "x1", "y0", and "y1". +*@li gtboxes: Ground-truth boxes, a 2D Tensor of type float16 with shape (M, 4). "M" indicates the number of ground truth boxes, and the value "4" refers to "x0", "x1", "y0", and "y1". + +*@par Attributes: +*mode: Computation mode, a character string with the value range of [iou, iof]. + +*@par Outputs: +*overlap: A 2D Tensor of type float16 with shape [M, N], specifying the IoU or IoF ratio. + +*@attention Constraints: +* Only computation of float16 data is supported. To avoid overflow, the input length and width are scaled by 0.2 internally. +*/ REG_OP(Iou) .INPUT(bboxes, TensorType({DT_FLOAT16, DT_FLOAT})) .INPUT(gtboxes, TensorType({DT_FLOAT16, DT_FLOAT})) @@ -53,6 +116,25 @@ REG_OP(Iou) .ATTR(mode, String, "iou") .OP_END_FACTORY_REG(Iou) +/** +*@brief Performs the backpropagation of ROIAlign for training scenarios. + +*@par Inputs: +* Three inputs, including: \n +*@li ydiff: A 5HD gradient input of type float32. +*@li rois: ROI position. A 2D Tensor of float32 with shape (N, 5). "N" indicates the number of ROIs, the value "5" indicates the indexes of images where the ROIs are located, "x0", "x1", "y0", and "y1". +*@li rois_n: An optional input, specifying the number of valid ROIs. This parameter is reserved. + +*@par Attributes: +*@li xdiff_shape: A required list of 4 ints, obtained based on the shape of "features" of ROIAlign. +*@li pooled_width: A required attribute of type int, specifying the W dimension. +*@li pooled_height: A required attribute of type int, specifying the H dimension. +*@li spatial_scale: A required attribute of type float, specifying the scaling ratio of "features" to the original image. +*@li sample_num: An optional attribute of type int, specifying the horizontal and vertical sampling frequency of each output. If this attribute is set to "0", the sampling frequency is equal to the rounded up value of "rois", which is a floating point number. Defaults to "2". 
+ +*@par Outputs: +*xdiff: Gradient added to input "features". Has the same 5HD shape as input "features". +*/ REG_OP(ROIAlignGrad) .INPUT(ydiff, TensorType({DT_FLOAT})) .INPUT(rois, TensorType({DT_FLOAT})) @@ -65,6 +147,24 @@ REG_OP(ROIAlignGrad) .ATTR(sample_num, Int, 2) .OP_END_FACTORY_REG(ROIAlignGrad) +/** +*@brief Obtains the ROI feature matrix from the feature map. It is a customized FasterRcnn operator. + +*@par Inputs: +* Three inputs, including: \n +*@li features: A 5HD Tensor of type float32. +*@li rois: ROI position. A 2D Tensor of float32 with shape (N, 5). "N" indicates the number of ROIs, the value "5" indicates the indexes of images where the ROIs are located, "x0", "x1", "y0", and "y1". +*@li rois_n: An optional input, specifying the number of valid ROIs. This parameter is reserved. + +*@par Attributes: +*@li spatial_scale: A required attribute of type float, specifying the scaling ratio of "features" to the original image. +*@li pooled_height: A required attribute of type int, specifying the H dimension. +*@li pooled_width: A required attribute of type int, specifying the W dimension. +*@li sample_num: An optional attribute of type int, specifying the horizontal and vertical sampling frequency of each output. If this attribute is set to "0", the sampling frequency is equal to the rounded up value of "rois", which is a floating point number. Defaults to "2". + +*@par Outputs: +*output: Outputs the feature sample of each ROI position. The format is 5HD. The axis N is the number of input ROIs. Axes H, W, and C are consistent with the values of "pooled_height", "pooled_width", and "features", respectively. +*/ REG_OP(ROIAlign) .INPUT(features, TensorType({DT_FLOAT})) .INPUT(rois, TensorType({DT_FLOAT})) diff --git a/third_party/fwkacllib/inc/ops/nn_norm_ops.h b/third_party/fwkacllib/inc/ops/nn_norm_ops.h index 618dadf8..8dec2dc3 100644 --- a/third_party/fwkacllib/inc/ops/nn_norm_ops.h +++ b/third_party/fwkacllib/inc/ops/nn_norm_ops.h @@ -236,6 +236,256 @@ REG_OP(ConfusionSoftmaxGrad) .INPUT(x, TensorType({DT_FLOAT16,DT_FLOAT})) .OUTPUT(y, TensorType({DT_FLOAT16,DT_FLOAT})) .OP_END_FACTORY_REG(ConfusionSoftmaxGrad) + +/** +*@brief Layernorm operator interface implementation +* calculating: x, gamma, beta +* mean = np.mean(x, reduce_axis, keepdims=True) +* variance = np.mean(np.power((x - mean),2), reduce_axis, keepdims=True) +* y = gamma*((x - mean) / np.sqrt(variance + 0.001)) + beta + +*@par Inputs: +*Three inputs, including: +* @li x: A Tensor. Must be one of the following types: float16, float32. +* @li gamma: A Tensor. Must be one of the following types: float16, float32. +* @li beta: A Tensor. Must be one of the following types: float16, float32. + +*@par Attributes: +* @li begin_norm_axis: A required attribute, the type is int32. +* @li begin_params_axis: A required attribute,the type is int32. + +*@par Outputs: +*Three outputs, including: +* @li y: A Tensor. Must be one of the following types: float16, float32. +* @li mean: A Tensor. Must be one of the following types: float16, float32. +* @li variance: A Tensor. Must be one of the following types: float16, float32. 
+*/ +REG_OP(LayerNorm) + .INPUT(x, TensorType({DT_FLOAT, DT_FLOAT16})) + .INPUT(gamma, TensorType({DT_FLOAT, DT_FLOAT16})) + .INPUT(beta, TensorType({DT_FLOAT, DT_FLOAT16})) + .OUTPUT(y, TensorType({DT_FLOAT, DT_FLOAT16})) + .OUTPUT(mean, TensorType({DT_FLOAT, DT_FLOAT16})) + .OUTPUT(variance, TensorType({DT_FLOAT, DT_FLOAT16})) + .ATTR(begin_norm_axis, Int, 0) + .ATTR(begin_params_axis, Int, 0) + .ATTR(epsilon, Float, 0.0000001) + .OP_END_FACTORY_REG(LayerNorm) + +/** +*@brief LayerNormGrad operator interface implementation +* calculating: dy, x, variance, mean, gamma +* pd_xl = data_dy*data_gamma +* pd_var = np.sum(((-0.5)*pd_xl*(data_x - data_mean) +* np.power((data_variance + EPSLON), (-1.5))), +* reduce_axis, keepdims=True) +* pd_mean = np.sum(((-1.0)*pd_xl +* np.power((data_variance + EPSLON), (-0.5))), +* reduce_axis, keepdims=True) +* + pd_var*(1.0/m) +* np.sum(((-2.0)*(data_x - data_mean)), reduce_axis, keepdims=True) +* pd_x = pd_xl*np.power((data_variance + EPSLON), (-0.5)) + +* pd_var*(2.0/m)*(data_x - data_mean) + pd_mean*(1.0/m) +* pd_gamma = np.sum((data_dy*(data_x - data_mean) +* np.power((data_variance + EPSLON), (-0.5))), param_axis, keepdims=True) +* pd_beta = np.sum(data_dy, param_axis, keepdims=True) + +*@par Inputs: +*Three inputs, including: +* @li dy: A Tensor. Must be one of the following types: float16, float32. +* @li x: A Tensor. Must be one of the following types: float16, float32. +* @li variance: A Tensor. Must be one of the following types: float16, float32. +* @li mean: A Tensor. Must be one of the following types: float16, float32. +* @li gamma: A Tensor. Must be one of the following types: float16, float32. + +*@par Outputs: +*Three outputs, including: +* @li pd_x: A Tensor. Must be one of the following types: float16, float32. +* @li pd_gamma: A Tensor. Must be one of the following types: float16, float32. +* @li pd_beta: A Tensor. Must be one of the following types: float16, float32. +*/ +REG_OP(LayerNormGrad) + .INPUT(dy, TensorType({DT_FLOAT, DT_FLOAT16})) + .INPUT(x, TensorType({DT_FLOAT, DT_FLOAT16})) + .INPUT(variance, TensorType({DT_FLOAT, DT_FLOAT16})) + .INPUT(mean, TensorType({DT_FLOAT, DT_FLOAT16})) + .INPUT(gamma, TensorType({DT_FLOAT, DT_FLOAT16})) + .OUTPUT(pd_x, TensorType({DT_FLOAT, DT_FLOAT16})) + .OUTPUT(pd_gamma, TensorType({DT_FLOAT, DT_FLOAT16})) + .OUTPUT(pd_beta, TensorType({DT_FLOAT, DT_FLOAT16})) + .OP_END_FACTORY_REG(LayerNormGrad) + +/** +*@brief LayerNormXBackprop operator interface implementation +* calculating: dy, x, variance, mean, gamma +* pd_xl = data_dy*data_gamma +* pd_var = np.sum(((-0.5)*pd_xl*(data_x - data_mean) +* np.power((data_variance + EPSLON), (-1.5))), +* reduce_axis, keepdims=True) +* pd_mean = np.sum(((-1.0)*pd_xl +* np.power((data_variance + EPSLON), (-0.5))), +* reduce_axis, keepdims=True) +* + pd_var*(1.0/m) +* np.sum(((-2.0)*(data_x - data_mean)), reduce_axis, keepdims=True) +* pd_x = pd_xl*np.power((data_variance + EPSLON), (-0.5)) + +* pd_var*(2.0/m)*(data_x - data_mean) + pd_mean*(1.0/m) +* pd_gamma = np.sum((data_dy*(data_x - data_mean) +* np.power((data_variance + EPSLON), (-0.5))), param_axis, keepdims=True) +* pd_beta = np.sum(data_dy, param_axis, keepdims=True) + +*@par Inputs: +*Three inputs, including: +* @li dy: A Tensor. Must be one of the following types: float16, float32. +* @li x: A Tensor. Must be one of the following types: float16, float32. +* @li variance: A Tensor. Must be one of the following types: float16, float32. +* @li mean: A Tensor. 
Must be one of the following types: float16, float32. +* @li gamma: A Tensor. Must be one of the following types: float16, float32. + +*@par Outputs: +*Three outputs, including: +* @li pd_x: A Tensor. Must be one of the following types: float16, float32. +*/ +REG_OP(LayerNormXBackprop) + .INPUT(dy, TensorType({DT_FLOAT, DT_FLOAT16})) + .INPUT(x, TensorType({DT_FLOAT, DT_FLOAT16})) + .INPUT(variance, TensorType({DT_FLOAT, DT_FLOAT16})) + .INPUT(mean, TensorType({DT_FLOAT, DT_FLOAT16})) + .INPUT(gamma, TensorType({DT_FLOAT, DT_FLOAT16})) + .OUTPUT(pd_x, TensorType({DT_FLOAT, DT_FLOAT16})) + .OP_END_FACTORY_REG(LayerNormXBackprop) + +/** +*@brief LayerNormBetaGammaBackprop operator interface implementation +* calculating: dy, x, variance, mean +* pd_xl = data_dy*data_gamma +* pd_var = np.sum(((-0.5)*pd_xl*(data_x - data_mean) +* np.power((data_variance + EPSLON), (-1.5))), +* reduce_axis, keepdims=True) +* pd_mean = np.sum(((-1.0)*pd_xl +* np.power((data_variance + EPSLON), (-0.5))), +* reduce_axis, keepdims=True) +* + pd_var*(1.0/m) +* np.sum(((-2.0)*(data_x - data_mean)), reduce_axis, keepdims=True) +* pd_x = pd_xl*np.power((data_variance + EPSLON), (-0.5)) + +* pd_var*(2.0/m)*(data_x - data_mean) + pd_mean*(1.0/m) +* pd_gamma = np.sum((data_dy*(data_x - data_mean) +* np.power((data_variance + EPSLON), (-0.5))), param_axis, keepdims=True) +* pd_beta = np.sum(data_dy, param_axis, keepdims=True) + +*@par Inputs: +*Three inputs, including: +* @li dy: A Tensor. Must be one of the following types: float16, float32. +* @li x: A Tensor. Must be one of the following types: float16, float32. +* @li variance: A Tensor. Must be one of the following types: float16, float32. +* @li mean: A Tensor. Must be one of the following types: float16, float32. + +*@par Outputs: +*Three outputs, including: +* @li pd_gamma: A Tensor. Must be one of the following types: float16, float32. +* @li pd_beta: A Tensor. Must be one of the following types: float16, float32. +*/ +REG_OP(LayerNormBetaGammaBackprop) + .INPUT(dy, TensorType({DT_FLOAT, DT_FLOAT16})) + .INPUT(x, TensorType({DT_FLOAT, DT_FLOAT16})) + .INPUT(variance, TensorType({DT_FLOAT, DT_FLOAT16})) + .INPUT(mean, TensorType({DT_FLOAT, DT_FLOAT16})) + .OUTPUT(pd_gamma, TensorType({DT_FLOAT, DT_FLOAT16})) + .OUTPUT(pd_beta, TensorType({DT_FLOAT, DT_FLOAT16})) + .REQUIRED_ATTR(shape_gamma, ListInt) + .OP_END_FACTORY_REG(LayerNormBetaGammaBackprop) + +/** +*@brief Return "output" according to the algorithm of dropout_do_mask: \n +* scale_x = x *(1 / keep_prob) +* output = select(mask == 1, scale_x, 0) + +*@par Inputs: +*Three inputs, including: \n +* @li x: A mutable Tensor. Must be one of the following types: +* float16, float32 +* @li mask: A mutable Tensor. Must met all of the following rules: +* shape of mask should be 1D. +* dtype of mask should be uint8. +* value of shape should met the following algorithm: +* value = (size(x) + 128 - 1) // 128 * 128 //8 +* @li keep_prob: A mutable Tensor. Must met all of the following rules: +* shape of "keep_prob" should be (1,) or [1,]. +* Has the same type as "x". + +*@par Output: +*y: A mutable Tensor. Has the same type as "x". +*/ +REG_OP(DropOutDoMask) + .INPUT(x, TensorType({DT_FLOAT, DT_FLOAT16})) + .INPUT(mask, TensorType({DT_UINT8})) + .INPUT(keep_prob, TensorType({DT_FLOAT, DT_FLOAT16})) + .OUTPUT(y, TensorType({DT_FLOAT, DT_FLOAT16})) + .OP_END_FACTORY_REG(DropOutDoMask) + +/** +*@brief Local Response Normalization. + +*@par Inputs: +*One input, including: +*@li x: A Tensor. 
Must be 4-D shape, and only support the following types: float16, float32. + +*@par Attributes: +*@li depth_radius: An optional int, specifying the half-width of the +* normalization window. Defaults to "5". +*@li bias: An optional float32. An offset, usually > 0 to avoid dividing by 0. +* Defaults to "1". +*@li alpha: An optional float32. A scaling factor, usually positive. +* Defaults to "1". +*@li beta: An optional float32. An exponent. Defaults to "0.5". +*@li norm_region: An optional string. A mode option. Defaults to "ACROSS_CHANNELS". + +*@par Outputs: +*y: A Tensor. Has the same data type and shape as "x". +*/ +REG_OP(LRN) + .INPUT(x, TensorType({DT_FLOAT16,DT_FLOAT})) + .OUTPUT(y, TensorType({DT_FLOAT16,DT_FLOAT})) + .ATTR(depth_radius, Int, 5) + .ATTR(bias, Float, 1.0) + .ATTR(alpha, Float, 1.0) + .ATTR(beta, Float, 0.5) + .ATTR(norm_region, String, "ACROSS_CHANNELS") + .OP_END_FACTORY_REG(LRN) + +/** +* @brief Computes the gradient for Local Response Normalization. + +* @par Inputs: +* @li grads: A 4D Tensor of type float16 or float32. +* @li x: A 4D Tensor of type float16 or float32. +* @li y: A 4D Tensor of type float16 or float32. + +* @par Attributes: +* @li depth_radius: An optional int, specifying the half-width of the +* normalization window. Defaults to "5". +* @li bias: An optional float32. An offset, usually > 0 to avoid dividing by 0. +* Defaults to "1". +* @li alpha: An optional float32. A scaling factor, usually positive. +* Defaults to "1". +* @li beta: An optional float32. An exponent. Defaults to "0.5". + +* @par Outputs: +* z: A Tensor. Has the same type and shape as "grads". + +* @attention Constraints: +* "x" and "y" must have the same shape and type as "grads". +*/ +REG_OP(LRNGrad) + .INPUT(grads, TensorType({DT_FLOAT16,DT_FLOAT})) + .INPUT(x, TensorType({DT_FLOAT16,DT_FLOAT})) + .INPUT(y, TensorType({DT_FLOAT16,DT_FLOAT})) + .OUTPUT(z, TensorType({DT_FLOAT16,DT_FLOAT})) + .ATTR(depth_radius, Int, 5) + .ATTR(bias, Float, 1.0) + .ATTR(alpha, Float, 1.0) + .ATTR(beta, Float, 0.5) + .OP_END_FACTORY_REG(LRNGrad) + } // namespace ge #endif //GE_OP_NN_NORM_OPS_H diff --git a/third_party/fwkacllib/inc/ops/nn_other_ops.h b/third_party/fwkacllib/inc/ops/nn_other_ops.h deleted file mode 100644 index 125b21a5..00000000 --- a/third_party/fwkacllib/inc/ops/nn_other_ops.h +++ /dev/null @@ -1,268 +0,0 @@ -/** - * Copyright 2019-2020 Huawei Technologies Co., Ltd - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#ifndef GE_OP_NN_OTHER_OPS_H -#define GE_OP_NN_OTHER_OPS_H -#include "../graph/operator_reg.h" - -namespace ge { -REG_OP(Erf) - .INPUT(x, TensorType::FloatingDataType()) - .OUTPUT(y, TensorType::FloatingDataType()) - .OP_END_FACTORY_REG(Erf) - -REG_OP(Erfc) - .INPUT(x, TensorType::FloatingDataType()) - .OUTPUT(y, TensorType::FloatingDataType()) - .OP_END_FACTORY_REG(Erfc) - -/** -*@brief This operation returns a rank 1 histogram counting the number of entries in `values` \n -* that fell into every bin.The bins are equal width and determined by the arguments \n -* 'value_range' and 'nbins'. \n - -*@par Inputs: -*Three inputs, including: \n -*@li x: A Tensor of type float32,float16,int32. -*@li range: A Tensor of type float32,float16,int32. -*@li nbins: A Tensor of type int32. - -*@par Attributes: -* dtype: An optional attribute. Defaults to "int32". - -*@par Outputs: -*y: A Tensor. A Tensor of type int32. -*/ -REG_OP(HistogramFixedWidth) - .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32})) - .INPUT(range, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32})) - .INPUT(nbins, TensorType({DT_INT32})) - .OUTPUT(y, TensorType({DT_INT32})) - .ATTR(dtype, String, "int32") - .OP_END_FACTORY_REG(HistogramFixedWidth) - -/** -*@brief This operation returns a rank 1 histogram counting the number of entries in `values` \n -* that fell into every bin.The bins are equal width and determined by the arguments \n -* 'value_range' and 'nbins'. \n - -*@par Inputs: -*Two inputs, including: \n -*@li x: A Tensor of type float32,float16,int32. -*@li range: A Tensor of type float32,float16,int32. - -*@par Attributes: -*@li dtype: An optional attribute. Defaults to "int32". -*@li nbins: A required attribute,the type is int32. - -*@par Outputs: -*y: A Tensor. A Tensor of type int32. -*/ -REG_OP(HistogramFixedWidthD) - .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32})) - .INPUT(range, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32})) - .OUTPUT(y, TensorType({DT_INT32})) - .REQUIRED_ATTR(nbins, Int) - .ATTR(dtype, String, "int32") - .OP_END_FACTORY_REG(HistogramFixedWidthD) - -/** -*@brief Layernorm operator interface implementation -* calculating: x, gamma, beta -* mean = np.mean(x, reduce_axis, keepdims=True) -* variance = np.mean(np.power((x - mean),2), reduce_axis, keepdims=True) -* y = gamma*((x - mean) / np.sqrt(variance + 0.001)) + beta - -*@par Inputs: -*Three inputs, including: -* @li x: A Tensor. Must be one of the following types: float16, float32. -* @li gamma: A Tensor. Must be one of the following types: float16, float32. -* @li beta: A Tensor. Must be one of the following types: float16, float32. - -*@par Attributes: -* @li begin_norm_axis: A required attribute, the type is int32. -* @li begin_params_axis: A required attribute,the type is int32. - -*@par Outputs: -*Three outputs, including: -* @li y: A Tensor. Must be one of the following types: float16, float32. -* @li mean: A Tensor. Must be one of the following types: float16, float32. -* @li variance: A Tensor. Must be one of the following types: float16, float32. 
-*/ -REG_OP(LayerNorm) - .INPUT(x, TensorType({DT_FLOAT, DT_FLOAT16})) - .INPUT(gamma, TensorType({DT_FLOAT, DT_FLOAT16})) - .INPUT(beta, TensorType({DT_FLOAT, DT_FLOAT16})) - .OUTPUT(y, TensorType({DT_FLOAT, DT_FLOAT16})) - .OUTPUT(mean, TensorType({DT_FLOAT, DT_FLOAT16})) - .OUTPUT(variance, TensorType({DT_FLOAT, DT_FLOAT16})) - .ATTR(begin_norm_axis, Int, 0) - .ATTR(begin_params_axis, Int, 0) - .OP_END_FACTORY_REG(LayerNorm) - -/** -*@brief LayerNormGrad operator interface implementation -* calculating: dy, x, variance, mean, gamma -* pd_xl = data_dy*data_gamma -* pd_var = np.sum(((-0.5)*pd_xl*(data_x - data_mean) -* np.power((data_variance + EPSLON), (-1.5))), -* reduce_axis, keepdims=True) -* pd_mean = np.sum(((-1.0)*pd_xl -* np.power((data_variance + EPSLON), (-0.5))), -* reduce_axis, keepdims=True) -* + pd_var*(1.0/m) -* np.sum(((-2.0)*(data_x - data_mean)), reduce_axis, keepdims=True) -* pd_x = pd_xl*np.power((data_variance + EPSLON), (-0.5)) + -* pd_var*(2.0/m)*(data_x - data_mean) + pd_mean*(1.0/m) -* pd_gamma = np.sum((data_dy*(data_x - data_mean) -* np.power((data_variance + EPSLON), (-0.5))), param_axis, keepdims=True) -* pd_beta = np.sum(data_dy, param_axis, keepdims=True) - -*@par Inputs: -*Three inputs, including: -* @li dy: A Tensor. Must be one of the following types: float16, float32. -* @li x: A Tensor. Must be one of the following types: float16, float32. -* @li variance: A Tensor. Must be one of the following types: float16, float32. -* @li mean: A Tensor. Must be one of the following types: float16, float32. -* @li gamma: A Tensor. Must be one of the following types: float16, float32. - -*@par Outputs: -*Three outputs, including: -* @li pd_x: A Tensor. Must be one of the following types: float16, float32. -* @li pd_gamma: A Tensor. Must be one of the following types: float16, float32. -* @li pd_beta: A Tensor. Must be one of the following types: float16, float32. -*/ -REG_OP(LayerNormGrad) - .INPUT(dy, TensorType({DT_FLOAT, DT_FLOAT16})) - .INPUT(x, TensorType({DT_FLOAT, DT_FLOAT16})) - .INPUT(variance, TensorType({DT_FLOAT, DT_FLOAT16})) - .INPUT(mean, TensorType({DT_FLOAT, DT_FLOAT16})) - .INPUT(gamma, TensorType({DT_FLOAT, DT_FLOAT16})) - .OUTPUT(pd_x, TensorType({DT_FLOAT, DT_FLOAT16})) - .OUTPUT(pd_gamma, TensorType({DT_FLOAT, DT_FLOAT16})) - .OUTPUT(pd_beta, TensorType({DT_FLOAT, DT_FLOAT16})) - .OP_END_FACTORY_REG(LayerNormGrad) - -/** -*@brief LayerNormXBackprop operator interface implementation -* calculating: dy, x, variance, mean, gamma -* pd_xl = data_dy*data_gamma -* pd_var = np.sum(((-0.5)*pd_xl*(data_x - data_mean) -* np.power((data_variance + EPSLON), (-1.5))), -* reduce_axis, keepdims=True) -* pd_mean = np.sum(((-1.0)*pd_xl -* np.power((data_variance + EPSLON), (-0.5))), -* reduce_axis, keepdims=True) -* + pd_var*(1.0/m) -* np.sum(((-2.0)*(data_x - data_mean)), reduce_axis, keepdims=True) -* pd_x = pd_xl*np.power((data_variance + EPSLON), (-0.5)) + -* pd_var*(2.0/m)*(data_x - data_mean) + pd_mean*(1.0/m) -* pd_gamma = np.sum((data_dy*(data_x - data_mean) -* np.power((data_variance + EPSLON), (-0.5))), param_axis, keepdims=True) -* pd_beta = np.sum(data_dy, param_axis, keepdims=True) - -*@par Inputs: -*Three inputs, including: -* @li dy: A Tensor. Must be one of the following types: float16, float32. -* @li x: A Tensor. Must be one of the following types: float16, float32. -* @li variance: A Tensor. Must be one of the following types: float16, float32. -* @li mean: A Tensor. Must be one of the following types: float16, float32. -* @li gamma: A Tensor. 
Must be one of the following types: float16, float32. - -*@par Outputs: -*Three outputs, including: -* @li pd_x: A Tensor. Must be one of the following types: float16, float32. -*/ -REG_OP(LayerNormXBackprop) - .INPUT(dy, TensorType({DT_FLOAT, DT_FLOAT16})) - .INPUT(x, TensorType({DT_FLOAT, DT_FLOAT16})) - .INPUT(variance, TensorType({DT_FLOAT, DT_FLOAT16})) - .INPUT(mean, TensorType({DT_FLOAT, DT_FLOAT16})) - .INPUT(gamma, TensorType({DT_FLOAT, DT_FLOAT16})) - .OUTPUT(pd_x, TensorType({DT_FLOAT, DT_FLOAT16})) - .OP_END_FACTORY_REG(LayerNormXBackprop) - -/** -*@brief LayerNormBetaGammaBackprop operator interface implementation -* calculating: dy, x, variance, mean -* pd_xl = data_dy*data_gamma -* pd_var = np.sum(((-0.5)*pd_xl*(data_x - data_mean) -* np.power((data_variance + EPSLON), (-1.5))), -* reduce_axis, keepdims=True) -* pd_mean = np.sum(((-1.0)*pd_xl -* np.power((data_variance + EPSLON), (-0.5))), -* reduce_axis, keepdims=True) -* + pd_var*(1.0/m) -* np.sum(((-2.0)*(data_x - data_mean)), reduce_axis, keepdims=True) -* pd_x = pd_xl*np.power((data_variance + EPSLON), (-0.5)) + -* pd_var*(2.0/m)*(data_x - data_mean) + pd_mean*(1.0/m) -* pd_gamma = np.sum((data_dy*(data_x - data_mean) -* np.power((data_variance + EPSLON), (-0.5))), param_axis, keepdims=True) -* pd_beta = np.sum(data_dy, param_axis, keepdims=True) - -*@par Inputs: -*Three inputs, including: -* @li dy: A Tensor. Must be one of the following types: float16, float32. -* @li x: A Tensor. Must be one of the following types: float16, float32. -* @li variance: A Tensor. Must be one of the following types: float16, float32. -* @li mean: A Tensor. Must be one of the following types: float16, float32. - -*@par Outputs: -*Three outputs, including: -* @li pd_gamma: A Tensor. Must be one of the following types: float16, float32. -* @li pd_beta: A Tensor. Must be one of the following types: float16, float32. -*/ -REG_OP(LayerNormBetaGammaBackprop) - .INPUT(dy, TensorType({DT_FLOAT, DT_FLOAT16})) - .INPUT(x, TensorType({DT_FLOAT, DT_FLOAT16})) - .INPUT(variance, TensorType({DT_FLOAT, DT_FLOAT16})) - .INPUT(mean, TensorType({DT_FLOAT, DT_FLOAT16})) - .OUTPUT(pd_gamma, TensorType({DT_FLOAT, DT_FLOAT16})) - .OUTPUT(pd_beta, TensorType({DT_FLOAT, DT_FLOAT16})) - .REQUIRED_ATTR(shape_gamma, ListInt) - .OP_END_FACTORY_REG(LayerNormBetaGammaBackprop) - -/** -*@brief Return "output" according to the algorithm of dropout_do_mask: \n -* scale_x = x *(1 / keep_prob) -* output = select(mask == 1, scale_x, 0) - -*@par Inputs: -*Three inputs, including: \n -* @li x: A mutable Tensor. Must be one of the following types: -* float16, float32 -* @li mask: A mutable Tensor. Must met all of the following rules: -* shape of mask should be 1D. -* dtype of mask should be uint8. -* value of shape should met the following algorithm: -* value = (size(x) + 128 - 1) // 128 * 128 //8 -* @li keep_prob: A mutable Tensor. Must met all of the following rules: -* shape of "keep_prob" should be (1,) or [1,]. -* Has the same type as "x". - -*@par Output: -*y: A mutable Tensor. Has the same type as "x". 
-*/ -REG_OP(DropOutDoMask) - .INPUT(x, TensorType({DT_FLOAT, DT_FLOAT16})) - .INPUT(mask, TensorType({DT_UINT8})) - .INPUT(keep_prob, TensorType({DT_FLOAT, DT_FLOAT16})) - .OUTPUT(y, TensorType({DT_FLOAT, DT_FLOAT16})) - .OP_END_FACTORY_REG(DropOutDoMask) - -} // namespace ge - -#endif // GE_OP_NN_OTHER_OPS_H diff --git a/third_party/fwkacllib/inc/ops/nn_training_ops.h b/third_party/fwkacllib/inc/ops/nn_training_ops.h index 63fd59d6..97ca6dc0 100644 --- a/third_party/fwkacllib/inc/ops/nn_training_ops.h +++ b/third_party/fwkacllib/inc/ops/nn_training_ops.h @@ -17,6 +17,7 @@ #ifndef GE_OP_TRAINING_OPS_H #define GE_OP_TRAINING_OPS_H +#include "../../../inc/external/graph/operator_reg.h" #include "../graph/operator_reg.h" namespace ge { /** @@ -110,6 +111,63 @@ REG_OP(ApplyMomentum) .ATTR(use_locking, Bool, false) .OP_END_FACTORY_REG(ApplyMomentum) +/** +*@brief Updates relevant entries in "var" and "accum" according to the adagrad scheme. + +*@par Inputs: +* Five inputs, including: +*@li var: An NCHW, NHWC, or ND Tensor of type float32. +*@li accum: An NCHW, NHWC, or ND Tensor of type float32. +*@li lr: An NCHW, NHWC, or ND Tensor of type float32. +*@li grad: An NCHW, NHWC, or ND Tensor of type float32. +*@li indices: An NCHW, NHWC, or ND Tensor of type float32. + +*@par Attributes: +*@li use_locking: An optional bool. Defaults to "False". If "True", the operation will be protected by a lock. +*@li update_slots: An optional bool. Defaults to "True". If "True", the calcution will be different as "False". + +*@par Outputs: +*var: A Tensor. Has the same type and format as input "var". +*/ +REG_OP(SparseApplyAdagrad) + .INPUT(var, TensorType({DT_FLOAT})) + .INPUT(accum, TensorType({DT_FLOAT})) + .INPUT(lr, TensorType({DT_FLOAT})) + .INPUT(grad, TensorType({DT_FLOAT})) + .INPUT(indices, TensorType({DT_INT32})) + .OUTPUT(var, TensorType({DT_FLOAT})) + .ATTR(use_locking, Bool, false) + .OP_END_FACTORY_REG(SparseApplyAdagrad) + +/** +*@brief Updates relevant entries in "var" and "accum" according to the adagrad scheme. + +*@par Inputs: +* Four inputs, including: +*@li var: An NCHW, NHWC, or ND Tensor of type float32. +*@li accum: An NCHW, NHWC, or ND Tensor of type float32. +*@li grad: An NCHW, NHWC, or ND Tensor of type float32. +*@li indices: An NCHW, NHWC, or ND Tensor of type int32. + +*@par Attributes: +*@li lr: Required, used for computation. +*@li use_locking: An optional bool. Defaults to "False". If "True", the operation will be protected by a lock. +*@li update_slots: An optional bool. Defaults to "True". If "True", the calcution will be different as "False". + +*@par Outputs: +*var: A Tensor. Has the same type and format as input "var". +*/ +REG_OP(SparseApplyAdagradD) + .INPUT(var, TensorType({DT_FLOAT})) + .INPUT(accum, TensorType({DT_FLOAT})) + .INPUT(grad, TensorType({DT_FLOAT})) + .INPUT(indices, TensorType({DT_INT32})) + .OUTPUT(var, TensorType({DT_FLOAT})) + .REQUIRED_ATTR(lr, Float) + .ATTR(use_locking, Bool, false) + .OP_END_FACTORY_REG(SparseApplyAdagradD) + + REG_OP(ApplyMomentumCCE) .INPUT(var, TensorType::NumberType()) .INPUT(accum, TensorType::NumberType()) @@ -967,6 +1025,186 @@ REG_OP(LarsV2Update) .ATTR(use_clip, Bool, false) .OP_END_FACTORY_REG(LarsV2Update) +/** +* @brief Update relevant entries in '*var' according to the Ftrl-proximal scheme. + +* @par Inputs: +* Nine inputs, including: +* @li var: A mutable Tensor. Must be of type TensorType::NumberType(). +* Should be a Variable Tensor. +* @li accum: A mutable Tensor of the same type as "var". 
+* Should be a Variable Tensor. +* @li linear: A mutable Tensor of the same type as "var". +* Should be a Variable Tensor. +* @li grad: A Tensor of the same type as "var", for the gradient. +* @li indices: A vector of indices into the first dimension of var and accum. +* @li lr: A Tensor of the same type as "var", for the scaling factor. Must be a scalar. +* @li l1: A Tensor of the same type as "var", for L1 regulariation. Must be a scalar. +* @li l2: A Tensor of the same type as "var", for L2 regulariation. Must be a scalar. +* @li lr_power: A Tensor of the same type as "var", for the scaling factor. Must be a scalar. + +* @par Attributes: +* use_locking: An optional bool. Defaults to "False". +* If "True", updating of the "var" and "accum" tensors will be +* protected by a lock; otherwise the behavior is undefined, +* but may exhibit less contention. + +* @par Outputs: +* var: A Tensor. Has the same type and format as input "var". +*/ +REG_OP(SparseApplyFtrl) + .INPUT(var, TensorType({DT_FLOAT})) + .INPUT(accum, TensorType({DT_FLOAT})) + .INPUT(linear, TensorType({DT_FLOAT})) + .INPUT(grad, TensorType({DT_FLOAT})) + .INPUT(indices, TensorType({DT_INT32})) + .INPUT(lr, TensorType({DT_FLOAT})) + .INPUT(l1, TensorType({DT_FLOAT})) + .INPUT(l2, TensorType({DT_FLOAT})) + .INPUT(lr_power, TensorType({DT_FLOAT})) + .OUTPUT(var, TensorType({DT_FLOAT})) + .ATTR(use_locking, Bool, false) + .OP_END_FACTORY_REG(SparseApplyFtrl) + +/** +* @brief Update relevant entries in '*var' according to the Ftrl-proximal scheme. + +* @par Inputs: +* Nine inputs, including: +* @li var: A mutable Tensor. Must be of type TensorType::NumberType(). +* Should be a Variable Tensor. +* @li accum: A mutable Tensor of the same type as "var". +* Should be a Variable Tensor. +* @li linear: A mutable Tensor of the same type as "var". +* Should be a Variable Tensor. +* @li grad: A Tensor of the same type as "var", for the gradient. +* @li indices: A vector of indices into the first dimension of var and accum. +* @li lr: A Tensor of the same type as "var", for the scaling factor. Must be a scalar. +* @li l1: A Tensor of the same type as "var", for L1 regulariation. Must be a scalar. +* @li l2: A Tensor of the same type as "var", for L2 regulariation. Must be a scalar. +* @li lr_power: A Tensor of the same type as "var", for the scaling factor. Must be a scalar. + +* @par Attributes: +* use_locking: An optional bool. Defaults to "False". +* If "True", updating of the "var" and "accum" tensors will be +* protected by a lock; otherwise the behavior is undefined, +* but may exhibit less contention. + +* @par Outputs: +* var: A Tensor. Has the same type and format as input "var". +*/ +REG_OP(SparseApplyFtrlD) + .INPUT(var, TensorType({DT_FLOAT})) + .INPUT(accum, TensorType({DT_FLOAT})) + .INPUT(linear, TensorType({DT_FLOAT})) + .INPUT(grad, TensorType({DT_FLOAT})) + .INPUT(indices, TensorType({DT_INT32})) + .OUTPUT(var, TensorType({DT_FLOAT})) + .REQUIRED_ATTR(lr, Float) + .REQUIRED_ATTR(l1, Float) + .REQUIRED_ATTR(l2, Float) + .REQUIRED_ATTR(lr_power, Float) + .ATTR(use_locking, Bool, false) + .OP_END_FACTORY_REG(SparseApplyFtrlD) + +/** +* @brief Update relevant entries in '*var' according to the Ftrl-proximal scheme. +* That is for rows we have grad for, we update var, accum and linear + +* @par Inputs: +* Ten inputs, including: +* @li var: A mutable Tensor. Must be of type TensorType::NumberType(). +* Should be a Variable Tensor. +* @li accum: A mutable Tensor of the same type as "var". +* Should be a Variable Tensor. 
+* @li linear: A mutable Tensor of the same type as "var". +* Should be a Variable Tensor. +* @li grad: A Tensor of the same type as "var", for the gradient. +* @li indices: A vector of indices into the first dimension of var and accum. +* @li lr: A Tensor of the same type as "var", for the scaling factor. Must be a scalar. +* @li l1: A Tensor of the same type as "var", for L1 regulariation. Must be a scalar. +* @li l2: A Tensor of the same type as "var", for L2 regulariation. Must be a scalar. +* @li l2_shrinkage: A Tensor of the same type as "var", L2 shrinkage regulariation. Must be a scalar. +* @li lr_power: A Tensor of the same type as "var", for the scaling factor. Must be a scalar. + +* @par Attributes: +* use_locking: An optional bool. Defaults to "False". +* If "True", updating of the "var" and "accum" tensors will be +* rotected by a lock; otherwise the behavior is undefined, +* but may exhibit less contention. + +* @par Outputs: +* var: A Tensor. Has the same type and format as input "var". +*/ +REG_OP(SparseApplyFtrlV2) + .INPUT(var, TensorType({DT_FLOAT})) + .INPUT(accum, TensorType({DT_FLOAT})) + .INPUT(linear, TensorType({DT_FLOAT})) + .INPUT(grad, TensorType({DT_FLOAT})) + .INPUT(indices, TensorType({DT_INT32})) + .INPUT(lr, TensorType({DT_FLOAT})) + .INPUT(l1, TensorType({DT_FLOAT})) + .INPUT(l2, TensorType({DT_FLOAT})) + .INPUT(l2_shrinkage, TensorType({DT_FLOAT})) + .INPUT(lr_power, TensorType({DT_FLOAT})) + .OUTPUT(var, TensorType({DT_FLOAT})) + .ATTR(use_locking, Bool, false) + .OP_END_FACTORY_REG(SparseApplyFtrlV2) + +/** +* @brief Update relevant entries in '*var' according to the Ftrl-proximal scheme. +* That is for rows we have grad for, we update var, accum and linear + +* @par Inputs: +* Ten inputs, including: +* @li var: A mutable Tensor. Must be of type TensorType::NumberType(). +* Should be a Variable Tensor. +* @li accum: A mutable Tensor of the same type as "var". +* Should be a Variable Tensor. +* @li linear: A mutable Tensor of the same type as "var". +* Should be a Variable Tensor. +* @li grad: A Tensor of the same type as "var", for the gradient. +* @li indices: A vector of indices into the first dimension of var and accum. + +* @par Attributes: +* @li lr: A Tensor of the same type as "var", for the scaling factor. Must be a scalar. +* @li l1: A Tensor of the same type as "var", for L1 regulariation. Must be a scalar. +* @li l2: A Tensor of the same type as "var", for L2 regulariation. Must be a scalar. +* @li l2_shrinkage: A Tensor of the same type as "var", L2 shrinkage regulariation. Must be a scalar. +* @li lr_power: A Tensor of the same type as "var", for the scaling factor. Must be a scalar. +* @li use_locking: An optional bool. Defaults to "False". +* If "True", updating of the "var" and "accum" tensors will be +* rotected by a lock; otherwise the behavior is undefined, +* but may exhibit less contention. + +* @par Outputs: +* var: A Tensor. Has the same type and format as input "var". +*/ +REG_OP(SparseApplyFtrlV2D) + .INPUT(var, TensorType({DT_FLOAT})) + .INPUT(accum, TensorType({DT_FLOAT})) + .INPUT(linear, TensorType({DT_FLOAT})) + .INPUT(grad, TensorType({DT_FLOAT})) + .INPUT(indices, TensorType({DT_INT32})) + .OUTPUT(var, TensorType({DT_FLOAT})) + .REQUIRED_ATTR(lr, Float) + .REQUIRED_ATTR(l1, Float) + .REQUIRED_ATTR(l2, Float) + .REQUIRED_ATTR(l2_shrinkage, Float) + .REQUIRED_ATTR(lr_power, Float) + .ATTR(use_locking, Bool, false) + .OP_END_FACTORY_REG(SparseApplyFtrlV2D) + +/** +*@brief Clean memory of workspace list. 
+ +*@par Attributes: +* @li automic_add_mem_size: sizes of workspaces. + +*/ +REG_OP(AtomicAddrClean) + .ATTR(automic_add_mem_size, ListInt, {}) + .OP_END_FACTORY_REG(AtomicAddrClean) } // namespace ge #endif // GE_OP_TRAINING_OPS_H diff --git a/third_party/fwkacllib/inc/ops/npu_loss_scale_ops.h b/third_party/fwkacllib/inc/ops/npu_loss_scale_ops.h index f2ed3104..eedd1c4c 100644 --- a/third_party/fwkacllib/inc/ops/npu_loss_scale_ops.h +++ b/third_party/fwkacllib/inc/ops/npu_loss_scale_ops.h @@ -33,15 +33,39 @@ REG_OP(NPUGetFloatStatusOperator) .OUTPUT(data, TensorType({DT_FLOAT})) .OP_END_FACTORY_REG(NPUGetFloatStatusOperator) +/** +*@brief Produces a variable with 0 in memory. + +*@par Outputs: +*y: A Tensor of type int32, output eight numbers with a value of zero. +*/ REG_OP(NPUAllocFloatStatus) .OUTPUT(data, TensorType({DT_FLOAT})) .OP_END_FACTORY_REG(NPUAllocFloatStatus) +/** +*@brief Set the value of address 0x40000 to 0 in each core. + +*@par Inputs: +*@li addr: A tensor of type float32. + +*@par Outputs: +*data: A Tensor of type float32. +*/ REG_OP(NPUClearFloatStatus) .INPUT(addr, TensorType{DT_FLOAT}) .OUTPUT(data, TensorType({DT_FLOAT})) .OP_END_FACTORY_REG(NPUClearFloatStatus) +/** +*@brief Get the value of address 0x40000. + +*@par Inputs: +*@li addr: A tensor of type float32. + +*@par Outputs: +*data: A Tensor of type float32. +*/ REG_OP(NPUGetFloatStatus) .INPUT(addr, TensorType{DT_FLOAT}) .OUTPUT(data, TensorType({DT_FLOAT})) diff --git a/third_party/fwkacllib/inc/ops/reduce_ops.h b/third_party/fwkacllib/inc/ops/reduce_ops.h index daf82c51..d7882df3 100644 --- a/third_party/fwkacllib/inc/ops/reduce_ops.h +++ b/third_party/fwkacllib/inc/ops/reduce_ops.h @@ -153,6 +153,20 @@ REG_OP(ReduceAll) .ATTR(keep_dims, Bool, false) .OP_END_FACTORY_REG(ReduceAll) +/** +*@brief Reduce a tensor on a certain axis based on product.. + +*@par Inputs: +*Two inputs, including: +*@li x: A mutable Tensor. Must be the type of NumberType. +*@li axis: A mutable Tensor. The dimensions to reduce. + +*@par Attributes: +*@li keep_dims: A bool. If true, retains reduced dimensions with length 1. Defaults to "False". + +*@par Outputs: +*y: A Tensor. Has the same type and format as input "x". +*/ REG_OP(ReduceProd) .INPUT(x,TensorType::NumberType()) .INPUT(axis, TensorType::IndexNumberType()) @@ -160,6 +174,23 @@ REG_OP(ReduceProd) .ATTR(keep_dims, Bool, false) .OP_END_FACTORY_REG(ReduceProd) +/** +*@brief Computes the product of elements across dimensions of a tensor. + +*@par Inputs: +* One input: \n +*x: A Tensor. Must be one of the following types: float16, float, int8, uint8. + +*@par Attributes: +*@li axis: A required int8, int16, int32, or int64. Specifies the dimensions to reduce. No default value. +*@li keep_dims: An optional bool. If "True", retains reduced dimensions with length 1. Defaults to "False". + +*@par Outputs: +*y: A Tensor. Has the same type and format as input "x". + +*@attention Constraints: +* "keep_dims" is in the range [-rank(input_tensor), rank(input_tensor)]. 
+*/
 REG_OP(ReduceProdD)
 .INPUT(x,TensorType({DT_FLOAT, DT_UINT8, DT_INT8, DT_INT32, DT_FLOAT16}))
 .OUTPUT(y,TensorType({DT_FLOAT, DT_UINT8, DT_INT8, DT_INT32, DT_FLOAT16}))
diff --git a/third_party/fwkacllib/inc/ops/basic_lstm_cell.h b/third_party/fwkacllib/inc/ops/rnn.h
similarity index 98%
rename from third_party/fwkacllib/inc/ops/basic_lstm_cell.h
rename to third_party/fwkacllib/inc/ops/rnn.h
index 68267fdb..8b0157fb 100644
--- a/third_party/fwkacllib/inc/ops/basic_lstm_cell.h
+++ b/third_party/fwkacllib/inc/ops/rnn.h
@@ -14,8 +14,8 @@
 * limitations under the License.
 */

-#ifndef GE_OP_BASIC_LSTM_CELL_H
-#define GE_OP_BASIC_LSTM_CELL_H
+#ifndef GE_OP_RNN_H
+#define GE_OP_RNN_H

 #include "../graph/operator_reg.h"

@@ -151,4 +151,4 @@ REG_OP(BasicLSTMCellCStateGrad)
 .OP_END_FACTORY_REG(BasicLSTMCellCStateGrad)
 } // namespace ge

-#endif // GE_OP_BASIC_LSTM_CELL_H
+#endif // GE_OP_RNN_H
diff --git a/third_party/fwkacllib/inc/ops/rpn_ops.h b/third_party/fwkacllib/inc/ops/rpn_ops.h
index 1c45f1af..29c0fbc9 100644
--- a/third_party/fwkacllib/inc/ops/rpn_ops.h
+++ b/third_party/fwkacllib/inc/ops/rpn_ops.h
@@ -19,6 +19,32 @@
 #include "../graph/operator_reg.h"

 namespace ge {
+/**
+*@brief Iteratively removes lower-scoring boxes which have an IoU greater than
+* "iou_threshold" with a higher-scoring box, according to their
+* intersection-over-union (IoU).

+*@par Inputs:
+* @li box_scores: 2-D tensor with shape of [N, 8], including proposal boxes and
+* corresponding confidence scores.
+
+* @par Attributes:
+* @li iou_threshold: An optional float. The threshold for deciding whether boxes
+* overlap too much with respect to IoU.
+
+* @par Outputs:
+* @li selected_boxes: 2-D tensor with shape of [N,5], representing filtered
+* boxes including proposal boxes and corresponding confidence scores.
+* @li selected_idx: 1-D tensor with shape of [N], representing the indices of
+* the input proposal boxes.
+* @li selected_mask: 1-D tensor with shape of [N], a flag indicating whether
+* each output proposal box is valid.
+ +* @attention Constraints: +* The 2nd-dim of input box_scores must be equal to 8.\n +* Only supports 2864 input boxes at one time.\n + +*/ REG_OP(NMSWithMask) .INPUT(box_scores, TensorType({DT_FLOAT, DT_FLOAT16})) .OUTPUT(selected_boxes, TensorType({DT_FLOAT, DT_FLOAT16})) diff --git a/third_party/fwkacllib/inc/runtime/event.h b/third_party/fwkacllib/inc/runtime/event.h index 52b04f7f..07201762 100644 --- a/third_party/fwkacllib/inc/runtime/event.h +++ b/third_party/fwkacllib/inc/runtime/event.h @@ -108,28 +108,6 @@ RTS_API rtError_t rtEventGetTimeStamp(uint64_t *time, rtEvent_t event); */ RTS_API rtError_t rtNameEvent(rtEvent_t event_, const char *name); -/** - * @ingroup dvrt_event - * @brief make event shared interprocess and assigned a name - * @param [in] event event to be shared - * @param [in] name identification name - * @return RT_ERROR_NONE for ok - * @return RT_ERROR_INVALID_VALUE for error input - * @return RT_ERROR_INVALID_RESOURCE_HANDLE for invalid resource handle - */ -RTS_API rtError_t rtIpcSetEventName(rtEvent_t event, char *name, uint32_t len); - -/** - * @ingroup dvrt_event - * @brief open a interprocess shared event - * @param [in|out] event event to be opened - * @param [in] name identification name - * @return RT_ERROR_NONE for ok - * @return RT_ERROR_INVALID_VALUE for error input of ptr, name - * @return RT_ERROR_DRV_ERR for driver error - */ -RTS_API rtError_t rtIpcOpenEvent(rtEvent_t *event, const char *name); - /** * @ingroup dvrt_event * @brief Create a notify diff --git a/third_party/fwkacllib/inc/runtime/kernel.h b/third_party/fwkacllib/inc/runtime/kernel.h index ed076a8b..a2c75bd3 100644 --- a/third_party/fwkacllib/inc/runtime/kernel.h +++ b/third_party/fwkacllib/inc/runtime/kernel.h @@ -24,7 +24,6 @@ extern "C" { #endif // __cplusplus -/*lint -e148*/ /** * @ingroup rt_kernel * @brief shared memory data control @@ -41,7 +40,6 @@ typedef struct tagRtSmData { uint8_t reserved[2]; // reserved } rtSmData_t; -/*lint -e148*/ /** * @ingroup rt_kernel * @brief shared memory description diff --git a/third_party/fwkacllib/inc/runtime/mem.h b/third_party/fwkacllib/inc/runtime/mem.h index 65c6dc61..ab740b11 100644 --- a/third_party/fwkacllib/inc/runtime/mem.h +++ b/third_party/fwkacllib/inc/runtime/mem.h @@ -17,9 +17,7 @@ #ifndef __CCE_RUNTIME_MEM_H__ #define __CCE_RUNTIME_MEM_H__ -/*lint -e7*/ #include -/*lint +e7*/ #include "base.h" #include "config.h" #include "stream.h" diff --git a/third_party/fwkacllib/inc/runtime/rt_model.h b/third_party/fwkacllib/inc/runtime/rt_model.h index 8fe94424..c41a5a25 100644 --- a/third_party/fwkacllib/inc/runtime/rt_model.h +++ b/third_party/fwkacllib/inc/runtime/rt_model.h @@ -92,7 +92,6 @@ typedef struct tagAicpuModelInfo { uint64_t aicpuTaskPtr; } rtAicpuModelInfo_t; -/* lint -e148 */ typedef struct tagKernelTaskInfo { uint16_t blockDim; uint16_t argsCount; @@ -102,7 +101,7 @@ typedef struct tagKernelTaskInfo { uint8_t *smDesc; uint8_t *args; uint16_t *argsOffset; -} rtKernelTaskInfo_t; /* lint +e148 */ +} rtKernelTaskInfo_t; typedef struct tagKernelTaskInfoEx { uint32_t flags; diff --git a/third_party/fwkacllib/inc/tdt/data_common.h b/third_party/fwkacllib/inc/tdt/data_common.h index 81f79346..14145a60 100644 --- a/third_party/fwkacllib/inc/tdt/data_common.h +++ b/third_party/fwkacllib/inc/tdt/data_common.h @@ -32,6 +32,7 @@ enum TdtDataType { TDT_DATA_LABEL, /**< Data label*/ TDT_END_OF_SEQUENCE, /**< End of Sequence*/ TDT_TENSOR, /**< Tensor*/ + TDT_ABNORMAL, /**< ABNORMAL*/ TDT_DATATYPE_MAX /**< Max*/ }; #endif
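Usage note (illustrative only, not part of this patch): the REG_OP registrations documented above are normally consumed through the generated graph-construction classes. The sketch below shows how the newly added SparseApplyFtrlV2D might be wired into a GE graph, assuming the usual ge::op::<Name> wrapper with set_input_<name>()/set_attr_<name>() accessors that REG_OP generates and the external Graph/TensorDesc/Data API. The shapes, hyperparameter values, and include paths are placeholder assumptions.

// Hypothetical sketch: building a small graph that applies SparseApplyFtrlV2D.
// Assumes the generated set_input_<name>()/set_attr_<name>() accessors and the
// GE external graph API; include paths depend on the local build layout.
#include <string>
#include <vector>

#include "graph/graph.h"
#include "graph/tensor.h"
#include "graph/types.h"
#include "array_ops.h"        // assumed location of ge::op::Data
#include "nn_training_ops.h"  // declares SparseApplyFtrlV2D in this patch

namespace {

// Helper: a Data placeholder with the given shape/type. In a real training
// graph, var/accum/linear would typically be Variable nodes instead.
ge::op::Data MakePlaceholder(const std::string &name, const ge::TensorDesc &desc, int index) {
  ge::op::Data data(name);
  data.set_attr_index(index);
  data.update_input_desc_x(desc);
  data.update_output_desc_y(desc);
  return data;
}

}  // namespace

ge::Graph BuildSparseFtrlGraph() {
  // Illustrative shapes only: a [1024, 16] float parameter updated on 32 rows.
  ge::TensorDesc param_desc(ge::Shape({1024, 16}), ge::FORMAT_ND, ge::DT_FLOAT);
  ge::TensorDesc grad_desc(ge::Shape({32, 16}), ge::FORMAT_ND, ge::DT_FLOAT);
  ge::TensorDesc indices_desc(ge::Shape({32}), ge::FORMAT_ND, ge::DT_INT32);

  auto var = MakePlaceholder("var", param_desc, 0);
  auto accum = MakePlaceholder("accum", param_desc, 1);
  auto linear = MakePlaceholder("linear", param_desc, 2);
  auto grad = MakePlaceholder("grad", grad_desc, 3);
  auto indices = MakePlaceholder("indices", indices_desc, 4);

  // SparseApplyFtrlV2D takes lr/l1/l2/l2_shrinkage/lr_power as required
  // attributes, whereas SparseApplyFtrlV2 takes them as scalar tensor inputs.
  ge::op::SparseApplyFtrlV2D ftrl("sparse_apply_ftrl_v2d");
  ftrl.set_input_var(var)
      .set_input_accum(accum)
      .set_input_linear(linear)
      .set_input_grad(grad)
      .set_input_indices(indices)
      .set_attr_lr(0.01f)
      .set_attr_l1(0.0f)
      .set_attr_l2(0.0f)
      .set_attr_l2_shrinkage(0.0f)
      .set_attr_lr_power(-0.5f)
      .set_attr_use_locking(false);

  ge::Graph graph("sparse_apply_ftrl_v2d_graph");
  graph.SetInputs({var, accum, linear, grad, indices}).SetOutputs({ftrl});
  return graph;
}

The V2/V2D split mirrors the ReduceProd/ReduceProdD pair in this patch: the "D" variants move scalar tensor inputs into compile-time attributes, which is why their documentation lists those values under Attributes rather than Inputs.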