From 666a9179562a65cb503e29fd75feba417a791dd8 Mon Sep 17 00:00:00 2001
From: taoxiangdong <taoxiangdong1@huawei.com>
Date: Sat, 12 Dec 2020 17:58:28 +0800
Subject: [PATCH 01/11] Aicpu check op type

---
 ge/graph/build/model_builder.cc               |  44 +++++
 ge/graph/build/model_builder.h                |   2 +
 .../load/new_model_manager/davinci_model.cc   |   2 +
 .../load/new_model_manager/model_manager.cc   | 170 ++++++++++++++++++
 .../load/new_model_manager/model_manager.h    |   4 +
 ge/hybrid/model/hybrid_model_builder.cc       |  26 +++
 ge/hybrid/model/hybrid_model_builder.h        |   1 +
 7 files changed, 249 insertions(+)

diff --git a/ge/graph/build/model_builder.cc b/ge/graph/build/model_builder.cc
index 37eb499a..3c36d4ee 100755
--- a/ge/graph/build/model_builder.cc
+++ b/ge/graph/build/model_builder.cc
@@ -581,9 +581,15 @@ Status ModelBuilder::SaveDataToModel(ge::Model &model, ge::GeModel &ge_model) {
   // Add TBE Kernels and custom aicpu op bin
   std::set<std::string> tbe_name_set;
   std::set<std::string> aicpu_name_set;
+  std::vector<std::string> aicpu_optype_list;
+  std::vector<std::string> aicpu_tf_optype_list;
+  std::set<std::string> aicpu_optype_set;
+  std::set<std::string> aicpu_tf_optype_set;
   for (const ge::NodePtr &n : compute_graph_->GetNodes(compute_graph_->GetGraphUnknownFlag())) {
     auto node_op_desc = n->GetOpDesc();
     GE_IF_BOOL_EXEC(node_op_desc == nullptr, continue);
+    // check aicpu op type
+    (void)CheckAicpuOp(node_op_desc, aicpu_optype_set, aicpu_tf_optype_set);
     TBEKernelPtr tbe_kernel = node_op_desc->TryGetExtAttr(ge::OP_EXTATTR_NAME_TBE_KERNEL, TBEKernelPtr());
     if (tbe_kernel == nullptr) {
       std::string kernel_name;
@@ -605,6 +611,25 @@ Status ModelBuilder::SaveDataToModel(ge::Model &model, ge::GeModel &ge_model) {
     tbe_kernel_store_.AddTBEKernel(tbe_kernel);
   }
 
+  if (ge::AttrUtils::GetListStr(ge_model, "needCheckCpu", aicpu_optype_list)) {
+    GELOGI("Already have aicpu optype size: %zu", aicpu_optype_list.size());
+    aicpu_optype_set.insert(aicpu_optype_list.begin(), aicpu_optype_list.end());
+  }
+
+  if (ge::AttrUtils::GetListStr(ge_model, "needCheckTf", aicpu_tf_optype_list)) {
+    GELOGI("Already have aicpu tf optype size: %zu", aicpu_tf_optype_list.size());
+    aicpu_tf_optype_set.insert(aicpu_tf_optype_list.begin(), aicpu_tf_optype_list.end());
+  }
+
+  // reset list with set
+  aicpu_optype_list.assign(aicpu_optype_set.begin(), aicpu_optype_set.end());
+  aicpu_tf_optype_list.assign(aicpu_tf_optype_set.begin(), aicpu_tf_optype_set.end());
+  GELOGI("Check Aicpu op types ComputeGraph: %s aicpu_optype_set: %zu, aicpu_optype_list: %zu, aicpu_tf_optype_set: %zu, aicpu_tf_optype_list:%zu.",
+         compute_graph_->GetName().c_str(), aicpu_optype_set.size(), aicpu_optype_list.size(), aicpu_tf_optype_set.size(), aicpu_tf_optype_list.size());
+  GE_CHK_BOOL_EXEC(ge::AttrUtils::SetListStr(model, "needCheckCpu", aicpu_optype_list), return FAILED, "Set attr needCheckCpu fail.");
+
+  GE_CHK_BOOL_EXEC(ge::AttrUtils::SetListStr(model, "needCheckTf", aicpu_tf_optype_list), return FAILED, "Set attr needCheckTf fail.");
+
   for (const ge::NodePtr &n : compute_graph_->GetNodes(compute_graph_->GetGraphUnknownFlag())) {
     auto node_op_desc = n->GetOpDesc();
     GE_IF_BOOL_EXEC(node_op_desc == nullptr, continue);
@@ -796,4 +821,23 @@ Status ModelBuilder::CompileSingleOp() {
   GE_TIMESTAMP_CALLNUM_END(BatchCompileOp, "GraphBuild::CompileOp");
   return ge::SUCCESS;
 }
+
+Status ModelBuilder::CheckAicpuOp(const OpDescPtr &op_desc, std::set<std::string> &cpue_check_set, std::set<std::string> &tf_engine_set) {
+  GE_CHECK_NOTNULL(op_desc);
+  std::string aicpu_optype;
+  bool has_attr_check_cpu = ge::AttrUtils::GetStr(op_desc, "needCheckCpu", aicpu_optype);
+  std::vector<std::string> tf_optypes;  // Gathers this op's "needCheckCpu" (string) and "needCheckTf" (string list) attributes into the caller's sets.
+  bool has_attr_check_tf =  ge::AttrUtils::GetListStr(op_desc, "needCheckTf", tf_optypes);
+  if (has_attr_check_cpu && !aicpu_optype.empty()) {  // attribute was read and is non-empty
+    GELOGI("Check Aicpu op type %s, op name: %s.", op_desc->GetType().c_str(), op_desc->GetName().c_str());
+    cpue_check_set.insert(aicpu_optype);  // std::set keeps one copy when several nodes name the same type
+  }
+  // Every entry of the TF list is merged into the caller's TF set in the same way.
+  if (has_attr_check_tf && !tf_optypes.empty()) {
+    GELOGI("Check Tf op type %s, op name: %s, tf_optypes size: %zu.", op_desc->GetType().c_str(), op_desc->GetName().c_str(), tf_optypes.size());
+    tf_engine_set.insert(tf_optypes.begin(), tf_optypes.end());
+  }
+  
+  return SUCCESS;
+}
 }  // namespace ge
diff --git a/ge/graph/build/model_builder.h b/ge/graph/build/model_builder.h
index e75521c7..8efd76e9 100644
--- a/ge/graph/build/model_builder.h
+++ b/ge/graph/build/model_builder.h
@@ -83,6 +83,8 @@ class ModelBuilder {
 
   Status CompileSingleOp();
 
+  Status CheckAicpuOp(const OpDescPtr &op_desc, std::set<std::string> &cpue_check_set, std::set<std::string> &tf_engine_set);
+
   uint64_t session_id_;
 
   map<int64_t, size_t> mem_type_to_mem_offset_;
diff --git a/ge/graph/load/new_model_manager/davinci_model.cc b/ge/graph/load/new_model_manager/davinci_model.cc
index bc755e07..fceadc32 100755
--- a/ge/graph/load/new_model_manager/davinci_model.cc
+++ b/ge/graph/load/new_model_manager/davinci_model.cc
@@ -485,6 +485,8 @@ Status DavinciModel::DoTaskSink() {
 
   GE_CHK_STATUS_RET(ModelManager::GetInstance()->LaunchCustAicpuSo(), "Launch cust aicpu so failed.");
 
+  GE_CHK_STATUS_RET(ModelManager::GetInstance()->CheckAicpuOp(ge_model_), "Check aicpu op type failed.");
+
   GE_CHK_STATUS_RET(InitEntryTask(), "InitEntryTask failed.");
 
   GE_CHK_STATUS_RET(DistributeTask(), "Distribute failed.");
diff --git a/ge/graph/load/new_model_manager/model_manager.cc b/ge/graph/load/new_model_manager/model_manager.cc
index b595ac39..7a7afe5d 100755
--- a/ge/graph/load/new_model_manager/model_manager.cc
+++ b/ge/graph/load/new_model_manager/model_manager.cc
@@ -18,6 +18,7 @@
 
 #include <string>
 
+#include "aicpu/aicpu_schedule/aicpu_op_type_list.h"
 #include "common/dump/dump_manager.h"
 #include "common/l2_cache_optimize.h"
 #include "common/profiling/profiling_manager.h"
@@ -30,6 +31,7 @@
 #include "graph/load/new_model_manager/davinci_model_parser.h"
 #include "model/ge_root_model.h"
 #include "graph/common/local_context.h"
+#include "graph/utils/attr_utils.h"
 #include "common/formats/utils/formats_trans_utils.h"
 #include "hybrid/hybrid_davinci_model.h"
 
@@ -1529,4 +1531,172 @@ Status ModelManager::EnableExceptionDump(const std::map<string, string> &options
   return SUCCESS;
 }
 
+Status ModelManager::LaunchKernelCheckAicpuOp(std::vector<std::string> &aicpu_optype_list, std::vector<std::string> &aicpu_tf_optype_list) {  // Uploads the given op type names, launches the "checkOpType" cpu kernel and fails if the response lists any entry.
+  std::string kernel_name = "checkOpType";
+  GELOGI("LaunchKernelCheckAicpuOpType in, kernel name %s", kernel_name.c_str());
+  std::lock_guard<std::mutex> lock(cust_aicpu_mutex_);  // held until the function returns
+  std::vector<SysOpInfo> req_aicpu_op_info_list;
+  std::vector<SysOpInfo> res_aicpu_op_info_list;
+  std::vector<ReturnCode> res_ret_code_list;
+
+  if (aicpu_optype_list.empty() && aicpu_tf_optype_list.empty()) {
+    GELOGI("No need to check aicpu optype.");
+    return SUCCESS;
+  }
+
+  vector<void *> allocated_mem;  // every pointer obtained from rtMalloc below is recorded here
+  rtError_t status;
+  rtStream_t stream = nullptr;
+  void *args = nullptr;
+  // Arrays later referenced from the request (sysOpInfoList) and response (sysOpInfoList, returnCodeList) structs.
+  void *d_req_op_list = nullptr;
+  void *d_res_op_list = nullptr;
+  void *d_ret_code_list = nullptr;
+  // All three arrays hold one entry per op type, CPU-kernel and TF-kernel types combined.
+  size_t aicpu_op_nums = aicpu_optype_list.size();
+  size_t tf_op_nums = aicpu_tf_optype_list.size();
+  size_t op_nums =  aicpu_op_nums + tf_op_nums;
+  // malloc sysOpInfoList in SysOpCheckInfo
+  status = rtMalloc(&d_req_op_list, op_nums * sizeof(SysOpInfo), RT_MEMORY_HBM);
+  if (status != RT_ERROR_NONE) {
+    GELOGE(RT_FAILED, "Call rt failed, status: 0x%x", status);
+    return RT_ERROR_TO_GE_STATUS(status);
+  }
+  allocated_mem.push_back(d_req_op_list);
+  // NOTE(review): allocated_mem is only released by the callback declared after rtStreamSynchronize below; the early returns on rtMalloc/sync failure do not reach it.
+  // malloc sysOpInfoList in SysOpCheckResp
+  status = rtMalloc(&d_res_op_list, op_nums * sizeof(SysOpInfo), RT_MEMORY_HBM);
+  if (status != RT_ERROR_NONE) {
+    GELOGE(RT_FAILED, "Call rt failed, status: 0x%x", status);
+    return RT_ERROR_TO_GE_STATUS(status);
+  }
+  allocated_mem.push_back(d_res_op_list);
+
+  // malloc returnCodeList in SysOpCheckResp
+  status = rtMalloc(&d_ret_code_list, op_nums * sizeof(ReturnCode), RT_MEMORY_HBM);
+  if (status != RT_ERROR_NONE) {
+    GELOGE(RT_FAILED, "Call rt failed, status: 0x%x", status);
+    return RT_ERROR_TO_GE_STATUS(status);
+  }
+  allocated_mem.push_back(d_ret_code_list);
+  // Copy each CPU-kernel op type name into its own buffer and describe it with a SysOpInfo (address, length, kernel type).
+  for (const auto &op_type : aicpu_optype_list) {
+    SysOpInfo op_info;
+    // malloc op_type name in SysOpInfo (length() bytes only: the copied name carries no terminating NUL)
+    void *d_op_type_name = nullptr;
+    status = rtMalloc(&d_op_type_name, op_type.length(), RT_MEMORY_HBM);
+    if (status != RT_ERROR_NONE) {
+      GELOGE(RT_FAILED, "Call rt failed, status: 0x%x", status);
+      return RT_ERROR_TO_GE_STATUS(status);
+    }
+    allocated_mem.push_back(d_op_type_name);
+    GE_CHK_RT(rtMemcpy(d_op_type_name,  op_type.length(), op_type.c_str(),  op_type.length(), RT_MEMCPY_HOST_TO_DEVICE));
+    op_info.opType = reinterpret_cast<uint64_t>(reinterpret_cast<uintptr_t>(d_op_type_name));
+    op_info.opLen = op_type.length();
+    op_info.kernelsType = CPU_KERNEL;
+    req_aicpu_op_info_list.emplace_back(op_info);
+    // op_info is only queued on the host here; the whole array is uploaded with a single rtMemcpy after both loops
+  }
+  // Same steps for the TF-kernel op types; only kernelsType differs.
+  for (const auto &op_type : aicpu_tf_optype_list) {
+    SysOpInfo op_info;
+    // malloc op_type name in SysOpInfo
+    void *d_op_type_name = nullptr;
+    status = rtMalloc(&d_op_type_name, op_type.size(), RT_MEMORY_HBM);
+    if (status != RT_ERROR_NONE) {
+      GELOGE(RT_FAILED, "Call rt failed, status: 0x%x", status);
+      return RT_ERROR_TO_GE_STATUS(status);
+    }
+    allocated_mem.push_back(d_op_type_name);
+    GE_CHK_RT(rtMemcpy(d_op_type_name,  op_type.size(), op_type.c_str(),  op_type.size(), RT_MEMCPY_HOST_TO_DEVICE));
+    op_info.opType = reinterpret_cast<uint64_t>(reinterpret_cast<uintptr_t>(d_op_type_name));
+    op_info.opLen = op_type.size();
+    op_info.kernelsType = TF_KERNEL;
+    req_aicpu_op_info_list.emplace_back(op_info);
+    // queued on the host only; uploaded together with the CPU-kernel entries below
+  }
+  // Upload the host-side SysOpInfo array into d_req_op_list.
+  GE_CHK_RT(rtMemcpy(d_req_op_list,  sizeof(SysOpInfo) * req_aicpu_op_info_list.size(), req_aicpu_op_info_list.data(),  sizeof(SysOpInfo) * req_aicpu_op_info_list.size(), RT_MEMCPY_HOST_TO_DEVICE));
+
+  SysOpCheckInfo op_check_info_req;
+  SysOpCheckResp op_check_info_res;
+  op_check_info_req.opListNum = op_nums;
+  op_check_info_req.offSetLen = sizeof(SysOpCheckInfo);
+  op_check_info_req.sysOpInfoList = reinterpret_cast<uint64_t>(reinterpret_cast<uintptr_t>(d_req_op_list));
+
+  op_check_info_res.opListNum = op_nums;
+  op_check_info_res.returnCodeList = reinterpret_cast<uint64_t>(reinterpret_cast<uintptr_t>(d_ret_code_list));
+  op_check_info_res.sysOpInfoList = reinterpret_cast<uint64_t>(reinterpret_cast<uintptr_t>(d_res_op_list));
+
+  uint32_t args_size = sizeof(SysOpCheckInfo) + sizeof(SysOpCheckResp);  // args = SysOpCheckInfo immediately followed by SysOpCheckResp (see offSetLen)
+  status = rtMalloc(&args, args_size, RT_MEMORY_HBM);
+  if (status != RT_ERROR_NONE) {
+    GELOGE(RT_FAILED, "Call rt failed, status: 0x%x", status);
+    return RT_ERROR_TO_GE_STATUS(status);
+  }
+  allocated_mem.push_back(args);
+  GE_CHK_RT(rtMemcpy(args,  sizeof(SysOpCheckInfo), &op_check_info_req,  sizeof(SysOpCheckInfo), RT_MEMCPY_HOST_TO_DEVICE));
+  GE_CHK_RT(rtMemcpy(reinterpret_cast<void *>(reinterpret_cast<uint64_t>(reinterpret_cast<uintptr_t>(args)) + op_check_info_req.offSetLen),  sizeof(SysOpCheckResp), &op_check_info_res,  sizeof(SysOpCheckResp), RT_MEMCPY_HOST_TO_DEVICE));
+
+  GE_CHK_RT(rtStreamCreate(&stream, 0));
+  GE_CHK_RT(rtCpuKernelLaunch(nullptr, kernel_name.c_str(), 1, args, args_size, nullptr, stream));
+  // Synchronize the stream before the response is copied back to the host.
+  status = rtStreamSynchronize(stream);
+  if (status != RT_ERROR_NONE) {
+    GELOGE(RT_FAILED, "Call rt stream sync failed, status: 0x%x", status);
+    return RT_ERROR_TO_GE_STATUS(status);
+  }
+
+  // Check the response
+  void *d_op_check_info_res = reinterpret_cast<void *>(reinterpret_cast<uint64_t>(reinterpret_cast<uintptr_t>(args)) + op_check_info_req.offSetLen);
+  op_check_info_res = {};
+  GE_CHK_RT(rtMemcpy(&op_check_info_res,  sizeof(SysOpCheckResp), d_op_check_info_res,  sizeof(SysOpCheckResp), RT_MEMCPY_DEVICE_TO_HOST));
+  std::function<void()> callback = [&]() {
+    for (auto mem : allocated_mem) {
+      GE_CHK_RT(rtFree(mem));
+    }
+    GE_CHK_RT(rtStreamDestroy(stream));
+  };
+
+  uint64_t res_op_nums = op_check_info_res.opListNum;
+  if (res_op_nums != 0) {
+    GE_CHK_RT(rtMemcpy(res_ret_code_list.data(),  sizeof(ReturnCode) * res_op_nums, reinterpret_cast<void *>(reinterpret_cast<uintptr_t>(op_check_info_res.returnCodeList)),  sizeof(ReturnCode) * res_op_nums, RT_MEMCPY_DEVICE_TO_HOST));
+    GE_CHK_RT(rtMemcpy(res_aicpu_op_info_list.data(),  sizeof(SysOpInfo) * res_op_nums, reinterpret_cast<void *>(reinterpret_cast<uintptr_t>(op_check_info_res.sysOpInfoList)),  sizeof(SysOpInfo) * res_op_nums, RT_MEMCPY_DEVICE_TO_HOST));
+    if (res_ret_code_list.size() != res_aicpu_op_info_list.size() || res_ret_code_list.size() != res_op_nums) {
+      GELOGE(FAILED, "Number of retcode is not equal to number of op type.");
+      GE_MAKE_GUARD(release, callback);
+    }
+    std::string fail_reason = "Check aicpu op_type failed. details: ";
+    for (uint32_t i = 0; i < res_op_nums; i++) {
+      ReturnCode ret_code = res_ret_code_list.at(i);
+      SysOpInfo aicpu_info = res_aicpu_op_info_list.at(i);
+      std::vector<char> op_name;
+      GE_CHK_RT(rtMemcpy(op_name.data(), aicpu_info.opLen, reinterpret_cast<void *>(aicpu_info.opType), aicpu_info.opLen, RT_MEMCPY_DEVICE_TO_HOST));
+      std::string kernel_type = (static_cast<OpKernelType>(aicpu_info.kernelsType) == TF_KERNEL) ? "TF_KERNEL" : "CPU_KERNEL";
+      string op_name_str(op_name.data());
+      fail_reason += "op_type: " + op_name_str + " kernel_type: " + kernel_type + " " + to_string(static_cast<int>(ret_code)) + "<0: op_type, 1: format, 2: datatype> not support.";
+    }
+    GELOGE(FAILED, "%s", fail_reason.c_str());
+    GE_MAKE_GUARD(release, callback);
+    return FAILED;
+  }
+  // Reached only when the response reports opListNum == 0; a non-zero count returns FAILED above.
+  GE_MAKE_GUARD(release, callback);
+  GELOGI("Cpu kernel launch check optype task success.");
+  return SUCCESS;
+}
+
+Status ModelManager::CheckAicpuOp(GeModelPtr ge_model) {
+  std::vector<std::string> aicpu_optype_list;
+  std::vector<std::string> aicpu_tf_optype_list;
+  bool aicpu_need_check = ge::AttrUtils::GetListStr(ge_model, "needCheckCpu", aicpu_optype_list);
+  bool tf_need_check = ge::AttrUtils::GetListStr(ge_model, "needCheckTf", aicpu_tf_optype_list);  // both attributes are always queried on the model
+  if (!aicpu_need_check  && !tf_need_check) {  // neither lookup succeeded: skip the kernel launch entirely
+    GELOGI("No need to check aicpu optype.");
+    return SUCCESS;
+  }
+  GE_CHK_STATUS_RET(LaunchKernelCheckAicpuOp(aicpu_optype_list, aicpu_tf_optype_list), "Launch check aicpu op type failed.");  // checks the two lists read from the model
+  return SUCCESS;
+}
+
 }  // namespace ge
diff --git a/ge/graph/load/new_model_manager/model_manager.h b/ge/graph/load/new_model_manager/model_manager.h
index fc98d9c2..4463b3ef 100755
--- a/ge/graph/load/new_model_manager/model_manager.h
+++ b/ge/graph/load/new_model_manager/model_manager.h
@@ -295,6 +295,10 @@ class FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY ModelManager {
 
   ge::Status LaunchKernelCustAicpuSo(const string &kernel_name);
 
+  ge::Status LaunchKernelCheckAicpuOp(std::vector<std::string> &aicpu_optype_list, std::vector<std::string> &aicpu_tf_optype_list);
+
+  ge::Status CheckAicpuOp(GeModelPtr ge_model);
+
   ge::Status GetOrigInputInfo(uint32_t model_id, uint32_t index, OriginInputInfo &orig_input_info);
 
   ge::Status GenSessionId(uint64_t &session_id);
diff --git a/ge/hybrid/model/hybrid_model_builder.cc b/ge/hybrid/model/hybrid_model_builder.cc
index d519c35b..e1c35008 100755
--- a/ge/hybrid/model/hybrid_model_builder.cc
+++ b/ge/hybrid/model/hybrid_model_builder.cc
@@ -21,6 +21,7 @@
 #include "graph/build/memory/var_mem_assign_util.h"
 #include "graph/debug/ge_attr_define.h"
 #include "graph/load/new_model_manager/model_utils.h"
+#include "graph/load/new_model_manager/model_manager.h"
 #include "graph/manager/graph_var_manager.h"
 #include "graph/manager/host_mem_manager.h"
 #include "graph/manager/trans_var_data_utils.h"
@@ -921,6 +922,7 @@ Status HybridModelBuilder::InitWeights() {
 }
 
 Status HybridModelBuilder::LoadTasks() {
+  GE_CHK_STATUS_RET(CheckAicpuOp(), "Check Aicpu op failed.");
   for (auto &it : hybrid_model_.node_items_) {
     auto &node_item = it.second;
     auto &node_ptr = node_item->node;
@@ -1557,5 +1559,29 @@ Status HybridModelBuilder::BuildInputMapping(GraphItem &graph_item,
 
   return SUCCESS;
 }
+
+Status HybridModelBuilder::CheckAicpuOp() {
+  // Merge the "needCheckCpu"/"needCheckTf" lists of every sub-model, then check them with a single kernel launch.
+  std::vector<std::string> aicpu_optype_list;
+  std::vector<std::string> aicpu_tf_optype_list;
+  std::set<std::string> aicpu_optype_set;
+  std::set<std::string> aicpu_tf_optype_set;
+  for (auto &it : ge_root_model_->GetSubgraphInstanceNameToModel()) {
+    auto &ge_model = it.second;
+    GE_CHECK_NOTNULL(ge_model);
+    if (ge::AttrUtils::GetListStr(ge_model, "needCheckCpu", aicpu_optype_list)) {
+      aicpu_optype_set.insert(aicpu_optype_list.begin(), aicpu_optype_list.end());
+    }
+
+    if (ge::AttrUtils::GetListStr(ge_model, "needCheckTf", aicpu_tf_optype_list)) {
+      aicpu_tf_optype_set.insert(aicpu_tf_optype_list.begin(), aicpu_tf_optype_list.end());
+    }
+  }
+  // The sets removed duplicates across sub-models; hand the unique types over as lists.
+  aicpu_optype_list.assign(aicpu_optype_set.begin(), aicpu_optype_set.end());
+  aicpu_tf_optype_list.assign(aicpu_tf_optype_set.begin(), aicpu_tf_optype_set.end());
+  GE_CHK_STATUS_RET(ModelManager::GetInstance()->LaunchKernelCheckAicpuOp(aicpu_optype_list, aicpu_tf_optype_list), "Launch check aicpu op type failed.");
+  return SUCCESS;  // explicit status for the success path; a function returning Status must not fall off its end
+}
 }  // namespace hybrid
 }  // namespace ge
diff --git a/ge/hybrid/model/hybrid_model_builder.h b/ge/hybrid/model/hybrid_model_builder.h
index b90ec982..a549a9f1 100644
--- a/ge/hybrid/model/hybrid_model_builder.h
+++ b/ge/hybrid/model/hybrid_model_builder.h
@@ -78,6 +78,7 @@ class HybridModelBuilder {
   Status ParseVarOutputs(NodeItem &node_item);
   Status LoadKnownShapedSubgraph(ComputeGraph &graph, NodeItem *parent_node_item);
   Status RecoverGraphUnknownFlag();
+  Status CheckAicpuOp();
 
   const char* GetGraphName() const {
     return hybrid_model_.model_name_.c_str();

From b1c70681ec2711f6990fe134ac67187c716bf690 Mon Sep 17 00:00:00 2001
From: taoxiangdong <taoxiangdong1@huawei.com>
Date: Sat, 12 Dec 2020 18:54:04 +0800
Subject: [PATCH 02/11] Update some diff from yellow zone

---
 ge/graph/build/model_builder.cc               | 53 ++++++++++---------
 ge/graph/build/model_builder.h                |  4 +-
 .../load/new_model_manager/davinci_model.cc   |  2 +-
 .../load/new_model_manager/model_manager.cc   | 31 ++++++++---
 .../load/new_model_manager/model_manager.h    |  2 +-
 5 files changed, 57 insertions(+), 35 deletions(-)

diff --git a/ge/graph/build/model_builder.cc b/ge/graph/build/model_builder.cc
index 3c36d4ee..1eb758c3 100755
--- a/ge/graph/build/model_builder.cc
+++ b/ge/graph/build/model_builder.cc
@@ -581,15 +581,11 @@ Status ModelBuilder::SaveDataToModel(ge::Model &model, ge::GeModel &ge_model) {
   // Add TBE Kernels and custom aicpu op bin
   std::set<std::string> tbe_name_set;
   std::set<std::string> aicpu_name_set;
-  std::vector<std::string> aicpu_optype_list;
-  std::vector<std::string> aicpu_tf_optype_list;
-  std::set<std::string> aicpu_optype_set;
-  std::set<std::string> aicpu_tf_optype_set;
   for (const ge::NodePtr &n : compute_graph_->GetNodes(compute_graph_->GetGraphUnknownFlag())) {
     auto node_op_desc = n->GetOpDesc();
     GE_IF_BOOL_EXEC(node_op_desc == nullptr, continue);
     // check aicpu op type
-    (void)CheckAicpuOp(node_op_desc, aicpu_optype_set, aicpu_tf_optype_set);
+    (void)CollectCheckAicpuAttr(node_op_desc, aicpu_optype_set, aicpu_tf_optype_set);
     TBEKernelPtr tbe_kernel = node_op_desc->TryGetExtAttr(ge::OP_EXTATTR_NAME_TBE_KERNEL, TBEKernelPtr());
     if (tbe_kernel == nullptr) {
       std::string kernel_name;
@@ -611,24 +607,7 @@ Status ModelBuilder::SaveDataToModel(ge::Model &model, ge::GeModel &ge_model) {
     tbe_kernel_store_.AddTBEKernel(tbe_kernel);
   }
 
-  if (ge::AttrUtils::GetListStr(ge_model, "needCheckCpu", aicpu_optype_list)) {
-    GELOGI("Already have aicpu optype size: %zu", aicpu_optype_list.size());
-    aicpu_optype_set.insert(aicpu_optype_list.begin(), aicpu_optype_list.end());
-  }
-
-  if (ge::AttrUtils::GetListStr(ge_model, "needCheckTf", aicpu_tf_optype_list)) {
-    GELOGI("Already have aicpu tf optype size: %zu", aicpu_tf_optype_list.size());
-    aicpu_tf_optype_set.insert(aicpu_tf_optype_list.begin(), aicpu_tf_optype_list.end());
-  }
-
-  // reset list with set
-  aicpu_optype_list.assign(aicpu_optype_set.begin(), aicpu_optype_set.end());
-  aicpu_tf_optype_list.assign(aicpu_tf_optype_set.begin(), aicpu_tf_optype_set.end());
-  GELOGI("Check Aicpu op types ComputeGraph: %s aicpu_optype_set: %zu, aicpu_optype_list: %zu, aicpu_tf_optype_set: %zu, aicpu_tf_optype_list:%zu.",
-         compute_graph_->GetName().c_str(), aicpu_optype_set.size(), aicpu_optype_list.size(), aicpu_tf_optype_set.size(), aicpu_tf_optype_list.size());
-  GE_CHK_BOOL_EXEC(ge::AttrUtils::SetListStr(model, "needCheckCpu", aicpu_optype_list), return FAILED, "Set attr needCheckCpu fail.");
-
-  GE_CHK_BOOL_EXEC(ge::AttrUtils::SetListStr(model, "needCheckTf", aicpu_tf_optype_list), return FAILED, "Set attr needCheckTf fail.");
+  (void)SetModelAicpuCheckAttr(model);
 
   for (const ge::NodePtr &n : compute_graph_->GetNodes(compute_graph_->GetGraphUnknownFlag())) {
     auto node_op_desc = n->GetOpDesc();
@@ -822,7 +801,7 @@ Status ModelBuilder::CompileSingleOp() {
   return ge::SUCCESS;
 }
 
-Status ModelBuilder::CheckAicpuOp(const OpDescPtr &op_desc, std::set<std::string> &cpue_check_set, std::set<std::string> &tf_engine_set) {
+Status ModelBuilder::CollectCheckAicpuAttr(const OpDescPtr &op_desc, std::set<std::string> &cpue_check_set, std::set<std::string> &tf_engine_set) {
   GE_CHECK_NOTNULL(op_desc);
   std::string aicpu_optype;
   bool has_attr_check_cpu = ge::AttrUtils::GetStr(op_desc, "needCheckCpu", aicpu_optype);
@@ -840,4 +819,30 @@ Status ModelBuilder::CheckAicpuOp(const OpDescPtr &op_desc, std::set<std::string
   
   return SUCCESS;
 }
+
+Status ModelBuilder::SetModelCheckAicpuAttr(ge::Model &model) {  // Reads "needCheckCpu"/"needCheckTf" from `model`, de-duplicates both lists and writes them back.
+  std::vector<std::string> aicpu_optype_list;
+  std::vector<std::string> aicpu_tf_optype_list;
+  std::set<std::string> aicpu_optype_set;
+  std::set<std::string> aicpu_tf_optype_set;
+  if (ge::AttrUtils::GetListStr(&model, "needCheckCpu", aicpu_optype_list)) {
+    GELOGI("Already have aicpu optype size: %zu", aicpu_optype_list.size());
+    aicpu_optype_set.insert(aicpu_optype_list.begin(), aicpu_optype_list.end());
+  }
+  // NOTE(review): both sets start empty and are filled only from attributes already on `model`; types collected per node elsewhere never reach this function.
+  if (ge::AttrUtils::GetListStr(&model, "needCheckTf", aicpu_tf_optype_list)) {
+    GELOGI("Already have aicpu tf optype size: %zu", aicpu_tf_optype_list.size());
+    aicpu_tf_optype_set.insert(aicpu_tf_optype_list.begin(), aicpu_tf_optype_list.end());
+  }
+
+  // copy the de-duplicated types (iterated in std::set order) back into the lists
+  aicpu_optype_list.assign(aicpu_optype_set.begin(), aicpu_optype_set.end());
+  aicpu_tf_optype_list.assign(aicpu_tf_optype_set.begin(), aicpu_tf_optype_set.end());
+  GELOGI("Check Aicpu op types ComputeGraph: %s aicpu_optype_set: %zu, aicpu_optype_list: %zu, aicpu_tf_optype_set: %zu, aicpu_tf_optype_list:%zu.",
+         compute_graph_->GetName().c_str(), aicpu_optype_set.size(), aicpu_optype_list.size(), aicpu_tf_optype_set.size(), aicpu_tf_optype_list.size());
+  GE_CHK_BOOL_EXEC(ge::AttrUtils::SetListStr(&model, "needCheckCpu", aicpu_optype_list), return FAILED, "Set attr needCheckCpu fail.");
+  // NOTE(review): the call added to SaveDataToModel in this same patch is spelled SetModelAicpuCheckAttr and casts the result to (void) - TODO confirm the intended name.
+  GE_CHK_BOOL_EXEC(ge::AttrUtils::SetListStr(&model, "needCheckTf", aicpu_tf_optype_list), return FAILED, "Set attr needCheckTf fail.");
+  return SUCCESS;
+}
 }  // namespace ge
diff --git a/ge/graph/build/model_builder.h b/ge/graph/build/model_builder.h
index 8efd76e9..e4dfbf0f 100644
--- a/ge/graph/build/model_builder.h
+++ b/ge/graph/build/model_builder.h
@@ -83,7 +83,9 @@ class ModelBuilder {
 
   Status CompileSingleOp();
 
-  Status CheckAicpuOp(const OpDescPtr &op_desc, std::set<std::string> &cpue_check_set, std::set<std::string> &tf_engine_set);
+  Status CollectCheckAicpuAttr(const OpDescPtr &op_desc, std::set<std::string> &cpue_check_set, std::set<std::string> &tf_engine_set);
+
+  Status SetModelCheckAicpuAttr(ge::Model &model);
 
   uint64_t session_id_;
 
diff --git a/ge/graph/load/new_model_manager/davinci_model.cc b/ge/graph/load/new_model_manager/davinci_model.cc
index fceadc32..d1ea833a 100755
--- a/ge/graph/load/new_model_manager/davinci_model.cc
+++ b/ge/graph/load/new_model_manager/davinci_model.cc
@@ -485,7 +485,7 @@ Status DavinciModel::DoTaskSink() {
 
   GE_CHK_STATUS_RET(ModelManager::GetInstance()->LaunchCustAicpuSo(), "Launch cust aicpu so failed.");
 
-  GE_CHK_STATUS_RET(ModelManager::GetInstance()->CheckAicpuOp(ge_model_), "Check aicpu op type failed.");
+  GE_CHK_STATUS_RET(ModelManager::GetInstance()->CheckAicpuOpList(ge_model_), "Check aicpu op type failed.");
 
   GE_CHK_STATUS_RET(InitEntryTask(), "InitEntryTask failed.");
 
diff --git a/ge/graph/load/new_model_manager/model_manager.cc b/ge/graph/load/new_model_manager/model_manager.cc
index 7a7afe5d..5973b030 100755
--- a/ge/graph/load/new_model_manager/model_manager.cc
+++ b/ge/graph/load/new_model_manager/model_manager.cc
@@ -54,6 +54,7 @@ const char *const kDeleteCustOp = "deleteCustOp";
 const int kTimeSpecNano = 1000000000;
 const int kTimeSpecMiro = 1000000;
 const int kSessionMaxBias = 100;
+const int kOpNameMaxSize = 100;
 struct CustAicpuSoBuf {
   uint64_t kernelSoBuf;
   uint32_t kernelSoBufLen;
@@ -1540,7 +1541,7 @@ Status ModelManager::LaunchKernelCheckAicpuOp(std::vector<std::string> &aicpu_op
   std::vector<ReturnCode> res_ret_code_list;
 
   if (aicpu_optype_list.empty() && aicpu_tf_optype_list.empty()) {
-    GELOGI("No need to check aicpu optype.");
+    GELOGI("No need to check aicpu op type.");
     return SUCCESS;
   }
 
@@ -1624,7 +1625,7 @@ Status ModelManager::LaunchKernelCheckAicpuOp(std::vector<std::string> &aicpu_op
   op_check_info_req.offSetLen = sizeof(SysOpCheckInfo);
   op_check_info_req.sysOpInfoList = reinterpret_cast<uint64_t>(reinterpret_cast<uintptr_t>(d_req_op_list));
 
-  op_check_info_res.opListNum = op_nums;
+  op_check_info_res.opListNum = 0;
   op_check_info_res.returnCodeList = reinterpret_cast<uint64_t>(reinterpret_cast<uintptr_t>(d_ret_code_list));
   op_check_info_res.sysOpInfoList = reinterpret_cast<uint64_t>(reinterpret_cast<uintptr_t>(d_res_op_list));
 
@@ -1636,7 +1637,7 @@ Status ModelManager::LaunchKernelCheckAicpuOp(std::vector<std::string> &aicpu_op
   }
   allocated_mem.push_back(args);
   GE_CHK_RT(rtMemcpy(args,  sizeof(SysOpCheckInfo), &op_check_info_req,  sizeof(SysOpCheckInfo), RT_MEMCPY_HOST_TO_DEVICE));
-  GE_CHK_RT(rtMemcpy(reinterpret_cast<void *>(reinterpret_cast<uint64_t>(reinterpret_cast<uintptr_t>(args)) + op_check_info_req.offSetLen),  sizeof(SysOpCheckResp), &op_check_info_res,  sizeof(SysOpCheckResp), RT_MEMCPY_HOST_TO_DEVICE));
+  GE_CHK_RT(rtMemcpy(reinterpret_cast<void *>(reinterpret_cast<uint64_t>(args) + op_check_info_req.offSetLen),  sizeof(SysOpCheckResp), &op_check_info_res,  sizeof(SysOpCheckResp), RT_MEMCPY_HOST_TO_DEVICE));
 
   GE_CHK_RT(rtStreamCreate(&stream, 0));
   GE_CHK_RT(rtCpuKernelLaunch(nullptr, kernel_name.c_str(), 1, args, args_size, nullptr, stream));
@@ -1648,8 +1649,7 @@ Status ModelManager::LaunchKernelCheckAicpuOp(std::vector<std::string> &aicpu_op
   }
 
   // Check the response
-  void *d_op_check_info_res = reinterpret_cast<void *>(reinterpret_cast<uint64_t>(reinterpret_cast<uintptr_t>(args)) + op_check_info_req.offSetLen);
-  op_check_info_res = {};
+  void *d_op_check_info_res = reinterpret_cast<void *>(reinterpret_cast<uint64_t>(args) + op_check_info_req.offSetLen);
   GE_CHK_RT(rtMemcpy(&op_check_info_res,  sizeof(SysOpCheckResp), d_op_check_info_res,  sizeof(SysOpCheckResp), RT_MEMCPY_DEVICE_TO_HOST));
   std::function<void()> callback = [&]() {
     for (auto mem : allocated_mem) {
@@ -1658,25 +1658,40 @@ Status ModelManager::LaunchKernelCheckAicpuOp(std::vector<std::string> &aicpu_op
     GE_CHK_RT(rtStreamDestroy(stream));
   };
 
+  if (op_check_info_res.isWithoutJson) {
+    GELOGI("No need to check aicpu in this scenoria.");
+    GE_MAKE_GUARD(release, callback);
+    return SUCCESS;
+  }
   uint64_t res_op_nums = op_check_info_res.opListNum;
+  GELOGI("Check aicpu type, is without json: %d, res op num: %lu.", op_check_info_res.isWithoutJson, res_op_nums);
   if (res_op_nums != 0) {
+    res_ret_code_list.clear();
+    res_ret_code_list.resize(res_op_nums);
+    res_aicpu_op_info_list.clear();
+    res_aicpu_op_info_list.resize(res_op_nums);
     GE_CHK_RT(rtMemcpy(res_ret_code_list.data(),  sizeof(ReturnCode) * res_op_nums, reinterpret_cast<void *>(reinterpret_cast<uintptr_t>(op_check_info_res.returnCodeList)),  sizeof(ReturnCode) * res_op_nums, RT_MEMCPY_DEVICE_TO_HOST));
     GE_CHK_RT(rtMemcpy(res_aicpu_op_info_list.data(),  sizeof(SysOpInfo) * res_op_nums, reinterpret_cast<void *>(reinterpret_cast<uintptr_t>(op_check_info_res.sysOpInfoList)),  sizeof(SysOpInfo) * res_op_nums, RT_MEMCPY_DEVICE_TO_HOST));
     if (res_ret_code_list.size() != res_aicpu_op_info_list.size() || res_ret_code_list.size() != res_op_nums) {
       GELOGE(FAILED, "Number of retcode is not equal to number of op type.");
       GE_MAKE_GUARD(release, callback);
+      return FAILED;
     }
-    std::string fail_reason = "Check aicpu op_type failed. details: ";
+    std::string fail_reason;
     for (uint32_t i = 0; i < res_op_nums; i++) {
       ReturnCode ret_code = res_ret_code_list.at(i);
       SysOpInfo aicpu_info = res_aicpu_op_info_list.at(i);
+      GELOGI("Not surpport aicpu op type: %lu, kernel_type:%d, opLen:%d, ret_code:%d", aicpu_info.opType, aicpu_info.kernelsType, aicpu_info.opLen, ret_code);
       std::vector<char> op_name;
+      op_name.clear();
+      op_name.resize(kOpNameMaxSize);
       GE_CHK_RT(rtMemcpy(op_name.data(), aicpu_info.opLen, reinterpret_cast<void *>(aicpu_info.opType), aicpu_info.opLen, RT_MEMCPY_DEVICE_TO_HOST));
       std::string kernel_type = (static_cast<OpKernelType>(aicpu_info.kernelsType) == TF_KERNEL) ? "TF_KERNEL" : "CPU_KERNEL";
       string op_name_str(op_name.data());
-      fail_reason += "op_type: " + op_name_str + " kernel_type: " + kernel_type + " " + to_string(static_cast<int>(ret_code)) + "<0: op_type, 1: format, 2: datatype> not support.";
+      fail_reason += "op_type: " + op_name_str + " kernel_type: " + kernel_type + "  ret code:" + to_string(static_cast<int>(ret_code)) + "<0: op_type, 1: format, 2: datatype> \n";
     }
-    GELOGE(FAILED, "%s", fail_reason.c_str());
+    fail_reason += "not support.";
+    GELOGE(FAILED, "Check aicpu op_type failed. details: %s", fail_reason.c_str());
     GE_MAKE_GUARD(release, callback);
     return FAILED;
   }
diff --git a/ge/graph/load/new_model_manager/model_manager.h b/ge/graph/load/new_model_manager/model_manager.h
index 4463b3ef..f1f404b5 100755
--- a/ge/graph/load/new_model_manager/model_manager.h
+++ b/ge/graph/load/new_model_manager/model_manager.h
@@ -297,7 +297,7 @@ class FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY ModelManager {
 
   ge::Status LaunchKernelCheckAicpuOp(std::vector<std::string> &aicpu_optype_list, std::vector<std::string> &aicpu_tf_optype_list);
 
-  ge::Status CheckAicpuOp(GeModelPtr ge_model);
+  ge::Status CheckAicpuOpList(GeModelPtr ge_model);
 
   ge::Status GetOrigInputInfo(uint32_t model_id, uint32_t index, OriginInputInfo &orig_input_info);
 

From 72e7927e3d817fea5b345281e466a67baa4f0915 Mon Sep 17 00:00:00 2001
From: taoxiangdong <taoxiangdong1@huawei.com>
Date: Sat, 12 Dec 2020 19:03:15 +0800
Subject: [PATCH 03/11] update some code

---
 ge/graph/load/new_model_manager/model_manager.cc |  4 ++--
 ge/hybrid/model/hybrid_model_builder.cc          | 10 +++++-----
 ge/hybrid/model/hybrid_model_builder.h           |  2 +-
 3 files changed, 8 insertions(+), 8 deletions(-)

diff --git a/ge/graph/load/new_model_manager/model_manager.cc b/ge/graph/load/new_model_manager/model_manager.cc
index 5973b030..7d776eb7 100755
--- a/ge/graph/load/new_model_manager/model_manager.cc
+++ b/ge/graph/load/new_model_manager/model_manager.cc
@@ -1681,14 +1681,14 @@ Status ModelManager::LaunchKernelCheckAicpuOp(std::vector<std::string> &aicpu_op
     for (uint32_t i = 0; i < res_op_nums; i++) {
       ReturnCode ret_code = res_ret_code_list.at(i);
       SysOpInfo aicpu_info = res_aicpu_op_info_list.at(i);
-      GELOGI("Not surpport aicpu op type: %lu, kernel_type:%d, opLen:%d, ret_code:%d", aicpu_info.opType, aicpu_info.kernelsType, aicpu_info.opLen, ret_code);
+      GELOGI("Not support aicpu op type: %lu, kernel_type:%d, opLen:%d, ret_code:%d", aicpu_info.opType, aicpu_info.kernelsType, aicpu_info.opLen, ret_code);
       std::vector<char> op_name;
       op_name.clear();
       op_name.resize(kOpNameMaxSize);
       GE_CHK_RT(rtMemcpy(op_name.data(), aicpu_info.opLen, reinterpret_cast<void *>(aicpu_info.opType), aicpu_info.opLen, RT_MEMCPY_DEVICE_TO_HOST));
       std::string kernel_type = (static_cast<OpKernelType>(aicpu_info.kernelsType) == TF_KERNEL) ? "TF_KERNEL" : "CPU_KERNEL";
       string op_name_str(op_name.data());
-      fail_reason += "op_type: " + op_name_str + " kernel_type: " + kernel_type + "  ret code:" + to_string(static_cast<int>(ret_code)) + "<0: op_type, 1: format, 2: datatype> \n";
+      fail_reason += "op_type: " + op_name_str + " kernel_type: " + kernel_type + "  ret code:" + std::to_string(static_cast<int>(ret_code)) + "<0: op_type, 1: format, 2: datatype> \n";
     }
     fail_reason += "not support.";
     GELOGE(FAILED, "Check aicpu op_type failed. details: %s", fail_reason.c_str());
diff --git a/ge/hybrid/model/hybrid_model_builder.cc b/ge/hybrid/model/hybrid_model_builder.cc
index e1c35008..3ffb2dc6 100755
--- a/ge/hybrid/model/hybrid_model_builder.cc
+++ b/ge/hybrid/model/hybrid_model_builder.cc
@@ -922,7 +922,7 @@ Status HybridModelBuilder::InitWeights() {
 }
 
 Status HybridModelBuilder::LoadTasks() {
-  GE_CHK_STATUS_RET(CheckAicpuOp(), "Check Aicpu op failed.");
+  GE_CHK_STATUS_RET(CheckAicpuOpList(), "Check Aicpu op failed.");
   for (auto &it : hybrid_model_.node_items_) {
     auto &node_item = it.second;
     auto &node_ptr = node_item->node;
@@ -1560,21 +1560,20 @@ Status HybridModelBuilder::BuildInputMapping(GraphItem &graph_item,
   return SUCCESS;
 }
 
-Status HybridModelBuilder::CheckAicpuOp() {
+Status HybridModelBuilder::CheckAicpuOpList() {
   std::vector<std::string> aicpu_optype_list;
   std::vector<std::string> aicpu_tf_optype_list;
   std::set<std::string> aicpu_optype_set;
   std::set<std::string> aicpu_tf_optype_set;
-  const auto &root_graph = ge_root_model_->GetRootGraph();
   for (auto &it : ge_root_model_->GetSubgraphInstanceNameToModel()) {
     auto &name = it.first;
     auto &ge_model = it.second;
     GE_CHECK_NOTNULL(ge_model);
-    if (ge::AttrUtils::GetListStr(ge_model, "needCheckCpu", aicpu_optype_list)) {
+    if (ge::AttrUtils::GetListStr(*ge_model, "needCheckCpu", aicpu_optype_list)) {
       aicpu_optype_set.insert(aicpu_optype_list.begin(), aicpu_optype_list.end());
     }
 
-    if (ge::AttrUtils::GetListStr(ge_model, "needCheckTf", aicpu_tf_optype_list)) {
+    if (ge::AttrUtils::GetListStr(*ge_model, "needCheckTf", aicpu_tf_optype_list)) {
       aicpu_tf_optype_set.insert(aicpu_tf_optype_list.begin(), aicpu_tf_optype_list.end());
     }
   }
@@ -1582,6 +1581,7 @@ Status HybridModelBuilder::CheckAicpuOp() {
   aicpu_optype_list.assign(aicpu_optype_set.begin(), aicpu_optype_set.end());
   aicpu_tf_optype_list.assign(aicpu_tf_optype_set.begin(), aicpu_tf_optype_set.end());
   GE_CHK_STATUS_RET(ModelManager::GetInstance()->LaunchKernelCheckAicpuOp(aicpu_optype_list, aicpu_tf_optype_list), "Launch check aicpu op type failed.");
+  return SUCCESS;
 }
 }  // namespace hybrid
 }  // namespace ge
diff --git a/ge/hybrid/model/hybrid_model_builder.h b/ge/hybrid/model/hybrid_model_builder.h
index a549a9f1..bb349d86 100644
--- a/ge/hybrid/model/hybrid_model_builder.h
+++ b/ge/hybrid/model/hybrid_model_builder.h
@@ -78,7 +78,7 @@ class HybridModelBuilder {
   Status ParseVarOutputs(NodeItem &node_item);
   Status LoadKnownShapedSubgraph(ComputeGraph &graph, NodeItem *parent_node_item);
   Status RecoverGraphUnknownFlag();
-  Status CheckAicpuOp();
+  Status CheckAicpuOpList();
 
   const char* GetGraphName() const {
     return hybrid_model_.model_name_.c_str();

From ee03aa5dec0711d13ce68bb7fb9473b4e05fbb1d Mon Sep 17 00:00:00 2001
From: taoxiangdong <taoxiangdong1@huawei.com>
Date: Sat, 12 Dec 2020 19:41:36 +0800
Subject: [PATCH 04/11] update aicpu model builder

---
 ge/graph/build/model_builder.cc                  | 8 ++++----
 ge/graph/build/model_builder.h                   | 2 +-
 ge/graph/load/new_model_manager/model_manager.cc | 2 +-
 ge/hybrid/model/hybrid_model_builder.cc          | 1 -
 4 files changed, 6 insertions(+), 7 deletions(-)

diff --git a/ge/graph/build/model_builder.cc b/ge/graph/build/model_builder.cc
index 1eb758c3..b68b5f60 100755
--- a/ge/graph/build/model_builder.cc
+++ b/ge/graph/build/model_builder.cc
@@ -581,6 +581,8 @@ Status ModelBuilder::SaveDataToModel(ge::Model &model, ge::GeModel &ge_model) {
   // Add TBE Kernels and custom aicpu op bin
   std::set<std::string> tbe_name_set;
   std::set<std::string> aicpu_name_set;
+  std::set<std::string> aicpu_optype_set;
+  std::set<std::string> aicpu_tf_optype_set;
   for (const ge::NodePtr &n : compute_graph_->GetNodes(compute_graph_->GetGraphUnknownFlag())) {
     auto node_op_desc = n->GetOpDesc();
     GE_IF_BOOL_EXEC(node_op_desc == nullptr, continue);
@@ -607,7 +609,7 @@ Status ModelBuilder::SaveDataToModel(ge::Model &model, ge::GeModel &ge_model) {
     tbe_kernel_store_.AddTBEKernel(tbe_kernel);
   }
 
-  (void)SetModelAicpuCheckAttr(model);
+  (void)SetModelCheckAicpuAttr(model);
 
   for (const ge::NodePtr &n : compute_graph_->GetNodes(compute_graph_->GetGraphUnknownFlag())) {
     auto node_op_desc = n->GetOpDesc();
@@ -820,11 +822,9 @@ Status ModelBuilder::CollectCheckAicpuAttr(const OpDescPtr &op_desc, std::set<st
   return SUCCESS;
 }
 
-Status ModelBuilder::SetModelCheckAicpuAttr(ge::Model &model) {
+Status ModelBuilder::SetModelCheckAicpuAttr(ge::Model &model, std::set<std::string> &aicpu_optype_set, std::set<std::string> &aicpu_tf_optype_set) {
   std::vector<std::string> aicpu_optype_list;
   std::vector<std::string> aicpu_tf_optype_list;
-  std::set<std::string> aicpu_optype_set;
-  std::set<std::string> aicpu_tf_optype_set;
   if (ge::AttrUtils::GetListStr(&model, "needCheckCpu", aicpu_optype_list)) {
     GELOGI("Already have aicpu optype size: %zu", aicpu_optype_list.size());
     aicpu_optype_set.insert(aicpu_optype_list.begin(), aicpu_optype_list.end());
diff --git a/ge/graph/build/model_builder.h b/ge/graph/build/model_builder.h
index e4dfbf0f..b133b98b 100644
--- a/ge/graph/build/model_builder.h
+++ b/ge/graph/build/model_builder.h
@@ -85,7 +85,7 @@ class ModelBuilder {
 
   Status CollectCheckAicpuAttr(const OpDescPtr &op_desc, std::set<std::string> &cpue_check_set, std::set<std::string> &tf_engine_set);
 
-  Status SetModelCheckAicpuAttr(ge::Model &model);
+  Status SetModelCheckAicpuAttr(ge::Model &model, std::set<std::string> &aicpu_optype_set, std::set<std::string> &aicpu_tf_optype_set);
 
   uint64_t session_id_;
 
diff --git a/ge/graph/load/new_model_manager/model_manager.cc b/ge/graph/load/new_model_manager/model_manager.cc
index 7d776eb7..fc0ebb8a 100755
--- a/ge/graph/load/new_model_manager/model_manager.cc
+++ b/ge/graph/load/new_model_manager/model_manager.cc
@@ -1701,7 +1701,7 @@ Status ModelManager::LaunchKernelCheckAicpuOp(std::vector<std::string> &aicpu_op
   return SUCCESS;
 }
 
-Status ModelManager::CheckAicpuOp(GeModelPtr ge_model) {
+Status ModelManager::CheckAicpuOpList(GeModelPtr ge_model) {
   std::vector<std::string> aicpu_optype_list;
   std::vector<std::string> aicpu_tf_optype_list;
   bool aicpu_need_check = ge::AttrUtils::GetListStr(ge_model, "needCheckCpu", aicpu_optype_list);
diff --git a/ge/hybrid/model/hybrid_model_builder.cc b/ge/hybrid/model/hybrid_model_builder.cc
index 3ffb2dc6..e0d0913e 100755
--- a/ge/hybrid/model/hybrid_model_builder.cc
+++ b/ge/hybrid/model/hybrid_model_builder.cc
@@ -1566,7 +1566,6 @@ Status HybridModelBuilder::CheckAicpuOpList() {
   std::set<std::string> aicpu_optype_set;
   std::set<std::string> aicpu_tf_optype_set;
   for (auto &it : ge_root_model_->GetSubgraphInstanceNameToModel()) {
-    auto &name = it.first;
     auto &ge_model = it.second;
     GE_CHECK_NOTNULL(ge_model);
     if (ge::AttrUtils::GetListStr(*ge_model, "needCheckCpu", aicpu_optype_list)) {

From a77f2c39bf4f00484826e45eb1f7d9a8e5987c52 Mon Sep 17 00:00:00 2001
From: taoxiangdong <taoxiangdong1@huawei.com>
Date: Sat, 12 Dec 2020 19:47:25 +0800
Subject: [PATCH 05/11] add info log to check requests

---
 ge/graph/load/new_model_manager/model_manager.cc | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/ge/graph/load/new_model_manager/model_manager.cc b/ge/graph/load/new_model_manager/model_manager.cc
index fc0ebb8a..30deb00b 100755
--- a/ge/graph/load/new_model_manager/model_manager.cc
+++ b/ge/graph/load/new_model_manager/model_manager.cc
@@ -1616,7 +1616,7 @@ Status ModelManager::LaunchKernelCheckAicpuOp(std::vector<std::string> &aicpu_op
     req_aicpu_op_info_list.emplace_back(op_info);
     //GE_CHK_RT(rtMemcpy((char *)d_req_op_list + i * sizeof(SysOpInfo),  sizeof(SysOpInfo), &op_info,  sizeof(SysOpInfo), RT_MEMCPY_HOST_TO_DEVICE));
   }
-
+  GELOGI("Check aicpu op all attr size: %zu, real attr size: %zu.", op_nums, req_aicpu_op_info_list.size());
   GE_CHK_RT(rtMemcpy(d_req_op_list,  sizeof(SysOpInfo) * req_aicpu_op_info_list.size(), req_aicpu_op_info_list.data(),  sizeof(SysOpInfo) * req_aicpu_op_info_list.size(), RT_MEMCPY_HOST_TO_DEVICE));
 
   SysOpCheckInfo op_check_info_req;

From 80795addb1155350fee77a89eb06d7abe99b3e8f Mon Sep 17 00:00:00 2001
From: taoxiangdong <taoxiangdong1@huawei.com>
Date: Sat, 12 Dec 2020 20:04:19 +0800
Subject: [PATCH 06/11] update parameter

---
 ge/graph/build/model_builder.cc | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/ge/graph/build/model_builder.cc b/ge/graph/build/model_builder.cc
index b68b5f60..b451b897 100755
--- a/ge/graph/build/model_builder.cc
+++ b/ge/graph/build/model_builder.cc
@@ -609,7 +609,7 @@ Status ModelBuilder::SaveDataToModel(ge::Model &model, ge::GeModel &ge_model) {
     tbe_kernel_store_.AddTBEKernel(tbe_kernel);
   }
 
-  (void)SetModelCheckAicpuAttr(model);
+  (void)SetModelCheckAicpuAttr(model, aicpu_optype_set, aicpu_tf_optype_set);
 
   for (const ge::NodePtr &n : compute_graph_->GetNodes(compute_graph_->GetGraphUnknownFlag())) {
     auto node_op_desc = n->GetOpDesc();

From aecc1c9b79a0c97a6cd87496791bbe6d378b7d10 Mon Sep 17 00:00:00 2001
From: taoxiangdong <taoxiangdong1@huawei.com>
Date: Sun, 13 Dec 2020 15:10:29 +0800
Subject: [PATCH 07/11] update print log

---
 ge/graph/load/new_model_manager/model_manager.cc | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/ge/graph/load/new_model_manager/model_manager.cc b/ge/graph/load/new_model_manager/model_manager.cc
index 30deb00b..a6c70a78 100755
--- a/ge/graph/load/new_model_manager/model_manager.cc
+++ b/ge/graph/load/new_model_manager/model_manager.cc
@@ -1707,7 +1707,7 @@ Status ModelManager::CheckAicpuOpList(GeModelPtr ge_model) {
   bool aicpu_need_check = ge::AttrUtils::GetListStr(ge_model, "needCheckCpu", aicpu_optype_list);
   bool tf_need_check = ge::AttrUtils::GetListStr(ge_model, "needCheckTf", aicpu_tf_optype_list);
   if (!aicpu_need_check  && !tf_need_check) {
-    GELOGI("No need to check aicpu optype.");
+    GELOGI("Graph:%s No need to check aicpu optype.", ge_model->GetGraph().GetName().c_str());
     return SUCCESS;
   }
   GE_CHK_STATUS_RET(LaunchKernelCheckAicpuOp(aicpu_optype_list, aicpu_tf_optype_list), "Launch check aicpu op type failed.");

From 78e31e9856b21a997f578ccff70f741aea2f0ddd Mon Sep 17 00:00:00 2001
From: taoxiangdong <taoxiangdong1@huawei.com>
Date: Sun, 13 Dec 2020 16:31:54 +0800
Subject: [PATCH 08/11] update clang-format

---
 ge/graph/build/model_builder.cc               | 28 +++++++----
 .../load/new_model_manager/model_manager.cc   | 49 ++++++++++++-------
 ge/hybrid/model/hybrid_model_builder.cc       |  3 +-
 3 files changed, 51 insertions(+), 29 deletions(-)

diff --git a/ge/graph/build/model_builder.cc b/ge/graph/build/model_builder.cc
index b451b897..d639433e 100755
--- a/ge/graph/build/model_builder.cc
+++ b/ge/graph/build/model_builder.cc
@@ -803,26 +803,29 @@ Status ModelBuilder::CompileSingleOp() {
   return ge::SUCCESS;
 }
 
-Status ModelBuilder::CollectCheckAicpuAttr(const OpDescPtr &op_desc, std::set<std::string> &cpue_check_set, std::set<std::string> &tf_engine_set) {
+Status ModelBuilder::CollectCheckAicpuAttr(const OpDescPtr &op_desc, std::set<std::string> &cpue_check_set,
+                                           std::set<std::string> &tf_engine_set) {
   GE_CHECK_NOTNULL(op_desc);
   std::string aicpu_optype;
   bool has_attr_check_cpu = ge::AttrUtils::GetStr(op_desc, "needCheckCpu", aicpu_optype);
   std::vector<std::string> tf_optypes;
-  bool has_attr_check_tf =  ge::AttrUtils::GetListStr(op_desc, "needCheckTf", tf_optypes);
+  bool has_attr_check_tf = ge::AttrUtils::GetListStr(op_desc, "needCheckTf", tf_optypes);
   if (has_attr_check_cpu && !aicpu_optype.empty()) {
     GELOGI("Check Aicpu op type %s, op name: %s.", op_desc->GetType().c_str(), op_desc->GetName().c_str());
     cpue_check_set.insert(aicpu_optype);
   }
 
   if (has_attr_check_tf && !tf_optypes.empty()) {
-    GELOGI("Check Tf op type %s, op name: %s, tf_optypes size: %zu.", op_desc->GetType().c_str(), op_desc->GetName().c_str(), tf_optypes.size());
+    GELOGI("Check Tf op type %s, op name: %s, tf_optypes size: %zu.", op_desc->GetType().c_str(),
+           op_desc->GetName().c_str(), tf_optypes.size());
     tf_engine_set.insert(tf_optypes.begin(), tf_optypes.end());
   }
-  
+
   return SUCCESS;
 }
 
-Status ModelBuilder::SetModelCheckAicpuAttr(ge::Model &model, std::set<std::string> &aicpu_optype_set, std::set<std::string> &aicpu_tf_optype_set) {
+Status ModelBuilder::SetModelCheckAicpuAttr(ge::Model &model, std::set<std::string> &aicpu_optype_set,
+                                            std::set<std::string> &aicpu_tf_optype_set) {
   std::vector<std::string> aicpu_optype_list;
   std::vector<std::string> aicpu_tf_optype_list;
   if (ge::AttrUtils::GetListStr(&model, "needCheckCpu", aicpu_optype_list)) {
@@ -838,11 +841,16 @@ Status ModelBuilder::SetModelCheckAicpuAttr(ge::Model &model, std::set<std::stri
   // reset list with set
   aicpu_optype_list.assign(aicpu_optype_set.begin(), aicpu_optype_set.end());
   aicpu_tf_optype_list.assign(aicpu_tf_optype_set.begin(), aicpu_tf_optype_set.end());
-  GELOGI("Check Aicpu op types ComputeGraph: %s aicpu_optype_set: %zu, aicpu_optype_list: %zu, aicpu_tf_optype_set: %zu, aicpu_tf_optype_list:%zu.",
-         compute_graph_->GetName().c_str(), aicpu_optype_set.size(), aicpu_optype_list.size(), aicpu_tf_optype_set.size(), aicpu_tf_optype_list.size());
-  GE_CHK_BOOL_EXEC(ge::AttrUtils::SetListStr(&model, "needCheckCpu", aicpu_optype_list), return FAILED, "Set attr needCheckCpu fail.");
-
-  GE_CHK_BOOL_EXEC(ge::AttrUtils::SetListStr(&model, "needCheckTf", aicpu_tf_optype_list), return FAILED, "Set attr needCheckTf fail.");
+  GELOGI(
+      "Check Aicpu op types ComputeGraph: %s aicpu_optype_set: %zu, aicpu_optype_list: %zu, aicpu_tf_optype_set: %zu, "
+      "aicpu_tf_optype_list:%zu.",
+      compute_graph_->GetName().c_str(), aicpu_optype_set.size(), aicpu_optype_list.size(), aicpu_tf_optype_set.size(),
+      aicpu_tf_optype_list.size());
+  GE_CHK_BOOL_EXEC(ge::AttrUtils::SetListStr(&model, "needCheckCpu", aicpu_optype_list), return FAILED,
+                   "Set attr needCheckCpu fail.");
+
+  GE_CHK_BOOL_EXEC(ge::AttrUtils::SetListStr(&model, "needCheckTf", aicpu_tf_optype_list), return FAILED,
+                   "Set attr needCheckTf fail.");
   return SUCCESS;
 }
 }  // namespace ge
diff --git a/ge/graph/load/new_model_manager/model_manager.cc b/ge/graph/load/new_model_manager/model_manager.cc
index a6c70a78..99b47878 100755
--- a/ge/graph/load/new_model_manager/model_manager.cc
+++ b/ge/graph/load/new_model_manager/model_manager.cc
@@ -1532,7 +1532,8 @@ Status ModelManager::EnableExceptionDump(const std::map<string, string> &options
   return SUCCESS;
 }
 
-Status ModelManager::LaunchKernelCheckAicpuOp(std::vector<std::string> &aicpu_optype_list, std::vector<std::string> &aicpu_tf_optype_list) {
+Status ModelManager::LaunchKernelCheckAicpuOp(std::vector<std::string> &aicpu_optype_list,
+                                              std::vector<std::string> &aicpu_tf_optype_list) {
   std::string kernel_name = "checkOpType";
   GELOGI("LaunchKernelCheckAicpuOpType in, kernel name %s", kernel_name.c_str());
   std::lock_guard<std::mutex> lock(cust_aicpu_mutex_);
@@ -1556,7 +1557,7 @@ Status ModelManager::LaunchKernelCheckAicpuOp(std::vector<std::string> &aicpu_op
 
   size_t aicpu_op_nums = aicpu_optype_list.size();
   size_t tf_op_nums = aicpu_tf_optype_list.size();
-  size_t op_nums =  aicpu_op_nums + tf_op_nums;
+  size_t op_nums = aicpu_op_nums + tf_op_nums;
   // malloc sysOpInfoList in SysOpCheckInfo
   status = rtMalloc(&d_req_op_list, op_nums * sizeof(SysOpInfo), RT_MEMORY_HBM);
   if (status != RT_ERROR_NONE) {
@@ -1591,12 +1592,11 @@ Status ModelManager::LaunchKernelCheckAicpuOp(std::vector<std::string> &aicpu_op
       return RT_ERROR_TO_GE_STATUS(status);
     }
     allocated_mem.push_back(d_op_type_name);
-    GE_CHK_RT(rtMemcpy(d_op_type_name,  op_type.length(), op_type.c_str(),  op_type.length(), RT_MEMCPY_HOST_TO_DEVICE));
+    GE_CHK_RT(rtMemcpy(d_op_type_name, op_type.length(), op_type.c_str(), op_type.length(), RT_MEMCPY_HOST_TO_DEVICE));
     op_info.opType = reinterpret_cast<uint64_t>(reinterpret_cast<uintptr_t>(d_op_type_name));
     op_info.opLen = op_type.length();
     op_info.kernelsType = CPU_KERNEL;
     req_aicpu_op_info_list.emplace_back(op_info);
-    //GE_CHK_RT(rtMemcpy((char *)d_req_op_list + i * sizeof(SysOpInfo),  sizeof(SysOpInfo), &op_info,  sizeof(SysOpInfo), RT_MEMCPY_HOST_TO_DEVICE));
   }
 
   for (const auto &op_type : aicpu_tf_optype_list) {
@@ -1609,15 +1609,15 @@ Status ModelManager::LaunchKernelCheckAicpuOp(std::vector<std::string> &aicpu_op
       return RT_ERROR_TO_GE_STATUS(status);
     }
     allocated_mem.push_back(d_op_type_name);
-    GE_CHK_RT(rtMemcpy(d_op_type_name,  op_type.size(), op_type.c_str(),  op_type.size(), RT_MEMCPY_HOST_TO_DEVICE));
+    GE_CHK_RT(rtMemcpy(d_op_type_name, op_type.size(), op_type.c_str(), op_type.size(), RT_MEMCPY_HOST_TO_DEVICE));
     op_info.opType = reinterpret_cast<uint64_t>(reinterpret_cast<uintptr_t>(d_op_type_name));
     op_info.opLen = op_type.size();
     op_info.kernelsType = TF_KERNEL;
     req_aicpu_op_info_list.emplace_back(op_info);
-    //GE_CHK_RT(rtMemcpy((char *)d_req_op_list + i * sizeof(SysOpInfo),  sizeof(SysOpInfo), &op_info,  sizeof(SysOpInfo), RT_MEMCPY_HOST_TO_DEVICE));
   }
   GELOGI("Check aicpu op all attr size: %zu, real attr size: %zu.", op_nums, req_aicpu_op_info_list.size());
-  GE_CHK_RT(rtMemcpy(d_req_op_list,  sizeof(SysOpInfo) * req_aicpu_op_info_list.size(), req_aicpu_op_info_list.data(),  sizeof(SysOpInfo) * req_aicpu_op_info_list.size(), RT_MEMCPY_HOST_TO_DEVICE));
+  GE_CHK_RT(rtMemcpy(d_req_op_list, sizeof(SysOpInfo) * req_aicpu_op_info_list.size(), req_aicpu_op_info_list.data(),
+                     sizeof(SysOpInfo) * req_aicpu_op_info_list.size(), RT_MEMCPY_HOST_TO_DEVICE));
 
   SysOpCheckInfo op_check_info_req;
   SysOpCheckResp op_check_info_res;
@@ -1636,8 +1636,10 @@ Status ModelManager::LaunchKernelCheckAicpuOp(std::vector<std::string> &aicpu_op
     return RT_ERROR_TO_GE_STATUS(status);
   }
   allocated_mem.push_back(args);
-  GE_CHK_RT(rtMemcpy(args,  sizeof(SysOpCheckInfo), &op_check_info_req,  sizeof(SysOpCheckInfo), RT_MEMCPY_HOST_TO_DEVICE));
-  GE_CHK_RT(rtMemcpy(reinterpret_cast<void *>(reinterpret_cast<uint64_t>(args) + op_check_info_req.offSetLen),  sizeof(SysOpCheckResp), &op_check_info_res,  sizeof(SysOpCheckResp), RT_MEMCPY_HOST_TO_DEVICE));
+  GE_CHK_RT(
+      rtMemcpy(args, sizeof(SysOpCheckInfo), &op_check_info_req, sizeof(SysOpCheckInfo), RT_MEMCPY_HOST_TO_DEVICE));
+  GE_CHK_RT(rtMemcpy(reinterpret_cast<void *>(reinterpret_cast<uint64_t>(args) + op_check_info_req.offSetLen),
+                     sizeof(SysOpCheckResp), &op_check_info_res, sizeof(SysOpCheckResp), RT_MEMCPY_HOST_TO_DEVICE));
 
   GE_CHK_RT(rtStreamCreate(&stream, 0));
   GE_CHK_RT(rtCpuKernelLaunch(nullptr, kernel_name.c_str(), 1, args, args_size, nullptr, stream));
@@ -1650,7 +1652,8 @@ Status ModelManager::LaunchKernelCheckAicpuOp(std::vector<std::string> &aicpu_op
 
   // Check the response
   void *d_op_check_info_res = reinterpret_cast<void *>(reinterpret_cast<uint64_t>(args) + op_check_info_req.offSetLen);
-  GE_CHK_RT(rtMemcpy(&op_check_info_res,  sizeof(SysOpCheckResp), d_op_check_info_res,  sizeof(SysOpCheckResp), RT_MEMCPY_DEVICE_TO_HOST));
+  GE_CHK_RT(rtMemcpy(&op_check_info_res, sizeof(SysOpCheckResp), d_op_check_info_res, sizeof(SysOpCheckResp),
+                     RT_MEMCPY_DEVICE_TO_HOST));
   std::function<void()> callback = [&]() {
     for (auto mem : allocated_mem) {
       GE_CHK_RT(rtFree(mem));
@@ -1670,8 +1673,12 @@ Status ModelManager::LaunchKernelCheckAicpuOp(std::vector<std::string> &aicpu_op
     res_ret_code_list.resize(res_op_nums);
     res_aicpu_op_info_list.clear();
     res_aicpu_op_info_list.resize(res_op_nums);
-    GE_CHK_RT(rtMemcpy(res_ret_code_list.data(),  sizeof(ReturnCode) * res_op_nums, reinterpret_cast<void *>(reinterpret_cast<uintptr_t>(op_check_info_res.returnCodeList)),  sizeof(ReturnCode) * res_op_nums, RT_MEMCPY_DEVICE_TO_HOST));
-    GE_CHK_RT(rtMemcpy(res_aicpu_op_info_list.data(),  sizeof(SysOpInfo) * res_op_nums, reinterpret_cast<void *>(reinterpret_cast<uintptr_t>(op_check_info_res.sysOpInfoList)),  sizeof(SysOpInfo) * res_op_nums, RT_MEMCPY_DEVICE_TO_HOST));
+    GE_CHK_RT(rtMemcpy(res_ret_code_list.data(), sizeof(ReturnCode) * res_op_nums,
+                       reinterpret_cast<void *>(reinterpret_cast<uintptr_t>(op_check_info_res.returnCodeList)),
+                       sizeof(ReturnCode) * res_op_nums, RT_MEMCPY_DEVICE_TO_HOST));
+    GE_CHK_RT(rtMemcpy(res_aicpu_op_info_list.data(), sizeof(SysOpInfo) * res_op_nums,
+                       reinterpret_cast<void *>(reinterpret_cast<uintptr_t>(op_check_info_res.sysOpInfoList)),
+                       sizeof(SysOpInfo) * res_op_nums, RT_MEMCPY_DEVICE_TO_HOST));
     if (res_ret_code_list.size() != res_aicpu_op_info_list.size() || res_ret_code_list.size() != res_op_nums) {
       GELOGE(FAILED, "Number of retcode is not equal to number of op type.");
       GE_MAKE_GUARD(release, callback);
@@ -1681,14 +1688,19 @@ Status ModelManager::LaunchKernelCheckAicpuOp(std::vector<std::string> &aicpu_op
     for (uint32_t i = 0; i < res_op_nums; i++) {
       ReturnCode ret_code = res_ret_code_list.at(i);
       SysOpInfo aicpu_info = res_aicpu_op_info_list.at(i);
-      GELOGI("Not support aicpu op type: %lu, kernel_type:%d, opLen:%d, ret_code:%d", aicpu_info.opType, aicpu_info.kernelsType, aicpu_info.opLen, ret_code);
+      GELOGI("Not support aicpu op type: %lu, kernel_type:%d, opLen:%d, ret_code:%d", aicpu_info.opType,
+             aicpu_info.kernelsType, aicpu_info.opLen, ret_code);
       std::vector<char> op_name;
       op_name.clear();
       op_name.resize(kOpNameMaxSize);
-      GE_CHK_RT(rtMemcpy(op_name.data(), aicpu_info.opLen, reinterpret_cast<void *>(aicpu_info.opType), aicpu_info.opLen, RT_MEMCPY_DEVICE_TO_HOST));
-      std::string kernel_type = (static_cast<OpKernelType>(aicpu_info.kernelsType) == TF_KERNEL) ? "TF_KERNEL" : "CPU_KERNEL";
+      GE_CHK_RT(rtMemcpy(op_name.data(), aicpu_info.opLen, reinterpret_cast<void *>(aicpu_info.opType),
+                         aicpu_info.opLen, RT_MEMCPY_DEVICE_TO_HOST));
+      std::string kernel_type =
+          (static_cast<OpKernelType>(aicpu_info.kernelsType) == TF_KERNEL) ? "TF_KERNEL" : "CPU_KERNEL";
       string op_name_str(op_name.data());
-      fail_reason += "op_type: " + op_name_str + " kernel_type: " + kernel_type + "  ret code:" + std::to_string(static_cast<int>(ret_code)) + "<0: op_type, 1: format, 2: datatype> \n";
+      fail_reason += "op_type: " + op_name_str + " kernel_type: " + kernel_type +
+                     "  ret code:" + std::to_string(static_cast<int>(ret_code)) +
+                     "<0: op_type, 1: format, 2: datatype> \n";
     }
     fail_reason += "not support.";
     GELOGE(FAILED, "Check aicpu op_type failed. details: %s", fail_reason.c_str());
@@ -1706,11 +1718,12 @@ Status ModelManager::CheckAicpuOpList(GeModelPtr ge_model) {
   std::vector<std::string> aicpu_tf_optype_list;
   bool aicpu_need_check = ge::AttrUtils::GetListStr(ge_model, "needCheckCpu", aicpu_optype_list);
   bool tf_need_check = ge::AttrUtils::GetListStr(ge_model, "needCheckTf", aicpu_tf_optype_list);
-  if (!aicpu_need_check  && !tf_need_check) {
+  if (!aicpu_need_check && !tf_need_check) {
     GELOGI("Graph:%s No need to check aicpu optype.", ge_model->GetGraph().GetName().c_str());
     return SUCCESS;
   }
-  GE_CHK_STATUS_RET(LaunchKernelCheckAicpuOp(aicpu_optype_list, aicpu_tf_optype_list), "Launch check aicpu op type failed.");
+  GE_CHK_STATUS_RET(LaunchKernelCheckAicpuOp(aicpu_optype_list, aicpu_tf_optype_list),
+                    "Launch check aicpu op type failed.");
   return SUCCESS;
 }
 
diff --git a/ge/hybrid/model/hybrid_model_builder.cc b/ge/hybrid/model/hybrid_model_builder.cc
index e0d0913e..b9b8e6d0 100755
--- a/ge/hybrid/model/hybrid_model_builder.cc
+++ b/ge/hybrid/model/hybrid_model_builder.cc
@@ -1579,7 +1579,8 @@ Status HybridModelBuilder::CheckAicpuOpList() {
   // reset list with set
   aicpu_optype_list.assign(aicpu_optype_set.begin(), aicpu_optype_set.end());
   aicpu_tf_optype_list.assign(aicpu_tf_optype_set.begin(), aicpu_tf_optype_set.end());
-  GE_CHK_STATUS_RET(ModelManager::GetInstance()->LaunchKernelCheckAicpuOp(aicpu_optype_list, aicpu_tf_optype_list), "Launch check aicpu op type failed.");
+  GE_CHK_STATUS_RET(ModelManager::GetInstance()->LaunchKernelCheckAicpuOp(aicpu_optype_list, aicpu_tf_optype_list),
+                    "Launch check aicpu op type failed.");
   return SUCCESS;
 }
 }  // namespace hybrid

From 0adb4ffc4b95940bce8076eb457130092e6d5b7e Mon Sep 17 00:00:00 2001
From: taoxiangdong <taoxiangdong1@huawei.com>
Date: Mon, 14 Dec 2020 15:25:11 +0800
Subject: [PATCH 09/11] update codex warnings

---
 CMakeLists.txt                           | 4 ++--
 ge/common/profiling/profiling_manager.cc | 3 ++-
 ge/common/profiling/profiling_manager.h  | 3 ++-
 3 files changed, 6 insertions(+), 4 deletions(-)

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 86d0184b..7416a130 100755
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -115,7 +115,7 @@ if (ENABLE_OPEN_SRC)
                 find_module(ascend_hal_stub libascend_hal.so ${ASCEND_DRIVER_DIR})
             endif()
         elseif(PLATFORM STREQUAL "all")
-            find_module(msprofiler libmsprofiler.a ${ASCEND_DRIVER_COMMON_DIR})
+            find_module(msprofiler libmsprofiler.a ${ASCEND_ACL_DIR})
             find_module(hccl libhccl.so ${ASCEND_RUNTIME_DIR})
             find_module(adump_server libadump_server.a ${ASCEND_ACL_DIR})
             find_module(runtime libruntime.so ${ASCEND_ACL_DIR})
@@ -123,7 +123,7 @@ if (ENABLE_OPEN_SRC)
             find_module(resource libresource.so ${ASCEND_ATC_DIR})
             find_module(error_manager liberror_manager.so ${ASCEND_ATC_DIR})
             find_module(error_manager_static liberror_manager.a ${ASCEND_ACL_DIR})
-            find_module(msprofiler_fwk libmsprofiler_fwk.a ${ASCEND_ACL_DIR})
+            find_module(msprofiler_fwk libmsprofiler_fwk.a ${ASCEND_RUNTIME_DIR})
             find_module(ascend_hal_stub libascend_hal.so ${ASCEND_DRIVER_DIR}/driver)
             #find_module(ascendcl_static libascendcl.a ${ASCEND_ACL_DIR})
         else()
diff --git a/ge/common/profiling/profiling_manager.cc b/ge/common/profiling/profiling_manager.cc
index 456cb0a4..0df6773a 100644
--- a/ge/common/profiling/profiling_manager.cc
+++ b/ge/common/profiling/profiling_manager.cc
@@ -802,7 +802,8 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY void ProfilingManager::GetFpBpP
   if (!fp_point_.empty() && !bp_point_.empty()) {
     fp_point = fp_point_;
     bp_point = bp_point_;
-    GELOGI("Bp Fp have been initialized in env or options. bp_point: %s, fp_point: %s", bp_point.c_str(), fp_point.c_str());
+    GELOGI("Bp Fp have been initialized in env or options. bp_point: %s, fp_point: %s", 
+           bp_point.c_str(), fp_point.c_str());
     return;
   }
   // ProfApi mode and training trace is set
diff --git a/ge/common/profiling/profiling_manager.h b/ge/common/profiling/profiling_manager.h
index 5fa4fac4..87822fb0 100755
--- a/ge/common/profiling/profiling_manager.h
+++ b/ge/common/profiling/profiling_manager.h
@@ -80,7 +80,8 @@ class FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY ProfilingManager {
   bool ProfilingTrainingTraceOn() const { return is_training_trace_; }
   bool ProfilingModelLoadOn() const { return is_load_profiling_; }
   bool ProfilingModelExecuteOn() const;
-  bool ProfilingOn() const { return is_load_profiling_ && is_execute_profiling_; } // is_execute_profiling_ only used by ge option and env
+  // is_execute_profiling_ only used by ge option and env
+  bool ProfilingOn() const { return is_load_profiling_ && is_execute_profiling_; }
   void ReportProfilingData(uint32_t model_id, const std::vector<TaskDescInfo> &task_desc_info,
                            const std::vector<ComputeGraphDescInfo> &compute_graph_desc_info);
   void ProfilingTaskDescInfo(uint32_t model_id, const std::vector<TaskDescInfo> &task_desc_info,

From 66ab5d7dae49fa45901e822d6baa963fa9194436 Mon Sep 17 00:00:00 2001
From: taoxiangdong <taoxiangdong1@huawei.com>
Date: Mon, 14 Dec 2020 16:24:22 +0800
Subject: [PATCH 10/11] update new line

---
 ge/graph/build/model_builder.h                  | 6 ++++--
 ge/graph/load/new_model_manager/model_manager.h | 3 ++-
 2 files changed, 6 insertions(+), 3 deletions(-)

diff --git a/ge/graph/build/model_builder.h b/ge/graph/build/model_builder.h
index b133b98b..715c16de 100644
--- a/ge/graph/build/model_builder.h
+++ b/ge/graph/build/model_builder.h
@@ -83,9 +83,11 @@ class ModelBuilder {
 
   Status CompileSingleOp();
 
-  Status CollectCheckAicpuAttr(const OpDescPtr &op_desc, std::set<std::string> &cpue_check_set, std::set<std::string> &tf_engine_set);
+  Status CollectCheckAicpuAttr(const OpDescPtr &op_desc, std::set<std::string> &cpue_check_set,
+                               std::set<std::string> &tf_engine_set);
 
-  Status SetModelCheckAicpuAttr(ge::Model &model, std::set<std::string> &aicpu_optype_set, std::set<std::string> &aicpu_tf_optype_set);
+  Status SetModelCheckAicpuAttr(ge::Model &model, std::set<std::string> &aicpu_optype_set,
+                                std::set<std::string> &aicpu_tf_optype_set);
 
   uint64_t session_id_;
 
diff --git a/ge/graph/load/new_model_manager/model_manager.h b/ge/graph/load/new_model_manager/model_manager.h
index f1f404b5..dc685519 100755
--- a/ge/graph/load/new_model_manager/model_manager.h
+++ b/ge/graph/load/new_model_manager/model_manager.h
@@ -295,7 +295,8 @@ class FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY ModelManager {
 
   ge::Status LaunchKernelCustAicpuSo(const string &kernel_name);
 
-  ge::Status LaunchKernelCheckAicpuOp(std::vector<std::string> &aicpu_optype_list, std::vector<std::string> &aicpu_tf_optype_list);
+  ge::Status LaunchKernelCheckAicpuOp(std::vector<std::string> &aicpu_optype_list,
+                                      std::vector<std::string> &aicpu_tf_optype_list);
 
   ge::Status CheckAicpuOpList(GeModelPtr ge_model);
 

From e929e6de314b575037d990ed900f803567809b3b Mon Sep 17 00:00:00 2001
From: taoxiangdong <taoxiangdong1@huawei.com>
Date: Mon, 14 Dec 2020 20:04:43 +0800
Subject: [PATCH 11/11] update thirdparty includes

---
 CMakeLists.txt                                |   1 +
 .../aicpu/aicpu_schedule/aicpu_op_type_list.h |  60 ++++
 .../inc/aicpu/common/aicpu_task_struct.h      |   4 +-
 third_party/fwkacllib/inc/cce/aicpu_engine.h  |  16 +-
 .../fwkacllib/inc/cce/aicpu_engine_struct.h   |   8 +-
 .../fwkacllib/inc/cce/fwk_adpt_struct.h       |  17 +-
 third_party/fwkacllib/inc/hccl/base.h         |  30 +-
 third_party/fwkacllib/inc/hccl/hcom.h         | 214 +++++------
 .../fwkacllib/inc/mmpa/sub_inc/mmpa_linux.h   |  58 +--
 .../fwkacllib/inc/mmpa/sub_inc/mmpa_win.h     |   6 +
 third_party/fwkacllib/inc/runtime/base.h      | 332 ++----------------
 third_party/fwkacllib/inc/runtime/config.h    |  23 +-
 third_party/fwkacllib/inc/runtime/dev.h       |   4 +-
 third_party/fwkacllib/inc/runtime/rt.h        |   2 +-
 third_party/fwkacllib/inc/tdt/status.h        |   7 +
 third_party/fwkacllib/inc/tdt/tsd_client.h    |  28 +-
 .../fwkacllib/inc/toolchain/prof_callback.h   | 135 +++++++
 .../fwkacllib/inc/toolchain/prof_reporter.h   |  26 +-
 third_party/fwkacllib/inc/toolchain/slog.h    |  25 ++
 19 files changed, 504 insertions(+), 492 deletions(-)
 create mode 100644 third_party/fwkacllib/inc/aicpu/aicpu_schedule/aicpu_op_type_list.h
 create mode 100644 third_party/fwkacllib/inc/toolchain/prof_callback.h

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 7416a130..3df71320 100755
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -105,6 +105,7 @@ if (ENABLE_OPEN_SRC)
             find_module(error_manager liberror_manager.so ${ASCEND_ATC_DIR})
             find_module(error_manager_static liberror_manager.a ${ASCEND_ACL_DIR})
             find_module(msprofiler libmsprofiler.a ${ASCEND_ACL_DIR})
+            find_module(msprofiler_fwk libmsprofiler.a ${ASCEND_ACL_DIR})
 	        #find_module(ascendcl_static libascendcl.a ${ASCEND_ACL_DIR})
             if(PRODUCT STREQUAL "flr3")
             elseif(PRODUCT STREQUAL "flr1")
diff --git a/third_party/fwkacllib/inc/aicpu/aicpu_schedule/aicpu_op_type_list.h b/third_party/fwkacllib/inc/aicpu/aicpu_schedule/aicpu_op_type_list.h
new file mode 100644
index 00000000..7e0f94a8
--- /dev/null
+++ b/third_party/fwkacllib/inc/aicpu/aicpu_schedule/aicpu_op_type_list.h
@@ -0,0 +1,60 @@
+/**
+ * Copyright 2019-2020 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef AICPU_OP_TYPE_LIST_H_
+#define AICPU_OP_TYPE_LIST_H_
+#include <stdint.h>  // uint64_t is used below; include it so this header is self-contained
+
+enum OpKernelType {
+    TF_KERNEL,   // implicit value 0
+    CPU_KERNEL   // implicit value 1
+};
+
+enum ReturnCode {  // note: every enumerator is a failure reason; no success value is defined here
+    OP_TYPE_NOT_SUPPORT,  // implicit value 0
+    FORMAT_NOT_SUPPORT,   // implicit value 1
+    DTYPE_NOT_SUPPORT     // implicit value 2
+};
+
+#pragma pack(push, 1)  // one-byte alignment: the structs up to pack(pop) have no padding
+struct SysOpInfo {
+    uint64_t opLen;            // presumably the length of the op type string - TODO confirm
+    uint64_t opType;           // presumably the address of the op type string - TODO confirm
+    OpKernelType kernelsType;  // kernel kind, see OpKernelType
+};
+
+struct OpParamInfo {
+    uint64_t num;
+    uint64_t dtypeList;   // presumably an address - TODO confirm
+    uint64_t formatList;  // presumably an address - TODO confirm
+};
+
+struct SysOpCheckInfo {
+    uint64_t opListNum;
+    uint64_t offSetLen;
+    uint64_t sysOpInfoList;    // presumably the address of a SysOpInfo array - TODO confirm
+    uint64_t opParamInfoList;  // presumably the address of an OpParamInfo array - TODO confirm
+};
+
+struct SysOpCheckResp {
+    uint64_t opListNum;
+    bool isWithoutJson;  // packed: the fields after this one are not naturally aligned
+    uint64_t returnCodeList;
+    uint64_t sysOpInfoList;
+    uint64_t opParamInfoList;
+};
+#pragma pack(pop)
+#endif  // AICPU_OP_TYPE_LIST_H_
diff --git a/third_party/fwkacllib/inc/aicpu/common/aicpu_task_struct.h b/third_party/fwkacllib/inc/aicpu/common/aicpu_task_struct.h
index c3672663..72e21f6f 100644
--- a/third_party/fwkacllib/inc/aicpu/common/aicpu_task_struct.h
+++ b/third_party/fwkacllib/inc/aicpu/common/aicpu_task_struct.h
@@ -21,13 +21,15 @@
 
 namespace aicpu {
 
+#pragma pack(push, 1)
 struct AicpuParamHead
 {
     uint32_t        length;                    // Total length: include cunstom message
     uint32_t        ioAddrNum;                 // Input and output address number
     uint32_t        extInfoLength;             // extInfo struct Length
     uint64_t        extInfoAddr;               // extInfo address
-} __attribute__ ((packed));
+};
+#pragma pack(pop)
 
 }  // namespace aicpu
 
diff --git a/third_party/fwkacllib/inc/cce/aicpu_engine.h b/third_party/fwkacllib/inc/cce/aicpu_engine.h
index 740f1200..b83731a8 100644
--- a/third_party/fwkacllib/inc/cce/aicpu_engine.h
+++ b/third_party/fwkacllib/inc/cce/aicpu_engine.h
@@ -13,10 +13,11 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
-
 #ifndef AICPU_ENGINE_H__
 #define AICPU_ENGINE_H__
 
+#include <stdint.h>
+
 #ifdef __cplusplus
 extern "C" {
 #endif
@@ -36,12 +37,23 @@ typedef enum {
 /**
  * @ingroup aicpu engine
  * @brief aeCallInterface:
- *          a interface to call  a function in a op kernfel lib
+ *          an interface to call a function in an op kernel lib
  * @param [in] addr     void *,  should be STR_KERNEL * format
  * @return aeStatus_t
  */
 aeStatus_t aeCallInterface(void *addr);
 
+/**
+ * @ingroup aicpu engine
+ * @brief aeBatchLoadKernelSo:
+ *          an interface to load a batch of kernel shared objects (.so)
+ * @param [in] loadSoNum  number of shared objects to load
+ * @param [in] soPaths    array of so paths (presumably loadSoNum entries - confirm)
+ * @param [in] soNames    array of so names (presumably loadSoNum entries - confirm)
+ * @return aeStatus_t
+ */
+aeStatus_t aeBatchLoadKernelSo(const uint32_t loadSoNum, const char *soPaths[], const char *soNames[]);
+
 #ifdef __cplusplus
 }
 #endif
diff --git a/third_party/fwkacllib/inc/cce/aicpu_engine_struct.h b/third_party/fwkacllib/inc/cce/aicpu_engine_struct.h
index a5f43be9..8c0c1847 100644
--- a/third_party/fwkacllib/inc/cce/aicpu_engine_struct.h
+++ b/third_party/fwkacllib/inc/cce/aicpu_engine_struct.h
@@ -33,18 +33,22 @@ typedef enum {
   FMK_KERNEL_TYPE_RESERVED
 } FwkkernelType_t;
 
+#pragma pack(push, 1)
 typedef struct {
   uint32_t fwkKernelType;  // FwkkernelType_t
   union {
     ::aicpu::FWKAdapter::FWKOperateParam fwk_kernel;
   } fwkKernelBase;
-} __attribute__((packed)) STR_FWK_OP_KERNEL;
+} STR_FWK_OP_KERNEL;
+#pragma pack(pop)
 
+#pragma pack(push, 1)
 struct SessionInfo {
   uint64_t sessionId;
   uint64_t kernelId;
   bool sessFlag;
-} __attribute__((packed));
+};
+#pragma pack(pop)
 
 #ifdef __cplusplus
 }
diff --git a/third_party/fwkacllib/inc/cce/fwk_adpt_struct.h b/third_party/fwkacllib/inc/cce/fwk_adpt_struct.h
index 79d94023..50b39d91 100644
--- a/third_party/fwkacllib/inc/cce/fwk_adpt_struct.h
+++ b/third_party/fwkacllib/inc/cce/fwk_adpt_struct.h
@@ -70,6 +70,7 @@ enum FWKExtUpdateAddrType {
   FWK_ADPT_UPDATE_INPUT_OUTPUT
 };
 
+#pragma pack(push, 1)
 // API Parameter Structure
 struct StrFWKKernel {
   FWKOperateType opType;
@@ -89,31 +90,39 @@ struct StrFWKKernel {
 
   uint64_t extInfoLen;         // extend info total length
   uint64_t extInfoAddr;        // extend info addr, ExtInfo structure
-} __attribute__((packed));
+};
+#pragma pack(pop)
 
 typedef StrFWKKernel FWKOperateParam;
 
 // Extent info ShapeAndType
 const uint32_t kMaxShapeDims = 8;
+#pragma pack(push, 1)
 struct ShapeAndType {
   int32_t type;
   int64_t dims[kMaxShapeDims];
-} __attribute__((packed));
+};
+#pragma pack(pop)
 
 // Extend info structure for extInfoAddr
 const uint32_t kExtInfoHeadSize = 8;
+
+#pragma pack(push, 1)
 struct ExtInfo {
   int32_t  infoType;    // extend type
   uint32_t infoLen;     // length for infoMsg
   char     infoMsg[0];  // extend value
-} __attribute__((packed));
+};
+#pragma pack(pop)
 
+#pragma pack(push, 1)
 struct ResultSummary {
   uint64_t shape_data_ptr;   // shape data addr, need convert to void*
   uint64_t shape_data_size;  // num of dims
   uint64_t raw_data_ptr;     // raw data addr,  need convert to void*
   uint64_t raw_data_size;    // size of raw data
-} __attribute__((packed));
+};
+#pragma pack(pop)
 }  // end  namespace FWKAdapter
 }  // namespace aicpu
 
diff --git a/third_party/fwkacllib/inc/hccl/base.h b/third_party/fwkacllib/inc/hccl/base.h
index 8194097e..9facd20c 100644
--- a/third_party/fwkacllib/inc/hccl/base.h
+++ b/third_party/fwkacllib/inc/hccl/base.h
@@ -22,7 +22,8 @@
 
 #ifndef HCCL_BASE_H_
 #define HCCL_BASE_H_
-
+#include <hccl/hccl_types.h>
+#include <string>
 #ifdef __cplusplus
 extern "C" {
 #endif // __cplusplus
@@ -95,6 +96,33 @@ typedef void *rtStream_t;
 */
 typedef void *rtModel_t;
 
+struct HcomOperation {
+    std::string hcclType;
+    void *inputPtr;
+    void *outputPtr;
+    u64 count;
+    HcclDataType dataType;
+    HcclReduceOp opType;
+    u32 root;
+
+    HcomOperation()  // defaults: empty hcclType, null buffers, zero count/root, RESERVED enum values
+        : inputPtr(nullptr),
+          outputPtr(nullptr),
+          count(0),
+          dataType(HCCL_DATA_TYPE_RESERVED),
+          opType(HCCL_REDUCE_RESERVED),
+          root(0)
+    {
+    }
+};
+
+struct HcomRemoteAccessAddrInfo {
+    u32 remotetRankID;  // NOTE(review): spelled "remotet"; presumably the remote rank id - confirm
+    u64 remoteAddr;  // host embedding table address
+    u64 localAddr;  // device HBM address
+    u64 length;   // memory length in bytes
+};
+
 #ifdef __cplusplus
 }
 #endif // __cplusplus
diff --git a/third_party/fwkacllib/inc/hccl/hcom.h b/third_party/fwkacllib/inc/hccl/hcom.h
index 90b96ac7..e491d43f 100644
--- a/third_party/fwkacllib/inc/hccl/hcom.h
+++ b/third_party/fwkacllib/inc/hccl/hcom.h
@@ -24,145 +24,96 @@
 
 #include <hccl/base.h>
 #include <hccl/hccl_types.h>
+#include <functional>
+#include <vector>
 
 #ifdef __cplusplus
 extern "C" {
 #endif // __cplusplus
 
-/**
- * @brief Initialize HCOM.
- *
- * @param rank_table A string identifying the rank table file path, include file name.
- * @param identify A string identifying the identify for the rank.
- * @return HcclResult
- * @see hcom_destroy()
- */
-extern HcclResult hcom_init(const char *rank_table, const char *identify);
 
-/**
- * @brief Destroy HCOM
- *
- * @return HcclResult
- * @see hcom_init()
- */
-extern HcclResult hcom_destroy(void);
-
-/**
- * @brief Bind the model.
- *
- * @param model A pointer identifying the model information.
- * @param stream A pointer identifying the stream information.
- * @return HcclResult
- * @see hcom_unbind_model()
- */
-extern HcclResult hcom_bind_model(rtModel_t model, rtStream_t stream);
 
 /**
- * @brief Unbind the model.
+ * @brief Get the rank number in the group.
  *
- * @param model An pointer identifying the model information.
- * @return HcclResult
- * @see hcom_unbind_model()
+ * @param group A string identifying the group name.
+ * @param rankSize A pointer identifying the rank number.
+ * @return HcclResult 
  */
-extern HcclResult hcom_unbind_model(rtModel_t model);
+HcclResult hcom_get_rank_size(const char *group, u32 *rankSize);
 
 /**
- * @brief All-gather operator.
+ * @brief Get the rank number in the group.
  *
- * @param tag A string identifying the tag of the operator.
- * @param inputPtr A pointer identifying the input data address of the operator.
- * @param outputPtr A pointer identifying the output data address of the operator.
- * @param inputCount An integer(u64) identifying the number of the input data.
- * @param dataType The data type of the operator, must be one of the following types: int8, int32, float16, float32.
- * @param group A string identifying the group name of ranks participating in the operator.
- * @param stream A pointer identifying the stream information.
+ * @param group A string identifying the group name.
+ * @param rankSize A pointer identifying the rank number.
  * @return HcclResult 
  */
-extern HcclResult hcom_all_gather(const char *tag, void *inputPtr, void *outputPtr, u64 inputCount,
-                                  HcclDataType dataType, const char *group, rtStream_t stream);
+HcclResult HcomGetRankSize(const char *group, u32 *rankSize);
 
 /**
- * @brief All-reduce operator.
+ * @brief Get the rank number of this rank's server within the group.
  *
- * @param tag A string identifying the tag of the operator.
- * @param inputPtr A pointer identifying the input data address of the operator.
- * @param outputPtr A pointer identifying the output data address of the operator.
- * @param count An integer(u64) identifying the number of the output data.
- * @param dataType The data type of the operator, must be one of the following types: int8, int32, float16, float32.
- * @param op The reduction type of the operator, must be one of the following types: sum, min, max, prod.
- * @param group A string identifying the group name of ranks participating in the operator.
- * @param stream A pointer identifying the stream information.
+ * @param group A string identifying the group name.
+ * @param localRankSize A pointer identifying the rank number.
  * @return HcclResult 
  */
-extern HcclResult hcom_all_reduce(const char *tag, void *inputPtr, void *outputPtr, u64 count,
-                                  HcclDataType dataType, HcclReduceOp op, const char *group, rtStream_t stream);
+HcclResult hcom_get_local_rank_size(const char *group, u32 *localRankSize);
 
 /**
- * @brief Broadcast operator.
+ * @brief Get the rank number of this rank's server within the group.
  *
- * @param tag A string identifying the tag of the operator.
- * @param ptr A pointer identifying the data address of the operator.
- * @param count An integer(u64) identifying the number of the data.
- * @param dataType The data type of the operator, must be one of the following types: int8, int32, float16, float32.
- * @param root An integer(u32) identifying the the root rank in the operator.
- * @param group A string identifying the group name of ranks participating in the operator.
- * @param stream A pointer identifying the stream information.
+ * @param group A string identifying the group name.
+ * @param localRankSize A pointer identifying the rank number.
  * @return HcclResult 
  */
-extern HcclResult hcom_broadcast(const char *tag, void *ptr, u64 count, HcclDataType dataType, u32 root,
-                                   const char *group, rtStream_t stream);
+HcclResult HcomGetLocalRankSize(const char *group, u32 *localRankSize);
 
 /**
- * @brief Reduce-scatter operator.
+ * @brief Get the rank id of this rank.
  *
- * @param tag A string identifying the tag of the operator.
- * @param inputPtr A pointer identifying the input data address of the operator.
- * @param outputPtr A pointer identifying the output data address of the operator.
- * @param count An integer(u64) identifying the number of the data.
- * @param dataType The data type of the operator, must be one of the following types: int8, int32, float16, float32.
- * @param op The reduction type of the operator, must be one of the following types: sum, min, max, prod.
- * @param group A string identifying the group name of ranks participating in the operator.
- * @param stream A pointer identifying the stream information.
+ * @param group A string identifying the group name.
+ * @param rankId A pointer identifying the rank id.
  * @return HcclResult 
  */
-extern HcclResult hcom_reduce_scatter(const char *tag, void *inputPtr, void *outputPtr, u64 count,
-                                      HcclDataType dataType, HcclReduceOp op, const char *group, rtStream_t stream);
+HcclResult hcom_get_rank_id(const char *group, u32 *rankId);
 
 /**
- * @brief Get the rank number in the group.
+ * @brief Get the rank id of this rank.
  *
  * @param group A string identifying the group name.
- * @param rankSize A pointer identifying the rank number.
+ * @param rankId A pointer identifying the rank id.
  * @return HcclResult 
  */
-HcclResult hcom_get_rank_size(const char *group, u32 *rankSize);
+HcclResult HcomGetRankId(const char *group, u32 *rankId);
 
 /**
- * @brief Get the rank number of this rank's server within the group.
+ * @brief Get the local rank id of this rank's server within the group.
  *
  * @param group A string identifying the group name.
- * @param localRankSize A pointer identifying the rank number.
+ * @param localRankId A pointer identifying the local rank id.
  * @return HcclResult 
  */
-HcclResult hcom_get_local_rank_size(const char *group, u32 *localRankSize);
+HcclResult hcom_get_local_rank_id(const char *group, u32 *localRankId);
 
 /**
- * @brief Get the rank id of this rank.
+ * @brief Get the local rank id of this rank's server within the group.
  *
  * @param group A string identifying the group name.
- * @param rankId A pointer identifying the rank id.
+ * @param localRankId A pointer identifying the local rank id.
  * @return HcclResult 
  */
-HcclResult hcom_get_rank_id(const char *group, u32 *rankId);
+HcclResult HcomGetLocalRankId(const char *group, u32 *localRankId);
 
 /**
- * @brief Get the local rank id of this rank's server within the group.
+ * @brief Get the world rank id according to the group rank id.
  *
  * @param group A string identifying the group name.
- * @param localRankId A pointer identifying the local rank id.
+ * @param groupRank An integer(u32) identifying the group rank id.
+ * @param worldRank A pointer identifying the world rank id.
  * @return HcclResult 
  */
-HcclResult hcom_get_local_rank_id(const char *group, u32 *localRankId);
+HcclResult hcom_get_world_rank_from_group_rank(const char *group, u32 groupRank, u32 *worldRank);
 
 /**
  * @brief Get the world rank id according to the group rank id.
@@ -172,7 +123,7 @@ HcclResult hcom_get_local_rank_id(const char *group, u32 *localRankId);
  * @param worldRank A pointer identifying the world rank id.
  * @return HcclResult 
  */
-HcclResult hcom_get_world_rank_from_group_rank(const char *group, u32 groupRank, u32 *worldRank);
+HcclResult HcomGetWorldRankFromGroupRank(const char *group, u32 groupRank, u32 *worldRank);
 
 /**
  * @brief Get the group rank id according to the world rank id.
@@ -184,6 +135,16 @@ HcclResult hcom_get_world_rank_from_group_rank(const char *group, u32 groupRank,
  */
 HcclResult hcom_get_group_rank_from_world_rank(u32 worldRank, const char *group, u32 *groupRank);
 
+/**
+ * @brief Get the group rank id according to the world rank id.
+ *
+ * @param worldRank An integer(u32) identifying the world rank id.
+ * @param group A string identifying the group name.
+ * @param groupRank A pointer identifying the group rank id.
+ * @return HcclResult 
+ */
+HcclResult HcomGetGroupRankFromWorldRank(u32 worldRank, const char *group, u32 *groupRank);
+
 /**
  * @brief Create group.
  *
@@ -195,60 +156,40 @@ HcclResult hcom_get_group_rank_from_world_rank(u32 worldRank, const char *group,
 HcclResult hcom_create_group(const char *group, u32 rankNum, u32 *rankIds);
 
 /**
- * @brief Destroy group
+ * @brief Create group.
  *
  * @param group A string identifying the group name.
+ * @param rankNum An integer(u32) identifying the number of ranks in the group.
+ * @param rankIds A list identifying the ranks in the group.
  * @return HcclResult 
  */
-HcclResult hcom_destroy_group(const char *group);
+HcclResult HcomCreateGroup(const char *group, u32 rankNum, u32 *rankIds);
 
 /**
- * @brief Send operator.
+ * @brief Destroy group
  *
- * @param tag A string identifying the tag of the operator.
- * @param inputPtr A pointer identifying the input data address of the operator.
- * @param count An integer(u64) identifying the number of the data.
- * @param dataType The data type of the operator, must be one of the following types: int8, int32, float16, float32.
- * @param destRank An integer identifying the destination rank.
- * @param srTag An integer identifying the send/recv message tag.
- * The message will be send by the receive operator with the same "sr_tag".
- * @param group A string identifying the group name of ranks participating in the operator.
- * @param stream A pointer identifying the stream information.
+ * @param group A string identifying the group name.
  * @return HcclResult 
  */
-HcclResult hcom_send(const char *tag, void *inputPtr, u64 count, HcclDataType dataType,
-    u32 destRank, u32 srTag, const char *group, rtStream_t stream);
+HcclResult hcom_destroy_group(const char *group);
 
 /**
- * @brief Receive operator.
+ * @brief Destroy group
  *
- * @param tag A string identifying the tag of the operator.
- * @param outputPtr A pointer identifying the output data address of the operator.
- * @param count An integer(u64) identifying the number of the data.
- * @param dataType The data type of the operator, must be one of the following types: int8, int32, float16, float32.
- * @param srcRank An integer identifying the source rank.
- * @param srTag An integer identifying the send/recv message tag. 
- * The message will be send by the send operator with the same "sr_tag".
- * @param group A string identifying the group name of ranks participating in the operator.
- * @param stream A pointer identifying the stream information.
+ * @param group A string identifying the group name.
  * @return HcclResult 
  */
-HcclResult hcom_receive(const char *tag, void *outputPtr, u64 count, HcclDataType dataType,
-    u32 srcRank, u32 srTag, const char *group, rtStream_t stream);
+HcclResult HcomDestroyGroup(const char *group);
 
 /**
- * @brief Get the gradient split strategy with in the group.
+ * @brief Set the gradient split strategy within the group, according to gradient index.
  *
  * @param group A string identifying the group name.
- * @param feature A pointer identifying the feature of the model.
- * @param maxSegmentNum An integer(u32) identifying the max segments of gradients.
- * @param segmentNum A pointer identifying the segments number of gradients.
- * @param segmentIdx A list identifying the index of end gradient in each segment.
- * @return HcclResult 
+ * @param segmentNum An integer(u32) identifying the segments number of gradients.
+ * @param IdxList A list identifying the index of end gradient in each segment.
+ * @return HcclResult
  */
-HcclResult hcom_get_split_strategy(const char *group, const struct model_feature *feature, u32 maxSegmentNum,
-    u32 *segmentNum, u32 *segmentIdx, GradSplitForceMode force = FORCE_NONE,
-    OriginalGraphShapeType shapeType = KNOWN_SHAPE);
+extern HcclResult hcom_set_split_strategy_by_index(const char *group, u32 segmentNum, const u32 *IdxList);
 
 /**
  * @brief Set the gradient split strategy with in the group, according to gradient index.
@@ -258,7 +199,7 @@ HcclResult hcom_get_split_strategy(const char *group, const struct model_feature
  * @param IdxList A list identifying the index of end gradient in each segment.
  * @return HcclResult
  */
-extern HcclResult hcom_set_split_strategy_by_index(const char *group, u32 segmentNum, const u32 *IdxList);
+extern HcclResult HcomSetGradFusionByIndex(const char *group, u32 segmentNum, const u32 *IdxList);
 
 /**
  * @brief Set the gradient split strategy with in the group, according to gradient data size.
@@ -270,6 +211,16 @@ extern HcclResult hcom_set_split_strategy_by_index(const char *group, u32 segmen
  */
 extern HcclResult hcom_set_split_strategy_by_size(const char *group, u32 segmentNum, const float *sizeList);
 
+/**
+ * @brief Set the gradient split strategy within the group, according to gradient data size.
+ *
+ * @param group A string identifying the group name.
+ * @param segmentNum An integer(u32) identifying the segments number of gradients.
+ * @param sizeList A list identifying the percent of each segment.
+ * @return HcclResult
+ */
+extern HcclResult HcomSetGradFusionBySize(const char *group, u32 segmentNum, const float *sizeList);
+
 /**
  * @brief Register memories and init resources for remote access.
  *
@@ -279,6 +230,25 @@ extern HcclResult hcom_set_split_strategy_by_size(const char *group, u32 segment
  */
 extern HcclResult hcom_remote_access_mem_register(const MemRegisterAddr* addrList, u32 count);
 
+/**
+ * @brief Register memories and init resources for remote access.
+ *
+ * @param addrList memory addresses for remote access.
+ * @param count number of remote memory addresses.
+ * @return HcclResult
+ */
+extern HcclResult HcomRegRemoteAccessMem(const MemRegisterAddr* addrList, u32 count);
+
+HcclResult HcomExecInitialize();  // NOTE(review): HcomExec* take C++ types although declared inside extern "C"
+
+HcclResult HcomExecFinalize();  // NOTE(review): presumably pairs with HcomExecInitialize - confirm
+
+HcclResult HcomExecEnqueueOperation(HcomOperation opInfo, std::function<void(HcclResult status)> callback);
+
+HcclResult HcomExecEnqueueRemoteAccess(const std::string& remoteAccessType,
+                               const std::vector<HcomRemoteAccessAddrInfo>& addrInfos,
+                               std::function<void(HcclResult status)> callback);  // callback takes an HcclResult
+
 #ifdef __cplusplus
 }
 #endif // __cplusplus
diff --git a/third_party/fwkacllib/inc/mmpa/sub_inc/mmpa_linux.h b/third_party/fwkacllib/inc/mmpa/sub_inc/mmpa_linux.h
index ea51f497..ad48f70b 100644
--- a/third_party/fwkacllib/inc/mmpa/sub_inc/mmpa_linux.h
+++ b/third_party/fwkacllib/inc/mmpa/sub_inc/mmpa_linux.h
@@ -215,6 +215,10 @@ typedef struct {
 #define S_IWRITE S_IWUSR
 #endif
 
+#define mm_no_argument        no_argument        // alias; presumably getopt_long's constant from <getopt.h>
+#define mm_required_argument  required_argument  // alias; presumably getopt_long's constant from <getopt.h>
+#define mm_optional_argument  optional_argument  // alias; presumably getopt_long's constant from <getopt.h>
+
 #define M_FILE_RDONLY O_RDONLY
 #define M_FILE_WRONLY O_WRONLY
 #define M_FILE_RDWR O_RDWR
@@ -227,6 +231,7 @@ typedef struct {
 #define M_BINARY O_RDONLY
 #define M_TRUNC O_TRUNC
 #define M_IRWXU S_IRWXU
+#define M_APPEND O_APPEND
 
 #define M_IN_CREATE IN_CREATE
 #define M_IN_CLOSE_WRITE IN_CLOSE_WRITE
@@ -342,17 +347,17 @@ MMPA_FUNC_VISIBILITY INT32 mmCloseSocket(mmSockHandle sockFd);
 MMPA_FUNC_VISIBILITY mmSsize_t mmSocketSend(mmSockHandle sockFd, VOID *sendBuf, INT32 sendLen, INT32 sendFlag);
 MMPA_FUNC_VISIBILITY mmSsize_t mmSocketRecv(mmSockHandle sockFd, VOID *recvBuf, INT32 recvLen, INT32 recvFlag);
 MMPA_FUNC_VISIBILITY INT32 mmSocketSendTo(mmSockHandle sockFd,
-                            VOID *sendMsg,
-                            INT32 sendLen,
-                            UINT32 sendFlag,
-                            const mmSockAddr* addr,
-                            INT32 tolen);
+                                          VOID *sendMsg,
+                                          INT32 sendLen,
+                                          UINT32 sendFlag,
+                                          const mmSockAddr* addr,
+                                          INT32 tolen);
 MMPA_FUNC_VISIBILITY mmSsize_t mmSocketRecvFrom(mmSockHandle sockFd,
-                                  VOID *recvBuf,
-                                  mmSize recvLen,
-                                  UINT32 recvFlag,
-                                  mmSockAddr* addr,
-                                  mmSocklen_t *FromLen);
+                                                VOID *recvBuf,
+                                                mmSize recvLen,
+                                                UINT32 recvFlag,
+                                                mmSockAddr* addr,
+                                                mmSocklen_t *FromLen);
 MMPA_FUNC_VISIBILITY INT32 mmSAStartup();
 MMPA_FUNC_VISIBILITY INT32 mmSACleanup();
 MMPA_FUNC_VISIBILITY VOID *mmDlopen(const CHAR *fileName, INT32 mode);
@@ -360,7 +365,10 @@ MMPA_FUNC_VISIBILITY INT32 mmDladdr(VOID *addr, mmDlInfo *info);
 MMPA_FUNC_VISIBILITY VOID *mmDlsym(VOID *handle, const CHAR *funcName);
 MMPA_FUNC_VISIBILITY INT32 mmDlclose(VOID *handle);
 MMPA_FUNC_VISIBILITY CHAR *mmDlerror();
-MMPA_FUNC_VISIBILITY INT32 mmCreateAndSetTimer(mmTimer *timerHandle, mmUserBlock_t *timerBlock, UINT milliSecond, UINT period);
+MMPA_FUNC_VISIBILITY INT32 mmCreateAndSetTimer(mmTimer *timerHandle,
+                                               mmUserBlock_t *timerBlock,
+                                               UINT milliSecond,
+                                               UINT period);
 MMPA_FUNC_VISIBILITY INT32 mmDeleteTimer(mmTimer timerHandle);
 MMPA_FUNC_VISIBILITY INT32 mmStatGet(const CHAR *path, mmStat_t *buffer);
 MMPA_FUNC_VISIBILITY INT32 mmStat64Get(const CHAR *path, mmStat64_t *buffer);
@@ -408,8 +416,12 @@ MMPA_FUNC_VISIBILITY VOID mmClosePipe(mmPipeHandle pipe[], UINT32 pipeCount);
 // Poll related interface
 MMPA_FUNC_VISIBILITY mmCompletionHandle mmCreateCompletionPort();
 MMPA_FUNC_VISIBILITY VOID mmCloseCompletionPort(mmCompletionHandle handle);
-MMPA_FUNC_VISIBILITY INT32 mmPoll(mmPollfd *fds, INT32 fdCount, INT32 timeout, mmCompletionHandle handleIOCP,
-                                    pmmPollData polledData, mmPollBack pollBack);
+MMPA_FUNC_VISIBILITY INT32 mmPoll(mmPollfd *fds,
+                                  INT32 fdCount,
+                                  INT32 timeout,
+                                  mmCompletionHandle handleIOCP,
+                                  pmmPollData polledData,
+                                  mmPollBack pollBack);
 MMPA_FUNC_VISIBILITY INT32 mmGetErrorCode();
 MMPA_FUNC_VISIBILITY CHAR *mmGetErrorFormatMessage(mmErrorMsg errnum, CHAR *buf, mmSize size);
 MMPA_FUNC_VISIBILITY INT32 mmGetTimeOfDay(mmTimeval *timeVal, mmTimezone *timeZone);
@@ -454,8 +466,11 @@ MMPA_FUNC_VISIBILITY VOID mmSetOpOpt(INT32 mmOptOpt);
 MMPA_FUNC_VISIBILITY CHAR *mmGetOptArg();
 MMPA_FUNC_VISIBILITY VOID mmSetOptArg(CHAR *mmOptArg);
 MMPA_FUNC_VISIBILITY INT32 mmGetOpt(INT32 argc, char *const *argv, const char *opts);
-MMPA_FUNC_VISIBILITY INT32 mmGetOptLong(INT32 argc, char *const *argv, const char *opts, const mmStructOption *longOpts,
-                                          INT32 *longIndex);
+MMPA_FUNC_VISIBILITY INT32 mmGetOptLong(INT32 argc,
+                                        char *const *argv,
+                                        const char *opts,
+                                        const mmStructOption *longOpts,
+                                        INT32 *longIndex);
 
 MMPA_FUNC_VISIBILITY LONG mmLseek(INT32 fd, INT64 offset, INT32 seekFlag);
 MMPA_FUNC_VISIBILITY INT32 mmFtruncate(mmProcess fd, UINT32 length);
@@ -521,11 +536,14 @@ MMPA_FUNC_VISIBILITY INT32 mmGetMac(mmMacInfo **list, INT32 *count);
 MMPA_FUNC_VISIBILITY INT32 mmGetMacFree(mmMacInfo *list, INT32 count);
 MMPA_FUNC_VISIBILITY INT32 mmGetCpuInfo(mmCpuDesc **cpuInfo, INT32 *count);
 MMPA_FUNC_VISIBILITY INT32 mmCpuInfoFree(mmCpuDesc *cpuInfo, INT32 count);
-MMPA_FUNC_VISIBILITY INT32 mmCreateProcess(const CHAR *fileName, const mmArgvEnv *env, const char *stdoutRedirectFile,
-                                            mmProcess *id);
-
-MMPA_FUNC_VISIBILITY INT32 mmCreateTaskWithThreadAttr(mmThread *threadHandle, const mmUserBlock_t *funcBlock,
-                                                        const mmThreadAttr *threadAttr);
+MMPA_FUNC_VISIBILITY INT32 mmCreateProcess(const CHAR *fileName,
+                                           const mmArgvEnv *env,
+                                           const char *stdoutRedirectFile,
+                                           mmProcess *id);
+
+MMPA_FUNC_VISIBILITY INT32 mmCreateTaskWithThreadAttr(mmThread *threadHandle,
+                                                      const mmUserBlock_t *funcBlock,
+                                                      const mmThreadAttr *threadAttr);
 MMPA_FUNC_VISIBILITY mmFileHandle mmShmOpen(const CHAR *name, INT32 oflag, mmMode_t mode);
 MMPA_FUNC_VISIBILITY INT32 mmShmUnlink(const CHAR *name);
 MMPA_FUNC_VISIBILITY VOID *mmMmap(mmFd_t fd, mmSize_t size, mmOfft_t offset, mmFd_t *extra, INT32 prot, INT32 flags);
diff --git a/third_party/fwkacllib/inc/mmpa/sub_inc/mmpa_win.h b/third_party/fwkacllib/inc/mmpa/sub_inc/mmpa_win.h
index 5db6bbf8..cecdd4a7 100644
--- a/third_party/fwkacllib/inc/mmpa/sub_inc/mmpa_win.h
+++ b/third_party/fwkacllib/inc/mmpa/sub_inc/mmpa_win.h
@@ -237,6 +237,11 @@ typedef struct {
 } mmThreadAttr;
 
 typedef VOID (*mmPf)(VOID);
+
+#define mm_no_argument        0
+#define mm_required_argument  1
+#define mm_optional_argument  2
+
 #define M_FILE_RDONLY GENERIC_READ
 #define M_FILE_WRONLY GENERIC_WRITE
 #define M_FILE_RDWR (GENERIC_READ | GENERIC_WRITE)
@@ -249,6 +254,7 @@ typedef VOID (*mmPf)(VOID);
 #define M_CREAT _O_CREAT
 #define M_BINARY _O_BINARY
 #define M_TRUNC _O_TRUNC
+#define M_APPEND _O_APPEND
 
 #define M_IREAD _S_IREAD
 #define M_IRUSR _S_IREAD
diff --git a/third_party/fwkacllib/inc/runtime/base.h b/third_party/fwkacllib/inc/runtime/base.h
index 4e735438..b9b2cbe5 100644
--- a/third_party/fwkacllib/inc/runtime/base.h
+++ b/third_party/fwkacllib/inc/runtime/base.h
@@ -18,6 +18,7 @@
 #define __CCE_RUNTIME_BASE_H__
 
 #include <stdint.h>
+#include "toolchain/prof_callback.h"
 
 #if defined(__cplusplus) && !defined(COMPILE_OMG_PACKAGE)
 extern "C" {
@@ -32,309 +33,8 @@ extern "C" {
 #endif
 #endif
 
-/**
- * @ingroup dvrt_base
- * @brief runtime error numbers.
- */
-typedef enum tagRtError {
-    RT_ERROR_NONE = 0x0,                    // success
-    
-    RT_ERROR_DEVICE_BASE                    = 0x07010000,
-    RT_ERROR_DEVICE_NULL,
-    RT_ERROR_DEVICE_NEW,
-    RT_ERROR_DEVICE_ID,
-    RT_ERROR_DEVICE_CHIPTYPE,
-    RT_ERROR_DEVICE_DEPLOY,
-    RT_ERROR_DEVICE_RETAIN,
-    RT_ERROR_DEVICE_PLATFORM,
-    RT_ERROR_DEVICE_LOADER,
-    RT_ERROR_DEVICE_LIMIT,
-    RT_ERROR_DEVICE_PROC_HANG_OUT,
-    RT_ERROR_DEVICE_POWER_UP_FAIL,
-    RT_ERROR_DEVICE_POWER_DOWN_FAIL,
-    RT_ERROR_DEVICE_INVALID,
-
-    RT_ERROR_DRV_BASE                       = 0x07020000,
-    RT_ERROR_DRV_NULL,
-    RT_ERROR_DRV_NEW,
-    RT_ERROR_DRV_MEMORY,
-    RT_ERROR_DRV_INPUT,
-    RT_ERROR_DRV_PTRNULL,
-    RT_ERROR_DRV_OPEN_AICPU,
-    RT_ERROR_DRV_CLOSE_AICPU,
-    RT_ERROR_DRV_SYM_AICPU,
-    RT_ERROR_DRV_OPEN_TSD,
-    RT_ERROR_DRV_CLOSE_TSD,
-    RT_ERROR_DRV_SYM_TSD,
-    RT_ERROR_DRV_SOURCE,
-    RT_ERROR_DRV_REPORT,
-    RT_ERROR_DRV_COMMAND,
-    RT_ERROR_DRV_OCCUPY,
-    RT_ERROR_DRV_ERR,
-
-    RT_ERROR_STREAM_BASE                    = 0x07030000,
-    RT_ERROR_STREAM_NULL,
-    RT_ERROR_STREAM_NEW,
-    RT_ERROR_STREAM_CONTEXT,
-    RT_ERROR_STREAM_INVALID,
-    RT_ERROR_STREAM_MODEL,
-    RT_ERROR_STREAM_FUSION,
-    RT_ERROR_STREAM_FULL,
-    RT_ERROR_STREAM_EMPTY,
-    RT_ERROR_STREAM_NOT_COMPLETE,
-    RT_ERROR_STREAM_SYNC,
-    RT_ERROR_STREAM_NO_CB_REG,
-    RT_ERROR_STREAM_DUPLICATE,
-    RT_ERROR_STREAM_NOT_EXIST,
-    RT_ERROR_SQ_NO_EXIST_SQ_TO_REUSE,
-    RT_ERROR_SQID_FULL,
-
-    RT_ERROR_MODEL_BASE                     = 0x07040000,
-    RT_ERROR_MODEL_NULL,
-    RT_ERROR_MODEL_NEW,
-    RT_ERROR_MODEL_CONTEXT,
-    RT_ERROR_MODEL_ENDGRAPH,
-    RT_ERROR_MODEL_STREAM,
-    RT_ERROR_MODEL_EXCUTOR,
-    RT_ERROR_MODEL_SETUP,
-    RT_ERROR_MODEL_ID,
-    RT_ERROR_MODEL_EXE_FAILED,
-    RT_ERROR_END_OF_SEQUENCE,               // end of sequence
-    RT_ERROR_MODEL_EXIT,
-    RT_ERROR_MODEL_EXIT_STREAM_UNBIND,
-    RT_ERROR_MODEL_EXIT_ID,
-    RT_ERROR_MODEL_ABORT_NORMAL,
-
-    RT_ERROR_EVENT_BASE                     = 0x07050000,
-    RT_ERROR_EVENT_NULL,
-    RT_ERROR_EVENT_NEW,
-    RT_ERROR_EVENT_RECORDER_NULL,
-    RT_ERROR_EVENT_TIMESTAMP_INVALID,
-    RT_ERROR_EVENT_TIMESTAMP_REVERSAL,
-    RT_ERROR_EVENT_NOT_COMPLETE,
-
-    RT_ERROR_NOTIFY_BASE                    = 0x07060000,
-    RT_ERROR_NOTIFY_NULL,
-    RT_ERROR_NOTIFY_NEW,
-    RT_ERROR_NOTIFY_TYPE,
-    RT_ERROR_NOTIFY_NOT_COMPLETE,
-
-    RT_ERROR_CONTEXT_BASE                   = 0x07070000,
-    RT_ERROR_CONTEXT_NULL,
-    RT_ERROR_CONTEXT_NEW,
-    RT_ERROR_CONTEXT_DEL,
-    RT_ERROR_CONTEXT_DEFAULT_STREAM_NULL,
-    RT_ERROR_CONTEXT_ONLINE_STREAM_NULL,
-
-    RT_ERROR_KERNEL_BASE                    = 0x07080000,
-    RT_ERROR_KERNEL_NULL,
-    RT_ERROR_KERNEL_NEW,
-    RT_ERROR_KERNEL_LOOKUP,
-    RT_ERROR_KERNEL_NAME,
-    RT_ERROR_KERNEL_TYPE,
-    RT_ERROR_KERNEL_OFFSET,
-    RT_ERROR_KERNEL_DUPLICATE,
-    RT_ERROR_KERNEL_UNREGISTERING,
-
-    RT_ERROR_PROGRAM_BASE                   = 0x07090000,
-    RT_ERROR_PROGRAM_NULL,
-    RT_ERROR_PROGRAM_NEW,
-    RT_ERROR_PROGRAM_DATA,
-    RT_ERROR_PROGRAM_SIZE,
-    RT_ERROR_PROGRAM_MEM_TYPE,
-    RT_ERROR_PROGRAM_MACHINE_TYPE,
-    RT_ERROR_PROGRAM_USEOUT,
-
-    RT_ERROR_MODULE_BASE                    = 0x070a0000,
-    RT_ERROR_MODULE_NULL,
-    RT_ERROR_MODULE_NEW,
-
-    RT_ERROR_INSTANCE_BASE                  = 0x070b0000,
-    RT_ERROR_INSTANCE_NULL,
-    RT_ERROR_INSTANCE_NEW,
-    RT_ERROR_INSTANCE_VERSION,
-
-    RT_ERROR_API_BASE                       = 0x070c0000,
-    RT_ERROR_API_NULL,
-    RT_ERROR_API_NEW,
-
-    RT_ERROR_DATADUMP_BASE                  = 0x070d0000,
-    RT_ERROR_DATADUMP_NULL,
-    RT_ERROR_DATADUMP_NEW,
-    RT_ERROR_DATADUMP_TIME,
-    RT_ERROR_DATADUMP_FILE,
-    RT_ERROR_DATADUMP_ADDRESS,
-    RT_ERROR_DATADUMP_LOAD_FAILED,
-    RT_ERROR_DUMP_ADDR_SET_FAILED,
-
-    RT_ERROR_PROF_BASE                      = 0x070e0000,
-    RT_ERROR_PROF_NULL,
-    RT_ERROR_PROF_NEW,
-    RT_ERROR_PROF_START,
-    RT_ERROR_PROF_DEVICE_MEM,
-    RT_ERROR_PROF_HOST_MEM,
-    RT_ERROR_PROF_SET_DIR,
-    RT_ERROR_PROF_OPER,
-    RT_ERROR_PROF_FULL,
-    RT_ERROR_PROF_NAME,
-
-    RT_ERROR_PCTRACE_BASE                   = 0x070f0000,
-    RT_ERROR_PCTRACE_NULL,
-    RT_ERROR_PCTRACE_NEW,
-    RT_ERROR_PCTRACE_TIME,
-    RT_ERROR_PCTRACE_FILE,
-
-    RT_ERROR_TASK_BASE                      = 0x07100000,
-    RT_ERROR_TASK_NULL,
-    RT_ERROR_TASK_NEW,
-    RT_ERROR_TASK_TYPE,
-    RT_ERROR_TASK_ALLOCATOR,
-
-    RT_ERROR_COMMON_BASE                    = 0x07110000,
-    RT_ERROR_INVALID_VALUE,             // RT_ERROR_INPUT_INVALID
-    RT_ERROR_MEMORY_ADDRESS_UNALIGNED,
-    RT_ERROR_SEC_HANDLE,
-    RT_ERROR_OS_HANDLE,
-    RT_ERROR_MUTEX_LOCK,
-    RT_ERROR_MUTEX_UNLOCK,
-    RT_ERROR_CALLOC,
-    RT_ERROR_POOL_RESOURCE,
-    RT_ERROR_TRANS_ARGS,
-    RT_ERROR_METADATA,
-    RT_ERROR_LOST_HEARTBEAT,
-    RT_ERROR_REPORT_TIMEOUT,
-    RT_ERROR_FEATURE_NOT_SUPPROT,
-    RT_ERROR_MEMORY_ALLOCATION,
-    RT_ERROR_MEMORY_FREE,
-    RT_ERROR_INVALID_MEMORY_TYPE,
-
-    RT_ERROR_DEBUG_BASE                     = 0x07120000,
-    RT_ERROR_DEBUG_NULL,
-    RT_ERROR_DEBUG_NEW,
-    RT_ERROR_DEBUG_SIGNAL,
-    RT_ERROR_DEBUG_OPEN,
-    RT_ERROR_DEBUG_WRITE,
-    RT_ERROR_DEBUG_REGISTER_FAILED,
-    RT_ERROR_DEBUG_UNREGISTER_FAILED,
-
-    RT_ERROR_ENGINE_BASE                    = 0x07130000,
-    RT_ERROR_ENGINE_NULL,
-    RT_ERROR_ENGINE_NEW,
-    RT_ERROR_ENGINE_THREAD,
-
-    RT_ERROR_LABEL_BASE                     = 0x07140000,
-    RT_ERROR_LABEL_NULL,
-    RT_ERROR_LABEL_NEW,
-    RT_ERROR_LABEL_CONTEXT,
-    RT_ERROR_LABEL_STREAM,
-    RT_ERROR_LABEL_MODEL,
-    RT_ERROR_LABEL_ALLOCATOR,
-    RT_ERROR_LABEL_FREE,
-    RT_ERROR_LABEL_SET,
-    RT_ERROR_LABEL_ID,
-
-    RT_ERROR_TSFW_BASE                      = 0x07150000,
-    RT_ERROR_TSFW_UNKNOWN,
-    RT_ERROR_TSFW_NULL_PTR,
-    RT_ERROR_TSFW_ILLEGAL_AI_CORE_ID,
-    RT_ERROR_TSFW_ILLEGAL_PARAM,
-    RT_ERROR_TSFW_TASK_CMD_QUEUE_FULL,
-    RT_ERROR_TSFW_TASK_CMD_QUEUE_EMPTY,
-    RT_ERROR_TSFW_TASK_REPORT_QUEUE_FULL,
-    RT_ERROR_TSFW_TASK_REPORT_QUEUE_EMPTY,
-    RT_ERROR_TSFW_TASK_NODE_BUFF_ALL_OCCUPYED,
-    RT_ERROR_TSFW_TASK_NODE_BUFF_ALL_FREED,
-    RT_ERROR_TSFW_L2_MEM_INSUFFICIENT_SPACE,
-    RT_ERROR_TSFW_L2_MALLOC_FAILED,
-    RT_ERROR_TSFW_DMA_CHANNEL_ALL_OCCUPYED,
-    RT_ERROR_TSFW_MEMCPY_OP_FAILED,
-    RT_ERROR_TSFW_BS_SLOT_ALL_OCCUPYED,
-    RT_ERROR_TSFW_TBS_SLOT_REPEAT_FREE,
-    RT_ERROR_TSFW_PRIORITY_TASK_LIST_FULL,
-    RT_ERROR_TSFW_PRIORITY_TASK_LIST_EMPTY,
-    RT_ERROR_TSFW_NO_STREAM_LIST_NEED_TO_BE_PROCESSED,
-    RT_ERROR_TSFW_REPEAT_MARK_STREAM_NEED_SERVICE,
-    RT_ERROR_TSFW_SYS_DMA_CHANNEL_ALL_OCCUPAPYED,
-    RT_ERROR_TSFW_NO_HBML2TASKNODE_FOUND,
-    RT_ERROR_TSFW_SQNODE_NODE_SLOT_ALL_OCCUPAPYED,
-    RT_ERROR_TSFW_CQNODE_NODE_SLOT_ALL_OCCUPAPYED,
-    RT_ERROR_TSFW_SQNODE_NOT_ENOUGH,
-    RT_ERROR_TSFW_SQNODE_SLOT_REPEAT_FREE,
-    RT_ERROR_TSFW_CQNODE_SLOT_REPEAT_FREE,
-    RT_ERROR_TSFW_CQ_REPORT_FAILED,
-    RT_ERROR_TSFW_SYS_DMA_RESET_SUCCESS,
-    RT_ERROR_TSFW_SYS_DMA_RESET_FAILED,
-    RT_ERROR_TSFW_SYS_DMA_TRNSFER_FAILED,
-    RT_ERROR_TSFW_SYS_DMA_MEMADDRALIGN_FAILED,
-    RT_ERROR_TSFW_SYS_DMA_ERROR_QUEUE_FULL,
-    RT_ERROR_TSFW_SYS_DMA_ERROR_QUEUE_EMPTY,
-    RT_ERROR_TSFW_TIMER_EVENT_FULL,
-    RT_ERROR_TSFW_TASK_L2_DESC_ENTRY_NOT_ENOUGH,
-    RT_ERROR_TSFW_AICORE_TIMEOUT,
-    RT_ERROR_TSFW_AICORE_EXCEPTION,
-    RT_ERROR_TSFW_AICORE_TRAP_EXCEPTION,
-    RT_ERROR_TSFW_AICPU_TIMEOUT,
-    RT_ERROR_TSFW_SDMA_L2_TO_DDR_MALLOC_FAIL,
-    RT_ERROR_TSFW_AICPU_EXCEPTION,
-    RT_ERROR_TSFW_AICPU_DATADUMP_RSP_ERR,
-    RT_ERROR_TSFW_AICPU_MODEL_RSP_ERR,
-    RT_ERROR_TSFW_REPEAT_ACTIVE_MODEL_STREAM,
-    RT_ERROR_TSFW_REPEAT_NOTIFY_WAIT,
-    RT_ERROR_TSFW_DEBUG_INVALID_SQCQ,
-    RT_ERROR_TSFW_DEBUG_WRONG_COMMAND_TYPE,
-    RT_ERROR_TSFW_DEBUG_CMD_PROCESS,
-    RT_ERROR_TSFW_DEBUG_INVALID_DEVICE_STATUS,
-    RT_ERROR_TSFW_DEBUG_NOT_IN_DEBUG_STATUS,
-    RT_ERROR_TSFW_DEBUG_INVALID_TASK_STATUS,
-    RT_ERROR_TSFW_DEBUG_TASK_EMPTY,
-    RT_ERROR_TSFW_DEBUG_TASK_FULL,
-    RT_ERROR_TSFW_DEBUG_TASK_NOT_EXIST,
-    RT_ERROR_TSFW_DEBUG_AI_CORE_FULL,
-    RT_ERROR_TSFW_DEBUG_AI_CORE_NOT_EXIST,
-    RT_ERROR_TSFW_DEBUG_AI_CORE_EXCEPTION,
-    RT_ERROR_TSFW_DEBUG_AI_CORE_TIMEOUT,
-    RT_ERROR_TSFW_DEBUG_BREAKPOINT_FULL,
-    RT_ERROR_TSFW_DEBUG_READ_ERROR,
-    RT_ERROR_TSFW_DEBUG_WRITE_FAIL,
-    RT_ERROR_TSFW_QUEUE_FULL,
-    RT_ERROR_TSFW_QUEUE_EMPTY,
-    RT_ERROR_TSFW_QUEUE_ALLOC_MEM_FAIL,
-    RT_ERROR_TSFW_QUEUE_DATA_SIZE_UNMATCH,
-    RT_ERROR_TSFW_PCIE_DMA_INVLD_CPY_TYPE,
-    RT_ERROR_TSFW_INVLD_CPY_DIR,
-    RT_ERROR_TSFW_PCIE_DMA_INVLD_CQ_DES,
-    RT_ERROR_TSFW_PCIE_DMA_CPY_ERR,
-    RT_ERROR_TSFW_PCIE_DMA_LNK_CHN_BUSY,
-    RT_ERROR_TSFW_PROFILE_BUFF_FULL,
-    RT_ERROR_TSFW_PROFILE_MODE_CONFLICT,
-    RT_ERROR_TSFW_PROFILE_OTHER_PID_ON,
-    RT_ERROR_TSFW_SCHD_AIC_TASK_PRELOAD_FAILED,
-    RT_ERROR_TSFW_TSCPU_CLOSE_FAILED,
-    RT_ERROR_TSFW_EXPECT_FAIL,
-    RT_ERROR_TSFW_REPEAT_MODEL_STREAM,
-    RT_ERROR_TSFW_STREAM_MODEL_UNBIND,
-    RT_ERROR_TSFW_MODEL_EXE_FAILED,
-    RT_ERROR_TSFW_IPC_SEND_FAILED,
-    RT_ERROR_TSFW_IPC_PROC_REG_FAILED,
-    RT_ERROR_TSFW_STREAM_FULL,
-    RT_ERROR_TSFW_END_OF_SEQUENCE,
-    RT_ERROR_TSFW_SWITCH_STREAM_LABEL,
-    RT_ERROR_TSFW_TRANS_SQE_FAIL,
-    RT_ERROR_TSFW_RESERVED,
-
-    RT_ERROR_SUBSCRIBE_BASE                = 0x07160000,
-    RT_ERROR_SUBSCRIBE_NULL,
-    RT_ERROR_SUBSCRIBE_NEW,
-    RT_ERROR_SUBSCRIBE_STREAM,
-    RT_ERROR_SUBSCRIBE_THREAD,
-    RT_ERROR_SUBSCRIBE_GROUP,
-
-    RT_ERROR_GROUP_BASE                    = 0x07170000,
-    RT_ERROR_GROUP_NOT_SET,
-    RT_ERROR_GROUP_NOT_CREATE,
-
-    RT_ERROR_RESERVED                      = 0x07ff0000,
-  }rtError_t;
+typedef int32_t rtError_t;
+static const int32_t RT_ERROR_NONE = 0; // success
 
 /**
  * @ingroup dvrt_base
@@ -387,10 +87,20 @@ typedef struct rtExceptionInfo {
     uint32_t deviceid;
 } rtExceptionInfo;
 
+typedef struct rtTaskFailInfo {
+    uint32_t taskid;
+    uint32_t streamid;
+    uint32_t tid;
+    uint32_t deviceid;
+    uint32_t retcode;
+} rtTaskFailInfo;
+
 typedef void (*rtErrorCallback)(rtExceptionType);
 
 typedef void (*rtTaskFailCallback)(rtExceptionInfo *exceptionInfo);
 
+typedef void (*rtTaskFailCallbackByModule)(rtTaskFailInfo *exceptionInfo);
+
 typedef void (*rtDeviceStateCallback)(uint32_t devId, bool isOpen);
 
 /**
@@ -447,6 +157,12 @@ RTS_API rtError_t rtProfilerStop(uint64_t profConfig, int32_t numsDev, uint32_t*
  */
 RTS_API rtError_t rtProfilerTrace(uint64_t id, bool notify, uint32_t flags, rtStream_t stream);
 
+/**
+ * @ingroup profiling_base
+ * @brief ts set profiling reporter callback.
+ */
+RTS_API rtError_t rtSetMsprofReporterCallback(MsprofReporterCallback callback);
+
 /**
  * @ingroup dvrt_base
  * @brief Returns the last error from a runtime call.
@@ -485,6 +201,16 @@ RTS_API rtError_t rtSetTaskFailCallback(rtTaskFailCallback callback);
  */
 RTS_API rtError_t rtRegDeviceStateCallback(const char *regName, rtDeviceStateCallback callback);
 
+/**
+ * @ingroup dvrt_base
+ * @brief register callback for fail task
+ * @param [in] moduleName unique register name, can't be null
+ * @param [in] callback fail task callback function
+ * @param [out] NA
+ * @return RT_ERROR_NONE for ok
+ */
+RTS_API rtError_t rtRegTaskFailCallbackByModule(const char *moduleName, rtTaskFailCallbackByModule callback);
+
 /**
  * @ingroup dvrt_base
  * @brief notify handle.
diff --git a/third_party/fwkacllib/inc/runtime/config.h b/third_party/fwkacllib/inc/runtime/config.h
index f1a70eaa..12a407d7 100644
--- a/third_party/fwkacllib/inc/runtime/config.h
+++ b/third_party/fwkacllib/inc/runtime/config.h
@@ -121,14 +121,6 @@ typedef struct tagRtMemoryConfig {
 
 typedef struct tagRtPlatformConfig { uint32_t platformConfig; } rtPlatformConfig_t;
 
-/**
- * @ingroup
- * @brief get platform
- * @param [in] platForm
- * @return platForm
- */
-RTS_API rtError_t rtGetPlatformConfig(rtPlatformConfig_t *platForm);
-
 /**
  * @ingroup
  * @brief get AI core count
@@ -169,13 +161,6 @@ RTS_API rtError_t rtGetAiCoreMemoryRates(rtAiCoreMemoryRates_t *aiCoreMemoryRate
  */
 RTS_API rtError_t rtGetMemoryConfig(rtMemoryConfig_t *memoryConfig);
 
-/**
- * @ingroup
- * @brief set platform in gen ctx
- * @param [in] platForm
- * @return RT_ERROR_NONE for ok, errno for failed
- */
-RTS_API rtError_t rtSetPlatformType(rtPlatformType_t platformType);
 
 /**
  * @ingroup
@@ -185,6 +170,14 @@ RTS_API rtError_t rtSetPlatformType(rtPlatformType_t platformType);
  */
 RTS_API rtError_t rtMemGetL2Info(rtStream_t stream, void **ptr, uint32_t *size);
 
+/**
+ * @ingroup
+ * @brief get runtime version. The version is returned as (1000 * major + 10 * minor). For example, RUNTIME 9.2 would be represented by 9020.
+ * @param [out] runtimeVersion
+ * @return RT_ERROR_NONE for ok
+ * @return RT_ERROR_INVALID_VALUE for error input
+ */
+RTS_API rtError_t rtGetRuntimeVersion(uint32_t *runtimeVersion);
 #if defined(__cplusplus) && !defined(COMPILE_OMG_PACKAGE)
 }
 #endif
diff --git a/third_party/fwkacllib/inc/runtime/dev.h b/third_party/fwkacllib/inc/runtime/dev.h
index b378e3b0..d1a91a9b 100644
--- a/third_party/fwkacllib/inc/runtime/dev.h
+++ b/third_party/fwkacllib/inc/runtime/dev.h
@@ -330,12 +330,12 @@ RTS_API rtError_t rtGetPairDevicesInfo(uint32_t devId, uint32_t otherDevId, int3
                     FEATURE_TYPE_MEMCPY = 0,
                     FEATURE_TYPE_RSV,
                } rtFeatureType_t;
- * @param [in] infoType   info type
+ * @param [in] featureInfo  info type
                typedef enum tagMemcpyInfo {
                     MEMCPY_INFO_SUPPORT_ZEROCOPY = 0,
                     MEMCPY_INFO _RSV,
                } rtMemcpyInfo_t;
- * @param [out] value   the capability info
+ * @param [out] value  the capability info RT_CAPABILITY_SUPPORT or RT_CAPABILITY_NOT_SUPPORT
  * @return RT_ERROR_NONE for ok
  */
 RTS_API rtError_t rtGetRtCapability(rtFeatureType_t featureType, int32_t featureInfo, int64_t *value);
diff --git a/third_party/fwkacllib/inc/runtime/rt.h b/third_party/fwkacllib/inc/runtime/rt.h
index d3d5956f..83cafa3c 100644
--- a/third_party/fwkacllib/inc/runtime/rt.h
+++ b/third_party/fwkacllib/inc/runtime/rt.h
@@ -28,4 +28,4 @@
 #include "rt_model.h"
 #include "stream.h"
 
-#endif  // __CCE_RUNTIME_RT_H__
\ No newline at end of file
+#endif  // __CCE_RUNTIME_RT_H__
diff --git a/third_party/fwkacllib/inc/tdt/status.h b/third_party/fwkacllib/inc/tdt/status.h
index d30564b8..d5050f35 100644
--- a/third_party/fwkacllib/inc/tdt/status.h
+++ b/third_party/fwkacllib/inc/tdt/status.h
@@ -34,9 +34,16 @@ using TDT_StatusT = uint32_t;
 typedef uint32_t TDT_StatusT;
 #endif
 
+#define LINUX 0
+#define WINDOWS 1
+
 #ifndef TDT_LIB_EXPORT
+#if(TARGET_SYSTEM_NAME == WINDOWS)
+#define TDT_LIB_EXPORT __declspec(dllexport)
+#else
 #define TDT_LIB_EXPORT __attribute__((visibility("default")))
 #endif
+#endif
 /**
  * @ingroup  tdt status.
  *
diff --git a/third_party/fwkacllib/inc/tdt/tsd_client.h b/third_party/fwkacllib/inc/tdt/tsd_client.h
index 6066a12e..665c8b82 100644
--- a/third_party/fwkacllib/inc/tdt/tsd_client.h
+++ b/third_party/fwkacllib/inc/tdt/tsd_client.h
@@ -23,6 +23,7 @@
 #include <mutex>
 #include "tdt/status.h"
 #include "tdt/data_common.h"
+#include "toolchain/prof_callback.h"
 
 #ifdef __cplusplus
 extern "C" {
@@ -37,7 +38,7 @@ extern "C" {
 * Used for the Framework process to communicate with the TSDDaemon process,
 * and notify TSD to complete the initialization of other processes
 *
-* @param phyDeviceId [IN] type #unsigned int. Physical device ID
+* @param logicDeviceId [IN] type #unsigned int. Logic device ID
 * @param rankSize [IN] type #unsigned int. The rankSize of the training.
 * The default value is 1. When rankSize is greater than 1,
 * HCCP will be pulled to perform set communication related operations.
@@ -49,7 +50,7 @@ extern "C" {
 * @li tsd_client.h: Header file where the interface declaration is located.
 * @li data_common.h: Header file where 'TDT_StatusT' defined
 */
-TDT_LIB_EXPORT TDT_StatusT TsdOpen(const uint32_t phyDeviceId, const uint32_t rankSize);
+TDT_LIB_EXPORT TDT_StatusT TsdOpen(const uint32_t logicDeviceId, const uint32_t rankSize);
 
 /**
 * @ingroup Close
@@ -67,7 +68,7 @@ TDT_LIB_EXPORT TDT_StatusT TsdOpen(const uint32_t phyDeviceId, const uint32_t ra
 * @li tsd_client.h: Header file where the interface declaration is located.
 * @li data_common.h: Header file where 'TDT_StatusT' defined
 */
-TDT_LIB_EXPORT TDT_StatusT TsdClose(const uint32_t phyDeviceId);
+TDT_LIB_EXPORT TDT_StatusT TsdClose(const uint32_t logicDeviceId);
 
 /**
 * @ingroup UpdateProfilingMode
@@ -85,7 +86,26 @@ TDT_LIB_EXPORT TDT_StatusT TsdClose(const uint32_t phyDeviceId);
 * @li tsd_client.h: Header file where the interface declaration is located.
 * @li data_common.h: Header file where 'TDT_StatusT' defined
 */
-TDT_LIB_EXPORT TDT_StatusT UpdateProfilingMode(const uint32_t phyDeviceId, const uint32_t flag);
+TDT_LIB_EXPORT TDT_StatusT UpdateProfilingMode(const uint32_t logicDeviceId, const uint32_t flag);
+
+/**
+* @ingroup TsdSetMsprofReporterCallback
+* @brief Sets the aicpu profiling callback function in inference scenarios
+*
+* @par Function
+* Sets the profiling callback function of the aicpu_sd process in offline mode
+*
+* @param callback [IN] type #MsprofReporterCallback. The callback function
+* @retval TDT_OK Success
+* @retval OtherValues Failure
+*
+* @par Dependency
+* @li libtsdclient.so: Library to which the interface belongs.
+* @li tsd_client.h: Header file where the interface declaration is located.
+* @li data_common.h: Header file where 'TDT_StatusT' defined
+* @li prof_callback.h: Header file where 'MsprofReporterCallback' is defined
+*/
+TDT_LIB_EXPORT TDT_StatusT TsdSetMsprofReporterCallback(MsprofReporterCallback callback);
 
 /**
 * @ingroup CreateCmdParameterObj
diff --git a/third_party/fwkacllib/inc/toolchain/prof_callback.h b/third_party/fwkacllib/inc/toolchain/prof_callback.h
new file mode 100644
index 00000000..3fad74bc
--- /dev/null
+++ b/third_party/fwkacllib/inc/toolchain/prof_callback.h
@@ -0,0 +1,135 @@
+/**
+ * Copyright 2020-2020 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ * @file prof_callback.h
+ * @brief declaration of profiling callbacks
+ */
+
+#ifndef MSPROFILER_PROF_CALLBACK_H_
+#define MSPROFILER_PROF_CALLBACK_H_
+
+#ifdef __cplusplus
+extern "C" {
+#endif // __cplusplus
+
+
+#include "stddef.h"
+#include "stdint.h"
+
+/**
+ * @name  MsprofErrorCode
+ * @brief error code
+ */
+enum MsprofErrorCode {
+    MSPROF_ERROR_NONE = 0,
+    MSPROF_ERROR_MEM_NOT_ENOUGH,
+    MSPROF_ERROR_GET_ENV,
+    MSPROF_ERROR_CONFIG_INVALID,
+    MSPROF_ERROR_ACL_JSON_OFF,
+    MSPROF_ERROR,
+};
+
+#define MSPROF_ENGINE_MAX_TAG_LEN (31)
+
+/**
+ * @name  ReporterData
+ * @brief struct of data to report
+ */
+struct ReporterData {
+    char tag[MSPROF_ENGINE_MAX_TAG_LEN + 1];  // the sub-type of the module, data with different tag will be written
+    int deviceId;                             // the index of device
+    size_t dataLen;                           // the length of send data
+    unsigned char *data;                      // the data content
+};
+
+/**
+ * @name  MsprofReporterModuleId
+ * @brief module id of data to report
+ */
+enum MsprofReporterModuleId {
+    MSPROF_MODULE_DATA_PREPROCESS = 0,    // DATA_PREPROCESS
+    MSPROF_MODULE_HCCL,                   // HCCL
+    MSPROF_MODULE_ACL,                    // AclModule
+    MSPROF_MODULE_FRAMEWORK,              // Framework
+    MSPROF_MODULE_RUNTIME                 // runtime
+};
+
+/**
+ * @name  MsprofReporterCallbackType
+ * @brief reporter callback request type
+ */
+enum MsprofReporterCallbackType {
+    MSPROF_REPORTER_REPORT = 0,           // report data
+    MSPROF_REPORTER_INIT,                 // init reporter
+    MSPROF_REPORTER_UNINIT,               // uninit reporter
+};
+
+/**
+ * @name  MsprofReporterCallback
+ * @brief callback to start reporter/stop reporter/report data
+ * @param moduleId  [IN] enum MsprofReporterModuleId
+ * @param type      [IN] enum MsprofReporterCallbackType
+ * @param data      [IN] callback data (nullptr on INIT/UNINIT)
+ * @param len       [IN] callback data size (0 on INIT/UNINIT)
+ * @return enum MsprofErrorCode
+ */
+typedef int32_t (*MsprofReporterCallback)(uint32_t moduleId, uint32_t type, void *data, uint32_t len);
+
+
+#define MSPROF_OPTIONS_DEF_LEN_MAX (2048)
+
+/**
+ * @name  MsprofGeOptions
+ * @brief struct of MSPROF_CTRL_INIT_GE_OPTIONS
+ */
+struct MsprofGeOptions {
+    char jobId[MSPROF_OPTIONS_DEF_LEN_MAX];
+    char options[MSPROF_OPTIONS_DEF_LEN_MAX];
+};
+
+/**
+ * @name  MsprofCtrlCallbackType
+ * @brief ctrl callback request type
+ */
+enum MsprofCtrlCallbackType {
+    MSPROF_CTRL_INIT_ACL_ENV = 0,           // start profiling with acl env
+    MSPROF_CTRL_INIT_ACL_JSON,              // start profiling with acl.json
+    MSPROF_CTRL_INIT_GE_OPTIONS,            // start profiling with ge env and options
+    MSPROF_CTRL_FINALIZE                    // stop profiling
+};
+
+/**
+ * @name  MsprofCtrlCallback
+ * @brief callback to start/stop profiling
+ * @param type      [IN] enum MsprofCtrlCallbackType
+ * @param data      [IN] callback data
+ * @param len       [IN] callback data size
+ * @return enum MsprofErrorCode
+ */
+typedef int32_t (*MsprofCtrlCallback)(uint32_t type, void *data, uint32_t len);
+
+/**
+ * @name  MsprofSetDeviceCallback
+ * @brief callback to notify set/reset device
+ * @param devId     [IN] device id
+ * @param isOpenDevice  [IN] true: set device, false: reset device
+ */
+typedef void (*MsprofSetDeviceCallback)(uint32_t devId, bool isOpenDevice);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif  // MSPROFILER_PROF_CALLBACK_H_
diff --git a/third_party/fwkacllib/inc/toolchain/prof_reporter.h b/third_party/fwkacllib/inc/toolchain/prof_reporter.h
index c734380c..ff91351b 100644
--- a/third_party/fwkacllib/inc/toolchain/prof_reporter.h
+++ b/third_party/fwkacllib/inc/toolchain/prof_reporter.h
@@ -16,7 +16,17 @@
 
 #ifndef MSPROF_ENGINE_PROF_REPORTER_H_
 #define MSPROF_ENGINE_PROF_REPORTER_H_
+#ifndef OS_TYPE
+#define OS_TYPE 0
+#endif // OS_TYPE
+
+#if (OS_TYPE != LINUX)
+#define MSVP_PROF_API __declspec(dllexport)
+#else
 #define MSVP_PROF_API __attribute__((visibility("default")))
+#endif
+
+#include "prof_callback.h"
 
 /**
  * @file prof_reporter.h
@@ -25,20 +35,6 @@
  */
 namespace Msprof {
 namespace Engine {
-/// the max tag length
-#define MSPROF_ENGINE_MAX_TAG_LEN (31)
-/**
- * @ingroup reporter
- * @brief struct ReporterData
- * the sturct of the data send to libmsprof
- */
-struct ReporterData {
-  char tag[MSPROF_ENGINE_MAX_TAG_LEN + 1];  ///< the sub-type of the module, data with different tag will be writen
-  int deviceId;                             ///< the physical id of device
-  size_t dataLen;                           ///< the length of send data
-  unsigned char *data;                      ///< the data content
-};
-
 /**
  * @ingroup reporter
  * @brief class Reporter
@@ -86,4 +82,4 @@ class MSVP_PROF_API Reporter {
 }  // namespace Engine
 }  // namespace Msprof
 
-#endif  // MSPROF_ENGINE_PROF_REPORTER_H_
\ No newline at end of file
+#endif  // MSPROF_ENGINE_PROF_REPORTER_H_
diff --git a/third_party/fwkacllib/inc/toolchain/slog.h b/third_party/fwkacllib/inc/toolchain/slog.h
index bce58f32..5faca0ae 100644
--- a/third_party/fwkacllib/inc/toolchain/slog.h
+++ b/third_party/fwkacllib/inc/toolchain/slog.h
@@ -18,7 +18,9 @@
 #define D_SYSLOG_H_
 
 #ifdef __cplusplus
+#ifndef LOG_CPP
 extern "C" {
+#endif
 #endif // __cplusplus
 
 #ifndef LINUX
@@ -105,6 +107,7 @@ extern "C" {
 #define SECURITY_LOG_MASK   (0x00100000)
 #define RUN_LOG_MASK        (0x01000000)
 #define OPERATION_LOG_MASK  (0x10000000)
+#define RESERVERD_LENGTH 52
 
 typedef struct tagDCODE {
   const char *cName;
@@ -116,6 +119,18 @@ typedef struct tagKV {
   char *value;
 } KeyValue;
 
+typedef enum {
+    APPLICATION = 0,
+    SYSTEM
+} ProcessType;
+
+typedef struct {
+    ProcessType type;
+    unsigned int pid;
+    unsigned int deviceId;
+    char reserved[RESERVERD_LENGTH];
+} LogAttr;
+
 /**
  * @ingroup slog
  *
@@ -228,6 +243,14 @@ DLL_EXPORT int dlog_setlevel(int moduleId, int level, int enableEvent);
  */
 DLL_EXPORT int CheckLogLevel(int moduleId, int logLevel);
 
+/**
+ * @ingroup slog
+ * @brief DlogSetAttr: set log attr, default pid is 0, default device id is 0, default process type is APPLICATION
+ * @param [in]logAttr: attr info, include pid(must be larger than 0), process type and device id(chip ID)
+ * @return: 0: SUCCEED, others: FAILED
+ */
+DLL_EXPORT int DlogSetAttr(LogAttr logAttr);
+
 /**
  * @ingroup slog
  * @brief dlog_error: print error log
@@ -367,6 +390,8 @@ void DlogInner(int moduleId, int level, const char *fmt, ...);
 void DlogWithKVInner(int moduleId, int level, KeyValue *pstKVArray, int kvNum, const char *fmt, ...);
 
 #ifdef __cplusplus
+#ifndef LOG_CPP
 }
+#endif // LOG_CPP
 #endif // __cplusplus
 #endif // D_SYSLOG_H_