@@ -1058,6 +1058,14 @@ ge::Status GeExecutor::ExecuteAsync(DynamicSingleOp *executor, const vector<GeTe | |||
} | |||
/// Release single-op resources bound to @p stream, after dropping the
/// custom-AICPU so cache entry keyed by the thread's current runtime context.
Status GeExecutor::ReleaseSingleOpResource(void *stream) {
  // Resolve the runtime context current on this thread; it is the key under
  // which custom AICPU so names were cached.
  rtContext_t current_ctx = nullptr;
  const auto rt_ret = rtCtxGetCurrent(&current_ctx);
  if (rt_ret != RT_ERROR_NONE) {
    GELOGE(RT_FAILED, "get current context failed, runtime result is %d", static_cast<int>(rt_ret));
    return RT_FAILED;
  }
  // Drop this context's cached so records, then free the stream's resources.
  ModelManager::GetInstance()->ClearAICPUSo(current_ctx);
  return SingleOpManager::GetInstance().ReleaseResource(stream);
}
@@ -236,6 +236,7 @@ ModelManager::~ModelManager() { | |||
std::lock_guard<std::mutex> lock(map_mutex_); | |||
model_map_.clear(); | |||
model_aicpu_kernel_.clear(); | |||
cust_aicpu_so_.clear(); | |||
GE_IF_BOOL_EXEC(device_count > 0, GE_CHK_RT(rtDeviceReset(0))); | |||
} | |||
@@ -399,7 +400,6 @@ Status ModelManager::Unload(uint32_t model_id) { | |||
} | |||
std::lock_guard<std::mutex> lock(exeception_infos_mutex_); | |||
exception_infos_.clear(); | |||
cust_aicpu_so_.clear(); | |||
return SUCCESS; | |||
} | |||
@@ -1096,15 +1096,47 @@ Status ModelManager::CreateAicpuSession(uint64_t session_id) { | |||
return SUCCESS; | |||
} | |||
/// @brief Record a custom AICPU so under the current runtime context and
///        launch it on first sight for that context.
/// @param op_desc  op carrying the custom AICPU kernel binary
/// @param so_name  name of the custom AICPU so to load
/// @return SUCCESS; RT_FAILED if the current context cannot be queried;
///         the LaunchCustAicpuSo error otherwise
Status ModelManager::LoadCustAicpuSo(const OpDescPtr op_desc, const string &so_name) {
  GELOGI("LoadCustAicpuSo in, op name %s, so_name %s.", op_desc->GetName().c_str(), so_name.c_str());
  std::lock_guard<std::mutex> lock(cust_aicpu_mutex_);
  // get current context
  rtContext_t rt_cur_ctx = nullptr;
  auto rt_err = rtCtxGetCurrent(&rt_cur_ctx);
  if (rt_err != RT_ERROR_NONE) {
    GELOGE(RT_FAILED, "get current context failed, runtime result is %d", static_cast<int>(rt_err));
    return RT_FAILED;
  }
  // Use the current context as the resource key, so each context re-launches
  // its own copy of the so. BUG FIX: resource_id was assigned without ever
  // being declared; declare it here.
  uintptr_t resource_id = reinterpret_cast<uintptr_t>(rt_cur_ctx);
  auto it = cust_aicpu_so_.find(resource_id);
  if (it == cust_aicpu_so_.end()) {
    // First so for this context: launch, then record it only on success.
    GE_CHK_STATUS_RET(LaunchCustAicpuSo(op_desc, so_name), "LaunchCustAicpuSo failed. op name %s, so_name %s",
                      op_desc->GetName().c_str(), so_name.c_str());
    cust_aicpu_so_[resource_id] = std::set<string>{so_name};
    GELOGI("LoadCustAicpuSo new aicpu so resource_id %lu.", static_cast<uint64_t>(resource_id));
    return SUCCESS;
  }
  // Known context: launch only if this particular so is not yet recorded.
  if (it->second.count(so_name) == 0) {
    GE_CHK_STATUS_RET(LaunchCustAicpuSo(op_desc, so_name), "LaunchCustAicpuSo failed. op name %s, so_name %s",
                      op_desc->GetName().c_str(), so_name.c_str());
    (void)it->second.insert(so_name);
    GELOGI("LoadCustAicpuSo add aicpu so resource_id %lu.", static_cast<uint64_t>(resource_id));
  }
  return SUCCESS;
}
/// @brief Drop the cached custom AICPU so names recorded for a runtime context.
/// @param ctx  runtime context (rtContext_t) whose cache entry is removed
/// @return SUCCESS always — a missing entry is not an error
Status ModelManager::ClearAICPUSo(void *ctx) {
  // BUG FIX: the original cast 'rt_cur_ctx', which is not defined in this
  // function; the key must come from the 'ctx' parameter.
  auto ctx_id = reinterpret_cast<uintptr_t>(ctx);
  GELOGI("ClearAICPUSo in. resource_id = 0x%lx.", static_cast<uint64_t>(ctx_id));
  std::lock_guard<std::mutex> lock(cust_aicpu_mutex_);
  auto it = cust_aicpu_so_.find(ctx_id);
  if (it == cust_aicpu_so_.end()) {
    return SUCCESS;
  }
  (void)cust_aicpu_so_.erase(it);
  return SUCCESS;
}
@@ -1140,6 +1172,7 @@ Status ModelManager::LaunchCustAicpuSo(const OpDescPtr op_desc, string so_name) | |||
GE_CHK_RT(rtStreamCreate(&stream, 0)); | |||
GE_CHK_RT(rtCpuKernelLaunch(nullptr, kLoadOpFromBuf, 1, args, args_size, nullptr, stream)); | |||
GELOGI("LaunchCustAicpuSo so buf len %u, so name len %u.", aicpu_data_length, so_name.size()); | |||
status = rtStreamSynchronize(stream); | |||
if (status != RT_ERROR_NONE) { | |||
GELOGE(RT_FAILED, "Call rt stream sync failed, status: 0x%x", status); | |||
@@ -270,9 +270,9 @@ class FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY ModelManager { | |||
ge::Status DestroyAicpuSessionForInfer(uint32_t model_id); | |||
ge::Status LoadCustAicpuSo(const OpDescPtr op_desc, string so_name); | |||
ge::Status LaunchCustAicpuSo(const OpDescPtr op_desc, string so_name); | |||
ge::Status LoadCustAicpuSo(const OpDescPtr op_desc, const string &so_name); | |||
ge::Status LaunchCustAicpuSo(const OpDescPtr op_desc, const string &so_name); | |||
ge::Status ClearAICPUSo(void *ctx); | |||
ge::Status GetOrigInputInfo(uint32_t model_id, uint32_t index, OriginInputInfo &orig_input_info); | |||
@@ -340,7 +340,7 @@ class FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY ModelManager { | |||
std::set<uint64_t> sess_ids_; | |||
std::vector<rtExceptionInfo> exception_infos_; | |||
std::mutex cust_aicpu_mutex_; | |||
std::set<std::string> cust_aicpu_so_; | |||
std::map<uintptr_t, std::set<std::string>> cust_aicpu_so_; | |||
static DumpProperties dump_properties_; | |||
}; | |||
@@ -190,6 +190,7 @@ Status SingleOpModel::LoadAllNodes() { | |||
} | |||
ge_model->GetTBEKernelStore().LoadTBEKernelBinToOpDesc(op_desc); | |||
ge_model->GetCustAICPUKernelStore().LoadCustAICPUKernelBinToOpDesc(op_desc); | |||
} | |||
return SUCCESS; | |||
@@ -247,7 +248,7 @@ Status SingleOpModel::BuildTaskList(SingleOp &single_op) { | |||
single_op.arg_table_.resize(single_op.input_sizes_.size() + single_op.output_sizes_.size()); | |||
ParseArgTable(tbe_task, single_op); | |||
single_op.tasks_.emplace_back(tbe_task); | |||
} else if (kernel_type == cce::ccKernelType::AI_CPU) { | |||
} else if (kernel_type == cce::ccKernelType::AI_CPU || kernel_type == cce::ccKernelType::CUST_AI_CPU) { | |||
GELOGD("Building AICPU_CC task"); | |||
OpTask *task = nullptr; | |||
auto ret = BuildCpuKernelTask(task_def.kernel(), &task); | |||
@@ -256,7 +257,7 @@ Status SingleOpModel::BuildTaskList(SingleOp &single_op) { | |||
} | |||
single_op.tasks_.emplace_back(task); | |||
} else { | |||
GELOGE(UNSUPPORTED, "Only TBE kernel and AI_CPU kernel are supported, but got %u", context.kernel_type()); | |||
GELOGE(UNSUPPORTED, "Only TBE kernel, AI_CPU, CUST_AI_CPU kernel are supported, but got %u", context.kernel_type()); | |||
return UNSUPPORTED; | |||
} | |||
} else if (task_type == RT_MODEL_TASK_KERNEL_EX) { | |||
@@ -391,13 +392,13 @@ Status SingleOpModel::BuildModelTaskKernel(const TaskDef &task_def, DynamicSingl | |||
TbeOpTask *tbe_task = nullptr; | |||
GE_CHK_STATUS_RET_NOLOG(BuildKernelTask(task_def.kernel(), &tbe_task)); | |||
single_op.op_task_.reset(tbe_task); | |||
} else if (kernel_type == cce::ccKernelType::AI_CPU) { | |||
} else if (kernel_type == cce::ccKernelType::AI_CPU || kernel_type == cce::ccKernelType::CUST_AI_CPU) { | |||
GELOGD("Building AICPU_CC task"); | |||
OpTask *task = nullptr; | |||
GE_CHK_STATUS_RET_NOLOG(BuildCpuKernelTask(task_def.kernel(), &task)); | |||
single_op.op_task_.reset(task); | |||
} else { | |||
GELOGE(UNSUPPORTED, "Only TBE kernel and AI_CPU kernel are supported, but got %u", context.kernel_type()); | |||
GELOGE(UNSUPPORTED, "Only TBE, AI_CPU, CUST_AI_CPU kernel are supported, but got %u", context.kernel_type()); | |||
return UNSUPPORTED; | |||
} | |||
return SUCCESS; | |||
@@ -15,6 +15,8 @@ | |||
*/ | |||
#include "single_op/task/aicpu_kernel_task_builder.h" | |||
#include "cce/taskdown_common.hpp" | |||
#include "graph/load/new_model_manager/model_manager.h" | |||
namespace ge { | |||
AiCpuCCTaskBuilder::AiCpuCCTaskBuilder(const OpDescPtr &op_desc, const domi::KernelDef &kernel_def) | |||
@@ -55,6 +57,14 @@ Status AiCpuCCTaskBuilder::BuildTask(AiCpuCCTask &task) { | |||
task.SetkernelName(kernel_name); | |||
task.op_desc_ = op_desc_; | |||
const auto &context = kernel_def_.context(); | |||
auto kernel_type = static_cast<cce::ccKernelType>(context.kernel_type()); | |||
if (kernel_type == cce::ccKernelType::CUST_AI_CPU) { | |||
task.is_custom_ = true; | |||
task.dump_flag_ |= RT_KERNEL_CUSTOM_AICPU; | |||
GE_CHK_STATUS_RET(ModelManager::GetInstance()->LoadCustAicpuSo(op_desc_, so_name), "launch cust aicpu so failed"); | |||
} | |||
task.num_inputs_ = op_desc_->GetInputsSize(); | |||
task.num_outputs_ = op_desc_->GetOutputsSize(); | |||
@@ -280,8 +280,6 @@ Status AiCpuBaseTask::UpdateExtInfo(const std::vector<GeTensorDesc> &input_desc, | |||
for (size_t j = 0; j < num_outputs_; ++j) { | |||
GE_CHK_STATUS_RET(aicpu_ext_handle_->UpdateOutputShapeAndType(j, output_desc[j]), | |||
"Output[%zu] UpdateOutputShapeAndType failed.", j); | |||
// debug code | |||
GELOGD("No input and output, no need update ext info."); | |||
} | |||
} | |||
@@ -669,9 +667,10 @@ Status AiCpuCCTask::LaunchKernel(rtStream_t stream) { | |||
kernel_name_.data()); | |||
// sm_desc is nullptr, because l2 buffer does not support | |||
auto *sm_desc = reinterpret_cast<rtSmDesc_t *>(sm_desc_); | |||
auto ret = | |||
rtCpuKernelLaunch(static_cast<const void *>(so_name_.data()), static_cast<const void *>(kernel_name_.data()), | |||
block_dim_, args_.get(), static_cast<uint32_t>(arg_size_), sm_desc, stream); | |||
auto ret = rtCpuKernelLaunchWithFlag(static_cast<const void *>(so_name_.data()), | |||
static_cast<const void *>(kernel_name_.data()), | |||
block_dim_, args_.get(), static_cast<uint32_t>(arg_size_), | |||
sm_desc, stream, dump_flag_); | |||
if (ret != RT_ERROR_NONE) { | |||
GELOGE(RT_FAILED, "Invoke rtCpuKernelLaunch failed. ret = %d", ret); | |||
return RT_FAILED; | |||
@@ -234,6 +234,8 @@ private: | |||
uint32_t block_dim_ = 1; | |||
void *sm_desc_ = nullptr; | |||
void *io_addr_ = nullptr; | |||
bool is_custom_ = false; | |||
uint32_t dump_flag_ = RT_KERNEL_DEFAULT; | |||
}; | |||
} // namespace ge | |||