diff --git a/ge/executor/ge_executor.cc b/ge/executor/ge_executor.cc index 2e668755..12b726ab 100755 --- a/ge/executor/ge_executor.cc +++ b/ge/executor/ge_executor.cc @@ -1058,6 +1058,14 @@ ge::Status GeExecutor::ExecuteAsync(DynamicSingleOp *executor, const vector(rt_err)); + return RT_FAILED; + } + ModelManager::GetInstance()->ClearAICPUSo(rt_cur_ctx); return SingleOpManager::GetInstance().ReleaseResource(stream); } diff --git a/ge/graph/load/new_model_manager/model_manager.cc b/ge/graph/load/new_model_manager/model_manager.cc index 4224d3f2..960b814e 100755 --- a/ge/graph/load/new_model_manager/model_manager.cc +++ b/ge/graph/load/new_model_manager/model_manager.cc @@ -236,6 +236,7 @@ ModelManager::~ModelManager() { std::lock_guard lock(map_mutex_); model_map_.clear(); model_aicpu_kernel_.clear(); + cust_aicpu_so_.clear(); GE_IF_BOOL_EXEC(device_count > 0, GE_CHK_RT(rtDeviceReset(0))); } @@ -399,7 +400,6 @@ Status ModelManager::Unload(uint32_t model_id) { } std::lock_guard lock(exeception_infos_mutex_); exception_infos_.clear(); - cust_aicpu_so_.clear(); return SUCCESS; } @@ -1096,15 +1096,47 @@ Status ModelManager::CreateAicpuSession(uint64_t session_id) { return SUCCESS; } -Status ModelManager::LoadCustAicpuSo(const OpDescPtr op_desc, string so_name) { +Status ModelManager::LoadCustAicpuSo(const OpDescPtr op_desc, const string &so_name) { + GELOGI("LoadCustAicpuSo in, op name %s, so_name %s.", op_desc->GetName().c_str(), so_name.c_str()); std::lock_guard<std::mutex> lock(cust_aicpu_mutex_); - auto it = cust_aicpu_so_.find(so_name); + // get current context + rtContext_t rt_cur_ctx = nullptr; + auto rt_err = rtCtxGetCurrent(&rt_cur_ctx); + if (rt_err != RT_ERROR_NONE) { + GELOGE(RT_FAILED, "get current context failed, runtime result is %d", static_cast<int>(rt_err)); + return RT_FAILED; + } + // use current context as resource key instead of the so name alone + uintptr_t resource_id = reinterpret_cast<uintptr_t>(rt_cur_ctx); + auto it = cust_aicpu_so_.find(resource_id); if (it == cust_aicpu_so_.end()) { 
GE_CHK_STATUS_RET(LaunchCustAicpuSo(op_desc, so_name), "LaunchCustAicpuSo failed. op name %s, so_name %s", op_desc->GetName().c_str(), so_name.c_str()); - (void)cust_aicpu_so_.insert(so_name); - GELOGI("LaunchCustAicpuSo op name %s, so_name %s.", op_desc->GetName().c_str(), so_name.c_str()); + std::set<std::string> so_name_set; + so_name_set.insert(so_name); + cust_aicpu_so_[resource_id] = so_name_set; + GELOGI("LoadCustAicpuSo new aicpu so resource_id %lu.", resource_id); + return SUCCESS; + } + auto it_so_name = it->second.find(so_name); + if (it_so_name == it->second.end()) { + GE_CHK_STATUS_RET(LaunchCustAicpuSo(op_desc, so_name), "LaunchCustAicpuSo failed. op name %s, so_name %s", + op_desc->GetName().c_str(), so_name.c_str()); + it->second.insert(so_name); + GELOGI("LoadCustAicpuSo add aicpu so resource_id %lu.", resource_id); + } + return SUCCESS; +} + +Status ModelManager::ClearAICPUSo(void *ctx) { // erases the so-name record keyed by the given context + auto ctx_id = reinterpret_cast<uintptr_t>(ctx); + GELOGI("ClearAICPUSo in. resource_id = 0x%lx.", static_cast<uint64_t>(ctx_id)); + std::lock_guard<std::mutex> lock(cust_aicpu_mutex_); + auto it = cust_aicpu_so_.find(ctx_id); + if (it == cust_aicpu_so_.end()) { + return SUCCESS; } + (void)cust_aicpu_so_.erase(it); return SUCCESS; } @@ -1140,6 +1172,7 @@ Status ModelManager::LaunchCustAicpuSo(const OpDescPtr op_desc, string so_name) GE_CHK_RT(rtStreamCreate(&stream, 0)); GE_CHK_RT(rtCpuKernelLaunch(nullptr, kLoadOpFromBuf, 1, args, args_size, nullptr, stream)); + GELOGI("LaunchCustAicpuSo so buf len %u, so name len %zu.", aicpu_data_length, so_name.size()); status = rtStreamSynchronize(stream); if (status != RT_ERROR_NONE) { GELOGE(RT_FAILED, "Call rt stream sync failed, status: 0x%x", status); diff --git a/ge/graph/load/new_model_manager/model_manager.h b/ge/graph/load/new_model_manager/model_manager.h index d4852a53..9e8f61db 100755 --- a/ge/graph/load/new_model_manager/model_manager.h +++ b/ge/graph/load/new_model_manager/model_manager.h @@ -270,9 +270,9 @@ class FMK_FUNC_HOST_VISIBILITY 
FMK_FUNC_DEV_VISIBILITY ModelManager { ge::Status DestroyAicpuSessionForInfer(uint32_t model_id); - ge::Status LoadCustAicpuSo(const OpDescPtr op_desc, string so_name); - - ge::Status LaunchCustAicpuSo(const OpDescPtr op_desc, string so_name); + ge::Status LoadCustAicpuSo(const OpDescPtr op_desc, const string &so_name); + ge::Status LaunchCustAicpuSo(const OpDescPtr op_desc, string so_name); + ge::Status ClearAICPUSo(void *ctx); ge::Status GetOrigInputInfo(uint32_t model_id, uint32_t index, OriginInputInfo &orig_input_info); @@ -340,7 +340,7 @@ class FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY ModelManager { std::set sess_ids_; std::vector exception_infos_; std::mutex cust_aicpu_mutex_; - std::set cust_aicpu_so_; + std::map<uintptr_t, std::set<std::string>> cust_aicpu_so_; static DumpProperties dump_properties_; }; diff --git a/ge/single_op/single_op_model.cc b/ge/single_op/single_op_model.cc index 4892b7a1..3c5043cc 100755 --- a/ge/single_op/single_op_model.cc +++ b/ge/single_op/single_op_model.cc @@ -190,6 +190,7 @@ Status SingleOpModel::LoadAllNodes() { } ge_model->GetTBEKernelStore().LoadTBEKernelBinToOpDesc(op_desc); + ge_model->GetCustAICPUKernelStore().LoadCustAICPUKernelBinToOpDesc(op_desc); } return SUCCESS; @@ -247,7 +248,7 @@ Status SingleOpModel::BuildTaskList(SingleOp &single_op) { single_op.arg_table_.resize(single_op.input_sizes_.size() + single_op.output_sizes_.size()); ParseArgTable(tbe_task, single_op); single_op.tasks_.emplace_back(tbe_task); - } else if (kernel_type == cce::ccKernelType::AI_CPU) { + } else if (kernel_type == cce::ccKernelType::AI_CPU || kernel_type == cce::ccKernelType::CUST_AI_CPU) { GELOGD("Building AICPU_CC task"); OpTask *task = nullptr; auto ret = BuildCpuKernelTask(task_def.kernel(), &task); @@ -256,7 +257,7 @@ Status SingleOpModel::BuildTaskList(SingleOp &single_op) { } single_op.tasks_.emplace_back(task); } else { - GELOGE(UNSUPPORTED, "Only TBE kernel and AI_CPU kernel are supported, but got %u", context.kernel_type()); + 
GELOGE(UNSUPPORTED, "Only TBE kernel, AI_CPU, CUST_AI_CPU kernel are supported, but got %u", context.kernel_type()); return UNSUPPORTED; } } else if (task_type == RT_MODEL_TASK_KERNEL_EX) { @@ -391,13 +392,13 @@ Status SingleOpModel::BuildModelTaskKernel(const TaskDef &task_def, DynamicSingl TbeOpTask *tbe_task = nullptr; GE_CHK_STATUS_RET_NOLOG(BuildKernelTask(task_def.kernel(), &tbe_task)); single_op.op_task_.reset(tbe_task); - } else if (kernel_type == cce::ccKernelType::AI_CPU) { + } else if (kernel_type == cce::ccKernelType::AI_CPU || kernel_type == cce::ccKernelType::CUST_AI_CPU) { GELOGD("Building AICPU_CC task"); OpTask *task = nullptr; GE_CHK_STATUS_RET_NOLOG(BuildCpuKernelTask(task_def.kernel(), &task)); single_op.op_task_.reset(task); } else { - GELOGE(UNSUPPORTED, "Only TBE kernel and AI_CPU kernel are supported, but got %u", context.kernel_type()); + GELOGE(UNSUPPORTED, "Only TBE, AI_CPU, CUST_AI_CPU kernel are supported, but got %u", context.kernel_type()); return UNSUPPORTED; } return SUCCESS; diff --git a/ge/single_op/task/aicpu_kernel_task_builder.cc b/ge/single_op/task/aicpu_kernel_task_builder.cc index fc7a9f97..b9c5b9d0 100755 --- a/ge/single_op/task/aicpu_kernel_task_builder.cc +++ b/ge/single_op/task/aicpu_kernel_task_builder.cc @@ -15,6 +15,8 @@ */ #include "single_op/task/aicpu_kernel_task_builder.h" +#include "cce/taskdown_common.hpp" +#include "graph/load/new_model_manager/model_manager.h" namespace ge { AiCpuCCTaskBuilder::AiCpuCCTaskBuilder(const OpDescPtr &op_desc, const domi::KernelDef &kernel_def) @@ -55,6 +57,14 @@ Status AiCpuCCTaskBuilder::BuildTask(AiCpuCCTask &task) { task.SetkernelName(kernel_name); task.op_desc_ = op_desc_; + const auto &context = kernel_def_.context(); + auto kernel_type = static_cast(context.kernel_type()); + if (kernel_type == cce::ccKernelType::CUST_AI_CPU) { + task.is_custom_ = true; + task.dump_flag_ |= RT_KERNEL_CUSTOM_AICPU; + GE_CHK_STATUS_RET(ModelManager::GetInstance()->LoadCustAicpuSo(op_desc_, 
so_name), "launch cust aicpu so failed"); + } + task.num_inputs_ = op_desc_->GetInputsSize(); task.num_outputs_ = op_desc_->GetOutputsSize(); diff --git a/ge/single_op/task/op_task.cc b/ge/single_op/task/op_task.cc index b138983a..f1d293d5 100755 --- a/ge/single_op/task/op_task.cc +++ b/ge/single_op/task/op_task.cc @@ -280,8 +280,6 @@ Status AiCpuBaseTask::UpdateExtInfo(const std::vector &input_desc, for (size_t j = 0; j < num_outputs_; ++j) { GE_CHK_STATUS_RET(aicpu_ext_handle_->UpdateOutputShapeAndType(j, output_desc[j]), "Output[%zu] UpdateOutputShapeAndType failed.", j); - // debug code - GELOGD("No input and output, no need update ext info."); } } @@ -669,9 +667,10 @@ Status AiCpuCCTask::LaunchKernel(rtStream_t stream) { kernel_name_.data()); // sm_desc is nullptr, because l2 buffer does not support auto *sm_desc = reinterpret_cast(sm_desc_); - auto ret = - rtCpuKernelLaunch(static_cast(so_name_.data()), static_cast(kernel_name_.data()), - block_dim_, args_.get(), static_cast(arg_size_), sm_desc, stream); + auto ret = rtCpuKernelLaunchWithFlag(static_cast(so_name_.data()), + static_cast(kernel_name_.data()), + block_dim_, args_.get(), static_cast(arg_size_), + sm_desc, stream, dump_flag_); if (ret != RT_ERROR_NONE) { GELOGE(RT_FAILED, "Invoke rtCpuKernelLaunch failed. ret = %d", ret); return RT_FAILED; diff --git a/ge/single_op/task/op_task.h b/ge/single_op/task/op_task.h index 57be92ef..4325da33 100644 --- a/ge/single_op/task/op_task.h +++ b/ge/single_op/task/op_task.h @@ -234,6 +234,8 @@ private: uint32_t block_dim_ = 1; void *sm_desc_ = nullptr; void *io_addr_ = nullptr; + bool is_custom_ = false; + uint32_t dump_flag_ = RT_KERNEL_DEFAULT; }; } // namespace ge