From 8291221f64bfebc1c572a1410899830594de9d16 Mon Sep 17 00:00:00 2001 From: weiyang Date: Sat, 5 Dec 2020 15:07:17 +0800 Subject: [PATCH] fix cust aicpu --- ge/graph/load/new_model_manager/model_manager.cc | 14 ++++++++++---- ge/graph/load/new_model_manager/model_manager.h | 2 +- .../task_info/kernel_task_info.cc | 4 +++- .../task_info/super_kernel/super_kernel.cc | 2 +- .../node_executor/aicpu/aicpu_node_executor.cc | 8 ++++++-- ge/single_op/task/aicpu_kernel_task_builder.cc | 8 ++++++-- 6 files changed, 27 insertions(+), 11 deletions(-) diff --git a/ge/graph/load/new_model_manager/model_manager.cc b/ge/graph/load/new_model_manager/model_manager.cc index 74c37a1b..5d9b6e65 100755 --- a/ge/graph/load/new_model_manager/model_manager.cc +++ b/ge/graph/load/new_model_manager/model_manager.cc @@ -1243,8 +1243,8 @@ Status ModelManager::CreateAicpuSession(uint64_t session_id) { return SUCCESS; } -Status ModelManager::LoadCustAicpuSo(const OpDescPtr &op_desc, const string &so_name) { - GELOGI("LoadCustAicpuSo in, op name %s, so name %s", op_desc->GetName().c_str(), so_name.c_str()); +Status ModelManager::LoadCustAicpuSo(const OpDescPtr &op_desc, const string &so_name, bool &loaded) { + GELOGD("LoadCustAicpuSo in, op name %s, so name %s", op_desc->GetName().c_str(), so_name.c_str()); std::lock_guard lock(cust_aicpu_mutex_); CustAICPUKernelPtr aicpu_kernel = op_desc->TryGetExtAttr(OP_EXTATTR_CUSTAICPU_KERNEL, CustAICPUKernelPtr()); if (aicpu_kernel == nullptr) { @@ -1267,18 +1267,24 @@ Status ModelManager::LoadCustAicpuSo(const OpDescPtr &op_desc, const string &so_ std::map new_so_name; new_so_name.insert({so_name, aicpu_kernel}); cust_aicpu_so_[resource_id] = new_so_name; - GELOGI("LoadCustAicpuSo new aicpu so resource id %lu", resource_id); + loaded = false; + GELOGD("LoadCustAicpuSo new aicpu so name %s, resource id %lu", so_name.c_str(), resource_id); return SUCCESS; } auto it_so_name = it->second.find(so_name); if (it_so_name == it->second.end()) { it->second.insert({so_name, aicpu_kernel}); - GELOGI("LoadCustAicpuSo add aicpu so resource id %lu", resource_id); + loaded = false; + GELOGD("LoadCustAicpuSo add aicpu so name %s, resource id %lu", so_name.c_str(), resource_id); + return SUCCESS; } + loaded = true; + GELOGD("LoadCustAicpuSo so name %s has been loaded.", so_name.c_str()); return SUCCESS; } Status ModelManager::LaunchKernelCustAicpuSo(const string &kernel_name) { + GELOGD("Aicpu kernel launch task in, kernel name %s.", kernel_name.c_str()); std::lock_guard lock(cust_aicpu_mutex_); if (cust_aicpu_so_.size() == 0) return SUCCESS; // get current context diff --git a/ge/graph/load/new_model_manager/model_manager.h b/ge/graph/load/new_model_manager/model_manager.h index 9821a4ab..c1faed82 100755 --- a/ge/graph/load/new_model_manager/model_manager.h +++ b/ge/graph/load/new_model_manager/model_manager.h @@ -286,7 +286,7 @@ class FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY ModelManager { ge::Status DestroyAicpuSessionForInfer(uint32_t model_id); - ge::Status LoadCustAicpuSo(const OpDescPtr &op_desc, const string &so_name); + ge::Status LoadCustAicpuSo(const OpDescPtr &op_desc, const string &so_name, bool &loaded); ge::Status LaunchCustAicpuSo(); diff --git a/ge/graph/load/new_model_manager/task_info/kernel_task_info.cc b/ge/graph/load/new_model_manager/task_info/kernel_task_info.cc index 3e3a715d..7b11c53e 100755 --- a/ge/graph/load/new_model_manager/task_info/kernel_task_info.cc +++ b/ge/graph/load/new_model_manager/task_info/kernel_task_info.cc @@ -875,7 +875,9 @@ Status KernelTaskInfo::InitAicpuTask(uint32_t op_index, const domi::KernelDef &k } if (kernel_type_ == ccKernelType::CUST_AI_CPU) { - GE_CHK_STATUS_RET(ModelManager::GetInstance()->LoadCustAicpuSo(op_desc, so_name_), "launch cust aicpu so failed"); + bool loaded = false; + GE_CHK_STATUS_RET(ModelManager::GetInstance()->LoadCustAicpuSo(op_desc, so_name_, loaded), + "launch cust aicpu so failed"); } // copy args to new host memory diff --git a/ge/graph/load/new_model_manager/task_info/super_kernel/super_kernel.cc b/ge/graph/load/new_model_manager/task_info/super_kernel/super_kernel.cc index e94fa425..a4d14fb0 100644 --- a/ge/graph/load/new_model_manager/task_info/super_kernel/super_kernel.cc +++ b/ge/graph/load/new_model_manager/task_info/super_kernel/super_kernel.cc @@ -25,7 +25,7 @@ Status SuperKernel::Launch(rtStream_t stream, uint32_t dump_flag) { const void *args[] = {this->GetNavTablePtr(), reinterpret_cast(static_cast(this->GetNavTableSize()))}; - rtError_t rt_ret = rtMalloc(reinterpret_cast(device_args_addr_), sizeof(args), RT_MEMORY_HBM); + rtError_t rt_ret = rtMalloc(reinterpret_cast(&device_args_addr_), sizeof(args), RT_MEMORY_HBM); GE_IF_BOOL_EXEC(rt_ret != RT_ERROR_NONE, GELOGE(RT_FAILED, "rtMalloc failied. error: 0x%X", rt_ret); return RT_ERROR_TO_GE_STATUS(rt_ret);) rt_ret = rtMemcpy(reinterpret_cast(device_args_addr_), sizeof(args), (void *)args, sizeof(args), diff --git a/ge/hybrid/node_executor/aicpu/aicpu_node_executor.cc b/ge/hybrid/node_executor/aicpu/aicpu_node_executor.cc index 573739bc..38407160 100755 --- a/ge/hybrid/node_executor/aicpu/aicpu_node_executor.cc +++ b/ge/hybrid/node_executor/aicpu/aicpu_node_executor.cc @@ -644,8 +644,12 @@ Status AicpuNodeTask::Init(const HybridModel &model) { const auto &context = kernel_def.context(); auto kernel_type = static_cast(context.kernel_type()); if (kernel_type == ccKernelType::CUST_AI_CPU) { - GE_CHK_STATUS_RET(ModelManager::GetInstance()->LoadCustAicpuSo(op_desc, so_name), "load cust aicpu so failed."); - GE_CHK_STATUS_RET(ModelManager::GetInstance()->LaunchCustAicpuSo(), "Launch cust aicpu so failed."); + bool loaded = false; + GE_CHK_STATUS_RET(ModelManager::GetInstance()->LoadCustAicpuSo(op_desc, so_name, loaded), + "load cust aicpu so failed."); + if (!loaded) { + GE_CHK_STATUS_RET(ModelManager::GetInstance()->LaunchCustAicpuSo(), "Launch cust aicpu so failed."); + } } GE_CHK_BOOL_RET_STATUS(args.size() == args_size_, FAILED, diff --git a/ge/single_op/task/aicpu_kernel_task_builder.cc b/ge/single_op/task/aicpu_kernel_task_builder.cc index 600c9c29..0b459e7a 100755 --- a/ge/single_op/task/aicpu_kernel_task_builder.cc +++ b/ge/single_op/task/aicpu_kernel_task_builder.cc @@ -62,8 +62,12 @@ Status AiCpuCCTaskBuilder::BuildTask(AiCpuCCTask &task, uint64_t kernel_id) { if (kernel_type == ccKernelType::CUST_AI_CPU) { task.is_custom_ = true; task.dump_flag_ |= RT_KERNEL_CUSTOM_AICPU; - GE_CHK_STATUS_RET(ModelManager::GetInstance()->LoadCustAicpuSo(op_desc_, so_name), "launch cust aicpu so failed"); - GE_CHK_STATUS_RET(ModelManager::GetInstance()->LaunchCustAicpuSo(), "launch cust aicpu so failed."); + bool loaded = false; + GE_CHK_STATUS_RET(ModelManager::GetInstance()->LoadCustAicpuSo(op_desc_, so_name, loaded), + "launch cust aicpu so failed"); + if (!loaded) { + GE_CHK_STATUS_RET(ModelManager::GetInstance()->LaunchCustAicpuSo(), "launch cust aicpu so failed."); + } } task.num_inputs_ = op_desc_->GetInputsSize();