Merge pull request !147 from 赵之轩/developmenttags/v1.1.0
| @@ -1058,6 +1058,14 @@ ge::Status GeExecutor::ExecuteAsync(DynamicSingleOp *executor, const vector<GeTe | |||||
| } | } | ||||
| Status GeExecutor::ReleaseSingleOpResource(void *stream) { | Status GeExecutor::ReleaseSingleOpResource(void *stream) { | ||||
| // get current context | |||||
| rtContext_t rt_cur_ctx = nullptr; | |||||
| auto rt_err = rtCtxGetCurrent(&rt_cur_ctx); | |||||
| if (rt_err != RT_ERROR_NONE) { | |||||
| GELOGE(RT_FAILED, "get current context failed, runtime result is %d", static_cast<int>(rt_err)); | |||||
| return RT_FAILED; | |||||
| } | |||||
| ModelManager::GetInstance()->ClearAICPUSo(rt_cur_ctx); | |||||
| return SingleOpManager::GetInstance().ReleaseResource(stream); | return SingleOpManager::GetInstance().ReleaseResource(stream); | ||||
| } | } | ||||
| @@ -236,6 +236,7 @@ ModelManager::~ModelManager() { | |||||
| std::lock_guard<std::mutex> lock(map_mutex_); | std::lock_guard<std::mutex> lock(map_mutex_); | ||||
| model_map_.clear(); | model_map_.clear(); | ||||
| model_aicpu_kernel_.clear(); | model_aicpu_kernel_.clear(); | ||||
| cust_aicpu_so_.clear(); | |||||
| GE_IF_BOOL_EXEC(device_count > 0, GE_CHK_RT(rtDeviceReset(0))); | GE_IF_BOOL_EXEC(device_count > 0, GE_CHK_RT(rtDeviceReset(0))); | ||||
| } | } | ||||
| @@ -399,7 +400,6 @@ Status ModelManager::Unload(uint32_t model_id) { | |||||
| } | } | ||||
| std::lock_guard<std::mutex> lock(exeception_infos_mutex_); | std::lock_guard<std::mutex> lock(exeception_infos_mutex_); | ||||
| exception_infos_.clear(); | exception_infos_.clear(); | ||||
| cust_aicpu_so_.clear(); | |||||
| return SUCCESS; | return SUCCESS; | ||||
| } | } | ||||
| @@ -1096,19 +1096,51 @@ Status ModelManager::CreateAicpuSession(uint64_t session_id) { | |||||
| return SUCCESS; | return SUCCESS; | ||||
| } | } | ||||
| Status ModelManager::LoadCustAicpuSo(const OpDescPtr op_desc, string so_name) { | |||||
| Status ModelManager::LoadCustAicpuSo(const OpDescPtr op_desc, const string &so_name) { | |||||
| GELOGI("LoadCustAicpuSo in, op name %s, so_name %s.", op_desc->GetName().c_str(), so_name.c_str()); | |||||
| std::lock_guard<std::mutex> lock(cust_aicpu_mutex_); | std::lock_guard<std::mutex> lock(cust_aicpu_mutex_); | ||||
| auto it = cust_aicpu_so_.find(so_name); | |||||
| // get current context | |||||
| rtContext_t rt_cur_ctx = nullptr; | |||||
| auto rt_err = rtCtxGetCurrent(&rt_cur_ctx); | |||||
| if (rt_err != RT_ERROR_NONE) { | |||||
| GELOGE(RT_FAILED, "get current context failed, runtime result is %d", static_cast<int>(rt_err)); | |||||
| return RT_FAILED; | |||||
| } | |||||
| // use current context as resource key instead | |||||
| uintptr_t resource_id = reinterpret_cast<uintptr_t>(rt_cur_ctx); | |||||
| auto it = cust_aicpu_so_.find(resource_id); | |||||
| if (it == cust_aicpu_so_.end()) { | if (it == cust_aicpu_so_.end()) { | ||||
| GE_CHK_STATUS_RET(LaunchCustAicpuSo(op_desc, so_name), "LaunchCustAicpuSo failed. op name %s, so_name %s", | GE_CHK_STATUS_RET(LaunchCustAicpuSo(op_desc, so_name), "LaunchCustAicpuSo failed. op name %s, so_name %s", | ||||
| op_desc->GetName().c_str(), so_name.c_str()); | op_desc->GetName().c_str(), so_name.c_str()); | ||||
| (void)cust_aicpu_so_.insert(so_name); | |||||
| GELOGI("LaunchCustAicpuSo op name %s, so_name %s.", op_desc->GetName().c_str(), so_name.c_str()); | |||||
| std::set<string> so_name_set; | |||||
| so_name_set.insert(so_name); | |||||
| cust_aicpu_so_[resource_id] = so_name_set; | |||||
| GELOGI("LoadCustAicpuSo new aicpu so resource_id %lu.", resource_id); | |||||
| return SUCCESS; | |||||
| } | |||||
| auto it_so_name = it->second.find(so_name); | |||||
| if (it_so_name == it->second.end()) { | |||||
| GE_CHK_STATUS_RET(LaunchCustAicpuSo(op_desc, so_name), "LaunchCustAicpuSo failed. op name %s, so_name %s", | |||||
| op_desc->GetName().c_str(), so_name.c_str()); | |||||
| it->second.insert(so_name); | |||||
| GELOGI("LoadCustAicpuSo add aicpu so resource_id %lu.", resource_id); | |||||
| } | |||||
| return SUCCESS; | |||||
| } | |||||
| Status ModelManager::ClearAICPUSo(void *ctx) { | |||||
| auto ctx_id = reinterpret_cast<uintptr_t>(ctx); | |||||
| GELOGI("ClearAICPUSo in. resource id = 0x%lx", static_cast<uint64_t>(ctx_id)); | |||||
| std::lock_guard<std::mutex> lock(cust_aicpu_mutex_); | |||||
| auto it = cust_aicpu_so_.find(ctx_id); | |||||
| if (it == cust_aicpu_so_.end()) { | |||||
| return SUCCESS; | |||||
| } | } | ||||
| (void)cust_aicpu_so_.erase(it); | |||||
| return SUCCESS; | return SUCCESS; | ||||
| } | } | ||||
| Status ModelManager::LaunchCustAicpuSo(const OpDescPtr op_desc, string so_name) { | |||||
| Status ModelManager::LaunchCustAicpuSo(const OpDescPtr op_desc, const string &so_name) { | |||||
| CustAICPUKernelPtr aicpu_kernel = op_desc->TryGetExtAttr(OP_EXTATTR_CUSTAICPU_KERNEL, CustAICPUKernelPtr()); | CustAICPUKernelPtr aicpu_kernel = op_desc->TryGetExtAttr(OP_EXTATTR_CUSTAICPU_KERNEL, CustAICPUKernelPtr()); | ||||
| if (aicpu_kernel == nullptr) { | if (aicpu_kernel == nullptr) { | ||||
| GELOGE(INTERNAL_ERROR, "cust aicpu op %s can't find kernel!", op_desc->GetName().c_str()); | GELOGE(INTERNAL_ERROR, "cust aicpu op %s can't find kernel!", op_desc->GetName().c_str()); | ||||
| @@ -1140,6 +1172,7 @@ Status ModelManager::LaunchCustAicpuSo(const OpDescPtr op_desc, string so_name) | |||||
| GE_CHK_RT(rtStreamCreate(&stream, 0)); | GE_CHK_RT(rtStreamCreate(&stream, 0)); | ||||
| GE_CHK_RT(rtCpuKernelLaunch(nullptr, kLoadOpFromBuf, 1, args, args_size, nullptr, stream)); | GE_CHK_RT(rtCpuKernelLaunch(nullptr, kLoadOpFromBuf, 1, args, args_size, nullptr, stream)); | ||||
| GELOGI("LaunchCustAicpuSo so buf len %u, so name len %u.", aicpu_data_length, so_name.size()); | |||||
| status = rtStreamSynchronize(stream); | status = rtStreamSynchronize(stream); | ||||
| if (status != RT_ERROR_NONE) { | if (status != RT_ERROR_NONE) { | ||||
| GELOGE(RT_FAILED, "Call rt stream sync failed, status: 0x%x", status); | GELOGE(RT_FAILED, "Call rt stream sync failed, status: 0x%x", status); | ||||
| @@ -270,9 +270,9 @@ class FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY ModelManager { | |||||
| ge::Status DestroyAicpuSessionForInfer(uint32_t model_id); | ge::Status DestroyAicpuSessionForInfer(uint32_t model_id); | ||||
| ge::Status LoadCustAicpuSo(const OpDescPtr op_desc, string so_name); | |||||
| ge::Status LaunchCustAicpuSo(const OpDescPtr op_desc, string so_name); | |||||
| ge::Status LoadCustAicpuSo(const OpDescPtr op_desc, const string &so_name); | |||||
| ge::Status LaunchCustAicpuSo(const OpDescPtr op_desc, const string &so_name); | |||||
| ge::Status ClearAICPUSo(void *ctx); | |||||
| ge::Status GetOrigInputInfo(uint32_t model_id, uint32_t index, OriginInputInfo &orig_input_info); | ge::Status GetOrigInputInfo(uint32_t model_id, uint32_t index, OriginInputInfo &orig_input_info); | ||||
| @@ -340,7 +340,7 @@ class FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY ModelManager { | |||||
| std::set<uint64_t> sess_ids_; | std::set<uint64_t> sess_ids_; | ||||
| std::vector<rtExceptionInfo> exception_infos_; | std::vector<rtExceptionInfo> exception_infos_; | ||||
| std::mutex cust_aicpu_mutex_; | std::mutex cust_aicpu_mutex_; | ||||
| std::set<std::string> cust_aicpu_so_; | |||||
| std::map<uintptr_t, std::set<std::string>> cust_aicpu_so_; | |||||
| static DumpProperties dump_properties_; | static DumpProperties dump_properties_; | ||||
| }; | }; | ||||
| @@ -190,6 +190,7 @@ Status SingleOpModel::LoadAllNodes() { | |||||
| } | } | ||||
| ge_model->GetTBEKernelStore().LoadTBEKernelBinToOpDesc(op_desc); | ge_model->GetTBEKernelStore().LoadTBEKernelBinToOpDesc(op_desc); | ||||
| ge_model->GetCustAICPUKernelStore().LoadCustAICPUKernelBinToOpDesc(op_desc); | |||||
| } | } | ||||
| return SUCCESS; | return SUCCESS; | ||||
| @@ -247,7 +248,7 @@ Status SingleOpModel::BuildTaskList(SingleOp &single_op) { | |||||
| single_op.arg_table_.resize(single_op.input_sizes_.size() + single_op.output_sizes_.size()); | single_op.arg_table_.resize(single_op.input_sizes_.size() + single_op.output_sizes_.size()); | ||||
| ParseArgTable(tbe_task, single_op); | ParseArgTable(tbe_task, single_op); | ||||
| single_op.tasks_.emplace_back(tbe_task); | single_op.tasks_.emplace_back(tbe_task); | ||||
| } else if (kernel_type == cce::ccKernelType::AI_CPU) { | |||||
| } else if (kernel_type == cce::ccKernelType::AI_CPU || kernel_type == cce::ccKernelType::CUST_AI_CPU) { | |||||
| GELOGD("Building AICPU_CC task"); | GELOGD("Building AICPU_CC task"); | ||||
| OpTask *task = nullptr; | OpTask *task = nullptr; | ||||
| auto ret = BuildCpuKernelTask(task_def.kernel(), &task); | auto ret = BuildCpuKernelTask(task_def.kernel(), &task); | ||||
| @@ -256,7 +257,7 @@ Status SingleOpModel::BuildTaskList(SingleOp &single_op) { | |||||
| } | } | ||||
| single_op.tasks_.emplace_back(task); | single_op.tasks_.emplace_back(task); | ||||
| } else { | } else { | ||||
| GELOGE(UNSUPPORTED, "Only TBE kernel and AI_CPU kernel are supported, but got %u", context.kernel_type()); | |||||
| GELOGE(UNSUPPORTED, "Only TBE, AI_CPU, CUST_AI_CPU kernel are supported, but got %u", context.kernel_type()); | |||||
| return UNSUPPORTED; | return UNSUPPORTED; | ||||
| } | } | ||||
| } else if (task_type == RT_MODEL_TASK_KERNEL_EX) { | } else if (task_type == RT_MODEL_TASK_KERNEL_EX) { | ||||
| @@ -391,13 +392,13 @@ Status SingleOpModel::BuildModelTaskKernel(const TaskDef &task_def, DynamicSingl | |||||
| TbeOpTask *tbe_task = nullptr; | TbeOpTask *tbe_task = nullptr; | ||||
| GE_CHK_STATUS_RET_NOLOG(BuildKernelTask(task_def.kernel(), &tbe_task)); | GE_CHK_STATUS_RET_NOLOG(BuildKernelTask(task_def.kernel(), &tbe_task)); | ||||
| single_op.op_task_.reset(tbe_task); | single_op.op_task_.reset(tbe_task); | ||||
| } else if (kernel_type == cce::ccKernelType::AI_CPU) { | |||||
| } else if (kernel_type == cce::ccKernelType::AI_CPU || kernel_type == cce::ccKernelType::CUST_AI_CPU) { | |||||
| GELOGD("Building AICPU_CC task"); | GELOGD("Building AICPU_CC task"); | ||||
| OpTask *task = nullptr; | OpTask *task = nullptr; | ||||
| GE_CHK_STATUS_RET_NOLOG(BuildCpuKernelTask(task_def.kernel(), &task)); | GE_CHK_STATUS_RET_NOLOG(BuildCpuKernelTask(task_def.kernel(), &task)); | ||||
| single_op.op_task_.reset(task); | single_op.op_task_.reset(task); | ||||
| } else { | } else { | ||||
| GELOGE(UNSUPPORTED, "Only TBE kernel and AI_CPU kernel are supported, but got %u", context.kernel_type()); | |||||
| GELOGE(UNSUPPORTED, "Only TBE, AI_CPU, CUST_AI_CPU kernel are supported, but got %u", context.kernel_type()); | |||||
| return UNSUPPORTED; | return UNSUPPORTED; | ||||
| } | } | ||||
| return SUCCESS; | return SUCCESS; | ||||
| @@ -15,6 +15,8 @@ | |||||
| */ | */ | ||||
| #include "single_op/task/aicpu_kernel_task_builder.h" | #include "single_op/task/aicpu_kernel_task_builder.h" | ||||
| #include "cce/taskdown_common.hpp" | |||||
| #include "graph/load/new_model_manager/model_manager.h" | |||||
| namespace ge { | namespace ge { | ||||
| AiCpuCCTaskBuilder::AiCpuCCTaskBuilder(const OpDescPtr &op_desc, const domi::KernelDef &kernel_def) | AiCpuCCTaskBuilder::AiCpuCCTaskBuilder(const OpDescPtr &op_desc, const domi::KernelDef &kernel_def) | ||||
| @@ -55,6 +57,14 @@ Status AiCpuCCTaskBuilder::BuildTask(AiCpuCCTask &task) { | |||||
| task.SetkernelName(kernel_name); | task.SetkernelName(kernel_name); | ||||
| task.op_desc_ = op_desc_; | task.op_desc_ = op_desc_; | ||||
| const auto &context = kernel_def_.context(); | |||||
| auto kernel_type = static_cast<cce::ccKernelType>(context.kernel_type()); | |||||
| if (kernel_type == cce::ccKernelType::CUST_AI_CPU) { | |||||
| task.is_custom_ = true; | |||||
| task.dump_flag_ |= RT_KERNEL_CUSTOM_AICPU; | |||||
| GE_CHK_STATUS_RET(ModelManager::GetInstance()->LoadCustAicpuSo(op_desc_, so_name), "launch cust aicpu so failed"); | |||||
| } | |||||
| task.num_inputs_ = op_desc_->GetInputsSize(); | task.num_inputs_ = op_desc_->GetInputsSize(); | ||||
| task.num_outputs_ = op_desc_->GetOutputsSize(); | task.num_outputs_ = op_desc_->GetOutputsSize(); | ||||
| @@ -280,8 +280,6 @@ Status AiCpuBaseTask::UpdateExtInfo(const std::vector<GeTensorDesc> &input_desc, | |||||
| for (size_t j = 0; j < num_outputs_; ++j) { | for (size_t j = 0; j < num_outputs_; ++j) { | ||||
| GE_CHK_STATUS_RET(aicpu_ext_handle_->UpdateOutputShapeAndType(j, output_desc[j]), | GE_CHK_STATUS_RET(aicpu_ext_handle_->UpdateOutputShapeAndType(j, output_desc[j]), | ||||
| "Output[%zu] UpdateOutputShapeAndType failed.", j); | "Output[%zu] UpdateOutputShapeAndType failed.", j); | ||||
| // debug code | |||||
| GELOGD("No input and output, no need update ext info."); | |||||
| } | } | ||||
| } | } | ||||
| @@ -669,9 +667,10 @@ Status AiCpuCCTask::LaunchKernel(rtStream_t stream) { | |||||
| kernel_name_.data()); | kernel_name_.data()); | ||||
| // sm_desc is nullptr, because l2 buffer does not support | // sm_desc is nullptr, because l2 buffer does not support | ||||
| auto *sm_desc = reinterpret_cast<rtSmDesc_t *>(sm_desc_); | auto *sm_desc = reinterpret_cast<rtSmDesc_t *>(sm_desc_); | ||||
| auto ret = | |||||
| rtCpuKernelLaunch(static_cast<const void *>(so_name_.data()), static_cast<const void *>(kernel_name_.data()), | |||||
| block_dim_, args_.get(), static_cast<uint32_t>(arg_size_), sm_desc, stream); | |||||
| auto ret = rtCpuKernelLaunchWithFlag(static_cast<const void *>(so_name_.data()), | |||||
| static_cast<const void *>(kernel_name_.data()), | |||||
| block_dim_, args_.get(), static_cast<uint32_t>(arg_size_), | |||||
| sm_desc, stream, dump_flag_); | |||||
| if (ret != RT_ERROR_NONE) { | if (ret != RT_ERROR_NONE) { | ||||
| GELOGE(RT_FAILED, "Invoke rtCpuKernelLaunch failed. ret = %d", ret); | GELOGE(RT_FAILED, "Invoke rtCpuKernelLaunch failed. ret = %d", ret); | ||||
| return RT_FAILED; | return RT_FAILED; | ||||
| @@ -234,6 +234,8 @@ private: | |||||
| uint32_t block_dim_ = 1; | uint32_t block_dim_ = 1; | ||||
| void *sm_desc_ = nullptr; | void *sm_desc_ = nullptr; | ||||
| void *io_addr_ = nullptr; | void *io_addr_ = nullptr; | ||||
| bool is_custom_ = false; | |||||
| uint32_t dump_flag_ = RT_KERNEL_DEFAULT; | |||||
| }; | }; | ||||
| } // namespace ge | } // namespace ge | ||||