From 1c10ac4821c2e8230a7ad2532ee9581acad8e232 Mon Sep 17 00:00:00 2001 From: weiyang Date: Wed, 28 Oct 2020 20:08:30 +0800 Subject: [PATCH] for cust aicpu --- ge/executor/ge_executor.cc | 9 +- .../load/new_model_manager/davinci_model.cc | 2 + .../load/new_model_manager/model_manager.cc | 180 ++++++++++++------ .../load/new_model_manager/model_manager.h | 12 +- .../aicpu/aicpu_node_executor.cc | 16 ++ ge/single_op/single_op_model.cc | 3 + 6 files changed, 149 insertions(+), 73 deletions(-) diff --git a/ge/executor/ge_executor.cc b/ge/executor/ge_executor.cc index 12b726ab..ad2879c2 100755 --- a/ge/executor/ge_executor.cc +++ b/ge/executor/ge_executor.cc @@ -1058,14 +1058,7 @@ ge::Status GeExecutor::ExecuteAsync(DynamicSingleOp *executor, const vector(rt_err)); - return RT_FAILED; - } - ModelManager::GetInstance()->ClearAICPUSo(rt_cur_ctx); + ModelManager::GetInstance()->ClearAicpuSo(); return SingleOpManager::GetInstance().ReleaseResource(stream); } diff --git a/ge/graph/load/new_model_manager/davinci_model.cc b/ge/graph/load/new_model_manager/davinci_model.cc index 6d255cd1..cf9be89a 100755 --- a/ge/graph/load/new_model_manager/davinci_model.cc +++ b/ge/graph/load/new_model_manager/davinci_model.cc @@ -463,6 +463,8 @@ Status DavinciModel::DoTaskSink() { GE_CHK_STATUS_RET(InitTaskInfo(*model_task_def.get()), "InitTaskInfo failed."); + GE_CHK_STATUS_RET(ModelManager::GetInstance()->LaunchCustAicpuSo(), "Launch cust aicpu so failed."); + GE_CHK_STATUS_RET(InitEntryTask(), "InitEntryTask failed."); GE_CHK_STATUS_RET(DistributeTask(), "Distribute failed."); diff --git a/ge/graph/load/new_model_manager/model_manager.cc b/ge/graph/load/new_model_manager/model_manager.cc index 8d4cd01e..27ff0614 100755 --- a/ge/graph/load/new_model_manager/model_manager.cc +++ b/ge/graph/load/new_model_manager/model_manager.cc @@ -44,12 +44,18 @@ const std::string kCmdTypeProfFinalize = "prof_finalize"; const std::string kCmdTypeProfStart = "prof_start"; const std::string kCmdTypeProfStop = "prof_stop"; const char *const kLoadOpFromBuf = "loadOpFromBuf"; +const char *const kBatchLoadBuf = "batchLoadsoFrombuf"; +const char *const kDeleteCustOp = "deleteCustOp"; struct CustAicpuSoBuf { uint64_t kernelSoBuf; uint32_t kernelSoBufLen; uint64_t kernelSoName; uint32_t kernelSoNameLen; } __attribute__((packed)); +struct BatchLoadOpFromBufArgs { + uint32_t soNum; + uint64_t args; +} __attribute__((packed)); } // namespace DumpProperties ModelManager::dump_properties_; @@ -1096,97 +1102,149 @@ Status ModelManager::CreateAicpuSession(uint64_t session_id) { return SUCCESS; } -Status ModelManager::LoadCustAicpuSo(const OpDescPtr op_desc, const string &so_name) { - GELOGI("LoadCustAicpuSo in, op name %s, so_name %s.", op_desc->GetName().c_str(), so_name.c_str()); +Status ModelManager::LoadCustAicpuSo(const OpDescPtr &op_desc, const string &so_name) { + GELOGI("LoadCustAicpuSo in, op name %s, so name %s", op_desc->GetName().c_str(), so_name.c_str()); std::lock_guard lock(cust_aicpu_mutex_); + CustAICPUKernelPtr aicpu_kernel = op_desc->TryGetExtAttr(OP_EXTATTR_CUSTAICPU_KERNEL, CustAICPUKernelPtr()); + if (aicpu_kernel == nullptr) { + GELOGE(INTERNAL_ERROR, "cust aicpu op %s can't find kernel!", op_desc->GetName().c_str()); + return INTERNAL_ERROR; + } + // get current context rtContext_t rt_cur_ctx = nullptr; - auto rt_err = rtCtxGetCurrent(&rt_cur_ctx); - if (rt_err != RT_ERROR_NONE) { - GELOGE(RT_FAILED, "get current context failed, runtime result is %d", static_cast(rt_err)); + auto rt_error = rtCtxGetCurrent(&rt_cur_ctx); + if (rt_error != RT_ERROR_NONE) { + GELOGE(RT_FAILED, "get current context failed, runtime result is %d", static_cast(rt_error)); return RT_FAILED; } - // use current context as resource key instead + + // use current context as resource key uintptr_t resource_id = reinterpret_cast(rt_cur_ctx); auto it = cust_aicpu_so_.find(resource_id); if (it == cust_aicpu_so_.end()) { - GE_CHK_STATUS_RET(LaunchCustAicpuSo(op_desc, so_name), "LaunchCustAicpuSo failed. op name %s, so_name %s", - op_desc->GetName().c_str(), so_name.c_str()); - std::set so_name_set; - so_name_set.insert(so_name); - cust_aicpu_so_[resource_id] = so_name_set; - GELOGI("LoadCustAicpuSo new aicpu so resource_id %lu.", resource_id); + std::map new_so_name; + new_so_name.insert({so_name, aicpu_kernel}); + cust_aicpu_so_[resource_id] = new_so_name; + GELOGI("LoadCustAicpuSo new aicpu so resource id %lu", resource_id); return SUCCESS; } auto it_so_name = it->second.find(so_name); if (it_so_name == it->second.end()) { - GE_CHK_STATUS_RET(LaunchCustAicpuSo(op_desc, so_name), "LaunchCustAicpuSo failed. op name %s, so_name %s", - op_desc->GetName().c_str(), so_name.c_str()); - it->second.insert(so_name); - GELOGI("LoadCustAicpuSo add aicpu so resource_id %lu.", resource_id); + it->second.insert({so_name, aicpu_kernel}); + GELOGI("LoadCustAicpuSo add aicpu so resource id %lu", resource_id); } return SUCCESS; } -Status ModelManager::ClearAICPUSo(void *ctx) { - auto ctx_id = reinterpret_cast(ctx); - GELOGI("ClearAICPUSo in. resource id = 0x%lx", static_cast(ctx_id)); +Status ModelManager::LaunchKernelCustAicpuSo(const string &kernel_name) { + GELOGI("LaunchCustAucpuSo in, kernel name %s", kernel_name.c_str()); std::lock_guard lock(cust_aicpu_mutex_); - auto it = cust_aicpu_so_.find(ctx_id); + if (cust_aicpu_so_.size() == 0) return SUCCESS; + // get current context + rtContext_t rt_cur_ctx = nullptr; + auto rt_error = rtCtxGetCurrent(&rt_cur_ctx); + if (rt_error != RT_ERROR_NONE) { + GELOGE(RT_FAILED, "get current context failed, runtime result is %d", static_cast(rt_error)); + return RT_FAILED; + } + uintptr_t resource_id = reinterpret_cast(rt_cur_ctx); + auto it = cust_aicpu_so_.find(resource_id); if (it == cust_aicpu_so_.end()) { + GELOGI("Cust aicpu so map is empty, context id %lu", resource_id); return SUCCESS; } - (void)cust_aicpu_so_.erase(it); - return SUCCESS; -} - -Status ModelManager::LaunchCustAicpuSo(const OpDescPtr op_desc, const string &so_name) { - CustAICPUKernelPtr aicpu_kernel = op_desc->TryGetExtAttr(OP_EXTATTR_CUSTAICPU_KERNEL, CustAICPUKernelPtr()); - if (aicpu_kernel == nullptr) { - GELOGE(INTERNAL_ERROR, "cust aicpu op %s can't find kernel!", op_desc->GetName().c_str()); - return INTERNAL_ERROR; - } - const void *aicpu_data = aicpu_kernel->GetBinData(); - uint32_t aicpu_data_length = aicpu_kernel->GetBinDataSize(); - void *d_aicpu_data = nullptr; - void *d_so_name = nullptr; - void *args = nullptr; + vector allocated_mem; rtError_t status; rtStream_t stream = nullptr; - GE_CHK_RT(rtMalloc(&d_aicpu_data, aicpu_data_length, RT_MEMORY_HBM)); - GE_CHK_RT(rtMemcpy(d_aicpu_data, aicpu_data_length, aicpu_data, aicpu_data_length, RT_MEMCPY_HOST_TO_DEVICE)); - GE_CHK_RT(rtMalloc(&d_so_name, so_name.size(), RT_MEMORY_HBM)); - GE_CHK_RT(rtMemcpy(d_so_name, so_name.size(), reinterpret_cast(so_name.c_str()), - so_name.size(), RT_MEMCPY_HOST_TO_DEVICE)); - - CustAicpuSoBuf cust_aicpu_so_buf; - cust_aicpu_so_buf.kernelSoBuf = reinterpret_cast(reinterpret_cast(d_aicpu_data)); - cust_aicpu_so_buf.kernelSoBufLen = aicpu_data_length; - cust_aicpu_so_buf.kernelSoName = reinterpret_cast(reinterpret_cast(d_so_name)); - cust_aicpu_so_buf.kernelSoNameLen = so_name.size(); - - uint32_t args_size = sizeof(CustAicpuSoBuf); - GE_CHK_RT(rtMalloc(&args, args_size, RT_MEMORY_HBM)); - GE_CHK_RT(rtMemcpy(args, args_size, static_cast(&cust_aicpu_so_buf), args_size, RT_MEMCPY_HOST_TO_DEVICE)); + vector v_cust_so; + void *args = nullptr; + + for (const auto &it_so : it->second) { + const void *aicpu_data = it_so.second->GetBinData(); + uint32_t aicpu_data_length = it_so.second->GetBinDataSize(); + string so_name = it_so.first; + void *d_aicpu_data = nullptr; + void *d_so_name = nullptr; + + status = rtMalloc(&d_aicpu_data, aicpu_data_length, RT_MEMORY_HBM); + if (status != RT_ERROR_NONE) { + GELOGE(RT_FAILED, "Call rt failed, status: 0x%x", status); + return RT_ERROR_TO_GE_STATUS(status); + } + allocated_mem.push_back(d_aicpu_data); + status = rtMalloc(&d_so_name, so_name.size(), RT_MEMORY_HBM); + if (status != RT_ERROR_NONE) { + GELOGE(RT_FAILED, "Call rt failed, status: 0x%x", status); + return RT_ERROR_TO_GE_STATUS(status); + } + allocated_mem.push_back(d_so_name); + GE_CHK_RT(rtMemcpy(d_aicpu_data, aicpu_data_length, aicpu_data, aicpu_data_length, RT_MEMCPY_HOST_TO_DEVICE)); + GE_CHK_RT(rtMemcpy(d_so_name, so_name.size(), reinterpret_cast(so_name.c_str()), + so_name.size(), RT_MEMCPY_HOST_TO_DEVICE)); + + CustAicpuSoBuf cust_aicpu_so_buf; + cust_aicpu_so_buf.kernelSoBuf = reinterpret_cast(reinterpret_cast(d_aicpu_data)); + cust_aicpu_so_buf.kernelSoBufLen = aicpu_data_length; + cust_aicpu_so_buf.kernelSoName = reinterpret_cast(reinterpret_cast(d_so_name)); + cust_aicpu_so_buf.kernelSoNameLen = so_name.size(); + v_cust_so.push_back(cust_aicpu_so_buf); + } + if (kernel_name == kDeleteCustOp) { + (void)cust_aicpu_so_.erase(it); + } + + uint32_t args_size = sizeof(CustAicpuSoBuf) * v_cust_so.size(); + status = rtMalloc(&args, args_size, RT_MEMORY_HBM); + if (status != RT_ERROR_NONE) { + GELOGE(RT_FAILED, "Call rt failed, status: 0x%x", status); + return RT_ERROR_TO_GE_STATUS(status); + } + allocated_mem.push_back(args); + GE_CHK_RT(rtMemcpy(args, args_size, v_cust_so.data(), args_size, RT_MEMCPY_HOST_TO_DEVICE)); + + BatchLoadOpFromBufArgs batch_cust_so; + batch_cust_so.soNum = v_cust_so.size(); + batch_cust_so.args = reinterpret_cast(reinterpret_cast(args)); + + void *batch_args = nullptr; + uint32_t batch_args_size = sizeof(BatchLoadOpFromBufArgs); + status = rtMalloc(&batch_args, batch_args_size, RT_MEMORY_HBM); + if (status != RT_ERROR_NONE) { + GELOGE(RT_FAILED, "Call rt failed, status: 0x%x", status); + return RT_ERROR_TO_GE_STATUS(status); + } + allocated_mem.push_back(batch_args); + GE_CHK_RT(rtMemcpy(batch_args, batch_args_size, static_cast(&batch_cust_so), + batch_args_size, RT_MEMCPY_HOST_TO_DEVICE)); + GE_CHK_RT(rtStreamCreate(&stream, 0)); - GE_CHK_RT(rtCpuKernelLaunch(nullptr, kLoadOpFromBuf, 1, args, args_size, nullptr, stream)); + GE_CHK_RT(rtCpuKernelLaunch(nullptr, kernel_name.c_str(), 1, batch_args, batch_args_size, nullptr, stream)); - GELOGI("LaunchCustAicpuSo so buf len %u, so name len %u.", aicpu_data_length, so_name.size()); status = rtStreamSynchronize(stream); if (status != RT_ERROR_NONE) { GELOGE(RT_FAILED, "Call rt stream sync failed, status: 0x%x", status); - GE_CHK_RT(rtStreamDestroy(stream)); - GE_CHK_RT(rtFree(args)); - GE_CHK_RT(rtFree(d_aicpu_data)); - GE_CHK_RT(rtFree(d_so_name)); return RT_ERROR_TO_GE_STATUS(status); } - GE_CHK_RT(rtStreamDestroy(stream)); - GE_CHK_RT(rtFree(args)); - GE_CHK_RT(rtFree(d_aicpu_data)); - GE_CHK_RT(rtFree(d_so_name)); - GELOGI("Cpu kernel launch loadOpFromBuf task success."); + std::function callback = [&]() { + for (auto mem : allocated_mem) { + GE_CHK_RT(rtFree(mem)); + } + GE_CHK_RT(rtStreamDestroy(stream)); + }; + GE_MAKE_GUARD(release, callback); + GELOGI("Cpu kernel launch task success."); + return SUCCESS; +} + +Status ModelManager::ClearAicpuSo() { + GE_CHK_STATUS_RET(LaunchKernelCustAicpuSo(kDeleteCustOp), "delete cust op so failed."); + return SUCCESS; +} + +Status ModelManager::LaunchCustAicpuSo() { + GE_CHK_STATUS_RET(LaunchKernelCustAicpuSo(kBatchLoadBuf), "launch cust op so failed."); return SUCCESS; } diff --git a/ge/graph/load/new_model_manager/model_manager.h b/ge/graph/load/new_model_manager/model_manager.h index 9e8f61db..d6a89d6b 100755 --- a/ge/graph/load/new_model_manager/model_manager.h +++ b/ge/graph/load/new_model_manager/model_manager.h @@ -270,9 +270,13 @@ class FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY ModelManager { ge::Status DestroyAicpuSessionForInfer(uint32_t model_id); - ge::Status LoadCustAicpuSo(const OpDescPtr op_desc, const string &so_name); - ge::Status LaunchCustAicpuSo(const OpDescPtr op_desc, const string &so_name); - ge::Status ClearAICPUSo(void *ctx); + ge::Status LoadCustAicpuSo(const OpDescPtr &op_desc, const string &so_name); + + ge::Status LaunchCustAicpuSo(); + + ge::Status ClearAicpuSo(); + + ge::Status LaunchKernelCustAicpuSo(const string &kernel_name); ge::Status GetOrigInputInfo(uint32_t model_id, uint32_t index, OriginInputInfo &orig_input_info); @@ -340,7 +344,7 @@ class FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY ModelManager { std::set sess_ids_; std::vector exception_infos_; std::mutex cust_aicpu_mutex_; - std::map> cust_aicpu_so_; + std::map> cust_aicpu_so_; static DumpProperties dump_properties_; }; diff --git a/ge/hybrid/node_executor/aicpu/aicpu_node_executor.cc b/ge/hybrid/node_executor/aicpu/aicpu_node_executor.cc index fa379ed6..61af55dd 100755 --- a/ge/hybrid/node_executor/aicpu/aicpu_node_executor.cc +++ b/ge/hybrid/node_executor/aicpu/aicpu_node_executor.cc @@ -15,6 +15,7 @@ */ #include "hybrid/node_executor/aicpu/aicpu_node_executor.h" +#include "cce/taskdown_common.hpp" #include "common/formats/formats.h" #include "aicpu/common/aicpu_task_struct.h" #include "graph/load/new_model_manager/model_manager.h" @@ -630,6 +631,16 @@ Status AicpuNodeTask::Init(const HybridModel &model) { auto &args = kernel_def.args(); args_size_ = kernel_def.args_size(); + const std::string &so_name = kernel_def.so_name(); + const OpDescPtr op_desc = MakeShared(*(node_item_->op_desc)); + const auto &context = kernel_def.context(); + auto kernel_type = static_cast(context.kernel_type()); + if (kernel_type == cce::ccKernelType::CUST_AI_CPU) { + GE_CHK_STATUS_RET(ModelManager::GetInstance()->LoadCustAicpuSo(op_desc, so_name), "load cust aicpu so failed."); + GE_CHK_STATUS_RET(ModelManager::GetInstance()->LaunchCustAicpuSo(), "Launch cust aicpu so failed."); + + } + GE_CHK_BOOL_RET_STATUS(args.size() == args_size_, FAILED, "Node[%s] task def args.size=%zu, but args_size=%u.", node_name.c_str(), args.size(), args_size_); @@ -716,7 +727,12 @@ Status AicpuNodeTask::LaunchTask(TaskContext &context) { GELOGI("Node[%s] launch task start. unknown_type=%d.", node_name_.c_str(), unknown_type_); const auto &so_name = task_def_.kernel().so_name(); const auto &kernel_name = task_def_.kernel().kernel_name(); + const auto &kcontext = task_def_.kernel().context(); + auto kernel_type = static_cast(kcontext.kernel_type()); uint32_t flag = RT_KERNEL_DEFAULT; + if (kernel_type == cce::ccKernelType::CUST_AI_CPU) { + flag |= RT_KERNEL_CUSTOM_AICPU; + } auto rt_ret = rtCpuKernelLaunchWithFlag(reinterpret_cast(so_name.c_str()), reinterpret_cast(kernel_name.c_str()), 1, // default core dim is 1 diff --git a/ge/single_op/single_op_model.cc b/ge/single_op/single_op_model.cc index a1e1e7dd..98d56046 100755 --- a/ge/single_op/single_op_model.cc +++ b/ge/single_op/single_op_model.cc @@ -31,6 +31,7 @@ #include "task/aicpu_task_builder.h" #include "task/aicpu_kernel_task_builder.h" #include "task/tbe_task_builder.h" +#include "graph/load/new_model_manager/model_manager.h" static std::atomic aicpu_sessionid(0); @@ -277,6 +278,7 @@ Status SingleOpModel::BuildTaskList(SingleOp &single_op) { GELOGD("Skip task type: %d", static_cast(task_type)); } } + GE_CHK_STATUS_RET(ModelManager::GetInstance()->LaunchCustAicpuSo(), "launch cust aicpu so failed."); return SUCCESS; } @@ -448,6 +450,7 @@ Status SingleOpModel::BuildTaskListForDynamicOp(DynamicSingleOp &single_op) { GELOGD("Skip task type: %d", static_cast(task_type)); } } + GE_CHK_STATUS_RET(ModelManager::GetInstance()->LaunchCustAicpuSo(), "launch cust aicpu so failed."); return SUCCESS; }