@@ -1058,6 +1058,14 @@ ge::Status GeExecutor::ExecuteAsync(DynamicSingleOp *executor, const vector<GeTe | |||
} | |||
/// Release single-op resources bound to @p stream, after dropping the
/// custom-AICPU so cache entry keyed by the thread's current runtime context.
Status GeExecutor::ReleaseSingleOpResource(void *stream) {
  // Resolve the runtime context current on this thread; it is the key under
  // which custom AICPU so names were cached.
  rtContext_t current_ctx = nullptr;
  const auto rt_ret = rtCtxGetCurrent(&current_ctx);
  if (rt_ret != RT_ERROR_NONE) {
    GELOGE(RT_FAILED, "get current context failed, runtime result is %d", static_cast<int>(rt_ret));
    return RT_FAILED;
  }
  // Drop this context's cached so records, then free the stream's resources.
  ModelManager::GetInstance()->ClearAICPUSo(current_ctx);
  return SingleOpManager::GetInstance().ReleaseResource(stream);
}
@@ -236,6 +236,7 @@ ModelManager::~ModelManager() { | |||
std::lock_guard<std::mutex> lock(map_mutex_); | |||
model_map_.clear(); | |||
model_aicpu_kernel_.clear(); | |||
cust_aicpu_so_.clear(); | |||
GE_IF_BOOL_EXEC(device_count > 0, GE_CHK_RT(rtDeviceReset(0))); | |||
} | |||
@@ -399,7 +400,6 @@ Status ModelManager::Unload(uint32_t model_id) { | |||
} | |||
std::lock_guard<std::mutex> lock(exeception_infos_mutex_); | |||
exception_infos_.clear(); | |||
cust_aicpu_so_.clear(); | |||
return SUCCESS; | |||
} | |||
@@ -1096,15 +1096,47 @@ Status ModelManager::CreateAicpuSession(uint64_t session_id) { | |||
return SUCCESS; | |||
} | |||
/// @brief Record a custom AICPU so under the current runtime context and
///        launch it on first sight for that context.
/// @param op_desc  op carrying the custom AICPU kernel binary
/// @param so_name  name of the custom AICPU so to load
/// @return SUCCESS; RT_FAILED if the current context cannot be queried;
///         the LaunchCustAicpuSo error otherwise
Status ModelManager::LoadCustAicpuSo(const OpDescPtr op_desc, const string &so_name) {
  GELOGI("LoadCustAicpuSo in, op name %s, so_name %s.", op_desc->GetName().c_str(), so_name.c_str());
  std::lock_guard<std::mutex> lock(cust_aicpu_mutex_);
  // get current context
  rtContext_t rt_cur_ctx = nullptr;
  auto rt_err = rtCtxGetCurrent(&rt_cur_ctx);
  if (rt_err != RT_ERROR_NONE) {
    GELOGE(RT_FAILED, "get current context failed, runtime result is %d", static_cast<int>(rt_err));
    return RT_FAILED;
  }
  // Use the current context as the resource key, so each context re-launches
  // its own copy of the so. BUG FIX: resource_id was assigned without ever
  // being declared; declare it here.
  uintptr_t resource_id = reinterpret_cast<uintptr_t>(rt_cur_ctx);
  auto it = cust_aicpu_so_.find(resource_id);
  if (it == cust_aicpu_so_.end()) {
    // First so for this context: launch, then record it only on success.
    GE_CHK_STATUS_RET(LaunchCustAicpuSo(op_desc, so_name), "LaunchCustAicpuSo failed. op name %s, so_name %s",
                      op_desc->GetName().c_str(), so_name.c_str());
    cust_aicpu_so_[resource_id] = std::set<string>{so_name};
    GELOGI("LoadCustAicpuSo new aicpu so resource_id %lu.", static_cast<uint64_t>(resource_id));
    return SUCCESS;
  }
  // Known context: launch only if this particular so is not yet recorded.
  if (it->second.count(so_name) == 0) {
    GE_CHK_STATUS_RET(LaunchCustAicpuSo(op_desc, so_name), "LaunchCustAicpuSo failed. op name %s, so_name %s",
                      op_desc->GetName().c_str(), so_name.c_str());
    (void)it->second.insert(so_name);
    GELOGI("LoadCustAicpuSo add aicpu so resource_id %lu.", static_cast<uint64_t>(resource_id));
  }
  return SUCCESS;
}
/// @brief Drop the cached custom AICPU so names recorded for a runtime context.
/// @param ctx  runtime context (rtContext_t) whose cache entry is removed
/// @return SUCCESS always — a missing entry is not an error
Status ModelManager::ClearAICPUSo(void *ctx) {
  // BUG FIX: the original cast 'rt_cur_ctx', which is not defined in this
  // function; the key must come from the 'ctx' parameter.
  auto ctx_id = reinterpret_cast<uintptr_t>(ctx);
  GELOGI("ClearAICPUSo in. resource_id = 0x%lx.", static_cast<uint64_t>(ctx_id));
  std::lock_guard<std::mutex> lock(cust_aicpu_mutex_);
  auto it = cust_aicpu_so_.find(ctx_id);
  if (it == cust_aicpu_so_.end()) {
    return SUCCESS;
  }
  (void)cust_aicpu_so_.erase(it);
  return SUCCESS;
}
@@ -1140,6 +1172,7 @@ Status ModelManager::LaunchCustAicpuSo(const OpDescPtr op_desc, string so_name) | |||
GE_CHK_RT(rtStreamCreate(&stream, 0)); | |||
GE_CHK_RT(rtCpuKernelLaunch(nullptr, kLoadOpFromBuf, 1, args, args_size, nullptr, stream)); | |||
GELOGI("LaunchCustAicpuSo so buf len %u, so name len %u.", aicpu_data_length, so_name.size()); | |||
status = rtStreamSynchronize(stream); | |||
if (status != RT_ERROR_NONE) { | |||
GELOGE(RT_FAILED, "Call rt stream sync failed, status: 0x%x", status); | |||
@@ -270,9 +270,9 @@ class FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY ModelManager { | |||
ge::Status DestroyAicpuSessionForInfer(uint32_t model_id); | |||
ge::Status LoadCustAicpuSo(const OpDescPtr op_desc, string so_name); | |||
ge::Status LaunchCustAicpuSo(const OpDescPtr op_desc, string so_name); | |||
ge::Status LoadCustAicpuSo(const OpDescPtr op_desc, const string &so_name); | |||
ge::Status LaunchCustAicpuSo(const OpDescPtr op_desc, const string &so_name); | |||
ge::Status ClearAICPUSo(void *ctx); | |||
ge::Status GetOrigInputInfo(uint32_t model_id, uint32_t index, OriginInputInfo &orig_input_info); | |||
@@ -340,7 +340,7 @@ class FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY ModelManager { | |||
std::set<uint64_t> sess_ids_; | |||
std::vector<rtExceptionInfo> exception_infos_; | |||
std::mutex cust_aicpu_mutex_; | |||
std::set<std::string> cust_aicpu_so_; | |||
std::map<uintptr_t, std::set<std::string>> cust_aicpu_so_; | |||
static DumpProperties dump_properties_; | |||
}; | |||
@@ -190,6 +190,7 @@ Status SingleOpModel::LoadAllNodes() { | |||
} | |||
ge_model->GetTBEKernelStore().LoadTBEKernelBinToOpDesc(op_desc); | |||
ge_model->GetCustAICPUKernelStore().LoadCustAICPUKernelBinToOpDesc(op_desc); | |||
} | |||
return SUCCESS; | |||
@@ -247,7 +248,7 @@ Status SingleOpModel::BuildTaskList(SingleOp &single_op) { | |||
single_op.arg_table_.resize(single_op.input_sizes_.size() + single_op.output_sizes_.size()); | |||
ParseArgTable(tbe_task, single_op); | |||
single_op.tasks_.emplace_back(tbe_task); | |||
} else if (kernel_type == cce::ccKernelType::AI_CPU) { | |||
} else if (kernel_type == cce::ccKernelType::AI_CPU || kernel_type == cce::ccKernelType::CUST_AI_CPU) { | |||
GELOGD("Building AICPU_CC task"); | |||
OpTask *task = nullptr; | |||
auto ret = BuildCpuKernelTask(task_def.kernel(), &task); | |||
@@ -256,7 +257,7 @@ Status SingleOpModel::BuildTaskList(SingleOp &single_op) { | |||
} | |||
single_op.tasks_.emplace_back(task); | |||
} else { | |||
GELOGE(UNSUPPORTED, "Only TBE kernel and AI_CPU kernel are supported, but got %u", context.kernel_type()); | |||
GELOGE(UNSUPPORTED, "Only TBE kernel, AI_CPU, CUST_AI_CPU kernel are supported, but got %u", context.kernel_type()); | |||
return UNSUPPORTED; | |||
} | |||
} else if (task_type == RT_MODEL_TASK_KERNEL_EX) { | |||
@@ -391,13 +392,13 @@ Status SingleOpModel::BuildModelTaskKernel(const TaskDef &task_def, DynamicSingl | |||
TbeOpTask *tbe_task = nullptr; | |||
GE_CHK_STATUS_RET_NOLOG(BuildKernelTask(task_def.kernel(), &tbe_task)); | |||
single_op.op_task_.reset(tbe_task); | |||
} else if (kernel_type == cce::ccKernelType::AI_CPU) { | |||
} else if (kernel_type == cce::ccKernelType::AI_CPU || kernel_type == cce::ccKernelType::CUST_AI_CPU) { | |||
GELOGD("Building AICPU_CC task"); | |||
OpTask *task = nullptr; | |||
GE_CHK_STATUS_RET_NOLOG(BuildCpuKernelTask(task_def.kernel(), &task)); | |||
single_op.op_task_.reset(task); | |||
} else { | |||
GELOGE(UNSUPPORTED, "Only TBE kernel and AI_CPU kernel are supported, but got %u", context.kernel_type()); | |||
GELOGE(UNSUPPORTED, "Only TBE, AI_CPU, CUST_AI_CPU kernel are supported, but got %u", context.kernel_type()); | |||
return UNSUPPORTED; | |||
} | |||
return SUCCESS; | |||
@@ -15,6 +15,8 @@ | |||
*/ | |||
#include "single_op/task/aicpu_kernel_task_builder.h" | |||
#include "cce/taskdown_common.hpp" | |||
#include "graph/load/new_model_manager/model_manager.h" | |||
namespace ge { | |||
AiCpuCCTaskBuilder::AiCpuCCTaskBuilder(const OpDescPtr &op_desc, const domi::KernelDef &kernel_def) | |||
@@ -55,6 +57,14 @@ Status AiCpuCCTaskBuilder::BuildTask(AiCpuCCTask &task) { | |||
task.SetkernelName(kernel_name); | |||
task.op_desc_ = op_desc_; | |||
const auto &context = kernel_def_.context(); | |||
auto kernel_type = static_cast<cce::ccKernelType>(context.kernel_type()); | |||
if (kernel_type == cce::ccKernelType::CUST_AI_CPU) { | |||
task.is_custom_ = true; | |||
task.dump_flag_ |= RT_KERNEL_CUSTOM_AICPU; | |||
GE_CHK_STATUS_RET(ModelManager::GetInstance()->LoadCustAicpuSo(op_desc_, so_name), "launch cust aicpu so failed"); | |||
} | |||
task.num_inputs_ = op_desc_->GetInputsSize(); | |||
task.num_outputs_ = op_desc_->GetOutputsSize(); | |||
@@ -280,8 +280,6 @@ Status AiCpuBaseTask::UpdateExtInfo(const std::vector<GeTensorDesc> &input_desc, | |||
for (size_t j = 0; j < num_outputs_; ++j) { | |||
GE_CHK_STATUS_RET(aicpu_ext_handle_->UpdateOutputShapeAndType(j, output_desc[j]), | |||
"Output[%zu] UpdateOutputShapeAndType failed.", j); | |||
// debug code | |||
GELOGD("No input and output, no need update ext info."); | |||
} | |||
} | |||
@@ -669,9 +667,10 @@ Status AiCpuCCTask::LaunchKernel(rtStream_t stream) { | |||
kernel_name_.data()); | |||
// sm_desc is nullptr, because l2 buffer does not support | |||
auto *sm_desc = reinterpret_cast<rtSmDesc_t *>(sm_desc_); | |||
auto ret = | |||
rtCpuKernelLaunch(static_cast<const void *>(so_name_.data()), static_cast<const void *>(kernel_name_.data()), | |||
block_dim_, args_.get(), static_cast<uint32_t>(arg_size_), sm_desc, stream); | |||
auto ret = rtCpuKernelLaunchWithFlag(static_cast<const void *>(so_name_.data()), | |||
static_cast<const void *>(kernel_name_.data()), | |||
block_dim_, args_.get(), static_cast<uint32_t>(arg_size_), | |||
sm_desc, stream, dump_flag_); | |||
if (ret != RT_ERROR_NONE) { | |||
GELOGE(RT_FAILED, "Invoke rtCpuKernelLaunch failed. ret = %d", ret); | |||
return RT_FAILED; | |||
@@ -234,6 +234,8 @@ private: | |||
uint32_t block_dim_ = 1; | |||
void *sm_desc_ = nullptr; | |||
void *io_addr_ = nullptr; | |||
bool is_custom_ = false; | |||
uint32_t dump_flag_ = RT_KERNEL_DEFAULT; | |||
}; | |||
} // namespace ge | |||