diff --git a/ge/graph/load/model_manager/task_info/kernel_ex_task_info.cc b/ge/graph/load/model_manager/task_info/kernel_ex_task_info.cc index a4b3de75..ee358b5c 100644 --- a/ge/graph/load/model_manager/task_info/kernel_ex_task_info.cc +++ b/ge/graph/load/model_manager/task_info/kernel_ex_task_info.cc @@ -106,6 +106,7 @@ Status KernelExTaskInfo::Init(const domi::TaskDef &task_def, DavinciModel *davin // 1. Copy context from kernelExDef.private to workspace uint32_t op_index = kernel_ex_def.op_index(); OpDescPtr op_desc = davinci_model_->GetOpByIndex(op_index); + op_desc_ = op_desc; if (op_desc == nullptr) { REPORT_INNER_ERROR("E19999", "Can't get op_desc from davinci_model by index:%u", op_index); GELOGE(INTERNAL_ERROR, "[Get][Op] by index failed, index:%u is out of range!", op_index); @@ -422,7 +423,7 @@ Status KernelExTaskInfo::Distribute() { if (topic_type_flag_ > 0) { dump_flag_ = dump_flag_ | topic_type_flag_; } - rtError_t rt_ret = rtKernelLaunchEx(kernel_buf_, kernel_buf_size_, dump_flag_, stream_); + rtError_t rt_ret = rtKernelLaunchFwk(op_desc_->GetName().c_str(), kernel_buf_, kernel_buf_size_, dump_flag_, stream_); if (rt_ret != RT_ERROR_NONE) { REPORT_CALL_ERROR("E19999", "Call rtKernelLaunchEx failed, ret:0x%X", rt_ret); GELOGE(RT_FAILED, "[Call][RtKernelLaunchEx] failed, ret:0x%X", rt_ret); diff --git a/ge/graph/load/model_manager/task_info/kernel_ex_task_info.h b/ge/graph/load/model_manager/task_info/kernel_ex_task_info.h index 1b77b715..7d07eb7f 100644 --- a/ge/graph/load/model_manager/task_info/kernel_ex_task_info.h +++ b/ge/graph/load/model_manager/task_info/kernel_ex_task_info.h @@ -70,6 +70,7 @@ class KernelExTaskInfo : public TaskInfo { uint32_t dump_flag_; uint32_t kernel_buf_size_; DavinciModel *davinci_model_; + OpDescPtr op_desc_; void *kernel_buf_; void *input_output_addr_; void *ext_info_addr_; diff --git a/ge/graph/load/model_manager/task_info/kernel_task_info.cc b/ge/graph/load/model_manager/task_info/kernel_task_info.cc index 07ad63ca..6ddaaf5c 100755 --- a/ge/graph/load/model_manager/task_info/kernel_task_info.cc +++ b/ge/graph/load/model_manager/task_info/kernel_task_info.cc @@ -440,9 +440,9 @@ Status KernelTaskInfo::Distribute() { } GELOGI("distribute task info kernel_type %d, flag %d", kernel_type_, dump_flag_); // blockDim is reserved parameter, set to 1 - rt_ret = rtCpuKernelLaunchWithFlag(reinterpret_cast(so_name_.c_str()), - reinterpret_cast(kernel_name_.c_str()), 1, args_, args_size_, - nullptr, stream_, dump_flag_); + rtKernelLaunchNames_t launch_name = {so_name_.c_str(), kernel_name_.c_str(), op_desc_->GetName().c_str()}; + rt_ret = rtAicpuKernelLaunchWithFlag(&launch_name, 1, args_, args_size_, + nullptr, stream_, dump_flag_); call_save_dump_ = true; } else { /* default: not skt launch */ diff --git a/ge/hybrid/node_executor/aicpu/aicpu_node_executor.cc b/ge/hybrid/node_executor/aicpu/aicpu_node_executor.cc index 820c9b56..cf20303c 100755 --- a/ge/hybrid/node_executor/aicpu/aicpu_node_executor.cc +++ b/ge/hybrid/node_executor/aicpu/aicpu_node_executor.cc @@ -477,7 +477,7 @@ Status AicpuTfNodeTask::CopyDataToHbm(TaskContext &context, GE_CHK_STATUS_RET_NOLOG(PrepareCopyInputs(context, out_shape_hbm)); RECORD_CALLBACK_EVENT(context.GetExecutionContext(), node_name_.c_str(), "[LaunchCopy] Start"); - GE_CHK_RT_RET(rtKernelLaunchEx(copy_task_args_buf_->GetData(), sizeof(STR_FWK_OP_KERNEL), + GE_CHK_RT_RET(rtKernelLaunchFwk(node_name_.c_str(), copy_task_args_buf_->GetData(), sizeof(STR_FWK_OP_KERNEL), RT_KERNEL_DEFAULT, context.GetStream())); RECORD_CALLBACK_EVENT(context.GetExecutionContext(), node_name_.c_str(), "[LaunchCopy] End"); @@ -638,7 +638,8 @@ Status AicpuTfNodeTask::LaunchTask(TaskContext &context) { GELOGD("Node[%s] launch task start, unknown_type=%d.", node_name_.c_str(), unknown_type_); uint32_t flag = RT_KERNEL_DEFAULT; RECORD_EXECUTION_EVENT(context.GetExecutionContext(), node_name_.c_str(), "[AicpuTfNodertKernelLaunchEx] Start"); - GE_CHK_RT_RET(rtKernelLaunchEx(kernel_buf_->GetData(), kernel_buf_->GetSize(), flag, context.GetStream())); + GE_CHK_RT_RET(rtKernelLaunchFwk(node_name_.c_str(), kernel_buf_->GetData(), + kernel_buf_->GetSize(), flag, context.GetStream())); RECORD_EXECUTION_EVENT(context.GetExecutionContext(), node_name_.c_str(), "[AicpuTfNodertKernelLaunchEx] End"); GELOGD("Node[%s] launch end.", node_name_.c_str()); if (need_sync_) { @@ -819,11 +820,11 @@ Status AicpuNodeTask::LaunchTask(TaskContext &context) { if (kernel_type == ccKernelType::CUST_AI_CPU) { flag |= static_cast(RT_KERNEL_CUSTOM_AICPU); } - auto rt_ret = rtCpuKernelLaunchWithFlag(reinterpret_cast(so_name.c_str()), - reinterpret_cast(kernel_name.c_str()), - 1, // default core dim is 1 - args_.get(), args_size_, - nullptr, context.GetStream(), flag); + rtKernelLaunchNames_t launch_name = {so_name.c_str(), kernel_name.c_str(), node_name_.c_str()}; + auto rt_ret = rtAicpuKernelLaunchWithFlag(&launch_name, + 1, // default core dim is 1 + args_.get(), args_size_, + nullptr, context.GetStream(), flag); GE_CHK_RT_RET(rt_ret); GELOGD("Node[%s] launch task end.", node_name_.c_str()); return SUCCESS; diff --git a/tests/depends/runtime/src/runtime_stub.cc b/tests/depends/runtime/src/runtime_stub.cc index 0c9e2c27..510eb1ad 100644 --- a/tests/depends/runtime/src/runtime_stub.cc +++ b/tests/depends/runtime/src/runtime_stub.cc @@ -460,6 +460,21 @@ rtError_t rtDebugUnRegisterForStream(rtStream_t stream) { rtError_t rtFftsTaskLaunch(rtFftsTaskInfo_t *fftsTaskInfo, rtStream_t stream) { return RT_ERROR_NONE; } + +rtError_t rtKernelLaunchFwk(const char *opName, void *args, uint32_t argSize, uint32_t flags, rtStream_t rtStream) { + return RT_ERROR_NONE; +} + +rtError_t rtAicpuKernelLaunchWithFlag(const rtKernelLaunchNames_t *launchNames, uint32_t blockDim, const void *args, + uint32_t argSize, rtSmDesc_t *smDesc, rtStream_t stream, uint32_t flags) { + return RT_ERROR_NONE; +} + +rtError_t rtAicpuKernelLaunch(const rtKernelLaunchNames_t *launchNames, uint32_t blockDim, const void *args, + uint32_t argSize, rtSmDesc_t *smDesc, rtStream_t stream) { + return RT_ERROR_NONE; +} + #ifdef __cplusplus } #endif