From 1db59ce1bce062c6cd32107d8d21392cb635a187 Mon Sep 17 00:00:00 2001 From: wuweikang Date: Tue, 9 Feb 2021 11:10:11 +0800 Subject: [PATCH] invoke sub kernel with tiling_key in dynamic shape & all scene --- ge/hybrid/model/hybrid_model_builder.cc | 4 +- .../node_executor/aicore/aicore_op_task.cc | 183 ++++++++++++++--- .../node_executor/aicore/aicore_op_task.h | 34 ++++ ge/single_op/single_op_model.cc | 20 +- ge/single_op/single_op_model.h | 2 +- ge/single_op/task/op_task.cc | 30 ++- ge/single_op/task/op_task.h | 8 + ge/single_op/task/tbe_task_builder.cc | 188 ++++++++++++++---- ge/single_op/task/tbe_task_builder.h | 33 ++- tests/depends/runtime/src/runtime_stub.cc | 7 + tests/ut/ge/CMakeLists.txt | 6 + tests/ut/ge/hybrid/ge_hybrid_unittest.cc | 101 ++++++++++ .../ge/single_op/single_op_task_unittest.cc | 117 +++++++++++ third_party/fwkacllib/inc/runtime/kernel.h | 35 ++++ third_party/fwkacllib/inc/runtime/rt_model.h | 13 ++ 15 files changed, 694 insertions(+), 87 deletions(-) create mode 100644 tests/ut/ge/hybrid/ge_hybrid_unittest.cc create mode 100644 tests/ut/ge/single_op/single_op_task_unittest.cc diff --git a/ge/hybrid/model/hybrid_model_builder.cc b/ge/hybrid/model/hybrid_model_builder.cc index d2862553..7ea9e446 100755 --- a/ge/hybrid/model/hybrid_model_builder.cc +++ b/ge/hybrid/model/hybrid_model_builder.cc @@ -1199,6 +1199,8 @@ Status HybridModelBuilder::IndexTaskDefs() { op_index = task_def.kernel_ex().op_index(); } else if (task_type == RT_MODEL_TASK_HCCL) { op_index = task_def.kernel_hccl().op_index(); + } else if (task_type == RT_MODEL_TASK_ALL_KERNEL) { + op_index = task_def.kernel_with_handle().context().op_index(); } else { GELOGD("Skip task type: %d", static_cast(task_type)); continue; @@ -1211,7 +1213,7 @@ Status HybridModelBuilder::IndexTaskDefs() { } auto &node = iter->second; - if (task_type == RT_MODEL_TASK_KERNEL) { + if (task_type == RT_MODEL_TASK_KERNEL || task_type == RT_MODEL_TASK_ALL_KERNEL) { ge_model->GetTBEKernelStore().LoadTBEKernelBinToOpDesc(node->GetOpDesc()); } diff --git a/ge/hybrid/node_executor/aicore/aicore_op_task.cc b/ge/hybrid/node_executor/aicore/aicore_op_task.cc index 6ab62f3f..f3699b6c 100644 --- a/ge/hybrid/node_executor/aicore/aicore_op_task.cc +++ b/ge/hybrid/node_executor/aicore/aicore_op_task.cc @@ -33,6 +33,20 @@ constexpr char const *kAttrOpParamSize = "op_para_size"; constexpr char const *kAttrAtomicOpParamSize = "atomic_op_para_size"; } // namespace +TbeHandleHolder::TbeHandleHolder(void *bin_handle) + : bin_handle_(bin_handle) {} + +TbeHandleHolder::~TbeHandleHolder() { + if (bin_handle_ != nullptr) { + GE_CHK_RT(rtDevBinaryUnRegister(bin_handle_)); + } +} + +bool TbeHandleRegistry::AddHandle(std::unique_ptr &&holder) { + auto ret = registered_handles_.emplace(std::move(holder)); + return ret.second; +} + Status AiCoreOpTask::Init(const OpDesc &op_desc, const domi::TaskDef &task_def) { GE_CHK_STATUS_RET_NOLOG(InitWithTaskDef(op_desc, task_def)); GE_CHK_STATUS_RET_NOLOG(InitTilingInfo(op_desc)); @@ -69,7 +83,7 @@ Status AiCoreOpTask::RegisterTbeHandle(const OpDesc &op_desc) { if (rt_ret != RT_ERROR_NONE || is_single_op_) { void *bin_handle = nullptr; if (!kernel_store.FindTBEHandle(stub_name_.c_str(), bin_handle)) { - GELOGI("TBE: can't find the kernel_name[%s] in HandleMap", stub_name_.c_str()); + GELOGI("TBE: can't find the binfile_key[%s] in HandleMap", stub_name_.c_str()); rtDevBinary_t binary; std::string json_string; GE_IF_BOOL_EXEC(AttrUtils::GetStr(op_desc_ptr, TVM_ATTR_NAME_MAGIC, json_string), @@ -96,7 +110,7 @@ Status 
AiCoreOpTask::RegisterTbeHandle(const OpDesc &op_desc) { GE_IF_BOOL_EXEC(!meta_data.empty(), GE_CHK_RT_RET(rtMetadataRegister(bin_handle, meta_data.c_str()))); kernel_store.StoreTBEHandle(stub_name_.c_str(), bin_handle, tbe_kernel); } else { - GELOGI("TBE: find the kernel_name[%s] in HandleMap", stub_name_.c_str()); + GELOGI("TBE: find the binfile_key[%s] in HandleMap", stub_name_.c_str()); kernel_store.ReferTBEHandle(stub_name_.c_str()); } std::string kernel_name; @@ -108,25 +122,63 @@ Status AiCoreOpTask::RegisterTbeHandle(const OpDesc &op_desc) { return SUCCESS; } -Status AiCoreOpTask::InitWithTaskDef(const OpDesc &op_desc, const domi::TaskDef &task_def) { - GE_CHK_STATUS_RET(ValidateTaskDef(task_def), - "[%s] Failed to validate task def: [%s]", - op_desc.GetName().c_str(), - task_def.DebugString().c_str()); +Status AiCoreOpTask::RegisterKernelHandle(const OpDesc &op_desc) { + TbeHandleRegistry ®istry = TbeHandleRegistry::GetInstance(); + auto tbe_kernel = op_desc.TryGetExtAttr(OP_EXTATTR_NAME_TBE_KERNEL, TBEKernelPtr()); + if (tbe_kernel == nullptr) { + GELOGE(INTERNAL_ERROR, "TBE: %s can't find tvm bin file!", op_desc.GetName().c_str()); + return INTERNAL_ERROR; + } + void *bin_handle = nullptr; + GELOGD("Start to register kernel for node: [%s].", op_desc.GetName().c_str()); + rtDevBinary_t binary; + std::string json_string; + GE_IF_BOOL_EXEC(AttrUtils::GetStr(&op_desc, TVM_ATTR_NAME_MAGIC, json_string), + GELOGI("Get original type of session_graph_id.")); + if (json_string == "RT_DEV_BINARY_MAGIC_ELF_AICPU") { + binary.magic = RT_DEV_BINARY_MAGIC_ELF_AICPU; + } else if (json_string == "RT_DEV_BINARY_MAGIC_ELF") { + binary.magic = RT_DEV_BINARY_MAGIC_ELF; + } else if (json_string == "RT_DEV_BINARY_MAGIC_ELF_AIVEC") { + binary.magic = RT_DEV_BINARY_MAGIC_ELF_AIVEC; + } else { + GELOGE(PARAM_INVALID, "TBE: Invalid parameter magic number! json: %s", json_string.c_str()); + return PARAM_INVALID; + } + binary.version = 0; + binary.data = tbe_kernel->GetBinData(); + binary.length = tbe_kernel->GetBinDataSize(); + GELOGI("TBE: binary.length: %lu", binary.length); + GE_CHK_RT_RET(rtRegisterAllKernel(&binary, &bin_handle)); + handle_ = bin_handle; + auto holder = std::unique_ptr(new (std::nothrow) TbeHandleHolder(handle_)); + if (holder == nullptr) { + GELOGE(ACL_ERROR_GE_MEMORY_ALLOCATION, "create HandleHodler failed."); + return ACL_ERROR_GE_MEMORY_ALLOCATION; + } + if (!registry.AddHandle(std::move(holder))) { + GELOGE(ACL_ERROR_GE_INTERNAL_ERROR, "Add handle failed. 
node name = %s", op_desc.GetName().c_str()); + return ACL_ERROR_GE_INTERNAL_ERROR; + } + return SUCCESS; +} + +Status AiCoreOpTask::InitWithKernelDef(const OpDesc &op_desc, const domi::TaskDef &task_def) { const domi::KernelDef &kernel_def = task_def.kernel(); const domi::KernelContext &context = kernel_def.context(); stub_name_ = kernel_def.stub_func(); - GE_CHK_STATUS_RET(RegisterTbeHandle(op_desc)); - GE_CHK_RT_RET(rtGetFunctionByName(stub_name_.c_str(), &stub_func_)); args_size_ = kernel_def.args_size(); block_dim_ = kernel_def.block_dim(); - // malloc args memory args_.reset(new(std::nothrow) uint8_t[args_size_]); GE_CHECK_NOTNULL(args_); + if (kernel_def.args().size() < args_size_) { + GELOGE(INTERNAL_ERROR, "args size of kernel_def is smaller than args_size_"); + return INTERNAL_ERROR; + } errno_t err = memcpy_s(args_.get(), args_size_, kernel_def.args().data(), args_size_); if (err != EOK) { GELOGE(INTERNAL_ERROR, "AiCoreTask memcpy args failed."); @@ -157,19 +209,75 @@ Status AiCoreOpTask::InitWithTaskDef(const OpDesc &op_desc, const domi::TaskDef block_dim_, arg_base_, args_size_); + return SUCCESS; +} + +Status AiCoreOpTask::InitWithKernelDefWithHandle(const OpDesc &op_desc, const domi::TaskDef &task_def) { + const domi::KernelDefWithHandle &kernel_with_handle = task_def.kernel_with_handle(); + const domi::KernelContext &context = kernel_with_handle.context(); + + GE_CHK_STATUS_RET(RegisterKernelHandle(op_desc)); + original_kernel_key_ = kernel_with_handle.original_kernel_key() + "_"; + node_info_ = kernel_with_handle.node_info() + "/"; + args_size_ = kernel_with_handle.args_size(); + block_dim_ = kernel_with_handle.block_dim(); + // malloc args memory + args_.reset(new(std::nothrow) uint8_t[args_size_]); + GE_CHECK_NOTNULL(args_); + if (kernel_with_handle.args().size() < args_size_) { + GELOGE(INTERNAL_ERROR, "args size of kernel_def is smaller than args_size_"); + return INTERNAL_ERROR; + } + errno_t err = memcpy_s(args_.get(), args_size_, kernel_with_handle.args().data(), args_size_); + + if (err != EOK) { + GELOGE(INTERNAL_ERROR, "AiCoreTask memcpy args failed."); + return INTERNAL_ERROR; + } + if (context.args_offset().size() < sizeof(uint16_t)) { + GELOGE(INTERNAL_ERROR, "Invalid args_offset, size = %zu.", context.args_offset().size()); + return INTERNAL_ERROR; + } + + const auto *args_offset_buffer = reinterpret_cast(context.args_offset().data()); + uint32_t offset = *args_offset_buffer; + if (offset > args_size_) { + GELOGE(INTERNAL_ERROR, + "[%s] Arg offset out of range. 
offset = %u, arg size = %u", + GetName().c_str(), + offset, + args_size_); + return INTERNAL_ERROR; + } + + arg_base_ = reinterpret_cast(args_.get() + offset); + max_arg_count_ = (args_size_ - offset) / sizeof(void *); + return SUCCESS; +} + +Status AiCoreOpTask::InitWithTaskDef(const OpDesc &op_desc, const domi::TaskDef &task_def) { + GE_CHK_STATUS_RET(ValidateTaskDef(task_def), + "[%s] Failed to validate task def: [%s]", + op_desc.GetName().c_str(), + task_def.DebugString().c_str()); + + if (task_def.type() != RT_MODEL_TASK_ALL_KERNEL) { + GE_CHK_STATUS_RET(InitWithKernelDef(op_desc, task_def)); + } else { + GE_CHK_STATUS_RET(InitWithKernelDefWithHandle(op_desc, task_def)); + } return SUCCESS; } Status AiCoreOpTask::ValidateTaskDef(const domi::TaskDef &task_def) { auto task_type = static_cast(task_def.type()); - if (task_type != RT_MODEL_TASK_KERNEL) { + if (task_type != RT_MODEL_TASK_KERNEL && task_type != RT_MODEL_TASK_ALL_KERNEL) { GELOGE(INTERNAL_ERROR, "Invalid task type (%d) in AiCore CreateTask.", static_cast(task_type)); return INTERNAL_ERROR; } - - const domi::KernelDef &kernel_def = task_def.kernel(); - const domi::KernelContext &context = kernel_def.context(); + const auto &context = task_type == RT_MODEL_TASK_KERNEL ? task_def.kernel().context() : + task_def.kernel_with_handle().context(); auto kernel_type = static_cast(context.kernel_type()); if (kernel_type != ccKernelType::TE) { GELOGE(INTERNAL_ERROR, "Invalid kernel type(%d) in AiCore TaskDef.", static_cast(kernel_type)); @@ -180,10 +288,9 @@ Status AiCoreOpTask::ValidateTaskDef(const domi::TaskDef &task_def) { } Status AiCoreOpTask::PrepareWithShape(TaskContext &context) { - if (tiling_buffer_ != nullptr) { + if (is_dynamic_) { return UpdateTilingInfo(context); } - return SUCCESS; } @@ -212,8 +319,14 @@ Status AiCoreOpTask::UpdateTilingInfo(TaskContext &context) { clear_atomic_ = tiling_info.clear_atomic; tiling_data_ = tiling_info.tiling_data.str(); + tiling_key_ = tiling_info.tiling_key; + GELOGD("Successfully getting [tiling_key] : %u", tiling_key_); if (tiling_data_.empty()) { - GELOGE(INTERNAL_ERROR, "[%s] Tiling data is empty.", stub_name_.c_str()); + GELOGD("[%s] Tiling data is empty.", op_desc->GetName().c_str()); + return SUCCESS; + } + if (tiling_buffer_ == nullptr) { + GELOGE(INTERNAL_ERROR, "tiling_buffer is nullptr while tiling_data is not empty!"); return INTERNAL_ERROR; } @@ -296,16 +409,26 @@ Status AiCoreOpTask::UpdateArgs(TaskContext &task_context) { } Status AiCoreOpTask::LaunchKernel(rtStream_t stream) { - GELOGD("AiCoreOpTask LaunchKernel Start (task = %s, block_dim = %u).", stub_name_.c_str(), block_dim_); - GE_CHK_RT_RET(rtKernelLaunch(stub_func_, block_dim_, args_.get(), args_size_, nullptr, stream)); - GELOGD("AiCoreOpTask LaunchKernel End (task = %s, block_dim = %u).", stub_name_.c_str(), block_dim_); + if (handle_ != nullptr) { + std::string dev_func = original_kernel_key_ + std::to_string(tiling_key_); + std::string kernel_info = node_info_ + std::to_string(tiling_key_); + GELOGD("AiCoreOpTask rtKernelLaunchWithHandle Start (dev_func = %s, block_dim = %u).", dev_func.c_str(), + block_dim_); + GE_CHK_RT_RET(rtKernelLaunchWithHandle(handle_, dev_func.c_str(), block_dim_, args_.get(), args_size_, nullptr, + stream, kernel_info.c_str())); + GELOGD("AiCoreOpTask rtKernelLaunchWithHandle End (dev_func = %s, block_dim = %u).", dev_func.c_str(), + block_dim_); + } else { + GELOGD("AiCoreOpTask LaunchKernel Start (task = %s, block_dim = %u).", stub_name_.c_str(), block_dim_); + 
GE_CHK_RT_RET(rtKernelLaunch(stub_func_, block_dim_, args_.get(), args_size_, nullptr, stream)); + GELOGD("AiCoreOpTask LaunchKernel End (task = %s, block_dim = %u).", stub_name_.c_str(), block_dim_); + } return SUCCESS; } Status AiCoreOpTask::InitTilingInfo(const OpDesc &op_desc) { - bool dynamic_supported = false; - (void) AttrUtils::GetBool(op_desc, kAttrSupportDynamicShape, dynamic_supported); - if (!dynamic_supported) { + (void) AttrUtils::GetBool(op_desc, kAttrSupportDynamicShape, is_dynamic_); + if (!is_dynamic_) { GELOGD("[%s] Dynamic shape is not supported.", op_desc.GetName().c_str()); return SUCCESS; } @@ -314,22 +437,26 @@ Status AiCoreOpTask::InitTilingInfo(const OpDesc &op_desc) { int64_t max_size = -1; (void) AttrUtils::GetInt(op_desc, GetKeyForOpParamSize(), max_size); GELOGD("Got op param size by key: %s, ret = %ld", GetKeyForOpParamSize().c_str(), max_size); - if (max_size <= 0) { + if (max_size < 0) { GELOGE(PARAM_INVALID, "[%s] Invalid op_param_size: %ld.", op_desc.GetName().c_str(), max_size); return PARAM_INVALID; } auto allocator = NpuMemoryAllocator::GetAllocator(); GE_CHECK_NOTNULL(allocator); - tiling_buffer_ = TensorBuffer::Create(allocator, static_cast(max_size)); - GE_CHECK_NOTNULL(tiling_buffer_); + if (max_size > 0) { + tiling_buffer_ = TensorBuffer::Create(allocator, static_cast(max_size)); + GE_CHECK_NOTNULL(tiling_buffer_); + GELOGD("[%s] Done allocating tiling buffer, size=%ld.", op_desc.GetName().c_str(), max_size); + } else { + GELOGD("op_param_size is 0, no need to create tiling buffer."); + } - GELOGD("[%s] Done allocating tiling buffer, size=%ld.", op_desc.GetName().c_str(), max_size); return SUCCESS; } bool AiCoreOpTask::IsDynamicShapeSupported() { - return tiling_buffer_ != nullptr; + return is_dynamic_; } const std::string &AiCoreOpTask::GetName() const { diff --git a/ge/hybrid/node_executor/aicore/aicore_op_task.h b/ge/hybrid/node_executor/aicore/aicore_op_task.h index 69a74ea9..af09c2af 100755 --- a/ge/hybrid/node_executor/aicore/aicore_op_task.h +++ b/ge/hybrid/node_executor/aicore/aicore_op_task.h @@ -28,6 +28,32 @@ namespace ge { namespace hybrid { +class TbeHandleHolder { + public: + TbeHandleHolder(void *bin_handle); + ~TbeHandleHolder(); + + void SetBinHandle(void *bin_handle) { bin_handle_ = bin_handle; } + void *GetBinHandle() { return bin_handle_; } + + private: + friend class TbeHandleRegistry; + void *bin_handle_ = nullptr; +}; + +class TbeHandleRegistry { + public: + static TbeHandleRegistry &GetInstance() { + static TbeHandleRegistry instance; + return instance; + } + + bool AddHandle(std::unique_ptr &&holder); + + private: + std::set> registered_handles_; +}; + class AiCoreOpTask { public: AiCoreOpTask() = default; @@ -67,6 +93,9 @@ class AiCoreOpTask { Status InitWithTaskDef(const OpDesc &node, const domi::TaskDef &task_def); Status InitTilingInfo(const OpDesc &op_desc); Status RegisterTbeHandle(const OpDesc &op_desc); + Status RegisterKernelHandle(const OpDesc &op_desc); + Status InitWithKernelDef(const OpDesc &op_desc, const domi::TaskDef &task_def); + Status InitWithKernelDefWithHandle(const OpDesc &node, const domi::TaskDef &task_def); std::string stub_name_; void *stub_func_ = nullptr; @@ -76,6 +105,11 @@ class AiCoreOpTask { bool clear_atomic_ = true; bool is_single_op_ = false; std::vector output_indices_to_skip_; + string original_kernel_key_; + string node_info_; + uint32_t tiling_key_ = 0; + void *handle_ = nullptr; + bool is_dynamic_ = false; }; class AtomicAddrCleanOpTask : public AiCoreOpTask { diff --git 
a/ge/single_op/single_op_model.cc b/ge/single_op/single_op_model.cc index 7fcb0b8f..37297fdd 100755 --- a/ge/single_op/single_op_model.cc +++ b/ge/single_op/single_op_model.cc @@ -261,7 +261,7 @@ Status SingleOpModel::BuildTaskList(StreamResource *stream_resource, SingleOp &s if (kernel_type == ccKernelType::TE) { GELOGD("Building TBE task"); TbeOpTask *tbe_task = nullptr; - auto ret = BuildKernelTask(task_def.kernel(), &tbe_task); + auto ret = BuildKernelTask(task_def, &tbe_task); if (ret != SUCCESS) { return ret; } @@ -332,9 +332,11 @@ void SingleOpModel::ParseArgTable(OpTask *task, SingleOp &op) { } } -Status SingleOpModel::BuildKernelTask(const domi::KernelDef &kernel_def, TbeOpTask **task) { +Status SingleOpModel::BuildKernelTask(const domi::TaskDef &task_def, TbeOpTask **task) { GE_CHECK_NOTNULL(task); - const auto &context = kernel_def.context(); + auto task_type = static_cast(task_def.type()); + const auto &context = task_type == RT_MODEL_TASK_KERNEL ? task_def.kernel().context() : + task_def.kernel_with_handle().context(); auto iter = op_list_.find(context.op_index()); if (iter == op_list_.end()) { GELOGE(ACL_ERROR_GE_INTERNAL_ERROR, "op desc not found. op index = %u", context.op_index()); @@ -347,7 +349,7 @@ Status SingleOpModel::BuildKernelTask(const domi::KernelDef &kernel_def, TbeOpTa return ACL_ERROR_GE_MEMORY_ALLOCATION; } - auto builder = TbeTaskBuilder(model_name_, iter->second, kernel_def); + auto builder = TbeTaskBuilder(model_name_, iter->second, task_def); auto ret = builder.BuildTask(*tbe_task, model_params_); if (ret != SUCCESS) { delete tbe_task; @@ -418,13 +420,15 @@ Status SingleOpModel::BuildOp(StreamResource &resource, SingleOp &single_op) { } Status SingleOpModel::BuildModelTaskKernel(const TaskDef &task_def, DynamicSingleOp &single_op) { - const domi::KernelDef &kernel_def = task_def.kernel(); - const auto &context = kernel_def.context(); + auto task_type = static_cast(task_def.type()); + const auto &context = task_type == RT_MODEL_TASK_KERNEL ? 
task_def.kernel().context() : + task_def.kernel_with_handle().context(); + auto kernel_type = static_cast(context.kernel_type()); if (kernel_type == ccKernelType::TE) { GELOGD("Building TBE task"); TbeOpTask *tbe_task = nullptr; - GE_CHK_STATUS_RET_NOLOG(BuildKernelTask(task_def.kernel(), &tbe_task)); + GE_CHK_STATUS_RET_NOLOG(BuildKernelTask(task_def, &tbe_task)); tbe_task->SetModelArgs(model_name_, model_id_); single_op.op_task_.reset(tbe_task); } else if (kernel_type == ccKernelType::AI_CPU || kernel_type == ccKernelType::CUST_AI_CPU) { @@ -453,7 +457,7 @@ Status SingleOpModel::BuildTaskListForDynamicOp(DynamicSingleOp &single_op) { GELOGI("[%s] Task[%d], type = %u, DebugString = %s", model_name_.c_str(), i, task_def.type(), task_def.DebugString().c_str()); auto task_type = static_cast(task_def.type()); - if (task_type == RT_MODEL_TASK_KERNEL) { + if (task_type == RT_MODEL_TASK_KERNEL || task_type == RT_MODEL_TASK_ALL_KERNEL) { if (single_op.op_task_ != nullptr) { GELOGE(ACL_ERROR_GE_OP_TASK_TYPE_INVALID, "Do not support dynamic op with multiple tasks."); return ACL_ERROR_GE_OP_TASK_TYPE_INVALID; diff --git a/ge/single_op/single_op_model.h b/ge/single_op/single_op_model.h index 6637271c..684dab77 100755 --- a/ge/single_op/single_op_model.h +++ b/ge/single_op/single_op_model.h @@ -67,7 +67,7 @@ class SingleOpModel { Status BuildTaskList(StreamResource *stream_resource, SingleOp &single_op); Status BuildTaskListForDynamicOp(DynamicSingleOp &dynamic_single_op); - Status BuildKernelTask(const domi::KernelDef &kernel_def, TbeOpTask **task); + Status BuildKernelTask(const domi::TaskDef &task_def, TbeOpTask **task); Status BuildKernelExTask(const domi::KernelExDef &kernel_def, AiCpuTask **task, bool dynamic_flag, bool& depend_compute_flag, uint64_t kernel_id); Status BuildCpuKernelTask(const domi::KernelDef &kernel_def, OpTask **task, uint64_t kernel_id); diff --git a/ge/single_op/task/op_task.cc b/ge/single_op/task/op_task.cc index ff200806..4f1c1f03 100755 --- a/ge/single_op/task/op_task.cc +++ b/ge/single_op/task/op_task.cc @@ -93,6 +93,14 @@ void TbeOpTask::SetKernelArgs(std::unique_ptr &&args, size_t arg_size op_desc_ = op_desc; } +void TbeOpTask::SetKernelWithHandleArgs(std::unique_ptr &&args, size_t arg_size, uint32_t block_dim, + const OpDescPtr &op_desc, + const domi::KernelDefWithHandle &kernel_def_with_handle) { + SetKernelArgs(std::move(args), arg_size, block_dim, op_desc); + original_kernel_key_ = kernel_def_with_handle.original_kernel_key(); + node_info_ = kernel_def_with_handle.node_info(); +} + void TbeOpTask::SetSmDesc(void *sm_desc) { sm_desc_ = sm_desc; } void OpTask::SetModelArgs(std::string model_name, uint32_t model_id) { @@ -165,6 +173,10 @@ const std::string &TbeOpTask::GetStubName() const { return stub_name_; } uint32_t TbeOpTask::GetTaskType() const { return kTaskTypeAicore; } +void TbeOpTask::SetHandle(void *handle) { + this->handle_ = handle; +} + Status TbeOpTask::LaunchKernel(rtStream_t stream) { GELOGD("To invoke rtKernelLaunch. task = %s, block_dim = %u", this->stub_name_.c_str(), block_dim_); auto *sm_desc = reinterpret_cast(sm_desc_); @@ -204,8 +216,9 @@ Status TbeOpTask::UpdateRunInfo(const vector &input_desc, const ve } block_dim_ = run_info.block_dim; tiling_data_ = run_info.tiling_data.str(); - GELOGD("Done invoking OpParaCalculate successfully. block_dim = %u, tiling size = %zu", block_dim_, - tiling_data_.size()); + tiling_key_ = run_info.tiling_key; + GELOGD("Done invoking OpParaCalculate successfully. 
block_dim = %u, tiling size = %zu, tiling_key = %u", block_dim_, + tiling_data_.size(), tiling_key_); GE_CHK_STATUS_RET(AllocateWorkspaces(run_info.workspaces), "Failed to allocate workspaces"); return SUCCESS; @@ -329,8 +342,17 @@ Status TbeOpTask::LaunchKernel(const vector &input_desc, } GELOGD("[%s] Start to invoke rtKernelLaunch", node_->GetName().c_str()); - GE_CHK_RT_RET(rtKernelLaunch(stub_func_, block_dim_, args_.get(), arg_size_, nullptr, stream)); - GELOGD("[%s] Done invoking rtKernelLaunch successfully", node_->GetName().c_str()); + if (handle_ == nullptr) { + GE_CHK_RT_RET(rtKernelLaunch(stub_func_, block_dim_, args_.get(), arg_size_, nullptr, stream)); + GELOGD("[%s] Done invoking rtKernelLaunch successfully", node_->GetName().c_str()); + } else { + std::string dev_func = original_kernel_key_ + "_" + std::to_string(tiling_key_); + std::string kernel_info = node_info_ + "/" + std::to_string(tiling_key_); + GE_CHK_RT_RET(rtKernelLaunchWithHandle(handle_, dev_func.c_str(), block_dim_, args_.get(), arg_size_, nullptr, + stream, kernel_info.c_str())); + GELOGD("[%s] Done invoking rtKernelLaunchWithHandle successfully", node_->GetName().c_str()); + } + return SUCCESS; } diff --git a/ge/single_op/task/op_task.h b/ge/single_op/task/op_task.h index 78e1f6f0..be7f4aab 100644 --- a/ge/single_op/task/op_task.h +++ b/ge/single_op/task/op_task.h @@ -78,6 +78,8 @@ class TbeOpTask : public OpTask { void SetSmDesc(void *sm_desc); void SetStubFunc(const std::string &name, const void *stub_func); void SetKernelArgs(std::unique_ptr &&args, size_t arg_size, uint32_t block_dim, const OpDescPtr &op_desc); + void SetKernelWithHandleArgs(std::unique_ptr &&args, size_t arg_size, uint32_t block_dim, + const OpDescPtr &op_desc, const domi::KernelDefWithHandle& kernel_def_with_handle); Status UpdateRunInfo(const vector &input_desc, const vector &output_desc) override; @@ -87,6 +89,7 @@ class TbeOpTask : public OpTask { const std::string &GetStubName() const; void EnableDynamicSupport(const NodePtr &node, void *tiling_buffer, size_t max_tiling_size); uint32_t GetTaskType() const override; + void SetHandle(void *handle); private: friend class SingleOpModel; @@ -107,6 +110,11 @@ class TbeOpTask : public OpTask { std::string tiling_data_; std::vector workspaces_; NodePtr node_; + + uint32_t tiling_key_ = 0; + void* handle_ = nullptr; + std::string original_kernel_key_; + std::string node_info_; }; class AiCpuBaseTask : public OpTask { diff --git a/ge/single_op/task/tbe_task_builder.cc b/ge/single_op/task/tbe_task_builder.cc index 6eee61d0..606f8087 100644 --- a/ge/single_op/task/tbe_task_builder.cc +++ b/ge/single_op/task/tbe_task_builder.cc @@ -49,6 +49,15 @@ KernelHolder::~KernelHolder() { } } +HandleHolder::HandleHolder(void *bin_handle) + : bin_handle_(bin_handle) {} + +HandleHolder::~HandleHolder() { + if (bin_handle_ != nullptr) { + GE_CHK_RT(rtDevBinaryUnRegister(bin_handle_)); + } +} + const char *KernelBinRegistry::GetUnique(const string &stub_func) { std::lock_guard lock(mutex_); auto it = unique_stubs_.find(stub_func); @@ -76,10 +85,17 @@ bool KernelBinRegistry::AddKernel(const std::string &stub_name, std::unique_ptr< return ret.second; } -TbeTaskBuilder::TbeTaskBuilder(const std::string &model_name, const NodePtr &node, const domi::KernelDef &kernel_def) +bool HandleRegistry::AddHandle(std::unique_ptr &&holder) { + auto ret = registered_handles_.emplace(std::move(holder)); + return ret.second; +} + +TbeTaskBuilder::TbeTaskBuilder(const std::string &model_name, const NodePtr &node, const 
domi::TaskDef &task_def) : node_(node), op_desc_(node->GetOpDesc()), - kernel_def_(kernel_def), + task_def_(task_def), + kernel_def_(task_def.kernel()), + kernel_def_with_handle_(task_def.kernel_with_handle()), stub_name_(model_name + "/" + node->GetName() + "_tvmbin") {} Status TbeTaskBuilder::DoRegisterBinary(const OpKernelBin &kernel_bin, void **bin_handle, @@ -89,9 +105,14 @@ Status TbeTaskBuilder::DoRegisterBinary(const OpKernelBin &kernel_bin, void **bi binary.data = kernel_bin.GetBinData(); binary.length = kernel_bin.GetBinDataSize(); binary.magic = param.core_type == 0 ? RT_DEV_BINARY_MAGIC_ELF : RT_DEV_BINARY_MAGIC_ELF_AIVEC; - auto ret = rtDevBinaryRegister(&binary, bin_handle); + Status ret = 0; + if (task_def_.type() == RT_MODEL_TASK_ALL_KERNEL) { + ret = rtRegisterAllKernel(&binary, bin_handle); + } else { + ret = rtDevBinaryRegister(&binary, bin_handle); + } if (ret != RT_ERROR_NONE) { - GELOGE(ret, "rtDevBinaryRegister failed, bin key = %s, core_type = %ld, rt ret = %d", stub_name_.c_str(), + GELOGE(ret, "DoRegisterBinary failed, bin key = %s, core_type = %ld, rt ret = %d", stub_name_.c_str(), param.core_type, static_cast(ret)); return ret; } @@ -128,14 +149,15 @@ Status TbeTaskBuilder::DoRegisterFunction(void *bin_handle, const char *stub_nam Status TbeTaskBuilder::DoRegisterKernel(const ge::OpKernelBin &tbe_kernel, const char *bin_file_key, void **bin_handle, const SingleOpModelParam ¶m) { - std::string kernel_name; - GetKernelName(op_desc_, kernel_name); - void *handle = nullptr; auto ret = DoRegisterBinary(tbe_kernel, &handle, param); if (ret != SUCCESS) { return ret; } + if (task_def_.type() == RT_MODEL_TASK_ALL_KERNEL) { + *bin_handle = handle; + return SUCCESS; + } ret = DoRegisterMeta(handle); if (ret != SUCCESS) { @@ -143,6 +165,8 @@ Status TbeTaskBuilder::DoRegisterKernel(const ge::OpKernelBin &tbe_kernel, const return ret; } + std::string kernel_name; + GetKernelName(op_desc_, kernel_name); ret = DoRegisterFunction(handle, bin_file_key, kernel_name.c_str()); if (ret != SUCCESS) { GE_CHK_RT(rtDevBinaryUnRegister(handle)); @@ -186,13 +210,15 @@ Status TbeTaskBuilder::RegisterKernel(TbeOpTask &task, const SingleOpModelParam void *bin_handle = nullptr; auto ret = DoRegisterKernel(*tbe_kernel, stub_func, &bin_handle, param); - if (ret == SUCCESS) { - holder->SetBinHandle(bin_handle); - if (!registry.AddKernel(stub_name_, std::move(holder))) { - // should not happen. only one thread can reach here - GELOGE(ACL_ERROR_GE_INTERNAL_ERROR, "Add kernel failed. stub name = %s", stub_name_.c_str()); - return ACL_ERROR_GE_INTERNAL_ERROR; - } + if (ret != SUCCESS) { + GELOGE(ACL_ERROR_GE_INTERNAL_ERROR, "RegisterKernel failed. stub name = %s", stub_name_.c_str()); + return ACL_ERROR_GE_INTERNAL_ERROR; + } + holder->SetBinHandle(bin_handle); + if (!registry.AddKernel(stub_name_, std::move(holder))) { + // should not happen. only one thread can reach here + GELOGE(ACL_ERROR_GE_INTERNAL_ERROR, "Add kernel failed. stub name = %s", stub_name_.c_str()); + return ACL_ERROR_GE_INTERNAL_ERROR; } } @@ -200,6 +226,35 @@ Status TbeTaskBuilder::RegisterKernel(TbeOpTask &task, const SingleOpModelParam return SUCCESS; } +Status TbeTaskBuilder::RegisterKernelWithHandle(TbeOpTask &task, const SingleOpModelParam ¶m) { + GELOGD("RegisterKernelWithHandle begin."); + HandleRegistry ®istry = HandleRegistry::GetInstance(); + auto tbe_kernel = GetTbeKernel(op_desc_); + if (tbe_kernel == nullptr) { + GELOGE(ACL_ERROR_GE_INTERNAL_ERROR, "OP EXT ATTR NAME TBE_KERNEL not found. 
op = %s", + op_desc_->GetName().c_str()); + return ACL_ERROR_GE_INTERNAL_ERROR; + } + void *bin_handle = nullptr; + auto ret = DoRegisterKernel(*tbe_kernel, nullptr, &bin_handle, param); + if (ret != SUCCESS) { + GELOGE(ACL_ERROR_GE_INTERNAL_ERROR, "RegisterKernel failed. node name = %s", op_desc_->GetName().c_str()); + return ACL_ERROR_GE_INTERNAL_ERROR; + } + handle_ = bin_handle; + auto holder = std::unique_ptr(new (std::nothrow) HandleHolder(handle_)); + if (holder == nullptr) { + GELOGE(ACL_ERROR_GE_MEMORY_ALLOCATION, "create HandleHodler failed."); + return ACL_ERROR_GE_MEMORY_ALLOCATION; + } + if (!registry.AddHandle(std::move(holder))) { + GELOGE(ACL_ERROR_GE_INTERNAL_ERROR, "Add handle failed. node name = %s", op_desc_->GetName().c_str()); + return ACL_ERROR_GE_INTERNAL_ERROR; + } + + return SUCCESS; +} + Status TbeTaskBuilder::GetSmDesc(void **sm_desc, const SingleOpModelParam ¶m) const { const std::string &sm_desc_str = kernel_def_.sm_desc(); if (sm_desc_str.empty()) { @@ -217,17 +272,17 @@ Status TbeTaskBuilder::GetSmDesc(void **sm_desc, const SingleOpModelParam ¶m } } - auto rtRet = rtMemAllocManaged(sm_desc, sm_desc_str.size(), RT_MEMORY_SPM); - if (rtRet != RT_ERROR_NONE) { - GELOGE(rtRet, "rtMemAllocManaged failed, ret: %d", static_cast(rtRet)); - return rtRet; + auto rt_ret = rtMemAllocManaged(sm_desc, sm_desc_str.size(), RT_MEMORY_SPM); + if (rt_ret != RT_ERROR_NONE) { + GELOGE(rt_ret, "rtMemAllocManaged failed, ret: %d", static_cast(rt_ret)); + return rt_ret; } - rtRet = rtMemcpy(*sm_desc, sm_desc_str.size(), sm_desc_str.data(), sm_desc_str.size(), RT_MEMCPY_HOST_TO_DEVICE); - if (rtRet != RT_ERROR_NONE) { + rt_ret = rtMemcpy(*sm_desc, sm_desc_str.size(), sm_desc_str.data(), sm_desc_str.size(), RT_MEMCPY_HOST_TO_DEVICE); + if (rt_ret != RT_ERROR_NONE) { (void)rtMemFreeManaged(*sm_desc); - GELOGE(rtRet, "rtMemcpy, ret: %d", static_cast(rtRet)); - return rtRet; + GELOGE(rt_ret, "rtMemcpy, ret: %d", static_cast(rt_ret)); + return rt_ret; } } @@ -239,10 +294,10 @@ Status TbeTaskBuilder::SetKernelArgs(TbeOpTask &task, const SingleOpModelParam & auto args = std::unique_ptr(new (std::nothrow) uint8_t[arg_size]); GE_CHECK_NOTNULL(args); - auto rtRet = rtMemcpy(args.get(), arg_size, kernel_def_.args().data(), arg_size, RT_MEMCPY_HOST_TO_HOST); - if (rtRet != RT_ERROR_NONE) { - GELOGE(rtRet, "rtMemcpy args failed, size = %zu, ret = %d", arg_size, static_cast(rtRet)); - return RT_ERROR_TO_GE_STATUS(rtRet); + auto rt_ret = rtMemcpy(args.get(), arg_size, kernel_def_.args().data(), arg_size, RT_MEMCPY_HOST_TO_HOST); + if (rt_ret != RT_ERROR_NONE) { + GELOGE(rt_ret, "rtMemcpy args failed, size = %zu, ret = %d", arg_size, static_cast(rt_ret)); + return RT_ERROR_TO_GE_STATUS(rt_ret); } const domi::KernelContext &context = kernel_def_.context(); @@ -258,39 +313,83 @@ Status TbeTaskBuilder::SetKernelArgs(TbeOpTask &task, const SingleOpModelParam & std::vector tensor_device_addr_vec = BuildTaskUtils::GetKernelArgs(op_desc_, param); void *src_addr = reinterpret_cast(tensor_device_addr_vec.data()); uint64_t src_len = sizeof(void *) * tensor_device_addr_vec.size(); - rtRet = rtMemcpy(args.get() + offset, arg_size - offset, src_addr, src_len, RT_MEMCPY_HOST_TO_HOST); - if (rtRet != RT_ERROR_NONE) { - GELOGE(rtRet, "rtMemcpy addresses failed, ret = %d", static_cast(rtRet)); - return RT_ERROR_TO_GE_STATUS(rtRet); + rt_ret = rtMemcpy(args.get() + offset, arg_size - offset, src_addr, src_len, RT_MEMCPY_HOST_TO_HOST); + if (rt_ret != RT_ERROR_NONE) { + GELOGE(rt_ret, "rtMemcpy addresses failed, ret 
= %d", static_cast(rt_ret)); + return RT_ERROR_TO_GE_STATUS(rt_ret); } } - task.SetKernelArgs(std::move(args), arg_size, kernel_def_.block_dim(), op_desc); + + return SUCCESS; +} + +Status TbeTaskBuilder::SetKernelWithHandleArgs(TbeOpTask &task, const SingleOpModelParam ¶m, + const OpDescPtr &op_desc) { + size_t arg_size = kernel_def_with_handle_.args_size(); + auto args = std::unique_ptr(new (std::nothrow) uint8_t[arg_size]); + GE_CHECK_NOTNULL(args); + + auto rt_ret = rtMemcpy(args.get(), arg_size, kernel_def_with_handle_.args().data(), arg_size, RT_MEMCPY_HOST_TO_HOST); + if (rt_ret != RT_ERROR_NONE) { + GELOGE(rt_ret, "rtMemcpy args failed, size = %zu, ret = %d", arg_size, static_cast(rt_ret)); + return rt_ret; + } + + const domi::KernelContext &context = kernel_def_with_handle_.context(); + const auto *args_offset_tmp = reinterpret_cast(context.args_offset().data()); + uint16_t offset = *args_offset_tmp; + + bool is_dynamic = false; + (void)AttrUtils::GetBool(op_desc_, kAttrSupportDynamicShape, is_dynamic); + if (is_dynamic) { + GE_CHK_STATUS_RET_NOLOG(InitTilingInfo(task)); + } else { + // copy args + std::vector tensor_device_addr_vec = BuildTaskUtils::GetKernelArgs(op_desc_, param); + void *src_addr = reinterpret_cast(tensor_device_addr_vec.data()); + uint64_t src_len = sizeof(void *) * tensor_device_addr_vec.size(); + rt_ret = rtMemcpy(args.get() + offset, arg_size - offset, src_addr, src_len, RT_MEMCPY_HOST_TO_HOST); + if (rt_ret != RT_ERROR_NONE) { + GELOGE(rt_ret, "rtMemcpy addresses failed, ret = %d", static_cast(rt_ret)); + return rt_ret; + } + } + task.SetKernelWithHandleArgs(std::move(args), arg_size, kernel_def_with_handle_.block_dim(), op_desc, + kernel_def_with_handle_); + return SUCCESS; } Status TbeTaskBuilder::BuildTask(TbeOpTask &task, const SingleOpModelParam ¶m) { GELOGD("Build tbe task begin"); - auto ret = SetKernelArgs(task, param, op_desc_); + auto task_type = static_cast(task_def_.type()); + auto ret = task_type == RT_MODEL_TASK_ALL_KERNEL ? SetKernelWithHandleArgs(task, param, op_desc_) : + SetKernelArgs(task, param, op_desc_); if (ret != SUCCESS) { return ret; } - ret = RegisterKernel(task, param); + ret = task_type == RT_MODEL_TASK_ALL_KERNEL ? 
RegisterKernelWithHandle(task, param) : + RegisterKernel(task, param); + task.SetHandle(handle_); if (ret != SUCCESS) { return ret; } + auto task_info = BuildTaskUtils::GetTaskInfo(op_desc_); GELOGI("[TASK_INFO] %s %s", stub_name_.c_str(), task_info.c_str()); - void *stub_func = nullptr; - auto rtRet = rtGetFunctionByName(stub_name_.c_str(), &stub_func); - if (rtRet != SUCCESS) { - GELOGE(rtRet, "rtGetFunctionByName failed."); - return RT_ERROR_TO_GE_STATUS(rtRet); + if (task_type != RT_MODEL_TASK_ALL_KERNEL) { + void *stub_func = nullptr; + auto rt_ret = rtGetFunctionByName(stub_name_.c_str(), &stub_func); + if (rt_ret != SUCCESS) { + GELOGE(rt_ret, "rtGetFunctionByName failed."); + return RT_ERROR_TO_GE_STATUS(rt_ret); + } + task.SetStubFunc(stub_name_, stub_func); } - task.SetStubFunc(stub_name_, stub_func); return SUCCESS; } @@ -299,15 +398,16 @@ Status TbeTaskBuilder::InitTilingInfo(TbeOpTask &task) { int64_t max_size = -1; (void)AttrUtils::GetInt(op_desc_, kAttrOpParamSize, max_size); GELOGD("Got op param size by key: %s, ret = %ld", kAttrOpParamSize, max_size); - if (max_size <= 0) { + if (max_size < 0) { GELOGE(ACL_ERROR_GE_PARAM_INVALID, "[%s] Invalid op_param_size: %ld.", op_desc_->GetName().c_str(), max_size); return ACL_ERROR_GE_PARAM_INVALID; } - void *tiling_buffer = nullptr; - GE_CHK_RT_RET(rtMalloc(&tiling_buffer, static_cast(max_size), RT_MEMORY_HBM)); - GE_CHECK_NOTNULL(tiling_buffer); - GELOGD("[%s] Done allocating tiling buffer, size=%ld.", op_desc_->GetName().c_str(), max_size); + if (max_size > 0) { + GE_CHK_RT_RET(rtMalloc(&tiling_buffer, static_cast(max_size), RT_MEMORY_HBM)); + GE_CHECK_NOTNULL(tiling_buffer); + GELOGD("[%s] Done allocating tiling buffer, size=%ld.", op_desc_->GetName().c_str(), max_size); + } task.EnableDynamicSupport(node_, tiling_buffer, static_cast(max_size)); return SUCCESS; diff --git a/ge/single_op/task/tbe_task_builder.h b/ge/single_op/task/tbe_task_builder.h index 5cd5c463..8af9a68d 100755 --- a/ge/single_op/task/tbe_task_builder.h +++ b/ge/single_op/task/tbe_task_builder.h @@ -42,6 +42,19 @@ class KernelHolder { std::shared_ptr kernel_bin_; }; +class HandleHolder { + public: + HandleHolder(void *bin_handle); + ~HandleHolder(); + + void SetBinHandle(void *bin_handle) { bin_handle_ = bin_handle; } + void *GetBinHandle() { return bin_handle_; } + + private: + friend class HandleRegistry; + void *bin_handle_ = nullptr; +}; + class KernelBinRegistry { public: static KernelBinRegistry &GetInstance() { @@ -61,9 +74,22 @@ class KernelBinRegistry { std::mutex mutex_; }; +class HandleRegistry { + public: + static HandleRegistry &GetInstance() { + static HandleRegistry instance; + return instance; + } + + bool AddHandle(std::unique_ptr &&holder); + + private: + std::set> registered_handles_; +}; + class TbeTaskBuilder { public: - TbeTaskBuilder(const std::string &model_name, const NodePtr &node, const domi::KernelDef &kernel_def); + TbeTaskBuilder(const std::string &model_name, const NodePtr &node, const domi::TaskDef &task_def); ~TbeTaskBuilder() = default; Status BuildTask(TbeOpTask &task, const SingleOpModelParam ¶m); @@ -71,9 +97,11 @@ class TbeTaskBuilder { private: Status InitTilingInfo(TbeOpTask &task); Status SetKernelArgs(TbeOpTask &task, const SingleOpModelParam ¶m, const OpDescPtr &op_desc); + Status SetKernelWithHandleArgs(TbeOpTask &task, const SingleOpModelParam ¶m, const OpDescPtr &op_desc); Status GetSmDesc(void **sm_desc, const SingleOpModelParam ¶m) const; Status RegisterKernel(TbeOpTask &task, const SingleOpModelParam ¶m); + 
Status RegisterKernelWithHandle(TbeOpTask &task, const SingleOpModelParam ¶m); Status DoRegisterKernel(const OpKernelBin &kernel_bin, const char *bin_file_key, void **bin_handle, const SingleOpModelParam ¶m); Status DoRegisterBinary(const OpKernelBin &kernel_bin, void **bin_handle, const SingleOpModelParam ¶m) const; @@ -83,8 +111,11 @@ class TbeTaskBuilder { const NodePtr node_; const OpDescPtr op_desc_; + const domi::TaskDef &task_def_; const domi::KernelDef &kernel_def_; + const domi::KernelDefWithHandle &kernel_def_with_handle_; const std::string stub_name_; + void *handle_ = nullptr; }; } // namespace ge diff --git a/tests/depends/runtime/src/runtime_stub.cc b/tests/depends/runtime/src/runtime_stub.cc index 1323a76a..3808e5d6 100644 --- a/tests/depends/runtime/src/runtime_stub.cc +++ b/tests/depends/runtime/src/runtime_stub.cc @@ -131,8 +131,15 @@ rtError_t rtFunctionRegister(void *bin_handle, const void *stub_func, const char rtError_t rtDevBinaryRegister(const rtDevBinary_t *bin, void **handle) { return RT_ERROR_NONE; } +rtError_t rtRegisterAllKernel(const rtDevBinary_t *bin, void **handle) { return RT_ERROR_NONE; } + rtError_t rtKernelConfigTransArg(const void *ptr, uint64_t size, uint32_t flag, void **arg) { return RT_ERROR_NONE; } +rtError_t rtKernelLaunchWithHandle(void *handle, const void *devFunc, uint32_t blockDim, void *args, uint32_t argsSize, + rtSmDesc_t *smDesc, rtStream_t stream, const void *kernelInfo) { + return RT_ERROR_NONE; +} + rtError_t rtKernelLaunch(const void *stub_func, uint32_t block_dim, void *args, uint32_t args_size, rtSmDesc_t *sm_desc, rtStream_t stream) { return RT_ERROR_NONE; diff --git a/tests/ut/ge/CMakeLists.txt b/tests/ut/ge/CMakeLists.txt index 7c49c0a7..baba874f 100755 --- a/tests/ut/ge/CMakeLists.txt +++ b/tests/ut/ge/CMakeLists.txt @@ -759,12 +759,17 @@ set(SINGLE_OP_TEST_FILES #"single_op/single_op_model_unittest.cc" "single_op/single_op_manager_unittest.cc" "single_op/stream_resource_unittest.cc" + "single_op/single_op_task_unittest.cc" ) set(PROFILING_MNG_TEST_FILES "profiling/ge_profiling_manager_unittest.cc" ) +set(HYBRID_TEST_FILES + "hybrid/ge_hybrid_unittest.cc" +) + set(OTHERS_TEST_FILES "plugin_manager/ge_util_unittest.cc" ) @@ -1059,6 +1064,7 @@ add_executable(ut_libge_distinct_load_utest ${DISTINCT_GRAPH_LOAD_SRC_FILES} ${SINGLE_OP_TEST_FILES} ${PROFILING_MNG_TEST_FILES} + ${HYBRID_TEST_FILES} ) target_compile_options(ut_libge_distinct_load_utest PRIVATE diff --git a/tests/ut/ge/hybrid/ge_hybrid_unittest.cc b/tests/ut/ge/hybrid/ge_hybrid_unittest.cc new file mode 100644 index 00000000..61f99950 --- /dev/null +++ b/tests/ut/ge/hybrid/ge_hybrid_unittest.cc @@ -0,0 +1,101 @@ +/** + * Copyright 2019-2021 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include +#include + +#include "runtime/rt.h" + +#define protected public +#define private public +#include "hybrid/model/hybrid_model_builder.h" +#include "hybrid/model/hybrid_model.h" +#include "model/ge_model.h" +#include "model/ge_root_model.h" + +#include "hybrid/node_executor/aicore/aicore_op_task.h" +#include "framework/common/taskdown_common.h" +#include "framework/common/debug/log.h" +#include "graph/ge_context.h" +#include "hybrid/executor/hybrid_execution_context.h" +#include "hybrid/node_executor/aicore/aicore_task_builder.h" +#include "graph/load/model_manager/tbe_handle_store.h" +#include "graph/types.h" + +#undef private +#undef protected + +using namespace std; +using namespace testing; +using namespace ge; + +class UtestGeHybrid : public testing::Test { + protected: + void SetUp() {} + + void TearDown() {} +}; + +static ge::OpDescPtr CreateOpDesc(string name = "", string type = "") { + auto op_desc = std::make_shared(name, type); + op_desc->SetStreamId(0); + op_desc->SetId(0); + + op_desc->SetWorkspace({}); + ; + op_desc->SetWorkspaceBytes({}); + op_desc->SetInputOffset({}); + op_desc->SetOutputOffset({}); + + ge::AttrUtils::SetStr(op_desc, ge::TVM_ATTR_NAME_MAGIC, "RT_DEV_BINARY_MAGIC_ELF_AIVEC"); + bool support_dynamic = true; + ge::AttrUtils::GetBool(op_desc, "support_dynamicshape", support_dynamic); + return op_desc; +} + +TEST_F(UtestGeHybrid, aicore_op_task_init_success) { + // build aicore task + auto aicore_task = std::unique_ptr(new(std::nothrow)hybrid::AiCoreOpTask()); + domi::TaskDef task_def; + task_def.set_type(RT_MODEL_TASK_ALL_KERNEL); + domi::KernelDefWithHandle *kernel_with_handle = task_def.mutable_kernel_with_handle(); + kernel_with_handle->set_original_kernel_key(""); + kernel_with_handle->set_node_info(""); + kernel_with_handle->set_block_dim(32); + kernel_with_handle->set_args_size(64); + string args(64, '1'); + kernel_with_handle->set_args(args.data(), 64); + domi::KernelContext *context = kernel_with_handle->mutable_context(); + context->set_op_index(1); + context->set_kernel_type(2); // ccKernelType::TE + uint16_t args_offset[9] = {0}; + context->set_args_offset(args_offset, 9 * sizeof(uint16_t)); + + OpDescPtr op_desc = CreateOpDesc("Add", "Add"); + std::vector kernelBin; + TBEKernelPtr tbe_kernel = std::make_shared("name/Add", std::move(kernelBin)); + op_desc->SetExtAttr(ge::OP_EXTATTR_NAME_TBE_KERNEL, tbe_kernel); + std::string kernel_name("kernel/Add"); + AttrUtils::SetStr(op_desc, op_desc->GetName() + "_kernelname", kernel_name); + ASSERT_EQ(aicore_task->InitWithTaskDef(*op_desc.get(), task_def), SUCCESS); + rtStream_t stream = nullptr; + rtStreamCreate(&stream, 0); + ASSERT_EQ(aicore_task->LaunchKernel(stream), SUCCESS); + char *handle = ""; + aicore_task->handle_ = handle; + aicore_task->tiling_key_ = 1; + ASSERT_EQ(aicore_task->LaunchKernel(stream), SUCCESS); +} \ No newline at end of file diff --git a/tests/ut/ge/single_op/single_op_task_unittest.cc b/tests/ut/ge/single_op/single_op_task_unittest.cc new file mode 100644 index 00000000..a17c9012 --- /dev/null +++ b/tests/ut/ge/single_op/single_op_task_unittest.cc @@ -0,0 +1,117 @@ +/** + * Copyright 2019-2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include + +#include "graph/load/model_manager/model_utils.h" +#include "graph/utils/graph_utils.h" +#include "runtime/rt.h" + +#define protected public +#define private public +#include "single_op/single_op_model.h" +#include "single_op/task/tbe_task_builder.h" +#include "single_op/task/op_task.h" +#include "single_op/task/tbe_task_builder.h" +#include "external/register/op_tiling_registry.h" +#undef private +#undef protected + +using namespace std; +using namespace testing; +using namespace ge; +using namespace optiling; + +class UtestSingleOpTask : public testing::Test { + protected: + void SetUp() {} + + void TearDown() {} +}; + +TEST_F(UtestSingleOpTask, test_build_kernel_task) { + string model_data_str = "123456789"; + SingleOpModel model("model", model_data_str.c_str(), model_data_str.size()); + model.input_offset_list_.push_back(0); + model.input_sizes_.push_back(16); + + model.output_offset_list_.push_back(0); + model.output_sizes_.push_back(16); + + auto graph = make_shared("graph"); + auto op_desc = make_shared("Add", "Add"); + std::vector kernelBin; + TBEKernelPtr tbe_kernel = std::make_shared("name/Add", std::move(kernelBin)); + op_desc->SetExtAttr(ge::OP_EXTATTR_NAME_TBE_KERNEL, tbe_kernel); + std::string kernel_name("kernel/Add"); + AttrUtils::SetStr(op_desc, op_desc->GetName() + "_kernelname", kernel_name); + + vector shape{16, 16}; + GeShape ge_shape(shape); + GeTensorDesc desc(ge_shape); + op_desc->AddInputDesc(desc); + op_desc->AddOutputDesc(desc); + auto node = graph->AddNode(op_desc); + + std::mutex stream_mu_; + rtStream_t stream_ = nullptr; + StreamResource stream_resource(0); + SingleOp single_op(&stream_resource, &stream_mu_, stream_); + + domi::TaskDef task_def; + task_def.set_type(RT_MODEL_TASK_ALL_KERNEL); + domi::KernelDefWithHandle *kernel_with_handle = task_def.mutable_kernel_with_handle(); + kernel_with_handle->set_original_kernel_key(""); + kernel_with_handle->set_node_info(""); + kernel_with_handle->set_block_dim(32); + kernel_with_handle->set_args_size(64); + string args(64, '1'); + kernel_with_handle->set_args(args.data(), 64); + domi::KernelContext *context = kernel_with_handle->mutable_context(); + context->set_op_index(1); + context->set_kernel_type(2); // ccKernelType::TE + uint16_t args_offset[9] = {0}; + context->set_args_offset(args_offset, 9 * sizeof(uint16_t)); + model.op_list_[1] = node; + + TbeOpTask task_tmp; + TbeOpTask *task = &task_tmp; + ASSERT_EQ(model.BuildKernelTask(task_def, &task), SUCCESS); + vector input_desc; + vector input_buffers; + vector output_desc; + vector output_buffers; + task->node_ = node; + OpTilingFunc op_tiling_func = [](const TeOpParas &, const OpCompileInfo &, OpRunInfo &) -> bool {return true;}; + OpTilingRegistryInterf("Add", op_tiling_func); + ge::AttrUtils::SetStr(op_desc, "compile_info_key", "op_compile_info_key"); + ge::AttrUtils::SetStr(op_desc, "compile_info_json", "op_compile_info_json"); + char c = '0'; + char* buffer = &c; + task->tiling_buffer_ = buffer; + task->max_tiling_size_ = 64; + task->tiling_data_ = "tiling_data"; + task->arg_size_ = 64; + uint8_t task_args{0}; 
+ task->args_.reset(&task_args); + + ASSERT_EQ(task->LaunchKernel(input_desc, input_buffers, output_desc, output_buffers, stream_), SUCCESS); + char handle_tmp = '0'; + char *handle = &handle_tmp; + task->SetHandle(handle); + ASSERT_EQ(task->LaunchKernel(input_desc, input_buffers, output_desc, output_buffers, stream_), SUCCESS); +} \ No newline at end of file diff --git a/third_party/fwkacllib/inc/runtime/kernel.h b/third_party/fwkacllib/inc/runtime/kernel.h index dc16ca58..b4500e10 100644 --- a/third_party/fwkacllib/inc/runtime/kernel.h +++ b/third_party/fwkacllib/inc/runtime/kernel.h @@ -191,6 +191,14 @@ typedef void (*rtCallback_t)(void *fnData); #define RT_FUSION_KERNEL_DUMPFLAG (0x04) #define RT_KERNEL_CUSTOM_AICPU (0x08) +/** + * @ingroup rt_kernel + * @brief kernel mode + */ +#define RT_DEFAULT_KERNEL_MODE (0x00) +#define RT_NORMAL_KERNEL_MODE (0x01) +#define RT_ALL_KERNEL_MODE (0x02) + /** * @ingroup rt_kernel * @brief kernel L1 Fusion Dump bit flags @@ -207,6 +215,16 @@ typedef void (*rtCallback_t)(void *fnData); */ RTS_API rtError_t rtDevBinaryRegister(const rtDevBinary_t *bin, void **handle); +/** + * @ingroup rt_kernel + * @brief register device binary + * @param [in] bin device binary description + * @param [out] handle device binary handle + * @return RT_ERROR_NONE for ok + * @return RT_ERROR_INVALID_VALUE for error input + */ +RTS_API rtError_t rtRegisterAllKernel(const rtDevBinary_t *bin, void **handle); + /** * @ingroup rt_kernel * @brief register fast memeory device binary @@ -314,6 +332,23 @@ RTS_API rtError_t rtKernelConfigDump(uint32_t kind, uint32_t dumpSizePerBlock, u RTS_API rtError_t rtKernelLaunch(const void *stubFunc, uint32_t blockDim, void *args, uint32_t argsSize, rtSmDesc_t *smDesc, rtStream_t stream); +/** + * @ingroup rt_kernel + * @brief launch kernel with handle to device + * @param [in] handle program + * @param [in] devFunc device function description + * @param [in] blockDim block dimentions + * @param [in] args argments address for kernel function + * @param [in] argsSize argements size + * @param [in] smDesc shared memory description + * @param [in] stream associated stream + * @param [in] kernelInfo kernel info + * @return RT_ERROR_NONE for ok + * @return RT_ERROR_INVALID_VALUE for error input + */ +RTS_API rtError_t rtKernelLaunchWithHandle(void *handle, const void *devFunc, uint32_t blockDim, void *args, uint32_t argsSize, + rtSmDesc_t *smDesc, rtStream_t stream, const void *kernelInfo); + /** * @ingroup rt_kernel * @brief launch kernel to device diff --git a/third_party/fwkacllib/inc/runtime/rt_model.h b/third_party/fwkacllib/inc/runtime/rt_model.h index 482486a8..798f63ae 100644 --- a/third_party/fwkacllib/inc/runtime/rt_model.h +++ b/third_party/fwkacllib/inc/runtime/rt_model.h @@ -50,6 +50,7 @@ typedef enum tagModelTaskType { RT_MODEL_TASK_STREAM_LABEL_SWITCH_BY_INDEX, RT_MODEL_TASK_STREAM_LABEL_GOTO, RT_MODEL_TASK_MODEL_EXIT, + RT_MODEL_TASK_ALL_KERNEL, } rtModelTaskType_t; typedef enum tagModelStreamType { @@ -127,6 +128,17 @@ typedef struct tagKernelTaskInfo { uint16_t *argsOffset; } rtKernelTaskInfo_t; +typedef struct tagAllKernelTaskInfo { + uint16_t blockDim; + uint16_t argsCount; + uint16_t argsSize; + uint16_t reserved; + const void *dev_func; + void *handle; + uint8_t *smDesc; + uint8_t *args; + uint16_t *argsOffset; +} rtAllKernelTaskInfo_t; typedef struct tagKernelTaskInfoEx { uint32_t flags; uint32_t argsSize; @@ -251,6 +263,7 @@ typedef struct tagTaskInfo { union { rtKernelTaskInfoEx_t kernelTaskEx; rtKernelTaskInfo_t kernelTask; 
+ rtAllKernelTaskInfo_t allkernelTask; rtEventTaskInfo_t eventTask; rtStreamSwitchTaskInfo_t streamSwitchTask; rtStreamActiveTaskInfo_t streamActiveTask;
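
The behavioral core of the change is the launch dispatch: kernels whose task type is RT_MODEL_TASK_ALL_KERNEL are registered once through rtRegisterAllKernel and the returned handle is kept on the task; at launch time the device function is selected per shape as original_kernel_key plus the tiling_key reported by op tiling, and the kernel is started with rtKernelLaunchWithHandle, while RT_MODEL_TASK_KERNEL tasks keep the existing stub-function path through rtKernelLaunch. The standalone C++ sketch below illustrates only that dispatch and naming convention; the rt* functions are replaced by local stand-ins and the kernel key, node info and tiling key values are made up, so it approximates TbeOpTask::LaunchKernel rather than reproducing GE code. (AiCoreOpTask appends the "_" and "/" separators once at init; TbeOpTask appends them at launch, as done here.)

// Minimal standalone sketch of the launch dispatch added by this patch.
// The two Stub* functions below stand in for rtKernelLaunch /
// rtKernelLaunchWithHandle declared in runtime/kernel.h.
#include <cstdint>
#include <cstdio>
#include <string>

using rtError_t = int;
using rtStream_t = void *;
constexpr rtError_t RT_ERROR_NONE = 0;

// Stand-in for rtKernelLaunch: launch by pre-registered stub function.
rtError_t StubKernelLaunch(const void *stub_func, uint32_t block_dim,
                           void *args, uint32_t args_size, rtStream_t stream) {
  (void)stub_func; (void)block_dim; (void)args; (void)args_size; (void)stream;
  return RT_ERROR_NONE;
}

// Stand-in for rtKernelLaunchWithHandle: launch by binary handle + device function name.
rtError_t StubKernelLaunchWithHandle(void *handle, const char *dev_func, uint32_t block_dim,
                                     void *args, uint32_t args_size, rtStream_t stream,
                                     const char *kernel_info) {
  (void)handle; (void)block_dim; (void)args; (void)args_size; (void)stream;
  std::printf("launch %s (%s)\n", dev_func, kernel_info);
  return RT_ERROR_NONE;
}

struct LaunchCtx {
  void *handle = nullptr;          // non-null once rtRegisterAllKernel succeeded
  void *stub_func = nullptr;       // legacy stub-based registration
  std::string original_kernel_key; // from KernelDefWithHandle
  std::string node_info;           // from KernelDefWithHandle
  uint32_t tiling_key = 0;         // produced by op tiling for the current shape
  uint32_t block_dim = 1;
};

rtError_t Launch(LaunchCtx &ctx, void *args, uint32_t args_size, rtStream_t stream) {
  if (ctx.handle != nullptr) {
    // Handle path: the sub kernel is selected by tiling_key at launch time.
    std::string dev_func = ctx.original_kernel_key + "_" + std::to_string(ctx.tiling_key);
    std::string kernel_info = ctx.node_info + "/" + std::to_string(ctx.tiling_key);
    return StubKernelLaunchWithHandle(ctx.handle, dev_func.c_str(), ctx.block_dim,
                                      args, args_size, stream, kernel_info.c_str());
  }
  // Stub path: the kernel was bound to a fixed function at registration time.
  return StubKernelLaunch(ctx.stub_func, ctx.block_dim, args, args_size, stream);
}

int main() {
  LaunchCtx ctx;
  int dummy_handle = 0;
  ctx.handle = &dummy_handle;           // hypothetical handle for illustration
  ctx.original_kernel_key = "te_add_kernel";
  ctx.node_info = "Add";
  ctx.tiling_key = 3;
  uint8_t args[64] = {};
  return Launch(ctx, args, sizeof(args), nullptr) == RT_ERROR_NONE ? 0 : 1;
}

Selecting the sub kernel by tiling_key at launch time is what lets one registered binary serve every tiling case of a dynamic-shape op, instead of binding a single stub function per kernel at registration.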