From: @HW_KK Reviewed-by: Signed-off-by:tags/v1.2.0
| @@ -1199,6 +1199,8 @@ Status HybridModelBuilder::IndexTaskDefs() { | |||||
| op_index = task_def.kernel_ex().op_index(); | op_index = task_def.kernel_ex().op_index(); | ||||
| } else if (task_type == RT_MODEL_TASK_HCCL) { | } else if (task_type == RT_MODEL_TASK_HCCL) { | ||||
| op_index = task_def.kernel_hccl().op_index(); | op_index = task_def.kernel_hccl().op_index(); | ||||
| } else if (task_type == RT_MODEL_TASK_ALL_KERNEL) { | |||||
| op_index = task_def.kernel_with_handle().context().op_index(); | |||||
| } else { | } else { | ||||
| GELOGD("Skip task type: %d", static_cast<int>(task_type)); | GELOGD("Skip task type: %d", static_cast<int>(task_type)); | ||||
| continue; | continue; | ||||
| @@ -1211,7 +1213,7 @@ Status HybridModelBuilder::IndexTaskDefs() { | |||||
| } | } | ||||
| auto &node = iter->second; | auto &node = iter->second; | ||||
| if (task_type == RT_MODEL_TASK_KERNEL) { | |||||
| if (task_type == RT_MODEL_TASK_KERNEL || task_type == RT_MODEL_TASK_ALL_KERNEL) { | |||||
| ge_model->GetTBEKernelStore().LoadTBEKernelBinToOpDesc(node->GetOpDesc()); | ge_model->GetTBEKernelStore().LoadTBEKernelBinToOpDesc(node->GetOpDesc()); | ||||
| } | } | ||||
| @@ -33,6 +33,20 @@ constexpr char const *kAttrOpParamSize = "op_para_size"; | |||||
| constexpr char const *kAttrAtomicOpParamSize = "atomic_op_para_size"; | constexpr char const *kAttrAtomicOpParamSize = "atomic_op_para_size"; | ||||
| } // namespace | } // namespace | ||||
| TbeHandleHolder::TbeHandleHolder(void *bin_handle) | |||||
| : bin_handle_(bin_handle) {} | |||||
| TbeHandleHolder::~TbeHandleHolder() { | |||||
| if (bin_handle_ != nullptr) { | |||||
| GE_CHK_RT(rtDevBinaryUnRegister(bin_handle_)); | |||||
| } | |||||
| } | |||||
| bool TbeHandleRegistry::AddHandle(std::unique_ptr<TbeHandleHolder> &&holder) { | |||||
| auto ret = registered_handles_.emplace(std::move(holder)); | |||||
| return ret.second; | |||||
| } | |||||
| Status AiCoreOpTask::Init(const OpDesc &op_desc, const domi::TaskDef &task_def) { | Status AiCoreOpTask::Init(const OpDesc &op_desc, const domi::TaskDef &task_def) { | ||||
| GE_CHK_STATUS_RET_NOLOG(InitWithTaskDef(op_desc, task_def)); | GE_CHK_STATUS_RET_NOLOG(InitWithTaskDef(op_desc, task_def)); | ||||
| GE_CHK_STATUS_RET_NOLOG(InitTilingInfo(op_desc)); | GE_CHK_STATUS_RET_NOLOG(InitTilingInfo(op_desc)); | ||||
| @@ -69,7 +83,7 @@ Status AiCoreOpTask::RegisterTbeHandle(const OpDesc &op_desc) { | |||||
| if (rt_ret != RT_ERROR_NONE || is_single_op_) { | if (rt_ret != RT_ERROR_NONE || is_single_op_) { | ||||
| void *bin_handle = nullptr; | void *bin_handle = nullptr; | ||||
| if (!kernel_store.FindTBEHandle(stub_name_.c_str(), bin_handle)) { | if (!kernel_store.FindTBEHandle(stub_name_.c_str(), bin_handle)) { | ||||
| GELOGI("TBE: can't find the kernel_name[%s] in HandleMap", stub_name_.c_str()); | |||||
| GELOGI("TBE: can't find the binfile_key[%s] in HandleMap", stub_name_.c_str()); | |||||
| rtDevBinary_t binary; | rtDevBinary_t binary; | ||||
| std::string json_string; | std::string json_string; | ||||
| GE_IF_BOOL_EXEC(AttrUtils::GetStr(op_desc_ptr, TVM_ATTR_NAME_MAGIC, json_string), | GE_IF_BOOL_EXEC(AttrUtils::GetStr(op_desc_ptr, TVM_ATTR_NAME_MAGIC, json_string), | ||||
| @@ -96,7 +110,7 @@ Status AiCoreOpTask::RegisterTbeHandle(const OpDesc &op_desc) { | |||||
| GE_IF_BOOL_EXEC(!meta_data.empty(), GE_CHK_RT_RET(rtMetadataRegister(bin_handle, meta_data.c_str()))); | GE_IF_BOOL_EXEC(!meta_data.empty(), GE_CHK_RT_RET(rtMetadataRegister(bin_handle, meta_data.c_str()))); | ||||
| kernel_store.StoreTBEHandle(stub_name_.c_str(), bin_handle, tbe_kernel); | kernel_store.StoreTBEHandle(stub_name_.c_str(), bin_handle, tbe_kernel); | ||||
| } else { | } else { | ||||
| GELOGI("TBE: find the kernel_name[%s] in HandleMap", stub_name_.c_str()); | |||||
| GELOGI("TBE: find the binfile_key[%s] in HandleMap", stub_name_.c_str()); | |||||
| kernel_store.ReferTBEHandle(stub_name_.c_str()); | kernel_store.ReferTBEHandle(stub_name_.c_str()); | ||||
| } | } | ||||
| std::string kernel_name; | std::string kernel_name; | ||||
| @@ -108,25 +122,63 @@ Status AiCoreOpTask::RegisterTbeHandle(const OpDesc &op_desc) { | |||||
| return SUCCESS; | return SUCCESS; | ||||
| } | } | ||||
| Status AiCoreOpTask::InitWithTaskDef(const OpDesc &op_desc, const domi::TaskDef &task_def) { | |||||
| GE_CHK_STATUS_RET(ValidateTaskDef(task_def), | |||||
| "[%s] Failed to validate task def: [%s]", | |||||
| op_desc.GetName().c_str(), | |||||
| task_def.DebugString().c_str()); | |||||
| Status AiCoreOpTask::RegisterKernelHandle(const OpDesc &op_desc) { | |||||
| TbeHandleRegistry ®istry = TbeHandleRegistry::GetInstance(); | |||||
| auto tbe_kernel = op_desc.TryGetExtAttr(OP_EXTATTR_NAME_TBE_KERNEL, TBEKernelPtr()); | |||||
| if (tbe_kernel == nullptr) { | |||||
| GELOGE(INTERNAL_ERROR, "TBE: %s can't find tvm bin file!", op_desc.GetName().c_str()); | |||||
| return INTERNAL_ERROR; | |||||
| } | |||||
| void *bin_handle = nullptr; | |||||
| GELOGD("Start to register kernel for node: [%s].", op_desc.GetName().c_str()); | |||||
| rtDevBinary_t binary; | |||||
| std::string json_string; | |||||
| GE_IF_BOOL_EXEC(AttrUtils::GetStr(&op_desc, TVM_ATTR_NAME_MAGIC, json_string), | |||||
| GELOGI("Get original type of session_graph_id.")); | |||||
| if (json_string == "RT_DEV_BINARY_MAGIC_ELF_AICPU") { | |||||
| binary.magic = RT_DEV_BINARY_MAGIC_ELF_AICPU; | |||||
| } else if (json_string == "RT_DEV_BINARY_MAGIC_ELF") { | |||||
| binary.magic = RT_DEV_BINARY_MAGIC_ELF; | |||||
| } else if (json_string == "RT_DEV_BINARY_MAGIC_ELF_AIVEC") { | |||||
| binary.magic = RT_DEV_BINARY_MAGIC_ELF_AIVEC; | |||||
| } else { | |||||
| GELOGE(PARAM_INVALID, "TBE: Invalid parameter magic number! json: %s", json_string.c_str()); | |||||
| return PARAM_INVALID; | |||||
| } | |||||
| binary.version = 0; | |||||
| binary.data = tbe_kernel->GetBinData(); | |||||
| binary.length = tbe_kernel->GetBinDataSize(); | |||||
| GELOGI("TBE: binary.length: %lu", binary.length); | |||||
| GE_CHK_RT_RET(rtRegisterAllKernel(&binary, &bin_handle)); | |||||
| handle_ = bin_handle; | |||||
| auto holder = std::unique_ptr<TbeHandleHolder>(new (std::nothrow) TbeHandleHolder(handle_)); | |||||
| if (holder == nullptr) { | |||||
| GELOGE(ACL_ERROR_GE_MEMORY_ALLOCATION, "create HandleHodler failed."); | |||||
| return ACL_ERROR_GE_MEMORY_ALLOCATION; | |||||
| } | |||||
| if (!registry.AddHandle(std::move(holder))) { | |||||
| GELOGE(ACL_ERROR_GE_INTERNAL_ERROR, "Add handle failed. node name = %s", op_desc.GetName().c_str()); | |||||
| return ACL_ERROR_GE_INTERNAL_ERROR; | |||||
| } | |||||
| return SUCCESS; | |||||
| } | |||||
| Status AiCoreOpTask::InitWithKernelDef(const OpDesc &op_desc, const domi::TaskDef &task_def) { | |||||
| const domi::KernelDef &kernel_def = task_def.kernel(); | const domi::KernelDef &kernel_def = task_def.kernel(); | ||||
| const domi::KernelContext &context = kernel_def.context(); | const domi::KernelContext &context = kernel_def.context(); | ||||
| stub_name_ = kernel_def.stub_func(); | stub_name_ = kernel_def.stub_func(); | ||||
| GE_CHK_STATUS_RET(RegisterTbeHandle(op_desc)); | GE_CHK_STATUS_RET(RegisterTbeHandle(op_desc)); | ||||
| GE_CHK_RT_RET(rtGetFunctionByName(stub_name_.c_str(), &stub_func_)); | GE_CHK_RT_RET(rtGetFunctionByName(stub_name_.c_str(), &stub_func_)); | ||||
| args_size_ = kernel_def.args_size(); | args_size_ = kernel_def.args_size(); | ||||
| block_dim_ = kernel_def.block_dim(); | block_dim_ = kernel_def.block_dim(); | ||||
| // malloc args memory | // malloc args memory | ||||
| args_.reset(new(std::nothrow) uint8_t[args_size_]); | args_.reset(new(std::nothrow) uint8_t[args_size_]); | ||||
| GE_CHECK_NOTNULL(args_); | GE_CHECK_NOTNULL(args_); | ||||
| if (kernel_def.args().size() < args_size_) { | |||||
| GELOGE(INTERNAL_ERROR, "args size of kernel_def is smaller than args_size_"); | |||||
| return INTERNAL_ERROR; | |||||
| } | |||||
| errno_t err = memcpy_s(args_.get(), args_size_, kernel_def.args().data(), args_size_); | errno_t err = memcpy_s(args_.get(), args_size_, kernel_def.args().data(), args_size_); | ||||
| if (err != EOK) { | if (err != EOK) { | ||||
| GELOGE(INTERNAL_ERROR, "AiCoreTask memcpy args failed."); | GELOGE(INTERNAL_ERROR, "AiCoreTask memcpy args failed."); | ||||
| @@ -157,19 +209,75 @@ Status AiCoreOpTask::InitWithTaskDef(const OpDesc &op_desc, const domi::TaskDef | |||||
| block_dim_, | block_dim_, | ||||
| arg_base_, | arg_base_, | ||||
| args_size_); | args_size_); | ||||
| return SUCCESS; | |||||
| } | |||||
| Status AiCoreOpTask::InitWithKernelDefWithHandle(const OpDesc &op_desc, const domi::TaskDef &task_def) { | |||||
| const domi::KernelDefWithHandle &kernel_with_handle = task_def.kernel_with_handle(); | |||||
| const domi::KernelContext &context = kernel_with_handle.context(); | |||||
| GE_CHK_STATUS_RET(RegisterKernelHandle(op_desc)); | |||||
| original_kernel_key_ = kernel_with_handle.original_kernel_key() + "_"; | |||||
| node_info_ = kernel_with_handle.node_info() + "/"; | |||||
| args_size_ = kernel_with_handle.args_size(); | |||||
| block_dim_ = kernel_with_handle.block_dim(); | |||||
| // malloc args memory | |||||
| args_.reset(new(std::nothrow) uint8_t[args_size_]); | |||||
| GE_CHECK_NOTNULL(args_); | |||||
| if (kernel_with_handle.args().size() < args_size_) { | |||||
| GELOGE(INTERNAL_ERROR, "args size of kernel_def is smaller than args_size_"); | |||||
| return INTERNAL_ERROR; | |||||
| } | |||||
| errno_t err = memcpy_s(args_.get(), args_size_, kernel_with_handle.args().data(), args_size_); | |||||
| if (err != EOK) { | |||||
| GELOGE(INTERNAL_ERROR, "AiCoreTask memcpy args failed."); | |||||
| return INTERNAL_ERROR; | |||||
| } | |||||
| if (context.args_offset().size() < sizeof(uint16_t)) { | |||||
| GELOGE(INTERNAL_ERROR, "Invalid args_offset, size = %zu.", context.args_offset().size()); | |||||
| return INTERNAL_ERROR; | |||||
| } | |||||
| const auto *args_offset_buffer = reinterpret_cast<const uint16_t *>(context.args_offset().data()); | |||||
| uint32_t offset = *args_offset_buffer; | |||||
| if (offset > args_size_) { | |||||
| GELOGE(INTERNAL_ERROR, | |||||
| "[%s] Arg offset out of range. offset = %u, arg size = %u", | |||||
| GetName().c_str(), | |||||
| offset, | |||||
| args_size_); | |||||
| return INTERNAL_ERROR; | |||||
| } | |||||
| arg_base_ = reinterpret_cast<uintptr_t *>(args_.get() + offset); | |||||
| max_arg_count_ = (args_size_ - offset) / sizeof(void *); | |||||
| return SUCCESS; | |||||
| } | |||||
| Status AiCoreOpTask::InitWithTaskDef(const OpDesc &op_desc, const domi::TaskDef &task_def) { | |||||
| GE_CHK_STATUS_RET(ValidateTaskDef(task_def), | |||||
| "[%s] Failed to validate task def: [%s]", | |||||
| op_desc.GetName().c_str(), | |||||
| task_def.DebugString().c_str()); | |||||
| if (task_def.type() != RT_MODEL_TASK_ALL_KERNEL) { | |||||
| GE_CHK_STATUS_RET(InitWithKernelDef(op_desc, task_def)); | |||||
| } else { | |||||
| GE_CHK_STATUS_RET(InitWithKernelDefWithHandle(op_desc, task_def)); | |||||
| } | |||||
| return SUCCESS; | return SUCCESS; | ||||
| } | } | ||||
| Status AiCoreOpTask::ValidateTaskDef(const domi::TaskDef &task_def) { | Status AiCoreOpTask::ValidateTaskDef(const domi::TaskDef &task_def) { | ||||
| auto task_type = static_cast<rtModelTaskType_t>(task_def.type()); | auto task_type = static_cast<rtModelTaskType_t>(task_def.type()); | ||||
| if (task_type != RT_MODEL_TASK_KERNEL) { | |||||
| if (task_type != RT_MODEL_TASK_KERNEL && task_type != RT_MODEL_TASK_ALL_KERNEL) { | |||||
| GELOGE(INTERNAL_ERROR, "Invalid task type (%d) in AiCore CreateTask.", static_cast<int>(task_type)); | GELOGE(INTERNAL_ERROR, "Invalid task type (%d) in AiCore CreateTask.", static_cast<int>(task_type)); | ||||
| return INTERNAL_ERROR; | return INTERNAL_ERROR; | ||||
| } | } | ||||
| const domi::KernelDef &kernel_def = task_def.kernel(); | |||||
| const domi::KernelContext &context = kernel_def.context(); | |||||
| const auto &context = task_type == RT_MODEL_TASK_KERNEL ? task_def.kernel().context() : | |||||
| task_def.kernel_with_handle().context(); | |||||
| auto kernel_type = static_cast<ccKernelType>(context.kernel_type()); | auto kernel_type = static_cast<ccKernelType>(context.kernel_type()); | ||||
| if (kernel_type != ccKernelType::TE) { | if (kernel_type != ccKernelType::TE) { | ||||
| GELOGE(INTERNAL_ERROR, "Invalid kernel type(%d) in AiCore TaskDef.", static_cast<int>(kernel_type)); | GELOGE(INTERNAL_ERROR, "Invalid kernel type(%d) in AiCore TaskDef.", static_cast<int>(kernel_type)); | ||||
| @@ -180,10 +288,9 @@ Status AiCoreOpTask::ValidateTaskDef(const domi::TaskDef &task_def) { | |||||
| } | } | ||||
| Status AiCoreOpTask::PrepareWithShape(TaskContext &context) { | Status AiCoreOpTask::PrepareWithShape(TaskContext &context) { | ||||
| if (tiling_buffer_ != nullptr) { | |||||
| if (is_dynamic_) { | |||||
| return UpdateTilingInfo(context); | return UpdateTilingInfo(context); | ||||
| } | } | ||||
| return SUCCESS; | return SUCCESS; | ||||
| } | } | ||||
| @@ -212,8 +319,14 @@ Status AiCoreOpTask::UpdateTilingInfo(TaskContext &context) { | |||||
| clear_atomic_ = tiling_info.clear_atomic; | clear_atomic_ = tiling_info.clear_atomic; | ||||
| tiling_data_ = tiling_info.tiling_data.str(); | tiling_data_ = tiling_info.tiling_data.str(); | ||||
| tiling_key_ = tiling_info.tiling_key; | |||||
| GELOGD("Successfully getting [tiling_key] : %u", tiling_key_); | |||||
| if (tiling_data_.empty()) { | if (tiling_data_.empty()) { | ||||
| GELOGE(INTERNAL_ERROR, "[%s] Tiling data is empty.", stub_name_.c_str()); | |||||
| GELOGD("[%s] Tiling data is empty.", op_desc->GetName().c_str()); | |||||
| return SUCCESS; | |||||
| } | |||||
| if (tiling_buffer_ == nullptr) { | |||||
| GELOGE(INTERNAL_ERROR, "tiling_buffer is nullptr while tiling_data is not empty!"); | |||||
| return INTERNAL_ERROR; | return INTERNAL_ERROR; | ||||
| } | } | ||||
| @@ -296,16 +409,26 @@ Status AiCoreOpTask::UpdateArgs(TaskContext &task_context) { | |||||
| } | } | ||||
| Status AiCoreOpTask::LaunchKernel(rtStream_t stream) { | Status AiCoreOpTask::LaunchKernel(rtStream_t stream) { | ||||
| GELOGD("AiCoreOpTask LaunchKernel Start (task = %s, block_dim = %u).", stub_name_.c_str(), block_dim_); | |||||
| GE_CHK_RT_RET(rtKernelLaunch(stub_func_, block_dim_, args_.get(), args_size_, nullptr, stream)); | |||||
| GELOGD("AiCoreOpTask LaunchKernel End (task = %s, block_dim = %u).", stub_name_.c_str(), block_dim_); | |||||
| if (handle_ != nullptr) { | |||||
| std::string dev_func = original_kernel_key_ + std::to_string(tiling_key_); | |||||
| std::string kernel_info = node_info_ + std::to_string(tiling_key_); | |||||
| GELOGD("AiCoreOpTask rtKernelLaunchWithHandle Start (dev_func = %s, block_dim = %u).", dev_func.c_str(), | |||||
| block_dim_); | |||||
| GE_CHK_RT_RET(rtKernelLaunchWithHandle(handle_, dev_func.c_str(), block_dim_, args_.get(), args_size_, nullptr, | |||||
| stream, kernel_info.c_str())); | |||||
| GELOGD("AiCoreOpTask rtKernelLaunchWithHandle End (dev_func = %s, block_dim = %u).", dev_func.c_str(), | |||||
| block_dim_); | |||||
| } else { | |||||
| GELOGD("AiCoreOpTask LaunchKernel Start (task = %s, block_dim = %u).", stub_name_.c_str(), block_dim_); | |||||
| GE_CHK_RT_RET(rtKernelLaunch(stub_func_, block_dim_, args_.get(), args_size_, nullptr, stream)); | |||||
| GELOGD("AiCoreOpTask LaunchKernel End (task = %s, block_dim = %u).", stub_name_.c_str(), block_dim_); | |||||
| } | |||||
| return SUCCESS; | return SUCCESS; | ||||
| } | } | ||||
| Status AiCoreOpTask::InitTilingInfo(const OpDesc &op_desc) { | Status AiCoreOpTask::InitTilingInfo(const OpDesc &op_desc) { | ||||
| bool dynamic_supported = false; | |||||
| (void) AttrUtils::GetBool(op_desc, kAttrSupportDynamicShape, dynamic_supported); | |||||
| if (!dynamic_supported) { | |||||
| (void) AttrUtils::GetBool(op_desc, kAttrSupportDynamicShape, is_dynamic_); | |||||
| if (!is_dynamic_) { | |||||
| GELOGD("[%s] Dynamic shape is not supported.", op_desc.GetName().c_str()); | GELOGD("[%s] Dynamic shape is not supported.", op_desc.GetName().c_str()); | ||||
| return SUCCESS; | return SUCCESS; | ||||
| } | } | ||||
| @@ -314,22 +437,26 @@ Status AiCoreOpTask::InitTilingInfo(const OpDesc &op_desc) { | |||||
| int64_t max_size = -1; | int64_t max_size = -1; | ||||
| (void) AttrUtils::GetInt(op_desc, GetKeyForOpParamSize(), max_size); | (void) AttrUtils::GetInt(op_desc, GetKeyForOpParamSize(), max_size); | ||||
| GELOGD("Got op param size by key: %s, ret = %ld", GetKeyForOpParamSize().c_str(), max_size); | GELOGD("Got op param size by key: %s, ret = %ld", GetKeyForOpParamSize().c_str(), max_size); | ||||
| if (max_size <= 0) { | |||||
| if (max_size < 0) { | |||||
| GELOGE(PARAM_INVALID, "[%s] Invalid op_param_size: %ld.", op_desc.GetName().c_str(), max_size); | GELOGE(PARAM_INVALID, "[%s] Invalid op_param_size: %ld.", op_desc.GetName().c_str(), max_size); | ||||
| return PARAM_INVALID; | return PARAM_INVALID; | ||||
| } | } | ||||
| auto allocator = NpuMemoryAllocator::GetAllocator(); | auto allocator = NpuMemoryAllocator::GetAllocator(); | ||||
| GE_CHECK_NOTNULL(allocator); | GE_CHECK_NOTNULL(allocator); | ||||
| tiling_buffer_ = TensorBuffer::Create(allocator, static_cast<size_t>(max_size)); | |||||
| GE_CHECK_NOTNULL(tiling_buffer_); | |||||
| if (max_size > 0) { | |||||
| tiling_buffer_ = TensorBuffer::Create(allocator, static_cast<size_t>(max_size)); | |||||
| GE_CHECK_NOTNULL(tiling_buffer_); | |||||
| GELOGD("[%s] Done allocating tiling buffer, size=%ld.", op_desc.GetName().c_str(), max_size); | |||||
| } else { | |||||
| GELOGD("op_param_size is 0, no need to create tiling buffer."); | |||||
| } | |||||
| GELOGD("[%s] Done allocating tiling buffer, size=%ld.", op_desc.GetName().c_str(), max_size); | |||||
| return SUCCESS; | return SUCCESS; | ||||
| } | } | ||||
| bool AiCoreOpTask::IsDynamicShapeSupported() { | bool AiCoreOpTask::IsDynamicShapeSupported() { | ||||
| return tiling_buffer_ != nullptr; | |||||
| return is_dynamic_; | |||||
| } | } | ||||
| const std::string &AiCoreOpTask::GetName() const { | const std::string &AiCoreOpTask::GetName() const { | ||||
| @@ -28,6 +28,32 @@ | |||||
| namespace ge { | namespace ge { | ||||
| namespace hybrid { | namespace hybrid { | ||||
// Owner of a runtime binary handle.
// The class declares a destructor, so it manages a resource. Copying is disabled because two
// holders with the same handle would both try to release it.
class TbeHandleHolder {
 public:
  // Takes the handle to hold. `explicit` prevents a bare void* from converting silently.
  explicit TbeHandleHolder(void *bin_handle);
  ~TbeHandleHolder();

  TbeHandleHolder(const TbeHandleHolder &) = delete;
  TbeHandleHolder &operator=(const TbeHandleHolder &) = delete;

  // Replaces the stored handle; the previous value is simply overwritten.
  void SetBinHandle(void *bin_handle) { bin_handle_ = bin_handle; }
  // Returns the stored handle (may be nullptr).
  void *GetBinHandle() const { return bin_handle_; }

 private:
  friend class TbeHandleRegistry;
  void *bin_handle_ = nullptr;
};
| class TbeHandleRegistry { | |||||
| public: | |||||
| static TbeHandleRegistry &GetInstance() { | |||||
| static TbeHandleRegistry instance; | |||||
| return instance; | |||||
| } | |||||
| bool AddHandle(std::unique_ptr<TbeHandleHolder> &&holder); | |||||
| private: | |||||
| std::set<std::unique_ptr<TbeHandleHolder>> registered_handles_; | |||||
| }; | |||||
| class AiCoreOpTask { | class AiCoreOpTask { | ||||
| public: | public: | ||||
| AiCoreOpTask() = default; | AiCoreOpTask() = default; | ||||
| @@ -67,6 +93,9 @@ class AiCoreOpTask { | |||||
| Status InitWithTaskDef(const OpDesc &node, const domi::TaskDef &task_def); | Status InitWithTaskDef(const OpDesc &node, const domi::TaskDef &task_def); | ||||
| Status InitTilingInfo(const OpDesc &op_desc); | Status InitTilingInfo(const OpDesc &op_desc); | ||||
| Status RegisterTbeHandle(const OpDesc &op_desc); | Status RegisterTbeHandle(const OpDesc &op_desc); | ||||
| Status RegisterKernelHandle(const OpDesc &op_desc); | |||||
| Status InitWithKernelDef(const OpDesc &op_desc, const domi::TaskDef &task_def); | |||||
| Status InitWithKernelDefWithHandle(const OpDesc &node, const domi::TaskDef &task_def); | |||||
| std::string stub_name_; | std::string stub_name_; | ||||
| void *stub_func_ = nullptr; | void *stub_func_ = nullptr; | ||||
| @@ -76,6 +105,11 @@ class AiCoreOpTask { | |||||
| bool clear_atomic_ = true; | bool clear_atomic_ = true; | ||||
| bool is_single_op_ = false; | bool is_single_op_ = false; | ||||
| std::vector<int> output_indices_to_skip_; | std::vector<int> output_indices_to_skip_; | ||||
| string original_kernel_key_; | |||||
| string node_info_; | |||||
| uint32_t tiling_key_ = 0; | |||||
| void *handle_ = nullptr; | |||||
| bool is_dynamic_ = false; | |||||
| }; | }; | ||||
| class AtomicAddrCleanOpTask : public AiCoreOpTask { | class AtomicAddrCleanOpTask : public AiCoreOpTask { | ||||
| @@ -261,7 +261,7 @@ Status SingleOpModel::BuildTaskList(StreamResource *stream_resource, SingleOp &s | |||||
| if (kernel_type == ccKernelType::TE) { | if (kernel_type == ccKernelType::TE) { | ||||
| GELOGD("Building TBE task"); | GELOGD("Building TBE task"); | ||||
| TbeOpTask *tbe_task = nullptr; | TbeOpTask *tbe_task = nullptr; | ||||
| auto ret = BuildKernelTask(task_def.kernel(), &tbe_task); | |||||
| auto ret = BuildKernelTask(task_def, &tbe_task); | |||||
| if (ret != SUCCESS) { | if (ret != SUCCESS) { | ||||
| return ret; | return ret; | ||||
| } | } | ||||
| @@ -332,9 +332,11 @@ void SingleOpModel::ParseArgTable(OpTask *task, SingleOp &op) { | |||||
| } | } | ||||
| } | } | ||||
| Status SingleOpModel::BuildKernelTask(const domi::KernelDef &kernel_def, TbeOpTask **task) { | |||||
| Status SingleOpModel::BuildKernelTask(const domi::TaskDef &task_def, TbeOpTask **task) { | |||||
| GE_CHECK_NOTNULL(task); | GE_CHECK_NOTNULL(task); | ||||
| const auto &context = kernel_def.context(); | |||||
| auto task_type = static_cast<rtModelTaskType_t>(task_def.type()); | |||||
| const auto &context = task_type == RT_MODEL_TASK_KERNEL ? task_def.kernel().context() : | |||||
| task_def.kernel_with_handle().context(); | |||||
| auto iter = op_list_.find(context.op_index()); | auto iter = op_list_.find(context.op_index()); | ||||
| if (iter == op_list_.end()) { | if (iter == op_list_.end()) { | ||||
| GELOGE(ACL_ERROR_GE_INTERNAL_ERROR, "op desc not found. op index = %u", context.op_index()); | GELOGE(ACL_ERROR_GE_INTERNAL_ERROR, "op desc not found. op index = %u", context.op_index()); | ||||
| @@ -347,7 +349,7 @@ Status SingleOpModel::BuildKernelTask(const domi::KernelDef &kernel_def, TbeOpTa | |||||
| return ACL_ERROR_GE_MEMORY_ALLOCATION; | return ACL_ERROR_GE_MEMORY_ALLOCATION; | ||||
| } | } | ||||
| auto builder = TbeTaskBuilder(model_name_, iter->second, kernel_def); | |||||
| auto builder = TbeTaskBuilder(model_name_, iter->second, task_def); | |||||
| auto ret = builder.BuildTask(*tbe_task, model_params_); | auto ret = builder.BuildTask(*tbe_task, model_params_); | ||||
| if (ret != SUCCESS) { | if (ret != SUCCESS) { | ||||
| delete tbe_task; | delete tbe_task; | ||||
| @@ -418,13 +420,15 @@ Status SingleOpModel::BuildOp(StreamResource &resource, SingleOp &single_op) { | |||||
| } | } | ||||
| Status SingleOpModel::BuildModelTaskKernel(const TaskDef &task_def, DynamicSingleOp &single_op) { | Status SingleOpModel::BuildModelTaskKernel(const TaskDef &task_def, DynamicSingleOp &single_op) { | ||||
| const domi::KernelDef &kernel_def = task_def.kernel(); | |||||
| const auto &context = kernel_def.context(); | |||||
| auto task_type = static_cast<rtModelTaskType_t>(task_def.type()); | |||||
| const auto &context = task_type == RT_MODEL_TASK_KERNEL ? task_def.kernel().context() : | |||||
| task_def.kernel_with_handle().context(); | |||||
| auto kernel_type = static_cast<ccKernelType>(context.kernel_type()); | auto kernel_type = static_cast<ccKernelType>(context.kernel_type()); | ||||
| if (kernel_type == ccKernelType::TE) { | if (kernel_type == ccKernelType::TE) { | ||||
| GELOGD("Building TBE task"); | GELOGD("Building TBE task"); | ||||
| TbeOpTask *tbe_task = nullptr; | TbeOpTask *tbe_task = nullptr; | ||||
| GE_CHK_STATUS_RET_NOLOG(BuildKernelTask(task_def.kernel(), &tbe_task)); | |||||
| GE_CHK_STATUS_RET_NOLOG(BuildKernelTask(task_def, &tbe_task)); | |||||
| tbe_task->SetModelArgs(model_name_, model_id_); | tbe_task->SetModelArgs(model_name_, model_id_); | ||||
| single_op.op_task_.reset(tbe_task); | single_op.op_task_.reset(tbe_task); | ||||
| } else if (kernel_type == ccKernelType::AI_CPU || kernel_type == ccKernelType::CUST_AI_CPU) { | } else if (kernel_type == ccKernelType::AI_CPU || kernel_type == ccKernelType::CUST_AI_CPU) { | ||||
| @@ -453,7 +457,7 @@ Status SingleOpModel::BuildTaskListForDynamicOp(DynamicSingleOp &single_op) { | |||||
| GELOGI("[%s] Task[%d], type = %u, DebugString = %s", model_name_.c_str(), i, task_def.type(), | GELOGI("[%s] Task[%d], type = %u, DebugString = %s", model_name_.c_str(), i, task_def.type(), | ||||
| task_def.DebugString().c_str()); | task_def.DebugString().c_str()); | ||||
| auto task_type = static_cast<rtModelTaskType_t>(task_def.type()); | auto task_type = static_cast<rtModelTaskType_t>(task_def.type()); | ||||
| if (task_type == RT_MODEL_TASK_KERNEL) { | |||||
| if (task_type == RT_MODEL_TASK_KERNEL || task_type == RT_MODEL_TASK_ALL_KERNEL) { | |||||
| if (single_op.op_task_ != nullptr) { | if (single_op.op_task_ != nullptr) { | ||||
| GELOGE(ACL_ERROR_GE_OP_TASK_TYPE_INVALID, "Do not support dynamic op with multiple tasks."); | GELOGE(ACL_ERROR_GE_OP_TASK_TYPE_INVALID, "Do not support dynamic op with multiple tasks."); | ||||
| return ACL_ERROR_GE_OP_TASK_TYPE_INVALID; | return ACL_ERROR_GE_OP_TASK_TYPE_INVALID; | ||||
| @@ -67,7 +67,7 @@ class SingleOpModel { | |||||
| Status BuildTaskList(StreamResource *stream_resource, SingleOp &single_op); | Status BuildTaskList(StreamResource *stream_resource, SingleOp &single_op); | ||||
| Status BuildTaskListForDynamicOp(DynamicSingleOp &dynamic_single_op); | Status BuildTaskListForDynamicOp(DynamicSingleOp &dynamic_single_op); | ||||
| Status BuildKernelTask(const domi::KernelDef &kernel_def, TbeOpTask **task); | |||||
| Status BuildKernelTask(const domi::TaskDef &task_def, TbeOpTask **task); | |||||
| Status BuildKernelExTask(const domi::KernelExDef &kernel_def, AiCpuTask **task, | Status BuildKernelExTask(const domi::KernelExDef &kernel_def, AiCpuTask **task, | ||||
| bool dynamic_flag, bool& depend_compute_flag, uint64_t kernel_id); | bool dynamic_flag, bool& depend_compute_flag, uint64_t kernel_id); | ||||
| Status BuildCpuKernelTask(const domi::KernelDef &kernel_def, OpTask **task, uint64_t kernel_id); | Status BuildCpuKernelTask(const domi::KernelDef &kernel_def, OpTask **task, uint64_t kernel_id); | ||||
| @@ -93,6 +93,14 @@ void TbeOpTask::SetKernelArgs(std::unique_ptr<uint8_t[]> &&args, size_t arg_size | |||||
| op_desc_ = op_desc; | op_desc_ = op_desc; | ||||
| } | } | ||||
| void TbeOpTask::SetKernelWithHandleArgs(std::unique_ptr<uint8_t[]> &&args, size_t arg_size, uint32_t block_dim, | |||||
| const OpDescPtr &op_desc, | |||||
| const domi::KernelDefWithHandle &kernel_def_with_handle) { | |||||
| SetKernelArgs(std::move(args), arg_size, block_dim, op_desc); | |||||
| original_kernel_key_ = kernel_def_with_handle.original_kernel_key(); | |||||
| node_info_ = kernel_def_with_handle.node_info(); | |||||
| } | |||||
| void TbeOpTask::SetSmDesc(void *sm_desc) { sm_desc_ = sm_desc; } | void TbeOpTask::SetSmDesc(void *sm_desc) { sm_desc_ = sm_desc; } | ||||
| void OpTask::SetModelArgs(std::string model_name, uint32_t model_id) { | void OpTask::SetModelArgs(std::string model_name, uint32_t model_id) { | ||||
| @@ -165,6 +173,10 @@ const std::string &TbeOpTask::GetStubName() const { return stub_name_; } | |||||
| uint32_t TbeOpTask::GetTaskType() const { return kTaskTypeAicore; } | uint32_t TbeOpTask::GetTaskType() const { return kTaskTypeAicore; } | ||||
| void TbeOpTask::SetHandle(void *handle) { | |||||
| this->handle_ = handle; | |||||
| } | |||||
| Status TbeOpTask::LaunchKernel(rtStream_t stream) { | Status TbeOpTask::LaunchKernel(rtStream_t stream) { | ||||
| GELOGD("To invoke rtKernelLaunch. task = %s, block_dim = %u", this->stub_name_.c_str(), block_dim_); | GELOGD("To invoke rtKernelLaunch. task = %s, block_dim = %u", this->stub_name_.c_str(), block_dim_); | ||||
| auto *sm_desc = reinterpret_cast<rtSmDesc_t *>(sm_desc_); | auto *sm_desc = reinterpret_cast<rtSmDesc_t *>(sm_desc_); | ||||
| @@ -204,8 +216,9 @@ Status TbeOpTask::UpdateRunInfo(const vector<GeTensorDesc> &input_desc, const ve | |||||
| } | } | ||||
| block_dim_ = run_info.block_dim; | block_dim_ = run_info.block_dim; | ||||
| tiling_data_ = run_info.tiling_data.str(); | tiling_data_ = run_info.tiling_data.str(); | ||||
| GELOGD("Done invoking OpParaCalculate successfully. block_dim = %u, tiling size = %zu", block_dim_, | |||||
| tiling_data_.size()); | |||||
| tiling_key_ = run_info.tiling_key; | |||||
| GELOGD("Done invoking OpParaCalculate successfully. block_dim = %u, tiling size = %zu, tiling_key = %u", block_dim_, | |||||
| tiling_data_.size(), tiling_key_); | |||||
| GE_CHK_STATUS_RET(AllocateWorkspaces(run_info.workspaces), "Failed to allocate workspaces"); | GE_CHK_STATUS_RET(AllocateWorkspaces(run_info.workspaces), "Failed to allocate workspaces"); | ||||
| return SUCCESS; | return SUCCESS; | ||||
| @@ -329,8 +342,17 @@ Status TbeOpTask::LaunchKernel(const vector<GeTensorDesc> &input_desc, | |||||
| } | } | ||||
| GELOGD("[%s] Start to invoke rtKernelLaunch", node_->GetName().c_str()); | GELOGD("[%s] Start to invoke rtKernelLaunch", node_->GetName().c_str()); | ||||
| GE_CHK_RT_RET(rtKernelLaunch(stub_func_, block_dim_, args_.get(), arg_size_, nullptr, stream)); | |||||
| GELOGD("[%s] Done invoking rtKernelLaunch successfully", node_->GetName().c_str()); | |||||
| if (handle_ == nullptr) { | |||||
| GE_CHK_RT_RET(rtKernelLaunch(stub_func_, block_dim_, args_.get(), arg_size_, nullptr, stream)); | |||||
| GELOGD("[%s] Done invoking rtKernelLaunch successfully", node_->GetName().c_str()); | |||||
| } else { | |||||
| std::string dev_func = original_kernel_key_ + "_" + std::to_string(tiling_key_); | |||||
| std::string kernel_info = node_info_ + "/" + std::to_string(tiling_key_); | |||||
| GE_CHK_RT_RET(rtKernelLaunchWithHandle(handle_, dev_func.c_str(), block_dim_, args_.get(), arg_size_, nullptr, | |||||
| stream, kernel_info.c_str())); | |||||
| GELOGD("[%s] Done invoking rtKernelLaunchWithHandle successfully", node_->GetName().c_str()); | |||||
| } | |||||
| return SUCCESS; | return SUCCESS; | ||||
| } | } | ||||
| @@ -78,6 +78,8 @@ class TbeOpTask : public OpTask { | |||||
| void SetSmDesc(void *sm_desc); | void SetSmDesc(void *sm_desc); | ||||
| void SetStubFunc(const std::string &name, const void *stub_func); | void SetStubFunc(const std::string &name, const void *stub_func); | ||||
| void SetKernelArgs(std::unique_ptr<uint8_t[]> &&args, size_t arg_size, uint32_t block_dim, const OpDescPtr &op_desc); | void SetKernelArgs(std::unique_ptr<uint8_t[]> &&args, size_t arg_size, uint32_t block_dim, const OpDescPtr &op_desc); | ||||
| void SetKernelWithHandleArgs(std::unique_ptr<uint8_t[]> &&args, size_t arg_size, uint32_t block_dim, | |||||
| const OpDescPtr &op_desc, const domi::KernelDefWithHandle& kernel_def_with_handle); | |||||
| Status UpdateRunInfo(const vector<GeTensorDesc> &input_desc, | Status UpdateRunInfo(const vector<GeTensorDesc> &input_desc, | ||||
| const vector<GeTensorDesc> &output_desc) override; | const vector<GeTensorDesc> &output_desc) override; | ||||
| @@ -87,6 +89,7 @@ class TbeOpTask : public OpTask { | |||||
| const std::string &GetStubName() const; | const std::string &GetStubName() const; | ||||
| void EnableDynamicSupport(const NodePtr &node, void *tiling_buffer, size_t max_tiling_size); | void EnableDynamicSupport(const NodePtr &node, void *tiling_buffer, size_t max_tiling_size); | ||||
| uint32_t GetTaskType() const override; | uint32_t GetTaskType() const override; | ||||
| void SetHandle(void *handle); | |||||
| private: | private: | ||||
| friend class SingleOpModel; | friend class SingleOpModel; | ||||
| @@ -107,6 +110,11 @@ class TbeOpTask : public OpTask { | |||||
| std::string tiling_data_; | std::string tiling_data_; | ||||
| std::vector<void *> workspaces_; | std::vector<void *> workspaces_; | ||||
| NodePtr node_; | NodePtr node_; | ||||
| uint32_t tiling_key_ = 0; | |||||
| void* handle_ = nullptr; | |||||
| std::string original_kernel_key_; | |||||
| std::string node_info_; | |||||
| }; | }; | ||||
| class AiCpuBaseTask : public OpTask { | class AiCpuBaseTask : public OpTask { | ||||
| @@ -49,6 +49,15 @@ KernelHolder::~KernelHolder() { | |||||
| } | } | ||||
| } | } | ||||
| HandleHolder::HandleHolder(void *bin_handle) | |||||
| : bin_handle_(bin_handle) {} | |||||
| HandleHolder::~HandleHolder() { | |||||
| if (bin_handle_ != nullptr) { | |||||
| GE_CHK_RT(rtDevBinaryUnRegister(bin_handle_)); | |||||
| } | |||||
| } | |||||
| const char *KernelBinRegistry::GetUnique(const string &stub_func) { | const char *KernelBinRegistry::GetUnique(const string &stub_func) { | ||||
| std::lock_guard<std::mutex> lock(mutex_); | std::lock_guard<std::mutex> lock(mutex_); | ||||
| auto it = unique_stubs_.find(stub_func); | auto it = unique_stubs_.find(stub_func); | ||||
| @@ -76,10 +85,17 @@ bool KernelBinRegistry::AddKernel(const std::string &stub_name, std::unique_ptr< | |||||
| return ret.second; | return ret.second; | ||||
| } | } | ||||
| TbeTaskBuilder::TbeTaskBuilder(const std::string &model_name, const NodePtr &node, const domi::KernelDef &kernel_def) | |||||
| bool HandleRegistry::AddHandle(std::unique_ptr<HandleHolder> &&holder) { | |||||
| auto ret = registered_handles_.emplace(std::move(holder)); | |||||
| return ret.second; | |||||
| } | |||||
| TbeTaskBuilder::TbeTaskBuilder(const std::string &model_name, const NodePtr &node, const domi::TaskDef &task_def) | |||||
| : node_(node), | : node_(node), | ||||
| op_desc_(node->GetOpDesc()), | op_desc_(node->GetOpDesc()), | ||||
| kernel_def_(kernel_def), | |||||
| task_def_(task_def), | |||||
| kernel_def_(task_def.kernel()), | |||||
| kernel_def_with_handle_(task_def.kernel_with_handle()), | |||||
| stub_name_(model_name + "/" + node->GetName() + "_tvmbin") {} | stub_name_(model_name + "/" + node->GetName() + "_tvmbin") {} | ||||
| Status TbeTaskBuilder::DoRegisterBinary(const OpKernelBin &kernel_bin, void **bin_handle, | Status TbeTaskBuilder::DoRegisterBinary(const OpKernelBin &kernel_bin, void **bin_handle, | ||||
| @@ -89,9 +105,14 @@ Status TbeTaskBuilder::DoRegisterBinary(const OpKernelBin &kernel_bin, void **bi | |||||
| binary.data = kernel_bin.GetBinData(); | binary.data = kernel_bin.GetBinData(); | ||||
| binary.length = kernel_bin.GetBinDataSize(); | binary.length = kernel_bin.GetBinDataSize(); | ||||
| binary.magic = param.core_type == 0 ? RT_DEV_BINARY_MAGIC_ELF : RT_DEV_BINARY_MAGIC_ELF_AIVEC; | binary.magic = param.core_type == 0 ? RT_DEV_BINARY_MAGIC_ELF : RT_DEV_BINARY_MAGIC_ELF_AIVEC; | ||||
| auto ret = rtDevBinaryRegister(&binary, bin_handle); | |||||
| Status ret = 0; | |||||
| if (task_def_.type() == RT_MODEL_TASK_ALL_KERNEL) { | |||||
| ret = rtRegisterAllKernel(&binary, bin_handle); | |||||
| } else { | |||||
| ret = rtDevBinaryRegister(&binary, bin_handle); | |||||
| } | |||||
| if (ret != RT_ERROR_NONE) { | if (ret != RT_ERROR_NONE) { | ||||
| GELOGE(ret, "rtDevBinaryRegister failed, bin key = %s, core_type = %ld, rt ret = %d", stub_name_.c_str(), | |||||
| GELOGE(ret, "DoRegisterBinary failed, bin key = %s, core_type = %ld, rt ret = %d", stub_name_.c_str(), | |||||
| param.core_type, static_cast<int>(ret)); | param.core_type, static_cast<int>(ret)); | ||||
| return ret; | return ret; | ||||
| } | } | ||||
| @@ -128,14 +149,15 @@ Status TbeTaskBuilder::DoRegisterFunction(void *bin_handle, const char *stub_nam | |||||
| Status TbeTaskBuilder::DoRegisterKernel(const ge::OpKernelBin &tbe_kernel, const char *bin_file_key, void **bin_handle, | Status TbeTaskBuilder::DoRegisterKernel(const ge::OpKernelBin &tbe_kernel, const char *bin_file_key, void **bin_handle, | ||||
| const SingleOpModelParam ¶m) { | const SingleOpModelParam ¶m) { | ||||
| std::string kernel_name; | |||||
| GetKernelName(op_desc_, kernel_name); | |||||
| void *handle = nullptr; | void *handle = nullptr; | ||||
| auto ret = DoRegisterBinary(tbe_kernel, &handle, param); | auto ret = DoRegisterBinary(tbe_kernel, &handle, param); | ||||
| if (ret != SUCCESS) { | if (ret != SUCCESS) { | ||||
| return ret; | return ret; | ||||
| } | } | ||||
| if (task_def_.type() == RT_MODEL_TASK_ALL_KERNEL) { | |||||
| *bin_handle = handle; | |||||
| return SUCCESS; | |||||
| } | |||||
| ret = DoRegisterMeta(handle); | ret = DoRegisterMeta(handle); | ||||
| if (ret != SUCCESS) { | if (ret != SUCCESS) { | ||||
| @@ -143,6 +165,8 @@ Status TbeTaskBuilder::DoRegisterKernel(const ge::OpKernelBin &tbe_kernel, const | |||||
| return ret; | return ret; | ||||
| } | } | ||||
| std::string kernel_name; | |||||
| GetKernelName(op_desc_, kernel_name); | |||||
| ret = DoRegisterFunction(handle, bin_file_key, kernel_name.c_str()); | ret = DoRegisterFunction(handle, bin_file_key, kernel_name.c_str()); | ||||
| if (ret != SUCCESS) { | if (ret != SUCCESS) { | ||||
| GE_CHK_RT(rtDevBinaryUnRegister(handle)); | GE_CHK_RT(rtDevBinaryUnRegister(handle)); | ||||
| @@ -186,13 +210,15 @@ Status TbeTaskBuilder::RegisterKernel(TbeOpTask &task, const SingleOpModelParam | |||||
| void *bin_handle = nullptr; | void *bin_handle = nullptr; | ||||
| auto ret = DoRegisterKernel(*tbe_kernel, stub_func, &bin_handle, param); | auto ret = DoRegisterKernel(*tbe_kernel, stub_func, &bin_handle, param); | ||||
| if (ret == SUCCESS) { | |||||
| holder->SetBinHandle(bin_handle); | |||||
| if (!registry.AddKernel(stub_name_, std::move(holder))) { | |||||
| // should not happen. only one thread can reach here | |||||
| GELOGE(ACL_ERROR_GE_INTERNAL_ERROR, "Add kernel failed. stub name = %s", stub_name_.c_str()); | |||||
| return ACL_ERROR_GE_INTERNAL_ERROR; | |||||
| } | |||||
| if (ret != SUCCESS) { | |||||
| GELOGE(ACL_ERROR_GE_INTERNAL_ERROR, "RegisterKernel failed. stub name = %s", stub_name_.c_str()); | |||||
| return ACL_ERROR_GE_INTERNAL_ERROR; | |||||
| } | |||||
| holder->SetBinHandle(bin_handle); | |||||
| if (!registry.AddKernel(stub_name_, std::move(holder))) { | |||||
| // should not happen. only one thread can reach here | |||||
| GELOGE(ACL_ERROR_GE_INTERNAL_ERROR, "Add kernel failed. stub name = %s", stub_name_.c_str()); | |||||
| return ACL_ERROR_GE_INTERNAL_ERROR; | |||||
| } | } | ||||
| } | } | ||||
| @@ -200,6 +226,35 @@ Status TbeTaskBuilder::RegisterKernel(TbeOpTask &task, const SingleOpModelParam | |||||
| return SUCCESS; | return SUCCESS; | ||||
| } | } | ||||
| Status TbeTaskBuilder::RegisterKernelWithHandle(TbeOpTask &task, const SingleOpModelParam ¶m) { | |||||
| GELOGD("RegisterKernelWithHandle begin."); | |||||
| HandleRegistry ®istry = HandleRegistry::GetInstance(); | |||||
| auto tbe_kernel = GetTbeKernel(op_desc_); | |||||
| if (tbe_kernel == nullptr) { | |||||
| GELOGE(ACL_ERROR_GE_INTERNAL_ERROR, "OP EXT ATTR NAME TBE_KERNEL not found. op = %s", | |||||
| op_desc_->GetName().c_str()); | |||||
| return ACL_ERROR_GE_INTERNAL_ERROR; | |||||
| } | |||||
| void *bin_handle = nullptr; | |||||
| auto ret = DoRegisterKernel(*tbe_kernel, nullptr, &bin_handle, param); | |||||
| if (ret != SUCCESS) { | |||||
| GELOGE(ACL_ERROR_GE_INTERNAL_ERROR, "RegisterKernel failed. node name = %s", op_desc_->GetName().c_str()); | |||||
| return ACL_ERROR_GE_INTERNAL_ERROR; | |||||
| } | |||||
| handle_ = bin_handle; | |||||
| auto holder = std::unique_ptr<HandleHolder>(new (std::nothrow) HandleHolder(handle_)); | |||||
| if (holder == nullptr) { | |||||
| GELOGE(ACL_ERROR_GE_MEMORY_ALLOCATION, "create HandleHodler failed."); | |||||
| return ACL_ERROR_GE_MEMORY_ALLOCATION; | |||||
| } | |||||
| if (!registry.AddHandle(std::move(holder))) { | |||||
| GELOGE(ACL_ERROR_GE_INTERNAL_ERROR, "Add handle failed. node name = %s", op_desc_->GetName().c_str()); | |||||
| return ACL_ERROR_GE_INTERNAL_ERROR; | |||||
| } | |||||
| return SUCCESS; | |||||
| } | |||||
| Status TbeTaskBuilder::GetSmDesc(void **sm_desc, const SingleOpModelParam ¶m) const { | Status TbeTaskBuilder::GetSmDesc(void **sm_desc, const SingleOpModelParam ¶m) const { | ||||
| const std::string &sm_desc_str = kernel_def_.sm_desc(); | const std::string &sm_desc_str = kernel_def_.sm_desc(); | ||||
| if (sm_desc_str.empty()) { | if (sm_desc_str.empty()) { | ||||
| @@ -217,17 +272,17 @@ Status TbeTaskBuilder::GetSmDesc(void **sm_desc, const SingleOpModelParam ¶m | |||||
| } | } | ||||
| } | } | ||||
| auto rtRet = rtMemAllocManaged(sm_desc, sm_desc_str.size(), RT_MEMORY_SPM); | |||||
| if (rtRet != RT_ERROR_NONE) { | |||||
| GELOGE(rtRet, "rtMemAllocManaged failed, ret: %d", static_cast<int>(rtRet)); | |||||
| return rtRet; | |||||
| auto rt_ret = rtMemAllocManaged(sm_desc, sm_desc_str.size(), RT_MEMORY_SPM); | |||||
| if (rt_ret != RT_ERROR_NONE) { | |||||
| GELOGE(rt_ret, "rtMemAllocManaged failed, ret: %d", static_cast<int>(rt_ret)); | |||||
| return rt_ret; | |||||
| } | } | ||||
| rtRet = rtMemcpy(*sm_desc, sm_desc_str.size(), sm_desc_str.data(), sm_desc_str.size(), RT_MEMCPY_HOST_TO_DEVICE); | |||||
| if (rtRet != RT_ERROR_NONE) { | |||||
| rt_ret = rtMemcpy(*sm_desc, sm_desc_str.size(), sm_desc_str.data(), sm_desc_str.size(), RT_MEMCPY_HOST_TO_DEVICE); | |||||
| if (rt_ret != RT_ERROR_NONE) { | |||||
| (void)rtMemFreeManaged(*sm_desc); | (void)rtMemFreeManaged(*sm_desc); | ||||
| GELOGE(rtRet, "rtMemcpy, ret: %d", static_cast<int>(rtRet)); | |||||
| return rtRet; | |||||
| GELOGE(rt_ret, "rtMemcpy, ret: %d", static_cast<int>(rt_ret)); | |||||
| return rt_ret; | |||||
| } | } | ||||
| } | } | ||||
| @@ -239,10 +294,10 @@ Status TbeTaskBuilder::SetKernelArgs(TbeOpTask &task, const SingleOpModelParam & | |||||
| auto args = std::unique_ptr<uint8_t[]>(new (std::nothrow) uint8_t[arg_size]); | auto args = std::unique_ptr<uint8_t[]>(new (std::nothrow) uint8_t[arg_size]); | ||||
| GE_CHECK_NOTNULL(args); | GE_CHECK_NOTNULL(args); | ||||
| auto rtRet = rtMemcpy(args.get(), arg_size, kernel_def_.args().data(), arg_size, RT_MEMCPY_HOST_TO_HOST); | |||||
| if (rtRet != RT_ERROR_NONE) { | |||||
| GELOGE(rtRet, "rtMemcpy args failed, size = %zu, ret = %d", arg_size, static_cast<int>(rtRet)); | |||||
| return RT_ERROR_TO_GE_STATUS(rtRet); | |||||
| auto rt_ret = rtMemcpy(args.get(), arg_size, kernel_def_.args().data(), arg_size, RT_MEMCPY_HOST_TO_HOST); | |||||
| if (rt_ret != RT_ERROR_NONE) { | |||||
| GELOGE(rt_ret, "rtMemcpy args failed, size = %zu, ret = %d", arg_size, static_cast<int>(rt_ret)); | |||||
| return RT_ERROR_TO_GE_STATUS(rt_ret); | |||||
| } | } | ||||
| const domi::KernelContext &context = kernel_def_.context(); | const domi::KernelContext &context = kernel_def_.context(); | ||||
| @@ -258,39 +313,83 @@ Status TbeTaskBuilder::SetKernelArgs(TbeOpTask &task, const SingleOpModelParam & | |||||
| std::vector<void *> tensor_device_addr_vec = BuildTaskUtils::GetKernelArgs(op_desc_, param); | std::vector<void *> tensor_device_addr_vec = BuildTaskUtils::GetKernelArgs(op_desc_, param); | ||||
| void *src_addr = reinterpret_cast<void *>(tensor_device_addr_vec.data()); | void *src_addr = reinterpret_cast<void *>(tensor_device_addr_vec.data()); | ||||
| uint64_t src_len = sizeof(void *) * tensor_device_addr_vec.size(); | uint64_t src_len = sizeof(void *) * tensor_device_addr_vec.size(); | ||||
| rtRet = rtMemcpy(args.get() + offset, arg_size - offset, src_addr, src_len, RT_MEMCPY_HOST_TO_HOST); | |||||
| if (rtRet != RT_ERROR_NONE) { | |||||
| GELOGE(rtRet, "rtMemcpy addresses failed, ret = %d", static_cast<int>(rtRet)); | |||||
| return RT_ERROR_TO_GE_STATUS(rtRet); | |||||
| rt_ret = rtMemcpy(args.get() + offset, arg_size - offset, src_addr, src_len, RT_MEMCPY_HOST_TO_HOST); | |||||
| if (rt_ret != RT_ERROR_NONE) { | |||||
| GELOGE(rt_ret, "rtMemcpy addresses failed, ret = %d", static_cast<int>(rt_ret)); | |||||
| return RT_ERROR_TO_GE_STATUS(rt_ret); | |||||
| } | } | ||||
| } | } | ||||
| task.SetKernelArgs(std::move(args), arg_size, kernel_def_.block_dim(), op_desc); | task.SetKernelArgs(std::move(args), arg_size, kernel_def_.block_dim(), op_desc); | ||||
| return SUCCESS; | |||||
| } | |||||
| Status TbeTaskBuilder::SetKernelWithHandleArgs(TbeOpTask &task, const SingleOpModelParam ¶m, | |||||
| const OpDescPtr &op_desc) { | |||||
| size_t arg_size = kernel_def_with_handle_.args_size(); | |||||
| auto args = std::unique_ptr<uint8_t[]>(new (std::nothrow) uint8_t[arg_size]); | |||||
| GE_CHECK_NOTNULL(args); | |||||
| auto rt_ret = rtMemcpy(args.get(), arg_size, kernel_def_with_handle_.args().data(), arg_size, RT_MEMCPY_HOST_TO_HOST); | |||||
| if (rt_ret != RT_ERROR_NONE) { | |||||
| GELOGE(rt_ret, "rtMemcpy args failed, size = %zu, ret = %d", arg_size, static_cast<int>(rt_ret)); | |||||
| return rt_ret; | |||||
| } | |||||
| const domi::KernelContext &context = kernel_def_with_handle_.context(); | |||||
| const auto *args_offset_tmp = reinterpret_cast<const uint16_t *>(context.args_offset().data()); | |||||
| uint16_t offset = *args_offset_tmp; | |||||
| bool is_dynamic = false; | |||||
| (void)AttrUtils::GetBool(op_desc_, kAttrSupportDynamicShape, is_dynamic); | |||||
| if (is_dynamic) { | |||||
| GE_CHK_STATUS_RET_NOLOG(InitTilingInfo(task)); | |||||
| } else { | |||||
| // copy args | |||||
| std::vector<void *> tensor_device_addr_vec = BuildTaskUtils::GetKernelArgs(op_desc_, param); | |||||
| void *src_addr = reinterpret_cast<void *>(tensor_device_addr_vec.data()); | |||||
| uint64_t src_len = sizeof(void *) * tensor_device_addr_vec.size(); | |||||
| rt_ret = rtMemcpy(args.get() + offset, arg_size - offset, src_addr, src_len, RT_MEMCPY_HOST_TO_HOST); | |||||
| if (rt_ret != RT_ERROR_NONE) { | |||||
| GELOGE(rt_ret, "rtMemcpy addresses failed, ret = %d", static_cast<int>(rt_ret)); | |||||
| return rt_ret; | |||||
| } | |||||
| } | |||||
| task.SetKernelWithHandleArgs(std::move(args), arg_size, kernel_def_with_handle_.block_dim(), op_desc, | |||||
| kernel_def_with_handle_); | |||||
| return SUCCESS; | return SUCCESS; | ||||
| } | } | ||||
| Status TbeTaskBuilder::BuildTask(TbeOpTask &task, const SingleOpModelParam ¶m) { | Status TbeTaskBuilder::BuildTask(TbeOpTask &task, const SingleOpModelParam ¶m) { | ||||
| GELOGD("Build tbe task begin"); | GELOGD("Build tbe task begin"); | ||||
| auto ret = SetKernelArgs(task, param, op_desc_); | |||||
| auto task_type = static_cast<rtModelTaskType_t>(task_def_.type()); | |||||
| auto ret = task_type == RT_MODEL_TASK_ALL_KERNEL ? SetKernelWithHandleArgs(task, param, op_desc_) : | |||||
| SetKernelArgs(task, param, op_desc_); | |||||
| if (ret != SUCCESS) { | if (ret != SUCCESS) { | ||||
| return ret; | return ret; | ||||
| } | } | ||||
| ret = RegisterKernel(task, param); | |||||
| ret = task_type == RT_MODEL_TASK_ALL_KERNEL ? RegisterKernelWithHandle(task, param) : | |||||
| RegisterKernel(task, param); | |||||
| task.SetHandle(handle_); | |||||
| if (ret != SUCCESS) { | if (ret != SUCCESS) { | ||||
| return ret; | return ret; | ||||
| } | } | ||||
| auto task_info = BuildTaskUtils::GetTaskInfo(op_desc_); | auto task_info = BuildTaskUtils::GetTaskInfo(op_desc_); | ||||
| GELOGI("[TASK_INFO] %s %s", stub_name_.c_str(), task_info.c_str()); | GELOGI("[TASK_INFO] %s %s", stub_name_.c_str(), task_info.c_str()); | ||||
| void *stub_func = nullptr; | |||||
| auto rtRet = rtGetFunctionByName(stub_name_.c_str(), &stub_func); | |||||
| if (rtRet != SUCCESS) { | |||||
| GELOGE(rtRet, "rtGetFunctionByName failed."); | |||||
| return RT_ERROR_TO_GE_STATUS(rtRet); | |||||
| if (task_type != RT_MODEL_TASK_ALL_KERNEL) { | |||||
| void *stub_func = nullptr; | |||||
| auto rt_ret = rtGetFunctionByName(stub_name_.c_str(), &stub_func); | |||||
| if (rt_ret != SUCCESS) { | |||||
| GELOGE(rt_ret, "rtGetFunctionByName failed."); | |||||
| return RT_ERROR_TO_GE_STATUS(rt_ret); | |||||
| } | |||||
| task.SetStubFunc(stub_name_, stub_func); | |||||
| } | } | ||||
| task.SetStubFunc(stub_name_, stub_func); | |||||
| return SUCCESS; | return SUCCESS; | ||||
| } | } | ||||
| @@ -299,15 +398,16 @@ Status TbeTaskBuilder::InitTilingInfo(TbeOpTask &task) { | |||||
| int64_t max_size = -1; | int64_t max_size = -1; | ||||
| (void)AttrUtils::GetInt(op_desc_, kAttrOpParamSize, max_size); | (void)AttrUtils::GetInt(op_desc_, kAttrOpParamSize, max_size); | ||||
| GELOGD("Got op param size by key: %s, ret = %ld", kAttrOpParamSize, max_size); | GELOGD("Got op param size by key: %s, ret = %ld", kAttrOpParamSize, max_size); | ||||
| if (max_size <= 0) { | |||||
| if (max_size < 0) { | |||||
| GELOGE(ACL_ERROR_GE_PARAM_INVALID, "[%s] Invalid op_param_size: %ld.", op_desc_->GetName().c_str(), max_size); | GELOGE(ACL_ERROR_GE_PARAM_INVALID, "[%s] Invalid op_param_size: %ld.", op_desc_->GetName().c_str(), max_size); | ||||
| return ACL_ERROR_GE_PARAM_INVALID; | return ACL_ERROR_GE_PARAM_INVALID; | ||||
| } | } | ||||
| void *tiling_buffer = nullptr; | void *tiling_buffer = nullptr; | ||||
| GE_CHK_RT_RET(rtMalloc(&tiling_buffer, static_cast<uint64_t>(max_size), RT_MEMORY_HBM)); | |||||
| GE_CHECK_NOTNULL(tiling_buffer); | |||||
| GELOGD("[%s] Done allocating tiling buffer, size=%ld.", op_desc_->GetName().c_str(), max_size); | |||||
| if (max_size > 0) { | |||||
| GE_CHK_RT_RET(rtMalloc(&tiling_buffer, static_cast<uint64_t>(max_size), RT_MEMORY_HBM)); | |||||
| GE_CHECK_NOTNULL(tiling_buffer); | |||||
| GELOGD("[%s] Done allocating tiling buffer, size=%ld.", op_desc_->GetName().c_str(), max_size); | |||||
| } | |||||
| task.EnableDynamicSupport(node_, tiling_buffer, static_cast<size_t>(max_size)); | task.EnableDynamicSupport(node_, tiling_buffer, static_cast<size_t>(max_size)); | ||||
| return SUCCESS; | return SUCCESS; | ||||
| @@ -42,6 +42,19 @@ class KernelHolder { | |||||
| std::shared_ptr<ge::OpKernelBin> kernel_bin_; | std::shared_ptr<ge::OpKernelBin> kernel_bin_; | ||||
| }; | }; | ||||
| class HandleHolder { | |||||
| public: | |||||
| HandleHolder(void *bin_handle); | |||||
| ~HandleHolder(); | |||||
| void SetBinHandle(void *bin_handle) { bin_handle_ = bin_handle; } | |||||
| void *GetBinHandle() { return bin_handle_; } | |||||
| private: | |||||
| friend class HandleRegistry; | |||||
| void *bin_handle_ = nullptr; | |||||
| }; | |||||
| class KernelBinRegistry { | class KernelBinRegistry { | ||||
| public: | public: | ||||
| static KernelBinRegistry &GetInstance() { | static KernelBinRegistry &GetInstance() { | ||||
| @@ -61,9 +74,22 @@ class KernelBinRegistry { | |||||
| std::mutex mutex_; | std::mutex mutex_; | ||||
| }; | }; | ||||
| class HandleRegistry { | |||||
| public: | |||||
| static HandleRegistry &GetInstance() { | |||||
| static HandleRegistry instance; | |||||
| return instance; | |||||
| } | |||||
| bool AddHandle(std::unique_ptr<HandleHolder> &&holder); | |||||
| private: | |||||
| std::set<std::unique_ptr<HandleHolder>> registered_handles_; | |||||
| }; | |||||
| class TbeTaskBuilder { | class TbeTaskBuilder { | ||||
| public: | public: | ||||
| TbeTaskBuilder(const std::string &model_name, const NodePtr &node, const domi::KernelDef &kernel_def); | |||||
| TbeTaskBuilder(const std::string &model_name, const NodePtr &node, const domi::TaskDef &task_def); | |||||
| ~TbeTaskBuilder() = default; | ~TbeTaskBuilder() = default; | ||||
| Status BuildTask(TbeOpTask &task, const SingleOpModelParam ¶m); | Status BuildTask(TbeOpTask &task, const SingleOpModelParam ¶m); | ||||
| @@ -71,9 +97,11 @@ class TbeTaskBuilder { | |||||
| private: | private: | ||||
| Status InitTilingInfo(TbeOpTask &task); | Status InitTilingInfo(TbeOpTask &task); | ||||
| Status SetKernelArgs(TbeOpTask &task, const SingleOpModelParam ¶m, const OpDescPtr &op_desc); | Status SetKernelArgs(TbeOpTask &task, const SingleOpModelParam ¶m, const OpDescPtr &op_desc); | ||||
| Status SetKernelWithHandleArgs(TbeOpTask &task, const SingleOpModelParam ¶m, const OpDescPtr &op_desc); | |||||
| Status GetSmDesc(void **sm_desc, const SingleOpModelParam ¶m) const; | Status GetSmDesc(void **sm_desc, const SingleOpModelParam ¶m) const; | ||||
| Status RegisterKernel(TbeOpTask &task, const SingleOpModelParam ¶m); | Status RegisterKernel(TbeOpTask &task, const SingleOpModelParam ¶m); | ||||
| Status RegisterKernelWithHandle(TbeOpTask &task, const SingleOpModelParam ¶m); | |||||
| Status DoRegisterKernel(const OpKernelBin &kernel_bin, const char *bin_file_key, void **bin_handle, | Status DoRegisterKernel(const OpKernelBin &kernel_bin, const char *bin_file_key, void **bin_handle, | ||||
| const SingleOpModelParam ¶m); | const SingleOpModelParam ¶m); | ||||
| Status DoRegisterBinary(const OpKernelBin &kernel_bin, void **bin_handle, const SingleOpModelParam ¶m) const; | Status DoRegisterBinary(const OpKernelBin &kernel_bin, void **bin_handle, const SingleOpModelParam ¶m) const; | ||||
| @@ -83,8 +111,11 @@ class TbeTaskBuilder { | |||||
| const NodePtr node_; | const NodePtr node_; | ||||
| const OpDescPtr op_desc_; | const OpDescPtr op_desc_; | ||||
| const domi::TaskDef &task_def_; | |||||
| const domi::KernelDef &kernel_def_; | const domi::KernelDef &kernel_def_; | ||||
| const domi::KernelDefWithHandle &kernel_def_with_handle_; | |||||
| const std::string stub_name_; | const std::string stub_name_; | ||||
| void *handle_ = nullptr; | |||||
| }; | }; | ||||
| } // namespace ge | } // namespace ge | ||||
| @@ -131,8 +131,15 @@ rtError_t rtFunctionRegister(void *bin_handle, const void *stub_func, const char | |||||
| rtError_t rtDevBinaryRegister(const rtDevBinary_t *bin, void **handle) { return RT_ERROR_NONE; } | rtError_t rtDevBinaryRegister(const rtDevBinary_t *bin, void **handle) { return RT_ERROR_NONE; } | ||||
| rtError_t rtRegisterAllKernel(const rtDevBinary_t *bin, void **handle) { return RT_ERROR_NONE; } | |||||
| rtError_t rtKernelConfigTransArg(const void *ptr, uint64_t size, uint32_t flag, void **arg) { return RT_ERROR_NONE; } | rtError_t rtKernelConfigTransArg(const void *ptr, uint64_t size, uint32_t flag, void **arg) { return RT_ERROR_NONE; } | ||||
| rtError_t rtKernelLaunchWithHandle(void *handle, const void *devFunc, uint32_t blockDim, void *args, uint32_t argsSize, | |||||
| rtSmDesc_t *smDesc, rtStream_t stream, const void *kernelInfo) { | |||||
| return RT_ERROR_NONE; | |||||
| } | |||||
| rtError_t rtKernelLaunch(const void *stub_func, uint32_t block_dim, void *args, uint32_t args_size, rtSmDesc_t *sm_desc, | rtError_t rtKernelLaunch(const void *stub_func, uint32_t block_dim, void *args, uint32_t args_size, rtSmDesc_t *sm_desc, | ||||
| rtStream_t stream) { | rtStream_t stream) { | ||||
| return RT_ERROR_NONE; | return RT_ERROR_NONE; | ||||
| @@ -763,12 +763,17 @@ set(SINGLE_OP_TEST_FILES | |||||
| #"single_op/single_op_model_unittest.cc" | #"single_op/single_op_model_unittest.cc" | ||||
| "single_op/single_op_manager_unittest.cc" | "single_op/single_op_manager_unittest.cc" | ||||
| "single_op/stream_resource_unittest.cc" | "single_op/stream_resource_unittest.cc" | ||||
| "single_op/single_op_task_unittest.cc" | |||||
| ) | ) | ||||
| set(PROFILING_MNG_TEST_FILES | set(PROFILING_MNG_TEST_FILES | ||||
| "profiling/ge_profiling_manager_unittest.cc" | "profiling/ge_profiling_manager_unittest.cc" | ||||
| ) | ) | ||||
| set(HYBRID_TEST_FILES | |||||
| "hybrid/ge_hybrid_unittest.cc" | |||||
| ) | |||||
| set(OTHERS_TEST_FILES | set(OTHERS_TEST_FILES | ||||
| "plugin_manager/ge_util_unittest.cc" | "plugin_manager/ge_util_unittest.cc" | ||||
| ) | ) | ||||
| @@ -1064,6 +1069,7 @@ add_executable(ut_libge_distinct_load_utest | |||||
| ${DISTINCT_GRAPH_LOAD_SRC_FILES} | ${DISTINCT_GRAPH_LOAD_SRC_FILES} | ||||
| ${SINGLE_OP_TEST_FILES} | ${SINGLE_OP_TEST_FILES} | ||||
| ${PROFILING_MNG_TEST_FILES} | ${PROFILING_MNG_TEST_FILES} | ||||
| ${HYBRID_TEST_FILES} | |||||
| ) | ) | ||||
| target_compile_options(ut_libge_distinct_load_utest PRIVATE | target_compile_options(ut_libge_distinct_load_utest PRIVATE | ||||
| @@ -0,0 +1,101 @@ | |||||
| /** | |||||
| * Copyright 2019-2021 Huawei Technologies Co., Ltd | |||||
| * | |||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||||
| * you may not use this file except in compliance with the License. | |||||
| * You may obtain a copy of the License at | |||||
| * | |||||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||||
| * | |||||
| * Unless required by applicable law or agreed to in writing, software | |||||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| * See the License for the specific language governing permissions and | |||||
| * limitations under the License. | |||||
| */ | |||||
| #include <gtest/gtest.h> | |||||
| #include <vector> | |||||
| #include "runtime/rt.h" | |||||
| #define protected public | |||||
| #define private public | |||||
| #include "hybrid/model/hybrid_model_builder.h" | |||||
| #include "hybrid/model/hybrid_model.h" | |||||
| #include "model/ge_model.h" | |||||
| #include "model/ge_root_model.h" | |||||
| #include "hybrid/node_executor/aicore/aicore_op_task.h" | |||||
| #include "framework/common/taskdown_common.h" | |||||
| #include "framework/common/debug/log.h" | |||||
| #include "graph/ge_context.h" | |||||
| #include "hybrid/executor/hybrid_execution_context.h" | |||||
| #include "hybrid/node_executor/aicore/aicore_task_builder.h" | |||||
| #include "graph/load/model_manager/tbe_handle_store.h" | |||||
| #include "graph/types.h" | |||||
| #undef private | |||||
| #undef protected | |||||
| using namespace std; | |||||
| using namespace testing; | |||||
| using namespace ge; | |||||
| class UtestGeHybrid : public testing::Test { | |||||
| protected: | |||||
| void SetUp() {} | |||||
| void TearDown() {} | |||||
| }; | |||||
| static ge::OpDescPtr CreateOpDesc(string name = "", string type = "") { | |||||
| auto op_desc = std::make_shared<ge::OpDesc>(name, type); | |||||
| op_desc->SetStreamId(0); | |||||
| op_desc->SetId(0); | |||||
| op_desc->SetWorkspace({}); | |||||
| ; | |||||
| op_desc->SetWorkspaceBytes({}); | |||||
| op_desc->SetInputOffset({}); | |||||
| op_desc->SetOutputOffset({}); | |||||
| ge::AttrUtils::SetStr(op_desc, ge::TVM_ATTR_NAME_MAGIC, "RT_DEV_BINARY_MAGIC_ELF_AIVEC"); | |||||
| bool support_dynamic = true; | |||||
| ge::AttrUtils::GetBool(op_desc, "support_dynamicshape", support_dynamic); | |||||
| return op_desc; | |||||
| } | |||||
| TEST_F(UtestGeHybrid, aicore_op_task_init_success) { | |||||
| // build aicore task | |||||
| auto aicore_task = std::unique_ptr<hybrid::AiCoreOpTask>(new(std::nothrow)hybrid::AiCoreOpTask()); | |||||
| domi::TaskDef task_def; | |||||
| task_def.set_type(RT_MODEL_TASK_ALL_KERNEL); | |||||
| domi::KernelDefWithHandle *kernel_with_handle = task_def.mutable_kernel_with_handle(); | |||||
| kernel_with_handle->set_original_kernel_key(""); | |||||
| kernel_with_handle->set_node_info(""); | |||||
| kernel_with_handle->set_block_dim(32); | |||||
| kernel_with_handle->set_args_size(64); | |||||
| string args(64, '1'); | |||||
| kernel_with_handle->set_args(args.data(), 64); | |||||
| domi::KernelContext *context = kernel_with_handle->mutable_context(); | |||||
| context->set_op_index(1); | |||||
| context->set_kernel_type(2); // ccKernelType::TE | |||||
| uint16_t args_offset[9] = {0}; | |||||
| context->set_args_offset(args_offset, 9 * sizeof(uint16_t)); | |||||
| OpDescPtr op_desc = CreateOpDesc("Add", "Add"); | |||||
| std::vector<char> kernelBin; | |||||
| TBEKernelPtr tbe_kernel = std::make_shared<ge::OpKernelBin>("name/Add", std::move(kernelBin)); | |||||
| op_desc->SetExtAttr(ge::OP_EXTATTR_NAME_TBE_KERNEL, tbe_kernel); | |||||
| std::string kernel_name("kernel/Add"); | |||||
| AttrUtils::SetStr(op_desc, op_desc->GetName() + "_kernelname", kernel_name); | |||||
| ASSERT_EQ(aicore_task->InitWithTaskDef(*op_desc.get(), task_def), SUCCESS); | |||||
| rtStream_t stream = nullptr; | |||||
| rtStreamCreate(&stream, 0); | |||||
| ASSERT_EQ(aicore_task->LaunchKernel(stream), SUCCESS); | |||||
| char *handle = ""; | |||||
| aicore_task->handle_ = handle; | |||||
| aicore_task->tiling_key_ = 1; | |||||
| ASSERT_EQ(aicore_task->LaunchKernel(stream), SUCCESS); | |||||
| } | |||||
| @@ -0,0 +1,117 @@ | |||||
| /** | |||||
| * Copyright 2019-2020 Huawei Technologies Co., Ltd | |||||
| * | |||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||||
| * you may not use this file except in compliance with the License. | |||||
| * You may obtain a copy of the License at | |||||
| * | |||||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||||
| * | |||||
| * Unless required by applicable law or agreed to in writing, software | |||||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| * See the License for the specific language governing permissions and | |||||
| * limitations under the License. | |||||
| */ | |||||
| #include <gtest/gtest.h> | |||||
| #include <vector> | |||||
| #include "graph/load/model_manager/model_utils.h" | |||||
| #include "graph/utils/graph_utils.h" | |||||
| #include "runtime/rt.h" | |||||
| #define protected public | |||||
| #define private public | |||||
| #include "single_op/single_op_model.h" | |||||
| #include "single_op/task/tbe_task_builder.h" | |||||
| #include "single_op/task/op_task.h" | |||||
| #include "single_op/task/tbe_task_builder.h" | |||||
| #include "external/register/op_tiling_registry.h" | |||||
| #undef private | |||||
| #undef protected | |||||
| using namespace std; | |||||
| using namespace testing; | |||||
| using namespace ge; | |||||
| using namespace optiling; | |||||
| class UtestSingleOpTask : public testing::Test { | |||||
| protected: | |||||
| void SetUp() {} | |||||
| void TearDown() {} | |||||
| }; | |||||
| TEST_F(UtestSingleOpTask, test_build_kernel_task) { | |||||
| string model_data_str = "123456789"; | |||||
| SingleOpModel model("model", model_data_str.c_str(), model_data_str.size()); | |||||
| model.input_offset_list_.push_back(0); | |||||
| model.input_sizes_.push_back(16); | |||||
| model.output_offset_list_.push_back(0); | |||||
| model.output_sizes_.push_back(16); | |||||
| auto graph = make_shared<ComputeGraph>("graph"); | |||||
| auto op_desc = make_shared<OpDesc>("Add", "Add"); | |||||
| std::vector<char> kernelBin; | |||||
| TBEKernelPtr tbe_kernel = std::make_shared<ge::OpKernelBin>("name/Add", std::move(kernelBin)); | |||||
| op_desc->SetExtAttr(ge::OP_EXTATTR_NAME_TBE_KERNEL, tbe_kernel); | |||||
| std::string kernel_name("kernel/Add"); | |||||
| AttrUtils::SetStr(op_desc, op_desc->GetName() + "_kernelname", kernel_name); | |||||
| vector<int64_t> shape{16, 16}; | |||||
| GeShape ge_shape(shape); | |||||
| GeTensorDesc desc(ge_shape); | |||||
| op_desc->AddInputDesc(desc); | |||||
| op_desc->AddOutputDesc(desc); | |||||
| auto node = graph->AddNode(op_desc); | |||||
| std::mutex stream_mu_; | |||||
| rtStream_t stream_ = nullptr; | |||||
| StreamResource stream_resource(0); | |||||
| SingleOp single_op(&stream_resource, &stream_mu_, stream_); | |||||
| domi::TaskDef task_def; | |||||
| task_def.set_type(RT_MODEL_TASK_ALL_KERNEL); | |||||
| domi::KernelDefWithHandle *kernel_with_handle = task_def.mutable_kernel_with_handle(); | |||||
| kernel_with_handle->set_original_kernel_key(""); | |||||
| kernel_with_handle->set_node_info(""); | |||||
| kernel_with_handle->set_block_dim(32); | |||||
| kernel_with_handle->set_args_size(64); | |||||
| string args(64, '1'); | |||||
| kernel_with_handle->set_args(args.data(), 64); | |||||
| domi::KernelContext *context = kernel_with_handle->mutable_context(); | |||||
| context->set_op_index(1); | |||||
| context->set_kernel_type(2); // ccKernelType::TE | |||||
| uint16_t args_offset[9] = {0}; | |||||
| context->set_args_offset(args_offset, 9 * sizeof(uint16_t)); | |||||
| model.op_list_[1] = node; | |||||
| TbeOpTask task_tmp; | |||||
| TbeOpTask *task = &task_tmp; | |||||
| ASSERT_EQ(model.BuildKernelTask(task_def, &task), SUCCESS); | |||||
| vector<GeTensorDesc> input_desc; | |||||
| vector<DataBuffer> input_buffers; | |||||
| vector<GeTensorDesc> output_desc; | |||||
| vector<DataBuffer> output_buffers; | |||||
| task->node_ = node; | |||||
| OpTilingFunc op_tiling_func = [](const TeOpParas &, const OpCompileInfo &, OpRunInfo &) -> bool {return true;}; | |||||
| OpTilingRegistryInterf("Add", op_tiling_func); | |||||
| ge::AttrUtils::SetStr(op_desc, "compile_info_key", "op_compile_info_key"); | |||||
| ge::AttrUtils::SetStr(op_desc, "compile_info_json", "op_compile_info_json"); | |||||
| char c = '0'; | |||||
| char* buffer = &c; | |||||
| task->tiling_buffer_ = buffer; | |||||
| task->max_tiling_size_ = 64; | |||||
| task->tiling_data_ = "tiling_data"; | |||||
| task->arg_size_ = 64; | |||||
| uint8_t task_args{0}; | |||||
| task->args_.reset(&task_args); | |||||
| ASSERT_EQ(task->LaunchKernel(input_desc, input_buffers, output_desc, output_buffers, stream_), SUCCESS); | |||||
| char handle_tmp = '0'; | |||||
| char *handle = &handle_tmp; | |||||
| task->SetHandle(handle); | |||||
| ASSERT_EQ(task->LaunchKernel(input_desc, input_buffers, output_desc, output_buffers, stream_), SUCCESS); | |||||
| } | |||||
| @@ -191,6 +191,14 @@ typedef void (*rtCallback_t)(void *fnData); | |||||
| #define RT_FUSION_KERNEL_DUMPFLAG (0x04) | #define RT_FUSION_KERNEL_DUMPFLAG (0x04) | ||||
| #define RT_KERNEL_CUSTOM_AICPU (0x08) | #define RT_KERNEL_CUSTOM_AICPU (0x08) | ||||
| /** | |||||
| * @ingroup rt_kernel | |||||
| * @brief kernel mode | |||||
| */ | |||||
| #define RT_DEFAULT_KERNEL_MODE (0x00) | |||||
| #define RT_NORMAL_KERNEL_MODE (0x01) | |||||
| #define RT_ALL_KERNEL_MODE (0x02) | |||||
| /** | /** | ||||
| * @ingroup rt_kernel | * @ingroup rt_kernel | ||||
| * @brief kernel L1 Fusion Dump bit flags | * @brief kernel L1 Fusion Dump bit flags | ||||
| @@ -207,6 +215,16 @@ typedef void (*rtCallback_t)(void *fnData); | |||||
| */ | */ | ||||
| RTS_API rtError_t rtDevBinaryRegister(const rtDevBinary_t *bin, void **handle); | RTS_API rtError_t rtDevBinaryRegister(const rtDevBinary_t *bin, void **handle); | ||||
| /** | |||||
| * @ingroup rt_kernel | |||||
| * @brief register device binary with all kernels | |||||
| * @param [in] bin device binary description | |||||
| * @param [out] handle device binary handle | |||||
| * @return RT_ERROR_NONE for ok | |||||
| * @return RT_ERROR_INVALID_VALUE for error input | |||||
| */ | |||||
| RTS_API rtError_t rtRegisterAllKernel(const rtDevBinary_t *bin, void **handle); | |||||
| /** | /** | ||||
| * @ingroup rt_kernel | * @ingroup rt_kernel | ||||
| * @brief register fast memory device binary | * @brief register fast memory device binary | ||||
| @@ -314,6 +332,23 @@ RTS_API rtError_t rtKernelConfigDump(uint32_t kind, uint32_t dumpSizePerBlock, u | |||||
| RTS_API rtError_t rtKernelLaunch(const void *stubFunc, uint32_t blockDim, void *args, uint32_t argsSize, | RTS_API rtError_t rtKernelLaunch(const void *stubFunc, uint32_t blockDim, void *args, uint32_t argsSize, | ||||
| rtSmDesc_t *smDesc, rtStream_t stream); | rtSmDesc_t *smDesc, rtStream_t stream); | ||||
| /** | |||||
| * @ingroup rt_kernel | |||||
| * @brief launch kernel with handle to device | |||||
| * @param [in] handle program | |||||
| * @param [in] devFunc device function description | |||||
| * @param [in] blockDim block dimensions | |||||
| * @param [in] args arguments address for kernel function | |||||
| * @param [in] argsSize arguments size | |||||
| * @param [in] smDesc shared memory description | |||||
| * @param [in] stream associated stream | |||||
| * @param [in] kernelInfo kernel info | |||||
| * @return RT_ERROR_NONE for ok | |||||
| * @return RT_ERROR_INVALID_VALUE for error input | |||||
| */ | |||||
| RTS_API rtError_t rtKernelLaunchWithHandle(void *handle, const void *devFunc, uint32_t blockDim, void *args, uint32_t argsSize, | |||||
| rtSmDesc_t *smDesc, rtStream_t stream, const void *kernelInfo); | |||||
| /** | /** | ||||
| * @ingroup rt_kernel | * @ingroup rt_kernel | ||||
| * @brief launch kernel to device | * @brief launch kernel to device | ||||
| @@ -50,6 +50,7 @@ typedef enum tagModelTaskType { | |||||
| RT_MODEL_TASK_STREAM_LABEL_SWITCH_BY_INDEX, | RT_MODEL_TASK_STREAM_LABEL_SWITCH_BY_INDEX, | ||||
| RT_MODEL_TASK_STREAM_LABEL_GOTO, | RT_MODEL_TASK_STREAM_LABEL_GOTO, | ||||
| RT_MODEL_TASK_MODEL_EXIT, | RT_MODEL_TASK_MODEL_EXIT, | ||||
| RT_MODEL_TASK_ALL_KERNEL, | |||||
| } rtModelTaskType_t; | } rtModelTaskType_t; | ||||
| typedef enum tagModelStreamType { | typedef enum tagModelStreamType { | ||||
| @@ -127,6 +128,17 @@ typedef struct tagKernelTaskInfo { | |||||
| uint16_t *argsOffset; | uint16_t *argsOffset; | ||||
| } rtKernelTaskInfo_t; | } rtKernelTaskInfo_t; | ||||
| typedef struct tagAllKernelTaskInfo { /* kernel task description carrying a binary handle; member of the tagTaskInfo union as allkernelTask. Several fields share names with the parameters of rtKernelLaunchWithHandle. */ | |||||
| uint16_t blockDim; /* block dimensions; note this is 16-bit here while rtKernelLaunchWithHandle takes uint32_t */ | |||||
| uint16_t argsCount; /* NOTE(review): presumably the number of entries in argsOffset - confirm */ | |||||
| uint16_t argsSize; /* arguments size; 16-bit here while rtKernelLaunchWithHandle takes uint32_t */ | |||||
| uint16_t reserved; /* fourth uint16_t field, bringing the leading fields to 8 bytes before the pointer members */ | |||||
| const void *dev_func; /* NOTE(review): presumably the devFunc passed to rtKernelLaunchWithHandle (device function description) - confirm */ | |||||
| void *handle; /* NOTE(review): presumably the binary handle produced by rtRegisterAllKernel - confirm */ | |||||
| uint8_t *smDesc; /* shared memory description, stored as a raw byte pointer (the launch API uses rtSmDesc_t *) */ | |||||
| uint8_t *args; /* arguments address for the kernel function */ | |||||
| uint16_t *argsOffset; /* NOTE(review): presumably per-argument offsets into args - confirm */ | |||||
| } rtAllKernelTaskInfo_t; | |||||
| typedef struct tagKernelTaskInfoEx { | typedef struct tagKernelTaskInfoEx { | ||||
| uint32_t flags; | uint32_t flags; | ||||
| uint32_t argsSize; | uint32_t argsSize; | ||||
| @@ -251,6 +263,7 @@ typedef struct tagTaskInfo { | |||||
| union { | union { | ||||
| rtKernelTaskInfoEx_t kernelTaskEx; | rtKernelTaskInfoEx_t kernelTaskEx; | ||||
| rtKernelTaskInfo_t kernelTask; | rtKernelTaskInfo_t kernelTask; | ||||
| rtAllKernelTaskInfo_t allkernelTask; | |||||
| rtEventTaskInfo_t eventTask; | rtEventTaskInfo_t eventTask; | ||||
| rtStreamSwitchTaskInfo_t streamSwitchTask; | rtStreamSwitchTaskInfo_t streamSwitchTask; | ||||
| rtStreamActiveTaskInfo_t streamActiveTask; | rtStreamActiveTaskInfo_t streamActiveTask; | ||||