From: @zhao_zhixuan Reviewed-by: @xchu42, @ji_chen Signed-off-by: @ji_chen tags/v1.2.0
| @@ -47,6 +47,7 @@ const char *const kEngineNameDefault = "default"; | |||
| const char *const kVectorEngine = "VectorEngine"; | |||
| const char *const kAIcoreEngine = "AIcoreEngine"; | |||
| const char *const kFileNameSuffix = "online"; | |||
| const char *const kAicpuAllshape = "_AllShape"; | |||
| const size_t kDynamicDimSize = 1; | |||
| const int64_t kDynamicDimValue = -2; | |||
| @@ -721,8 +722,12 @@ Status GeGenerator::BuildSingleOp(OpDescPtr &op_desc, const vector<GeTensor> &in | |||
| GeModelPtr &ge_model = name_to_ge_model.begin()->second; | |||
| GELOGD("The opType in op_desc_tmp is [%s]", op_desc_tmp->GetType().c_str()); | |||
| bool all_shape = false; | |||
| bool dynamic_flag = false; | |||
| if (CheckShapeReset(op_desc, dynamic_flag) == SUCCESS && dynamic_flag) { | |||
| (void)AttrUtils::GetBool(op_desc, kAicpuAllshape, all_shape); | |||
| CheckShapeReset(op_desc, dynamic_flag); | |||
| if (dynamic_flag || all_shape) { | |||
| GELOGD("Get aicpu all_shape kernel!"); | |||
| vector<GeTensor> inputs_dynamic; | |||
| vector<GeTensor> outputs_dynamic; | |||
| GE_CHK_STATUS_RET_NOLOG(ResetTensorVecShape(inputs, inputs_dynamic)); | |||
| @@ -26,8 +26,42 @@ | |||
| #include "graph/attr_value.h" | |||
| #include "graph/load/model_manager/davinci_model.h" | |||
| #include "graph/load/model_manager/model_manager.h" | |||
| #include "hybrid/node_executor/aicpu/aicpu_ext_info.h" | |||
| #include "framework/common/debug/log.h" | |||
| namespace ge { | |||
| Status KernelExTaskInfo::InitTaskExtInfo(const std::string &ext_info, const OpDescPtr &op_desc) { | |||
| if (ext_info.empty()) { | |||
| return SUCCESS; | |||
| } | |||
| int32_t unknown_shape_type_val = 0; | |||
| (void) AttrUtils::GetInt(op_desc, ::ge::ATTR_NAME_UNKNOWN_SHAPE_TYPE, unknown_shape_type_val); | |||
| UnknowShapeOpType unknown_type = static_cast<UnknowShapeOpType>(unknown_shape_type_val); | |||
| uint32_t num_inputs = op_desc->GetInputsSize(); | |||
| uint32_t num_outputs = op_desc->GetOutputsSize(); | |||
| std::unique_ptr<ge::hybrid::AicpuExtInfoHandler> ext_handle( | |||
| new(std::nothrow) ::ge::hybrid::AicpuExtInfoHandler(op_desc->GetName(), | |||
| num_inputs, | |||
| num_outputs, | |||
| unknown_type)); | |||
| GE_CHK_BOOL_RET_STATUS(ext_handle != nullptr, FAILED, "Malloc aicpu_ext_handle mem failed!"); | |||
| GE_CHK_STATUS_RET(ext_handle->Parse(ext_info), | |||
| "Parse kernel ext info failed, kernel_ext_info_size=%zu.", ext_info.size()); | |||
| GE_CHK_STATUS_RET(ext_handle->UpdateExecuteMode(true), "UpdateExecuteMode failed."); | |||
| GELOGD("Update aicpu_task ext_info bit_map execute mode to 1."); | |||
| auto rt_ret = rtMalloc(&ext_info_addr_, ext_handle->GetExtInfoLen(), RT_MEMORY_HBM); | |||
| GE_IF_BOOL_EXEC(rt_ret != RT_ERROR_NONE, | |||
| GELOGE(RT_FAILED, "rtMalloc ext_info error: 0x%X, size=%zu", rt_ret, ext_info.size()); | |||
| return RT_ERROR_TO_GE_STATUS(rt_ret);) | |||
| rt_ret = rtMemcpy(ext_info_addr_, ext_handle->GetExtInfoLen(), ext_handle->GetExtInfo(), | |||
| ext_handle->GetExtInfoLen(), RT_MEMCPY_HOST_TO_DEVICE); | |||
| GE_IF_BOOL_EXEC(rt_ret != RT_ERROR_NONE, | |||
| GELOGE(RT_FAILED, "rtMemcpy ext_info error: 0x%X, size=%zu", rt_ret, ext_info.size()); | |||
| return RT_ERROR_TO_GE_STATUS(rt_ret);) | |||
| return SUCCESS; | |||
| } | |||
| Status KernelExTaskInfo::Init(const domi::TaskDef &task_def, DavinciModel *davinci_model) { | |||
| GELOGI("KernelExTaskInfo Init Start."); | |||
| GE_CHECK_NOTNULL(davinci_model); | |||
| @@ -63,16 +97,8 @@ Status KernelExTaskInfo::Init(const domi::TaskDef &task_def, DavinciModel *davin | |||
| } | |||
| const auto &ext_info = kernel_ex_def.kernel_ext_info(); | |||
| if (!ext_info.empty()) { | |||
| auto rt_ret = rtMalloc(&ext_info_addr_, ext_info.size(), RT_MEMORY_HBM); | |||
| GE_IF_BOOL_EXEC(rt_ret != RT_ERROR_NONE, | |||
| GELOGE(RT_FAILED, "rtMalloc ext_info error: 0x%X, size=%zu", rt_ret, ext_info.size()); | |||
| return RT_ERROR_TO_GE_STATUS(rt_ret);) | |||
| rt_ret = rtMemcpy(ext_info_addr_, ext_info.size(), ext_info.c_str(), ext_info.size(), RT_MEMCPY_HOST_TO_DEVICE); | |||
| GE_IF_BOOL_EXEC(rt_ret != RT_ERROR_NONE, | |||
| GELOGE(RT_FAILED, "rtMemcpy ext_info error: 0x%X, size=%zu", rt_ret, ext_info.size()); | |||
| return RT_ERROR_TO_GE_STATUS(rt_ret);) | |||
| } | |||
| GE_CHK_STATUS_RET(InitTaskExtInfo(ext_info, op_desc), | |||
| "Init aicpu tf_task ext info failed, ext_info size=%zu", ext_info.size()); | |||
| GELOGI("Node[%s] type[%s] kernel_ext_info size=%zu, ext_info_addr_=%p", op_desc->GetName().c_str(), | |||
| op_desc->GetType().c_str(), ext_info.size(), ext_info_addr_); | |||
| @@ -62,6 +62,7 @@ class KernelExTaskInfo : public TaskInfo { | |||
| void SetIoAddrs(const OpDescPtr &op_desc); | |||
| void InitDumpTask(void *addr, const OpDescPtr &op_desc); | |||
| Status InitTaskExtInfo(const std::string &ext_info, const OpDescPtr &op_desc); | |||
| uint32_t task_id_; | |||
| uint32_t stream_id_; | |||
| @@ -32,6 +32,8 @@ | |||
| #include "super_kernel/super_kernel.h" | |||
| #include "super_kernel/super_kernel_factory.h" | |||
| #include "cce/aicpu_engine_struct.h" | |||
| #include "hybrid/node_executor/aicpu/aicpu_ext_info.h" | |||
| #include "framework/common/debug/log.h" | |||
| namespace { | |||
| const uint8_t kL2LoadToDdr = 1; | |||
| @@ -964,39 +966,32 @@ Status KernelTaskInfo::InitAicpuTaskExtInfo(const std::string &ext_info) { | |||
| return SUCCESS; | |||
| } | |||
| std::unique_ptr<uint8_t[]> copy_ext_info; | |||
| copy_ext_info.reset(new(std::nothrow)uint8_t[ext_info.size()]); | |||
| GE_CHECK_NOTNULL(copy_ext_info); | |||
| auto sec_ret = memcpy_s(copy_ext_info.get(), ext_info.size(), ext_info.c_str(), ext_info.size()); | |||
| if (sec_ret != EOK) { | |||
| GELOGE(FAILED, "memcpy failed, ret: %d", sec_ret); | |||
| return FAILED; | |||
| } | |||
| auto ext_info_data = copy_ext_info.get(); | |||
| size_t offset = 0; | |||
| while (offset + sizeof(aicpu::FWKAdapter::ExtInfo) <= ext_info.size()) { | |||
| auto aicpu_ext_info = reinterpret_cast<aicpu::FWKAdapter::ExtInfo *>(ext_info_data + offset); | |||
| GELOGD("Ext infoType=%d, infoLen=%u.", aicpu_ext_info->infoType, aicpu_ext_info->infoLen); | |||
| if (aicpu_ext_info->infoType == aicpu::FWKAdapter::FWK_ADPT_EXT_SESSION_INFO) { | |||
| GE_CHK_BOOL_RET_STATUS(aicpu_ext_info->infoLen == sizeof(SessionInfo), PARAM_INVALID, | |||
| "Parse ext session info failed as infoLen must be %zu but %u.", | |||
| sizeof(SessionInfo), aicpu_ext_info->infoLen); | |||
| SessionInfo *session_info = reinterpret_cast<SessionInfo *>(aicpu_ext_info->infoMsg); | |||
| session_info->sessionId = davinci_model_->GetSessionId(); | |||
| session_info->sessFlag = true; | |||
| GELOGD("Update aicpu_task ext_info session_info session_id is %lu", session_info->sessionId); | |||
| } | |||
| offset += sizeof(aicpu::FWKAdapter::ExtInfo); | |||
| offset += aicpu_ext_info->infoLen; | |||
| } | |||
| auto rt_ret = rtMalloc(&aicpu_ext_info_addr_, ext_info.size(), RT_MEMORY_HBM); | |||
| int32_t unknown_shape_type_val = 0; | |||
| (void) AttrUtils::GetInt(op_desc_, ::ge::ATTR_NAME_UNKNOWN_SHAPE_TYPE, unknown_shape_type_val); | |||
| UnknowShapeOpType unknown_type = static_cast<UnknowShapeOpType>(unknown_shape_type_val); | |||
| uint32_t num_inputs = op_desc_->GetInputsSize(); | |||
| uint32_t num_outputs = op_desc_->GetOutputsSize(); | |||
| std::unique_ptr<ge::hybrid::AicpuExtInfoHandler> ext_handle( | |||
| new(std::nothrow) ::ge::hybrid::AicpuExtInfoHandler(op_desc_->GetName(), | |||
| num_inputs, | |||
| num_outputs, | |||
| unknown_type)); | |||
| GE_CHK_BOOL_RET_STATUS(ext_handle != nullptr, FAILED, "Malloc aicpu_ext_handle mem failed!"); | |||
| GE_CHK_STATUS_RET(ext_handle->Parse(ext_info), | |||
| "Parse kernel ext info failed, kernel_ext_info_size=%zu.", ext_info.size()); | |||
| GE_CHK_STATUS_RET(ext_handle->UpdateSessionInfoSessionId(davinci_model_->GetSessionId()), | |||
| "Update session info session id failed."); | |||
| GELOGD("Update aicpu_task ext_info session_info session_id is %lu", davinci_model_->GetSessionId()); | |||
| GE_CHK_STATUS_RET(ext_handle->UpdateExecuteMode(true), "UpdateExecuteMode failed."); | |||
| GELOGD("Update aicpu_task ext_info bit_map execute mode to 1."); | |||
| auto rt_ret = rtMalloc(&aicpu_ext_info_addr_, ext_handle->GetExtInfoLen(), RT_MEMORY_HBM); | |||
| if (rt_ret != RT_ERROR_NONE) { | |||
| GELOGE(RT_FAILED, "rtMalloc ext_info error: 0x%X, size=%zu", rt_ret, ext_info.size()); | |||
| return RT_ERROR_TO_GE_STATUS(rt_ret); | |||
| } | |||
| rt_ret = rtMemcpy(aicpu_ext_info_addr_, ext_info.size(), ext_info_data, ext_info.size(), RT_MEMCPY_HOST_TO_DEVICE); | |||
| rt_ret = rtMemcpy(aicpu_ext_info_addr_, ext_handle->GetExtInfoLen(), ext_handle->GetExtInfo(), | |||
| ext_handle->GetExtInfoLen(), RT_MEMCPY_HOST_TO_DEVICE); | |||
| if (rt_ret != RT_ERROR_NONE) { | |||
| GELOGE(RT_FAILED, "rtMemcpy ext_info error: 0x%X, size=%zu", rt_ret, ext_info.size()); | |||
| return RT_ERROR_TO_GE_STATUS(rt_ret); | |||
| @@ -64,6 +64,9 @@ Status AicpuExtInfoHandler::Parse(const std::string &ext_info) { | |||
| case aicpu::FWKAdapter::FWK_ADPT_EXT_SESSION_INFO: | |||
| GE_CHK_STATUS_RET(ParseExtSessionInfo(aicpu_ext_info), "Parse ext session info failed."); | |||
| break; | |||
| case aicpu::FWKAdapter::FWK_ADPT_EXT_BITMAP: | |||
| GE_CHK_STATUS_RET(ParseExtBitMap(aicpu_ext_info), "Parse ext bit map failed."); | |||
| break; | |||
| default: | |||
| GELOGD("Node[%s] ignore infoType=%d, infoLen=%u.", | |||
| node_name_.c_str(), aicpu_ext_info->infoType, aicpu_ext_info->infoLen); | |||
| @@ -140,6 +143,29 @@ Status AicpuExtInfoHandler::ParseExtSessionInfo(AicpuExtInfo *aicpu_ext_info) { | |||
| return SUCCESS; | |||
| } | |||
| Status AicpuExtInfoHandler::ParseExtBitMap(AicpuExtInfo *aicpu_ext_info) { | |||
| GE_CHK_BOOL_RET_STATUS(aicpu_ext_info->infoLen == sizeof(uint64_t), PARAM_INVALID, | |||
| "Node[%s] parse bit_map info failed as infoLen must be %zu but %u.", | |||
| node_name_.c_str(), sizeof(uint64_t), aicpu_ext_info->infoLen); | |||
| bit_map_ = reinterpret_cast<uint64_t *>(aicpu_ext_info->infoMsg); | |||
| GELOGI("Node[%s] bit_map info success infoLen=%u.", node_name_.c_str(), aicpu_ext_info->infoLen); | |||
| return SUCCESS; | |||
| } | |||
| Status AicpuExtInfoHandler::UpdateExecuteMode(bool flag) { | |||
| if (bit_map_ == nullptr) { | |||
| GELOGD("There is no bit_map in ext_info, no need update."); | |||
| return SUCCESS; | |||
| } | |||
| if (flag) { | |||
| *(bit_map_) |= 1; | |||
| } else { | |||
| *(bit_map_) &= ~1; | |||
| } | |||
| return SUCCESS; | |||
| } | |||
| Status AicpuExtInfoHandler::UpdateSessionInfo(uint64_t session_id, uint64_t kernel_id, bool sess_flag) { | |||
| if (session_info_ == nullptr) { | |||
| GELOGD("There is no session info in ext_info, no need update."); | |||
| @@ -57,6 +57,8 @@ class AicpuExtInfoHandler { | |||
| Status UpdateSessionInfoSessionId(uint64_t session_id); | |||
| Status UpdateExecuteMode(bool flag); | |||
| Status GetOutputShapeAndType(uint32_t output_index, GeShape &shape, DataType &data_type); | |||
| private: | |||
| @@ -65,6 +67,7 @@ class AicpuExtInfoHandler { | |||
| Status ParseExtInputShape(AicpuExtInfo *aicpu_ext_info); | |||
| Status ParseExtOutputShape(AicpuExtInfo *aicpu_ext_info); | |||
| Status ParseExtSessionInfo(AicpuExtInfo *aicpu_ext_info); | |||
| Status ParseExtBitMap(AicpuExtInfo *aicpu_ext_info); | |||
| static Status UpdateShapeAndType(const GeShape &shape, | |||
| DataType data_type, | |||
| @@ -80,6 +83,7 @@ class AicpuExtInfoHandler { | |||
| const uint32_t output_num_; | |||
| UnknowShapeOpType unknown_type_; | |||
| AicpuSessionInfo *session_info_ = nullptr; | |||
| uint64_t *bit_map_ = nullptr; | |||
| std::unique_ptr<uint8_t[]> ext_info_; | |||
| size_t ext_info_len_ = 0; | |||
| @@ -136,6 +136,7 @@ Status AicpuNodeTaskBase::UpdateExtInfo() { | |||
| return SUCCESS; | |||
| } | |||
| GE_CHK_STATUS_RET(aicpu_ext_handle_.UpdateExecuteMode(false), "UpdateExecuteMode failed."); | |||
| for (auto i = 0; i < node_item_->num_inputs; ++i) { | |||
| auto input_desc = node_item_->MutableInputDesc(i); | |||
| GE_CHECK_NOTNULL(input_desc); | |||
| @@ -373,6 +373,7 @@ Status AiCpuBaseTask::SetExtInfoAndType(const std::string &kernel_ext_info, uint | |||
| GE_CHK_STATUS_RET(aicpu_ext_handle_->UpdateSessionInfo(ULLONG_MAX, kernel_id, false), | |||
| "UpdateSessionInfo failed."); | |||
| GE_CHK_STATUS_RET(aicpu_ext_handle_->UpdateExecuteMode(true), "UpdateExecuteMode failed."); | |||
| GE_CHK_RT_RET(rtMalloc(&ext_info_addr_dev_, aicpu_ext_handle_->GetExtInfoLen(), RT_MEMORY_HBM)); | |||
| GE_CHK_RT_RET(rtMemcpy(ext_info_addr_dev_, aicpu_ext_handle_->GetExtInfoLen(), | |||
| @@ -404,13 +405,14 @@ Status AiCpuBaseTask::UpdateExtInfo(const std::vector<GeTensorDesc> &input_desc, | |||
| std::vector<GeTensorDesc> &output_desc, | |||
| rtStream_t stream) { | |||
| GELOGI("Update ext info begin, unknown_type=%d.", unknown_type_); | |||
| GE_CHECK_NOTNULL(aicpu_ext_handle_); | |||
| GE_CHK_STATUS_RET(aicpu_ext_handle_->UpdateExecuteMode(false), "UpdateExecuteMode failed."); | |||
| if (num_inputs_ == 0 && num_outputs_ == 0) { | |||
| GELOGI("No input and output, no need update ext info."); | |||
| return SUCCESS; | |||
| } | |||
| GE_CHECK_NOTNULL(aicpu_ext_handle_); | |||
| size_t non_const_index = 0; | |||
| for (size_t input_index = 0; input_index < num_inputs_; input_index++) { | |||
| if (input_index < input_is_const_.size() && input_is_const_[input_index]) { | |||
| @@ -137,4 +137,12 @@ TEST_F(UtestKernelExTaskInfo, kernel_ex_task_info_calculate_args) { | |||
| EXPECT_EQ(kernel_ex_task_info.CalculateArgs(task_def, &model), FAILED); | |||
| } | |||
| TEST_F(UtestKernelExTaskInfo, kernel_ex_task_ext_info) { | |||
| const string ext_info = {1, 1, 1, 1, 0, 0, 0, 0}; | |||
| const OpDescPtr op_desc = CreateOpDesc("FrameworkOp", "FrameworkOp"); | |||
| KernelExTaskInfo kernel_ex_task_info; | |||
| EXPECT_EQ(kernel_ex_task_info.InitTaskExtInfo(ext_info, op_desc), SUCCESS); | |||
| } | |||
| } // namespace ge | |||
| @@ -413,6 +413,9 @@ TEST_F(UtestKernelTaskInfo, init_kernel_taskInfo_with_aicpu_kernel_type_fail) { | |||
| // rtMemcpy -> RT_ERROR_INVALID_VALUE | |||
| EXPECT_EQ(kernel_task_info.Init(task_def, &model), SUCCESS); | |||
| const string ext_info = {1, 1, 1, 1, 0, 0, 0, 0}; | |||
| EXPECT_EQ(kernel_task_info.InitAicpuTaskExtInfo(ext_info), SUCCESS); | |||
| EXPECT_EQ(kernel_task_info.Distribute(), SUCCESS); | |||
| EXPECT_EQ(kernel_task_info.Release(), SUCCESS); | |||