@@ -26,8 +26,42 @@ | |||||
#include "graph/attr_value.h" | #include "graph/attr_value.h" | ||||
#include "graph/load/model_manager/davinci_model.h" | #include "graph/load/model_manager/davinci_model.h" | ||||
#include "graph/load/model_manager/model_manager.h" | #include "graph/load/model_manager/model_manager.h" | ||||
#include "hybrid/node_executor/aicpu/aicpu_ext_info.h" | |||||
#include "framework/common/debug/log.h" | |||||
namespace ge { | namespace ge { | ||||
/// Parses the task's kernel ext info, switches its execute-mode bit on, and copies the
/// resulting buffer to device memory referenced by ext_info_addr_.
///
/// @param ext_info  Serialized ext info taken from the task definition. When empty,
///                  nothing is allocated and SUCCESS is returned.
/// @param op_desc   Op description supplying the node name, input/output counts and the
///                  unknown-shape-type attribute used to build the ext info handler.
/// @return SUCCESS on success or when ext_info is empty; the failing status from the
///         null check, handler allocation, Parse or UpdateExecuteMode; or the converted
///         runtime error when rtMalloc / rtMemcpy fails.
Status KernelExTaskInfo::InitTaskExtInfo(const std::string &ext_info, const OpDescPtr &op_desc) {
  if (ext_info.empty()) {
    return SUCCESS;
  }
  // op_desc is dereferenced below, so reject a null descriptor up front.
  GE_CHECK_NOTNULL(op_desc);

  // The attribute is optional: when it is absent the value stays 0.
  int32_t unknown_shape_type_val = 0;
  (void) AttrUtils::GetInt(op_desc, ::ge::ATTR_NAME_UNKNOWN_SHAPE_TYPE, unknown_shape_type_val);
  UnknowShapeOpType unknown_type = static_cast<UnknowShapeOpType>(unknown_shape_type_val);
  uint32_t num_inputs = op_desc->GetInputsSize();
  uint32_t num_outputs = op_desc->GetOutputsSize();

  std::unique_ptr<ge::hybrid::AicpuExtInfoHandler> ext_handle(
      new(std::nothrow) ::ge::hybrid::AicpuExtInfoHandler(op_desc->GetName(),
                                                          num_inputs,
                                                          num_outputs,
                                                          unknown_type));
  GE_CHK_BOOL_RET_STATUS(ext_handle != nullptr, FAILED, "Malloc aicpu_ext_handle mem failed!");
  GE_CHK_STATUS_RET(ext_handle->Parse(ext_info),
                    "Parse kernel ext info failed, kernel_ext_info_size=%zu.", ext_info.size());
  GE_CHK_STATUS_RET(ext_handle->UpdateExecuteMode(true), "UpdateExecuteMode failed.");
  GELOGD("Update aicpu_task ext_info bit_map execute mode to 1.");

  // Allocate and copy using the handler's own buffer, which now carries the updated bit map.
  const size_t ext_info_len = ext_handle->GetExtInfoLen();
  auto rt_ret = rtMalloc(&ext_info_addr_, ext_info_len, RT_MEMORY_HBM);
  if (rt_ret != RT_ERROR_NONE) {
    GELOGE(RT_FAILED, "rtMalloc ext_info error: 0x%X, size=%zu", rt_ret, ext_info_len);
    return RT_ERROR_TO_GE_STATUS(rt_ret);
  }
  rt_ret = rtMemcpy(ext_info_addr_, ext_info_len, ext_handle->GetExtInfo(),
                    ext_info_len, RT_MEMCPY_HOST_TO_DEVICE);
  if (rt_ret != RT_ERROR_NONE) {
    GELOGE(RT_FAILED, "rtMemcpy ext_info error: 0x%X, size=%zu", rt_ret, ext_info_len);
    return RT_ERROR_TO_GE_STATUS(rt_ret);
  }
  return SUCCESS;
}
Status KernelExTaskInfo::Init(const domi::TaskDef &task_def, DavinciModel *davinci_model) { | Status KernelExTaskInfo::Init(const domi::TaskDef &task_def, DavinciModel *davinci_model) { | ||||
GELOGI("KernelExTaskInfo Init Start."); | GELOGI("KernelExTaskInfo Init Start."); | ||||
GE_CHECK_NOTNULL(davinci_model); | GE_CHECK_NOTNULL(davinci_model); | ||||
@@ -63,16 +97,8 @@ Status KernelExTaskInfo::Init(const domi::TaskDef &task_def, DavinciModel *davin | |||||
} | } | ||||
const auto &ext_info = kernel_ex_def.kernel_ext_info(); | const auto &ext_info = kernel_ex_def.kernel_ext_info(); | ||||
if (!ext_info.empty()) { | |||||
auto rt_ret = rtMalloc(&ext_info_addr_, ext_info.size(), RT_MEMORY_HBM); | |||||
GE_IF_BOOL_EXEC(rt_ret != RT_ERROR_NONE, | |||||
GELOGE(RT_FAILED, "rtMalloc ext_info error: 0x%X, size=%zu", rt_ret, ext_info.size()); | |||||
return RT_ERROR_TO_GE_STATUS(rt_ret);) | |||||
rt_ret = rtMemcpy(ext_info_addr_, ext_info.size(), ext_info.c_str(), ext_info.size(), RT_MEMCPY_HOST_TO_DEVICE); | |||||
GE_IF_BOOL_EXEC(rt_ret != RT_ERROR_NONE, | |||||
GELOGE(RT_FAILED, "rtMemcpy ext_info error: 0x%X, size=%zu", rt_ret, ext_info.size()); | |||||
return RT_ERROR_TO_GE_STATUS(rt_ret);) | |||||
} | |||||
GE_CHK_STATUS_RET(InitTaskExtInfo(ext_info, op_desc), | |||||
"Init aicpu tf_task ext info failed, ext_info size=%zu", ext_info.size()); | |||||
GELOGI("Node[%s] type[%s] kernel_ext_info size=%zu, ext_info_addr_=%p", op_desc->GetName().c_str(), | GELOGI("Node[%s] type[%s] kernel_ext_info size=%zu, ext_info_addr_=%p", op_desc->GetName().c_str(), | ||||
op_desc->GetType().c_str(), ext_info.size(), ext_info_addr_); | op_desc->GetType().c_str(), ext_info.size(), ext_info_addr_); | ||||
@@ -62,6 +62,7 @@ class KernelExTaskInfo : public TaskInfo { | |||||
void SetIoAddrs(const OpDescPtr &op_desc); | void SetIoAddrs(const OpDescPtr &op_desc); | ||||
void InitDumpTask(void *addr, const OpDescPtr &op_desc); | void InitDumpTask(void *addr, const OpDescPtr &op_desc); | ||||
Status InitTaskExtInfo(const std::string &ext_info, const OpDescPtr &op_desc); | |||||
uint32_t task_id_; | uint32_t task_id_; | ||||
uint32_t stream_id_; | uint32_t stream_id_; | ||||
@@ -964,39 +964,32 @@ Status KernelTaskInfo::InitAicpuTaskExtInfo(const std::string &ext_info) { | |||||
return SUCCESS; | return SUCCESS; | ||||
} | } | ||||
std::unique_ptr<uint8_t[]> copy_ext_info; | |||||
copy_ext_info.reset(new(std::nothrow)uint8_t[ext_info.size()]); | |||||
GE_CHECK_NOTNULL(copy_ext_info); | |||||
auto sec_ret = memcpy_s(copy_ext_info.get(), ext_info.size(), ext_info.c_str(), ext_info.size()); | |||||
if (sec_ret != EOK) { | |||||
GELOGE(FAILED, "memcpy failed, ret: %d", sec_ret); | |||||
return FAILED; | |||||
} | |||||
auto ext_info_data = copy_ext_info.get(); | |||||
size_t offset = 0; | |||||
while (offset + sizeof(aicpu::FWKAdapter::ExtInfo) <= ext_info.size()) { | |||||
auto aicpu_ext_info = reinterpret_cast<aicpu::FWKAdapter::ExtInfo *>(ext_info_data + offset); | |||||
GELOGD("Ext infoType=%d, infoLen=%u.", aicpu_ext_info->infoType, aicpu_ext_info->infoLen); | |||||
if (aicpu_ext_info->infoType == aicpu::FWKAdapter::FWK_ADPT_EXT_SESSION_INFO) { | |||||
GE_CHK_BOOL_RET_STATUS(aicpu_ext_info->infoLen == sizeof(SessionInfo), PARAM_INVALID, | |||||
"Parse ext session info failed as infoLen must be %zu but %u.", | |||||
sizeof(SessionInfo), aicpu_ext_info->infoLen); | |||||
SessionInfo *session_info = reinterpret_cast<SessionInfo *>(aicpu_ext_info->infoMsg); | |||||
session_info->sessionId = davinci_model_->GetSessionId(); | |||||
session_info->sessFlag = true; | |||||
GELOGD("Update aicpu_task ext_info session_info session_id is %lu", session_info->sessionId); | |||||
} | |||||
offset += sizeof(aicpu::FWKAdapter::ExtInfo); | |||||
offset += aicpu_ext_info->infoLen; | |||||
} | |||||
auto rt_ret = rtMalloc(&aicpu_ext_info_addr_, ext_info.size(), RT_MEMORY_HBM); | |||||
int32_t unknown_shape_type_val = 0; | |||||
(void) AttrUtils::GetInt(op_desc, ::ge::ATTR_NAME_UNKNOWN_SHAPE_TYPE, unknown_shape_type_val); | |||||
UnknowShapeOpType unknown_type = static_cast<UnknowShapeOpType>(unknown_shape_type_val); | |||||
uint32_t num_inputs = op_desc->GetInputsSize(); | |||||
uint32_t num_outputs = op_desc->GetOutputsSize(); | |||||
std::unique_ptr<ge::hybrid::AicpuExtInfoHandler> ext_handle( | |||||
new(std::nothrow) ::ge::hybrid::AicpuExtInfoHandler(op_desc->GetName(), | |||||
num_inputs, | |||||
num_outputs, | |||||
unknown_type)); | |||||
GE_CHK_BOOL_RET_STATUS(ext_handle != nullptr, FAILED, "Malloc aicpu_ext_handle mem failed!"); | |||||
GE_CHK_STATUS_RET(ext_handle->Parse(ext_info) | |||||
"Parse kernel ext info failed, kernel_ext_info_size=%zu.", ext_info.size()); | |||||
GE_CHK_STATUS_RET(aicpu_ext_handle_->UpdateSessionInfo(davinci_model_->GetSessionId()), | |||||
"Update session info session id sfailed."); | |||||
GELOGD("Update aicpu_task ext_info session_info session_id is %lu", davinci_model_->GetSessionId()); | |||||
GE_CHK_STATUS_RET(ext_handle->UpdateExecuteMode(true), "UpdateExecuteMode failed."); | |||||
GELOGD("Update aicpu_task ext_info bit_map execute mode to 1."); | |||||
auto rt_ret = rtMalloc(&aicpu_ext_info_addr_, ext_handle->GetExtInfoLen(), RT_MEMORY_HBM); | |||||
if (rt_ret != RT_ERROR_NONE) { | if (rt_ret != RT_ERROR_NONE) { | ||||
GELOGE(RT_FAILED, "rtMalloc ext_info error: 0x%X, size=%zu", rt_ret, ext_info.size()); | GELOGE(RT_FAILED, "rtMalloc ext_info error: 0x%X, size=%zu", rt_ret, ext_info.size()); | ||||
return RT_ERROR_TO_GE_STATUS(rt_ret); | return RT_ERROR_TO_GE_STATUS(rt_ret); | ||||
} | } | ||||
rt_ret = rtMemcpy(aicpu_ext_info_addr_, ext_info.size(), ext_info_data, ext_info.size(), RT_MEMCPY_HOST_TO_DEVICE); | |||||
rt_ret = rtMemcpy(aicpu_ext_info_addr_, ext_handle->GetExtInfoLen(), ext_handle_->GetExtInfo(), | |||||
ext_handle->GetExtInfoLen(), RT_MEMCPY_HOST_TO_DEVICE); | |||||
if (rt_ret != RT_ERROR_NONE) { | if (rt_ret != RT_ERROR_NONE) { | ||||
GELOGE(RT_FAILED, "rtMemcpy ext_info error: 0x%X, size=%zu", rt_ret, ext_info.size()); | GELOGE(RT_FAILED, "rtMemcpy ext_info error: 0x%X, size=%zu", rt_ret, ext_info.size()); | ||||
return RT_ERROR_TO_GE_STATUS(rt_ret); | return RT_ERROR_TO_GE_STATUS(rt_ret); | ||||
@@ -64,6 +64,9 @@ Status AicpuExtInfoHandler::Parse(const std::string &ext_info) { | |||||
case aicpu::FWKAdapter::FWK_ADPT_EXT_SESSION_INFO: | case aicpu::FWKAdapter::FWK_ADPT_EXT_SESSION_INFO: | ||||
GE_CHK_STATUS_RET(ParseExtSessionInfo(aicpu_ext_info), "Parse ext session info failed."); | GE_CHK_STATUS_RET(ParseExtSessionInfo(aicpu_ext_info), "Parse ext session info failed."); | ||||
break; | break; | ||||
case aicpu::FWKAdapter::FWK_ADPT_EXT_BITMAP: | |||||
GE_CHK_STATUS_RET(ParseExtBitMap(aicpu_ext_info), "Parse ext bit map failed."); | |||||
break; | |||||
default: | default: | ||||
GELOGD("Node[%s] ignore infoType=%d, infoLen=%u.", | GELOGD("Node[%s] ignore infoType=%d, infoLen=%u.", | ||||
node_name_.c_str(), aicpu_ext_info->infoType, aicpu_ext_info->infoLen); | node_name_.c_str(), aicpu_ext_info->infoType, aicpu_ext_info->infoLen); | ||||
@@ -140,6 +143,29 @@ Status AicpuExtInfoHandler::ParseExtSessionInfo(AicpuExtInfo *aicpu_ext_info) { | |||||
return SUCCESS; | return SUCCESS; | ||||
} | } | ||||
/// Handles a bit-map ext info entry: validates its payload length and remembers where
/// the payload lives so it can be modified later (see UpdateExecuteMode).
///
/// @param aicpu_ext_info  Entry whose infoMsg holds the bit map.
/// @return PARAM_INVALID when infoLen is not sizeof(uint64_t); SUCCESS otherwise.
Status AicpuExtInfoHandler::ParseExtBitMap(AicpuExtInfo *aicpu_ext_info) {
  // The payload has to be exactly one 64-bit word.
  constexpr size_t kBitMapLen = sizeof(uint64_t);
  const auto info_len = aicpu_ext_info->infoLen;
  GE_CHK_BOOL_RET_STATUS(info_len == kBitMapLen, PARAM_INVALID,
                         "Node[%s] parse bit_map info failed as infoLen must be %zu but %u.",
                         node_name_.c_str(), kBitMapLen, info_len);

  // Keep a pointer into the entry's payload rather than a copy, so later updates
  // are written straight into the entry.
  bit_map_ = reinterpret_cast<uint64_t *>(aicpu_ext_info->infoMsg);
  GELOGI("Node[%s] bit_map info success infoLen=%u.", node_name_.c_str(), info_len);
  return SUCCESS;
}
/// Sets or clears the lowest bit of the parsed bit map.
///
/// @param flag  true sets bit 0, false clears it; all other bits are left untouched.
/// @return Always SUCCESS; when no bit map was parsed the call is a no-op.
Status AicpuExtInfoHandler::UpdateExecuteMode(bool flag) {
  if (bit_map_ == nullptr) {
    GELOGD("There is no bit_map in ext_info, no need update.");
    return SUCCESS;
  }

  constexpr uint64_t kExecuteModeMask = 1ULL;
  const uint64_t current = *bit_map_;
  *bit_map_ = flag ? (current | kExecuteModeMask) : (current & ~kExecuteModeMask);
  return SUCCESS;
}
Status AicpuExtInfoHandler::UpdateSessionInfo(uint64_t session_id, uint64_t kernel_id, bool sess_flag) { | Status AicpuExtInfoHandler::UpdateSessionInfo(uint64_t session_id, uint64_t kernel_id, bool sess_flag) { | ||||
if (session_info_ == nullptr) { | if (session_info_ == nullptr) { | ||||
GELOGD("There is no session info in ext_info, no need update."); | GELOGD("There is no session info in ext_info, no need update."); | ||||
@@ -57,6 +57,8 @@ class AicpuExtInfoHandler { | |||||
Status UpdateSessionInfoSessionId(uint64_t session_id); | Status UpdateSessionInfoSessionId(uint64_t session_id); | ||||
Status UpdateExecuteMode(bool flag); | |||||
Status GetOutputShapeAndType(uint32_t output_index, GeShape &shape, DataType &data_type); | Status GetOutputShapeAndType(uint32_t output_index, GeShape &shape, DataType &data_type); | ||||
private: | private: | ||||
@@ -65,6 +67,7 @@ class AicpuExtInfoHandler { | |||||
Status ParseExtInputShape(AicpuExtInfo *aicpu_ext_info); | Status ParseExtInputShape(AicpuExtInfo *aicpu_ext_info); | ||||
Status ParseExtOutputShape(AicpuExtInfo *aicpu_ext_info); | Status ParseExtOutputShape(AicpuExtInfo *aicpu_ext_info); | ||||
Status ParseExtSessionInfo(AicpuExtInfo *aicpu_ext_info); | Status ParseExtSessionInfo(AicpuExtInfo *aicpu_ext_info); | ||||
Status ParseExtBitMap(AicpuExtInfo *aicpu_ext_info); | |||||
static Status UpdateShapeAndType(const GeShape &shape, | static Status UpdateShapeAndType(const GeShape &shape, | ||||
DataType data_type, | DataType data_type, | ||||
@@ -80,6 +83,7 @@ class AicpuExtInfoHandler { | |||||
const uint32_t output_num_; | const uint32_t output_num_; | ||||
UnknowShapeOpType unknown_type_; | UnknowShapeOpType unknown_type_; | ||||
AicpuSessionInfo *session_info_ = nullptr; | AicpuSessionInfo *session_info_ = nullptr; | ||||
uint64_t *bit_map_ = nullptr; | |||||
std::unique_ptr<uint8_t[]> ext_info_; | std::unique_ptr<uint8_t[]> ext_info_; | ||||
size_t ext_info_len_ = 0; | size_t ext_info_len_ = 0; | ||||
@@ -60,6 +60,7 @@ Status AicpuNodeTaskBase::InitExtInfo(const std::string &kernel_ext_info, int64_ | |||||
GELOGD("To update aicpu_task ext_info session_info session_id to %lu", session_id); | GELOGD("To update aicpu_task ext_info session_info session_id to %lu", session_id); | ||||
GE_CHK_STATUS_RET(aicpu_ext_handle_.UpdateSessionInfoSessionId(session_id), | GE_CHK_STATUS_RET(aicpu_ext_handle_.UpdateSessionInfoSessionId(session_id), | ||||
"UpdateSessionInfoSessionId failed."); | "UpdateSessionInfoSessionId failed."); | ||||
GE_CHK_STATUS_RET(aicpu_ext_handle_.UpdateExecuteMode(true), "UpdateExecuteMode failed."); | |||||
// copy task args buf | // copy task args buf | ||||
GE_CHK_STATUS_RET(AllocTensorBuffer(aicpu_ext_handle_.GetExtInfoLen(), ext_info_addr_dev_), | GE_CHK_STATUS_RET(AllocTensorBuffer(aicpu_ext_handle_.GetExtInfoLen(), ext_info_addr_dev_), | ||||
@@ -136,6 +137,7 @@ Status AicpuNodeTaskBase::UpdateExtInfo() { | |||||
return SUCCESS; | return SUCCESS; | ||||
} | } | ||||
GE_CHK_STATUS_RET(ext_handle->UpdateExecuteMode(false), "UpdateExecuteMode failed."); | |||||
for (auto i = 0; i < node_item_->num_inputs; ++i) { | for (auto i = 0; i < node_item_->num_inputs; ++i) { | ||||
auto input_desc = node_item_->MutableInputDesc(i); | auto input_desc = node_item_->MutableInputDesc(i); | ||||
GE_CHECK_NOTNULL(input_desc); | GE_CHECK_NOTNULL(input_desc); | ||||
@@ -373,6 +373,7 @@ Status AiCpuBaseTask::SetExtInfoAndType(const std::string &kernel_ext_info, uint | |||||
GE_CHK_STATUS_RET(aicpu_ext_handle_->UpdateSessionInfo(ULLONG_MAX, kernel_id, false), | GE_CHK_STATUS_RET(aicpu_ext_handle_->UpdateSessionInfo(ULLONG_MAX, kernel_id, false), | ||||
"UpdateSessionInfo failed."); | "UpdateSessionInfo failed."); | ||||
GE_CHK_STATUS_RET(ext_handle->UpdateExecuteMode(true, "UpdateExecuteMode failed."); | |||||
GE_CHK_RT_RET(rtMalloc(&ext_info_addr_dev_, aicpu_ext_handle_->GetExtInfoLen(), RT_MEMORY_HBM)); | GE_CHK_RT_RET(rtMalloc(&ext_info_addr_dev_, aicpu_ext_handle_->GetExtInfoLen(), RT_MEMORY_HBM)); | ||||
GE_CHK_RT_RET(rtMemcpy(ext_info_addr_dev_, aicpu_ext_handle_->GetExtInfoLen(), | GE_CHK_RT_RET(rtMemcpy(ext_info_addr_dev_, aicpu_ext_handle_->GetExtInfoLen(), | ||||
@@ -404,13 +405,14 @@ Status AiCpuBaseTask::UpdateExtInfo(const std::vector<GeTensorDesc> &input_desc, | |||||
std::vector<GeTensorDesc> &output_desc, | std::vector<GeTensorDesc> &output_desc, | ||||
rtStream_t stream) { | rtStream_t stream) { | ||||
GELOGI("Update ext info begin, unknown_type=%d.", unknown_type_); | GELOGI("Update ext info begin, unknown_type=%d.", unknown_type_); | ||||
GE_CHECK_NOTNULL(aicpu_ext_handle_); | |||||
GE_CHK_STATUS_RET(ext_handle->UpdateExecuteMode(false), "UpdateExecuteMode failed."); | |||||
if (num_inputs_ == 0 && num_outputs_ == 0) { | if (num_inputs_ == 0 && num_outputs_ == 0) { | ||||
GELOGI("No input and output, no need update ext info."); | GELOGI("No input and output, no need update ext info."); | ||||
return SUCCESS; | return SUCCESS; | ||||
} | } | ||||
GE_CHECK_NOTNULL(aicpu_ext_handle_); | |||||
size_t non_const_index = 0; | size_t non_const_index = 0; | ||||
for (size_t input_index = 0; input_index < num_inputs_; input_index++) { | for (size_t input_index = 0; input_index < num_inputs_; input_index++) { | ||||
if (input_index < input_is_const_.size() && input_is_const_[input_index]) { | if (input_index < input_is_const_.size() && input_is_const_[input_index]) { | ||||
@@ -137,4 +137,12 @@ TEST_F(UtestKernelExTaskInfo, kernel_ex_task_info_calculate_args) { | |||||
EXPECT_EQ(kernel_ex_task_info.CalculateArgs(task_def, &model), FAILED); | EXPECT_EQ(kernel_ex_task_info.CalculateArgs(task_def, &model), FAILED); | ||||
} | } | ||||
// Smoke test: InitTaskExtInfo reports SUCCESS for the given ext info buffer.
// NOTE(review): the char array begins with a NUL byte, so its implicit conversion to
// std::string yields an empty string and only the early-return path for empty ext info
// is exercised — confirm whether that is the intent.
TEST_F(UtestKernelExTaskInfo, kernel_ex_task_ext_info) {
  const OpDescPtr framework_op = CreateOpDesc("FrameworkOp", "FrameworkOp");
  const char raw_ext_info[4] = {0, 0, 0, 4};

  KernelExTaskInfo task_info;
  const Status init_ret = task_info.InitTaskExtInfo(raw_ext_info, framework_op);
  EXPECT_EQ(init_ret, SUCCESS);
}
} // namespace ge | } // namespace ge |
@@ -1195,4 +1195,10 @@ TEST_F(UtestKernelTaskInfo, kernel_task_info_super_kernel_info) { | |||||
EXPECT_EQ(kernel_task_info.SKTFinalize(), SUCCESS); | EXPECT_EQ(kernel_task_info.SKTFinalize(), SUCCESS); | ||||
} | } | ||||
// Smoke test: InitAicpuTaskExtInfo reports SUCCESS for the given ext info buffer.
// NOTE(review): the char array begins with a NUL byte, so the std::string built from it
// is empty; presumably only the empty-input path is covered here — confirm.
TEST_F(UtestKernelTaskInfo, kernel_ask_ext_info) {
  const char raw_ext_info[4] = {0, 0, 0, 4};

  KernelTaskInfo task_info;
  const Status init_ret = task_info.InitAicpuTaskExtInfo(raw_ext_info);
  EXPECT_EQ(init_ret, SUCCESS);
}
} // namespace ge | } // namespace ge |