@@ -106,6 +106,7 @@ Status KernelExTaskInfo::Init(const domi::TaskDef &task_def, DavinciModel *davin
   // 1. Copy context from kernelExDef.private to workspace
   uint32_t op_index = kernel_ex_def.op_index();
   OpDescPtr op_desc = davinci_model_->GetOpByIndex(op_index);
+  op_desc_ = op_desc;
   if (op_desc == nullptr) {
     REPORT_INNER_ERROR("E19999", "Can't get op_desc from davinci_model by index:%u", op_index);
     GELOGE(INTERNAL_ERROR, "[Get][Op] by index failed, index:%u is out of range!", op_index);
@@ -422,7 +423,7 @@ Status KernelExTaskInfo::Distribute() {
   if (topic_type_flag_ > 0) {
     dump_flag_ = dump_flag_ | topic_type_flag_;
   }
-  rtError_t rt_ret = rtKernelLaunchEx(kernel_buf_, kernel_buf_size_, dump_flag_, stream_);
+  rtError_t rt_ret = rtKernelLaunchFwk(op_desc_->GetName().c_str(), kernel_buf_, kernel_buf_size_, dump_flag_, stream_);
   if (rt_ret != RT_ERROR_NONE) {
-    REPORT_CALL_ERROR("E19999", "Call rtKernelLaunchEx failed, ret:0x%X", rt_ret);
-    GELOGE(RT_FAILED, "[Call][RtKernelLaunchEx] failed, ret:0x%X", rt_ret);
+    REPORT_CALL_ERROR("E19999", "Call rtKernelLaunchFwk failed, ret:0x%X", rt_ret);
+    GELOGE(RT_FAILED, "[Call][RtKernelLaunchFwk] failed, ret:0x%X", rt_ret);
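
Taken together, these hunks migrate GE's AICPU launches from rtKernelLaunchEx to rtKernelLaunchFwk, which threads the op name through to the runtime so a launch can be attributed to a concrete op in dumps and profiling. The prototypes implied by the call sites, and mirrored by the test stubs further down (consult the runtime's rt_kernel.h for the authoritative declarations):

// Old entry point, as inferred from the removed call sites:
rtError_t rtKernelLaunchEx(void *args, uint32_t argsSize, uint32_t flags, rtStream_t stream);
// New entry point, matching the stub added below; the op name comes first:
rtError_t rtKernelLaunchFwk(const char *opName, void *args, uint32_t argSize, uint32_t flags, rtStream_t rtStream);
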
@@ -70,6 +70,7 @@ class KernelExTaskInfo : public TaskInfo {
   uint32_t dump_flag_;
   uint32_t kernel_buf_size_;
   DavinciModel *davinci_model_;
+  OpDescPtr op_desc_;
   void *kernel_buf_;
   void *input_output_addr_;
   void *ext_info_addr_;
@@ -440,9 +440,10 @@ Status KernelTaskInfo::Distribute() {
     }
     GELOGI("distribute task info kernel_type %d, flag %d", kernel_type_, dump_flag_);
     // blockDim is reserved parameter, set to 1
-    rt_ret = rtCpuKernelLaunchWithFlag(reinterpret_cast<const void *>(so_name_.c_str()),
-                                       reinterpret_cast<const void *>(kernel_name_.c_str()), 1, args_, args_size_,
-                                       nullptr, stream_, dump_flag_);
+    std::string op_name = op_desc_->GetName();
+    rtKernelLaunchNames_t launch_name = {so_name_.c_str(), kernel_name_.c_str(), op_name.c_str()};
+    rt_ret = rtAicpuKernelLaunchWithFlag(&launch_name, 1, args_, args_size_,
+                                         nullptr, stream_, dump_flag_);
     call_save_dump_ = true;
   } else {
     /* default: not skt launch */
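
The brace initializer above packs the three strings into the runtime's name descriptor. A minimal sketch of that struct, with field names that are assumptions inferred from the initializer order (the authoritative definition ships with the runtime's rt_kernel.h):

// Assumed layout of the name bundle consumed by rtAicpuKernelLaunchWithFlag;
// only the field order (so name, kernel name, op name) is implied by the call sites.
typedef struct tagRtKernelLaunchNames {
  const char *soName;      // shared object hosting the AICPU kernel
  const char *kernelName;  // kernel symbol within that library
  const char *opName;      // op instance name, for dump/profiling attribution
} rtKernelLaunchNames_t;

Note that op_name is bound to a named local rather than calling op_desc_->GetName().c_str() inline: GetName() returns the string by value, so an inline c_str() would point into a temporary that dies before the launch statement runs, whereas in the rtKernelLaunchFwk call above the temporary legitimately lives for the duration of that one full expression.
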
@@ -477,7 +477,7 @@ Status AicpuTfNodeTask::CopyDataToHbm(TaskContext &context,
   GE_CHK_STATUS_RET_NOLOG(PrepareCopyInputs(context, out_shape_hbm));
   RECORD_CALLBACK_EVENT(context.GetExecutionContext(), node_name_.c_str(), "[LaunchCopy] Start");
-  GE_CHK_RT_RET(rtKernelLaunchEx(copy_task_args_buf_->GetData(), sizeof(STR_FWK_OP_KERNEL),
+  GE_CHK_RT_RET(rtKernelLaunchFwk(node_name_.c_str(), copy_task_args_buf_->GetData(), sizeof(STR_FWK_OP_KERNEL),
                                  RT_KERNEL_DEFAULT, context.GetStream()));
   RECORD_CALLBACK_EVENT(context.GetExecutionContext(), node_name_.c_str(), "[LaunchCopy] End");
@@ -638,7 +638,8 @@ Status AicpuTfNodeTask::LaunchTask(TaskContext &context) {
   GELOGD("Node[%s] launch task start, unknown_type=%d.", node_name_.c_str(), unknown_type_);
   uint32_t flag = RT_KERNEL_DEFAULT;
   RECORD_EXECUTION_EVENT(context.GetExecutionContext(), node_name_.c_str(), "[AicpuTfNodertKernelLaunchEx] Start");
-  GE_CHK_RT_RET(rtKernelLaunchEx(kernel_buf_->GetData(), kernel_buf_->GetSize(), flag, context.GetStream()));
+  GE_CHK_RT_RET(rtKernelLaunchFwk(node_name_.c_str(), kernel_buf_->GetData(),
+                                  kernel_buf_->GetSize(), flag, context.GetStream()));
   RECORD_EXECUTION_EVENT(context.GetExecutionContext(), node_name_.c_str(), "[AicpuTfNodertKernelLaunchEx] End");
   GELOGD("Node[%s] launch end.", node_name_.c_str());
   if (need_sync_) {
@@ -819,11 +820,11 @@ Status AicpuNodeTask::LaunchTask(TaskContext &context) {
   if (kernel_type == ccKernelType::CUST_AI_CPU) {
     flag |= static_cast<uint32_t>(RT_KERNEL_CUSTOM_AICPU);
   }
-  auto rt_ret = rtCpuKernelLaunchWithFlag(reinterpret_cast<const void *>(so_name.c_str()),
-                                          reinterpret_cast<const void *>(kernel_name.c_str()),
-                                          1,  // default core dim is 1
-                                          args_.get(), args_size_,
-                                          nullptr, context.GetStream(), flag);
+  rtKernelLaunchNames_t launch_name = {so_name.c_str(), kernel_name.c_str(), node_name_.c_str()};
+  auto rt_ret = rtAicpuKernelLaunchWithFlag(&launch_name,
+                                            1,  // default core dim is 1
+                                            args_.get(), args_size_,
+                                            nullptr, context.GetStream(), flag);
   GE_CHK_RT_RET(rt_ret);
   GELOGD("Node[%s] launch task end.", node_name_.c_str());
   return SUCCESS;
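
In all three migrated call sites the strings behind the pointers handed to the runtime (node_name_, so_name, kernel_name, op_name) are members or locals that remain in scope until the launch call returns. This relies on the runtime copying any names it needs before returning rather than retaining the pointers, which appears to be the contract but is not spelled out in this diff.
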
@@ -0,0 +1,10 @@
+approvers:
+- gegenhua
+reviewers:
+- wqtshg
+- ji_chen
+- xchu42
+- sheng-nan
+- wangxiaotian22
+- zhangxiaokun9
+- tangqunzhang
@@ -460,6 +460,21 @@ rtError_t rtDebugUnRegisterForStream(rtStream_t stream) {
 rtError_t rtFftsTaskLaunch(rtFftsTaskInfo_t *fftsTaskInfo, rtStream_t stream) {
   return RT_ERROR_NONE;
 }
+
+rtError_t rtKernelLaunchFwk(const char *opName, void *args, uint32_t argSize, uint32_t flags, rtStream_t rtStream) {
+  return RT_ERROR_NONE;
+}
+
+rtError_t rtAicpuKernelLaunchWithFlag(const rtKernelLaunchNames_t *launchNames, uint32_t blockDim, const void *args,
+                                      uint32_t argSize, rtSmDesc_t *smDesc, rtStream_t stream, uint32_t flags) {
+  return RT_ERROR_NONE;
+}
+
+rtError_t rtAicpuKernelLaunch(const rtKernelLaunchNames_t *launchNames, uint32_t blockDim, const void *args,
+                              uint32_t argSize, rtSmDesc_t *smDesc, rtStream_t stream) {
+  return RT_ERROR_NONE;
+}
+
 #ifdef __cplusplus
 }
 #endif
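
These stubs return RT_ERROR_NONE unconditionally, so the migrated launch paths can be exercised on a host without an Ascend device; the new aicpu_node_executor_unittest.cc registered in the CMake hunk below drives them.
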
@@ -735,6 +735,7 @@ set(HYBRID_TEST_FILES
     "hybrid/node_executor/host_cpu/host_cpu_node_task_unittest.cc"
     "hybrid/node_executor/ge_local/ge_local_node_executor_unittest.cc"
     "hybrid/node_executor/hccl/hccl_node_executor_unittest.cc"
+    "hybrid/node_executor/aicpu/aicpu_node_executor_unittest.cc"
     "hybrid/executor/hybrid_model_async_executor_unittest.cc"
     "hybrid/executor/hybrid_model_pipeline_executor_unittest.cc"
     "hybrid/node_executor/aicore/aicore_task_compiler_unittest.cc"
@@ -0,0 +1,168 @@
+/**
+ * Copyright 2021 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <gmock/gmock.h>
+#include <gtest/gtest.h>
+#include <vector>
+
+#define private public
+#define protected public
+#include "graph/runtime_inference_context.h"
+#include "aicpu/common/aicpu_task_struct.h"
+#include "hybrid/executor/subgraph_context.h"
+#include "hybrid/node_executor/aicpu/aicpu_node_executor.h"
+#undef protected
+#undef private
+
+using namespace std;
+using namespace testing;
+
+namespace {
+struct AicpuTaskStruct {
+  aicpu::AicpuParamHead head;
+  uint64_t io_addrp[6];
+} __attribute__((packed));
+}  // namespace
+
+namespace ge {
+using namespace hybrid;
+
+class UtestAicpuNodeExecutor : public testing::Test {
+ protected:
+  void SetUp() {}
+  void TearDown() {}
+};
+
+static NodePtr CreateNode(ComputeGraphPtr graph, const string &name, const string &type, int in_num, int out_num) {
+  OpDescPtr op_desc = std::make_shared<OpDesc>(name, type);
+  op_desc->SetStreamId(0);
+  static int32_t index = 0;
+  op_desc->SetId(index++);
+
+  GeTensorDesc tensor(GeShape(), FORMAT_ND, DT_INT64);
+  TensorUtils::SetSize(tensor, 64);
+  vector<int64_t> input_offset;
+  for (int i = 0; i < in_num; i++) {
+    op_desc->AddInputDesc(tensor);
+    input_offset.emplace_back(i * 64);
+  }
+  op_desc->SetInputOffset(input_offset);
+
+  vector<int64_t> output_offset;
+  for (int i = 0; i < out_num; i++) {
+    op_desc->AddOutputDesc(tensor);
+    output_offset.emplace_back(in_num * 64 + i * 64);
+  }
+  op_desc->SetOutputOffset(output_offset);
+
+  return graph->AddNode(op_desc);
+}
+
+TEST_F(UtestAicpuNodeExecutor, aicpu_tf_node_task) {
+  ComputeGraphPtr graph = std::make_shared<ComputeGraph>("test");
+  GeModelPtr ge_sub_model = std::make_shared<GeModel>();
+  GeRootModelPtr ge_root_model = std::make_shared<GeRootModel>(graph);
+  ge_root_model->SetModelName("test_name");
+  ge_root_model->SetSubgraphInstanceNameToModel("sub", ge_sub_model);
+  HybridModel hybrid_model(ge_root_model);
+
+  NodePtr node = CreateNode(graph, "frameworkop", FRAMEWORK_OP_TYPE, 4, 2);
+  std::unique_ptr<NodeItem> new_node;
+  ASSERT_EQ(NodeItem::Create(node, new_node), SUCCESS);
+  NodeItem *node_item = new_node.get();
+  hybrid_model.node_items_[node] = std::move(new_node);
+  node_item->input_start = 0;
+  node_item->output_start = 0;
+  node_item->is_dynamic = true;
+  node_item->shape_inference_type = DEPEND_COMPUTE;
+
+  GraphItem graph_item;
+  graph_item.node_items_.emplace_back(node_item);
+  graph_item.total_inputs_ = 4;
+  graph_item.total_outputs_ = 2;
+
+  GraphExecutionContext graph_context;
+  SubgraphContext subgraph_context(&graph_item, &graph_context);
+  ASSERT_EQ(subgraph_context.Init(), SUCCESS);
+  graph_context.callback_manager = std::unique_ptr<CallbackManager>(new CallbackManager());
+
+  auto node_state = subgraph_context.GetOrCreateNodeState(node_item);
+  ASSERT_NE(node_state, nullptr);
+
+  // Set all four inputs; the backing value lives outside the loop so the
+  // pointer wrapped by each TensorValue stays valid after the loop ends.
+  uint64_t input_value = 512;
+  for (int i = 0; i < 4; ++i) {
+    TensorValue in_tensor(&input_value, sizeof(input_value));
+    subgraph_context.SetInput(*node_item, i, in_tensor);
+  }
+
+  uint64_t value_0 = 512;
+  TensorValue out_tensor0(&value_0, sizeof(value_0));
+  subgraph_context.SetOutput(*node_item, 0, out_tensor0);
+  uint64_t value_1 = 512;
+  TensorValue out_tensor1(&value_1, sizeof(value_1));
+  subgraph_context.SetOutput(*node_item, 1, out_tensor1);
+
+  // task
+  domi::TaskDef task_def;
+  domi::KernelExDef *kernel_ex_def = task_def.mutable_kernel_ex();
+  kernel_ex_def->set_kernel_ext_info_size(12);
+  // Build the ext info blob in place: an AicpuExtInfo header followed by an
+  // int32_t shape-type payload (infoMsg is a flexible array member, so the
+  // header cannot be filled on the stack and copied over afterwards).
+  char *ext_mem = reinterpret_cast<char *>(malloc(sizeof(AicpuExtInfo) + sizeof(int32_t)));
+  ASSERT_NE(ext_mem, nullptr);
+  AicpuExtInfo *aicpu_ext_info = reinterpret_cast<AicpuExtInfo *>(ext_mem);
+  aicpu_ext_info->infoType = aicpu::FWKAdapter::FWK_ADPT_EXT_SHAPE_TYPE;
+  aicpu_ext_info->infoLen = sizeof(int32_t);
+  int32_t type = node_item->shape_inference_type;
+  memcpy_s(aicpu_ext_info->infoMsg, sizeof(int32_t), &type, sizeof(int32_t));
+  std::string ext_info(ext_mem, sizeof(AicpuExtInfo) + sizeof(int32_t));
+  std::string *mutable_ext_info = kernel_ex_def->mutable_kernel_ext_info();
+  (*mutable_ext_info) = ext_info;
+
+  hybrid_model.task_defs_[node] = std::vector<domi::TaskDef>({task_def, task_def});
+
+  AicpuTfNodeTask aicpu_tf_node_task(node_item, task_def);
+  ASSERT_EQ(aicpu_tf_node_task.Init(hybrid_model), SUCCESS);
+  ASSERT_EQ(aicpu_tf_node_task.LaunchTask(*node_state->GetTaskContext()), SUCCESS);
+
+  AicpuTaskStruct args;
+  args.head.length = sizeof(args);
+  args.head.ioAddrNum = 6;
+
+  domi::TaskDef task_def2;
+  task_def2.set_type(RT_MODEL_TASK_ALL_KERNEL);
+  task_def2.mutable_kernel()->set_args(reinterpret_cast<const char *>(&args), args.head.length);
+  task_def2.mutable_kernel()->set_args_size(args.head.length);
+  hybrid_model.task_defs_[node] = std::vector<domi::TaskDef>({task_def2});
+
+  // Init is expected to fail here, presumably because task_def carries a
+  // kernel_ex payload rather than the kernel payload AicpuNodeTask parses.
+  AicpuNodeTask aicpu_node_task(node_item, task_def);
+  ASSERT_EQ(aicpu_node_task.Init(hybrid_model), FAILED);
+  ASSERT_EQ(aicpu_node_task.LaunchTask(*node_state->GetTaskContext()), SUCCESS);
+
+  free(ext_mem);
+}
+}  // namespace ge