| @@ -106,6 +106,7 @@ Status KernelExTaskInfo::Init(const domi::TaskDef &task_def, DavinciModel *davin | |||
| // 1. Copy context from kernelExDef.private to workspace | |||
| uint32_t op_index = kernel_ex_def.op_index(); | |||
| OpDescPtr op_desc = davinci_model_->GetOpByIndex(op_index); | |||
| op_desc_ = op_desc; | |||
| if (op_desc == nullptr) { | |||
| REPORT_INNER_ERROR("E19999", "Can't get op_desc from davinci_model by index:%u", op_index); | |||
| GELOGE(INTERNAL_ERROR, "[Get][Op] by index failed, index:%u is out of range!", op_index); | |||
| @@ -422,7 +423,7 @@ Status KernelExTaskInfo::Distribute() { | |||
| if (topic_type_flag_ > 0) { | |||
| dump_flag_ = dump_flag_ | static_cast<uint32_t>(topic_type_flag_); | |||
| } | |||
| rtError_t rt_ret = rtKernelLaunchEx(kernel_buf_, kernel_buf_size_, dump_flag_, stream_); | |||
| rtError_t rt_ret = rtKernelLaunchFwk(op_desc_->GetName().c_str(), kernel_buf_, kernel_buf_size_, dump_flag_, stream_); | |||
| if (rt_ret != RT_ERROR_NONE) { | |||
| REPORT_CALL_ERROR("E19999", "Call rtKernelLaunchFwk failed, ret:0x%X", rt_ret); | |||
| GELOGE(RT_FAILED, "[Call][RtKernelLaunchFwk] failed, ret:0x%X", rt_ret); | |||
| @@ -70,6 +70,7 @@ class KernelExTaskInfo : public TaskInfo { | |||
| uint32_t dump_flag_; | |||
| uint32_t kernel_buf_size_; | |||
| DavinciModel *davinci_model_; | |||
| OpDescPtr op_desc_; | |||
| void *kernel_buf_; | |||
| void *input_output_addr_; | |||
| void *ext_info_addr_; | |||
| @@ -440,9 +440,10 @@ Status KernelTaskInfo::Distribute() { | |||
| } | |||
| GELOGI("distribute task info kernel_type %d, flag %d", kernel_type_, dump_flag_); | |||
| // blockDim is reserved parameter, set to 1 | |||
| rt_ret = rtCpuKernelLaunchWithFlag(reinterpret_cast<const void *>(so_name_.c_str()), | |||
| reinterpret_cast<const void *>(kernel_name_.c_str()), 1, args_, args_size_, | |||
| nullptr, stream_, dump_flag_); | |||
| std::string op_name = op_desc_->GetName(); | |||
| rtKernelLaunchNames_t launch_name = {so_name_.c_str(), kernel_name_.c_str(), op_name.c_str()}; | |||
| rt_ret = rtAicpuKernelLaunchWithFlag(&launch_name, 1, args_, args_size_, | |||
| nullptr, stream_, dump_flag_); | |||
| call_save_dump_ = true; | |||
| } else { | |||
| /* default: not skt launch */ | |||
| @@ -477,7 +477,7 @@ Status AicpuTfNodeTask::CopyDataToHbm(TaskContext &context, | |||
| GE_CHK_STATUS_RET_NOLOG(PrepareCopyInputs(context, out_shape_hbm)); | |||
| RECORD_CALLBACK_EVENT(context.GetExecutionContext(), node_name_.c_str(), "[LaunchCopy] Start"); | |||
| GE_CHK_RT_RET(rtKernelLaunchEx(copy_task_args_buf_->GetData(), sizeof(STR_FWK_OP_KERNEL), | |||
| GE_CHK_RT_RET(rtKernelLaunchFwk(node_name_.c_str(), copy_task_args_buf_->GetData(), sizeof(STR_FWK_OP_KERNEL), | |||
| RT_KERNEL_DEFAULT, context.GetStream())); | |||
| RECORD_CALLBACK_EVENT(context.GetExecutionContext(), node_name_.c_str(), "[LaunchCopy] End"); | |||
| @@ -638,7 +638,8 @@ Status AicpuTfNodeTask::LaunchTask(TaskContext &context) { | |||
| GELOGD("Node[%s] launch task start, unknown_type=%d.", node_name_.c_str(), unknown_type_); | |||
| uint32_t flag = RT_KERNEL_DEFAULT; | |||
| RECORD_EXECUTION_EVENT(context.GetExecutionContext(), node_name_.c_str(), "[AicpuTfNodertKernelLaunchEx] Start"); | |||
| GE_CHK_RT_RET(rtKernelLaunchEx(kernel_buf_->GetData(), kernel_buf_->GetSize(), flag, context.GetStream())); | |||
| GE_CHK_RT_RET(rtKernelLaunchFwk(node_name_.c_str(), kernel_buf_->GetData(), | |||
| kernel_buf_->GetSize(), flag, context.GetStream())); | |||
| RECORD_EXECUTION_EVENT(context.GetExecutionContext(), node_name_.c_str(), "[AicpuTfNodertKernelLaunchEx] End"); | |||
| GELOGD("Node[%s] launch end.", node_name_.c_str()); | |||
| if (need_sync_) { | |||
| @@ -819,11 +820,11 @@ Status AicpuNodeTask::LaunchTask(TaskContext &context) { | |||
| if (kernel_type == ccKernelType::CUST_AI_CPU) { | |||
| flag |= static_cast<uint32_t>(RT_KERNEL_CUSTOM_AICPU); | |||
| } | |||
| auto rt_ret = rtCpuKernelLaunchWithFlag(reinterpret_cast<const void *>(so_name.c_str()), | |||
| reinterpret_cast<const void *>(kernel_name.c_str()), | |||
| 1, // default core dim is 1 | |||
| args_.get(), args_size_, | |||
| nullptr, context.GetStream(), flag); | |||
| rtKernelLaunchNames_t launch_name = {so_name.c_str(), kernel_name.c_str(), node_name_.c_str()}; | |||
| auto rt_ret = rtAicpuKernelLaunchWithFlag(&launch_name, | |||
| 1, // default core dim is 1 | |||
| args_.get(), args_size_, | |||
| nullptr, context.GetStream(), flag); | |||
| GE_CHK_RT_RET(rt_ret); | |||
| GELOGD("Node[%s] launch task end.", node_name_.c_str()); | |||
| return SUCCESS; | |||
| @@ -0,0 +1,10 @@ | |||
| approvers: | |||
| - gegenhua | |||
| reviewers: | |||
| - wqtshg | |||
| - ji_chen | |||
| - xchu42 | |||
| - sheng-nan | |||
| - wangxiaotian22 | |||
| - zhangxiaokun9 | |||
| - tangqunzhang | |||
| @@ -460,6 +460,21 @@ rtError_t rtDebugUnRegisterForStream(rtStream_t stream) { | |||
| rtError_t rtFftsTaskLaunch(rtFftsTaskInfo_t *fftsTaskInfo, rtStream_t stream) { | |||
| return RT_ERROR_NONE; | |||
| } | |||
| rtError_t rtKernelLaunchFwk(const char *opName, void *args, uint32_t argSize, uint32_t flags, rtStream_t rtStream) { | |||
| return RT_ERROR_NONE; | |||
| } | |||
| rtError_t rtAicpuKernelLaunchWithFlag(const rtKernelLaunchNames_t *launchNames, uint32_t blockDim, const void *args, | |||
| uint32_t argSize, rtSmDesc_t *smDesc, rtStream_t stream, uint32_t flags) { | |||
| return RT_ERROR_NONE; | |||
| } | |||
| rtError_t rtAicpuKernelLaunch(const rtKernelLaunchNames_t *launchNames, uint32_t blockDim, const void *args, | |||
| uint32_t argSize, rtSmDesc_t *smDesc, rtStream_t stream) { | |||
| return RT_ERROR_NONE; | |||
| } | |||
| #ifdef __cplusplus | |||
| } | |||
| #endif | |||
| @@ -736,6 +736,7 @@ set(HYBRID_TEST_FILES | |||
| "hybrid/node_executor/host_cpu/host_cpu_node_task_unittest.cc" | |||
| "hybrid/node_executor/ge_local/ge_local_node_executor_unittest.cc" | |||
| "hybrid/node_executor/hccl/hccl_node_executor_unittest.cc" | |||
| "hybrid/node_executor/aicpu/aicpu_node_executor_unittest.cc" | |||
| "hybrid/executor/hybrid_model_async_executor_unittest.cc" | |||
| "hybrid/executor/hybrid_model_pipeline_executor_unittest.cc" | |||
| "hybrid/node_executor/aicore/aicore_task_compiler_unittest.cc" | |||
| @@ -0,0 +1,168 @@ | |||
| /** | |||
| * Copyright 2021 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #include <gmock/gmock.h> | |||
| #include <gtest/gtest.h> | |||
| #include <vector> | |||
| #define private public | |||
| #define protected public | |||
| #include "graph/runtime_inference_context.h" | |||
| #include "aicpu/common/aicpu_task_struct.h" | |||
| #include "hybrid/executor/subgraph_context.h" | |||
| #include "hybrid/node_executor/aicpu/aicpu_node_executor.h" | |||
| #undef protected | |||
| #undef private | |||
| using namespace std; | |||
| using namespace testing; | |||
| namespace { | |||
| struct AicpuTaskStruct { | |||
| aicpu::AicpuParamHead head; | |||
| uint64_t io_addrp[6]; | |||
| }__attribute__((packed)); | |||
| } // namespace | |||
| namespace ge { | |||
| using namespace hybrid; | |||
| class UtestAicpuNodeExecutor : public testing::Test { | |||
| protected: | |||
| void SetUp() {} | |||
| void TearDown() {} | |||
| }; | |||
| static NodePtr CreateNode(ComputeGraphPtr graph, const string &name, const string &type, int in_num, int out_num) { | |||
| OpDescPtr op_desc = std::make_shared<OpDesc>(name, type); | |||
| op_desc->SetStreamId(0); | |||
| static int32_t index = 0; | |||
| op_desc->SetId(index++); | |||
| GeTensorDesc tensor(GeShape(), FORMAT_ND, DT_INT64); | |||
| TensorUtils::SetSize(tensor, 64); | |||
| vector<int64_t> input_offset; | |||
| for (int i = 0; i < in_num; i++) { | |||
| op_desc->AddInputDesc(tensor); | |||
| input_offset.emplace_back(i * 64); | |||
| } | |||
| op_desc->SetInputOffset(input_offset); | |||
| vector<int64_t> output_offset; | |||
| for (int i = 0; i < out_num; i++) { | |||
| op_desc->AddOutputDesc(tensor); | |||
| output_offset.emplace_back(in_num * 64 + i * 64); | |||
| } | |||
| op_desc->SetOutputOffset(output_offset); | |||
| return graph->AddNode(op_desc); | |||
| } | |||
| TEST_F(UtestAicpuNodeExecutor, aicpu_tf_node_task) { | |||
| ComputeGraphPtr graph = std::make_shared<ComputeGraph>("test"); | |||
| GeModelPtr ge_sub_model = std::make_shared<GeModel>(); | |||
| GeRootModelPtr ge_root_model = std::make_shared<GeRootModel>(graph); | |||
| ge_root_model->SetModelName("test_name"); | |||
| ge_root_model->SetSubgraphInstanceNameToModel("sub", ge_sub_model); | |||
| HybridModel hybrid_model(ge_root_model); | |||
| NodePtr node = CreateNode(graph, "frameworkop", FRAMEWORK_OP_TYPE, 4, 2); | |||
| std::unique_ptr<NodeItem> new_node; | |||
| ASSERT_EQ(NodeItem::Create(node, new_node), SUCCESS); | |||
| NodeItem *node_item = new_node.get(); | |||
| hybrid_model.node_items_[node] = std::move(new_node); | |||
| node_item->input_start = 0; | |||
| node_item->output_start = 0; | |||
| node_item->is_dynamic = true; | |||
| node_item->shape_inference_type = DEPEND_COMPUTE; | |||
| GraphItem graph_item; | |||
| graph_item.node_items_.emplace_back(node_item); | |||
| graph_item.total_inputs_ = 4; | |||
| graph_item.total_outputs_ = 2; | |||
| GraphExecutionContext graph_context; | |||
| SubgraphContext subgraph_context(&graph_item, &graph_context); | |||
| ASSERT_EQ(subgraph_context.Init(), SUCCESS); | |||
| graph_context.callback_manager = std::unique_ptr<CallbackManager>(new CallbackManager()); | |||
| auto node_state = subgraph_context.GetOrCreateNodeState(node_item); | |||
| ASSERT_NE(node_state, nullptr); | |||
| for (int i=0; i<4; ++i) { | |||
| uint64_t value_0 = 512; | |||
| TensorValue in_tensor0(&value_0, sizeof(value_0)); | |||
| subgraph_context.SetInput(*node_item, i, in_tensor0); | |||
| } | |||
| uint64_t value_0 = 512; | |||
| TensorValue out_tensor0(&value_0, sizeof(value_0)); | |||
| subgraph_context.SetOutput(*node_item, 0, out_tensor0); | |||
| uint64_t value_1 = 512; | |||
| TensorValue out_tensor1(&value_1, sizeof(value_1)); | |||
| subgraph_context.SetOutput(*node_item, 1, out_tensor1); | |||
| // task | |||
| domi::TaskDef task_def; | |||
| domi::KernelExDef *kernel_ex_def = task_def.mutable_kernel_ex(); | |||
| kernel_ex_def->set_kernel_ext_info_size(12); | |||
| AicpuExtInfo aicpu_ext_info; | |||
| aicpu_ext_info.infoType = aicpu::FWKAdapter::FWK_ADPT_EXT_SHAPE_TYPE; | |||
| aicpu_ext_info.infoLen = sizeof(int32_t); | |||
| int32_t type = node_item->shape_inference_type; | |||
| char *ext_mem = (char*)malloc(sizeof(AicpuExtInfo) + sizeof(int32_t)); | |||
| memcpy_s(ext_mem, sizeof(AicpuExtInfo) + sizeof(int32_t), &aicpu_ext_info, sizeof(AicpuExtInfo)); | |||
| memcpy_s(ext_mem + sizeof(AicpuExtInfo), sizeof(int32_t), &type, sizeof(int32_t)); | |||
| std::string ext_info(ext_mem, sizeof(AicpuExtInfo) + sizeof(int32_t)); | |||
| std::string *mutable_ext_info = kernel_ex_def->mutable_kernel_ext_info(); | |||
| (*mutable_ext_info) = ext_info; | |||
| hybrid_model.task_defs_[node] = std::vector<domi::TaskDef>({task_def, task_def}); | |||
| AicpuTfNodeTask aicpu_tf_node_task(node_item, task_def); | |||
| ASSERT_EQ(aicpu_tf_node_task.Init(hybrid_model), SUCCESS); | |||
| ASSERT_EQ(aicpu_tf_node_task.LaunchTask(*node_state->GetTaskContext()), SUCCESS); | |||
| AicpuTaskStruct args; | |||
| args.head.length = sizeof(args); | |||
| args.head.ioAddrNum = 6; | |||
| domi::TaskDef task_def2; | |||
| task_def2.set_type(RT_MODEL_TASK_ALL_KERNEL); | |||
| task_def2.mutable_kernel()->set_args(reinterpret_cast<const char *>(&args), args.head.length); | |||
| task_def2.mutable_kernel()->set_args_size(args.head.length); | |||
| hybrid_model.task_defs_[node] = std::vector<domi::TaskDef>({task_def2}); | |||
| AicpuNodeTask aicpu_node_task(node_item, task_def); | |||
| ASSERT_EQ(aicpu_node_task.Init(hybrid_model), FAILED); | |||
| ASSERT_EQ(aicpu_node_task.LaunchTask(*node_state->GetTaskContext()), SUCCESS); | |||
| //kernel_ex_def->set_allocated_kernel_ext_info(nullptr); | |||
| free(ext_mem); | |||
| } | |||
| } // namespace ge | |||