
ge_hybrid_unittest.cc 26 kB

/**
 * Copyright 2019-2021 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
#include <gtest/gtest.h>
#include <gmock/gmock.h>
#include <vector>

#include "runtime/rt.h"
#include "graph/utils/node_utils.h"

#define protected public
#define private public
#include "hybrid/model/hybrid_model_builder.h"
#include "hybrid/model/hybrid_model.h"
#include "hybrid/node_executor/node_executor.h"
#include "model/ge_model.h"
#include "model/ge_root_model.h"
#include "hybrid/node_executor/aicore/aicore_op_task.h"
#include "framework/common/taskdown_common.h"
#include "framework/common/debug/log.h"
#include "graph/ge_context.h"
#include "hybrid/executor/hybrid_execution_context.h"
#include "hybrid/executor/hybrid_model_executor.h"
#include "hybrid/node_executor/aicore/aicore_task_builder.h"
#include "graph/load/model_manager/tbe_handle_store.h"
#include "graph/manager/graph_mem_allocator.h"
#include "hybrid/common/npu_memory_allocator.h"
#include "graph/types.h"
#include "graph/utils/tensor_utils.h"
#include "graph/testcase/ge_graph/graph_builder_utils.h"
#undef private
#undef protected

using namespace std;
using namespace testing;
using namespace ge;
using namespace hybrid;

class UtestGeHybrid : public testing::Test {
 protected:
  void SetUp() {}

  void TearDown() {
    NpuMemoryAllocator::allocators_.clear();
  }
};

static ge::OpDescPtr CreateOpDesc(string name = "", string type = "") {
  auto op_desc = std::make_shared<ge::OpDesc>(name, type);
  op_desc->SetStreamId(0);
  op_desc->SetId(0);
  op_desc->SetWorkspace({});
  op_desc->SetWorkspaceBytes({});
  op_desc->SetInputOffset({});
  op_desc->SetOutputOffset({});
  ge::AttrUtils::SetStr(op_desc, ge::TVM_ATTR_NAME_MAGIC, "RT_DEV_BINARY_MAGIC_ELF_AIVEC");
  bool support_dynamic = true;
  ge::AttrUtils::GetBool(op_desc, "support_dynamicshape", support_dynamic);
  return op_desc;
}

TEST_F(UtestGeHybrid, aicore_op_task_init_success) {
  // build aicore task
  auto aicore_task = std::unique_ptr<hybrid::AiCoreOpTask>(new(std::nothrow) hybrid::AiCoreOpTask());
  domi::TaskDef task_def;
  task_def.set_type(RT_MODEL_TASK_ALL_KERNEL);
  domi::KernelDefWithHandle *kernel_with_handle = task_def.mutable_kernel_with_handle();
  kernel_with_handle->set_original_kernel_key("");
  kernel_with_handle->set_node_info("");
  kernel_with_handle->set_block_dim(32);
  kernel_with_handle->set_args_size(64);
  string args(64, '1');
  kernel_with_handle->set_args(args.data(), 64);
  domi::KernelContext *context = kernel_with_handle->mutable_context();
  context->set_op_index(1);
  context->set_kernel_type(2);  // ccKernelType::TE
  uint16_t args_offset[9] = {0};
  context->set_args_offset(args_offset, 9 * sizeof(uint16_t));

  OpDescPtr op_desc = CreateOpDesc("Add", "Add");
  std::vector<char> kernelBin;
  TBEKernelPtr tbe_kernel = std::make_shared<ge::OpKernelBin>("name/Add", std::move(kernelBin));
  op_desc->SetExtAttr(ge::OP_EXTATTR_NAME_TBE_KERNEL, tbe_kernel);
  std::string kernel_name("kernel/Add");
  AttrUtils::SetStr(op_desc, op_desc->GetName() + "_kernelname", kernel_name);
  ASSERT_EQ(aicore_task->InitWithTaskDef(*op_desc.get(), task_def), SUCCESS);

  rtStream_t stream = nullptr;
  rtStreamCreate(&stream, 0);
  ASSERT_EQ(aicore_task->LaunchKernel(stream), SUCCESS);
  char handle[] = "";
  aicore_task->handle_ = handle;
  aicore_task->tiling_key_ = 1;
  ASSERT_EQ(aicore_task->LaunchKernel(stream), SUCCESS);
}

TEST_F(UtestGeHybrid, task_update_tiling_info) {
  auto aicore_task = std::unique_ptr<hybrid::AiCoreOpTask>(new(std::nothrow) hybrid::AiCoreOpTask());
  aicore_task->is_single_op_ = true;

  auto graph = make_shared<ComputeGraph>("graph");
  OpDescPtr op_desc = CreateOpDesc("Add", "Add");
  ge::AttrUtils::SetStr(op_desc, "compile_info_key", "key");
  ge::AttrUtils::SetStr(op_desc, "compile_info_json", "json");
  auto node = graph->AddNode(op_desc);
  optiling::OpRunInfo tiling_info;
  ASSERT_EQ(aicore_task->CalcTilingInfo(node, tiling_info), SUCCESS);
}

TEST_F(UtestGeHybrid, index_taskdefs_failed) {
  // build aicore task
  domi::ModelTaskDef model_task_def;
  std::shared_ptr<domi::ModelTaskDef> model_task_def_ptr = make_shared<domi::ModelTaskDef>(model_task_def);
  domi::TaskDef *task_def = model_task_def_ptr->add_task();
  GeModelPtr ge_model = make_shared<GeModel>();
  ge_model->SetModelTaskDef(model_task_def_ptr);

  auto aicore_task = std::unique_ptr<hybrid::AiCoreOpTask>(new(std::nothrow) hybrid::AiCoreOpTask());
  task_def->set_type(RT_MODEL_TASK_ALL_KERNEL);
  domi::KernelDefWithHandle *kernel_with_handle = task_def->mutable_kernel_with_handle();
  kernel_with_handle->set_original_kernel_key("");
  kernel_with_handle->set_node_info("");
  kernel_with_handle->set_block_dim(32);
  kernel_with_handle->set_args_size(64);
  string args(64, '1');
  kernel_with_handle->set_args(args.data(), 64);
  domi::KernelContext *context = kernel_with_handle->mutable_context();
  context->set_op_index(1);
  context->set_kernel_type(2);  // ccKernelType::TE
  uint16_t args_offset[9] = {0};
  context->set_args_offset(args_offset, 9 * sizeof(uint16_t));

  OpDescPtr op_desc = CreateOpDesc("Add", "Add");
  std::vector<char> kernelBin;
  TBEKernelPtr tbe_kernel = std::make_shared<ge::OpKernelBin>("name/Add", std::move(kernelBin));
  op_desc->SetExtAttr(ge::OP_EXTATTR_NAME_TBE_KERNEL, tbe_kernel);
  std::string kernel_name("kernel/Add");
  AttrUtils::SetStr(op_desc, op_desc->GetName() + "_kernelname", kernel_name);

  ComputeGraphPtr graph = std::make_shared<ComputeGraph>("test");
  GeRootModelPtr ge_root_model = make_shared<GeRootModel>(graph);
  HybridModel hybrid_model(ge_root_model);
  HybridModelBuilder hybrid_model_builder(hybrid_model);

  ASSERT_EQ(hybrid_model_builder.IndexTaskDefs(graph, ge_model), INTERNAL_ERROR);
}

TEST_F(UtestGeHybrid, parse_force_infershape_nodes) {
  const char *const kForceInfershape = "_force_infershape_when_running";
  auto graph = make_shared<ComputeGraph>("graph");
  OpDescPtr op_desc = CreateOpDesc("Conv2D", "Conv2D");
  ge::AttrUtils::SetBool(op_desc, kForceInfershape, true);
  auto node = graph->AddNode(op_desc);
  std::unique_ptr<NodeItem> new_node;
  NodeItem::Create(node, new_node);
  GeRootModelPtr ge_root_model = make_shared<GeRootModel>(graph);
  HybridModel hybrid_model(ge_root_model);
  HybridModelBuilder hybrid_model_builder(hybrid_model);
  ASSERT_EQ(hybrid_model_builder.ParseForceInfershapeNodes(node, *new_node), SUCCESS);
}

static ComputeGraphPtr BuildDataDirectConnectGraph() {
  ge::ut::GraphBuilder builder("subgraph");
  auto data = builder.AddNode("Data", "Data", 1, 1);
  auto netoutput = builder.AddNode("Netoutput", "Netoutput", 1, 1);
  builder.AddDataEdge(data, 0, netoutput, 0);
  return builder.GetGraph();
}

TEST_F(UtestGeHybrid, data_direct_connect) {
  std::unique_ptr<NodeItem> node_item;
  auto root_graph = make_shared<ComputeGraph>("root_graph");
  OpDescPtr op_desc = CreateOpDesc("PartitionedCall", "PartitionedCall");
  auto node = root_graph->AddNode(op_desc);
  auto sub_graph = BuildDataDirectConnectGraph();
  sub_graph->SetParentGraph(root_graph);
  sub_graph->SetParentNode(node);
  node->GetOpDesc()->AddSubgraphName("subgraph");
  node->GetOpDesc()->SetSubgraphInstanceName(0, "subgraph");
  root_graph->AddSubgraph("subgraph", sub_graph);

  std::unique_ptr<NodeItem> new_node;
  NodeItem::Create(node, new_node);
  GeRootModelPtr ge_root_model = make_shared<GeRootModel>(root_graph);
  HybridModel hybrid_model(ge_root_model);
  HybridModelBuilder hybrid_model_builder(hybrid_model);
  auto ret = hybrid_model_builder.IdentifyVariableOutputs(*new_node.get());
  ASSERT_EQ(ret, SUCCESS);
}

TEST_F(UtestGeHybrid, index_taskdefs_success) {
  // build aicore task
  domi::ModelTaskDef model_task_def;
  std::shared_ptr<domi::ModelTaskDef> model_task_def_ptr = make_shared<domi::ModelTaskDef>(model_task_def);
  domi::TaskDef *task_def = model_task_def_ptr->add_task();
  GeModelPtr ge_model = make_shared<GeModel>();
  ge_model->SetModelTaskDef(model_task_def_ptr);

  auto aicore_task = std::unique_ptr<hybrid::AiCoreOpTask>(new(std::nothrow) hybrid::AiCoreOpTask());
  task_def->set_type(RT_MODEL_TASK_ALL_KERNEL);
  domi::KernelDefWithHandle *kernel_with_handle = task_def->mutable_kernel_with_handle();
  kernel_with_handle->set_original_kernel_key("");
  kernel_with_handle->set_node_info("");
  kernel_with_handle->set_block_dim(32);
  kernel_with_handle->set_args_size(64);
  string args(64, '1');
  kernel_with_handle->set_args(args.data(), 64);
  domi::KernelContext *context = kernel_with_handle->mutable_context();
  context->set_op_index(0);
  context->set_kernel_type(2);  // ccKernelType::TE
  uint16_t args_offset[9] = {0};
  context->set_args_offset(args_offset, 9 * sizeof(uint16_t));

  OpDescPtr op_desc = CreateOpDesc("Add", "Add");
  std::vector<char> kernelBin;
  TBEKernelPtr tbe_kernel = std::make_shared<ge::OpKernelBin>("name/Add", std::move(kernelBin));
  op_desc->SetExtAttr(ge::OP_EXTATTR_NAME_TBE_KERNEL, tbe_kernel);
  std::string kernel_name("kernel/Add");
  AttrUtils::SetStr(op_desc, op_desc->GetName() + "_kernelname", kernel_name);

  ComputeGraphPtr graph = std::make_shared<ComputeGraph>("test");
  NodePtr node = graph->AddNode(op_desc);
  GeRootModelPtr ge_root_model = make_shared<GeRootModel>(graph);
  HybridModel hybrid_model(ge_root_model);
  HybridModelBuilder hybrid_model_builder(hybrid_model);

  ASSERT_EQ(hybrid_model_builder.IndexTaskDefs(graph, ge_model), SUCCESS);
}

TEST_F(UtestGeHybrid, init_weight_success) {
  NpuMemoryAllocator::allocators_.emplace(make_pair(0, nullptr));
  // make graph with sub_graph
  ComputeGraphPtr graph = std::make_shared<ComputeGraph>("root_graph");
  OpDescPtr op_desc = CreateOpDesc("if", IF);
  NodePtr node = graph->AddNode(op_desc);
  // make sub graph
  ComputeGraphPtr sub_graph = std::make_shared<ComputeGraph>("if_sub_graph");
  OpDescPtr const_op_desc = CreateOpDesc("const", CONSTANT);
  vector<int64_t> dims_vec_0 = {2, 1, 4, 1, 2};
  vector<int32_t> data_vec_0 = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16};
  GeTensorDesc tensor_desc_0(GeShape(dims_vec_0), FORMAT_NCHW, DT_INT32);
  (void)TensorUtils::SetRealDimCnt(tensor_desc_0, dims_vec_0.size());
  ConstGeTensorPtr constTensor_0 =
      std::make_shared<GeTensor>(tensor_desc_0, (uint8_t *)&data_vec_0[0], data_vec_0.size() * sizeof(int32_t));
  AttrUtils::SetTensor(const_op_desc, ge::ATTR_NAME_WEIGHTS, constTensor_0);
  const_op_desc->AddOutputDesc(tensor_desc_0);
  NodePtr const_node = sub_graph->AddNode(const_op_desc);
  graph->AddSubgraph("sub", sub_graph);

  GeRootModelPtr ge_root_model = make_shared<GeRootModel>(graph);
  GeModelPtr ge_sub_model = make_shared<GeModel>();
  // Buffer weight_buffer = Buffer(128, 0);
  // ge_sub_model->SetWeight(weight_buffer);
  ge_root_model->SetSubgraphInstanceNameToModel("sub", ge_sub_model);
  HybridModel hybrid_model(ge_root_model);
  HybridModelBuilder hybrid_model_builder(hybrid_model);
  auto ret = hybrid_model_builder.InitWeights();
  ASSERT_EQ(ret, SUCCESS);

  Buffer weight_buffer = Buffer(128, 0);
  ge_sub_model->SetWeight(weight_buffer);
  ret = hybrid_model_builder.InitWeights();
  ASSERT_EQ(ret, PARAM_INVALID);
}

TEST_F(UtestGeHybrid, hybrid_model_executor) {
  ComputeGraphPtr compute_graph = MakeShared<ComputeGraph>("abc");
  GeRootModelPtr root_model = MakeShared<ge::GeRootModel>(compute_graph);
  HybridModel model(root_model);
  HybridModel *model_ptr = &model;

  uint32_t device_id = 0;
  rtStream_t stream = nullptr;
  HybridModelExecutor executor(model_ptr, device_id, stream);
  executor.Init();
}

TEST_F(UtestGeHybrid, test_parse_parallel_group) {
  NodeExecutorManager::GetInstance().engine_mapping_.emplace("ops_kernel_info_hccl",
                                                             NodeExecutorManager::ExecutorType::HCCL);
  ComputeGraphPtr compute_graph = MakeShared<ComputeGraph>("test");
  OpDescPtr op_desc = CreateOpDesc("AllReduce", "AllReduce");
  op_desc->SetId(0);
  ge::AttrUtils::SetStr(op_desc, ATTR_NAME_PARALLEL_GROUP, "group_1");
  auto node = compute_graph->AddNode(op_desc);
  std::unique_ptr<NodeItem> node_item;
  NodeItem::Create(node, node_item);
  node_item->node_id = 0;
  op_desc->SetOpKernelLibName("ops_kernel_info_hccl");

  GeRootModelPtr root_model = MakeShared<ge::GeRootModel>(compute_graph);
  HybridModel model(root_model);
  model.root_graph_ = compute_graph;
  HybridModelBuilder builder(model);
  ASSERT_EQ(builder.CollectParallelGroups(node_item.get()), SUCCESS);
  ASSERT_EQ(builder.node_to_parallel_groups_.size(), 1);
  ASSERT_EQ(builder.parallel_group_to_nodes_.size(), 1);

  OpDescPtr op_desc_1 = CreateOpDesc("subgraph", "PartitionedCall");
  op_desc_1->AddSubgraphName("subgraph");
  auto node_1 = compute_graph->AddNode(op_desc_1);
  ComputeGraphPtr subgraph = MakeShared<ComputeGraph>("subgraph");
  ASSERT_EQ(NodeUtils::SetSubgraph(*node_1, 0, subgraph), GRAPH_SUCCESS);
  std::unique_ptr<NodeItem> node_item_1;
  NodeItem::Create(node_1, node_item_1);
  node_item_1->node_id = 1;
  ASSERT_EQ(builder.CollectParallelGroups(node_item_1.get()), SUCCESS);
  ASSERT_EQ(builder.node_to_parallel_groups_.size(), 1);
  ASSERT_EQ(builder.parallel_group_to_nodes_.size(), 1);

  OpDescPtr op_desc_2 = CreateOpDesc("sub_node_1", "AllReduce");
  ge::AttrUtils::SetStr(op_desc_2, ATTR_NAME_PARALLEL_GROUP, "group_1");
  auto node_2 = subgraph->AddNode(op_desc_2);
  ASSERT_TRUE(node_2 != nullptr);
  OpDescPtr op_desc_3 = CreateOpDesc("sub_node_2", "AllReduce2");
  ge::AttrUtils::SetStr(op_desc_3, ATTR_NAME_PARALLEL_GROUP, "group_2");
  auto node_3 = subgraph->AddNode(op_desc_3);
  ASSERT_TRUE(node_3 != nullptr);
  ASSERT_EQ(builder.CollectParallelGroups(node_item_1.get()), SUCCESS);
  ASSERT_EQ(builder.node_to_parallel_groups_.size(), 2);
  ASSERT_EQ(builder.parallel_group_to_nodes_.size(), 2);
  ASSERT_EQ(builder.parallel_group_to_nodes_["group_1"].size(), 2);
  ASSERT_EQ(builder.parallel_group_to_nodes_["group_2"].size(), 1);

  builder.parallel_group_to_nodes_.clear();
  builder.node_ref_inputs_.clear();
  model.node_items_[node] = std::move(node_item);
  model.node_items_[node_1] = std::move(node_item_1);
  ASSERT_FALSE(model.node_items_[node]->has_observer);
  ASSERT_TRUE(model.node_items_[node_1]->dependents_for_execution.empty());
  ASSERT_EQ(builder.ParseDependentByParallelGroup(), SUCCESS);
  ASSERT_TRUE(model.node_items_[node]->has_observer);
  ASSERT_EQ(model.node_items_[node_1]->dependents_for_execution.size(), 1);
  ASSERT_EQ(model.node_items_[node_1]->dependents_for_execution[0], node);
  // repeat parse
  ASSERT_EQ(builder.ParseDependentByParallelGroup(), SUCCESS);
  ASSERT_TRUE(model.node_items_[node]->has_observer);
  ASSERT_EQ(model.node_items_[node_1]->dependents_for_execution.size(), 1);
  ASSERT_EQ(model.node_items_[node_1]->dependents_for_execution[0], node);
}

TEST_F(UtestGeHybrid, unfold_subgraphs_success) {
  ComputeGraphPtr merged_graph = nullptr;

  ComputeGraphPtr sub_sub_graph1 = std::make_shared<ComputeGraph>("while_cond");
  OpDescPtr sub_sub_graph_while_cond_data_op_desc = CreateOpDesc("cond_data", DATA);
  NodePtr sub_sub_graph_while_cond_data_node = sub_sub_graph1->AddNode(sub_sub_graph_while_cond_data_op_desc);

  ComputeGraphPtr sub_sub_graph2 = std::make_shared<ComputeGraph>("while_body");
  /*OpDescPtr sub_sub_graph_while_body_const_op_desc = CreateOpDesc("body_const", CONSTANT);
  NodePtr sub_sub_graph_while_body_const_node = sub_sub_graph2->AddNode(sub_sub_graph_while_body_const_op_desc);*/
  OpDescPtr sub_sub_graph_while_body_data_op_desc = CreateOpDesc("body_data", DATA);
  NodePtr sub_sub_graph_while_body_data_node = sub_sub_graph2->AddNode(sub_sub_graph_while_body_data_op_desc);
  sub_sub_graph2->SetGraphUnknownFlag(true);
  /*OpDescPtr sub_sub_graph_while_body_add_op_desc = CreateOpDesc("body_add", ADD);
  NodePtr sub_sub_graph_while_body_add_node = sub_sub_graph2->AddNode(sub_sub_graph_while_body_add_node);
  sub_sub_graph_while_body_add_node->AddLinkFrom(sub_sub_graph_while_body_data_node);
  sub_sub_graph_while_body_add_node->AddLinkFrom(sub_sub_graph_while_body_const_node);*/

  ComputeGraphPtr sub_graph = std::make_shared<ComputeGraph>("sub_graph");
  OpDescPtr sub_graph_while_op_desc = CreateOpDesc("while", WHILE);
  NodePtr sub_graph_while_node = sub_graph->AddNode(sub_graph_while_op_desc);
  sub_graph->SetGraphUnknownFlag(true);
  sub_graph_while_node->GetOpDesc()->AddSubgraphName("while_cond");
  sub_graph_while_node->GetOpDesc()->AddSubgraphName("while_body");
  sub_graph_while_node->GetOpDesc()->SetSubgraphInstanceName(0, "while_cond");
  sub_graph_while_node->GetOpDesc()->SetSubgraphInstanceName(1, "while_body");

  ComputeGraphPtr root_graph = std::make_shared<ComputeGraph>("root_graph");
  auto partitioned_call_op_desc = MakeShared<OpDesc>("partitioned_call", PARTITIONEDCALL);
  auto partitioned_call_node = root_graph->AddNode(partitioned_call_op_desc);
  partitioned_call_node->GetOpDesc()->AddSubgraphName("sub_graph");
  partitioned_call_node->GetOpDesc()->SetSubgraphInstanceName(0, "sub_graph");

  root_graph->AddSubGraph(sub_sub_graph1);
  root_graph->AddSubGraph(sub_sub_graph2);
  sub_sub_graph1->SetParentGraph(root_graph);
  sub_sub_graph2->SetParentGraph(root_graph);
  sub_sub_graph1->SetParentNode(sub_graph_while_node);
  sub_sub_graph2->SetParentNode(sub_graph_while_node);
  root_graph->AddSubGraph(sub_graph);
  sub_graph->SetParentNode(partitioned_call_node);
  sub_graph->SetParentGraph(root_graph);

  GeRootModelPtr root_model = MakeShared<ge::GeRootModel>(root_graph);
  HybridModel hybrid_model(root_model);
  HybridModelBuilder hybrid_model_builder(hybrid_model);
  EXPECT_EQ(hybrid_model_builder.UnfoldSubgraphs(root_graph, merged_graph), SUCCESS);
}

TEST_F(UtestGeHybrid, TestTaskContext) {
  auto graph = make_shared<ComputeGraph>("graph");
  OpDescPtr op_desc = CreateOpDesc("Add", "Add");
  GeShape shape({2, 16});
  GeTensorDesc tensor_desc(shape);
  op_desc->AddInputDesc(tensor_desc);
  op_desc->AddInputDesc(tensor_desc);
  op_desc->AddOutputDesc(tensor_desc);
  auto node = graph->AddNode(op_desc);
  std::unique_ptr<NodeItem> node_item;
  NodeItem::Create(node, node_item);
  node_item->input_start = 0;
  node_item->output_start = 0;

  GraphExecutionContext execution_context;
  SubgraphContext subgraph_context(nullptr, &execution_context);
  subgraph_context.all_inputs_.resize(2);
  subgraph_context.all_outputs_.resize(1);
  NodeState node_state(*node_item, &subgraph_context);

  auto task_context = TaskContext::Create(&node_state, &execution_context, &subgraph_context);
  ASSERT_TRUE(task_context != nullptr);
  auto desc = task_context->MutableInputDesc(2);
  ASSERT_TRUE(desc == nullptr);
  desc = task_context->MutableOutputDesc(0);
  ASSERT_TRUE(desc != nullptr);
  ASSERT_EQ(desc->GetShape().GetDims(), shape.GetDims());
  GeTensorDesc output_desc;
  ASSERT_EQ(task_context->GetOutputDesc(0, output_desc), SUCCESS);
  ASSERT_EQ(output_desc.GetShape().GetDims(), shape.GetDims());
  desc = task_context->MutableInputDesc(0);
  ASSERT_TRUE(desc != nullptr);
  ASSERT_EQ(desc->GetShape().GetDims(), shape.GetDims());
  GeShape new_shape({8, 2});
  tensor_desc.SetShape(new_shape);
  task_context->UpdateInputDesc(1, tensor_desc);
  GeTensorDesc new_desc;
  ASSERT_EQ(task_context->GetInputDesc(1, new_desc), SUCCESS);
  ASSERT_EQ(new_desc.GetShape().GetDims(), new_shape.GetDims());
}

TEST_F(UtestGeHybrid, hybrid_model_executor_check_shape) {
  HybridModelExecutor::ExecuteArgs args;
  GeTensorDescPtr ge_tensor = make_shared<GeTensorDesc>(GeTensorDesc());
  vector<int64_t> dim = {2, 3};
  ge_tensor->SetShape(GeShape(dim));
  args.input_desc.push_back(ge_tensor);

  // create node
  ge::ComputeGraphPtr graph = std::make_shared<ComputeGraph>("God");
  OpDescPtr op_desc = std::make_shared<OpDesc>("data", DATA);
  GeTensorDesc tensor_desc(GeShape({2, 3}));
  std::vector<std::pair<int64_t, int64_t>> shape_range({std::pair<int64_t, int64_t>(1, 3),
                                                        std::pair<int64_t, int64_t>(2, 4)});
  tensor_desc.SetShapeRange(shape_range);
  op_desc->AddInputDesc(tensor_desc);
  op_desc->AddOutputDesc(tensor_desc);
  NodePtr node = graph->AddNode(op_desc);
  std::unique_ptr<NodeItem> new_node;
  NodeItem::Create(node, new_node);
  GraphItem graph_item;
  graph_item.input_nodes_.emplace_back(new_node.get());

  Status ret = HybridModelExecutor::CheckInputShapeByShapeRange(&graph_item, args);
  ASSERT_EQ(ret, ge::SUCCESS);

  HybridModelExecutor::ExecuteArgs args1;
  ret = HybridModelExecutor::CheckInputShapeByShapeRange(&graph_item, args1);
  ASSERT_EQ(ret, ge::INTERNAL_ERROR);

  HybridModelExecutor::ExecuteArgs args2;
  GeTensorDescPtr ge_tensor2 = make_shared<GeTensorDesc>(GeTensorDesc());
  vector<int64_t> dim2 = {-1, 3};
  ge_tensor2->SetShape(GeShape(dim2));
  args2.input_desc.push_back(ge_tensor2);
  ret = HybridModelExecutor::CheckInputShapeByShapeRange(&graph_item, args1);
  ASSERT_EQ(ret, ge::INTERNAL_ERROR);
}

TEST_F(UtestGeHybrid, TestOptimizeDependenciesForConstInputs) {
  ComputeGraphPtr compute_graph = MakeShared<ComputeGraph>("test");
  GeRootModelPtr root_model = MakeShared<ge::GeRootModel>(compute_graph);
  HybridModel model(root_model);
  model.root_graph_ = compute_graph;
  HybridModelBuilder builder(model);

  GeShape shape({2, 16});
  GeTensorDesc tensor_desc(shape);

  std::unique_ptr<NodeItem> const_node_item;
  {
    OpDescPtr const_op_desc = CreateOpDesc("Constant", "Const");
    const_op_desc->AddOutputDesc(tensor_desc);
    auto const_node = compute_graph->AddNode(const_op_desc);
    NodeItem::Create(const_node, const_node_item);
  }

  std::unique_ptr<NodeItem> non_const_node_item;
  {
    OpDescPtr op_desc = CreateOpDesc("Add", "Add");
    op_desc->AddOutputDesc(tensor_desc);
    auto const_node = compute_graph->AddNode(op_desc);
    NodeItem::Create(const_node, non_const_node_item);
  }

  std::unique_ptr<NodeItem> known_node_item;
  {
    OpDescPtr known_op_desc = CreateOpDesc("known", "PartitionedCall");
    known_op_desc->AddOutputDesc(tensor_desc);
    known_op_desc->AddOutputDesc(tensor_desc);
    auto known_node = compute_graph->AddNode(known_op_desc);
    NodeItem::Create(known_node, known_node_item);
  }

  std::unique_ptr<NodeItem> dst_node_item;
  {
    OpDescPtr known_op_desc = CreateOpDesc("SomeOp", "SomeOpType ");
    known_op_desc->AddOutputDesc(tensor_desc);
    known_op_desc->AddOutputDesc(tensor_desc);
    auto known_node = compute_graph->AddNode(known_op_desc);
    NodeItem::Create(known_node, dst_node_item);
  }

  float buffer[2 * 16];
  unique_ptr<TensorValue> tensor_value(new TensorValue(buffer, sizeof(buffer)));
  model.constant_tensors_[const_node_item->node] = std::move(tensor_value);

  // Case 1. connect to Const
  auto output_id = 1;
  builder.host_input_value_dependencies_[dst_node_item.get()].emplace_back(output_id, const_node_item.get());
  builder.host_input_value_dependencies_[dst_node_item.get()].emplace_back(0, non_const_node_item.get());
  dst_node_item->dependents_for_shape_inference.emplace_back(const_node_item->node);
  dst_node_item->dependents_for_shape_inference.emplace_back(non_const_node_item->node);

  ASSERT_EQ(builder.OptimizeDependenciesForConstantInputs(), SUCCESS);
  ASSERT_EQ(dst_node_item->dependents_for_shape_inference.size(), 1);
  ASSERT_EQ(dst_node_item->dependents_for_shape_inference[0], non_const_node_item->node);

  // Case 2. connect to known-subgraph, netoutput connect to Const
  builder.host_input_value_dependencies_.clear();
  dst_node_item->dependents_for_shape_inference.clear();
  builder.known_subgraph_constant_output_refs_[known_node_item.get()].emplace(output_id, const_node_item->node);
  builder.host_input_value_dependencies_[dst_node_item.get()].emplace_back(output_id, known_node_item.get());
  builder.host_input_value_dependencies_[dst_node_item.get()].emplace_back(0, non_const_node_item.get());
  dst_node_item->dependents_for_shape_inference.emplace_back(known_node_item->node);
  dst_node_item->dependents_for_shape_inference.emplace_back(non_const_node_item->node);

  ASSERT_EQ(builder.OptimizeDependenciesForConstantInputs(), SUCCESS);
  ASSERT_EQ(dst_node_item->dependents_for_shape_inference.size(), 1);
  ASSERT_EQ(dst_node_item->dependents_for_shape_inference[0], non_const_node_item->node);
}

TEST_F(UtestGeHybrid, test_key_for_kernel_bin) {
  auto aicore_task = std::unique_ptr<hybrid::AiCoreOpTask>(new(std::nothrow) hybrid::AiCoreOpTask());
  OpDesc op_desc("Sum", "Sum");
  EXPECT_EQ(aicore_task->GetKeyForTbeKernel(), OP_EXTATTR_NAME_TBE_KERNEL);
  EXPECT_EQ(aicore_task->GetKeyForTvmMagic(), TVM_ATTR_NAME_MAGIC);
  EXPECT_EQ(aicore_task->GetKeyForTvmMetaData(), TVM_ATTR_NAME_METADATA);
  EXPECT_EQ(aicore_task->GetKeyForKernelName(op_desc), "Sum_kernelname");

  auto atomic_task = std::unique_ptr<hybrid::AtomicAddrCleanOpTask>(new(std::nothrow) hybrid::AtomicAddrCleanOpTask());
  EXPECT_EQ(atomic_task->GetKeyForTbeKernel(), EXT_ATTR_ATOMIC_TBE_KERNEL);
  EXPECT_EQ(atomic_task->GetKeyForTvmMagic(), ATOMIC_ATTR_TVM_MAGIC);
  EXPECT_EQ(atomic_task->GetKeyForTvmMetaData(), ATOMIC_ATTR_TVM_METADATA);
  EXPECT_EQ(atomic_task->GetKeyForKernelName(op_desc), "Sum_atomic_kernelname");
}

The Graph Engine (GE) is a submodule of MindSpore implemented in C++. It sits between the front-end module (ME) and the underlying hardware and acts as the bridge between them: GE takes the graph delivered by ME as input, applies a series of deep graph optimizations, and outputs a graph that runs efficiently on the target hardware. GE performs optimizations tailored to the hardware architecture of the Ascend AI processor in order to fully exploit its compute power. During model training and inference, GE is invoked automatically and is transparent to the user.
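Most tests in ge_hybrid_unittest.cc follow the same scaffold; the snippet below is a condensed sketch of that pattern, assembled only from code already shown in this file (CreateOpDesc, GeRootModel, HybridModel, HybridModelBuilder), not a standalone usage guide for GE.

// A minimal sketch of the shared test scaffold, assuming the includes and
// the CreateOpDesc helper defined earlier in this file.
ComputeGraphPtr graph = std::make_shared<ComputeGraph>("test");   // build a compute graph
OpDescPtr op_desc = CreateOpDesc("Add", "Add");                   // describe one operator
NodePtr node = graph->AddNode(op_desc);                           // add it as a node

GeRootModelPtr ge_root_model = std::make_shared<GeRootModel>(graph);  // wrap the graph
HybridModel hybrid_model(ge_root_model);                              // hybrid model over the root model
HybridModelBuilder hybrid_model_builder(hybrid_model);                // builder under test

// Each TEST_F then exercises a single builder or executor step and asserts
// its status, for example:
// ASSERT_EQ(hybrid_model_builder.IndexTaskDefs(graph, ge_model), SUCCESS);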