You cannot select more than 25 topics. Topics must start with a Chinese character, a letter or a number; they can include dashes ('-') and can be up to 35 characters long.

davinci_model_unittest.cc 12 kB

4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328
  1. /**
  2. * Copyright 2019-2020 Huawei Technologies Co., Ltd
  3. *
  4. * Licensed under the Apache License, Version 2.0 (the "License");
  5. * you may not use this file except in compliance with the License.
  6. * You may obtain a copy of the License at
  7. *
  8. * http://www.apache.org/licenses/LICENSE-2.0
  9. *
  10. * Unless required by applicable law or agreed to in writing, software
  11. * distributed under the License is distributed on an "AS IS" BASIS,
  12. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. * See the License for the specific language governing permissions and
  14. * limitations under the License.
  15. */
  16. #include <gtest/gtest.h>
  17. #define private public
  18. #define protected public
  19. #include "graph/utils/graph_utils.h"
  20. #include "common/profiling/profiling_manager.h"
  21. #include "graph/load/new_model_manager/davinci_model.h"
  22. using namespace std;
  23. namespace ge {
  24. extern OpDescPtr CreateOpDesc(string name, string type);
  25. class UtestDavinciModel : public testing::Test {
  26. protected:
  27. void SetUp() {}
  28. void TearDown() {}
  29. };
  30. TEST_F(UtestDavinciModel, init_success) {
  31. DavinciModel model(0, nullptr);
  32. ComputeGraphPtr graph = make_shared<ComputeGraph>("default");
  33. ProfilingManager::Instance().is_load_profiling_ = true;
  34. GeModelPtr ge_model = make_shared<GeModel>();
  35. ge_model->SetGraph(GraphUtils::CreateGraphFromComputeGraph(graph));
  36. AttrUtils::SetInt(ge_model, ATTR_MODEL_MEMORY_SIZE, 5120000);
  37. AttrUtils::SetInt(ge_model, ATTR_MODEL_STREAM_NUM, 1);
  38. shared_ptr<domi::ModelTaskDef> model_task_def = make_shared<domi::ModelTaskDef>();
  39. ge_model->SetModelTaskDef(model_task_def);
  40. GeTensorDesc tensor(GeShape(), FORMAT_NCHW, DT_FLOAT);
  41. TensorUtils::SetSize(tensor, 512);
  42. OpDescPtr op_input = CreateOpDesc("data", DATA);
  43. op_input->AddInputDesc(tensor);
  44. op_input->AddOutputDesc(tensor);
  45. op_input->SetInputOffset({1024});
  46. op_input->SetOutputOffset({1024});
  47. NodePtr node_input = graph->AddNode(op_input); // op_index = 0
  48. OpDescPtr op_kernel = CreateOpDesc("square", "Square");
  49. op_kernel->AddInputDesc(tensor);
  50. op_kernel->AddOutputDesc(tensor);
  51. op_kernel->SetInputOffset({1024});
  52. op_kernel->SetOutputOffset({1024});
  53. NodePtr node_kernel = graph->AddNode(op_kernel); // op_index = 1
  54. OpDescPtr op_memcpy = CreateOpDesc("memcpy", MEMCPYASYNC);
  55. op_memcpy->AddInputDesc(tensor);
  56. op_memcpy->AddOutputDesc(tensor);
  57. op_memcpy->SetInputOffset({1024});
  58. op_memcpy->SetOutputOffset({5120});
  59. NodePtr node_memcpy = graph->AddNode(op_memcpy); // op_index = 2
  60. OpDescPtr op_output = CreateOpDesc("output", NETOUTPUT);
  61. op_output->AddInputDesc(tensor);
  62. op_output->SetInputOffset({5120});
  63. op_output->SetSrcName( { "memcpy" } );
  64. op_output->SetSrcIndex( { 0 } );
  65. NodePtr node_output = graph->AddNode(op_output); // op_index = 3
  66. domi::TaskDef *task_def1 = model_task_def->add_task();
  67. task_def1->set_stream_id(0);
  68. task_def1->set_type(RT_MODEL_TASK_KERNEL);
  69. domi::KernelDef *kernel_def = task_def1->mutable_kernel();
  70. kernel_def->set_stub_func("stub_func");
  71. kernel_def->set_args_size(64);
  72. string args(64, '1');
  73. kernel_def->set_args(args.data(), 64);
  74. domi::KernelContext *context = kernel_def->mutable_context();
  75. context->set_op_index(1);
  76. context->set_kernel_type(2); // ccKernelType::TE
  77. uint16_t args_offset[9] = {0};
  78. context->set_args_offset(args_offset, 9 * sizeof(uint16_t));
  79. domi::TaskDef *task_def2 = model_task_def->add_task();
  80. task_def2->set_stream_id(0);
  81. task_def2->set_type(RT_MODEL_TASK_MEMCPY_ASYNC);
  82. domi::MemcpyAsyncDef *memcpy_async = task_def2->mutable_memcpy_async();
  83. memcpy_async->set_src(1024);
  84. memcpy_async->set_dst(5120);
  85. memcpy_async->set_dst_max(512);
  86. memcpy_async->set_count(1);
  87. memcpy_async->set_kind(RT_MEMCPY_DEVICE_TO_DEVICE);
  88. memcpy_async->set_op_index(2);
  89. EXPECT_EQ(model.Assign(ge_model), SUCCESS);
  90. EXPECT_EQ(model.Init(), SUCCESS);
  91. EXPECT_EQ(model.input_addrs_list_.size(), 1);
  92. EXPECT_EQ(model.output_addrs_list_.size(), 1);
  93. EXPECT_EQ(model.task_list_.size(), 2);
  94. OutputData output_data;
  95. vector<OutputTensorInfo> outputs;
  96. EXPECT_EQ(model.GenOutputTensorInfo(&output_data, outputs), SUCCESS);
  97. EXPECT_EQ(output_data.blobs.size(), 1);
  98. EXPECT_EQ(outputs.size(), 1);
  99. ProfilingManager::Instance().is_load_profiling_ = false;
  100. }
  101. TEST_F(UtestDavinciModel, init_data_op) {
  102. DavinciModel model(0, nullptr);
  103. model.ge_model_ = make_shared<GeModel>();
  104. model.runtime_param_.mem_base = (uint8_t *)0x08000000;
  105. model.runtime_param_.mem_size = 5120000;
  106. ComputeGraphPtr graph = make_shared<ComputeGraph>("default");
  107. OpDescPtr op_input = CreateOpDesc("data", DATA);
  108. GeTensorDesc tensor(GeShape(), FORMAT_NCHW, DT_FLOAT);
  109. TensorUtils::SetSize(tensor, 512);
  110. op_input->AddInputDesc(tensor);
  111. op_input->AddOutputDesc(tensor);
  112. op_input->SetInputOffset({1024});
  113. op_input->SetOutputOffset({5120});
  114. NodePtr node_input = graph->AddNode(op_input);
  115. OpDescPtr op_output = CreateOpDesc("output", NETOUTPUT);
  116. op_output->AddInputDesc(tensor);
  117. op_output->SetInputOffset({1024});
  118. op_output->SetSrcName( { "data" } );
  119. op_output->SetSrcIndex( { 0 } );
  120. NodePtr node_output = graph->AddNode(op_output);
  121. EXPECT_EQ(model.InitNodes(graph), SUCCESS);
  122. EXPECT_EQ(model.input_addrs_list_.size(), 1);
  123. EXPECT_EQ(model.output_addrs_list_.size(), 1);
  124. EXPECT_EQ(model.op_list_.size(), 2);
  125. }
  126. TEST_F(UtestDavinciModel, init_data_op_subgraph) {
  127. DavinciModel model(0, nullptr);
  128. model.runtime_param_.mem_base = (uint8_t *)0x08000000;
  129. model.runtime_param_.mem_size = 5120000;
  130. ComputeGraphPtr graph = make_shared<ComputeGraph>("default");
  131. OpDescPtr op_input = CreateOpDesc("data", DATA);
  132. GeTensorDesc tensor(GeShape(), FORMAT_NCHW, DT_FLOAT);
  133. op_input->AddInputDesc(tensor);
  134. op_input->AddOutputDesc(tensor);
  135. op_input->SetInputOffset({1024});
  136. op_input->SetOutputOffset({5120});
  137. NodePtr node = graph->AddNode(op_input);
  138. uint32_t data_op_index = 0;
  139. map<uint32_t, OpDescPtr> data_by_index;
  140. EXPECT_EQ(model.InitDataOp(nullptr, node, data_op_index, data_by_index), SUCCESS);
  141. EXPECT_EQ(model.input_addrs_list_.size(), 0);
  142. EXPECT_EQ(model.output_addrs_list_.size(), 0);
  143. EXPECT_EQ(data_op_index, 0);
  144. EXPECT_TRUE(data_by_index.empty());
  145. }
  146. TEST_F(UtestDavinciModel, init_netoutput_op_subgraph) {
  147. DavinciModel model(0, nullptr);
  148. model.runtime_param_.mem_base = (uint8_t *)0x08000000;
  149. model.runtime_param_.mem_size = 5120000;
  150. ComputeGraphPtr graph = make_shared<ComputeGraph>("default");
  151. OpDescPtr op_output = CreateOpDesc("output", NETOUTPUT);
  152. GeTensorDesc tensor(GeShape(), FORMAT_NCHW, DT_FLOAT);
  153. op_output->AddInputDesc(tensor);
  154. op_output->SetInputOffset({1024});
  155. op_output->SetSrcName( { "data" } );
  156. op_output->SetSrcIndex( { 0 } );
  157. NodePtr node = graph->AddNode(op_output);
  158. std::vector<OpDescPtr> output_op_list;
  159. EXPECT_EQ(model.InitNetOutput(nullptr, node, output_op_list), SUCCESS);
  160. EXPECT_EQ(model.input_addrs_list_.size(), 0);
  161. EXPECT_EQ(model.output_addrs_list_.size(), 0);
  162. EXPECT_TRUE(output_op_list.empty());
  163. }
  164. TEST_F(UtestDavinciModel, init_unknown) {
  165. DavinciModel model(0, nullptr);
  166. model.SetKnownNode(true);
  167. ComputeGraphPtr graph = make_shared<ComputeGraph>("default");
  168. GeModelPtr ge_model = make_shared<GeModel>();
  169. ge_model->SetGraph(GraphUtils::CreateGraphFromComputeGraph(graph));
  170. AttrUtils::SetInt(ge_model, ATTR_MODEL_MEMORY_SIZE, 5120000);
  171. AttrUtils::SetInt(ge_model, ATTR_MODEL_STREAM_NUM, 1);
  172. shared_ptr<domi::ModelTaskDef> model_task_def = make_shared<domi::ModelTaskDef>();
  173. ge_model->SetModelTaskDef(model_task_def);
  174. GeTensorDesc tensor(GeShape(), FORMAT_NCHW, DT_FLOAT);
  175. TensorUtils::SetSize(tensor, 512);
  176. OpDescPtr op_input = CreateOpDesc("data", DATA);
  177. op_input->AddInputDesc(tensor);
  178. op_input->AddOutputDesc(tensor);
  179. op_input->SetInputOffset({1024});
  180. op_input->SetOutputOffset({1024});
  181. NodePtr node_input = graph->AddNode(op_input); // op_index = 0
  182. OpDescPtr op_kernel = CreateOpDesc("square", "Square");
  183. op_kernel->AddInputDesc(tensor);
  184. op_kernel->AddOutputDesc(tensor);
  185. op_kernel->SetInputOffset({1024});
  186. op_kernel->SetOutputOffset({1024});
  187. NodePtr node_kernel = graph->AddNode(op_kernel); // op_index = 1
  188. OpDescPtr op_memcpy = CreateOpDesc("memcpy", MEMCPYASYNC);
  189. op_memcpy->AddInputDesc(tensor);
  190. op_memcpy->AddOutputDesc(tensor);
  191. op_memcpy->SetInputOffset({1024});
  192. op_memcpy->SetOutputOffset({5120});
  193. NodePtr node_memcpy = graph->AddNode(op_memcpy); // op_index = 2
  194. OpDescPtr op_output = CreateOpDesc("output", NETOUTPUT);
  195. op_output->AddInputDesc(tensor);
  196. op_output->SetInputOffset({5120});
  197. op_output->SetSrcName( { "memcpy" } );
  198. op_output->SetSrcIndex( { 0 } );
  199. NodePtr node_output = graph->AddNode(op_output); // op_index = 3
  200. domi::TaskDef *task_def1 = model_task_def->add_task();
  201. task_def1->set_stream_id(0);
  202. task_def1->set_type(RT_MODEL_TASK_KERNEL);
  203. domi::KernelDef *kernel_def = task_def1->mutable_kernel();
  204. kernel_def->set_stub_func("stub_func");
  205. kernel_def->set_args_size(64);
  206. string args(64, '1');
  207. kernel_def->set_args(args.data(), 64);
  208. domi::KernelContext *context = kernel_def->mutable_context();
  209. context->set_op_index(1);
  210. context->set_kernel_type(2); // ccKernelType::TE
  211. uint16_t args_offset[9] = {0};
  212. context->set_args_offset(args_offset, 9 * sizeof(uint16_t));
  213. domi::TaskDef *task_def2 = model_task_def->add_task();
  214. task_def2->set_stream_id(0);
  215. task_def2->set_type(RT_MODEL_TASK_MEMCPY_ASYNC);
  216. domi::MemcpyAsyncDef *memcpy_async = task_def2->mutable_memcpy_async();
  217. memcpy_async->set_src(1024);
  218. memcpy_async->set_dst(5120);
  219. memcpy_async->set_dst_max(512);
  220. memcpy_async->set_count(1);
  221. memcpy_async->set_kind(RT_MEMCPY_DEVICE_TO_DEVICE);
  222. memcpy_async->set_op_index(2);
  223. EXPECT_EQ(model.Assign(ge_model), SUCCESS);
  224. EXPECT_EQ(model.Init(), SUCCESS);
  225. EXPECT_EQ(model.input_addrs_list_.size(), 1);
  226. EXPECT_EQ(model.output_addrs_list_.size(), 1);
  227. EXPECT_EQ(model.task_list_.size(), 2);
  228. EXPECT_EQ(model.task_list_[0]->UpdateArgs(), SUCCESS);
  229. EXPECT_EQ(model.task_list_[1]->UpdateArgs(), SUCCESS);
  230. vector<string> out_shape_info;
  231. model.GetModelAttr(out_shape_info);
  232. vector<InputOutputDescInfo> input_descs;
  233. vector<InputOutputDescInfo> output_descs;
  234. EXPECT_EQ(model.GetInputOutputDescInfo(input_descs, output_descs), SUCCESS);
  235. int32_t virtual_addr = 0;
  236. const vector<void *> inputs = { &virtual_addr };
  237. const vector<void *> outputs = { &virtual_addr };
  238. EXPECT_EQ(model.UpdateKnownNodeArgs(inputs, outputs), SUCCESS);
  239. }
  240. TEST_F(UtestDavinciModel, ReturnNoOutput_test) {
  241. DavinciModel model(0, nullptr);
  242. GeTensorDesc tensor(GeShape(), FORMAT_NCHW, DT_FLOAT);
  243. TensorUtils::SetSize(tensor, 512);
  244. OpDescPtr var1 = CreateOpDesc("var1", VARIABLE);
  245. var1->AddInputDesc(tensor);
  246. var1->AddOutputDesc(tensor);
  247. var1->SetInputOffset({1024});
  248. var1->SetOutputOffset({1024});
  249. model.variable_op_list_.push_back(var1);
  250. EXPECT_EQ(model.ReturnNoOutput(1), PARAM_INVALID);
  251. }
  252. TEST_F(UtestDavinciModel, SyncVarData_test) {
  253. DavinciModel model(0, nullptr);
  254. GeTensorDesc tensor(GeShape(), FORMAT_NCHW, DT_FLOAT);
  255. TensorUtils::SetSize(tensor, 512);
  256. OpDescPtr var1 = CreateOpDesc("var1", VARIABLE);
  257. var1->AddInputDesc(tensor);
  258. var1->AddOutputDesc(tensor);
  259. var1->SetInputOffset({1024});
  260. var1->SetOutputOffset({1024});
  261. model.variable_op_list_.push_back(var1);
  262. EXPECT_NE(model.SyncVarData(), SUCCESS);
  263. }
  264. } // namespace ge

图引擎模块(GE)是MindSpore的一个子模块,其代码由C++实现,位于前端模块ME和底层硬件之间,起到承接作用。图引擎模块以ME下发的图作为输入,然后进行一系列的深度图优化操作,最后输出一张可以在底层硬件上高效运行的图。GE针对昇腾AI处理器的硬件结构特点,做了特定的优化工作,以此来充分发挥出昇腾AI处理器的强大算力。在进行模型训练/推理时,GE会被自动调用而用户并不感知。GE主要由GE API和GE Core两部分组成,详细的架构图如下所示