
single_op_task_unittest.cc 16 kB

/**
 * Copyright 2019-2020 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
#include <gtest/gtest.h>
#include <vector>
#include <iostream>
#include "graph/load/model_manager/model_utils.h"
#include "graph/utils/graph_utils.h"
#include "hybrid/node_executor/aicpu/aicpu_ext_info.h"
#include "runtime/rt.h"

#define protected public
#define private public
#include "single_op/single_op_model.h"
#include "aicpu/common/aicpu_task_struct.h"
#include "single_op/task/tbe_task_builder.h"
#include "single_op/task/op_task.h"
#include "external/register/op_tiling_registry.h"
#undef private
#undef protected
#include "tests/depends/runtime/src/runtime_stub.h"

using namespace std;
using namespace testing;
using namespace ge;
using namespace optiling;

namespace {
struct AicpuTaskStruct {
  aicpu::AicpuParamHead head;
  uint64_t io_addrp[3];
} __attribute__((packed));
}  // namespace

class UtestSingleOpTask : public testing::Test {
 protected:
  void SetUp() {
    RTS_STUB_SETUP();
  }

  void TearDown() {
    RTS_STUB_TEARDOWN();
  }
};
// Builds a TBE kernel task (RT_MODEL_TASK_ALL_KERNEL) from a TaskDef and launches it,
// both before and after a kernel handle is set.
TEST_F(UtestSingleOpTask, test_build_kernel_task) {
  string model_data_str = "123456789";
  SingleOpModel model("model", model_data_str.c_str(), model_data_str.size());
  model.input_offset_list_.push_back(0);
  model.input_sizes_.push_back(16);
  model.output_offset_list_.push_back(0);
  model.output_sizes_.push_back(16);

  auto graph = make_shared<ComputeGraph>("graph");
  auto op_desc = make_shared<OpDesc>("Add", "Add");
  AttrUtils::SetStr(op_desc, TVM_ATTR_NAME_MAGIC, "RT_DEV_BINARY_MAGIC_ELF");
  std::vector<char> kernelBin;
  TBEKernelPtr tbe_kernel = std::make_shared<ge::OpKernelBin>("name/Add", std::move(kernelBin));
  op_desc->SetExtAttr(ge::OP_EXTATTR_NAME_TBE_KERNEL, tbe_kernel);
  std::string kernel_name("kernel/Add");
  AttrUtils::SetStr(op_desc, op_desc->GetName() + "_kernelname", kernel_name);
  vector<int64_t> shape{16, 16};
  GeShape ge_shape(shape);
  GeTensorDesc desc(ge_shape);
  op_desc->AddInputDesc(desc);
  op_desc->AddOutputDesc(desc);
  auto node = graph->AddNode(op_desc);

  std::mutex stream_mu_;
  rtStream_t stream_ = nullptr;
  StreamResource stream_resource(0);
  SingleOp single_op(&stream_resource, &stream_mu_, stream_);

  domi::TaskDef task_def;
  task_def.set_type(RT_MODEL_TASK_ALL_KERNEL);
  domi::KernelDefWithHandle *kernel_with_handle = task_def.mutable_kernel_with_handle();
  kernel_with_handle->set_original_kernel_key("");
  kernel_with_handle->set_node_info("");
  kernel_with_handle->set_block_dim(32);
  kernel_with_handle->set_args_size(64);
  string args(64, '1');
  kernel_with_handle->set_args(args.data(), 64);
  domi::KernelContext *context = kernel_with_handle->mutable_context();
  context->set_op_index(1);
  context->set_kernel_type(2);  // ccKernelType::TE
  uint16_t args_offset[9] = {0};
  context->set_args_offset(args_offset, 9 * sizeof(uint16_t));

  model.op_list_[1] = node;
  TbeOpTask task_tmp;
  TbeOpTask *task = &task_tmp;
  ASSERT_EQ(model.BuildKernelTask(task_def, &task), SUCCESS);

  ge::DataBuffer data_buffer;
  vector<GeTensorDesc> input_desc;
  vector<DataBuffer> input_buffers = { data_buffer };
  vector<GeTensorDesc> output_desc;
  vector<DataBuffer> output_buffers = { data_buffer };
  task->node_ = node;
  OpTilingFunc op_tiling_func = [](const TeOpParas &, const OpCompileInfo &, OpRunInfo &) -> bool { return true; };
  OpTilingRegistryInterf("Add", op_tiling_func);
  ge::AttrUtils::SetStr(op_desc, "compile_info_key", "op_compile_info_key");
  ge::AttrUtils::SetStr(op_desc, "compile_info_json", "op_compile_info_json");
  char c = '0';
  char *buffer = &c;
  task->tiling_buffer_ = buffer;
  task->max_tiling_size_ = 64;
  task->tiling_data_ = "tiling_data";
  task->arg_size_ = 64;
  task->args_.reset(new (std::nothrow) uint8_t[sizeof(void *) * 3]);
  ASSERT_EQ(task->LaunchKernel(input_desc, input_buffers, output_desc, output_buffers, stream_), SUCCESS);

  char handle[] = "00";  // string literals are not convertible to char * in C++11 and later
  task->SetHandle(handle);
  ASSERT_EQ(task->LaunchKernel(input_desc, input_buffers, output_desc, output_buffers, stream_), SUCCESS);
}
// Verifies TbeOpTask argument indexing and I/O address updates when the first input is const,
// plus tiling-argument updates when a tiling buffer and workspaces are present.
TEST_F(UtestSingleOpTask, test_update_ioaddr) {
  auto graph = make_shared<ComputeGraph>("graph");
  auto op_desc = make_shared<OpDesc>("Add", "Add");
  GeTensorDesc desc;
  op_desc->AddInputDesc(desc);
  op_desc->AddInputDesc(desc);
  op_desc->AddOutputDesc(desc);
  vector<bool> is_input_const = { true, false };
  op_desc->SetIsInputConst(is_input_const);
  auto node = graph->AddNode(op_desc);

  TbeOpTask task;
  task.op_desc_ = op_desc;
  task.node_ = node;
  ASSERT_EQ(task.SetArgIndex(), SUCCESS);
  task.arg_size_ = sizeof(void *) * 4;
  task.args_.reset(new (std::nothrow) uint8_t[task.arg_size_]);
  task.arg_index_ = {0};
  task.input_num_ = 2;
  task.output_num_ = 1;

  vector<void *> args;
  vector<DataBuffer> inputs;
  vector<DataBuffer> outputs;
  ASSERT_EQ(task.UpdateIoAddr(inputs, outputs), ACL_ERROR_GE_PARAM_INVALID);

  ge::DataBuffer data_buffer;
  inputs = { data_buffer };
  outputs = { data_buffer };
  ASSERT_EQ(task.UpdateIoAddr(inputs, outputs), SUCCESS);

  task.tiling_buffer_ = (void *)0x0001;
  task.workspaces_ = { (void *)0x0002 };
  ASSERT_EQ(task.UpdateTilingArgs(nullptr), SUCCESS);
  task.tiling_buffer_ = nullptr;
}
// Exercises AtomicAddrCleanOpTask: index initialization fails until the atomic output
// indices and the args buffer are consistent, then I/O address and tiling updates succeed.
TEST_F(UtestSingleOpTask, test_atomic_exec) {
  auto graph = make_shared<ComputeGraph>("graph");
  auto op_desc = make_shared<OpDesc>("Add", "Add");
  GeTensorDesc desc;
  op_desc->AddInputDesc(desc);
  op_desc->AddOutputDesc(desc);
  auto node = graph->AddNode(op_desc);

  AtomicAddrCleanOpTask task;
  task.op_desc_ = op_desc;
  task.node_ = node;
  vector<DataBuffer> inputs;
  vector<DataBuffer> outputs;

  std::vector<int64_t> atomic_output_indices;
  ge::AttrUtils::SetListInt(op_desc, ATOMIC_ATTR_OUTPUT_INDEX, atomic_output_indices);
  ASSERT_EQ(task.InitAtomicAddrCleanIndices(), INTERNAL_ERROR);
  atomic_output_indices = { 0 };
  ge::AttrUtils::SetListInt(op_desc, ATOMIC_ATTR_OUTPUT_INDEX, atomic_output_indices);
  ASSERT_EQ(task.InitAtomicAddrCleanIndices(), INTERNAL_ERROR);
  task.arg_size_ = sizeof(void *) * 2;
  task.args_.reset(new (std::nothrow) uint8_t[task.arg_size_]);
  ASSERT_EQ(task.InitAtomicAddrCleanIndices(), SUCCESS);

  ASSERT_EQ(task.UpdateIoAddr(inputs, outputs), ACL_ERROR_GE_PARAM_INVALID);
  ge::DataBuffer data_buffer;
  outputs = { data_buffer };
  ASSERT_EQ(task.UpdateIoAddr(inputs, outputs), SUCCESS);

  task.tiling_buffer_ = (void *)0x0001;
  ASSERT_EQ(task.UpdateTilingArgs(nullptr), SUCCESS);
  task.tiling_buffer_ = nullptr;

  optiling::utils::OpRunInfo run_info(0, true, 0);
  task.CalcTilingInfo(run_info);
}
// Covers the DEPEND_COMPUTE path of AiCpuCCTask: summary/copy initialization and
// copying output shape data to HBM via the memcpy kernel.
TEST_F(UtestSingleOpTask, test_aicpu_task_launch_kernel) {
  AiCpuCCTask task;
  rtStream_t stream;
  ASSERT_EQ(rtStreamCreate(&stream, 0), RT_ERROR_NONE);

  task.num_inputs_ = 2;
  task.num_outputs_ = 1;
  task.input_is_const_ = {true, false};
  const int total_addr = 3;  // must be a constant expression to initialize the array below
  uint32_t *addrs[total_addr] = {nullptr, nullptr, nullptr};
  task.io_addr_ = reinterpret_cast<uintptr_t *>(addrs);
  task.io_addr_num_ = total_addr;

  vector<DataBuffer> outputs(1, DataBuffer());
  outputs[0].data = 0;
  task.unknown_type_ = ge::DEPEND_COMPUTE;
  ASSERT_EQ(task.InitForSummaryAndCopy(), SUCCESS);
  auto &summary = task.output_summary_host_[0];
  summary.shape_data_ptr = 0;
  summary.shape_data_size = 1;
  summary.raw_data_ptr = 0;
  summary.raw_data_size = 1;

  void *shape_buffer = nullptr;
  rtMalloc(&shape_buffer, 1, RT_MEMORY_HBM);
  task.out_shape_hbm_.emplace_back(shape_buffer);
  task.memcpy_so_name_ = "libcpu_kernel.so";
  task.memcpy_kernel_name_ = "RunCpuKernel";

  AicpuTaskStruct args;
  args.head.length = sizeof(args);
  args.head.ioAddrNum = 3;

  domi::TaskDef task_def;
  domi::KernelDef *kernel_def = task_def.mutable_kernel();
  kernel_def->set_args(reinterpret_cast<const char *>(&args), args.head.length);
  kernel_def->set_args_size(args.head.length);
  auto &memcpy_args = kernel_def->args();
  task.memcpy_args_size_ = kernel_def->args_size();
  task.memcpy_args_.reset(new (std::nothrow) uint8_t[task.memcpy_args_size_]());
  memcpy_s(task.memcpy_args_.get(), task.memcpy_args_size_, memcpy_args.c_str(), memcpy_args.size());

  ASSERT_EQ(task.CopyDataToHbm(outputs, stream), SUCCESS);
}
// Checks AiCpuCCTask::UpdateIoAddr: const inputs keep their slot untouched, valid buffers
// are written into the I/O address table, and null buffers are rejected.
TEST_F(UtestSingleOpTask, test_aicpu_task_update_io_addr) {
  AiCpuCCTask task;
  task.num_inputs_ = 2;
  task.num_outputs_ = 1;
  task.input_is_const_ = {true, false};
  const int total_addr = 3;  // must be a constant expression to initialize the array below
  uint32_t *addrs[total_addr] = {nullptr, nullptr, nullptr};
  task.io_addr_ = reinterpret_cast<uintptr_t *>(addrs);
  task.io_addr_num_ = total_addr;
  {
    vector<DataBuffer> inputs(1, DataBuffer());
    vector<DataBuffer> outputs(1, DataBuffer());
    auto ret = task.UpdateIoAddr(inputs, outputs);
    ASSERT_EQ(ret, SUCCESS);
    ASSERT_EQ(addrs[0], nullptr);
    ASSERT_EQ(addrs[1], nullptr);
    ASSERT_EQ(addrs[2], nullptr);
  }
  {
    uint32_t data_buf[2];
    vector<DataBuffer> inputs{DataBuffer(&data_buf[0], 4, false)};
    vector<DataBuffer> outputs{DataBuffer(&data_buf[1], 4, false)};
    auto ret = task.UpdateIoAddr(inputs, outputs);
    ASSERT_EQ(ret, SUCCESS);
    ASSERT_EQ(addrs[0], nullptr);
    ASSERT_EQ(addrs[1], &data_buf[0]);
    ASSERT_EQ(addrs[2], &data_buf[1]);
  }
  {
    uint32_t data_buf[2];
    vector<DataBuffer> inputs{DataBuffer(nullptr, 4, false)};
    vector<DataBuffer> outputs{DataBuffer(&data_buf[1], 4, false)};
    auto ret = task.UpdateIoAddr(inputs, outputs);
    ASSERT_EQ(ret, PARAM_INVALID);
  }
  {
    uint32_t data_buf[2];
    vector<DataBuffer> inputs{DataBuffer(&data_buf[0], 4, false)};
    vector<DataBuffer> outputs{DataBuffer(nullptr, 4, false)};
    auto ret = task.UpdateIoAddr(inputs, outputs);
    ASSERT_EQ(ret, PARAM_INVALID);
  }
}
// Launches a blocking AICPU op through AiCpuCCTask using FWK_ADPT_EXT_ASYNCWAIT ext info.
TEST_F(UtestSingleOpTask, test_blocking_aicpu_op_01) {
  int len = sizeof(hybrid::AicpuExtInfo) + sizeof(hybrid::AsyncWaitInfo);
  vector<char> aicpu_ext_info(len, 0);
  char *buf = aicpu_ext_info.data();
  int offset = 0;
  hybrid::AicpuExtInfo *ext_info = reinterpret_cast<hybrid::AicpuExtInfo *>(buf + offset);
  ext_info->infoType = aicpu::FWKAdapter::FWK_ADPT_EXT_ASYNCWAIT;
  ext_info->infoLen = sizeof(hybrid::AsyncWaitInfo);
  offset += sizeof(hybrid::AicpuExtInfo);
  hybrid::AsyncWaitInfo *async_wait_info = reinterpret_cast<hybrid::AsyncWaitInfo *>(buf + offset);
  async_wait_info->waitType = 0;
  async_wait_info->waitId = 0;
  async_wait_info->timeOut = 0;
  async_wait_info->reserved = 0;

  domi::KernelDef kernel_def;
  kernel_def.set_kernel_ext_info(buf, len);
  kernel_def.set_kernel_ext_info_size(len);

  auto op_desc = make_shared<OpDesc>("deque", "Deque");
  ge::AttrUtils::SetBool(op_desc, ATTR_NAME_IS_BLOCKING_OP, true);
  AiCpuCCTask aicpu_task;
  aicpu_task.SetOpDesc(op_desc);
  rtStream_t stream;
  ASSERT_EQ(rtStreamCreate(&stream, 0), RT_ERROR_NONE);
  ASSERT_EQ(aicpu_task.SetExtInfoAndType(kernel_def.kernel_ext_info(), 0), SUCCESS);
  ASSERT_EQ(aicpu_task.LaunchKernel(stream), SUCCESS);
}
// Same as test_blocking_aicpu_op_01, but using AiCpuTask instead of AiCpuCCTask.
TEST_F(UtestSingleOpTask, test_blocking_aicpu_op_02) {
  int len = sizeof(hybrid::AicpuExtInfo) + sizeof(hybrid::AsyncWaitInfo);
  vector<char> aicpu_ext_info(len, 0);
  char *buf = aicpu_ext_info.data();
  int offset = 0;
  hybrid::AicpuExtInfo *ext_info = reinterpret_cast<hybrid::AicpuExtInfo *>(buf + offset);
  ext_info->infoType = aicpu::FWKAdapter::FWK_ADPT_EXT_ASYNCWAIT;
  ext_info->infoLen = sizeof(hybrid::AsyncWaitInfo);
  offset += sizeof(hybrid::AicpuExtInfo);
  hybrid::AsyncWaitInfo *async_wait_info = reinterpret_cast<hybrid::AsyncWaitInfo *>(buf + offset);
  async_wait_info->waitType = 0;
  async_wait_info->waitId = 0;
  async_wait_info->timeOut = 0;
  async_wait_info->reserved = 0;

  domi::KernelDef kernel_def;
  kernel_def.set_kernel_ext_info(buf, len);
  kernel_def.set_kernel_ext_info_size(len);

  auto op_desc = make_shared<OpDesc>("deque", "Deque");
  ge::AttrUtils::SetBool(op_desc, ATTR_NAME_IS_BLOCKING_OP, true);
  AiCpuTask aicpu_task;
  aicpu_task.SetOpDesc(op_desc);
  rtStream_t stream;
  ASSERT_EQ(rtStreamCreate(&stream, 0), RT_ERROR_NONE);
  ASSERT_EQ(aicpu_task.SetExtInfoAndType(kernel_def.kernel_ext_info(), 0), SUCCESS);
  ASSERT_EQ(aicpu_task.LaunchKernel(stream), SUCCESS);
}
// Drives the failure paths of the blocking AICPU op: the runtime stubs force errors from
// rtGetDevice / rtGetDeviceCapability / rtStreamWaitEvent / rtEventReset, while the
// "not supported" capability value still lets SetExtInfoAndType and LaunchKernel succeed.
TEST_F(UtestSingleOpTask, test_blocking_aicpu_op_fail) {
  int len = sizeof(hybrid::AicpuExtInfo) + sizeof(hybrid::AsyncWaitInfo);
  vector<char> aicpu_ext_info(len, 0);
  char *buf = aicpu_ext_info.data();
  int offset = 0;
  hybrid::AicpuExtInfo *ext_info = reinterpret_cast<hybrid::AicpuExtInfo *>(buf + offset);
  ext_info->infoType = aicpu::FWKAdapter::FWK_ADPT_EXT_ASYNCWAIT;
  ext_info->infoLen = sizeof(hybrid::AsyncWaitInfo);
  offset += sizeof(hybrid::AicpuExtInfo);
  hybrid::AsyncWaitInfo *async_wait_info = reinterpret_cast<hybrid::AsyncWaitInfo *>(buf + offset);
  async_wait_info->waitType = 0;
  async_wait_info->waitId = 0;
  async_wait_info->timeOut = 0;
  async_wait_info->reserved = 0;

  domi::KernelDef kernel_def;
  kernel_def.set_kernel_ext_info(buf, len);
  kernel_def.set_kernel_ext_info_size(len);

  auto op_desc = make_shared<OpDesc>("deque", "Deque");
  ge::AttrUtils::SetBool(op_desc, ATTR_NAME_IS_BLOCKING_OP, true);
  AiCpuTask aicpu_task;
  aicpu_task.SetOpDesc(op_desc);
  rtStream_t stream;
  ASSERT_EQ(rtStreamCreate(&stream, 0), RT_ERROR_NONE);
  ASSERT_EQ(aicpu_task.SetExtInfoAndType(kernel_def.kernel_ext_info(), 0), SUCCESS);
  ASSERT_EQ(aicpu_task.LaunchKernel(stream), SUCCESS);

  RTS_STUB_RETURN_VALUE(rtGetDevice, rtError_t, 0x78000001);
  ASSERT_EQ(aicpu_task.SetExtInfoAndType(kernel_def.kernel_ext_info(), 0), FAILED);
  RTS_STUB_RETURN_VALUE(rtGetDeviceCapability, rtError_t, 0x78000001);
  ASSERT_EQ(aicpu_task.SetExtInfoAndType(kernel_def.kernel_ext_info(), 0), FAILED);
  RTS_STUB_RETURN_VALUE(rtGetDeviceCapability, rtError_t, 0x78000001);
  ASSERT_EQ(aicpu_task.SetExtInfoAndType(kernel_def.kernel_ext_info(), 0), FAILED);
  RTS_STUB_RETURN_VALUE(rtGetDeviceCapability, rtError_t, RT_ERROR_NONE);
  RTS_STUB_OUTBOUND_VALUE(rtGetDeviceCapability, int32_t, value, RT_AICPU_BLOCKING_OP_SUPPORT + 1);
  ASSERT_EQ(aicpu_task.SetExtInfoAndType(kernel_def.kernel_ext_info(), 0), FAILED);

  RTS_STUB_RETURN_VALUE(rtGetDevice, rtError_t, 0x78000001);
  ASSERT_EQ(aicpu_task.LaunchKernel(stream), FAILED);
  ASSERT_EQ(aicpu_task.SetExtInfoAndType(kernel_def.kernel_ext_info(), 0), SUCCESS);
  RTS_STUB_RETURN_VALUE(rtStreamWaitEvent, rtError_t, 0x78000001);
  ASSERT_EQ(aicpu_task.LaunchKernel(stream), FAILED);
  ASSERT_EQ(aicpu_task.SetExtInfoAndType(kernel_def.kernel_ext_info(), 0), SUCCESS);
  RTS_STUB_RETURN_VALUE(rtEventReset, rtError_t, 0x78000001);
  ASSERT_EQ(aicpu_task.LaunchKernel(stream), FAILED);

  RTS_STUB_RETURN_VALUE(rtGetDeviceCapability, rtError_t, RT_ERROR_NONE);
  RTS_STUB_OUTBOUND_VALUE(rtGetDeviceCapability, int32_t, value, RT_AICPU_BLOCKING_OP_NOT_SUPPORT);
  EXPECT_EQ(aicpu_task.SetExtInfoAndType(kernel_def.kernel_ext_info(), 0), SUCCESS);
  RTS_STUB_RETURN_VALUE(rtGetDeviceCapability, rtError_t, RT_ERROR_NONE);
  RTS_STUB_OUTBOUND_VALUE(rtGetDeviceCapability, int32_t, value, RT_AICPU_BLOCKING_OP_NOT_SUPPORT);
  EXPECT_EQ(aicpu_task.LaunchKernel(stream), SUCCESS);
}

The Graph Engine (GE) module is a submodule of MindSpore. Implemented in C++, it sits between the frontend module ME and the underlying hardware and acts as the bridge between them. GE takes the graph delivered by ME as input, performs a series of in-depth graph optimizations, and finally outputs a graph that can run efficiently on the underlying hardware. GE applies optimizations tailored to the hardware architecture of the Ascend AI processor in order to fully exploit its compute power. During model training and inference, GE is invoked automatically and is transparent to the user. GE consists of two main parts, GE API and GE Core, as illustrated in the detailed architecture diagram below.
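As a rough illustration of the GE API half of that split, the following is a minimal sketch of how a client might drive GE directly, assuming the public ge_api.h entry points (GEInitialize, Session::AddGraph, Session::RunGraph, GEFinalize); the option map contents and the pre-built Graph are placeholders and are not taken from this repository.

// Hypothetical sketch only: drives GE through the GE API layer, which ME normally does
// on the user's behalf. Option keys and the incoming Graph are illustrative placeholders.
#include <map>
#include <string>
#include <vector>
#include "ge/ge_api.h"

ge::Status RunGraphOnce(const ge::Graph &graph, const std::vector<ge::Tensor> &inputs,
                        std::vector<ge::Tensor> &outputs) {
  std::map<std::string, std::string> options;   // device / precision options would go here
  ge::Status ret = ge::GEInitialize(options);   // bring up GE Core
  if (ret != ge::SUCCESS) {
    return ret;
  }
  {
    ge::Session session(options);               // a session owns added graphs and their resources
    ret = session.AddGraph(0, graph);           // hand the frontend-built graph to GE for optimization
    if (ret == ge::SUCCESS) {
      ret = session.RunGraph(0, inputs, outputs);  // compile if needed, then execute
    }
  }                                             // destroy the session before finalizing GE
  ge::GEFinalize();
  return ret;
}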