You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number; can include dashes ('-'); and can be up to 35 characters long.

single_op_model.cc 18 kB

5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460
  1. /**
  2. * Copyright 2019-2020 Huawei Technologies Co., Ltd
  3. *
  4. * Licensed under the Apache License, Version 2.0 (the "License");
  5. * you may not use this file except in compliance with the License.
  6. * You may obtain a copy of the License at
  7. *
  8. * http://www.apache.org/licenses/LICENSE-2.0
  9. *
  10. * Unless required by applicable law or agreed to in writing, software
  11. * distributed under the License is distributed on an "AS IS" BASIS,
  12. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. * See the License for the specific language governing permissions and
  14. * limitations under the License.
  15. */
  16. #include "single_op/single_op_model.h"
  17. #include <atomic>
  18. #include <memory>
  19. #include <string>
  20. #include <vector>
  21. #include "framework/common/debug/ge_log.h"
  22. #include "graph/debug/ge_attr_define.h"
  23. #include "graph/load/new_model_manager/model_utils.h"
  24. #include "graph/utils/attr_utils.h"
  25. #include "graph/utils/graph_utils.h"
  26. #include "graph/utils/tensor_utils.h"
  27. #include "runtime/rt.h"
  28. #include "task/aicpu_task_builder.h"
  29. #include "task/aicpu_kernel_task_builder.h"
  30. #include "task/tbe_task_builder.h"
  // File-local counter; each AICPU_TF task built in this file takes the current value as its
  // session id and post-increments the counter.
  static std::atomic<std::uint64_t> aicpu_sessionid{0};
  32. using domi::TaskDef;
  33. using std::unique_ptr;
  34. using std::vector;
  35. namespace ge {
  namespace {
  // Exact number of output offsets a Data op must report (enforced by ParseInputNode).
  constexpr size_t kDataOutputNum = 1U;
  }  // namespace
  39. SingleOpModel::SingleOpModel(const std::string &model_name, const void *model_data, uint32_t model_size)
  40. : model_name_(model_name), ori_model_data_(model_data), ori_model_size_(model_size) {}
  41. Status SingleOpModel::Init() {
  42. GE_CHK_STATUS_RET_NOLOG(InitModel());
  43. return LoadAllNodes();
  44. }
  45. Status SingleOpModel::InitModel() {
  46. ge::ModelData model;
  47. model.model_len = ori_model_size_;
  48. model.model_data = const_cast<void *>(ori_model_data_);
  49. auto ret = model_helper_.LoadModel(model);
  50. if (ret != SUCCESS) {
  51. GELOGE(ret, "LoadModel failed");
  52. return ret;
  53. }
  54. return SUCCESS;
  55. }
  56. void SingleOpModel::ParseOpModelParams(ModelHelper &model_helper, SingleOpModelParam &param) {
  57. int64_t value = 0;
  58. bool ret = false;
  59. std::shared_ptr<ge::GeModel> model = model_helper.GetGeModel();
  60. GE_CHECK_NOTNULL_JUST_RETURN(model);
  61. ret = ge::AttrUtils::GetInt(model, ATTR_MODEL_MEMORY_SIZE, value);
  62. param.memory_size = ret ? static_cast<uint64_t>(value) : 0;
  63. ret = ge::AttrUtils::GetInt(model, ATTR_MODEL_ZERO_COPY_MEMORY_SIZE, value);
  64. param.zero_copy_mem_size = ret ? static_cast<uint64_t>(value) : 0;
  65. ret = ge::AttrUtils::GetInt(model, ATTR_MODEL_WEIGHT_SIZE, value);
  66. param.weight_size = ret ? static_cast<uint64_t>(value) : 0;
  67. ret = ge::AttrUtils::GetInt(model, MODEL_ATTR_TASK_GEN_BASE_ADDR, value);
  68. param.base_addr = ret ? static_cast<uint64_t>(value) : 0;
  69. ret = ge::AttrUtils::GetInt(model, MODEL_ATTR_TASK_GEN_WEIGHT_ADDR, value);
  70. param.weight_addr = ret ? static_cast<uint64_t>(value) : 0;
  71. ret = ge::AttrUtils::GetInt(model, ATTR_MODEL_CORE_TYPE, value);
  72. param.core_type = ret ? value : 0;
  73. GELOGI("ParseOpModelParams(), total_memory_size:%lu, zero_copy_size:%lu, weight_size:%lu. core_type = %lu",
  74. param.memory_size, param.zero_copy_mem_size, param.weight_size, param.core_type);
  75. }
  76. Status SingleOpModel::InitModelMem(StreamResource &res) {
  77. ParseOpModelParams(model_helper_, model_params_);
  78. if (model_params_.memory_size > model_params_.zero_copy_mem_size) {
  79. const string purpose("malloc feature map memory on model execute.");
  80. GELOGI("total memory: %lu, zero_copy_mem: %lu", model_params_.memory_size, model_params_.zero_copy_mem_size);
  81. model_params_.mem_base = res.MallocMemory(purpose, model_params_.memory_size - model_params_.zero_copy_mem_size);
  82. if (model_params_.mem_base == nullptr) {
  83. return RT_FAILED;
  84. }
  85. }
  86. if (model_params_.weight_size > 0 && has_weight_) {
  87. const string purpose("malloc weights memory on model execute.");
  88. model_params_.weight_base = res.MallocWeight(purpose, model_params_.weight_size);
  89. if (model_params_.weight_base == nullptr) {
  90. // no need to free memory, for that was handled by StreamResources
  91. return RT_FAILED;
  92. }
  93. auto weight_buffer = model_helper_.GetGeModel()->GetWeight();
  94. GELOGI("To copy weight to device. weight size = %zu", weight_buffer.GetSize());
  95. GE_CHK_RT_RET(rtMemcpy(model_params_.weight_base,
  96. model_params_.weight_size,
  97. weight_buffer.GetData(),
  98. weight_buffer.GetSize(),
  99. RT_MEMCPY_HOST_TO_DEVICE));
  100. }
  101. return SUCCESS;
  102. }
  103. Status SingleOpModel::ParseInputNode(const OpDescPtr &op_desc) {
  104. vector<int64_t> offsets = op_desc->GetOutputOffset();
  105. if (offsets.size() != kDataOutputNum) {
  106. GELOGE(PARAM_INVALID, "Data op should have only one output, but got %zu", op_desc->GetOutputOffset().size());
  107. return PARAM_INVALID;
  108. }
  109. auto output_desc = op_desc->GetOutputDescPtr(0);
  110. GE_CHECK_NOTNULL(output_desc);
  111. int64_t tensor_size = 0;
  112. (void)TensorUtils::GetSize(*output_desc, tensor_size);
  113. input_offset_list_.emplace_back(offsets[0]);
  114. input_sizes_.emplace_back(tensor_size);
  115. GELOGI("[%s] parse input node: %s, size = %ld, offset = %u", model_name_.c_str(), op_desc->GetName().c_str(),
  116. tensor_size, static_cast<uint32_t>(offsets[0]));
  117. return SUCCESS;
  118. }
  119. void SingleOpModel::ParseOutputNode(const OpDescPtr &op_desc) {
  120. vector<int64_t> offsets = op_desc->GetInputOffset();
  121. for (uint32_t k = 0; k < static_cast<uint32_t>(offsets.size()); ++k) {
  122. auto input_desc = op_desc->GetInputDescPtr(k);
  123. if (input_desc == nullptr) {
  124. continue;
  125. }
  126. int64_t tensor_size = 0;
  127. (void)TensorUtils::GetSize(*input_desc, tensor_size);
  128. output_offset_list_.emplace_back(offsets[k]);
  129. output_sizes_.emplace_back(tensor_size);
  130. GELOGI("[%s] parse output node: %s, size = %ld, offset = %u", model_name_.c_str(), op_desc->GetName().c_str(),
  131. tensor_size, static_cast<uint32_t>(offsets[k]));
  132. }
  133. }
  134. Status SingleOpModel::LoadAllNodes() {
  135. auto ge_model = model_helper_.GetGeModel();
  136. GE_CHECK_NOTNULL(ge_model);
  137. Graph graph = ge_model->GetGraph();
  138. auto compute_graph = GraphUtils::GetComputeGraph(graph);
  139. if (compute_graph == nullptr) {
  140. GELOGE(PARAM_INVALID, "[%s] compute_graph is null", model_name_.c_str());
  141. return PARAM_INVALID;
  142. }
  143. auto nodes = compute_graph->GetDirectNode();
  144. size_t model_op_size = nodes.size();
  145. GELOGI("[%s] node size = %zu", model_name_.c_str(), model_op_size);
  146. for (size_t i = 0; i < model_op_size; ++i) {
  147. auto node = nodes.at(i);
  148. auto op_desc = node->GetOpDesc();
  149. GE_CHECK_NOTNULL(op_desc);
  150. op_list_[i] = node;
  151. auto op_type = op_desc->GetType();
  152. GELOGI("[%s] node[%zu] = %s, type = %s", model_name_.c_str(), i, node->GetName().c_str(), op_type.c_str());
  153. if (op_type == DATA_TYPE || op_type == AIPP_DATA_TYPE) {
  154. data_ops_.emplace_back(op_desc);
  155. continue;
  156. }
  157. if (op_type == CONSTANT || op_type == CONSTANTOP) {
  158. has_weight_ = true;
  159. continue;
  160. }
  161. if (op_type == NETOUTPUT) {
  162. netoutput_op_ = op_desc;
  163. continue;
  164. }
  165. ge_model->GetTBEKernelStore().LoadTBEKernelBinToOpDesc(op_desc);
  166. ge_model->GetCustAICPUKernelStore().LoadCustAICPUKernelBinToOpDesc(op_desc);
  167. }
  168. return SUCCESS;
  169. }
  170. Status SingleOpModel::ParseInputsAndOutputs() {
  171. for (auto &op_desc : data_ops_) {
  172. GE_CHK_STATUS_RET_NOLOG(ParseInputNode(op_desc));
  173. }
  174. ParseOutputNode(netoutput_op_);
  175. return SUCCESS;
  176. }
  177. Status SingleOpModel::SetInputsAndOutputs(SingleOp &single_op) {
  178. int arg_index = 0;
  179. for (size_t i = 0; i < input_offset_list_.size(); ++i) {
  180. auto *addr = model_params_.mem_base + input_offset_list_[i];
  181. model_params_.addr_mapping_.emplace(reinterpret_cast<uintptr_t>(addr), arg_index++);
  182. single_op.input_sizes_.emplace_back(input_sizes_[i]);
  183. single_op.input_addr_list_.emplace_back(addr);
  184. }
  185. for (size_t i = 0; i < output_offset_list_.size(); ++i) {
  186. auto *addr = model_params_.mem_base + output_offset_list_[i];
  187. model_params_.addr_mapping_.emplace(reinterpret_cast<uintptr_t>(addr), arg_index++);
  188. single_op.output_sizes_.emplace_back(output_sizes_[i]);
  189. single_op.output_addr_list_.emplace_back(addr);
  190. }
  191. single_op.args_.resize(arg_index);
  192. return SUCCESS;
  193. }
  194. Status SingleOpModel::BuildTaskList(SingleOp &single_op) {
  195. auto ge_model = model_helper_.GetGeModel();
  196. GE_CHECK_NOTNULL(ge_model);
  197. auto tasks = ge_model->GetModelTaskDefPtr()->task();
  198. for (int i = 0; i < tasks.size(); ++i) {
  199. const TaskDef &task_def = tasks[i];
  200. GELOGI("[%s] Task[%d], type = %u, DebugString = %s", model_name_.c_str(), i, task_def.type(),
  201. task_def.DebugString().c_str());
  202. auto task_type = static_cast<rtModelTaskType_t>(task_def.type());
  203. if (task_type == RT_MODEL_TASK_KERNEL) {
  204. const domi::KernelDef &kernel_def = task_def.kernel();
  205. const auto &context = kernel_def.context();
  206. auto kernel_type = static_cast<cce::ccKernelType>(context.kernel_type());
  207. if (kernel_type == cce::ccKernelType::TE) {
  208. GELOGD("Building TBE task");
  209. TbeOpTask *tbe_task = nullptr;
  210. auto ret = BuildKernelTask(task_def.kernel(), &tbe_task);
  211. if (ret != SUCCESS) {
  212. return ret;
  213. }
  214. single_op.arg_table_.resize(single_op.input_sizes_.size() + single_op.output_sizes_.size());
  215. ParseArgTable(tbe_task, single_op);
  216. single_op.tasks_.emplace_back(tbe_task);
  217. } else if (kernel_type == cce::ccKernelType::AI_CPU || kernel_type == cce::ccKernelType::CUST_AI_CPU) {
  218. GELOGD("Building AICPU_CC task");
  219. OpTask *task = nullptr;
  220. auto ret = BuildCpuKernelTask(task_def.kernel(), &task);
  221. if (ret != SUCCESS) {
  222. return ret;
  223. }
  224. single_op.tasks_.emplace_back(task);
  225. } else {
  226. GELOGE(UNSUPPORTED, "Only TBE, AI_CPU, CUST_AI_CPU kernel are supported, but got %u", context.kernel_type());
  227. return UNSUPPORTED;
  228. }
  229. } else if (task_type == RT_MODEL_TASK_KERNEL_EX) {
  230. GELOGD("Building AICPU_TF task");
  231. AiCpuTask *aicpu_task = nullptr;
  232. bool depend_compute_flag = false;
  233. uint64_t singleop_sessionid = aicpu_sessionid++;
  234. GELOGI("Build singleOp, sessionId = %lu", singleop_sessionid);
  235. auto ret = BuildKernelExTask(task_def.kernel_ex(), &aicpu_task, false, depend_compute_flag, singleop_sessionid);
  236. if (ret != SUCCESS) {
  237. return ret;
  238. }
  239. single_op.tasks_.emplace_back(aicpu_task);
  240. single_op.SetSessionID(singleop_sessionid);
  241. } else {
  242. // skip
  243. GELOGD("Skip task type: %d", static_cast<int>(task_type));
  244. }
  245. }
  246. return SUCCESS;
  247. }
  248. void SingleOpModel::ParseArgTable(TbeOpTask *task, SingleOp &op) {
  249. if (task == nullptr) {
  250. GELOGE(PARAM_INVALID, "tbe op task is nullptr");
  251. return;
  252. }
  253. // args: addr1, addr2, addr3 ...
  254. auto *args = const_cast<uintptr_t *>(reinterpret_cast<const uintptr_t *>(task->GetArgs()));
  255. size_t arg_size = task->GetArgSize();
  256. for (size_t i = 0; i < arg_size / sizeof(void *); ++i) {
  257. uintptr_t *ptr_to_addr = args + i;
  258. uintptr_t addr = *ptr_to_addr;
  259. auto iter = model_params_.addr_mapping_.find(addr);
  260. if (iter != model_params_.addr_mapping_.end()) {
  261. int arg_index = iter->second;
  262. GELOGI("%s args[%zu] mapped to user designated args[%d]", task->GetStubName().c_str(), i, arg_index);
  263. op.arg_table_[iter->second].emplace_back(ptr_to_addr);
  264. }
  265. }
  266. }
  267. Status SingleOpModel::BuildKernelTask(const domi::KernelDef &kernel_def, TbeOpTask **task) {
  268. GE_CHECK_NOTNULL(task);
  269. const auto &context = kernel_def.context();
  270. auto iter = op_list_.find(context.op_index());
  271. if (iter == op_list_.end()) {
  272. GELOGE(INTERNAL_ERROR, "op desc not found. op index = %u", context.op_index());
  273. return INTERNAL_ERROR;
  274. }
  275. auto *tbe_task = new (std::nothrow) TbeOpTask();
  276. if (tbe_task == nullptr) {
  277. GELOGE(MEMALLOC_FAILED, "create tbe op task failed");
  278. return MEMALLOC_FAILED;
  279. }
  280. auto builder = TbeTaskBuilder(model_name_, iter->second, kernel_def);
  281. auto ret = builder.BuildTask(*tbe_task, model_params_);
  282. if (ret != SUCCESS) {
  283. delete tbe_task;
  284. tbe_task = nullptr;
  285. return ret;
  286. }
  287. *task = tbe_task;
  288. return SUCCESS;
  289. }
  290. Status SingleOpModel::BuildKernelExTask(const domi::KernelExDef &kernel_def, AiCpuTask **task,
  291. bool dynamic_flag, bool& depend_compute_flag, uint64_t session_id) {
  292. auto iter = op_list_.find(kernel_def.op_index());
  293. if (iter == op_list_.end()) {
  294. GELOGE(INTERNAL_ERROR, "op desc not found. op index = %u", kernel_def.op_index());
  295. return INTERNAL_ERROR;
  296. }
  297. std::unique_ptr<AiCpuTask> aicpu_task(new (std::nothrow) AiCpuTask());
  298. if (aicpu_task == nullptr) {
  299. GELOGE(MEMALLOC_FAILED, "create aicpu_TF op task failed");
  300. return MEMALLOC_FAILED;
  301. }
  302. auto builder = AiCpuTaskBuilder(iter->second->GetOpDesc(), kernel_def);
  303. auto ret = builder.BuildTask(*aicpu_task, model_params_, dynamic_flag, session_id);
  304. if (ret != SUCCESS) {
  305. GELOGE(ret, "build aicpu_TF op task failed");
  306. return ret;
  307. }
  308. depend_compute_flag = (aicpu_task->GetUnknownType() == DEPEND_COMPUTE);
  309. *task = aicpu_task.release();
  310. return SUCCESS;
  311. }
  312. Status SingleOpModel::BuildCpuKernelTask(const domi::KernelDef &kernel_def, OpTask **task) {
  313. const auto &context = kernel_def.context();
  314. auto iter = op_list_.find(context.op_index());
  315. if (iter == op_list_.end()) {
  316. GELOGE(INTERNAL_ERROR, "op desc not found. op index = %u", context.op_index());
  317. return INTERNAL_ERROR;
  318. }
  319. std::unique_ptr<AiCpuCCTask> aicpucc_task(new (std::nothrow) AiCpuCCTask());
  320. if (aicpucc_task == nullptr) {
  321. GELOGE(MEMALLOC_FAILED, "create aicpu_CC op task failed");
  322. return MEMALLOC_FAILED;
  323. }
  324. auto builder = AiCpuCCTaskBuilder(iter->second->GetOpDesc(), kernel_def);
  325. auto ret = builder.BuildTask(*aicpucc_task);
  326. if (ret != SUCCESS) {
  327. GELOGE(ret, "build aicpu_CC op task failed");
  328. return ret;
  329. }
  330. *task = aicpucc_task.release();
  331. return SUCCESS;
  332. }
  333. Status SingleOpModel::BuildOp(StreamResource &resource, SingleOp &single_op) {
  334. GE_CHK_STATUS_RET_NOLOG(ParseInputsAndOutputs());
  335. GE_CHK_STATUS_RET_NOLOG(InitModelMem(resource));
  336. GE_CHK_STATUS_RET_NOLOG(SetInputsAndOutputs(single_op));
  337. return BuildTaskList(single_op);
  338. }
  339. Status SingleOpModel::BuildModelTaskKernel(const TaskDef &task_def, DynamicSingleOp &single_op) {
  340. const domi::KernelDef &kernel_def = task_def.kernel();
  341. const auto &context = kernel_def.context();
  342. auto kernel_type = static_cast<cce::ccKernelType>(context.kernel_type());
  343. if (kernel_type == cce::ccKernelType::TE) {
  344. GELOGD("Building TBE task");
  345. TbeOpTask *tbe_task = nullptr;
  346. GE_CHK_STATUS_RET_NOLOG(BuildKernelTask(task_def.kernel(), &tbe_task));
  347. single_op.op_task_.reset(tbe_task);
  348. } else if (kernel_type == cce::ccKernelType::AI_CPU || kernel_type == cce::ccKernelType::CUST_AI_CPU) {
  349. GELOGD("Building AICPU_CC task");
  350. OpTask *task = nullptr;
  351. GE_CHK_STATUS_RET_NOLOG(BuildCpuKernelTask(task_def.kernel(), &task));
  352. single_op.op_task_.reset(task);
  353. } else {
  354. GELOGE(UNSUPPORTED, "Only TBE, AI_CPU, CUST_AI_CPU kernel are supported, but got %u", context.kernel_type());
  355. return UNSUPPORTED;
  356. }
  357. return SUCCESS;
  358. }
  359. Status SingleOpModel::BuildTaskListForDynamicOp(DynamicSingleOp &single_op) {
  360. auto ge_model = model_helper_.GetGeModel();
  361. GE_CHECK_NOTNULL(ge_model);
  362. auto tasks = ge_model->GetModelTaskDefPtr()->task();
  363. for (int i = 0; i < tasks.size(); ++i) {
  364. const TaskDef &task_def = tasks[i];
  365. GELOGI("[%s] Task[%d], type = %u, DebugString = %s", model_name_.c_str(), i, task_def.type(),
  366. task_def.DebugString().c_str());
  367. auto task_type = static_cast<rtModelTaskType_t>(task_def.type());
  368. if (task_type == RT_MODEL_TASK_KERNEL) {
  369. if (single_op.op_task_ != nullptr) {
  370. GELOGE(UNSUPPORTED, "Do not support dynamic op with multiple tasks.");
  371. return UNSUPPORTED;
  372. }
  373. GE_CHK_STATUS_RET_NOLOG(BuildModelTaskKernel(task_def, single_op));
  374. } else if (task_type == RT_MODEL_TASK_KERNEL_EX) {
  375. if (single_op.op_task_ != nullptr) {
  376. GELOGE(UNSUPPORTED, "Do not support dynamic op with multiple tasks.");
  377. return UNSUPPORTED;
  378. }
  379. GELOGD("Building AICPU_TF task");
  380. AiCpuTask *aicpu_task = nullptr;
  381. bool depend_compute_flag = false;
  382. uint64_t dynamic_singleop_sessionid = aicpu_sessionid++;
  383. GELOGI("Build dynamic singleOp, sessionId = %lu", dynamic_singleop_sessionid);
  384. GE_CHK_STATUS_RET_NOLOG(BuildKernelExTask(task_def.kernel_ex(), &aicpu_task, true,
  385. depend_compute_flag, dynamic_singleop_sessionid));
  386. if (depend_compute_flag) {
  387. if (i >= tasks.size() - 1) {
  388. GELOGE(FAILED, "The copy task of the fourth operator was not found.");
  389. return FAILED;
  390. }
  391. ++i;
  392. const TaskDef &copy_task_def = tasks[i];
  393. GE_CHK_STATUS_RET_NOLOG(aicpu_task->SetMemCopyTask(copy_task_def.kernel_ex()));
  394. }
  395. single_op.op_task_.reset(aicpu_task);
  396. single_op.SetSessionID(dynamic_singleop_sessionid);
  397. } else {
  398. // skip
  399. GELOGD("Skip task type: %d", static_cast<int>(task_type));
  400. }
  401. }
  402. return SUCCESS;
  403. }
  404. Status SingleOpModel::BuildDynamicOp(DynamicSingleOp &single_op) {
  405. single_op.num_inputs_ = data_ops_.size();
  406. single_op.num_outputs_ = netoutput_op_->GetAllInputsSize();
  407. ParseOpModelParams(model_helper_, model_params_);
  408. return BuildTaskListForDynamicOp(single_op);
  409. }
  410. } // namespace ge

图引擎模块(GE)是MindSpore的一个子模块,其代码由C++实现,位于前端模块ME和底层硬件之间,起到承接作用。图引擎模块以ME下发的图作为输入,然后进行一系列的深度图优化操作,最后输出一张可以在底层硬件上高效运行的图。GE针对昇腾AI处理器的硬件结构特点,做了特定的优化工作,以此来充分发挥出昇腾AI处理器的强大算力。在进行模型训练/推理时,GE会被自动调用而用户并不感知。GE主要由GE API和GE Core两部分组成,详细的架构图如下所示。