task_context.cc

/**
 * Copyright 2019-2020 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "task_context.h"
#include "framework/common/ge_inner_error_codes.h"
#include "framework/common/debug/log.h"
#include "graph/utils/tensor_utils.h"
#include "graph/types.h"
#include "graph/debug/ge_attr_define.h"
#include "hybrid/executor/hybrid_execution_context.h"
#include "hybrid/executor/subgraph_executor.h"
#include "common/profiling/profiling_manager.h"
namespace ge {
namespace hybrid {
TaskContext::TaskContext(GraphExecutionContext *execution_context,
                         const NodeItem *node_item,
                         SubgraphContext *subgraph_context)
    : node_item_(node_item), execution_context_(execution_context), subgraph_context_(subgraph_context) {
}

TaskContext::~TaskContext() {
  GELOGD("[%s] TaskContext destroyed.", node_item_->NodeName().c_str());
  for (auto ws_addr : workspaces_) {
    execution_context_->allocator->Deallocate(ws_addr);
  }

  // release output
  for (int i = 0; i < NumOutputs(); ++i) {
    auto output_tensor = MutableOutput(i);
    if (output_tensor != nullptr) {
      output_tensor->Destroy();
    }
  }
}

std::unique_ptr<TaskContext> TaskContext::Create(const NodeItem &node_item,
                                                 GraphExecutionContext *execution_context,
                                                 SubgraphContext *subgraph_context) {
  GELOGI("[%s] To create task context, input start = %d, num_inputs = %d, output start = %d, num_outputs = %d.",
         node_item.NodeName().c_str(),
         node_item.input_start,
         node_item.num_inputs,
         node_item.output_start,
         node_item.num_outputs);
  if (node_item.input_start < 0 || node_item.output_start < 0) {
    GELOGE(INTERNAL_ERROR,
           "NodeItem not properly initialized. input_start = %d, output_start = %d",
           node_item.input_start,
           node_item.output_start);
    return nullptr;
  }

  auto task_context = std::unique_ptr<TaskContext>(
      new (std::nothrow) TaskContext(execution_context, &node_item, subgraph_context));
  if (task_context == nullptr) {
    GELOGE(MEMALLOC_FAILED, "[%s] Failed to create instance of TaskContext.", node_item.NodeName().c_str());
    return nullptr;
  }

  task_context->node_item_ = &node_item;
  task_context->inputs_start_ = subgraph_context->all_inputs_.data() + node_item.input_start;
  task_context->outputs_start_ = subgraph_context->all_outputs_.data() + node_item.output_start;
  task_context->iteration_ = execution_context->iteration;
  return task_context;
}
int TaskContext::NumInputs() const {
  return node_item_->num_inputs;
}

int TaskContext::NumOutputs() const {
  return node_item_->num_outputs;
}

TensorValue *TaskContext::MutableInput(int index) {
  if (index < 0 || index >= node_item_->num_inputs) {
    GELOGE(PARAM_INVALID, "Index out of range. index = %d, num_inputs = %d", index, node_item_->num_inputs);
    return nullptr;
  }
  return inputs_start_ + index;
}

const TensorValue *TaskContext::GetOutput(int index) const {
  if (index < 0 || index >= node_item_->num_outputs) {
    GELOGE(PARAM_INVALID, "Index out of range. index = %d, num_outputs = %d", index, node_item_->num_outputs);
    return nullptr;
  }
  return outputs_start_ + index;
}

TensorValue *TaskContext::MutableOutput(int index) {
  if (index < 0 || index >= node_item_->num_outputs) {
    GELOGE(PARAM_INVALID, "Index out of range. index = %d, num_outputs = %d", index, node_item_->num_outputs);
    return nullptr;
  }
  return outputs_start_ + index;
}

std::size_t TaskContext::NumWorkspaces() const {
  return workspaces_.size();
}
void *TaskContext::MutableWorkspace(int index) {
  if (index < 0 || static_cast<size_t>(index) >= workspaces_.size()) {
    GELOGE(PARAM_INVALID, "Index out of range. index = %d, num_workspaces = %zu", index, workspaces_.size());
    return nullptr;
  }
  return workspaces_[index];
}
const TensorValue *TaskContext::GetInput(int index) const {
  if (index < 0 || index >= node_item_->num_inputs) {
    GELOGE(PARAM_INVALID, "Index out of range. index = %d, num_inputs = %d", index, node_item_->num_inputs);
    return nullptr;
  }
  return inputs_start_ + index;
}

Status TaskContext::AllocateWorkspaces() {
  auto workspace_sizes = node_item_->node->GetOpDesc()->GetWorkspaceBytes();
  for (auto size : workspace_sizes) {
    void *workspace = execution_context_->allocator->Allocate(size);
    if (workspace == nullptr) {
      GELOGE(MEMALLOC_FAILED, "Failed to allocate workspace of size: %ld", size);
      return MEMALLOC_FAILED;
    }
    workspaces_.emplace_back(workspace);
  }
  return SUCCESS;
}

Status TaskContext::RegisterCallback(const std::function<void()> &callback_fun) const {
  if (callback_fun == nullptr) {
    GELOGW("[%s] Callback is NULL", GetNodeName());
    return SUCCESS;
  }
  auto ret = execution_context_->callback_manager->RegisterCallback(callback_fun);
  if (ret != SUCCESS) {
    GELOGE(ret, "[%s] Failed to register callback", GetNodeName());
    execution_context_->callback_manager->Destroy();
    return ret;
  }
  return SUCCESS;
}

string TaskContext::TensorDesc2String(const GeTensorDesc &desc) {
  std::stringstream ss;
  ss << "[TensorDesc] ";
  ss << "DataType = " << desc.GetDataType();
  ss << ", Format = " << desc.GetFormat();
  ss << ", Shape = [";
  for (auto dim : desc.GetShape().GetDims()) {
    ss << dim << ", ";
  }
  ss << "]";
  return ss.str();
}

Status TaskContext::AllocateTensor(const GeTensorDesc &tensor_desc, TensorValue &tensor, AllocationAttr *attr) {
  int64_t size = 0;
  if (ge::TensorUtils::GetSize(tensor_desc, size) != GRAPH_SUCCESS) {
    GELOGE(INTERNAL_ERROR, "Failed to get tensor size");
    return INTERNAL_ERROR;
  }

  if (size == 0) {
    GELOGW("size from tensor_desc == 0");
  }

  auto buffer = TensorBuffer::Create(execution_context_->allocator, size, attr);
  GE_CHECK_NOTNULL(buffer);
  tensor = TensorValue(shared_ptr<TensorBuffer>(buffer.release()));
  return SUCCESS;
}

Status TaskContext::AllocateOutput(int index,
                                   const GeTensorDesc &tensor_desc,
                                   TensorValue **tensor,
                                   AllocationAttr *attr) {
  GELOGI("To allocate output for node: %s. index = %d, tensor desc = %s",
         node_item_->NodeName().c_str(),
         index,
         TensorDesc2String(tensor_desc).c_str());
  if (index < 0 || index >= node_item_->num_outputs) {
    GELOGE(PARAM_INVALID, "output index out of range. num_output = %d, index = %d", node_item_->num_outputs, index);
    return PARAM_INVALID;
  }

  if (outputs_start_[index].GetData() != nullptr) {
    GELOGI("already allocated as net output");
    return SUCCESS;
  }

  int32_t calc_type = 0;
  bool ret = ge::AttrUtils::GetInt(tensor_desc, ATTR_NAME_MEMORY_SIZE_CALC_TYPE, calc_type);
  if (ret && (calc_type == static_cast<int32_t>(ge::MemorySizeCalcType::ALWAYS_EMPTY))) {
    outputs_start_[index] = TensorValue();
    return SUCCESS;
  }

  auto it = node_item_->ref_outputs.find(index);
  if (it != node_item_->ref_outputs.end()) {
    auto &ref_node = it->second;
    GELOGD("source node of %s:%d = %s, op_type = %s",
           node_item_->NodeName().c_str(),
           index,
           ref_node->GetName().c_str(),
           ref_node->GetType().c_str());
    TensorValue *ref_tensor = execution_context_->model->GetVariable(ref_node->GetName());
    GE_CHECK_NOTNULL(ref_tensor);
    outputs_start_[index] = *ref_tensor;
  } else {
    auto reuse_output_it = node_item_->reuse_outputs.find(index);
    if (reuse_output_it != node_item_->reuse_outputs.end()) {
      GELOGD("[%s] reuse output [%d] with output [%d]", GetNodeName(), index, reuse_output_it->second);
      outputs_start_[index] = outputs_start_[reuse_output_it->second];
    } else {
      auto reuse_input = node_item_->reuse_inputs.find(index);
      if (reuse_input != node_item_->reuse_inputs.end()) {
        GELOGD("[%s] Output[%d] is referenced to input[%d]", GetNodeName(), index, reuse_input->second);
        outputs_start_[index] = inputs_start_[reuse_input->second];
      } else {
        GE_CHK_STATUS_RET_NOLOG(AllocateTensor(tensor_desc, outputs_start_[index], attr));
        GELOGD("Allocating output successfully. node: %s. index = %d, size = %zu",
               node_item_->NodeName().c_str(), index, outputs_start_[index].GetSize());
      }
    }
  }

  if (execution_context_->trace_enabled) {
    outputs_start_[index].SetName(node_item_->NodeName() + "_out_" + std::to_string(index));
  }

  if (tensor != nullptr) {
    *tensor = outputs_start_ + index;
  }

  return SUCCESS;
}
Status TaskContext::AllocateOutputs(AllocationAttr *attr) {
  for (int i = 0; i < node_item_->num_outputs; ++i) {
    const auto &output_desc = node_item_->MutableOutputDesc(i);
    GE_CHECK_NOTNULL(output_desc);
    uint32_t mem_type = 0;
    (void)AttrUtils::GetInt(output_desc, ATTR_OUTPUT_MEMORY_TYPE, mem_type);
    if (attr == nullptr) {
      auto tmp_attr = AllocationAttr(0, nullptr, static_cast<MemStorageType>(mem_type));
      GE_CHK_STATUS_RET_NOLOG(AllocateOutput(i, *output_desc, nullptr, &tmp_attr));
    } else {
      attr->SetMemType(static_cast<MemStorageType>(mem_type));
      GE_CHK_STATUS_RET_NOLOG(AllocateOutput(i, *output_desc, nullptr, attr));
    }
  }
  return SUCCESS;
}

Status TaskContext::AllocateTensor(size_t size, TensorValue &tensor, AllocationAttr *attr) {
  auto buffer = TensorBuffer::Create(execution_context_->allocator, size, attr);
  if (buffer == nullptr) {
    GELOGE(MEMALLOC_FAILED, "Failed to allocate buffer of size: %zu", size);
    return MEMALLOC_FAILED;
  }
  tensor = TensorValue(shared_ptr<TensorBuffer>(buffer.release()));
  return SUCCESS;
}

const NodeItem &TaskContext::GetNodeItem() const {
  return *node_item_;
}

Status TaskContext::SetOutput(int index, const TensorValue &tensor) {
  if (index < 0 || index >= node_item_->num_outputs) {
    GELOGE(PARAM_INVALID, "output index out of range. num_output = %d, index = %d", node_item_->num_outputs, index);
    return PARAM_INVALID;
  }
  GELOGD("Set %s:%d with tensor: %s",
         node_item_->NodeName().c_str(),
         index,
         tensor.DebugString().c_str());
  outputs_start_[index] = tensor;
  return SUCCESS;
}

rtStream_t TaskContext::GetStream() {
  return execution_context_->stream;
}

int64_t TaskContext::GetSessionId() const {
  return execution_context_->session_id;
}

Status TaskContext::GetStatus() const {
  return status_;
}

void TaskContext::SetStatus(Status status) {
  status_ = status;
  if (status != SUCCESS) {
    execution_context_->SetErrorCode(status);
  }
}

uint32_t TaskContext::GetTaskId() const {
  return task_id_;
}

void TaskContext::SetTaskId(uint32_t task_id) {
  task_id_ = task_id;
}

uint32_t TaskContext::GetStreamId() const {
  return stream_id_;
}

void TaskContext::SetStreamId(uint32_t stream_id) {
  stream_id_ = stream_id;
}

Status TaskContext::AllocateWorkspace(size_t size, void **buffer, void *ori_addr) {
  GE_CHECK_NOTNULL(buffer);
  if (ori_addr == nullptr) {
    *buffer = execution_context_->allocator->Allocate(size, nullptr);
  } else {
    AllocationAttr attr(ori_addr);
    *buffer = execution_context_->allocator->Allocate(size, &attr);
  }

  if (*buffer == nullptr) {
    GELOGE(MEMALLOC_FAILED, "Failed to allocate workspace of size = %zu", size);
    return MEMALLOC_FAILED;
  }

  GELOGD("Allocating workspace of size = %zu successfully", size);
  workspaces_.emplace_back(*buffer);
  return SUCCESS;
}
Status TaskContext::PropagateOutputs() {
  // propagate outputs
  for (int i = 0; i < NumOutputs(); ++i) {
    auto tensor = MutableOutput(i);
    GE_CHECK_NOTNULL(tensor);
    if (tensor->GetData() == nullptr) {
      GELOGD("[%s] Node output[%d] is null.", node_item_->NodeName().c_str(), i);
    }
    auto &output_nodes = node_item_->outputs[i];
    for (auto &dst_input_index_and_node : output_nodes) {
      auto dst_input_idx = dst_input_index_and_node.first;
      auto dst_node_item = dst_input_index_and_node.second;
      auto input_offset = dst_node_item->input_start + dst_input_idx;
      GELOGD("Propagate output of node %s, output index = %d, dst node = %s, "
             "dst_input_index = %d, dst_input_offset = %d.",
             node_item_->NodeName().c_str(),
             i,
             dst_node_item->NodeName().c_str(),
             dst_input_idx,
             input_offset);
      if (subgraph_context_->all_inputs_.size() <= static_cast<size_t>(input_offset)) {
        GELOGE(INTERNAL_ERROR,
               "[%s] input index out of range. index = %d, total input num = %zu",
               GetNodeName(),
               input_offset,
               subgraph_context_->all_inputs_.size());
        return INTERNAL_ERROR;
      }

      subgraph_context_->all_inputs_[input_offset] = *tensor;
      if (execution_context_->trace_enabled) {
        subgraph_context_->all_inputs_[input_offset].SetName(
            node_item_->NodeName() + "_in_" + std::to_string(dst_input_idx));
      }
    }
  }
  return SUCCESS;
}

const void *TaskContext::GetVarBaseAddr() {
  return execution_context_->model->GetVarMemBase();
}

const char *TaskContext::GetNodeName() const {
  return node_item_->NodeName().c_str();
}

void TaskContext::ReleaseInputsAndOutputs() {
  for (int i = 0; i < node_item_->num_inputs; ++i) {
    auto tensor = inputs_start_ + i;
    tensor->Destroy();
    GELOGD("[%s] Tensor of input[%d] released", GetNodeName(), i);
  }

  for (int i = 0; i < node_item_->num_outputs; ++i) {
    auto tensor = outputs_start_ + i;
    tensor->Destroy();
    GELOGD("[%s] Tensor of output[%d] released", GetNodeName(), i);
  }
}
void TaskContext::ReleaseInput(int index) {
  auto input_tensor = MutableInput(index);
  if (input_tensor != nullptr) {
    input_tensor->Destroy();
    GELOGD("[%s] Tensor of input[%d] released", GetNodeName(), index);
  }
}

ConstGeTensorDescPtr TaskContext::GetOutputDesc(int index) const {
  return node_item_->MutableOutputDesc(static_cast<uint32_t>(index));
}

ConstGeTensorDescPtr TaskContext::GetInputDesc(int index) const {
  return node_item_->MutableInputDesc(index);
}

GeTensorDescPtr TaskContext::MutableInputDesc(int index) const {
  return node_item_->MutableInputDesc(index);
}

GeTensorDescPtr TaskContext::MutableOutputDesc(int index) const {
  return node_item_->MutableOutputDesc(static_cast<uint32_t>(index));
}

bool TaskContext::IsForceInferShape() const {
  return force_infer_shape_;
}

void TaskContext::SetForceInferShape(bool force_infer_shape) {
  force_infer_shape_ = force_infer_shape;
}

void TaskContext::NodeDone() {
  subgraph_context_->NodeDone(node_item_->node);
}

void TaskContext::OnError(Status error) {
  subgraph_context_->OnError(error);
  execution_context_->SetErrorCode(error);
}

bool TaskContext::IsTraceEnabled() const {
  return execution_context_->trace_enabled;
}

TensorValue *TaskContext::GetVariable(const std::string &name) {
  return execution_context_->model->GetVariable(name);
}

uint64_t TaskContext::GetIterationNumber() const {
  return iteration_;
}

bool TaskContext::IsDumpEnabled() const {
  return execution_context_->dump_enabled;
}

Status TaskContext::TryExecuteCallback(const function<void()> &callback_fun) const {
  if (!callback_fun) {
    return SUCCESS;
  }

  if (node_item_->has_observer) {
    return RegisterCallback(callback_fun);
  }

  callback_fun();
  return SUCCESS;
}

const DumpProperties &TaskContext::GetDumpProperties() const {
  return execution_context_->dump_properties;
}

bool TaskContext::NeedCallback() {
  return node_item_->has_observer || IsDumpEnabled() || execution_context_->profiling_level > 0;
}

Status TaskContext::Synchronize() {
  return execution_context_->Synchronize(GetStream());
}

Status TaskContext::SaveProfilingTaskDescInfo(uint32_t task_id, uint32_t stream_id,
                                              uint32_t task_type, uint32_t block_dim) {
  if (ProfilingManager::Instance().ProfilingModelExecuteOn()) {
    const NodeItem &node_item = GetNodeItem();
    auto op_desc = node_item.GetOpDesc();
    GE_CHECK_NOTNULL(op_desc);
    const GraphExecutionContext *graph_context = GetExecutionContext();
    GE_CHECK_NOTNULL(graph_context);
    const HybridModel *model = graph_context->model;
    GE_CHECK_NOTNULL(model);

    std::string op_name = op_desc->GetName();
    std::string dynamic_model_name = model->GetModelName();
    TaskDescInfo tmp_task_desc_info;
    tmp_task_desc_info.model_name = dynamic_model_name;
    tmp_task_desc_info.op_name = op_name;
    tmp_task_desc_info.block_dim = block_dim;
    tmp_task_desc_info.task_type = task_type;
    tmp_task_desc_info.task_id = task_id;
    tmp_task_desc_info.stream_id = stream_id;
    tmp_task_desc_info.shape_type = "dynamic";
    tmp_task_desc_info.cur_iter_num = iteration_ + 1;
    task_desc_info.emplace_back(tmp_task_desc_info);
  }
  return SUCCESS;
}

Status TaskContext::SaveProfilingGraphDescInfo(uint32_t task_id, uint32_t stream_id) {
  if (ProfilingManager::Instance().ProfilingModelExecuteOn()) {
    const NodeItem &node_item = GetNodeItem();
    auto op_desc = node_item.GetOpDesc();
    GE_CHECK_NOTNULL(op_desc);
    const GraphExecutionContext *graph_context = GetExecutionContext();
    GE_CHECK_NOTNULL(graph_context);
    const HybridModel *model = graph_context->model;
    GE_CHECK_NOTNULL(model);

    std::string dynamic_model_name = model->GetModelName();
    auto op_mode = static_cast<uint32_t>(domi::ImplyType::INVALID);
    if (AttrUtils::GetInt(op_desc, ATTR_NAME_IMPLY_TYPE, op_mode) &&
        op_mode == static_cast<uint32_t>(domi::ImplyType::TVM)) {
      ComputeGraphDescInfo tmp_compute_graph_info;
      tmp_compute_graph_info.model_name = dynamic_model_name;
      tmp_compute_graph_info.op_name = op_desc->GetName();
      tmp_compute_graph_info.op_type = op_desc->GetType();
      tmp_compute_graph_info.task_id = task_id;
      tmp_compute_graph_info.stream_id = stream_id;
      compute_graph_info.emplace_back(tmp_compute_graph_info);
    }
  }
  return SUCCESS;
}
}  // namespace hybrid
}  // namespace ge
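
For orientation, below is a minimal sketch of how the TaskContext API defined above is typically driven for a single node by the hybrid executor. The driver function itself and the LaunchKernel step are illustrative assumptions and are not part of task_context.cc; the NodeItem, GraphExecutionContext and SubgraphContext instances are assumed to have been prepared by the surrounding executor.

// Illustrative only: a hypothetical per-node driver built from the calls above.
// LaunchKernel stands in for the real kernel dispatch performed by the node executors.
Status ExecuteSingleNode(const NodeItem &node_item,
                         GraphExecutionContext *execution_context,
                         SubgraphContext *subgraph_context) {
  auto task_context = TaskContext::Create(node_item, execution_context, subgraph_context);
  GE_CHECK_NOTNULL(task_context);

  // Reserve output tensors and per-node workspace memory up front.
  GE_CHK_STATUS_RET_NOLOG(task_context->AllocateOutputs(nullptr));
  GE_CHK_STATUS_RET_NOLOG(task_context->AllocateWorkspaces());

  // Dispatch the kernel on the stream owned by the execution context (hypothetical helper).
  Status ret = LaunchKernel(*task_context, task_context->GetStream());
  if (ret != SUCCESS) {
    task_context->SetStatus(ret);
    task_context->OnError(ret);
    return ret;
  }

  // Hand the produced tensors to downstream nodes, then mark the node as done.
  GE_CHK_STATUS_RET_NOLOG(task_context->PropagateOutputs());
  task_context->NodeDone();
  return SUCCESS;
}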

The Graph Engine (GE) is a submodule of MindSpore implemented in C++. It sits between the front-end module (ME) and the underlying hardware, acting as the bridge between them. GE takes the graph issued by ME as input, performs a series of deep graph optimizations, and outputs a graph that runs efficiently on the target hardware. GE is specifically tuned for the hardware architecture of the Ascend AI processor in order to fully exploit its compute power. During model training and inference, GE is invoked automatically and is transparent to the user. GE consists mainly of two parts, GE API and GE Core; the detailed architecture diagram is shown below.