You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number, can include dashes ('-'), and can be up to 35 characters long.

subgraph_executor.cc 17 kB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378
  1. /**
  2. * Copyright 2019-2020 Huawei Technologies Co., Ltd
  3. *
  4. * Licensed under the Apache License, Version 2.0 (the "License");
  5. * you may not use this file except in compliance with the License.
  6. * You may obtain a copy of the License at
  7. *
  8. * http://www.apache.org/licenses/LICENSE-2.0
  9. *
  10. * Unless required by applicable law or agreed to in writing, software
  11. * distributed under the License is distributed on an "AS IS" BASIS,
  12. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. * See the License for the specific language governing permissions and
  14. * limitations under the License.
  15. */
  16. #include "hybrid/executor/subgraph_executor.h"
  17. #include "hybrid/executor/worker/task_compile_engine.h"
  18. #include "hybrid/executor/worker/execution_engine.h"
  19. #include "hybrid/node_executor/node_executor.h"
  20. namespace ge {
  21. namespace hybrid {
  namespace {
  // Size handed to pre_run_pool_ in the SubgraphExecutor constructor.
  constexpr int kDefaultThreadNum{4};
  // Input slot passed to SubgraphContext::SetInput when binding a subgraph input node.
  constexpr int kDataInputIndex{0};
  }  // namespace
  26. SubgraphExecutor::SubgraphExecutor(const GraphItem *graph_item, GraphExecutionContext *context, bool force_infer_shape)
  27. : graph_item_(graph_item),
  28. context_(context),
  29. force_infer_shape_(force_infer_shape),
  30. pre_run_pool_(kDefaultThreadNum) {}
  31. SubgraphExecutor::~SubgraphExecutor() { GELOGD("[%s] SubgraphExecutor destroyed.", graph_item_->GetName().c_str()); }
  32. Status SubgraphExecutor::Init(const std::vector<TensorValue> &inputs,
  33. const std::vector<ConstGeTensorDescPtr> &input_desc) {
  34. subgraph_context_.reset(new (std::nothrow) SubgraphContext(graph_item_));
  35. GE_CHECK_NOTNULL(subgraph_context_);
  36. GE_CHK_STATUS_RET(subgraph_context_->Init(), "[%s] Failed to init subgraph context.", graph_item_->GetName().c_str());
  37. shape_inference_engine_.reset(new (std::nothrow) ShapeInferenceEngine(context_, subgraph_context_.get()));
  38. GE_CHECK_NOTNULL(shape_inference_engine_);
  39. if (graph_item_->IsDynamic()) {
  40. GE_CHK_STATUS_RET(InitInputsForUnknownShape(inputs, input_desc), "[%s] Failed to set inputs.",
  41. graph_item_->GetName().c_str());
  42. } else {
  43. GE_CHK_STATUS_RET(InitInputsForKnownShape(inputs),
  44. "[%s] Failed to init subgraph executor for known shape subgraph.",
  45. graph_item_->GetName().c_str());
  46. }
  47. return SUCCESS;
  48. }
  49. Status SubgraphExecutor::InitInputsForUnknownShape(const std::vector<TensorValue> &inputs,
  50. const std::vector<ConstGeTensorDescPtr> &input_desc) {
  51. // Number of inputs of parent node should be greater or equal than that of subgraph
  52. auto input_nodes = graph_item_->GetInputNodes();
  53. if (inputs.size() < input_nodes.size()) {
  54. GELOGE(INTERNAL_ERROR, "[%s] Number of inputs [%zu] is not sufficient for subgraph which needs [%zu] inputs.",
  55. graph_item_->GetName().c_str(), inputs.size(), input_nodes.size());
  56. return INTERNAL_ERROR;
  57. }
  58. for (size_t i = 0; i < input_nodes.size(); ++i) {
  59. auto &input_node = input_nodes[i];
  60. if (input_node == nullptr) {
  61. GELOGD("[%s] Input[%zu] is not needed by subgraph, skip it.", graph_item_->GetName().c_str(), i);
  62. continue;
  63. }
  64. auto &input_tensor = inputs[i];
  65. GELOGD("[%s] Set input tensor[%zu] to inputs with index = %d, tensor = %s", graph_item_->GetName().c_str(), i,
  66. input_node->input_start, input_tensor.DebugString().c_str());
  67. GE_CHK_STATUS_RET(subgraph_context_->SetInput(*input_node, kDataInputIndex, input_tensor),
  68. "[%s] Failed to set input tensor[%zu]", graph_item_->GetName().c_str(), i);
  69. if (force_infer_shape_ || input_node->is_dynamic) {
  70. GELOGD("[%s] Start to update input[%zu] for subgraph data node.", graph_item_->GetName().c_str(), i);
  71. GE_CHECK_LE(i + 1, input_desc.size());
  72. const auto &tensor_desc = input_desc[i];
  73. auto node_state = subgraph_context_->GetOrCreateNodeState(input_node);
  74. GE_CHECK_NOTNULL(node_state);
  75. node_state->GetShapeInferenceState().UpdateInputShape(0, tensor_desc->GetOriginShape(), tensor_desc->GetShape());
  76. }
  77. }
  78. GELOGD("[%s] Done setting inputs.", graph_item_->GetName().c_str());
  79. return SUCCESS;
  80. }
  81. Status SubgraphExecutor::InitInputsForKnownShape(const std::vector<TensorValue> &inputs) {
  82. auto &input_index_mapping = graph_item_->GetInputIndexMapping();
  83. for (size_t i = 0; i < input_index_mapping.size(); ++i) {
  84. auto &parent_input_index = input_index_mapping[i];
  85. if (static_cast<size_t>(parent_input_index) >= inputs.size()) {
  86. GELOGE(INTERNAL_ERROR,
  87. "[%s] Number of inputs [%zu] is not sufficient for subgraph which needs at lease [%d] inputs",
  88. graph_item_->GetName().c_str(), inputs.size(), parent_input_index + 1);
  89. return INTERNAL_ERROR;
  90. }
  91. auto &input_tensor = inputs[parent_input_index];
  92. subgraph_context_->SetInput(static_cast<int>(i), input_tensor);
  93. GELOGD("[%s] Set input tensor[%zu] with inputs with index = %d, tensor = %s", graph_item_->GetName().c_str(), i,
  94. parent_input_index, input_tensor.DebugString().c_str());
  95. }
  96. return SUCCESS;
  97. }
  98. Status SubgraphExecutor::ExecuteAsync(const std::vector<TensorValue> &inputs,
  99. const std::vector<ConstGeTensorDescPtr> &input_desc) {
  100. GELOGD("[%s] is dynamic = %s", graph_item_->GetName().c_str(), graph_item_->IsDynamic() ? "true" : "false");
  101. GE_CHK_STATUS_RET(Init(inputs, input_desc), "[%s] Failed to init executor.", graph_item_->GetName().c_str());
  102. if (!graph_item_->IsDynamic()) {
  103. return ExecuteAsyncForKnownShape(inputs);
  104. }
  105. GE_CHK_STATUS_RET(ScheduleTasks(), "[%s] Failed to execute tasks.", graph_item_->GetName().c_str());
  106. GELOGD("[%s] Done executing subgraph successfully.", graph_item_->GetName().c_str());
  107. return SUCCESS;
  108. }
  109. Status SubgraphExecutor::ExecuteAsyncForKnownShape(const std::vector<TensorValue> &inputs) {
  110. GELOGD("[%s] subgraph is not dynamic.", graph_item_->GetName().c_str());
  111. if (graph_item_->GetAllNodes().size() != 1) {
  112. GELOGE(INTERNAL_ERROR, "[%s] Invalid known shape subgraph. node size = %zu", graph_item_->GetName().c_str(),
  113. graph_item_->GetAllNodes().size());
  114. return INTERNAL_ERROR;
  115. }
  116. auto node_item = graph_item_->GetAllNodes()[0];
  117. GE_CHECK_NOTNULL(node_item);
  118. auto node_state = subgraph_context_->GetOrCreateNodeState(node_item);
  119. GE_CHECK_NOTNULL(node_state);
  120. node_state->SetKernelTask(node_item->kernel_task);
  121. known_shape_task_context_ = TaskContext::Create(*node_item, context_, subgraph_context_.get());
  122. GE_CHECK_NOTNULL(known_shape_task_context_);
  123. GE_CHK_STATUS_RET(ExecutionEngine::ExecuteAsync(*node_state, known_shape_task_context_, *context_),
  124. "[%s] Failed to execute node [%s] for known subgraph.", graph_item_->GetName().c_str(),
  125. known_shape_task_context_->GetNodeName());
  126. GELOGD("[%s] Done execute non-dynamic subgraph successfully.", graph_item_->GetName().c_str());
  127. return SUCCESS;
  128. }
  129. Status SubgraphExecutor::ExecuteAsync(TaskContext &task_context) {
  130. std::vector<TensorValue> inputs;
  131. std::vector<ConstGeTensorDescPtr> input_desc;
  132. for (int i = 0; i < task_context.NumInputs(); ++i) {
  133. auto tensor = task_context.GetInput(i);
  134. GE_CHECK_NOTNULL(tensor);
  135. inputs.emplace_back(*tensor);
  136. input_desc.emplace_back(task_context.GetInputDesc(i));
  137. }
  138. GE_CHK_STATUS_RET(ExecuteAsync(inputs, input_desc), "[%s] Failed to execute subgraph.",
  139. graph_item_->GetName().c_str());
  140. GE_CHK_STATUS_RET(SetOutputsToParentNode(task_context), "[%s] Failed to set output shapes to parent node.",
  141. graph_item_->GetName().c_str());
  142. return SUCCESS;
  143. }
  144. Status SubgraphExecutor::PrepareNodes() {
  145. GELOGD("[%s] Start to prepare nodes. force infer shape = %s.", graph_item_->GetName().c_str(),
  146. force_infer_shape_ ? "true" : "false");
  147. auto &all_nodes = graph_item_->GetAllNodes();
  148. for (auto all_node : all_nodes) {
  149. auto &node_item = *all_node;
  150. // for while op
  151. if (force_infer_shape_ && !node_item.is_dynamic) {
  152. GELOGD("[%s] Force infer shape is set, updating node to dynamic.", node_item.NodeName().c_str());
  153. auto &mutable_node_item = const_cast<NodeItem &>(node_item);
  154. mutable_node_item.SetToDynamic();
  155. }
  156. GELOGD("[%s] Start to prepare node [%s].", graph_item_->GetName().c_str(), node_item.NodeName().c_str());
  157. auto node_state = subgraph_context_->GetOrCreateNodeState(&node_item);
  158. GE_CHECK_NOTNULL(node_state);
  159. auto p_node_state = node_state.get();
  160. if (node_item.node_type == NETOUTPUT) {
  161. // Wait for all inputs become valid
  162. // after PrepareNodes returned. all output tensors and shapes are valid
  163. GE_CHK_STATUS_RET_NOLOG(p_node_state->GetShapeInferenceState().AwaitShapesReady(*context_));
  164. GE_CHK_STATUS_RET_NOLOG(p_node_state->AwaitInputTensors(*context_));
  165. continue;
  166. }
  167. // only do shape inference and compilation for nodes with dynamic shapes.
  168. if (node_item.is_dynamic) {
  169. auto prepare_future = pre_run_pool_.commit([this, p_node_state]() -> Status {
  170. GE_CHK_STATUS_RET_NOLOG(InferShape(shape_inference_engine_.get(), *p_node_state));
  171. return PrepareForExecution(context_, *p_node_state);
  172. });
  173. p_node_state->SetPrepareFuture(std::move(prepare_future));
  174. } else {
  175. GELOGD("[%s] Skipping shape inference and compilation for node with static shape.", node_item.NodeName().c_str());
  176. if (node_item.kernel_task == nullptr) {
  177. GELOGW("[%s] Node of static shape got no task.", node_item.NodeName().c_str());
  178. GE_CHK_STATUS_RET(TaskCompileEngine::Compile(*p_node_state, context_), "[%s] Failed to create task.",
  179. p_node_state->GetName().c_str());
  180. } else {
  181. node_state->SetKernelTask(node_item.kernel_task);
  182. }
  183. }
  184. if (!ready_queue_.Push(p_node_state)) {
  185. GELOGE(INTERNAL_ERROR, "[%s] Error occurs while launching tasks. quit from preparing nodes.",
  186. graph_item_->GetName().c_str());
  187. return INTERNAL_ERROR;
  188. }
  189. GELOGD("[%s] Push node [%s] to queue.", graph_item_->GetName().c_str(), node_item.NodeName().c_str());
  190. }
  191. return SUCCESS;
  192. }
  193. Status SubgraphExecutor::InferShape(ShapeInferenceEngine *shape_inference_engine, NodeState &node_state) {
  194. const auto &node_item = *node_state.GetNodeItem();
  195. GE_CHK_STATUS_RET(shape_inference_engine->InferShape(node_state), "[%s] Failed to InferShape.",
  196. node_state.GetName().c_str());
  197. GE_CHK_STATUS_RET(shape_inference_engine->PropagateOutputShapes(node_item), "[%s] Failed to PropagateOutputShapes.",
  198. node_state.GetName().c_str());
  199. return SUCCESS;
  200. }
  201. Status SubgraphExecutor::PrepareForExecution(GraphExecutionContext *ctx, NodeState &node_state) {
  202. auto &node_item = *node_state.GetNodeItem();
  203. if (node_item.kernel_task == nullptr) {
  204. GE_CHK_STATUS_RET(TaskCompileEngine::Compile(node_state, ctx), "Failed to create task for node[%s]",
  205. node_state.GetName().c_str());
  206. } else {
  207. node_state.SetKernelTask(node_item.kernel_task);
  208. }
  209. GELOGD("[%s] Start to invoke CalcOpRunningParam.", node_item.NodeName().c_str());
  210. RECORD_COMPILE_EVENT(ctx, node_item.NodeName().c_str(), "[CalcOpRunningParam] Start");
  211. GE_CHK_STATUS_RET(NodeExecutorManager::GetInstance().CalcOpRunningParam(*node_item.node),
  212. "[%s] Failed to invoke CalcOpRunningParam.", node_item.NodeName().c_str());
  213. RECORD_COMPILE_EVENT(ctx, node_item.NodeName().c_str(), "[CalcOpRunningParam] End");
  214. GELOGD("[%s] Done invoking CalcOpRunningParam successfully.", node_item.NodeName().c_str());
  215. return SUCCESS;
  216. }
  217. Status SubgraphExecutor::LaunchTasks() {
  218. while (true) {
  219. NodeState *node_state = nullptr;
  220. if (!ready_queue_.Pop(node_state)) {
  221. GELOGE(INTERNAL_ERROR, "[%s] Failed to pop node.", graph_item_->GetName().c_str());
  222. return INTERNAL_ERROR;
  223. }
  224. if (node_state == nullptr) {
  225. GELOGD("[%s] Got EOF from queue.", graph_item_->GetName().c_str());
  226. return SUCCESS;
  227. }
  228. GE_CHK_STATUS_RET_NOLOG(node_state->WaitForPrepareDone());
  229. GELOGD("[%s] Start to execute.", node_state->GetName().c_str());
  230. auto task_context = TaskContext::Create(*node_state->GetNodeItem(), context_, subgraph_context_.get());
  231. GE_CHECK_NOTNULL(task_context);
  232. task_context->SetForceInferShape(force_infer_shape_);
  233. auto shared_task_context = std::shared_ptr<TaskContext>(task_context.release());
  234. GE_CHK_STATUS_RET(ExecutionEngine::ExecuteAsync(*node_state, shared_task_context, *context_),
  235. "[%s] Execute node failed.", node_state->GetName().c_str());
  236. GELOGD("[%s] Done executing node successfully.", node_state->GetName().c_str());
  237. }
  238. }
  239. Status SubgraphExecutor::ScheduleTasks() {
  240. GELOGD("[%s] Start to schedule prepare workers.", graph_item_->GetName().c_str());
  241. auto prepare_future = std::async([&]() -> Status {
  242. auto ret = PrepareNodes();
  243. ready_queue_.Push(nullptr);
  244. return ret;
  245. });
  246. GELOGD("[%s] Start to execute subgraph.", graph_item_->GetName().c_str());
  247. auto ret = LaunchTasks();
  248. if (ret != SUCCESS) {
  249. GELOGE(ret, "[%s] Failed to execute subgraph.", graph_item_->GetName().c_str());
  250. subgraph_context_->OnError(ret);
  251. context_->SetErrorCode(ret);
  252. ready_queue_.Stop();
  253. prepare_future.wait();
  254. return ret;
  255. }
  256. GE_CHK_STATUS_RET(prepare_future.get(), "[%s] Error occurred in task preparation.", graph_item_->GetName().c_str());
  257. GELOGD("[%s] Done launching all tasks successfully.", graph_item_->GetName().c_str());
  258. return SUCCESS;
  259. }
  260. Status SubgraphExecutor::GetOutputs(vector<TensorValue> &outputs) { return subgraph_context_->GetOutputs(outputs); }
  261. Status SubgraphExecutor::GetOutputs(vector<TensorValue> &outputs, std::vector<ConstGeTensorDescPtr> &output_desc) {
  262. GE_CHK_STATUS_RET(GetOutputs(outputs), "[%s] Failed to get output tensors.", graph_item_->GetName().c_str());
  263. // copy output data from op to designated position
  264. GE_CHK_STATUS_RET(graph_item_->GetOutputDescList(output_desc), "[%s] Failed to get output tensor desc.",
  265. graph_item_->GetName().c_str());
  266. if (outputs.size() != output_desc.size()) {
  267. GELOGE(INTERNAL_ERROR, "Number of output tensors(%zu) mismatch number of output tensor desc(%zu).", outputs.size(),
  268. output_desc.size());
  269. return INTERNAL_ERROR;
  270. }
  271. return SUCCESS;
  272. }
  273. Status SubgraphExecutor::Synchronize() {
  274. GELOGD("[%s] Synchronize start.", graph_item_->GetName().c_str());
  275. GE_CHK_RT_RET(rtStreamSynchronize(context_->stream));
  276. GELOGD("[%s] Done synchronizing successfully.", graph_item_->GetName().c_str());
  277. return SUCCESS;
  278. }
  279. Status SubgraphExecutor::SetOutputsToParentNode(TaskContext &task_context) {
  280. // get output tensors and tensor desc list
  281. std::vector<TensorValue> outputs;
  282. std::vector<ConstGeTensorDescPtr> output_desc_list;
  283. GE_CHK_STATUS_RET(subgraph_context_->GetOutputs(outputs), "[%s] Failed to get output tensors.",
  284. graph_item_->GetName().c_str());
  285. GE_CHK_STATUS_RET(graph_item_->GetOutputDescList(output_desc_list), "[%s] Failed to get output tensor desc.",
  286. graph_item_->GetName().c_str());
  287. if (outputs.size() != output_desc_list.size()) {
  288. GELOGE(INTERNAL_ERROR, "[%s] num output tensors = %zu, num output tensor desc = %zu",
  289. graph_item_->GetName().c_str(), outputs.size(), output_desc_list.size());
  290. return INTERNAL_ERROR;
  291. }
  292. // mapping to parent task context
  293. for (size_t i = 0; i < outputs.size(); ++i) {
  294. int parent_output_index = graph_item_->GetParentOutputIndex(i);
  295. GE_CHECK_GE(parent_output_index, 0);
  296. // update tensor
  297. GELOGD("[%s] Updating output[%zu] to parent output[%d]", graph_item_->GetName().c_str(), i, parent_output_index);
  298. GELOGD("[%s] Updating output tensor, index = %d, tensor = %s", graph_item_->GetName().c_str(), parent_output_index,
  299. outputs[i].DebugString().c_str());
  300. GE_CHK_STATUS_RET(task_context.SetOutput(parent_output_index, outputs[i]));
  301. // updating shapes. dynamic format/dtype is not supported.
  302. // It should be noted that even the subgraph is of known shape, it is also necessary to update parent output desc,
  303. // for instance, IfOp may have two known-shaped subgraphs of different output shapes
  304. const auto &output_desc = output_desc_list[i];
  305. auto parent_output_desc = task_context.MutableOutputDesc(parent_output_index);
  306. GE_CHECK_NOTNULL(parent_output_desc);
  307. GELOGD("[%s] Updating output shape[%d] from [%s] to [%s]", graph_item_->GetName().c_str(), parent_output_index,
  308. parent_output_desc->MutableShape().ToString().c_str(), output_desc->GetShape().ToString().c_str());
  309. parent_output_desc->SetShape(output_desc->GetShape());
  310. GELOGD("[%s] Updating output original shape[%d] from [%s] to [%s]", graph_item_->GetName().c_str(),
  311. parent_output_index, parent_output_desc->GetOriginShape().ToString().c_str(),
  312. output_desc->GetOriginShape().ToString().c_str());
  313. parent_output_desc->SetOriginShape(output_desc->GetOriginShape());
  314. }
  315. return SUCCESS;
  316. }
  317. } // namespace hybrid
  318. } // namespace ge

图引擎模块(GE)是MindSpore的一个子模块,其代码由C++实现,位于前端模块ME和底层硬件之间,起到承接作用。图引擎模块以ME下发的图作为输入,然后进行一系列的深度图优化操作,最后输出一张可以在底层硬件上高效运行的图。GE针对昇腾AI处理器的硬件结构特点,做了特定的优化工作,以此来充分发挥出昇腾AI处理器的强大算力。在进行模型训练/推理时,GE会被自动调用而用户并不感知。GE主要由GE API和GE Core两部分组成,详细的架构图如下所示。