
task_context.cc 14 kB

/**
 * Copyright 2019-2020 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "task_context.h"
#include "framework/common/ge_inner_error_codes.h"
#include "framework/common/debug/log.h"
#include "graph/utils/tensor_utils.h"
#include "graph/debug/ge_attr_define.h"
#include "hybrid/executor/hybrid_execution_context.h"
#include "hybrid/executor/subgraph_executor.h"

namespace ge {
namespace hybrid {
TaskContext::TaskContext(GraphExecutionContext *execution_context, const NodeItem *node_item,
                         SubgraphContext *subgraph_context)
    : node_item_(node_item), execution_context_(execution_context), subgraph_context_(subgraph_context) {}

TaskContext::~TaskContext() {
  GELOGD("[%s] TaskContext destroyed.", node_item_->NodeName().c_str());
  for (auto ws_addr : workspaces_) {
    execution_context_->allocator->Deallocate(ws_addr);
  }

  // release output
  for (int i = 0; i < NumOutputs(); ++i) {
    auto output_tensor = MutableOutput(i);
    if (output_tensor != nullptr) {
      output_tensor->Destroy();
    }
  }
}

std::unique_ptr<TaskContext> TaskContext::Create(const NodeItem &node_item, GraphExecutionContext *execution_context,
                                                 SubgraphContext *subgraph_context) {
  GELOGI("[%s] To create task context, input start = %d, num_inputs = %d, output start = %d, num_outputs = %d.",
         node_item.NodeName().c_str(), node_item.input_start, node_item.num_inputs, node_item.output_start,
         node_item.num_outputs);
  if (node_item.input_start < 0 || node_item.output_start < 0) {
    GELOGE(INTERNAL_ERROR, "NodeItem not properly initialized. input_start = %d, output_start = %d",
           node_item.input_start, node_item.output_start);
    return nullptr;
  }

  auto task_context =
      std::unique_ptr<TaskContext>(new (std::nothrow) TaskContext(execution_context, &node_item, subgraph_context));
  if (task_context == nullptr) {
    GELOGE(MEMALLOC_FAILED, "[%s] Failed to create instance of TaskContext.", node_item.NodeName().c_str());
    return nullptr;
  }

  task_context->node_item_ = &node_item;
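  // All node inputs/outputs of a subgraph live in two flat arrays owned by
  // subgraph_context; each TaskContext only holds pointers offset into them.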
  task_context->inputs_start_ = subgraph_context->all_inputs_.data() + node_item.input_start;
  task_context->outputs_start_ = subgraph_context->all_outputs_.data() + node_item.output_start;
  task_context->iteration_ = execution_context->iteration;
  return task_context;
}

int TaskContext::NumInputs() const { return node_item_->num_inputs; }

int TaskContext::NumOutputs() const { return node_item_->num_outputs; }

TensorValue *TaskContext::MutableInput(int index) {
  if (index < 0 || index >= node_item_->num_inputs) {
    GELOGE(PARAM_INVALID, "Index out of range. index = %d, num_inputs = %d", index, node_item_->num_inputs);
    return nullptr;
  }
  return inputs_start_ + index;
}

const TensorValue *TaskContext::GetOutput(int index) const {
  if (index < 0 || index >= node_item_->num_outputs) {
    GELOGE(PARAM_INVALID, "Index out of range. index = %d, num_outputs = %d", index, node_item_->num_outputs);
    return nullptr;
  }
  return outputs_start_ + index;
}

TensorValue *TaskContext::MutableOutput(int index) {
  if (index < 0 || index >= node_item_->num_outputs) {
    GELOGE(PARAM_INVALID, "Index out of range. index = %d, num_outputs = %d", index, node_item_->num_outputs);
    return nullptr;
  }
  return outputs_start_ + index;
}

std::size_t TaskContext::NumWorkspaces() const { return workspaces_.size(); }
void *TaskContext::MutableWorkspace(int index) {
  if (index < 0 || static_cast<size_t>(index) >= workspaces_.size()) {
    GELOGE(PARAM_INVALID, "Index out of range. index = %d, num_workspaces = %zu", index, workspaces_.size());
    return nullptr;
  }
  return workspaces_[index];
}
const TensorValue *TaskContext::GetInput(int index) const {
  if (index < 0 || index >= node_item_->num_inputs) {
    GELOGE(PARAM_INVALID, "Index out of range. index = %d, num_inputs = %d", index, node_item_->num_inputs);
    return nullptr;
  }
  return inputs_start_ + index;
}

Status TaskContext::AllocateWorkspaces() {
  auto workspace_sizes = node_item_->node->GetOpDesc()->GetWorkspaceBytes();
  for (auto size : workspace_sizes) {
    void *workspace = execution_context_->allocator->Allocate(size);
    if (workspace == nullptr) {
      GELOGE(MEMALLOC_FAILED, "Failed to allocate workspace of size: %ld", size);
      return MEMALLOC_FAILED;
    }
    workspaces_.emplace_back(workspace);
  }
  return SUCCESS;
}

Status TaskContext::RegisterCallback(const std::function<void()> &callback_fun) const {
  auto ret = execution_context_->callback_manager->RegisterCallback(callback_fun);
  if (ret != SUCCESS) {
    GELOGE(ret, "[%s] Failed to register callback", GetNodeName());
    execution_context_->callback_manager->Destroy();
    return ret;
  }
  return SUCCESS;
}

string TaskContext::TensorDesc2String(const GeTensorDesc &desc) {
  std::stringstream ss;
  ss << "[TensorDesc] ";
  ss << "DataType = " << desc.GetDataType();
  ss << ", Format = " << desc.GetFormat();
  ss << ", Shape = [";
  for (auto dim : desc.GetShape().GetDims()) {
    ss << dim << ", ";
  }
  ss << "]";
  return ss.str();
}

Status TaskContext::AllocateTensor(const GeTensorDesc &tensor_desc, TensorValue &tensor, AllocationAttr *attr) {
  int64_t size = 0;
  if (ge::TensorUtils::GetSize(tensor_desc, size) != GRAPH_SUCCESS) {
    GELOGE(INTERNAL_ERROR, "Failed to get tensor size");
    return INTERNAL_ERROR;
  }

  if (size == 0) {
    GELOGW("size from tensor_desc == 0");
  }

  auto buffer = TensorBuffer::Create(execution_context_->allocator, size, attr);
  GE_CHECK_NOTNULL(buffer);
  tensor = TensorValue(shared_ptr<TensorBuffer>(buffer.release()));
  return SUCCESS;
}

Status TaskContext::AllocateOutput(int index, const GeTensorDesc &tensor_desc, TensorValue **tensor,
                                   AllocationAttr *attr) {
  GELOGI("To allocate output for node: %s. index = %d, tensor desc = %s", node_item_->NodeName().c_str(), index,
         TensorDesc2String(tensor_desc).c_str());
  if (index < 0 || index >= node_item_->num_outputs) {
    GELOGE(PARAM_INVALID, "output index out of range. num_output = %d, index = %d", node_item_->num_outputs, index);
    return PARAM_INVALID;
  }

  if (outputs_start_[index].GetData() != nullptr) {
    GELOGI("already allocated as net output");
    return SUCCESS;
  }
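  // An output either aliases a variable (ref_outputs), reuses an input
  // buffer (reuse_inputs), or gets a freshly allocated tensor.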
  auto it = node_item_->ref_outputs.find(index);
  if (it != node_item_->ref_outputs.end()) {
    auto &ref_node = it->second;
    GELOGD("source node of %s:%d = %s, op_type = %s", node_item_->NodeName().c_str(), index,
           ref_node->GetName().c_str(), ref_node->GetType().c_str());
    TensorValue *ref_tensor = execution_context_->model->GetVariable(ref_node->GetName());
    GE_CHECK_NOTNULL(ref_tensor);
    outputs_start_[index] = *ref_tensor;
  } else {
    auto reuse_input = node_item_->reuse_inputs.find(index);
    if (reuse_input != node_item_->reuse_inputs.end()) {
      GELOGD("[%s] Output[%d] is referenced to input[%d]", GetNodeName(), index, reuse_input->second);
      outputs_start_[index] = inputs_start_[reuse_input->second];
    } else {
      GE_CHK_STATUS_RET_NOLOG(AllocateTensor(tensor_desc, outputs_start_[index], attr));
      GELOGD("Allocating output successfully. node: %s. index = %d, size = %zu", node_item_->NodeName().c_str(), index,
             outputs_start_[index].GetSize());
    }
  }

  if (execution_context_->trace_enabled) {
    outputs_start_[index].SetName(node_item_->NodeName() + "_out_" + std::to_string(index));
  }

  if (tensor != nullptr) {
    *tensor = outputs_start_ + index;
  }

  return SUCCESS;
}

Status TaskContext::AllocateOutputs(AllocationAttr *attr) {
  for (int i = 0; i < node_item_->num_outputs; ++i) {
    const auto &output_desc = node_item_->op_desc->MutableOutputDesc(i);
    GE_CHECK_NOTNULL(output_desc);
    uint32_t mem_type = 0;
    (void)AttrUtils::GetInt(node_item_->op_desc, ATTR_OUTPUT_MEMORY_TYPE, mem_type);
    if (attr == nullptr) {
      auto tmp_attr = AllocationAttr(0, nullptr, static_cast<MemStorageType>(mem_type));
      GE_CHK_STATUS_RET_NOLOG(AllocateOutput(i, *output_desc, nullptr, &tmp_attr));
    } else {
      attr->SetMemType(static_cast<MemStorageType>(mem_type));
      GE_CHK_STATUS_RET_NOLOG(AllocateOutput(i, *output_desc, nullptr, attr));
    }
  }
  return SUCCESS;
}

Status TaskContext::AllocateTensor(size_t size, TensorValue &tensor, AllocationAttr *attr) {
  auto buffer = TensorBuffer::Create(execution_context_->allocator, size, attr);
  if (buffer == nullptr) {
    GELOGE(MEMALLOC_FAILED, "Failed to allocate buffer of size: %zu", size);
    return MEMALLOC_FAILED;
  }
  tensor = TensorValue(shared_ptr<TensorBuffer>(buffer.release()));
  return SUCCESS;
}

const NodeItem &TaskContext::GetNodeItem() const { return *node_item_; }

Status TaskContext::SetOutput(int index, const TensorValue &tensor) {
  if (index < 0 || index >= node_item_->num_outputs) {
    GELOGE(PARAM_INVALID, "output index out of range. num_output = %d, index = %d", node_item_->num_outputs, index);
    return PARAM_INVALID;
  }
  GELOGD("Set %s:%d with tensor: %s", node_item_->NodeName().c_str(), index, tensor.DebugString().c_str());
  outputs_start_[index] = tensor;
  return SUCCESS;
}

rtStream_t TaskContext::GetStream() { return execution_context_->stream; }

int64_t TaskContext::GetSessionId() const { return execution_context_->session_id; }

Status TaskContext::GetStatus() const { return status_; }

void TaskContext::SetStatus(Status status) {
  status_ = status;
  if (status != SUCCESS) {
    execution_context_->SetErrorCode(status);
  }
}

Status TaskContext::AllocateWorkspace(size_t size, void **buffer, void *ori_addr) {
  GE_CHECK_NOTNULL(buffer);
  if (ori_addr == nullptr) {
    *buffer = execution_context_->allocator->Allocate(size, nullptr);
  } else {
    AllocationAttr attr(ori_addr);
    *buffer = execution_context_->allocator->Allocate(size, &attr);
  }

  if (*buffer == nullptr) {
    GELOGE(MEMALLOC_FAILED, "Failed to allocate workspace of size = %zu", size);
    return MEMALLOC_FAILED;
  }

  GELOGD("Allocating workspace of size = %zu successfully", size);
  workspaces_.emplace_back(*buffer);
  return SUCCESS;
}

Status TaskContext::PropagateOutputs() {
  // propagate outputs
  for (int i = 0; i < NumOutputs(); ++i) {
    auto tensor = MutableOutput(i);
    GE_CHECK_NOTNULL(tensor);
    if (tensor->GetData() == nullptr) {
      GELOGD("[%s] Node output[%d] is null.", node_item_->NodeName().c_str(), i);
    }
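    // outputs[i] lists (consumer input index, consumer NodeItem) pairs; copy
    // this output into each consumer's slot in the flattened all_inputs_.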
    auto &output_nodes = node_item_->outputs[i];
    for (auto &dst_input_index_and_node : output_nodes) {
      auto dst_input_idx = dst_input_index_and_node.first;
      auto dst_node_item = dst_input_index_and_node.second;
      auto input_offset = dst_node_item->input_start + dst_input_idx;
      GELOGI(
          "Propagate output of node %s, output index = %d, dst node = %s, "
          "dst_input_index = %d, dst_input_offset = %d.",
          node_item_->NodeName().c_str(), i, dst_node_item->NodeName().c_str(), dst_input_idx, input_offset);

      if (subgraph_context_->all_inputs_.size() <= static_cast<size_t>(input_offset)) {
        GELOGE(INTERNAL_ERROR, "[%s] input index out of range. index = %d, total input num = %zu", GetNodeName(),
               input_offset, subgraph_context_->all_inputs_.size());
        return INTERNAL_ERROR;
      }

      subgraph_context_->all_inputs_[input_offset] = *tensor;
      if (execution_context_->trace_enabled) {
        subgraph_context_->all_inputs_[input_offset].SetName(node_item_->NodeName() + "_in_" +
                                                             std::to_string(dst_input_idx));
      }
    }
  }

  return SUCCESS;
}

const void *TaskContext::GetVarBaseAddr() { return execution_context_->model->GetVarMemBase(); }

const char *TaskContext::GetNodeName() const { return node_item_->NodeName().c_str(); }

void TaskContext::ReleaseInput(int index) {
  auto input_tensor = MutableInput(index);
  if (input_tensor != nullptr) {
    input_tensor->Destroy();
    GELOGD("[%s] Tensor of input[%d] released", GetNodeName(), index);
  }
}

ConstGeTensorDescPtr TaskContext::GetOutputDesc(int index) {
  return node_item_->op_desc->MutableOutputDesc(static_cast<uint32_t>(index));
}

ConstGeTensorDescPtr TaskContext::GetInputDesc(int index) {
  return node_item_->op_desc->MutableInputDesc(static_cast<uint32_t>(index));
}

GeTensorDescPtr TaskContext::MutableInputDesc(int index) {
  return node_item_->op_desc->MutableInputDesc(static_cast<uint32_t>(index));
}

GeTensorDescPtr TaskContext::MutableOutputDesc(int index) {
  return node_item_->op_desc->MutableOutputDesc(static_cast<uint32_t>(index));
}

bool TaskContext::IsForceInferShape() const { return force_infer_shape_; }

void TaskContext::SetForceInferShape(bool force_infer_shape) { force_infer_shape_ = force_infer_shape; }

void TaskContext::NodeDone() { subgraph_context_->NodeDone(node_item_->node); }

void TaskContext::OnError(Status error) {
  subgraph_context_->OnError(error);
  execution_context_->SetErrorCode(error);
}

bool TaskContext::IsTraceEnabled() const { return execution_context_->trace_enabled; }

TensorValue *TaskContext::GetVariable(const std::string &name) { return execution_context_->model->GetVariable(name); }

uint64_t TaskContext::GetIterationNumber() const { return iteration_; }

bool TaskContext::IsDumpEnabled() const { return execution_context_->dump_enabled; }

Status TaskContext::TryExecuteCallback(const function<void()> &callback_fun) const {
  if (!callback_fun) {
    return SUCCESS;
  }
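  // If other nodes observe this one, the callback is deferred to the stream
  // callback manager instead of running inline (see RegisterCallback above).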
  if (node_item_->has_observer) {
    return RegisterCallback(callback_fun);
  }

  callback_fun();
  return SUCCESS;
}

const DumpProperties &TaskContext::GetDumpProperties() const { return execution_context_->dump_properties; }
}  // namespace hybrid
}  // namespace ge
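
For orientation, the sketch below shows how the hybrid executor typically drives this class: create the context for a node, allocate its outputs, launch the kernel, then propagate outputs to downstream inputs. It assumes node_item, execution_context, and subgraph_context have already been set up by the executor, and LaunchKernel is a hypothetical stand-in for the actual kernel dispatch; it illustrates the call order only, not the real executor code.

// Hypothetical driver flow (illustrative only).
auto task_context = TaskContext::Create(node_item, execution_context, subgraph_context);
if (task_context == nullptr) {
  return INTERNAL_ERROR;
}
// Reserve buffers for each of the node's outputs (alias, reuse, or fresh allocation).
if (task_context->AllocateOutputs(nullptr) != SUCCESS) {
  return MEMALLOC_FAILED;
}
// LaunchKernel is a stand-in for dispatching the node's task on the stream.
LaunchKernel(*task_context, task_context->GetStream());
// Copy output tensors into the input slots of downstream nodes, then mark the node done.
if (task_context->PropagateOutputs() != SUCCESS) {
  return INTERNAL_ERROR;
}
task_context->NodeDone();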

The Graph Engine (GE) is a submodule of MindSpore. Implemented in C++, it sits between the front-end module ME and the underlying hardware and serves as the bridge between them. GE takes the graph delivered by ME as input, applies a series of deep graph optimizations, and finally outputs a graph that can run efficiently on the underlying hardware. GE performs optimizations tailored to the hardware architecture of the Ascend AI processor in order to fully exploit its compute power. During model training and inference, GE is invoked automatically and is transparent to the user. GE consists of two main parts, GE API and GE Core; the detailed architecture is shown in the diagram below.