
single_op_model.cc 25 kB

/**
 * Copyright 2019-2020 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
#include "single_op/single_op_model.h"
#include <atomic>
#include <memory>
#include <string>
#include <vector>
#include "framework/common/debug/ge_log.h"
#include "graph/debug/ge_attr_define.h"
#include "graph/load/model_manager/model_utils.h"
#include "graph/utils/attr_utils.h"
#include "graph/utils/graph_utils.h"
#include "graph/utils/tensor_utils.h"
#include "runtime/rt.h"
#include "task/aicpu_task_builder.h"
#include "task/aicpu_kernel_task_builder.h"
#include "task/tbe_task_builder.h"
#include "hybrid/executor/hybrid_model_executor.h"
#include "hybrid/node_executor/node_executor.h"
static std::atomic<std::uint64_t> aicpu_kernel_id(0);
using domi::TaskDef;
using std::unique_ptr;
using std::vector;
namespace ge {
namespace {
const size_t kDataOutputNum = 1;
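// Sets |flag| to true when any node in the compute graph declares an op-infer
// dependency, i.e. shape inference for that op depends on the data of an input tensor.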
Status IfInferDepend(GeModelPtr &ge_model, bool &flag) {
  auto comp_graph = GraphUtils::GetComputeGraph(ge_model->GetGraph());
  GE_CHECK_NOTNULL(comp_graph);
  for (const auto &node : comp_graph->GetAllNodes()) {
    auto op_desc = node->GetOpDesc();
    GE_CHECK_NOTNULL(op_desc);
    const auto &depends = op_desc->GetOpInferDepends();
    if (!depends.empty()) {
      flag = true;
      return SUCCESS;
    }
  }
  return SUCCESS;
}
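// Sets |flag| to true when the op must go through the hybrid model executor:
// either a TE kernel task exists while some node declares an infer-depend,
// or the model contains more than one TE kernel task.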
Status NeedHybridModel(GeModelPtr &ge_model, bool &flag) {
  bool infer_depend_flag = false;
  GE_CHK_STATUS_RET(IfInferDepend(ge_model, infer_depend_flag), "[Check][InferDepend] failed.");
  auto tasks = ge_model->GetModelTaskDefPtr()->task();
  int32_t kernel_task_num = 0;
  for (int i = 0; i < tasks.size(); ++i) {
    auto task_type = static_cast<rtModelTaskType_t>(tasks[i].type());
    if (task_type == RT_MODEL_TASK_KERNEL || task_type == RT_MODEL_TASK_ALL_KERNEL) {
      const auto &context = task_type == RT_MODEL_TASK_KERNEL ? tasks[i].kernel().context() :
                                                                tasks[i].kernel_with_handle().context();
      auto kernel_type = static_cast<ccKernelType>(context.kernel_type());
      if (kernel_type == ccKernelType::TE) {
        if (infer_depend_flag) {
          flag = true;
          return SUCCESS;
        }
        kernel_task_num++;
        if (kernel_task_num > 1) {
          flag = true;
          return SUCCESS;
        }
      }
    }
  }
  return SUCCESS;
}
}  // namespace
SingleOpModel::SingleOpModel(const std::string &model_name, const void *model_data, uint32_t model_size)
    : model_name_(model_name), ori_model_data_(model_data), ori_model_size_(model_size) {}
Status SingleOpModel::Init() {
  GE_CHK_STATUS_RET_NOLOG(InitModel());
  return LoadAllNodes();
}
Status SingleOpModel::InitModel() {
  ge::ModelData model;
  model.model_len = ori_model_size_;
  model.model_data = const_cast<void *>(ori_model_data_);
  auto ret = model_helper_.LoadModel(model);
  if (ret != SUCCESS) {
    GELOGE(ret, "[Load][Model] failed.");
    REPORT_CALL_ERROR("E19999", "InitModel fail for ModelHelper LoadModel failed.");
    return ret;
  }
  return SUCCESS;
}
void SingleOpModel::ParseOpModelParams(ModelHelper &model_helper, SingleOpModelParam &param) {
  int64_t value = 0;
  bool ret = false;
  std::shared_ptr<ge::GeModel> model = model_helper.GetGeModel();
  GE_CHECK_NOTNULL_JUST_RETURN(model);
  ret = ge::AttrUtils::GetInt(model, ATTR_MODEL_MEMORY_SIZE, value);
  param.memory_size = ret ? static_cast<uint64_t>(value) : 0;
  ret = ge::AttrUtils::GetInt(model, ATTR_MODEL_ZERO_COPY_MEMORY_SIZE, value);
  param.zero_copy_mem_size = ret ? static_cast<uint64_t>(value) : 0;
  ret = ge::AttrUtils::GetInt(model, ATTR_MODEL_WEIGHT_SIZE, value);
  param.weight_size = ret ? static_cast<uint64_t>(value) : 0;
  ret = ge::AttrUtils::GetInt(model, MODEL_ATTR_TASK_GEN_BASE_ADDR, value);
  param.base_addr = ret ? static_cast<uint64_t>(value) : 0;
  ret = ge::AttrUtils::GetInt(model, MODEL_ATTR_TASK_GEN_WEIGHT_ADDR, value);
  param.weight_addr = ret ? static_cast<uint64_t>(value) : 0;
  ret = ge::AttrUtils::GetInt(model, ATTR_MODEL_CORE_TYPE, value);
  param.core_type = ret ? value : 0;
  GELOGI("ParseOpModelParams(), total_memory_size:%lu, zero_copy_size:%lu, weight_size:%lu. core_type = %lu",
         param.memory_size, param.zero_copy_mem_size, param.weight_size, param.core_type);
}
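// Allocates device memory for the model: a feature-map block of size
// (memory_size - zero_copy_mem_size) and, when the model carries constant
// weights, a weight block into which the weight buffer is copied.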
Status SingleOpModel::InitModelMem(StreamResource &res) {
  ParseOpModelParams(model_helper_, model_params_);
  if (model_params_.memory_size > model_params_.zero_copy_mem_size) {
    const string purpose("malloc feature map memory on model execute.");
    GELOGI("total memory: %lu, zero_copy_mem: %lu", model_params_.memory_size, model_params_.zero_copy_mem_size);
    model_params_.mem_base =
        res.MallocMemory(purpose, model_params_.memory_size - model_params_.zero_copy_mem_size, false);
    if (model_params_.mem_base == nullptr) {
      return ACL_ERROR_GE_MEMORY_ALLOCATION;
    }
  }
  if (model_params_.weight_size > 0 && has_weight_) {
    const string purpose("malloc weights memory on model execute.");
    model_params_.weight_base = res.MallocWeight(purpose, model_params_.weight_size);
    if (model_params_.weight_base == nullptr) {
      // no need to free memory, for that was handled by StreamResources
      return ACL_ERROR_GE_MEMORY_ALLOCATION;
    }
    auto weight_buffer = model_helper_.GetGeModel()->GetWeight();
    GELOGI("To copy weight to device. weight size = %zu", weight_buffer.GetSize());
    GE_CHK_RT_RET(rtMemcpy(model_params_.weight_base,
                           model_params_.weight_size,
                           weight_buffer.GetData(),
                           weight_buffer.GetSize(),
                           RT_MEMCPY_HOST_TO_DEVICE));
  }
  return SUCCESS;
}
Status SingleOpModel::ParseInputNode(const OpDescPtr &op_desc) {
  vector<int64_t> offsets = op_desc->GetOutputOffset();
  if (offsets.size() != kDataOutputNum) {
    GELOGE(ACL_ERROR_GE_PARAM_INVALID,
           "[Parse][InputNode]Data op should have only one output, but got %zu, op_name:%s, op_type:%s.",
           op_desc->GetOutputOffset().size(), op_desc->GetName().c_str(), op_desc->GetType().c_str());
    REPORT_INNER_ERROR("E19999",
        "ParseInputNode fail for Data op should have only one output, but got %zu, op_name:%s, op_type:%s.",
        op_desc->GetOutputOffset().size(), op_desc->GetName().c_str(), op_desc->GetType().c_str());
    return ACL_ERROR_GE_PARAM_INVALID;
  }
  auto output_desc = op_desc->GetOutputDescPtr(0);
  GE_CHECK_NOTNULL(output_desc);
  int64_t tensor_size = 0;
  (void)TensorUtils::GetSize(*output_desc, tensor_size);
  input_offset_list_.emplace_back(offsets[0]);
  input_sizes_.emplace_back(tensor_size);
  GELOGI("[%s] parse input node: %s, size = %ld, offset = %u", model_name_.c_str(), op_desc->GetName().c_str(),
         tensor_size, static_cast<uint32_t>(offsets[0]));
  return SUCCESS;
}
void SingleOpModel::ParseOutputNode(const OpDescPtr &op_desc) {
  vector<int64_t> offsets = op_desc->GetInputOffset();
  for (uint32_t k = 0; k < static_cast<uint32_t>(offsets.size()); ++k) {
    auto input_desc = op_desc->GetInputDescPtr(k);
    if (input_desc == nullptr) {
      continue;
    }
    int64_t tensor_size = 0;
    (void)TensorUtils::GetSize(*input_desc, tensor_size);
    output_offset_list_.emplace_back(offsets[k]);
    output_sizes_.emplace_back(tensor_size);
    GELOGI("[%s] parse output node: %s, size = %ld, offset = %u", model_name_.c_str(), op_desc->GetName().c_str(),
           tensor_size, static_cast<uint32_t>(offsets[k]));
  }
}
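// Walks the direct nodes of the compute graph, recording Data/AippData nodes as
// inputs and the NetOutput node as the output, marking whether constants exist
// (has_weight_), and attaching TBE / custom AICPU kernel binaries to the
// remaining op descs.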
Status SingleOpModel::LoadAllNodes() {
  auto ge_model = model_helper_.GetGeModel();
  GE_CHECK_NOTNULL(ge_model);
  Graph graph = ge_model->GetGraph();
  model_id_ = ge_model->GetModelId();
  auto compute_graph = GraphUtils::GetComputeGraph(graph);
  if (compute_graph == nullptr) {
    GELOGE(ACL_ERROR_GE_INTERNAL_ERROR, "[Get][ComputeGraph] fail, model_name:%s.", model_name_.c_str());
    REPORT_CALL_ERROR("E19999", "LoadAllNodes fail for GetComputeGraph return nullptr, model_name:%s.",
                      model_name_.c_str());
    return ACL_ERROR_GE_INTERNAL_ERROR;
  }
  auto nodes = compute_graph->GetDirectNode();
  size_t model_op_size = nodes.size();
  GELOGI("[%s] node size = %zu", model_name_.c_str(), model_op_size);
  for (size_t i = 0; i < model_op_size; ++i) {
    auto node = nodes.at(i);
    auto op_desc = node->GetOpDesc();
    GE_CHECK_NOTNULL(op_desc);
    op_list_[op_desc->GetId()] = node;
    auto op_type = op_desc->GetType();
    GELOGI("[%s] node[%zu] = %s, type = %s", model_name_.c_str(), i, node->GetName().c_str(), op_type.c_str());
    if (op_type == DATA_TYPE || op_type == AIPP_DATA_TYPE) {
      data_ops_.emplace_back(op_desc);
      continue;
    }
    if (op_type == CONSTANT || op_type == CONSTANTOP) {
      has_weight_ = true;
      continue;
    }
    if (op_type == NETOUTPUT) {
      netoutput_op_ = op_desc;
      continue;
    }
    ge_model->GetTBEKernelStore().LoadTBEKernelBinToOpDesc(op_desc);
    ge_model->GetCustAICPUKernelStore().LoadCustAICPUKernelBinToOpDesc(op_desc);
  }
  return SUCCESS;
}
Status SingleOpModel::ParseInputsAndOutputs() {
  for (auto &op_desc : data_ops_) {
    GE_CHK_STATUS_RET_NOLOG(ParseInputNode(op_desc));
  }
  ParseOutputNode(netoutput_op_);
  return SUCCESS;
}
Status SingleOpModel::SetInputsAndOutputs(SingleOp &single_op) {
  int arg_index = 0;
  for (size_t i = 0; i < input_offset_list_.size(); ++i) {
    auto *addr = model_params_.mem_base + input_offset_list_[i];
    model_params_.addr_mapping_.emplace(reinterpret_cast<uintptr_t>(addr), arg_index++);
    single_op.input_sizes_.emplace_back(input_sizes_[i]);
    single_op.input_addr_list_.emplace_back(addr);
  }
  for (size_t i = 0; i < output_offset_list_.size(); ++i) {
    auto *addr = model_params_.mem_base + output_offset_list_[i];
    model_params_.addr_mapping_.emplace(reinterpret_cast<uintptr_t>(addr), arg_index++);
    single_op.output_sizes_.emplace_back(output_sizes_[i]);
    single_op.output_addr_list_.emplace_back(addr);
  }
  single_op.args_.resize(arg_index);
  return SUCCESS;
}
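// Translates every task in the model task def into an OpTask: TE kernels become
// TbeOpTask, AI_CPU/CUST_AI_CPU kernels become AiCpuCCTask, and KERNEL_EX tasks
// become AiCpuTask (TF-style AICPU); unsupported kernel types are rejected and
// all other task types are skipped.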
Status SingleOpModel::BuildTaskList(StreamResource *stream_resource, SingleOp &single_op) {
  auto ge_model = model_helper_.GetGeModel();
  GE_CHECK_NOTNULL(ge_model);
  single_op.arg_table_.resize(single_op.input_sizes_.size() + single_op.output_sizes_.size());
  auto tasks = ge_model->GetModelTaskDefPtr()->task();
  for (int i = 0; i < tasks.size(); ++i) {
    const TaskDef &task_def = tasks[i];
    GELOGI("[%s] Task[%d], type = %u, DebugString = %s", model_name_.c_str(), i, task_def.type(),
           task_def.DebugString().c_str());
    auto task_type = static_cast<rtModelTaskType_t>(task_def.type());
    if (task_type == RT_MODEL_TASK_KERNEL || task_type == RT_MODEL_TASK_ALL_KERNEL) {
      const auto &context = task_type == RT_MODEL_TASK_KERNEL ? task_def.kernel().context() :
                                                                task_def.kernel_with_handle().context();
      auto kernel_type = static_cast<ccKernelType>(context.kernel_type());
      if (kernel_type == ccKernelType::TE) {
        GELOGD("Building TBE task");
        TbeOpTask *tbe_task = nullptr;
        auto ret = BuildKernelTask(task_def, &tbe_task);
        if (ret != SUCCESS) {
          return ret;
        }
        ParseArgTable(tbe_task, single_op);
        tbe_task->SetModelArgs(model_name_, model_id_);
        if (tbe_task->tiling_buffer_ != nullptr) {
          tbe_task->stream_resource_ = stream_resource;
        }
        single_op.tasks_.emplace_back(tbe_task);
      } else if (kernel_type == ccKernelType::AI_CPU || kernel_type == ccKernelType::CUST_AI_CPU) {
        GELOGD("Building AICPU_CC task");
        OpTask *task = nullptr;
        uint64_t singleop_kernel_id = aicpu_kernel_id++;
        GELOGI("Build singleOp CCTask, kernel_id = %lu", singleop_kernel_id);
        auto ret = BuildCpuKernelTask(task_def.kernel(), &task, singleop_kernel_id);
        if (ret != SUCCESS) {
          return ret;
        }
        task->SetModelArgs(model_name_, model_id_);
        ParseArgTable(task, single_op);
        single_op.tasks_.emplace_back(task);
      } else {
        GELOGE(ACL_ERROR_GE_OP_KERNEL_TYPE_INVALID,
               "[Check][KernelType]Only TBE, AI_CPU, CUST_AI_CPU kernel are supported, but got %u",
               context.kernel_type());
        REPORT_INNER_ERROR("E19999",
            "BuildTaskList fail for %u not supported, Only TBE, AI_CPU, CUST_AI_CPU kernel are supported.",
            context.kernel_type());
        return ACL_ERROR_GE_OP_KERNEL_TYPE_INVALID;
      }
    } else if (task_type == RT_MODEL_TASK_KERNEL_EX) {
      GELOGD("Building AICPU_TF task");
      AiCpuTask *aicpu_task = nullptr;
      bool depend_compute_flag = false;
      uint64_t singleop_kernel_id = aicpu_kernel_id++;
      GELOGI("Build singleOp TfTask, kernel_id = %lu", singleop_kernel_id);
      auto ret = BuildKernelExTask(task_def.kernel_ex(), &aicpu_task, false, depend_compute_flag, singleop_kernel_id);
      if (ret != SUCCESS) {
        return ret;
      }
      aicpu_task->SetModelArgs(model_name_, model_id_);
      ParseArgTable(aicpu_task, single_op);
      single_op.tasks_.emplace_back(aicpu_task);
    } else {
      // skip
      GELOGD("Skip task type: %d", static_cast<int>(task_type));
    }
  }
  return SUCCESS;
}
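// For each I/O address recorded in the task's arg list, looks it up in the
// address mapping built by SetInputsAndOutputs and, on a hit, records the
// location of that arg slot in arg_table_ so it can later be rewritten with a
// user-provided buffer address.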
void SingleOpModel::ParseArgTable(OpTask *task, SingleOp &op) {
  if (task == nullptr) {
    GELOGE(ACL_ERROR_GE_INTERNAL_ERROR, "[Parse][ArgTable] fail for input OpTask is nullptr.");
    REPORT_INNER_ERROR("E19999", "ParseArgTable fail for input OpTask is nullptr.");
    return;
  }
  // args: addr1, addr2, addr3 ...
  uintptr_t *arg_base = nullptr;
  size_t arg_num = 0;
  task->GetIoAddr(arg_base, arg_num);
  for (size_t i = 0; i < arg_num; ++i) {
    uintptr_t *ptr_to_addr = arg_base + i;
    uintptr_t addr = *ptr_to_addr;
    auto iter = model_params_.addr_mapping_.find(addr);
    if (iter != model_params_.addr_mapping_.end()) {
      int arg_index = iter->second;
      GELOGI("%s args[%zu] mapped to user designated args[%d]", task->GetOpdesc()->GetName().c_str(), i, arg_index);
      op.arg_table_[iter->second].emplace_back(ptr_to_addr);
    }
  }
}
Status SingleOpModel::BuildKernelTask(const domi::TaskDef &task_def, TbeOpTask **task) {
  GE_CHECK_NOTNULL(task);
  auto task_type = static_cast<rtModelTaskType_t>(task_def.type());
  const auto &context = task_type == RT_MODEL_TASK_KERNEL ? task_def.kernel().context() :
                                                            task_def.kernel_with_handle().context();
  auto iter = op_list_.find(context.op_index());
  if (iter == op_list_.end()) {
    GELOGE(ACL_ERROR_GE_INTERNAL_ERROR, "[Check][Param:TaskDef]op desc not found. op index = %u", context.op_index());
    REPORT_INNER_ERROR("E19999", "BuildKernelTask fail for op desc not found. op index = %u", context.op_index());
    return ACL_ERROR_GE_INTERNAL_ERROR;
  }
  auto *tbe_task = new (std::nothrow) TbeOpTask();
  if (tbe_task == nullptr) {
    GELOGE(ACL_ERROR_GE_MEMORY_ALLOCATION, "[Create][TbeOpTask]failed.");
    REPORT_INNER_ERROR("E19999", "BuildKernelTask fail for new TbeOpTask.");
    return ACL_ERROR_GE_MEMORY_ALLOCATION;
  }
  auto builder = TbeTaskBuilder(model_name_, iter->second, task_def);
  auto ret = builder.BuildTask(*tbe_task, model_params_);
  if (ret != SUCCESS) {
    delete tbe_task;
    tbe_task = nullptr;
    return ret;
  }
  *task = tbe_task;
  return SUCCESS;
}
Status SingleOpModel::BuildKernelExTask(const domi::KernelExDef &kernel_def, AiCpuTask **task,
                                        bool dynamic_flag, bool &depend_compute_flag, uint64_t kernel_id) {
  auto iter = op_list_.find(kernel_def.op_index());
  if (iter == op_list_.end()) {
    GELOGE(ACL_ERROR_GE_INTERNAL_ERROR,
           "[Check][Param:KernelExDef]op not found. op index = %u", kernel_def.op_index());
    REPORT_INNER_ERROR("E19999",
        "BuildKernelExTask fail for param kernel_def, because op of kernel_def not found, op index:%u.",
        kernel_def.op_index());
    return ACL_ERROR_GE_INTERNAL_ERROR;
  }
  std::unique_ptr<AiCpuTask> aicpu_task(new (std::nothrow) AiCpuTask());
  if (aicpu_task == nullptr) {
    GELOGE(ACL_ERROR_GE_MEMORY_ALLOCATION, "[Create][AiCpuTask] failed.");
    REPORT_INNER_ERROR("E19999", "BuildKernelExTask fail for new AiCpuTask, model_name:%s.", model_name_.c_str());
    return ACL_ERROR_GE_MEMORY_ALLOCATION;
  }
  auto builder = AiCpuTaskBuilder(iter->second->GetOpDesc(), kernel_def);
  auto ret = builder.BuildTask(*aicpu_task, model_params_, dynamic_flag, kernel_id);
  if (ret != SUCCESS) {
    GELOGE(ret, "[Build][Task] failed, kernel_id:%lu.", kernel_id);
    return ret;
  }
  depend_compute_flag = (aicpu_task->GetUnknownType() == DEPEND_COMPUTE);
  *task = aicpu_task.release();
  return SUCCESS;
}
Status SingleOpModel::BuildCpuKernelTask(const domi::KernelDef &kernel_def, OpTask **task, uint64_t kernel_id) {
  const auto &context = kernel_def.context();
  auto iter = op_list_.find(context.op_index());
  if (iter == op_list_.end()) {
    GELOGE(ACL_ERROR_GE_INTERNAL_ERROR,
           "[Check][Param:KernelDef] op desc not found. op index = %u", context.op_index());
    REPORT_INNER_ERROR("E19999",
        "BuildCpuKernelTask fail for kernel_def is invalid, because op of kernel_def not found, op index:%u.",
        context.op_index());
    return ACL_ERROR_GE_INTERNAL_ERROR;
  }
  std::unique_ptr<AiCpuCCTask> aicpucc_task(new (std::nothrow) AiCpuCCTask());
  if (aicpucc_task == nullptr) {
    GELOGE(ACL_ERROR_GE_MEMORY_ALLOCATION, "[Create][AiCpuCCTask] failed");
    REPORT_INNER_ERROR("E19999", "BuildCpuKernelTask fail for new AiCpuCCTask, model_name:%s.", model_name_.c_str());
    return ACL_ERROR_GE_MEMORY_ALLOCATION;
  }
  auto builder = AiCpuCCTaskBuilder(iter->second->GetOpDesc(), kernel_def);
  auto ret = builder.BuildTask(*aicpucc_task, kernel_id, model_params_);
  if (ret != SUCCESS) {
    GELOGE(ret, "[Build][AiCpuCCTask]failed, kernel_id:%lu.", kernel_id);
    REPORT_CALL_ERROR("E19999", "BuildCpuKernelTask fail for build AiCpuTask, kernel_id:%lu.", kernel_id);
    return ret;
  }
  *task = aicpucc_task.release();
  return SUCCESS;
}
Status SingleOpModel::BuildOp(StreamResource &resource, SingleOp &single_op) {
  GE_CHK_STATUS_RET_NOLOG(ParseInputsAndOutputs());
  GE_CHK_STATUS_RET_NOLOG(InitModelMem(resource));
  single_op.running_param_.reset(new (std::nothrow) SingleOpModelParam(model_params_));
  GE_CHECK_NOTNULL(single_op.running_param_);
  GE_CHK_STATUS_RET_NOLOG(SetInputsAndOutputs(single_op));
  return BuildTaskList(&resource, single_op);
}
Status SingleOpModel::BuildModelTaskKernel(const TaskDef &task_def, DynamicSingleOp &single_op) {
  auto task_type = static_cast<rtModelTaskType_t>(task_def.type());
  const auto &context = task_type == RT_MODEL_TASK_KERNEL ? task_def.kernel().context() :
                                                            task_def.kernel_with_handle().context();
  auto kernel_type = static_cast<ccKernelType>(context.kernel_type());
  if (kernel_type == ccKernelType::TE) {
    GELOGD("Building TBE task");
    TbeOpTask *tbe_task = nullptr;
    GE_CHK_STATUS_RET_NOLOG(BuildKernelTask(task_def, &tbe_task));
    tbe_task->SetModelArgs(model_name_, model_id_);
    single_op.op_task_.reset(tbe_task);
  } else if (kernel_type == ccKernelType::AI_CPU || kernel_type == ccKernelType::CUST_AI_CPU) {
    GELOGD("Building AICPU_CC task");
    OpTask *task = nullptr;
    uint64_t dynamic_singleop_kernel_id = aicpu_kernel_id++;
    GELOGI("Build dynamic singleOp CCTask, kernel_id = %lu", dynamic_singleop_kernel_id);
    GE_CHK_STATUS_RET_NOLOG(BuildCpuKernelTask(task_def.kernel(), &task, dynamic_singleop_kernel_id));
    task->SetModelArgs(model_name_, model_id_);
    single_op.op_task_.reset(task);
  } else {
    GELOGE(ACL_ERROR_GE_OP_KERNEL_TYPE_INVALID,
           "[Check][Param:TaskDef]Only TBE, AI_CPU, CUST_AI_CPU kernel are supported, but got %u",
           context.kernel_type());
    REPORT_INNER_ERROR("E19999",
        "BuildModelTaskKernel fail for got:%u not supported, Only TBE, AI_CPU, CUST_AI_CPU kernel are supported.",
        context.kernel_type());
    return ACL_ERROR_GE_OP_KERNEL_TYPE_INVALID;
  }
  return SUCCESS;
}
Status SingleOpModel::BuildTaskListForDynamicOp(DynamicSingleOp &single_op) {
  auto ge_model = model_helper_.GetGeModel();
  GE_CHECK_NOTNULL(ge_model);
  auto tasks = ge_model->GetModelTaskDefPtr()->task();
  for (int i = 0; i < tasks.size(); ++i) {
    const TaskDef &task_def = tasks[i];
    GELOGI("[%s] Task[%d], type = %u, DebugString = %s", model_name_.c_str(), i, task_def.type(),
           task_def.DebugString().c_str());
    auto task_type = static_cast<rtModelTaskType_t>(task_def.type());
    if (task_type == RT_MODEL_TASK_KERNEL || task_type == RT_MODEL_TASK_ALL_KERNEL) {
      if (single_op.op_task_ != nullptr) {
        GELOGE(ACL_ERROR_GE_OP_TASK_TYPE_INVALID, "[Check][TaskType]Do not support dynamic op with multiple tasks.");
        REPORT_INNER_ERROR("E19999",
                           "BuildTaskListForDynamicOp fail for Do not support dynamic op with multiple tasks.");
        return ACL_ERROR_GE_OP_TASK_TYPE_INVALID;
      }
      GE_CHK_STATUS_RET_NOLOG(BuildModelTaskKernel(task_def, single_op));
    } else if (task_type == RT_MODEL_TASK_KERNEL_EX) {
      if (single_op.op_task_ != nullptr) {
        GELOGE(ACL_ERROR_GE_OP_TASK_TYPE_INVALID, "[Check][TaskType]Do not support dynamic op with multiple tasks.");
        REPORT_INNER_ERROR("E19999",
                           "BuildTaskListForDynamicOp fail for Do not support dynamic op with multiple tasks.");
        return ACL_ERROR_GE_OP_TASK_TYPE_INVALID;
      }
      GELOGD("Building AICPU_TF task");
      AiCpuTask *aicpu_task = nullptr;
      bool depend_compute_flag = false;
      uint64_t dynamic_singleop_kernel_id = aicpu_kernel_id++;
      GELOGI("Build dynamic singleOp TfTask, kernel_id = %lu", dynamic_singleop_kernel_id);
      GE_CHK_STATUS_RET_NOLOG(BuildKernelExTask(task_def.kernel_ex(), &aicpu_task, true,
                                                depend_compute_flag, dynamic_singleop_kernel_id));
      if (depend_compute_flag) {
        if (i >= tasks.size() - 1) {
          GELOGE(ACL_ERROR_GE_PARAM_INVALID, "[Check][Task]The copy task of the fourth operator was not found.");
          REPORT_INNER_ERROR("E19999", "The copy task of the fourth operator was not found.");
          return ACL_ERROR_GE_PARAM_INVALID;
        }
        ++i;
        const TaskDef &copy_task_def = tasks[i];
        GE_CHK_STATUS_RET_NOLOG(aicpu_task->SetMemCopyTask(copy_task_def.kernel_ex()));
      }
      aicpu_task->SetModelArgs(model_name_, model_id_);
      single_op.op_task_.reset(aicpu_task);
    } else {
      // skip
      GELOGD("Skip task type: %d", static_cast<int>(task_type));
    }
  }
  return SUCCESS;
}
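// Builds a dynamic-shape single op. When NeedHybridModel reports true, the op
// is wrapped in a HybridModel and executed through HybridModelExecutor;
// otherwise a plain dynamic task list is built via BuildTaskListForDynamicOp.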
Status SingleOpModel::BuildDynamicOp(StreamResource &resource, DynamicSingleOp &single_op) {
  single_op.num_inputs_ = data_ops_.size();
  single_op.num_outputs_ = netoutput_op_->GetAllInputsSize();
  GE_CHK_STATUS_RET_NOLOG(InitModelMem(resource));
  model_params_.memory_size = UINT_MAX;
  auto ge_model = model_helper_.GetGeModel();
  GE_CHECK_NOTNULL(ge_model);
  bool need_hybrid_model = false;
  GE_CHK_STATUS_RET(NeedHybridModel(ge_model, need_hybrid_model), "[Check][NeedHybridModel] failed.");
  if (need_hybrid_model) {
    GELOGD("Build single op HybridModel.");
    GE_CHK_STATUS_RET_NOLOG(hybrid::NodeExecutorManager::GetInstance().EnsureInitialized());
    auto root_model = model_helper_.GetGeRootModel();
    GE_CHECK_NOTNULL(root_model);
    root_model->SetRootGraph(GraphUtils::GetComputeGraph(ge_model->GetGraph()));
    root_model->SetSubgraphInstanceNameToModel(root_model->GetRootGraph()->GetName(), ge_model);
    single_op.hybrid_model_.reset(new (std::nothrow) hybrid::HybridModel(root_model));
    GE_CHECK_NOTNULL(single_op.hybrid_model_);
    GE_CHK_STATUS_RET(single_op.hybrid_model_->Init(true), "[Init][HybridModel]Failed.");
    int32_t device_id = 0;
    GE_CHK_RT_RET(rtGetDevice(&device_id));
    single_op.hybrid_model_executor_.reset(new (std::nothrow) hybrid::HybridModelExecutor(single_op.hybrid_model_.get(),
                                                                                          device_id,
                                                                                          resource.GetStream()));
    GE_CHECK_NOTNULL(single_op.hybrid_model_executor_);
    GE_CHK_STATUS_RET(single_op.hybrid_model_executor_->Init(), "[Init][HybridModelExecutor]Failed.");
    return SUCCESS;
  }
  return BuildTaskListForDynamicOp(single_op);
}
}  // namespace ge
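The listing above only defines the builder; for orientation, the sketch below shows the order in which its public entry points are typically driven when a single-op offline model is loaded. It is a minimal, illustrative caller: the construction of StreamResource and SingleOp happens outside this file, so they are passed in here as already-created objects, and the function name LoadSingleOpExample is hypothetical.

// Minimal sketch (assumed caller, not part of the file above): drive the
// SingleOpModel entry points in their usual order.
ge::Status LoadSingleOpExample(const void *om_data, uint32_t om_size,
                               ge::StreamResource &resource, ge::SingleOp &single_op) {
  // Wrap the serialized offline model.
  ge::SingleOpModel model("my_single_op", om_data, om_size);
  // Init(): load the model via ModelHelper and collect Data / const / NetOutput nodes.
  GE_CHK_STATUS_RET(model.Init(), "[Init][SingleOpModel] failed.");
  // BuildOp(): static-shape path; allocates feature-map/weight memory, binds I/O
  // addresses and turns every task def into an executable OpTask.
  GE_CHK_STATUS_RET(model.BuildOp(resource, single_op), "[Build][Op] failed.");
  // A dynamic-shape op would call model.BuildDynamicOp(resource, dynamic_single_op) instead.
  return ge::SUCCESS;
}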

The Graph Engine (GE) is a submodule of MindSpore implemented in C++. It sits between the front-end module ME and the underlying hardware and bridges the two: GE takes the graph handed down by ME as input, applies a series of deep graph optimizations, and outputs a graph that can run efficiently on the target hardware. GE is specifically tuned to the hardware architecture of the Ascend AI processor in order to fully exploit its compute power. During model training and inference, GE is invoked automatically and is transparent to the user. GE consists of two main parts, GE API and GE Core; a detailed architecture diagram is shown below.