You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number, can include dashes ('-'), and can be up to 35 characters long.

cpu_queue_schedule.cc 21 kB

4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago

  1. /**
  2. * Copyright 2020 Huawei Technologies Co., Ltd
  3. *
  4. * Licensed under the Apache License, Version 2.0 (the "License");
  5. * you may not use this file except in compliance with the License.
  6. * You may obtain a copy of the License at
  7. *
  8. * http://www.apache.org/licenses/LICENSE-2.0
  9. *
  10. * Unless required by applicable law or agreed to in writing, software
  11. * distributed under the License is distributed on an "AS IS" BASIS,
  12. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. * See the License for the specific language governing permissions and
  14. * limitations under the License.
  15. */
  16. #include "graph/load/model_manager/cpu_queue_schedule.h"
  17. #include "common/debug/ge_log.h"
  18. #include "common/debug/log.h"
  namespace {
  // Core dimension handed to every rtCpuKernelLaunch call in this file.
  constexpr uint32_t kCoreDim = 1;

  // Kernel names passed to rtCpuKernelLaunch, one per CPU task kind.
  constexpr const char *kCpuTaskModelEnqueue = "modelEnqueue";
  constexpr const char *kCpuTaskWaitEndGraph = "modelWaitEndGraph";
  constexpr const char *kCpuTaskPrepareOutput = "bufferPrepareOutput";
  constexpr const char *kCpuTaskModelDequeue = "modelDequeue";
  constexpr const char *kCpuTaskModelRepeat = "modelRepeat";
  constexpr const char *kCpuTaskZeroCopy = "zeroCpy";
  }  // namespace
  28. namespace ge {
  29. CpuTaskInfo::CpuTaskInfo(rtStream_t stream) : args_(nullptr), args_size_(0) { stream_ = stream; }
  30. CpuTaskInfo::~CpuTaskInfo() {
  31. if (args_ == nullptr) {
  32. return;
  33. }
  34. rtError_t status = rtFree(args_);
  35. if (status != RT_ERROR_NONE) {
  36. GELOGW("Call rt free failed, status: 0x%x", status);
  37. }
  38. args_ = nullptr;
  39. }
  40. ///
  41. /// @ingroup ge
  42. /// @brief definiteness queue schedule, bind input queue to task.
  43. /// @param [in] queue_id: input queue id from user.
  44. /// @param [out] in_mbuf: input mbuf addr for input data.
  45. /// @return: 0 for success / others for failed
  46. ///
  47. Status CpuTaskModelDequeue::Init(uint32_t queue_id, uintptr_t &in_mbuf) {
  48. if ((args_ != nullptr) || (args_size_ > 0)) {
  49. REPORT_INNER_ERROR("E19999", "Param args_ is not nullptr or args_size_:%u > 0,"
  50. "check invalid when CpuTaskModelDequeue %s", args_size_, __FUNCTION__);
  51. GELOGE(FAILED, "Task already initialized, size: %u", args_size_);
  52. return FAILED;
  53. }
  54. args_size_ = sizeof(MbufQueueInfo) + sizeof(uintptr_t); // sizeof(uintptr_t) for save in_mbuf.
  55. rtError_t status = rtMalloc(&args_, args_size_, RT_MEMORY_HBM);
  56. if (status != RT_ERROR_NONE) {
  57. REPORT_CALL_ERROR("E19999", "Call rtMalloc failed, size:%u, ret:0x%X, when CpuTaskModelDequeue %s",
  58. args_size_, status, __FUNCTION__);
  59. GELOGE(RT_FAILED, "Call rt malloc failed, status: 0x%x", status);
  60. return RT_ERROR_TO_GE_STATUS(status);
  61. }
  62. in_mbuf = reinterpret_cast<uintptr_t>(args_) + sizeof(MbufQueueInfo);
  63. GE_PRINT_DYNAMIC_MEMORY(rtMalloc, "args data.", args_size_)
  64. MbufQueueInfo queue_info;
  65. queue_info.queue_id = queue_id;
  66. queue_info.in_mbuf = in_mbuf; // Placeholder, input mbuf addr will save to this place.
  67. status = rtMemcpy(args_, args_size_, &queue_info, sizeof(MbufQueueInfo), RT_MEMCPY_HOST_TO_DEVICE);
  68. if (status != RT_ERROR_NONE) {
  69. REPORT_CALL_ERROR("E19999", "Call rtMemcpy failed, size:%u, ret:0x%X, when CpuTaskModelDequeue %s",
  70. args_size_, status, __FUNCTION__);
  71. GELOGE(RT_FAILED, "Call rt memcpy failed, status: 0x%x", status);
  72. return RT_ERROR_TO_GE_STATUS(status);
  73. }
  74. return SUCCESS;
  75. }
  76. Status CpuTaskModelDequeue::Distribute() {
  77. if ((args_ == nullptr) || (args_size_ == 0) || (stream_ == nullptr)) {
  78. REPORT_INNER_ERROR("E19999", "Param args_ is nullptr or args_size_:%u is 0 or stream_ is nullptr,"
  79. "check invalid when CpuTaskModelDequeue %s", args_size_, __FUNCTION__);
  80. GELOGE(FAILED, "Task not initialized, distribute failed, size: %u", args_size_);
  81. return FAILED;
  82. }
  83. rtError_t status = rtCpuKernelLaunch(nullptr, kCpuTaskModelDequeue, kCoreDim, args_, args_size_, nullptr, stream_);
  84. if (status != RT_ERROR_NONE) {
  85. REPORT_CALL_ERROR("E19999", "Call rtCpuKernelLaunch failed, ret:0x%X, when CpuTaskModelDequeue %s",
  86. status, __FUNCTION__);
  87. GELOGE(RT_FAILED, "Call rt CpuKernelLaunch ModelDequeue failed, status: 0x%X", status);
  88. return RT_ERROR_TO_GE_STATUS(status);
  89. }
  90. GELOGI("Cpu kernel launch model dequeue task success.");
  91. return SUCCESS;
  92. }
  93. ///
  94. /// @ingroup ge
  95. /// @brief definiteness queue schedule, zero copy.
  96. /// @param [in] mbuf_list: input/output mbuf addr list for input/output data.
  97. /// @param [in] outside_addrs: model input/output memory addr
  98. /// @return: 0 for success / others for failed
  99. ///
  100. Status CpuTaskZeroCopy::Init(std::vector<uintptr_t> &mbuf_list, const map<uint32_t, ZeroCopyOffset> &outside_addrs) {
  101. if ((args_ != nullptr) || (args_size_ > 0)) {
  102. REPORT_INNER_ERROR("E19999", "Param args_ is not nullptr or args_size_:%u > 0,"
  103. "check invalid when CpuTaskZeroCopy %s", args_size_, __FUNCTION__);
  104. GELOGE(FAILED, "Task already initialized, size: %u", args_size_);
  105. return FAILED;
  106. }
  107. args_size_ = sizeof(AddrMapInfo);
  108. GE_CHK_RT_RET(rtMalloc(&args_, args_size_, RT_MEMORY_HBM));
  109. GE_PRINT_DYNAMIC_MEMORY(rtMalloc, "args data.", args_size_)
  110. AddrMapInfo addr_map_info;
  111. // init src_addrs/dst_addrs
  112. vector<uint64_t> src_addrs;
  113. vector<uint64_t> dst_addrs;
  114. for (const auto &addrs : outside_addrs) {
  115. const auto &addrs_mapping_list = addrs.second.GetOutsideAddrs();
  116. GE_CHK_BOOL_EXEC(!addrs_mapping_list.empty(), return PARAM_INVALID, "not set outside_addrs");
  117. std::map<const void *, std::vector<void *>> virtual_args_addrs = addrs_mapping_list[0];
  118. for (const auto &virtual_args_addr : virtual_args_addrs) {
  119. addr_map_info.addr_num += virtual_args_addr.second.size();
  120. for (size_t i = 0; i < virtual_args_addr.second.size(); ++i) {
  121. src_addrs.emplace_back(mbuf_list.at(addrs.first));
  122. dst_addrs.push_back(static_cast<uint64_t>(reinterpret_cast<uintptr_t>(virtual_args_addr.second.at(i))));
  123. }
  124. }
  125. }
  126. GELOGI("addr_map_info.addr_num is %u", addr_map_info.addr_num);
  127. // malloc mem for src_addrs/dst_addrs, and copy data of src_addrs/dst_addrs
  128. GE_CHK_RT_RET(rtMalloc(&src_addr_, src_addrs.size() * sizeof(uint64_t), RT_MEMORY_HBM));
  129. rtError_t status = rtMemcpy(src_addr_, src_addrs.size() * sizeof(uint64_t), src_addrs.data(),
  130. src_addrs.size() * sizeof(uint64_t), RT_MEMCPY_HOST_TO_DEVICE);
  131. GE_IF_BOOL_EXEC(status != RT_ERROR_NONE, GELOGE(RT_FAILED, "rtMemcpy error, ret: Ox%X", status);
  132. return RT_ERROR_TO_GE_STATUS(status);)
  133. GE_CHK_RT_RET(rtMalloc(&dst_addr_, dst_addrs.size() * sizeof(uint64_t), RT_MEMORY_HBM));
  134. status = rtMemcpy(dst_addr_, dst_addrs.size() * sizeof(uint64_t), dst_addrs.data(),
  135. dst_addrs.size() * sizeof(uint64_t), RT_MEMCPY_HOST_TO_DEVICE);
  136. GE_IF_BOOL_EXEC(status != RT_ERROR_NONE, GELOGE(RT_FAILED, "rtMemcpy error, ret: Ox%X", status);
  137. return RT_ERROR_TO_GE_STATUS(status);)
  138. // src_addr_list is init to src_addr, which is the point to src_addrs
  139. if (!src_addrs.empty() && !dst_addrs.empty()) {
  140. addr_map_info.src_addr_list = static_cast<uint64_t>(reinterpret_cast<uintptr_t>(src_addr_));
  141. addr_map_info.dst_addr_list = static_cast<uint64_t>(reinterpret_cast<uintptr_t>(dst_addr_));
  142. GELOGI("src_addr_list is %lu, dst_addr_list is %lu", addr_map_info.src_addr_list, addr_map_info.dst_addr_list);
  143. }
  144. status = rtMemcpy(args_, args_size_, &addr_map_info, sizeof(AddrMapInfo), RT_MEMCPY_HOST_TO_DEVICE);
  145. GE_IF_BOOL_EXEC(status != RT_ERROR_NONE, GELOGE(RT_FAILED, "rtMemcpy error, ret: Ox%X", status);
  146. return RT_ERROR_TO_GE_STATUS(status);)
  147. return SUCCESS;
  148. }
  149. Status CpuTaskZeroCopy::Distribute() {
  150. if ((args_ == nullptr) || (args_size_ == 0) || (stream_ == nullptr)) {
  151. REPORT_INNER_ERROR("E19999", "Param args_ is nullptr or args_size_:%u is 0 or stream_ is nullptr,"
  152. "check invalid when CpuTaskZeroCopy %s", args_size_, __FUNCTION__);
  153. GELOGE(FAILED, "Task not initialized, distribute failed, size: %u", args_size_);
  154. return FAILED;
  155. }
  156. rtError_t status = rtCpuKernelLaunch(nullptr, kCpuTaskZeroCopy, kCoreDim, args_, args_size_, nullptr, stream_);
  157. if (status != RT_ERROR_NONE) {
  158. REPORT_CALL_ERROR("E19999", "Call rtCpuKernelLaunch failed, ret:0x%X, when CpuTaskZeroCopy %s",
  159. status, __FUNCTION__);
  160. GELOGE(RT_FAILED, "Call rt CpuKernelLaunch ZeroCopy failed, status: 0x%X", status);
  161. return RT_ERROR_TO_GE_STATUS(status);
  162. }
  163. GELOGI("Cpu kernel launch zero copy task success.");
  164. return SUCCESS;
  165. }
  166. CpuTaskZeroCopy::~CpuTaskZeroCopy() {
  167. if (src_addr_ == nullptr && dst_addr_ == nullptr) {
  168. return;
  169. }
  170. if (src_addr_ != nullptr) {
  171. rtError_t status = rtFree(src_addr_);
  172. if (status != RT_ERROR_NONE) {
  173. GELOGW("Call rt free failed, status: 0x%x", status);
  174. }
  175. }
  176. if (dst_addr_ != nullptr) {
  177. rtError_t status = rtFree(dst_addr_);
  178. if (status != RT_ERROR_NONE) {
  179. GELOGW("Call rt free failed, status: 0x%x", status);
  180. }
  181. }
  182. src_addr_ = nullptr;
  183. dst_addr_ = nullptr;
  184. }
  185. ///
  186. /// @ingroup ge
  187. /// @brief definiteness queue schedule, bind output queue to task.
  188. /// @param [in] addr: NetOutput Op input tensor address.
  189. /// @param [in] size: NetOutput Op input tensor size.
  190. /// @param [in] in_mbuf: input mbuf addr for input data.
  191. /// @param [out] out_mbuf: output mbuf addr for output data.
  192. /// @return: 0 for success / others for failed
  193. ///
  194. Status CpuTaskPrepareOutput::Init(uintptr_t addr, uint32_t size, uintptr_t in_mbuf, uintptr_t &out_mbuf) {
  195. if ((args_ != nullptr) || (args_size_ > 0)) {
  196. REPORT_INNER_ERROR("E19999", "Param args_ is not nullptr or args_size_:%u > 0,"
  197. "check invalid when CpuTaskPrepareOutput %s", args_size_, __FUNCTION__);
  198. GELOGE(FAILED, "Task already initialized, size: %u", args_size_);
  199. return FAILED;
  200. }
  201. args_size_ = sizeof(PrepareOutputInfo) + sizeof(uintptr_t); // sizeof(uintptr_t) for save out_mbuf.
  202. rtError_t status = rtMalloc(&args_, args_size_, RT_MEMORY_HBM);
  203. if (status != RT_ERROR_NONE) {
  204. REPORT_CALL_ERROR("E19999", "Call rtMalloc failed, size:%u, ret:0x%X, when CpuTaskPrepareOutput %s",
  205. args_size_, status, __FUNCTION__);
  206. GELOGE(RT_FAILED, "Call rt malloc failed, status: 0x%x", status);
  207. return RT_ERROR_TO_GE_STATUS(status);
  208. }
  209. out_mbuf = reinterpret_cast<uintptr_t>(args_) + sizeof(PrepareOutputInfo);
  210. GE_PRINT_DYNAMIC_MEMORY(rtMalloc, "args data.", args_size_)
  211. // Get NetOutput Input address and bind to queue.
  212. PrepareOutputInfo prepare;
  213. prepare.data_size = size;
  214. prepare.data_addr = addr;
  215. prepare.in_mbuf = in_mbuf;
  216. prepare.out_mbuf = out_mbuf; // Placeholder, output mbuf addr will save to this place.
  217. status = rtMemcpy(args_, args_size_, &prepare, sizeof(PrepareOutputInfo), RT_MEMCPY_HOST_TO_DEVICE);
  218. if (status != RT_ERROR_NONE) {
  219. REPORT_CALL_ERROR("E19999", "Call rtMemcpy failed, size:%u, ret:0x%X, when CpuTaskPrepareOutput %s",
  220. args_size_, status, __FUNCTION__);
  221. GELOGE(RT_FAILED, "Call rt memcpy failed, status: 0x%x", status);
  222. return RT_ERROR_TO_GE_STATUS(status);
  223. }
  224. return SUCCESS;
  225. }
  226. Status CpuTaskPrepareOutput::Distribute() {
  227. if ((args_ == nullptr) || (args_size_ == 0) || (stream_ == nullptr)) {
  228. REPORT_INNER_ERROR("E19999", "Param args_ is nullptr or args_size_:%u is 0 or stream_ is nullptr,"
  229. "check invalid when CpuTaskPrepareOutput %s", args_size_, __FUNCTION__);
  230. GELOGE(FAILED, "Task not initialized, distribute failed, size: %u", args_size_);
  231. return FAILED;
  232. }
  233. rtError_t status = rtCpuKernelLaunch(nullptr, kCpuTaskPrepareOutput, kCoreDim, args_, args_size_, nullptr, stream_);
  234. if (status != RT_ERROR_NONE) {
  235. REPORT_CALL_ERROR("E19999", "Call rtCpuKernelLaunch failed, ret:0x%X, when CpuTaskPrepareOutput %s",
  236. status, __FUNCTION__);
  237. GELOGE(RT_FAILED, "Call rt CpuKernelLaunch PrepareOutput failed, status: 0x%X", status);
  238. return RT_ERROR_TO_GE_STATUS(status);
  239. }
  240. GELOGI("Cpu kernel launch prepare output task success.");
  241. return SUCCESS;
  242. }
  243. ///
  244. /// @ingroup ge
  245. /// @brief definiteness queue schedule, bind output queue to task.
  246. /// @param [in] queue_id: output queue id from user.
  247. /// @param [in] out_mbuf: mbuf for output data.
  248. /// @return: 0 for success / others for failed
  249. ///
  250. Status CpuTaskModelEnqueue::Init(uint32_t queue_id, uintptr_t out_mbuf) {
  251. if ((args_ != nullptr) || (args_size_ > 0)) {
  252. REPORT_INNER_ERROR("E19999", "Param args_ is not nullptr or args_size_:%u > 0,"
  253. "check invalid when CpuTaskModelEnqueue %s", args_size_, __FUNCTION__);
  254. GELOGE(FAILED, "Task already initialized, size: %u", args_size_);
  255. return FAILED;
  256. }
  257. // Get NetOutput Input address and bind to queue.
  258. args_size_ = sizeof(MbufQueueInfo);
  259. rtError_t status = rtMalloc(&args_, args_size_, RT_MEMORY_HBM);
  260. if (status != RT_ERROR_NONE) {
  261. REPORT_CALL_ERROR("E19999", "Call rtMalloc failed, size:%u, ret:0x%X, when CpuTaskModelEnqueue %s",
  262. args_size_, status, __FUNCTION__);
  263. GELOGE(RT_FAILED, "Call rt malloc failed, status: 0x%x", status);
  264. return RT_ERROR_TO_GE_STATUS(status);
  265. }
  266. GE_PRINT_DYNAMIC_MEMORY(rtMalloc, "args data.", args_size_)
  267. MbufQueueInfo queue_info;
  268. queue_info.queue_id = queue_id;
  269. queue_info.in_mbuf = out_mbuf;
  270. status = rtMemcpy(args_, args_size_, &queue_info, args_size_, RT_MEMCPY_HOST_TO_DEVICE);
  271. if (status != RT_ERROR_NONE) {
  272. REPORT_CALL_ERROR("E19999", "Call rtMemcpy failed, size:%u, ret:0x%X, when CpuTaskModelEnqueue %s",
  273. args_size_, status, __FUNCTION__);
  274. GELOGE(RT_FAILED, "Call rt memcpy failed, status: 0x%x", status);
  275. return RT_ERROR_TO_GE_STATUS(status);
  276. }
  277. return SUCCESS;
  278. }
  279. Status CpuTaskModelEnqueue::Distribute() {
  280. if ((args_ == nullptr) || (args_size_ == 0) || (stream_ == nullptr)) {
  281. REPORT_INNER_ERROR("E19999", "Param args_ is nullptr or args_size_ is 0 or stream_ is nullptr, arg_size:%u,"
  282. "check invalid when CpuTaskModelEnqueue %s", args_size_, __FUNCTION__);
  283. GELOGE(FAILED, "Task not initialized, distribute failed, size: %u", args_size_);
  284. return FAILED;
  285. }
  286. rtError_t status = rtCpuKernelLaunch(nullptr, kCpuTaskModelEnqueue, kCoreDim, args_, args_size_, nullptr, stream_);
  287. if (status != RT_ERROR_NONE) {
  288. REPORT_CALL_ERROR("E19999", "Call rtCpuKernelLaunch failed, ret:0x%X, when CpuTaskModelEnqueue %s",
  289. status, __FUNCTION__);
  290. GELOGE(RT_FAILED, "Call rt CpuKernelLaunch ModelEnqueue failed, status: 0x%X", status);
  291. return RT_ERROR_TO_GE_STATUS(status);
  292. }
  293. GELOGI("Cpu kernel launch model enqueue task success.");
  294. return SUCCESS;
  295. }
  296. ///
  297. /// @ingroup ge
  298. /// @brief definiteness queue schedule, active entry stream.
  299. /// @param [in] stream: stream to be active.
  300. /// @return: 0 for success / others for failed
  301. ///
  302. Status CpuTaskActiveEntry::Init(rtStream_t stream) {
  303. if (stream == nullptr) {
  304. REPORT_INNER_ERROR("E19999", "Param stream is nullptr, check invalid when CpuTaskActiveEntry %s", __FUNCTION__);
  305. GELOGE(FAILED, "Task active stream not valid");
  306. return FAILED;
  307. }
  308. active_stream_ = stream;
  309. return SUCCESS;
  310. }
  311. Status CpuTaskActiveEntry::Distribute() {
  312. if ((active_stream_ == nullptr) || (stream_ == nullptr)) {
  313. REPORT_INNER_ERROR("E19999", "Param stream is nullptr or active_stream_ is nullptr, "
  314. "check invalid when CpuTaskActiveEntry %s", __FUNCTION__);
  315. GELOGE(FAILED, "Task not initialized, distribute failed, size: %u", args_size_);
  316. return FAILED;
  317. }
  318. rtError_t ret = rtStreamActive(active_stream_, stream_);
  319. if (ret != RT_ERROR_NONE) {
  320. REPORT_CALL_ERROR("E19999", "Call rtStreamActive failed, ret:0x%X, when CpuTaskActiveEntry %s",
  321. ret, __FUNCTION__);
  322. GELOGE(RT_FAILED, "Call rt StreamActive failed, ret: 0x%X", ret);
  323. return RT_ERROR_TO_GE_STATUS(ret);
  324. }
  325. GELOGI("Cpu kernel launch active entry task success.");
  326. return SUCCESS;
  327. }
  328. ///
  329. /// @ingroup ge
  330. /// @brief definiteness queue schedule, wait for end graph.
  331. /// @param [in] model_id: model id for wait end graph.
  332. /// @return: 0 for success / others for failed
  333. ///
  334. Status CpuTaskWaitEndGraph::Init(uint32_t model_id) {
  335. if ((args_ != nullptr) || (args_size_ > 0)) {
  336. REPORT_INNER_ERROR("E19999", "Param args_ is not nullptr or args_size_:%u > 0,"
  337. "check invalid when CpuTaskWaitEndGraph %s", args_size_, __FUNCTION__);
  338. GELOGE(FAILED, "Task already initialized, size: %u", args_size_);
  339. return FAILED;
  340. }
  341. args_size_ = sizeof(model_id);
  342. rtError_t status = rtMalloc(&args_, args_size_, RT_MEMORY_HBM);
  343. if (status != RT_ERROR_NONE) {
  344. REPORT_CALL_ERROR("E19999", "Call rtMalloc failed, size:%u, ret:0x%X, when CpuTaskWaitEndGraph %s",
  345. args_size_, status, __FUNCTION__);
  346. GELOGE(RT_FAILED, "Call rt malloc failed, status: 0x%x", status);
  347. return RT_ERROR_TO_GE_STATUS(status);
  348. }
  349. GE_PRINT_DYNAMIC_MEMORY(rtMalloc, "args data.", args_size_)
  350. status = rtMemcpy(args_, args_size_, &model_id, args_size_, RT_MEMCPY_HOST_TO_DEVICE);
  351. if (status != RT_ERROR_NONE) {
  352. REPORT_CALL_ERROR("E19999", "Call rtMemcpy failed, size:%u, ret:0x%X, when CpuTaskWaitEndGraph %s",
  353. args_size_, status, __FUNCTION__);
  354. GELOGE(RT_FAILED, "Call rt memcpy failed, status: 0x%x", status);
  355. return RT_ERROR_TO_GE_STATUS(status);
  356. }
  357. return SUCCESS;
  358. }
  359. Status CpuTaskWaitEndGraph::Distribute() {
  360. if ((args_ == nullptr) || (args_size_ == 0) || (stream_ == nullptr)) {
  361. REPORT_INNER_ERROR("E19999", "Param args_ is nullptr or args_size_:%u is 0 or stream_ is nullptr,"
  362. "check invalid when CpuTaskWaitEndGraph %s", args_size_, __FUNCTION__);
  363. GELOGE(FAILED, "Task not initialized, distribute failed, size: %u", args_size_);
  364. return FAILED;
  365. }
  366. rtError_t status = rtCpuKernelLaunch(nullptr, kCpuTaskWaitEndGraph, kCoreDim, args_, args_size_, nullptr, stream_);
  367. if (status != RT_ERROR_NONE) {
  368. REPORT_CALL_ERROR("E19999", "Call rtCpuKernelLaunch failed, ret:0x%X, when CpuTaskWaitEndGraph %s",
  369. status, __FUNCTION__);
  370. GELOGE(RT_FAILED, "Call rt CpuKernelLaunch WaitEndGraph failed, status: 0x%X", status);
  371. return RT_ERROR_TO_GE_STATUS(status);
  372. }
  373. GELOGI("Cpu kernel launch wait end task success.");
  374. return SUCCESS;
  375. }
  376. ///
  377. /// @ingroup ge
  378. /// @brief definiteness queue schedule, repeat run model.
  379. /// @param [in] model_id: model id for repeat run.
  380. /// @return: 0 for success / others for failed
  381. ///
  382. Status CpuTaskModelRepeat::Init(uint32_t model_id) {
  383. if ((args_ != nullptr) || (args_size_ > 0)) {
  384. REPORT_INNER_ERROR("E19999", "Param args_ is not nullptr or args_size_:%u > 0,"
  385. "check invalid when CpuTaskModelRepeat %s", args_size_, __FUNCTION__);
  386. GELOGE(FAILED, "Task already initialized, size: %u", args_size_);
  387. return FAILED;
  388. }
  389. args_size_ = sizeof(model_id);
  390. rtError_t status = rtMalloc(&args_, args_size_, RT_MEMORY_HBM);
  391. if (status != RT_ERROR_NONE) {
  392. REPORT_CALL_ERROR("E19999", "Call rtMalloc failed, size:%u, ret:0x%X, when CpuTaskModelRepeat %s",
  393. args_size_, status, __FUNCTION__);
  394. GELOGE(RT_FAILED, "Call rt malloc failed, status: 0x%x", status);
  395. return RT_ERROR_TO_GE_STATUS(status);
  396. }
  397. GE_PRINT_DYNAMIC_MEMORY(rtMalloc, "args data.", args_size_)
  398. status = rtMemcpy(args_, args_size_, &model_id, args_size_, RT_MEMCPY_HOST_TO_DEVICE);
  399. if (status != RT_ERROR_NONE) {
  400. REPORT_CALL_ERROR("E19999", "Call rtMemcpy failed, size:%u, ret:0x%X, when CpuTaskModelRepeat %s",
  401. args_size_, status, __FUNCTION__);
  402. GELOGE(RT_FAILED, "Call rt memcpy failed, status: 0x%x", status);
  403. return RT_ERROR_TO_GE_STATUS(status);
  404. }
  405. return SUCCESS;
  406. }
  407. Status CpuTaskModelRepeat::Distribute() {
  408. if ((args_ == nullptr) || (args_size_ == 0) || (stream_ == nullptr)) {
  409. REPORT_INNER_ERROR("E19999", "Param args_ is nullptr or args_size_:%u is 0 or stream_ is nullptr,"
  410. "check invalid when CpuTaskModelRepeat %s", args_size_, __FUNCTION__);
  411. GELOGE(FAILED, "Task not initialized, distribute failed, size: %u", args_size_);
  412. return FAILED;
  413. }
  414. rtError_t status = rtCpuKernelLaunch(nullptr, kCpuTaskModelRepeat, kCoreDim, args_, args_size_, nullptr, stream_);
  415. if (status != RT_ERROR_NONE) {
  416. REPORT_CALL_ERROR("E19999", "Call rtCpuKernelLaunch failed, ret:0x%X, when CpuTaskModelRepeat %s",
  417. status, __FUNCTION__);
  418. GELOGE(RT_FAILED, "Call rt CpuKernelLaunch ModelRepeat failed, status: 0x%x", status);
  419. return RT_ERROR_TO_GE_STATUS(status);
  420. }
  421. GELOGI("Cpu kernel launch repeat task success.");
  422. return SUCCESS;
  423. }
  424. } // namespace ge

图引擎模块(GE)是MindSpore的一个子模块,其代码由C++实现,位于前端模块ME和底层硬件之间,起到承接作用。图引擎模块以ME下发的图作为输入,然后进行一系列的深度图优化操作,最后输出一张可以在底层硬件上高效运行的图。GE针对昇腾AI处理器的硬件结构特点,做了特定的优化工作,以此来充分发挥出昇腾AI处理器的强大算力。在进行模型训练/推理时,GE会被自动调用而用户并不感知。GE主要由GE API和GE Core两部分组成,详细的架构图如下所示