You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number, can include dashes ('-'), and can be up to 35 characters long.

run_context.cc 9.6 kB

5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232
  1. /**
  2. * Copyright 2020 Huawei Technologies Co., Ltd
  3. *
  4. * Licensed under the Apache License, Version 2.0 (the "License");
  5. * you may not use this file except in compliance with the License.
  6. * You may obtain a copy of the License at
  7. *
  8. * http://www.apache.org/licenses/LICENSE-2.0
  9. *
  10. * Unless required by applicable law or agreed to in writing, software
  11. * distributed under the License is distributed on an "AS IS" BASIS,
  12. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. * See the License for the specific language governing permissions and
  14. * limitations under the License.
  15. */
  16. #include "graph/build/run_context.h"
  17. #include "common/util.h"
  18. #include "framework/common/debug/ge_log.h"
  19. #include "graph/debug/ge_attr_define.h"
  20. namespace ge {
  21. RunContextUtil::~RunContextUtil() { DestroyRtModelResources(); }
  22. Status RunContextUtil::InitMemInfo(uint8_t *data_mem_base, uint64_t data_mem_size,
  23. std::map<int64_t, uint8_t *> mem_type_to_data_mem_base,
  24. std::map<int64_t, uint64_t> mem_type_to_data_mem_size, uint8_t *weight_mem_base,
  25. uint64_t weight_mem_size) {
  26. if ((data_mem_size > 0) && (data_mem_base == nullptr)) {
  27. REPORT_INNER_ERROR("E19999", "InitMemInfo param data_mem_base is null but data_mem_size = %lu", data_mem_size);
  28. GELOGE(PARAM_INVALID, "InitMemInfo param data_mem_base is null but data_mem_size = %lu.", data_mem_size);
  29. return PARAM_INVALID;
  30. }
  31. if ((weight_mem_size > 0) && (weight_mem_base == nullptr)) {
  32. REPORT_INNER_ERROR("E19999", "InitMemInfo param weight_mem_base is null but weight_mem_size = %lu",
  33. weight_mem_size);
  34. GELOGE(PARAM_INVALID, "InitMemInfo param weight_mem_base is null but weight_mem_size = %lu.", weight_mem_size);
  35. return PARAM_INVALID;
  36. }
  37. if (mem_type_to_data_mem_base.empty() || mem_type_to_data_mem_size.empty() ||
  38. mem_type_to_data_mem_base.size() != mem_type_to_data_mem_size.size()) {
  39. REPORT_INNER_ERROR("E19999", "InitMemInfo param mem_type_to_data_mem_base size[%zu] "
  40. "is not equal to the size of mem_type_to_data_mem_size[%zu].",
  41. mem_type_to_data_mem_base.size(), mem_type_to_data_mem_size.size());
  42. GELOGE(PARAM_INVALID,
  43. "InitMemInfo param mem_type_to_data_mem_base size[%zu] is not equal to the size of "
  44. "mem_type_to_data_mem_size[%zu].",
  45. mem_type_to_data_mem_base.size(), mem_type_to_data_mem_size.size());
  46. return PARAM_INVALID;
  47. }
  48. data_mem_base_ = data_mem_base;
  49. data_mem_size_ = data_mem_size;
  50. weight_mem_base_ = weight_mem_base;
  51. weight_mem_size_ = weight_mem_size;
  52. mem_type_to_data_mem_base_ = mem_type_to_data_mem_base;
  53. mem_type_to_data_mem_size_ = mem_type_to_data_mem_size;
  54. return SUCCESS;
  55. }
  56. Status RunContextUtil::CreateRtModelResources(uint32_t stream_num, uint32_t event_num, uint32_t label_num) {
  57. // Create rt model
  58. rtError_t rt_ret = rtModelCreate(&rt_model_, 0);
  59. if (rt_ret != RT_ERROR_NONE) {
  60. REPORT_CALL_ERROR("E19999", "call rtModelCreate fail, ret:%d, when %s", static_cast<int>(rt_ret), __FUNCTION__);
  61. GELOGE(RT_FAILED, "rtModelCreate failed. rt_ret = %d", static_cast<int>(rt_ret));
  62. return RT_FAILED;
  63. }
  64. // Create rt Stream and bind with model
  65. for (uint32_t i = 0; i < stream_num; ++i) {
  66. rtStream_t stream = nullptr;
  67. rt_ret = rtStreamCreate(&stream, 0);
  68. if (rt_ret != RT_ERROR_NONE) {
  69. REPORT_CALL_ERROR("E19999", "call rtStreamCreate fail, ret:%d, index:%u, when %s",
  70. static_cast<int>(rt_ret), i, __FUNCTION__);
  71. GELOGE(RT_FAILED, "rtStreamCreate failed. rt_ret = %d, index = %u", static_cast<int>(rt_ret), i);
  72. return RT_FAILED;
  73. }
  74. stream_list_.emplace_back(stream);
  75. rt_ret = rtModelBindStream(rt_model_, stream, 0);
  76. if (rt_ret != RT_ERROR_NONE) {
  77. REPORT_CALL_ERROR("E19999", "call rtModelBindStream fail, ret:%d, index:%u, when %s",
  78. static_cast<int>(rt_ret), i, __FUNCTION__);
  79. GELOGE(RT_FAILED, "Bind stream and model failed. rt_ret = %d, index = %u", static_cast<int>(rt_ret), i);
  80. return RT_FAILED;
  81. }
  82. }
  83. // Create rt event
  84. for (uint32_t i = 0; i < event_num; ++i) {
  85. rtEvent_t event = nullptr;
  86. rt_ret = rtEventCreate(&event);
  87. if (rt_ret != RT_ERROR_NONE) {
  88. REPORT_CALL_ERROR("E19999", "call rtEventCreate fail, ret:%d, index:%u, when %s",
  89. static_cast<int>(rt_ret), i, __FUNCTION__);
  90. GELOGE(RT_FAILED, "rtEventCreate failed. rt_ret = %d, index = %u", static_cast<int>(rt_ret), i);
  91. return RT_FAILED;
  92. }
  93. event_list_.emplace_back(event);
  94. }
  95. // Create rt label
  96. for (uint32_t i = 0; i < label_num; ++i) {
  97. rtLabel_t label = nullptr;
  98. rt_ret = rtLabelCreateV2(&label, rt_model_);
  99. if (rt_ret != RT_ERROR_NONE) {
  100. REPORT_CALL_ERROR("E19999", "call rtLabelCreateV2 fail, ret:%d, index:%u, when %s",
  101. static_cast<int>(rt_ret), i, __FUNCTION__);
  102. GELOGE(RT_FAILED, "rtLabelCreate failed. rt_ret = %d, index = %u", static_cast<int>(rt_ret), i);
  103. return RT_FAILED;
  104. }
  105. label_list_.emplace_back(label);
  106. }
  107. return SUCCESS;
  108. }
  109. void RunContextUtil::DestroyRtModelResources() noexcept {
  110. rtError_t rt_ret;
  111. for (size_t i = 0; i < stream_list_.size(); i++) {
  112. // Unbind stream to model first
  113. (void)rtModelUnbindStream(rt_model_, stream_list_[i]);
  114. rt_ret = rtStreamDestroy(stream_list_[i]);
  115. if (rt_ret != RT_ERROR_NONE) {
  116. GELOGW("Destroy stream failed. rt_ret = %d, index = %zu.", static_cast<int>(rt_ret), i);
  117. }
  118. }
  119. stream_list_.clear();
  120. for (size_t i = 0; i < event_list_.size(); i++) {
  121. rt_ret = rtEventDestroy(event_list_[i]);
  122. if (rt_ret != RT_ERROR_NONE) {
  123. GELOGW("Destroy event failed. rt_ret = %d, index = %zu.", static_cast<int>(rt_ret), i);
  124. }
  125. }
  126. event_list_.clear();
  127. for (size_t i = 0; i < label_list_.size(); ++i) {
  128. rt_ret = rtLabelDestroy(label_list_[i]);
  129. if (rt_ret != RT_ERROR_NONE) {
  130. GELOGW("Destroy label failed. rt_ret = %d, index = %zu.", static_cast<int>(rt_ret), i);
  131. }
  132. }
  133. label_list_.clear();
  134. if (rt_model_ != nullptr) {
  135. rt_ret = rtModelDestroy(rt_model_);
  136. if (rt_ret != RT_ERROR_NONE) {
  137. GELOGW("Destroy rt model failed. rt_ret = %d.", static_cast<int>(rt_ret));
  138. }
  139. rt_model_ = nullptr;
  140. }
  141. }
  142. Status RunContextUtil::CreateRunContext(Model &model, const ComputeGraphPtr &graph, Buffer &buffer,
  143. const uint64_t session_id) {
  144. GELOGD("Begin to Create RunContext, session_id = %lu", session_id);
  145. // check params
  146. if (graph == nullptr) {
  147. REPORT_INNER_ERROR("E19999", "Check param graph nullptr, session_id:%lu, when %s", session_id, __FUNCTION__);
  148. GELOGE(PARAM_INVALID, "CreateRunContext param graph is null. session_id=%lu", session_id);
  149. return PARAM_INVALID;
  150. }
  151. uint32_t stream_num = 0;
  152. if (!AttrUtils::GetInt(&model, ATTR_MODEL_STREAM_NUM, stream_num)) {
  153. REPORT_INNER_ERROR("Get Attr:%s fail for model, session_id:%lu, when %s",
  154. ATTR_MODEL_STREAM_NUM.c_str(), session_id, __FUNCTION__);
  155. GELOGE(INTERNAL_ERROR, "Get stream_num attr from model_def failed. session_id=%lu", session_id);
  156. return INTERNAL_ERROR;
  157. }
  158. GELOGD("Stream_num = %u", stream_num);
  159. uint32_t event_num = 0;
  160. if (!AttrUtils::GetInt(&model, ATTR_MODEL_EVENT_NUM, event_num)) {
  161. REPORT_INNER_ERROR("Get Attr:%s fail for model, session_id:%lu, when %s",
  162. ATTR_MODEL_EVENT_NUM.c_str(), session_id, __FUNCTION__);
  163. GELOGE(INTERNAL_ERROR, "Get event_num attr from model failed. session_id=%lu", session_id);
  164. return INTERNAL_ERROR;
  165. }
  166. GELOGD("Event_num = %u", event_num);
  167. uint32_t label_num = 0;
  168. if (!AttrUtils::GetInt(&model, ATTR_MODEL_LABEL_NUM, label_num)) {
  169. REPORT_INNER_ERROR("Get Attr:%s fail for model, session_id:%lu, when %s",
  170. ATTR_MODEL_LABEL_NUM.c_str(), session_id, __FUNCTION__);
  171. GELOGE(INTERNAL_ERROR, "Get label_num attr from model failed. session_id=%lu", session_id);
  172. return INTERNAL_ERROR;
  173. }
  174. GELOGD("Label_num = %u", label_num);
  175. Status ret = CreateRtModelResources(stream_num, event_num, label_num);
  176. if (ret != SUCCESS) {
  177. GELOGE(ret, "CreateRtModelResources failed. session_id=%lu", session_id);
  178. DestroyRtModelResources();
  179. return ret;
  180. }
  181. GELOGI("CreateRunContext: data_mem_base_ = %p, weight_mem_base_ = %p, memory_size = %lu, weight_size = %lu",
  182. data_mem_base_, weight_mem_base_, data_mem_size_, weight_mem_size_);
  183. PrintMemInfo();
  184. run_context_ = {rt_model_,
  185. nullptr,
  186. session_id,
  187. data_mem_size_,
  188. data_mem_base_,
  189. mem_type_to_data_mem_size_,
  190. mem_type_to_data_mem_base_,
  191. weight_mem_size_,
  192. weight_mem_base_,
  193. buffer,
  194. stream_list_,
  195. event_list_,
  196. label_list_};
  197. return SUCCESS;
  198. }
  199. void RunContextUtil::PrintMemInfo() {
  200. for (auto iter : mem_type_to_data_mem_base_) {
  201. GELOGD("CreateRunContext: memory type = %ld, data memory base = %p", iter.first, iter.second);
  202. }
  203. for (auto iter : mem_type_to_data_mem_size_) {
  204. GELOGD("CreateRunContext: memory type = %ld, data memory size = %lu", iter.first, iter.second);
  205. }
  206. }
  207. RunContext &RunContextUtil::GetRunContext() { return run_context_; }
  208. } // namespace ge

图引擎模块（GE）是 MindSpore 的一个子模块，其代码由 C++ 实现，位于前端模块 ME 和底层硬件之间，起到承接作用。图引擎模块以 ME 下发的图作为输入，然后进行一系列的深度图优化操作，最后输出一张可以在底层硬件上高效运行的图。GE 针对昇腾 AI 处理器的硬件结构特点，做了特定的优化工作，以此来充分发挥出昇腾 AI 处理器的强大算力。在进行模型训练/推理时，GE 会被自动调用而用户并不感知。GE 主要由 GE API 和 GE Core 两部分组成，详细的架构图如下所示。