You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number, can include dashes ('-'), and can be up to 35 characters long.

run_context.cc 9.7 kB

5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
4 years ago
5 years ago
4 years ago
5 years ago
4 years ago
5 years ago
5 years ago
5 years ago
4 years ago
5 years ago
4 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
4 years ago
5 years ago
4 years ago
5 years ago
5 years ago
5 years ago
4 years ago
5 years ago
5 years ago
5 years ago
4 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235
  1. /**
  2. * Copyright 2020 Huawei Technologies Co., Ltd
  3. *
  4. * Licensed under the Apache License, Version 2.0 (the "License");
  5. * you may not use this file except in compliance with the License.
  6. * You may obtain a copy of the License at
  7. *
  8. * http://www.apache.org/licenses/LICENSE-2.0
  9. *
  10. * Unless required by applicable law or agreed to in writing, software
  11. * distributed under the License is distributed on an "AS IS" BASIS,
  12. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. * See the License for the specific language governing permissions and
  14. * limitations under the License.
  15. */
  16. #include "graph/build/run_context.h"
  17. #include "common/util.h"
  18. #include "framework/common/debug/ge_log.h"
  19. #include "graph/debug/ge_attr_define.h"
  20. #include "graph/common/omg_util.h"
  21. namespace ge {
  22. RunContextUtil::~RunContextUtil() { DestroyRtModelResources(); }
  23. Status RunContextUtil::InitMemInfo(uint8_t *data_mem_base, uint64_t data_mem_size,
  24. std::map<int64_t, uint8_t *> mem_type_to_data_mem_base,
  25. std::map<int64_t, uint64_t> mem_type_to_data_mem_size, uint8_t *weight_mem_base,
  26. uint64_t weight_mem_size) {
  27. if ((data_mem_size > 0) && (data_mem_base == nullptr)) {
  28. REPORT_INNER_ERROR("E19999", "InitMemInfo param data_mem_base is null but data_mem_size = %lu", data_mem_size);
  29. GELOGE(PARAM_INVALID, "InitMemInfo param data_mem_base is null but data_mem_size = %lu.", data_mem_size);
  30. return PARAM_INVALID;
  31. }
  32. if ((weight_mem_size > 0) && (weight_mem_base == nullptr)) {
  33. REPORT_INNER_ERROR("E19999", "InitMemInfo param weight_mem_base is null but weight_mem_size = %lu",
  34. weight_mem_size);
  35. GELOGE(PARAM_INVALID, "InitMemInfo param weight_mem_base is null but weight_mem_size = %lu.", weight_mem_size);
  36. return PARAM_INVALID;
  37. }
  38. if (mem_type_to_data_mem_base.empty() || mem_type_to_data_mem_size.empty() ||
  39. mem_type_to_data_mem_base.size() != mem_type_to_data_mem_size.size()) {
  40. REPORT_INNER_ERROR("E19999", "InitMemInfo param mem_type_to_data_mem_base size[%zu] "
  41. "is not equal to the size of mem_type_to_data_mem_size[%zu].",
  42. mem_type_to_data_mem_base.size(), mem_type_to_data_mem_size.size());
  43. GELOGE(PARAM_INVALID,
  44. "InitMemInfo param mem_type_to_data_mem_base size[%zu] is not equal to the size of "
  45. "mem_type_to_data_mem_size[%zu].",
  46. mem_type_to_data_mem_base.size(), mem_type_to_data_mem_size.size());
  47. return PARAM_INVALID;
  48. }
  49. data_mem_base_ = data_mem_base;
  50. data_mem_size_ = data_mem_size;
  51. weight_mem_base_ = weight_mem_base;
  52. weight_mem_size_ = weight_mem_size;
  53. mem_type_to_data_mem_base_ = mem_type_to_data_mem_base;
  54. mem_type_to_data_mem_size_ = mem_type_to_data_mem_size;
  55. return SUCCESS;
  56. }
  57. Status RunContextUtil::CreateRtModelResources(uint32_t stream_num, uint32_t event_num, uint32_t label_num) {
  58. // Create rt model
  59. rtError_t rt_ret = rtModelCreate(&rt_model_, 0);
  60. if (rt_ret != RT_ERROR_NONE) {
  61. REPORT_CALL_ERROR("E19999", "call rtModelCreate failed, ret:%d,", static_cast<int>(rt_ret));
  62. GELOGE(RT_FAILED, "rtModelCreate failed. rt_ret = %d", static_cast<int>(rt_ret));
  63. return RT_FAILED;
  64. }
  65. // Create rt Stream and bind with model
  66. for (uint32_t i = 0; i < stream_num; ++i) {
  67. rtStream_t stream = nullptr;
  68. rt_ret = rtStreamCreate(&stream, 0);
  69. if (rt_ret != RT_ERROR_NONE) {
  70. REPORT_CALL_ERROR("E19999", "call rtStreamCreate failed, ret:%d, index:%u,",
  71. static_cast<int>(rt_ret), i);
  72. GELOGE(RT_FAILED, "rtStreamCreate failed. rt_ret = %d, index = %u", static_cast<int>(rt_ret), i);
  73. return RT_FAILED;
  74. }
  75. stream_list_.emplace_back(stream);
  76. rt_ret = rtModelBindStream(rt_model_, stream, 0);
  77. if (rt_ret != RT_ERROR_NONE) {
  78. REPORT_CALL_ERROR("E19999", "call rtModelBindStream failed, ret:%d, index:%u,",
  79. static_cast<int>(rt_ret), i);
  80. GELOGE(RT_FAILED, "Bind stream and model failed. rt_ret = %d, index = %u", static_cast<int>(rt_ret), i);
  81. return RT_FAILED;
  82. }
  83. }
  84. // Create rt event
  85. uint32_t create_flag = static_cast<uint32_t>((event_num > kEventReuseThreshold) ? RT_EVENT_WITH_FLAG :
  86. RT_EVENT_DEFAULT);
  87. for (uint32_t i = 0; i < event_num; ++i) {
  88. rtEvent_t event = nullptr;
  89. rt_ret = rtEventCreateWithFlag(&event, create_flag);
  90. if (rt_ret != RT_ERROR_NONE) {
  91. REPORT_CALL_ERROR("E19999", "call rtEventCreate failed, ret:%d, index:%u,",
  92. static_cast<int>(rt_ret), i);
  93. GELOGE(RT_FAILED, "rtEventCreate failed. rt_ret = %d, index = %u", static_cast<int>(rt_ret), i);
  94. return RT_FAILED;
  95. }
  96. event_list_.emplace_back(event);
  97. }
  98. // Create rt label
  99. for (uint32_t i = 0; i < label_num; ++i) {
  100. rtLabel_t label = nullptr;
  101. rt_ret = rtLabelCreateV2(&label, rt_model_);
  102. if (rt_ret != RT_ERROR_NONE) {
  103. REPORT_CALL_ERROR("E19999", "call rtLabelCreateV2 failed, ret:%d, index:%u,",
  104. static_cast<int>(rt_ret), i);
  105. GELOGE(RT_FAILED, "rtLabelCreate failed. rt_ret = %d, index = %u", static_cast<int>(rt_ret), i);
  106. return RT_FAILED;
  107. }
  108. label_list_.emplace_back(label);
  109. }
  110. return SUCCESS;
  111. }
  112. void RunContextUtil::DestroyRtModelResources() noexcept {
  113. rtError_t rt_ret;
  114. for (size_t i = 0; i < stream_list_.size(); i++) {
  115. // Unbind stream to model first
  116. (void)rtModelUnbindStream(rt_model_, stream_list_[i]);
  117. rt_ret = rtStreamDestroy(stream_list_[i]);
  118. if (rt_ret != RT_ERROR_NONE) {
  119. GELOGW("Destroy stream failed. rt_ret = %d, index = %zu.", static_cast<int>(rt_ret), i);
  120. }
  121. }
  122. stream_list_.clear();
  123. for (size_t i = 0; i < event_list_.size(); i++) {
  124. rt_ret = rtEventDestroy(event_list_[i]);
  125. if (rt_ret != RT_ERROR_NONE) {
  126. GELOGW("Destroy event failed. rt_ret = %d, index = %zu.", static_cast<int>(rt_ret), i);
  127. }
  128. }
  129. event_list_.clear();
  130. for (size_t i = 0; i < label_list_.size(); ++i) {
  131. rt_ret = rtLabelDestroy(label_list_[i]);
  132. if (rt_ret != RT_ERROR_NONE) {
  133. GELOGW("Destroy label failed. rt_ret = %d, index = %zu.", static_cast<int>(rt_ret), i);
  134. }
  135. }
  136. label_list_.clear();
  137. if (rt_model_ != nullptr) {
  138. rt_ret = rtModelDestroy(rt_model_);
  139. if (rt_ret != RT_ERROR_NONE) {
  140. GELOGW("Destroy rt model failed. rt_ret = %d.", static_cast<int>(rt_ret));
  141. }
  142. rt_model_ = nullptr;
  143. }
  144. }
  145. Status RunContextUtil::CreateRunContext(Model &model, const ComputeGraphPtr &graph, Buffer &buffer,
  146. const uint64_t session_id) {
  147. GELOGD("Begin to Create RunContext, session_id = %lu", session_id);
  148. // check params
  149. if (graph == nullptr) {
  150. REPORT_INNER_ERROR("E19999", "Check param graph nullptr, session_id:%lu,", session_id);
  151. GELOGE(PARAM_INVALID, "CreateRunContext param graph is null. session_id=%lu", session_id);
  152. return PARAM_INVALID;
  153. }
  154. uint32_t stream_num = 0;
  155. if (!AttrUtils::GetInt(&model, ATTR_MODEL_STREAM_NUM, stream_num)) {
  156. REPORT_INNER_ERROR("E19999", "Get Attr:%s failed for model, session_id:%lu,",
  157. ATTR_MODEL_STREAM_NUM.c_str(), session_id);
  158. GELOGE(INTERNAL_ERROR, "Get stream_num attr from model_def failed. session_id=%lu", session_id);
  159. return INTERNAL_ERROR;
  160. }
  161. GELOGD("Stream_num = %u", stream_num);
  162. uint32_t event_num = 0;
  163. if (!AttrUtils::GetInt(&model, ATTR_MODEL_EVENT_NUM, event_num)) {
  164. REPORT_INNER_ERROR("E19999", "Get Attr:%s failed for model, session_id:%lu,",
  165. ATTR_MODEL_EVENT_NUM.c_str(), session_id);
  166. GELOGE(INTERNAL_ERROR, "Get event_num attr from model failed. session_id=%lu", session_id);
  167. return INTERNAL_ERROR;
  168. }
  169. GELOGD("Event_num = %u", event_num);
  170. uint32_t label_num = 0;
  171. if (!AttrUtils::GetInt(&model, ATTR_MODEL_LABEL_NUM, label_num)) {
  172. REPORT_INNER_ERROR("E19999", "Get Attr:%s failed for model, session_id:%lu,",
  173. ATTR_MODEL_LABEL_NUM.c_str(), session_id);
  174. GELOGE(INTERNAL_ERROR, "Get label_num attr from model failed. session_id=%lu", session_id);
  175. return INTERNAL_ERROR;
  176. }
  177. GELOGD("Label_num = %u", label_num);
  178. Status ret = CreateRtModelResources(stream_num, event_num, label_num);
  179. if (ret != SUCCESS) {
  180. GELOGE(ret, "CreateRtModelResources failed. session_id=%lu", session_id);
  181. DestroyRtModelResources();
  182. return ret;
  183. }
  184. GELOGI("CreateRunContext: data_mem_base_ = %p, weight_mem_base_ = %p, memory_size = %lu, weight_size = %lu",
  185. data_mem_base_, weight_mem_base_, data_mem_size_, weight_mem_size_);
  186. PrintMemInfo();
  187. run_context_ = {rt_model_,
  188. nullptr,
  189. session_id,
  190. data_mem_size_,
  191. data_mem_base_,
  192. mem_type_to_data_mem_size_,
  193. mem_type_to_data_mem_base_,
  194. weight_mem_size_,
  195. weight_mem_base_,
  196. buffer,
  197. stream_list_,
  198. event_list_,
  199. label_list_};
  200. return SUCCESS;
  201. }
  202. void RunContextUtil::PrintMemInfo() {
  203. for (auto iter : mem_type_to_data_mem_base_) {
  204. GELOGD("CreateRunContext: memory type = %ld, data memory base = %p", iter.first, iter.second);
  205. }
  206. for (auto iter : mem_type_to_data_mem_size_) {
  207. GELOGD("CreateRunContext: memory type = %ld, data memory size = %lu", iter.first, iter.second);
  208. }
  209. }
  210. RunContext &RunContextUtil::GetRunContext() { return run_context_; }
  211. } // namespace ge

图引擎模块（GE）是MindSpore的一个子模块，其代码由C++实现，位于前端模块ME和底层硬件之间，起到承接作用。图引擎模块以ME下发的图作为输入，然后进行一系列的深度图优化操作，最后输出一张可以在底层硬件上高效运行的图。GE针对昇腾AI处理器的硬件结构特点，做了特定的优化工作，以此来充分发挥出昇腾AI处理器的强大算力。在进行模型训练/推理时，GE会被自动调用，用户对此并无感知。GE主要由GE API和GE Core两部分组成，详细的架构图如下所示。