data_dumper.cc


5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
4 years ago
4 years ago
4 years ago
5 years ago
5 years ago
4 years ago
5 years ago
5 years ago
5 years ago
4 years ago
5 years ago
5 years ago
5 years ago
4 years ago
4 years ago
5 years ago
4 years ago
5 years ago
5 years ago
5 years ago
5 years ago
4 years ago
5 years ago
4 years ago
5 years ago
5 years ago
5 years ago
4 years ago
5 years ago
5 years ago
4 years ago
5 years ago
5 years ago
5 years ago
5 years ago
4 years ago
5 years ago
4 years ago
5 years ago
5 years ago
5 years ago
4 years ago
5 years ago
5 years ago
4 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
4 years ago
5 years ago
1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980818283848586878889909192939495969798991001011021031041051061071081091101111121131141151161171181191201211221231241251261271281291301311321331341351361371381391401411421431441451461471481491501511521531541551561571581591601611621631641651661671681691701711721731741751761771781791801811821831841851861871881891901911921931941951961971981992002012022032042052062072082092102112122132142152162172182192202212222232242252262272282292302312322332342352362372382392402412422432442452462472482492502512522532542552562572582592602612622632642652662672682692702712722732742752762772782792802812822832842852862872882892902912922932942952962972982993003013023033043053063073083093103113123133143153163173183193203213223233243253263273283293303313323333343353363373383393403413423433443453463473483493503513523533543553563573583593603613623633643653663673683693703713723733743753763773783793803813823833843853863873883893903913923933943953963973983994004014024034044054064074084094104114124134144154164174184194204214224234244254264274284294304314324334344354364374384394404414424434444454464474484494504514524534544554564574584594604614624634644654664674684694704714724734744754764774784794804814824834844854864874884894904914924934944954964974984995005015025035045055065075085095105115125135145155165175185195205215225235245255265275285295305315325335345355365375385395405415425435445455465475485495505515525535545555565575585595605615625635645655665675685695705715725735745755765775785795805815825835845855865875885895905915925935945955965975985996006016026036046056066076086096106116126136146156166176186196206216226236246256266276286296306316326336346356366376386396406416426436446456466476486496506516526536546556566576586596606616626636646656666676686696706716726736746756766776786796806816826836846856866876886896906916926936946956966976986997007017027037047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003
/**
 * Copyright 2020 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
#include "graph/load/model_manager/data_dumper.h"

#include <cstdlib>
#include <ctime>
#include <map>
#include <utility>
#include <vector>

#include "common/debug/memory_dumper.h"
#include "common/properties_manager.h"
#include "common/util.h"
#include "framework/common/debug/ge_log.h"
#include "framework/common/util.h"
#include "graph/anchor.h"
#include "graph/debug/ge_attr_define.h"
#include "graph/load/model_manager/model_utils.h"
#include "graph/manager/util/debug.h"
#include "graph/utils/attr_utils.h"
#include "graph/utils/tensor_utils.h"
#include "proto/dump_task.pb.h"
#include "proto/ge_ir.pb.h"
#include "proto/op_mapping_info.pb.h"
#include "runtime/base.h"
#include "runtime/mem.h"

namespace {
const uint32_t kAicpuLoadFlag = 1;
const uint32_t kAicpuUnloadFlag = 0;
const int64_t kOpDebugSize = 2048;
const int64_t kOpDebugShape = 2048;
const int8_t kDecimal = 10;
const uint32_t kAddrLen = sizeof(void *);
const char *const kDumpOutput = "output";
const char *const kDumpInput = "input";
const char *const kDumpAll = "all";

// Parse strings of the form "nodename:input:index" or "nodename:output:index".
static bool ParseNameIndex(const std::string &node_name_index, std::string &node_name, std::string &input_or_output,
                           size_t &index) {
  auto sep = node_name_index.rfind(':');
  if (sep == std::string::npos) {
    return false;
  }
  auto index_str = node_name_index.substr(sep + 1);
  index = static_cast<size_t>(std::strtol(index_str.c_str(), nullptr, kDecimal));
  auto node_name_without_index = node_name_index.substr(0, sep);
  sep = node_name_without_index.rfind(':');
  if (sep == std::string::npos) {
    return false;
  }
  node_name = node_name_without_index.substr(0, sep);
  input_or_output = node_name_without_index.substr(sep + 1);
  return (input_or_output == kDumpInput) || (input_or_output == kDumpOutput);
}

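// Tensors resident in L1 memory have no DDR address to dump directly; callers skip the
// normal address-based dump and record an L1 op buffer instead (see GenerateOpBuffer).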
static bool IsTensorDescWithSkipDumpAddrType(bool has_mem_type_attr, const std::vector<int64_t> &v_memory_type,
                                             size_t i) {
  return has_mem_type_attr && (v_memory_type[i] == RT_MEMORY_L1);
}

static uint64_t GetNowTime() {
  uint64_t ret = 0;
  mmTimeval tv;
  if (mmGetTimeOfDay(&tv, nullptr) == 0) {
    ret = tv.tv_sec * 1000000ULL + tv.tv_usec;
  }
  return ret;
}

static void ReplaceStringElem(std::string &str) {
  for_each(str.begin(), str.end(), [](char &ch) {
    if ((ch == ' ') || (ch == '.') || (ch == '/') || (ch == '\\')) {
      ch = '_';
    }
  });
}
}  // namespace

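// Map GE data types to the serialized proto enum; unknown types fall back to DT_UNDEFINED.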
static int32_t GetIrDataType(ge::DataType data_type) {
  static const std::map<ge::DataType, ge::proto::DataType> data_type_map = {
      {ge::DT_UNDEFINED, ge::proto::DT_UNDEFINED},
      {ge::DT_FLOAT, ge::proto::DT_FLOAT},
      {ge::DT_FLOAT16, ge::proto::DT_FLOAT16},
      {ge::DT_INT8, ge::proto::DT_INT8},
      {ge::DT_UINT8, ge::proto::DT_UINT8},
      {ge::DT_INT16, ge::proto::DT_INT16},
      {ge::DT_UINT16, ge::proto::DT_UINT16},
      {ge::DT_INT32, ge::proto::DT_INT32},
      {ge::DT_INT64, ge::proto::DT_INT64},
      {ge::DT_UINT32, ge::proto::DT_UINT32},
      {ge::DT_UINT64, ge::proto::DT_UINT64},
      {ge::DT_BOOL, ge::proto::DT_BOOL},
      {ge::DT_DOUBLE, ge::proto::DT_DOUBLE},
      {ge::DT_DUAL, ge::proto::DT_DUAL},
      {ge::DT_DUAL_SUB_INT8, ge::proto::DT_DUAL_SUB_INT8},
      {ge::DT_DUAL_SUB_UINT8, ge::proto::DT_DUAL_SUB_UINT8},
      {ge::DT_COMPLEX64, ge::proto::DT_COMPLEX64},
      {ge::DT_COMPLEX128, ge::proto::DT_COMPLEX128},
      {ge::DT_QINT8, ge::proto::DT_QINT8},
      {ge::DT_QINT16, ge::proto::DT_QINT16},
      {ge::DT_QINT32, ge::proto::DT_QINT32},
      {ge::DT_QUINT8, ge::proto::DT_QUINT8},
      {ge::DT_QUINT16, ge::proto::DT_QUINT16},
      {ge::DT_RESOURCE, ge::proto::DT_RESOURCE},
      {ge::DT_STRING_REF, ge::proto::DT_STRING_REF},
      {ge::DT_STRING, ge::proto::DT_STRING},
      {ge::DT_VARIANT, ge::proto::DT_VARIANT},
  };
  auto iter = data_type_map.find(data_type);
  if (iter == data_type_map.end()) {
    return static_cast<int32_t>(ge::proto::DT_UNDEFINED);
  }
  return static_cast<int32_t>(iter->second);
}

namespace ge {
DataDumper::~DataDumper() {
  ReleaseDevMem(&dev_mem_load_);
  ReleaseDevMem(&dev_mem_unload_);
}

void DataDumper::ReleaseDevMem(void **ptr) noexcept {
  if (ptr == nullptr) {
    return;
  }
  if (*ptr != nullptr) {
    rtError_t rt_ret = rtFree(*ptr);
    if (rt_ret != RT_ERROR_NONE) {
      GELOGE(RT_FAILED, "Call rtFree failed, ret: 0x%X", rt_ret);
    }
    *ptr = nullptr;
  }
}

void DataDumper::SetLoopAddr(void *global_step, void *loop_per_iter, void *loop_cond) {
  global_step_ = reinterpret_cast<uintptr_t>(global_step);
  loop_per_iter_ = reinterpret_cast<uintptr_t>(loop_per_iter);
  loop_cond_ = reinterpret_cast<uintptr_t>(loop_cond);
}

void DataDumper::SaveDumpInput(const std::shared_ptr<Node> &node) {
  if (node != nullptr) {
    auto input_op_desc = node->GetOpDesc();
    if (input_op_desc == nullptr) {
      GELOGE(PARAM_INVALID, "input op desc is null.");
      return;
    }
    for (auto &out_data_anchor : node->GetAllOutDataAnchors()) {
      for (auto &dst_in_data_anchor : out_data_anchor->GetPeerInDataAnchors()) {
        ge::NodePtr dst_node = dst_in_data_anchor->GetOwnerNode();
        auto op_desc = dst_node->GetOpDesc();
        if (op_desc == nullptr) {
          GELOGE(PARAM_INVALID, "input op desc is null.");
          return;
        }
        input_map_.insert(
            {op_desc->GetName(), {input_op_desc, dst_in_data_anchor->GetIdx(), out_data_anchor->GetIdx()}});
      }
    }
  }
}

void DataDumper::SaveEndGraphId(uint32_t task_id, uint32_t stream_id) {
  end_graph_task_id_ = task_id;
  end_graph_stream_id_ = stream_id;
}

void DataDumper::SaveOpDebugId(uint32_t task_id, uint32_t stream_id, void *op_debug_addr, bool is_op_debug) {
  op_debug_task_id_ = task_id;
  op_debug_stream_id_ = stream_id;
  op_debug_addr_ = op_debug_addr;
  is_op_debug_ = is_op_debug;
}

void DataDumper::SaveDumpOpInfo(const RuntimeParam &model_param, const OpDescPtr &op, uint32_t task_id,
                                uint32_t stream_id) {
  GELOGD("Start SaveDumpOpInfo of task_id: %u, stream_id: %u", task_id, stream_id);
  OpDescInfo op_desc_info;
  op_desc_info.op_name = op->GetName();
  op_desc_info.op_type = op->GetType();
  op_desc_info.task_id = task_id;
  op_desc_info.stream_id = stream_id;
  for (size_t i = 0; i < op->GetAllInputsSize(); ++i) {
    GeTensorDescPtr input_tensor_desc = op->MutableInputDesc(i);
    if (input_tensor_desc == nullptr) {
      continue;
    }
    op_desc_info.input_format.emplace_back(input_tensor_desc->GetFormat());
    op_desc_info.input_shape.emplace_back(input_tensor_desc->GetShape().GetDims());
    op_desc_info.input_data_type.emplace_back(input_tensor_desc->GetDataType());
    int64_t input_size = 0;
    if (TensorUtils::GetTensorSizeInBytes(*input_tensor_desc, input_size) != SUCCESS) {
      GELOGW("Get input size failed");
      return;
    }
    GELOGD("Save dump op info, the input size is %ld", input_size);
    op_desc_info.input_size.emplace_back(input_size);
  }
  for (size_t j = 0; j < op->GetOutputsSize(); ++j) {
    GeTensorDescPtr output_tensor_desc = op->MutableOutputDesc(j);
    if (output_tensor_desc == nullptr) {
      continue;
    }
    op_desc_info.output_format.emplace_back(output_tensor_desc->GetFormat());
    op_desc_info.output_shape.emplace_back(output_tensor_desc->GetShape().GetDims());
    op_desc_info.output_data_type.emplace_back(output_tensor_desc->GetDataType());
    int64_t output_size = 0;
    if (TensorUtils::GetTensorSizeInBytes(*output_tensor_desc, output_size) != SUCCESS) {
      GELOGW("Get output size failed");
      return;
    }
    GELOGD("Save dump op info, the output size is %ld", output_size);
    op_desc_info.output_size.emplace_back(output_size);
  }
  op_desc_info.input_addrs = ModelUtils::GetInputDataAddrs(model_param, op);
  op_desc_info.output_addrs = ModelUtils::GetOutputDataAddrs(model_param, op);
  op_desc_info_.emplace_back(op_desc_info);
}

bool DataDumper::GetOpDescInfo(uint32_t stream_id, uint32_t task_id, OpDescInfo &op_desc_info) const {
  GELOGI("There are %zu ops that need to be dumped.", op_desc_info_.size());
  for (size_t index = 0; index < op_desc_info_.size(); ++index) {
    OpDescInfo dump_op_info = op_desc_info_.at(index);
    if (dump_op_info.task_id == task_id && dump_op_info.stream_id == stream_id) {
      GELOGI("Find exception op of task_id: %u, stream_id: %u.", task_id, stream_id);
      op_desc_info = dump_op_info;
      return true;
    }
  }
  return false;
}

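// Register a dump task for an op. If an upstream Data op feeding this op was recorded via
// SaveDumpInput, additional entries are queued so those inputs are dumped as well.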
void DataDumper::SaveDumpTask(uint32_t task_id, uint32_t stream_id, const std::shared_ptr<OpDesc> &op_desc,
                              uintptr_t args) {
  if (op_desc == nullptr) {
    GELOGE(PARAM_INVALID, "Opdesc is nullptr");
    return;
  }
  GELOGI("Save dump task %s, task id: %u, stream id: %u", op_desc->GetName().c_str(), task_id, stream_id);
  op_list_.push_back({task_id, stream_id, op_desc, args, true});
  for (auto iter = input_map_.equal_range(op_desc->GetName()); iter.first != iter.second; ++iter.first) {
    InnerInputMapping &inner_input_mapping = iter.first->second;
    auto &data_op = inner_input_mapping.data_op;
    if (data_op == nullptr) {
      GELOGE(PARAM_INVALID, "data_op is null.");
      return;
    }
    auto input_tensor = op_desc->GetInputDescPtr(inner_input_mapping.input_anchor_index);
    if (input_tensor == nullptr) {
      GELOGE(PARAM_INVALID, "input_tensor is null, index: %d, size: %zu.", inner_input_mapping.input_anchor_index,
             op_desc->GetInputsSize());
      return;
    }
    int64_t data_size = 0;
    if (AttrUtils::GetInt(input_tensor, ATTR_NAME_INPUT_ORIGIN_SIZE, data_size)) {
      GELOGI("Get aipp data size according to attr is %ld", data_size);
    } else if (TensorUtils::GetTensorSizeInBytes(*input_tensor, data_size) != SUCCESS) {
      GELOGE(PARAM_INVALID, "Get input size failed");
      return;
    }
    GELOGI("Save input dump task %s, task id: %u, stream id: %u, data size: %ld", data_op->GetName().c_str(), task_id,
           stream_id, data_size);
    op_list_.push_back({task_id, stream_id, data_op, args, false, inner_input_mapping.input_anchor_index,
                        inner_input_mapping.output_anchor_index, input_tensor->GetShape().GetDims(), data_size});
  }
}

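// Record the loop-control variable addresses (global step, iterations per loop, loop
// condition) in the mapping info when they are available.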
static void SetOpMappingLoopAddr(uintptr_t step_id, uintptr_t loop_per_iter, uintptr_t loop_cond,
                                 aicpu::dump::OpMappingInfo &op_mapping_info) {
  if (step_id != 0) {
    GELOGI("step_id exists.");
    op_mapping_info.set_step_id_addr(static_cast<uint64_t>(step_id));
  }
  if (loop_per_iter != 0) {
    GELOGI("loop_per_iter exists.");
    op_mapping_info.set_iterations_per_loop_addr(static_cast<uint64_t>(loop_per_iter));
  }
  if (loop_cond != 0) {
    GELOGI("loop_cond exists.");
    op_mapping_info.set_loop_cond_addr(static_cast<uint64_t>(loop_cond));
  }
}

Status DataDumper::GenerateOutput(aicpu::dump::Output &output, const OpDesc::Vistor<GeTensorDesc> &tensor_descs,
                                  const uintptr_t &addr, size_t index) {
  output.set_data_type(static_cast<int32_t>(GetIrDataType(tensor_descs.at(index).GetDataType())));
  output.set_format(static_cast<int32_t>(tensor_descs.at(index).GetFormat()));
  for (auto dim : tensor_descs.at(index).GetShape().GetDims()) {
    output.mutable_shape()->add_dim(dim);
  }
  for (auto dim : tensor_descs.at(index).GetOriginShape().GetDims()) {
    output.mutable_origin_shape()->add_dim(dim);
  }
  int64_t output_size = 0;
  if (TensorUtils::GetTensorSizeInBytes(tensor_descs.at(index), output_size) != SUCCESS) {
    REPORT_CALL_ERROR("E19999", "Get tensor size fail when DataDumper %s", __FUNCTION__);
    GELOGE(PARAM_INVALID, "Get output size failed");
    return PARAM_INVALID;
  }
  GELOGD("Get output size in dump is %ld", output_size);
  std::string origin_name;
  int32_t origin_output_index = -1;
  (void)AttrUtils::GetStr(&tensor_descs.at(index), ATTR_NAME_DATA_DUMP_ORIGIN_NAME, origin_name);
  (void)AttrUtils::GetInt(&tensor_descs.at(index), ATTR_NAME_DATA_DUMP_ORIGIN_OUTPUT_INDEX, origin_output_index);
  output.set_size(output_size);
  output.set_original_name(origin_name);
  output.set_original_output_index(origin_output_index);
  output.set_original_output_format(static_cast<int32_t>(tensor_descs.at(index).GetOriginFormat()));
  output.set_original_output_data_type(static_cast<int32_t>(tensor_descs.at(index).GetOriginDataType()));
  output.set_address(static_cast<uint64_t>(addr));
  return SUCCESS;
}

Status DataDumper::DumpRefOutput(const DataDumper::InnerDumpInfo &inner_dump_info, aicpu::dump::Output &output,
                                 size_t i, const std::string &node_name_index) {
  std::string dump_op_name;
  std::string input_or_output;
  size_t index;
  // Parse which node's input or output tensor desc is chosen to supply the dump info.
  if (!ParseNameIndex(node_name_index, dump_op_name, input_or_output, index)) {
    GELOGE(PARAM_INVALID, "Op [%s] output desc[%zu] with invalid ATTR_DATA_DUMP_REF attr[%s].",
           inner_dump_info.op->GetName().c_str(), i, node_name_index.c_str());
    return PARAM_INVALID;
  }
  GE_CHECK_NOTNULL(compute_graph_);
  auto replace_node = compute_graph_->FindNode(dump_op_name);
  GE_RT_PARAM_INVALID_WITH_LOG_IF_TRUE(replace_node == nullptr,
                                       "Op [%s] output desc[%zu] with invalid ATTR_DATA_DUMP_REF attr[%s],"
                                       " cannot find redirect node[%s].",
                                       inner_dump_info.op->GetName().c_str(), i, node_name_index.c_str(),
                                       dump_op_name.c_str());
  auto replace_opdesc = replace_node->GetOpDesc();
  GE_CHECK_NOTNULL(replace_opdesc);
  auto iter = ref_info_.find(replace_opdesc);
  GE_RT_PARAM_INVALID_WITH_LOG_IF_TRUE(iter == ref_info_.end(),
                                       "Op [%s] output desc[%zu] cannot find any saved redirect node[%s]'s info.",
                                       inner_dump_info.op->GetName().c_str(), i, replace_opdesc->GetName().c_str());
  GE_CHECK_NOTNULL(iter->second);
  auto addr = reinterpret_cast<uintptr_t>(iter->second);
  if (input_or_output == kDumpInput) {
    const auto &replace_input_descs = replace_opdesc->GetAllInputsDesc();
    addr += kAddrLen * index;
    GE_CHK_STATUS_RET(GenerateOutput(output, replace_input_descs, addr, index), "Generate output failed");
  } else if (input_or_output == kDumpOutput) {
    const auto &replace_output_descs = replace_opdesc->GetAllOutputsDesc();
    const auto replace_input_size = replace_opdesc->GetAllInputsDesc().size();
    addr += (index + replace_input_size) * kAddrLen;
    GE_CHK_STATUS_RET(GenerateOutput(output, replace_output_descs, addr, index), "Generate output failed");
  }
  GELOGD("Op [%s] output desc[%zu] dump info is replaced by node[%s] [%s] tensor_desc [%zu]",
         inner_dump_info.op->GetName().c_str(), i, dump_op_name.c_str(), input_or_output.c_str(), index);
  return SUCCESS;
}

Status DataDumper::DumpOutputWithTask(const InnerDumpInfo &inner_dump_info, aicpu::dump::Task &task) {
  const auto &output_descs = inner_dump_info.op->GetAllOutputsDesc();
  const std::vector<void *> output_addrs = ModelUtils::GetOutputDataAddrs(*runtime_param_, inner_dump_info.op);
  if (output_descs.size() != output_addrs.size()) {
    REPORT_INNER_ERROR("E19999", "output_desc size:%zu != output addr size:%zu in op:%s(%s) when DataDumper %s",
                       output_descs.size(), output_addrs.size(),
                       inner_dump_info.op->GetName().c_str(), inner_dump_info.op->GetType().c_str(), __FUNCTION__);
    GELOGE(PARAM_INVALID, "Invalid output desc addrs size %zu, op %s has %zu output desc.", output_addrs.size(),
           inner_dump_info.op->GetName().c_str(), output_descs.size());
    return PARAM_INVALID;
  }
  std::vector<int64_t> v_memory_type;
  bool has_mem_type_attr = ge::AttrUtils::GetListInt(inner_dump_info.op, ATTR_NAME_OUTPUT_MEM_TYPE_LIST, v_memory_type);
  GE_RT_PARAM_INVALID_WITH_LOG_IF_TRUE(has_mem_type_attr && (v_memory_type.size() != output_descs.size()),
                                       "DumpOutputWithTask[%s], output size[%zu], output memory type size[%zu]",
                                       inner_dump_info.op->GetName().c_str(), output_descs.size(),
                                       v_memory_type.size());
  for (size_t i = 0; i < output_descs.size(); ++i) {
    aicpu::dump::Output output;
    std::string node_name_index;
    const auto &output_desc = output_descs.at(i);
    // Check whether this output tensor desc is redirected by attr ATTR_DATA_DUMP_REF.
    if (AttrUtils::GetStr(&output_desc, ATTR_DATA_DUMP_REF, node_name_index)) {
      GE_CHK_STATUS_RET(DumpRefOutput(inner_dump_info, output, i, node_name_index), "DumpRefOutput failed");
      task.mutable_output()->Add(std::move(output));
    } else {
      if (IsTensorDescWithSkipDumpAddrType(has_mem_type_attr, v_memory_type, i)) {
        GELOGI("[L1Fusion] DumpOutputWithTask[%s] output[%zu] is l1 addr.", inner_dump_info.op->GetName().c_str(), i);
        int64_t output_size = 0;
        if (TensorUtils::GetTensorSizeInBytes(output_descs.at(i), output_size) != SUCCESS) {
          REPORT_CALL_ERROR("E19999", "Get output tensor size fail in op:%s(%s), index:%zu, when DataDumper %s",
                            inner_dump_info.op->GetName().c_str(), inner_dump_info.op->GetType().c_str(), i,
                            __FUNCTION__);
          GELOGE(PARAM_INVALID, "Get output size failed.");
          return PARAM_INVALID;
        }
        GELOGI("Get output size of l1_fusion_dump is %ld", output_size);
        GenerateOpBuffer(output_size, task);
      } else {
        const auto input_size = inner_dump_info.op->GetInputsSize();
        auto addr = inner_dump_info.args + (i + input_size) * kAddrLen;
        GE_CHK_STATUS_RET(GenerateOutput(output, output_descs, addr, i), "Generate output failed");
        task.mutable_output()->Add(std::move(output));
      }
    }
  }
  return SUCCESS;
}

Status DataDumper::DumpOutput(const InnerDumpInfo &inner_dump_info, aicpu::dump::Task &task) {
  GELOGI("Start dump output");
  if (inner_dump_info.is_task) {
    // TBE or AICPU op: these ops come with a task.
    return DumpOutputWithTask(inner_dump_info, task);
  }
  // Otherwise a Data, Const or Variable op.
  aicpu::dump::Output output;
  auto output_tensor = inner_dump_info.op->GetOutputDescPtr(inner_dump_info.output_anchor_index);
  const std::vector<void *> output_addrs = ModelUtils::GetOutputDataAddrs(*runtime_param_, inner_dump_info.op);
  if (output_tensor == nullptr) {
    REPORT_INNER_ERROR("E19999", "output_desc tensor is nullptr in op:%s(%s), index:%u, "
                       "check invalid when DataDumper %s",
                       inner_dump_info.op->GetName().c_str(), inner_dump_info.op->GetType().c_str(),
                       inner_dump_info.output_anchor_index, __FUNCTION__);
    GELOGE(PARAM_INVALID, "output_tensor is null, index: %d, size: %zu.", inner_dump_info.output_anchor_index,
           inner_dump_info.op->GetOutputsSize());
    return PARAM_INVALID;
  }
  output.set_data_type(static_cast<int32_t>(GetIrDataType(output_tensor->GetDataType())));
  output.set_format(static_cast<int32_t>(output_tensor->GetFormat()));
  for (auto dim : inner_dump_info.dims) {
    output.mutable_shape()->add_dim(dim);
  }
  std::string origin_name;
  int32_t origin_output_index = -1;
  (void)AttrUtils::GetStr(output_tensor, ATTR_NAME_DATA_DUMP_ORIGIN_NAME, origin_name);
  (void)AttrUtils::GetInt(output_tensor, ATTR_NAME_DATA_DUMP_ORIGIN_OUTPUT_INDEX, origin_output_index);
  output.set_size(inner_dump_info.data_size);
  output.set_original_name(origin_name);
  output.set_original_output_index(origin_output_index);
  output.set_original_output_format(static_cast<int32_t>(output_tensor->GetOriginFormat()));
  output.set_original_output_data_type(static_cast<int32_t>(output_tensor->GetOriginDataType()));
  // Due to a lhisi virtual-address bug, args cannot be used directly here.
  if (inner_dump_info.output_anchor_index >= static_cast<int>(output_addrs.size())) {
    REPORT_INNER_ERROR("E19999", "output_anchor_index:%u >= output addr size:%zu in op:%s(%s), "
                       "check invalid when DataDumper %s", inner_dump_info.output_anchor_index, output_addrs.size(),
                       inner_dump_info.op->GetName().c_str(), inner_dump_info.op->GetType().c_str(), __FUNCTION__);
    GELOGE(FAILED, "Index is out of range.");
    return FAILED;
  }
  auto data_addr = inner_dump_info.args + kAddrLen * static_cast<uint32_t>(inner_dump_info.input_anchor_index);
  output.set_address(static_cast<uint64_t>(data_addr));
  task.mutable_output()->Add(std::move(output));
  return SUCCESS;
}

Status DataDumper::GenerateInput(aicpu::dump::Input &input, const OpDesc::Vistor<GeTensorDesc> &tensor_descs,
                                 const uintptr_t &addr, size_t index) {
  input.set_data_type(static_cast<int32_t>(GetIrDataType(tensor_descs.at(index).GetDataType())));
  input.set_format(static_cast<int32_t>(tensor_descs.at(index).GetFormat()));
  for (auto dim : tensor_descs.at(index).GetShape().GetDims()) {
    input.mutable_shape()->add_dim(dim);
  }
  for (auto dim : tensor_descs.at(index).GetOriginShape().GetDims()) {
    input.mutable_origin_shape()->add_dim(dim);
  }
  int64_t input_size = 0;
  if (AttrUtils::GetInt(tensor_descs.at(index), ATTR_NAME_INPUT_ORIGIN_SIZE, input_size)) {
    GELOGI("Get aipp input size according to attr is %ld", input_size);
  } else if (TensorUtils::GetTensorSizeInBytes(tensor_descs.at(index), input_size) != SUCCESS) {
    REPORT_CALL_ERROR("E19999", "Get tensor size fail when DataDumper %s", __FUNCTION__);
    GELOGE(PARAM_INVALID, "Get input size failed");
    return PARAM_INVALID;
  }
  GELOGD("Get input size in dump is %ld", input_size);
  input.set_size(input_size);
  input.set_address(static_cast<uint64_t>(addr));
  return SUCCESS;
}

Status DataDumper::DumpRefInput(const DataDumper::InnerDumpInfo &inner_dump_info, aicpu::dump::Input &input, size_t i,
                                const std::string &node_name_index) {
  std::string dump_op_name;
  std::string input_or_output;
  size_t index;
  // Parse which node's input or output tensor desc is chosen to supply the dump info.
  if (!ParseNameIndex(node_name_index, dump_op_name, input_or_output, index)) {
    GELOGE(PARAM_INVALID, "Op [%s] input desc[%zu] with invalid ATTR_DATA_DUMP_REF attr[%s].",
           inner_dump_info.op->GetName().c_str(), i, node_name_index.c_str());
    return PARAM_INVALID;
  }
  GE_CHECK_NOTNULL(compute_graph_);
  auto replace_node = compute_graph_->FindNode(dump_op_name);
  GE_RT_PARAM_INVALID_WITH_LOG_IF_TRUE(replace_node == nullptr,
                                       "Op [%s] input desc[%zu] with invalid ATTR_DATA_DUMP_REF attr[%s],"
                                       " cannot find redirect node[%s].",
                                       inner_dump_info.op->GetName().c_str(), i, node_name_index.c_str(),
                                       dump_op_name.c_str());
  auto replace_opdesc = replace_node->GetOpDesc();
  GE_CHECK_NOTNULL(replace_opdesc);
  auto iter = ref_info_.find(replace_opdesc);
  GE_RT_PARAM_INVALID_WITH_LOG_IF_TRUE(iter == ref_info_.end(),
                                       "Op [%s] input desc[%zu] cannot find any saved redirect node[%s]'s info.",
                                       inner_dump_info.op->GetName().c_str(), i, replace_opdesc->GetName().c_str());
  GE_CHECK_NOTNULL(iter->second);
  auto addr = reinterpret_cast<uintptr_t>(iter->second);
  if (input_or_output == kDumpInput) {
    const auto &replace_input_descs = replace_opdesc->GetAllInputsDesc();
    addr += kAddrLen * index;
    GE_CHK_STATUS_RET(GenerateInput(input, replace_input_descs, addr, index), "Generate input failed");
  } else if (input_or_output == kDumpOutput) {
    const auto &replace_output_descs = replace_opdesc->GetAllOutputsDesc();
    const auto replace_input_size = replace_opdesc->GetAllInputsDesc().size();
    addr += (index + replace_input_size) * kAddrLen;
    GE_CHK_STATUS_RET(GenerateInput(input, replace_output_descs, addr, index), "Generate input failed");
  }
  GELOGD("Op [%s] input desc[%zu] dump info is replaced by node[%s] [%s] tensor_desc [%zu]",
         inner_dump_info.op->GetName().c_str(), i, dump_op_name.c_str(), input_or_output.c_str(), index);
  return SUCCESS;
}

Status DataDumper::DumpInput(const InnerDumpInfo &inner_dump_info, aicpu::dump::Task &task) {
  GELOGI("Start dump input");
  const auto &input_descs = inner_dump_info.op->GetAllInputsDesc();
  const std::vector<void *> input_addrs = ModelUtils::GetInputDataAddrs(*runtime_param_, inner_dump_info.op);
  if (input_descs.size() != input_addrs.size()) {
    REPORT_INNER_ERROR("E19999", "input_desc size:%zu != input addr size:%zu in op:%s(%s) when DataDumper %s",
                       input_descs.size(), input_addrs.size(),
                       inner_dump_info.op->GetName().c_str(), inner_dump_info.op->GetType().c_str(), __FUNCTION__);
    GELOGE(PARAM_INVALID, "Invalid input desc addrs size %zu, op %s has %zu input desc.", input_addrs.size(),
           inner_dump_info.op->GetName().c_str(), input_descs.size());
    return PARAM_INVALID;
  }
  std::vector<int64_t> v_memory_type;
  bool has_mem_type_attr = ge::AttrUtils::GetListInt(inner_dump_info.op, ATTR_NAME_INPUT_MEM_TYPE_LIST, v_memory_type);
  GE_RT_PARAM_INVALID_WITH_LOG_IF_TRUE(has_mem_type_attr && (v_memory_type.size() != input_descs.size()),
                                       "DumpInput[%s], input size[%zu], input memory type size[%zu]",
                                       inner_dump_info.op->GetName().c_str(), input_descs.size(), v_memory_type.size());
  for (size_t i = 0; i < input_descs.size(); ++i) {
    aicpu::dump::Input input;
    std::string node_name_index;
    // Check whether this input tensor desc is redirected by attr ATTR_DATA_DUMP_REF.
    if (AttrUtils::GetStr(&input_descs.at(i), ATTR_DATA_DUMP_REF, node_name_index)) {
      GE_CHK_STATUS_RET(DumpRefInput(inner_dump_info, input, i, node_name_index), "DumpRefInput failed");
      task.mutable_input()->Add(std::move(input));
    } else {
      // Normal dump without the redirect attr.
      if (IsTensorDescWithSkipDumpAddrType(has_mem_type_attr, v_memory_type, i)) {
        GELOGI("[L1Fusion] DumpInput[%s] input[%zu] is l1 addr", inner_dump_info.op->GetName().c_str(), i);
        int64_t input_size = 0;
        if (AttrUtils::GetInt(input_descs.at(i), ATTR_NAME_INPUT_ORIGIN_SIZE, input_size)) {
          GELOGI("Get aipp input size according to attr is %ld", input_size);
        } else if (TensorUtils::GetTensorSizeInBytes(input_descs.at(i), input_size) != SUCCESS) {
          REPORT_CALL_ERROR("E19999", "Get input tensor size fail in op:%s(%s), index:%zu, when DataDumper %s",
                            inner_dump_info.op->GetName().c_str(), inner_dump_info.op->GetType().c_str(), i,
                            __FUNCTION__);
          GELOGE(PARAM_INVALID, "Get input size failed.");
          return PARAM_INVALID;
        }
        GELOGI("Get input size of l1_fusion_dump is %ld", input_size);
        GenerateOpBuffer(input_size, task);
      } else {
        auto addr = inner_dump_info.args + kAddrLen * i;
        GE_CHK_STATUS_RET(GenerateInput(input, input_descs, addr, i), "Generate input failed");
        task.mutable_input()->Add(std::move(input));
      }
    }
  }
  return SUCCESS;
}

void DataDumper::GenerateOpBuffer(const int64_t &size, aicpu::dump::Task &task) {
  aicpu::dump::OpBuffer op_buffer;
  op_buffer.set_buffer_type(aicpu::dump::BufferType::L1);
  op_buffer.set_address(reinterpret_cast<uintptr_t>(l1_fusion_addr_));
  op_buffer.set_size(size);
  task.mutable_buffer()->Add(std::move(op_buffer));
}

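// Serialize the op mapping info, copy it into device memory, and hand it to the runtime
// dump engine via rtDatadumpInfoLoad.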
Status DataDumper::ExecuteLoadDumpInfo(aicpu::dump::OpMappingInfo &op_mapping_info) {
  std::string proto_str;
  size_t proto_size = op_mapping_info.ByteSizeLong();
  bool ret = op_mapping_info.SerializeToString(&proto_str);
  if (!ret || proto_size == 0) {
    REPORT_INNER_ERROR("E19999", "Serialize proto to string fail when DataDumper %s", __FUNCTION__);
    GELOGE(PARAM_INVALID, "Protobuf SerializeToString failed, proto size %zu.", proto_size);
    return PARAM_INVALID;
  }
  if (dev_mem_load_ != nullptr) {
    GELOGW("dev_mem_load_ has been used.");
    ReleaseDevMem(&dev_mem_load_);
  }
  rtError_t rt_ret = rtMalloc(&dev_mem_load_, proto_size, RT_MEMORY_HBM);
  if (rt_ret != RT_ERROR_NONE) {
    REPORT_CALL_ERROR("E19999", "Call rtMalloc failed, size:%zu, ret:0x%X, when DataDumper %s",
                      proto_size, rt_ret, __FUNCTION__);
    GELOGE(RT_FAILED, "Call rtMalloc failed, ret: 0x%X", rt_ret);
    return RT_ERROR_TO_GE_STATUS(rt_ret);
  }
  GE_PRINT_DYNAMIC_MEMORY(rtMalloc, "load dump information.", proto_size)
  rt_ret = rtMemcpy(dev_mem_load_, proto_size, proto_str.c_str(), proto_size, RT_MEMCPY_HOST_TO_DEVICE);
  if (rt_ret != RT_ERROR_NONE) {
    REPORT_CALL_ERROR("E19999", "Call rtMemcpy failed, size:%zu, ret:0x%X, when DataDumper %s",
                      proto_size, rt_ret, __FUNCTION__);
    GELOGE(RT_FAILED, "Call rtMemcpy failed, ret: 0x%X", rt_ret);
    return RT_ERROR_TO_GE_STATUS(rt_ret);
  }
  rt_ret = rtDatadumpInfoLoad(dev_mem_load_, proto_size);
  if (rt_ret != RT_ERROR_NONE) {
    REPORT_CALL_ERROR("E19999", "Call rtDatadumpInfoLoad failed, ret:0x%X, when DataDumper %s", rt_ret, __FUNCTION__);
    GELOGE(RT_FAILED, "Call rtDatadumpInfoLoad failed, ret: 0x%X", rt_ret);
    return RT_ERROR_TO_GE_STATUS(rt_ret);
  }
  load_flag_ = true;
  GELOGI("LoadDumpInfo success, proto size is: %zu.", proto_size);
  return SUCCESS;
}

Status DataDumper::ExecuteUnLoadDumpInfo(aicpu::dump::OpMappingInfo &op_mapping_info) {
  std::string proto_str;
  size_t proto_size = op_mapping_info.ByteSizeLong();
  bool ret = op_mapping_info.SerializeToString(&proto_str);
  if (!ret || proto_size == 0) {
    REPORT_INNER_ERROR("E19999", "Serialize proto to string fail when DataDumper %s", __FUNCTION__);
    GELOGE(PARAM_INVALID, "Protobuf SerializeToString failed, proto size %zu.", proto_size);
    return PARAM_INVALID;
  }
  if (dev_mem_unload_ != nullptr) {
    GELOGW("dev_mem_unload_ has been used.");
    ReleaseDevMem(&dev_mem_unload_);
  }
  rtError_t rt_ret = rtMalloc(&dev_mem_unload_, proto_size, RT_MEMORY_HBM);
  if (rt_ret != RT_ERROR_NONE) {
    REPORT_CALL_ERROR("E19999", "Call rtMalloc failed, size:%zu, ret:0x%X, when DataDumper %s",
                      proto_size, rt_ret, __FUNCTION__);
    GELOGE(RT_FAILED, "Call rtMalloc failed, ret: 0x%X", rt_ret);
    return RT_ERROR_TO_GE_STATUS(rt_ret);
  }
  GE_PRINT_DYNAMIC_MEMORY(rtMalloc, "unload dump information.", proto_size)
  rt_ret = rtMemcpy(dev_mem_unload_, proto_size, proto_str.c_str(), proto_size, RT_MEMCPY_HOST_TO_DEVICE);
  if (rt_ret != RT_ERROR_NONE) {
    REPORT_CALL_ERROR("E19999", "Call rtMemcpy failed, size:%zu, ret:0x%X, when DataDumper %s",
                      proto_size, rt_ret, __FUNCTION__);
    GELOGE(RT_FAILED, "Call rtMemcpy failed, ret: 0x%X", rt_ret);
    return RT_ERROR_TO_GE_STATUS(rt_ret);
  }
  rt_ret = rtDatadumpInfoLoad(dev_mem_unload_, proto_size);
  if (rt_ret != RT_ERROR_NONE) {
    REPORT_CALL_ERROR("E19999", "Call rtDatadumpInfoLoad failed, ret:0x%X, when DataDumper %s", rt_ret, __FUNCTION__);
    GELOGE(RT_FAILED, "Call rtDatadumpInfoLoad failed, ret: 0x%X", rt_ret);
    return RT_ERROR_TO_GE_STATUS(rt_ret);
  }
  load_flag_ = false;
  GELOGI("UnloadDumpInfo success, proto size is: %zu.", proto_size);
  return SUCCESS;
}

Status DataDumper::LoadDumpInfo() {
  std::string dump_list_key;
  PrintCheckLog(dump_list_key);
  if (op_list_.empty()) {
    GELOGD("op_list_ is empty");
  }
  aicpu::dump::OpMappingInfo op_mapping_info;
  auto dump_path = dump_properties_.GetDumpPath() + std::to_string(device_id_) + "/";
  op_mapping_info.set_dump_path(dump_path);
  op_mapping_info.set_model_name(dump_list_key);
  op_mapping_info.set_model_id(model_id_);
  op_mapping_info.set_flag(kAicpuLoadFlag);
  op_mapping_info.set_dump_step(dump_properties_.GetDumpStep());
  SetOpMappingLoopAddr(global_step_, loop_per_iter_, loop_cond_, op_mapping_info);
  auto ret = BuildTaskInfo(op_mapping_info);
  if (ret != SUCCESS) {
    GELOGE(ret, "Build task info failed");
    return ret;
  }
  SetEndGraphIdToAicpu(end_graph_task_id_, end_graph_stream_id_, op_mapping_info);
  SetOpDebugIdToAicpu(op_debug_task_id_, op_debug_stream_id_, op_debug_addr_, op_mapping_info);
  if (!op_list_.empty() || is_op_debug_ || is_end_graph_) {
    auto load_ret = ExecuteLoadDumpInfo(op_mapping_info);
    if (load_ret != SUCCESS) {
      GELOGE(load_ret, "Execute load dump info failed");
      return load_ret;
    }
  }
  return SUCCESS;
}

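// Build one aicpu::dump::Task per recorded op; which tensors get attached depends on the
// configured dump mode (output, input, or all / op-debug).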
Status DataDumper::BuildTaskInfo(aicpu::dump::OpMappingInfo &op_mapping_info) {
  for (const auto &op_iter : op_list_) {
    auto op_desc = op_iter.op;
    GELOGD("Op %s in model begin to add task in op_mapping_info", op_desc->GetName().c_str());
    aicpu::dump::Task task;
    task.set_end_graph(false);
    task.set_task_id(op_iter.task_id);
    task.set_stream_id(op_iter.stream_id);
    task.mutable_op()->set_op_name(op_desc->GetName());
    task.mutable_op()->set_op_type(op_desc->GetType());
    if (dump_properties_.GetDumpMode() == kDumpOutput) {
      Status ret = DumpOutput(op_iter, task);
      if (ret != SUCCESS) {
        GELOGE(ret, "Dump output failed");
        return ret;
      }
      op_mapping_info.mutable_task()->Add(std::move(task));
      continue;
    }
    if (dump_properties_.GetDumpMode() == kDumpInput) {
      if (op_iter.is_task) {
        Status ret = DumpInput(op_iter, task);
        if (ret != SUCCESS) {
          GELOGE(ret, "Dump input failed");
          return ret;
        }
      }
      op_mapping_info.mutable_task()->Add(std::move(task));
      continue;
    }
    if (dump_properties_.GetDumpMode() == kDumpAll || is_op_debug_) {
      auto ret = DumpOutput(op_iter, task);
      if (ret != SUCCESS) {
        GELOGE(ret, "Dump output failed in dump-all mode");
        return ret;
      }
      if (op_iter.is_task) {
        ret = DumpInput(op_iter, task);
        if (ret != SUCCESS) {
          GELOGE(ret, "Dump input failed in dump-all mode");
          return ret;
        }
      }
      op_mapping_info.mutable_task()->Add(std::move(task));
      continue;
    }
  }
  return SUCCESS;
}

void DataDumper::SetEndGraphIdToAicpu(uint32_t task_id, uint32_t stream_id,
                                      aicpu::dump::OpMappingInfo &op_mapping_info) {
  if (dump_properties_.GetDumpMode() == kDumpOutput || dump_properties_.GetDumpMode() == kDumpInput ||
      dump_properties_.GetDumpMode() == kDumpAll) {
    aicpu::dump::Task task;
    task.set_end_graph(true);
    task.set_task_id(end_graph_task_id_);
    task.set_stream_id(end_graph_stream_id_);
    task.mutable_op()->set_op_name(NODE_NAME_END_GRAPH);
    task.mutable_op()->set_op_type(ENDGRAPH);
    op_mapping_info.mutable_task()->Add(std::move(task));
    is_end_graph_ = true;
    if (op_mapping_info.model_name_param_case() == aicpu::dump::OpMappingInfo::kModelName) {
      GELOGI("Add end_graph_info to aicpu, model_name is %s, task_id is %u, stream_id is %u",
             op_mapping_info.model_name().c_str(), end_graph_task_id_, end_graph_stream_id_);
      return;
    }
    GELOGI("Add end_graph_info to aicpu, task_id is %u, stream_id is %u", end_graph_task_id_, end_graph_stream_id_);
  }
}

void DataDumper::SetOpDebugIdToAicpu(uint32_t task_id, uint32_t stream_id, void *op_debug_addr,
                                     aicpu::dump::OpMappingInfo &op_mapping_info) {
  if (is_op_debug_) {
    GELOGI("Add op_debug_info to aicpu, task_id is %u, stream_id is %u", task_id, stream_id);
    aicpu::dump::Task task;
    task.set_end_graph(false);
    task.set_task_id(task_id);
    task.set_stream_id(stream_id);
    task.mutable_op()->set_op_name(NODE_NAME_OP_DEBUG);
    task.mutable_op()->set_op_type(OP_TYPE_OP_DEBUG);
    // Set the single debug output.
    aicpu::dump::Output output;
    output.set_data_type(DT_UINT8);
    output.set_format(FORMAT_ND);
    output.mutable_shape()->add_dim(kOpDebugShape);
    output.set_original_name(NODE_NAME_OP_DEBUG);
    output.set_original_output_index(0);
    output.set_original_output_format(FORMAT_ND);
    output.set_original_output_data_type(DT_UINT8);
    // Due to a lhisi virtual-address bug, args cannot be used directly here.
    output.set_address(static_cast<uint64_t>(reinterpret_cast<uintptr_t>(op_debug_addr)));
    output.set_size(kOpDebugSize);
    task.mutable_output()->Add(std::move(output));
    op_mapping_info.mutable_task()->Add(std::move(task));
  }
}

Status DataDumper::UnloadDumpInfo() {
  if (!load_flag_) {
    load_flag_ = false;
    return SUCCESS;
  }
  GELOGI("UnloadDumpInfo start.");
  aicpu::dump::OpMappingInfo op_mapping_info;
  op_mapping_info.set_model_id(model_id_);
  op_mapping_info.set_flag(kAicpuUnloadFlag);
  for (const auto &op_iter : op_list_) {
    aicpu::dump::Task task;
    task.set_task_id(op_iter.task_id);
    task.set_stream_id(op_iter.stream_id);
    op_mapping_info.mutable_task()->Add(std::move(task));
  }
  auto ret = ExecuteUnLoadDumpInfo(op_mapping_info);
  if (ret != SUCCESS) {
    GELOGE(ret, "Execute unload dump info failed");
    return ret;
  }
  return SUCCESS;
}

void DataDumper::DumpShrink() {
  compute_graph_.reset();
  input_map_.clear();
  ref_info_.clear();
}

void DataDumper::PrintCheckLog(string &dump_list_key) {
  std::set<std::string> model_list = dump_properties_.GetAllDumpModel();
  if (model_list.empty()) {
    return;
  }
  bool not_find_by_omname = model_list.find(om_name_) == model_list.end();
  bool not_find_by_modelname = model_list.find(model_name_) == model_list.end();
  dump_list_key = not_find_by_omname ? model_name_ : om_name_;
  GELOGI("%zu ops need to be dumped in known shape model %s.", op_list_.size(), dump_list_key.c_str());
  if (model_list.find(DUMP_ALL_MODEL) == model_list.end()) {
    if (not_find_by_omname && not_find_by_modelname) {
      std::string model_list_str;
      for (auto &model : model_list) {
        model_list_str += "[" + model + "].";
      }
      GELOGW("Model %s will not be set to dump, dump list: %s", dump_list_key.c_str(), model_list_str.c_str());
      return;
    }
  }
  std::set<std::string> config_dump_op_list = dump_properties_.GetPropertyValue(dump_list_key);
  std::set<std::string> dump_op_list;
  for (auto &inner_dump_info : op_list_) {
    // op_list_ entries always hold a non-null OpDescPtr.
    dump_op_list.insert(inner_dump_info.op->GetName());
  }
  for (auto &dump_op : config_dump_op_list) {
    if (dump_op_list.find(dump_op) == dump_op_list.end()) {
      GELOGW("Op %s is set to dump but does not exist in model %s or is not a valid op.", dump_op.c_str(),
             dump_list_key.c_str());
    }
  }
}

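// Write the raw device memory of each recorded input of the exception op into dump_file.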
Status DataDumper::DumpExceptionInput(const OpDescInfo &op_desc_info, const string &dump_file) {
  GELOGI("Start to dump exception input");
  for (size_t i = 0; i < op_desc_info.input_addrs.size(); i++) {
    if (Debug::DumpDevMem(dump_file.data(), op_desc_info.input_addrs.at(i), op_desc_info.input_size.at(i)) != SUCCESS) {
      GELOGE(PARAM_INVALID, "Dump input data %zu failed", i);
      return PARAM_INVALID;
    }
  }
  return SUCCESS;
}

Status DataDumper::DumpExceptionOutput(const OpDescInfo &op_desc_info, const string &dump_file) {
  GELOGI("Start to dump exception output");
  for (size_t i = 0; i < op_desc_info.output_addrs.size(); i++) {
    if (Debug::DumpDevMem(dump_file.data(), op_desc_info.output_addrs.at(i), op_desc_info.output_size.at(i)) !=
        SUCCESS) {
      GELOGE(PARAM_INVALID, "Dump output data %zu failed", i);
      return PARAM_INVALID;
    }
  }
  return SUCCESS;
}

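// For each runtime exception, look up the op by (stream_id, task_id), then write the
// serialized DumpData proto (size first, then bytes) followed by the raw input and output
// buffers to ./<op_type>.<op_name>.<task_id>.<timestamp>.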
Status DataDumper::DumpExceptionInfo(const std::vector<rtExceptionInfo> exception_infos) {
  GELOGI("Start to dump exception info");
  for (const rtExceptionInfo &iter : exception_infos) {
    OpDescInfo op_desc_info;
    if (GetOpDescInfo(iter.streamid, iter.taskid, op_desc_info)) {
      toolkit::dumpdata::DumpData dump_data;
      dump_data.set_version("2.0");
      dump_data.set_dump_time(GetNowTime());
      dump_data.set_op_name(op_desc_info.op_name);
      for (size_t i = 0; i < op_desc_info.input_format.size(); ++i) {
        toolkit::dumpdata::OpInput input;
        input.set_data_type(toolkit::dumpdata::OutputDataType(GetIrDataType(op_desc_info.input_data_type[i])));
        input.set_format(toolkit::dumpdata::OutputFormat(op_desc_info.input_format[i]));
        for (auto dim : op_desc_info.input_shape[i]) {
          input.mutable_shape()->add_dim(dim);
        }
        input.set_size(op_desc_info.input_size[i]);
        GELOGI("The input size in exception is %ld", op_desc_info.input_size[i]);
        dump_data.mutable_input()->Add(std::move(input));
      }
      for (size_t j = 0; j < op_desc_info.output_format.size(); ++j) {
        toolkit::dumpdata::OpOutput output;
        output.set_data_type(toolkit::dumpdata::OutputDataType(GetIrDataType(op_desc_info.output_data_type[j])));
        output.set_format(toolkit::dumpdata::OutputFormat(op_desc_info.output_format[j]));
        for (auto dim : op_desc_info.output_shape[j]) {
          output.mutable_shape()->add_dim(dim);
        }
        output.set_size(op_desc_info.output_size[j]);
        GELOGI("The output size in exception is %ld", op_desc_info.output_size[j]);
        dump_data.mutable_output()->Add(std::move(output));
      }
      uint64_t now_time = GetNowTime();
      std::string op_name = op_desc_info.op_name;
      std::string op_type = op_desc_info.op_type;
      ReplaceStringElem(op_name);
      ReplaceStringElem(op_type);
      string dump_file_path =
          "./" + op_type + "." + op_name + "." + std::to_string(op_desc_info.task_id) + "." + std::to_string(now_time);
      GELOGI("The exception dump file path is %s", dump_file_path.c_str());
      uint64_t proto_size = dump_data.ByteSizeLong();
      std::unique_ptr<char[]> proto_msg(new (std::nothrow) char[proto_size]);
      GE_CHECK_NOTNULL(proto_msg);  // Guard against allocation failure before serializing.
      bool ret = dump_data.SerializeToArray(proto_msg.get(), proto_size);
      if (!ret || proto_size == 0) {
        REPORT_INNER_ERROR("E19999", "Serialize proto to string fail when DataDumper %s", __FUNCTION__);
        GELOGE(PARAM_INVALID, "Dump data proto serialize failed");
        return PARAM_INVALID;
      }
      GE_CHK_STATUS_RET(MemoryDumper::DumpToFile(dump_file_path.c_str(), &proto_size, sizeof(uint64_t)),
                        "Failed to dump proto size");
      GE_CHK_STATUS_RET(MemoryDumper::DumpToFile(dump_file_path.c_str(), proto_msg.get(), proto_size),
                        "Failed to dump proto msg");
      if (DumpExceptionInput(op_desc_info, dump_file_path) != SUCCESS) {
        GELOGE(PARAM_INVALID, "Dump exception input failed");
        return PARAM_INVALID;
      }
      if (DumpExceptionOutput(op_desc_info, dump_file_path) != SUCCESS) {
        GELOGE(PARAM_INVALID, "Dump exception output failed");
        return PARAM_INVALID;
      }
      GELOGI("Dump exception info SUCCESS");
    } else {
      GELOGE(PARAM_INVALID, "Get op desc info failed, task id: %u, stream id: %u", iter.taskid, iter.streamid);
      return PARAM_INVALID;
    }
  }
  return SUCCESS;
}
}  // namespace ge

The Graph Engine (GE) is a submodule of MindSpore, implemented in C++. It sits between the front-end module (ME) and the underlying hardware, acting as a bridge between the two. GE takes the graphs issued by ME as input, applies a series of deep graph optimizations, and outputs a graph that can run efficiently on the underlying hardware. GE performs optimizations tailored to the hardware architecture of the Ascend AI processor in order to fully exploit its compute power. During model training and inference, GE is invoked automatically and is transparent to the user. GE consists of two main parts, GE API and GE Core; the detailed architecture diagram is shown below.