From b32fac224d672639021f213b1f9e10cbb71fe6da Mon Sep 17 00:00:00 2001 From: wangxiaotian22 Date: Tue, 30 Mar 2021 20:02:59 +0800 Subject: [PATCH] add error msg --- ge/graph/manager/graph_var_manager.cc | 2 +- ge/graph/manager/memory_api.cc | 4 +- ge/graph/preprocess/multi_batch_copy_graph.cc | 291 ++++++++++++++++-- ge/graph/preprocess/multi_batch_options.cc | 48 +++ inc/framework/common/util.h | 2 + 5 files changed, 316 insertions(+), 31 deletions(-) diff --git a/ge/graph/manager/graph_var_manager.cc b/ge/graph/manager/graph_var_manager.cc index f83353ff..de8efd29 100755 --- a/ge/graph/manager/graph_var_manager.cc +++ b/ge/graph/manager/graph_var_manager.cc @@ -990,7 +990,7 @@ VarManager *VarManagerPool::GetVarManager(uint64_t session_id) { VarManager *var_manager = new (std::nothrow) VarManager(session_id); if (var_manager == nullptr) { - REPORT_INNER_ERROR("E19999", "New VarManager fali, session_id:%lu, when VarManager %s", session_id, __FUNCTION__); + REPORT_INNER_ERROR("E19999", "New VarManager fail, session_id:%lu, when VarManager %s", session_id, __FUNCTION__); GELOGE(INTERNAL_ERROR, "VarManager::Instance find session by " "session_id[%lu] failed.", diff --git a/ge/graph/manager/memory_api.cc b/ge/graph/manager/memory_api.cc index 9843634d..7f977756 100644 --- a/ge/graph/manager/memory_api.cc +++ b/ge/graph/manager/memory_api.cc @@ -75,8 +75,8 @@ Status RdmaRemoteRegister(const std::vector &var_info, rtMemType_t HcclResult hccl_ret = hcom_remote_mem_register(reg_addrs.get(), table_len); if (hccl_ret != HCCL_SUCCESS) { - REPORT_CALL_ERROR("E19999", "Call hcom_remote_mem_register failed, ret:%u, when %s", - hccl_ret.c_str(), __FUNCTION__); + REPORT_CALL_ERROR("E19999", "Call hcom_remote_mem_register failed, ret:%d, when %s", + hccl_ret, __FUNCTION__); GELOGE(HCCL_E_INTERNAL, "Rdma mem register failed, ret: 0x%X", hccl_ret); return HCCL_E_INTERNAL; } diff --git a/ge/graph/preprocess/multi_batch_copy_graph.cc b/ge/graph/preprocess/multi_batch_copy_graph.cc 
index 12987f29..d4ccf060 100644 --- a/ge/graph/preprocess/multi_batch_copy_graph.cc +++ b/ge/graph/preprocess/multi_batch_copy_graph.cc @@ -78,6 +78,7 @@ inline bool IsGetNextType(const NodePtr &node) { NodePtr InsertMergeNodeToGraph(const std::string &name, size_t input_num, const ComputeGraphPtr &graph) { OpDescPtr desc = MakeShared(); if (desc == nullptr) { + REPORT_CALL_ERROR("E19999", "New OpDesc failed when %s", __FUNCTION__); GELOGE(OUT_OF_MEMORY, "Failed to insert merge node, name %s", name.c_str()); return nullptr; } @@ -87,24 +88,33 @@ NodePtr InsertMergeNodeToGraph(const std::string &name, size_t input_num, const for (size_t i = 0; i < input_num; ++i) { auto ret = desc->AddInputDesc("x" + std::to_string(i), tensor_desc); GE_IF_BOOL_EXEC(ret != GRAPH_SUCCESS, + REPORT_CALL_ERROR("E19999", "Add input desc to op:%s(%s) failed, input desc name:%s, when %s", + desc->GetName().c_str(), desc->GetType().c_str(), + ("x" + std::to_string(i)).c_str(), __FUNCTION__); GELOGE(INTERNAL_ERROR, "Failed to create merge node %s, failed to add input %zu, error-code %u", name.c_str(), i, ret); return nullptr); } auto ret = desc->AddOutputDesc("y", tensor_desc); GE_IF_BOOL_EXEC(ret != GRAPH_SUCCESS, + REPORT_CALL_ERROR("E19999", "Add output desc to op:%s(%s) failed, output desc name:%s, when %s", + desc->GetName().c_str(), desc->GetType().c_str(), "y", __FUNCTION__); GELOGE(INTERNAL_ERROR, "Failed to create merge node %s, failed to add output 'y', error-code %u", name.c_str(), ret); return nullptr); tensor_desc.SetDataType(DT_INT32); ret = desc->AddOutputDesc("value_index", tensor_desc); if (ret != GRAPH_SUCCESS) { + REPORT_CALL_ERROR("E19999", "Add output desc to op:%s(%s) failed, output desc name:%s, when %s", + desc->GetName().c_str(), desc->GetType().c_str(), "value_index", __FUNCTION__); GELOGE(INTERNAL_ERROR, "Failed to create merge node %s, failed to add output 'value_index', error-code %u", name.c_str(), ret); return nullptr; } if (!AttrUtils::SetBool(desc, 
ATTR_INSERT_BY_MBATCH, true)) { + REPORT_CALL_ERROR("E19999", "Set Attr:%s to op:%s(%s) failed when %s", ATTR_INSERT_BY_MBATCH.c_str(), + desc->GetName().c_str(), desc->GetType().c_str(), __FUNCTION__); GELOGE(INTERNAL_ERROR, "Failed to create merge node %s, failed to add attr", name.c_str()); return nullptr; } @@ -114,13 +124,18 @@ NodePtr InsertMergeNodeToGraph(const std::string &name, size_t input_num, const NodePtr InsertCopyNode(const NodePtr &node, size_t n) { const std::string &name = node->GetName() + "_ascend_mbatch_batch_" + std::to_string(n); auto src_op_desc = node->GetOpDesc(); - GE_IF_BOOL_EXEC(src_op_desc == nullptr, GELOGE(INTERNAL_ERROR, "Failed to copy node %s to %s, the OpDesc is null", - node->GetName().c_str(), name.c_str()); + GE_IF_BOOL_EXEC(src_op_desc == nullptr, + REPORT_INNER_ERROR("E19999", "Param opdesc in node is nullptr, check invalid when %s", __FUNCTION__); + GELOGE(INTERNAL_ERROR, "Failed to copy node %s to %s, the OpDesc is null", + node->GetName().c_str(), name.c_str()); return nullptr); auto desc = AttrUtils::CopyOpDesc(src_op_desc); - GE_IF_BOOL_EXEC(desc == nullptr, GELOGE(OUT_OF_MEMORY, "Failed to create op desc for copy node for node %s name %s", - node->GetName().c_str(), name.c_str()); + GE_IF_BOOL_EXEC(desc == nullptr, + REPORT_CALL_ERROR("E19999", "Copy OpDesc from op:%s(%s) failed when %s", + src_op_desc->GetName().c_str(), src_op_desc->GetType().c_str(), __FUNCTION__); + GELOGE(OUT_OF_MEMORY, "Failed to create op desc for copy node for node %s name %s", + node->GetName().c_str(), name.c_str()); return nullptr); desc->SetName(name); @@ -128,6 +143,8 @@ NodePtr InsertCopyNode(const NodePtr &node, size_t n) { for (uint32_t i = 0; i < node->GetAllInDataAnchorsSize(); ++i) { auto input_desc = desc->MutableInputDesc(i); GE_IF_BOOL_EXEC(input_desc == nullptr, + REPORT_INNER_ERROR("E19999", "Input desc of op:%s(%s) not exist, index:%u, check invalid when %s", + desc->GetName().c_str(), desc->GetType().c_str(), i, 
__FUNCTION__); GELOGW("Get null input desc by index %u from node %s when copy from %s", i, desc->GetName().c_str(), node->GetName().c_str()); continue); @@ -137,6 +154,8 @@ NodePtr InsertCopyNode(const NodePtr &node, size_t n) { for (uint32_t i = 0; i < node->GetAllOutDataAnchorsSize(); ++i) { auto output_desc = desc->MutableOutputDesc(i); GE_IF_BOOL_EXEC(output_desc == nullptr, + REPORT_INNER_ERROR("E19999", "Output desc of op:%s(%s) not exist, index:%u, check invalid when %s", + desc->GetName().c_str(), desc->GetType().c_str(), i, __FUNCTION__); GELOGE(INTERNAL_ERROR, "Failed to get output desc by index %u from node %s when copy from %s", i, desc->GetName().c_str(), node->GetName().c_str()); return nullptr); @@ -145,6 +164,8 @@ NodePtr InsertCopyNode(const NodePtr &node, size_t n) { } const std::string &batch_label = "Batch_" + std::to_string(n); if (!AttrUtils::SetStr(desc, ATTR_NAME_BATCH_LABEL, batch_label)) { + REPORT_CALL_ERROR("E19999", "Set Attr:%s to op:%s(%s) failed when %s", ATTR_NAME_BATCH_LABEL.c_str(), + desc->GetName().c_str(), desc->GetType().c_str(), __FUNCTION__); GELOGE(FAILED, "set attr ATTR_NAME_BATCH_LABEL failed, node:%s.", name.c_str()); return nullptr; } @@ -167,6 +188,7 @@ bool IsAllDimsPositive(const std::vector &dims) { NodePtr InsertConst(const std::string &name, const ComputeGraphPtr &graph) { auto desc = MakeShared(); if (desc == nullptr) { + REPORT_CALL_ERROR("E19999", "New OpDesc failed when %s", __FUNCTION__); GELOGE(OUT_OF_MEMORY, "Failed to create const op %s, out of memory", name.c_str()); return nullptr; } @@ -175,14 +197,20 @@ NodePtr InsertConst(const std::string &name, const ComputeGraphPtr &graph) { GeTensor tensor; tensor.SetData(std::vector({0})); if (!AttrUtils::SetTensor(desc, ATTR_NAME_WEIGHTS, tensor)) { + REPORT_CALL_ERROR("E19999", "Set Attr:%s to op:%s(%s) failed when %s", ATTR_NAME_WEIGHTS.c_str(), + desc->GetName().c_str(), desc->GetType().c_str(), __FUNCTION__); GELOGE(OUT_OF_MEMORY, "Failed to init tensor value 
for const %s", name.c_str()); return nullptr; } if (!AttrUtils::SetBool(desc, ATTR_INSERT_BY_MBATCH, true)) { + REPORT_CALL_ERROR("E19999", "Set Attr:%s to op:%s(%s) failed when %s", ATTR_INSERT_BY_MBATCH.c_str(), + desc->GetName().c_str(), desc->GetType().c_str(), __FUNCTION__); GELOGE(OUT_OF_MEMORY, "Failed to set insert flag for const node %s", name.c_str()); return nullptr; } if (desc->AddOutputDesc(GeTensorDesc()) != GRAPH_SUCCESS) { + REPORT_CALL_ERROR("E19999", "Add output desc to op:%s(%s) failed when %s", + desc->GetName().c_str(), desc->GetType().c_str(), __FUNCTION__); GELOGE(OUT_OF_MEMORY, "Failed to add output desc for const node %s", name.c_str()); return nullptr; } @@ -371,6 +399,9 @@ Status MultiBatchGraphCopyer::GetEnterNodesGroupByFrame(mapGetName().c_str(), op_desc->GetType().c_str(), __FUNCTION__); GELOGE(FAILED, "Get attr frame_name of enter[%s] failed.", node->GetName().c_str()); return FAILED; } @@ -578,7 +609,9 @@ Status MultiBatchGraphCopyer::LabelInBatchBranchStatus() { GELOGD("Start label in batch branch status."); for (const auto &data : origin_data_nodes_) { auto op_desc = data->GetOpDesc(); - GE_IF_BOOL_EXEC(op_desc == nullptr, GELOGE(PARAM_INVALID, "Op desc is nullptr."); + GE_IF_BOOL_EXEC(op_desc == nullptr, + REPORT_INNER_ERROR("E19999", "op_desc in origin_data_nodes_ is nullptr, check invalid when %s", __FUNCTION__); + GELOGE(PARAM_INVALID, "Op desc is nullptr."); return PARAM_INVALID); LabelStatusForData(data); if (!GetLocalOmgContext().dynamic_node_type.empty()) { @@ -853,6 +886,9 @@ NodePtr MultiBatchGraphCopyer::FindSwitchnNodeForDataEdge(const OutDataAnchorPtr GELOGI("The output idx %d has %zu referenced nums.", output_idx, data_out_anchor->GetPeerInDataAnchors().size()); for (const auto &peer_in_anchor : data_out_anchor->GetPeerInDataAnchors()) { if (peer_in_anchor->GetOwnerNode()->GetOpDesc() == nullptr) { + REPORT_INNER_ERROR("E19999", "peer op_desc of op:%s(%s)'s out_index:%d anchor exist nullptr, " + "check invalid when 
MultiBatchGraphCopyer %s", + data_node->GetName().c_str(), data_node->GetType().c_str(), output_idx, __FUNCTION__); GELOGE(INTERNAL_ERROR, "Op desc should not be nullptr."); return nullptr; } @@ -862,6 +898,11 @@ NodePtr MultiBatchGraphCopyer::FindSwitchnNodeForDataEdge(const OutDataAnchorPtr } if (output_idx >= static_cast(getnext_nodes_to_switchn_.size()) || referenced_index >= getnext_nodes_to_switchn_.at(output_idx).size()) { + REPORT_INNER_ERROR("E19999", "output_index:%d of op:%s(%s) >= getnext_nodes_to_switchn_.size():%zu or " + "referenced_index:%zu out of range of getnext_nodes_to_switchn_[output_index], " + "check invalid when %s", output_idx, + data_node->GetName().c_str(), data_node->GetType().c_str(), getnext_nodes_to_switchn_.size(), + referenced_index, __FUNCTION__); GELOGE(INTERNAL_ERROR, "Output idx is %d, referenced index is %zu", output_idx, referenced_index); return nullptr; } @@ -891,6 +932,10 @@ Status MultiBatchGraphCopyer::CopyInDataEdges(const NodePtr &origin_node, int ba if (switchn != nullptr) { auto ret = GraphUtils::AddEdge(switchn->GetOutDataAnchor(batch_num), dst_anchor); if (ret != GRAPH_SUCCESS) { + REPORT_CALL_ERROR("E19999", "Add edge between op:%s(%s)(out_index:%d) and op:%s(%s)(in_index:%d) failed " + "when MultiBatchGraphCopyer %s", switchn->GetName().c_str(), switchn->GetType().c_str(), + batch_num, copyed_node->GetName().c_str(), copyed_node->GetType().c_str(), + in_anchor->GetIdx(), __FUNCTION__); GELOGE(INTERNAL_ERROR, "Failed to add data edge between %s(%d) to %s(%d), error-code %u", switchn->GetName().c_str(), batch_num, copyed_node->GetName().c_str(), in_anchor->GetIdx(), ret); @@ -906,6 +951,11 @@ Status MultiBatchGraphCopyer::CopyInDataEdges(const NodePtr &origin_node, int ba auto src_batch_node = batch_branch_iter->second.at(batch_num); auto ret = GraphUtils::AddEdge(src_batch_node->GetOutDataAnchor(origin_src_anchor->GetIdx()), dst_anchor); if (ret != GRAPH_SUCCESS) { + 
REPORT_CALL_ERROR("E19999", "Add edge between op:%s(%s)(out_index:%d) and op:%s(%s)(in_index:%d) failed " + "when MultiBatchGraphCopyer %s", src_batch_node->GetName().c_str(), + src_batch_node->GetType().c_str(), origin_src_anchor->GetIdx(), + copyed_node->GetName().c_str(), copyed_node->GetType().c_str(), + in_anchor->GetIdx(), __FUNCTION__); GELOGE(INTERNAL_ERROR, "Failed to add data edge between %s(%d) to %s(%d), error-code %u", src_batch_node->GetName().c_str(), batch_num, copyed_node->GetName().c_str(), in_anchor->GetIdx(), ret); return INTERNAL_ERROR; @@ -917,6 +967,11 @@ Status MultiBatchGraphCopyer::CopyInDataEdges(const NodePtr &origin_node, int ba auto ret = GraphUtils::AddEdge(origin_src_anchor, dst_anchor); if (ret != GRAPH_SUCCESS) { + REPORT_CALL_ERROR("E19999", "Add edge between op:%s(%s)(out_index:%d) and op:%s(%s)(in_index:%d) failed " + "when MultiBatchGraphCopyer %s", origin_src_node->GetName().c_str(), + origin_src_node->GetType().c_str(), origin_src_anchor->GetIdx(), + copyed_node->GetName().c_str(), copyed_node->GetType().c_str(), + in_anchor->GetIdx(), __FUNCTION__); GELOGE(INTERNAL_ERROR, "Failed to add data edge between origin node %s(%d) to copyed %s(%d)", origin_src_node->GetName().c_str(), origin_src_anchor->GetIdx(), copyed_node->GetName().c_str(), dst_anchor->GetIdx()); @@ -936,6 +991,10 @@ Status MultiBatchGraphCopyer::CopyInControlEdges(const NodePtr &node, int batch_ // reconnect data node auto ret = GraphUtils::AddEdge(switchn_iter->second->GetOutControlAnchor(), copyed_node->GetInControlAnchor()); if (ret != GRAPH_SUCCESS) { + REPORT_CALL_ERROR("E19999", "Add ctrl edge between op:%s(%s) and op:%s(%s) failed " + "when MultiBatchGraphCopyer %s", + switchn_iter->second->GetName().c_str(), switchn_iter->second->GetType().c_str(), + copyed_node->GetName().c_str(), copyed_node->GetType().c_str(), __FUNCTION__); GELOGE(INTERNAL_ERROR, "Failed to add control edge between %s to %s, error-code %u", switchn_iter->second->GetName().c_str(), 
copyed_node->GetName().c_str(), ret); return INTERNAL_ERROR; @@ -950,6 +1009,10 @@ Status MultiBatchGraphCopyer::CopyInControlEdges(const NodePtr &node, int batch_ auto src_batch_node = batch_branch_iter->second.at(batch_num); auto ret = GraphUtils::AddEdge(src_batch_node->GetOutControlAnchor(), copyed_node->GetInControlAnchor()); if (ret != GRAPH_SUCCESS) { + REPORT_CALL_ERROR("E19999", "Add ctrl edge between op:%s(%s) and op:%s(%s) failed " + "when MultiBatchGraphCopyer %s", + src_batch_node->GetName().c_str(), src_batch_node->GetType().c_str(), + copyed_node->GetName().c_str(), copyed_node->GetType().c_str(), __FUNCTION__); GELOGE(INTERNAL_ERROR, "Failed to add data edge between %s to %s, error-code %u", src_batch_node->GetName().c_str(), copyed_node->GetName().c_str(), ret); return INTERNAL_ERROR; @@ -960,6 +1023,10 @@ Status MultiBatchGraphCopyer::CopyInControlEdges(const NodePtr &node, int batch_ auto ret = GraphUtils::AddEdge(origin_src_node->GetOutControlAnchor(), copyed_node->GetInControlAnchor()); if (ret != GRAPH_SUCCESS) { + REPORT_CALL_ERROR("E19999", "Add ctrl edge between op:%s(%s) and op:%s(%s) failed " + "when MultiBatchGraphCopyer %s", + origin_src_node->GetName().c_str(), origin_src_node->GetType().c_str(), + copyed_node->GetName().c_str(), copyed_node->GetType().c_str(), __FUNCTION__); GELOGE(INTERNAL_ERROR, "Failed to add control edge from origin %s to copyed %s", origin_src_node->GetName().c_str(), copyed_node->GetName().c_str()); return INTERNAL_ERROR; @@ -973,6 +1040,7 @@ Status MultiBatchGraphCopyer::CopyInControlEdges(const NodePtr &node, int batch_ NodePtr MultiBatchGraphCopyer::InsertShapeDataNode() { auto desc = MakeShared(); if (desc == nullptr) { + REPORT_CALL_ERROR("E19999", "New OpDesc failed when MultiBatchGraphCopyer %s", __FUNCTION__); GELOGE(OUT_OF_MEMORY, "Failed to create shape data node, out of memory"); return nullptr; } @@ -987,27 +1055,38 @@ NodePtr MultiBatchGraphCopyer::InsertShapeDataNode() { GeTensorDesc 
tensor_desc(GeShape({static_cast(shapes_.at(0).size())}), FORMAT_ND, DT_INT64); auto ret = desc->AddInputDesc(tensor_desc); if (ret != GRAPH_SUCCESS) { + REPORT_CALL_ERROR("E19999", "Add input desc to op:%s(%s) failed when MultiBatchGraphCopyer %s", + desc->GetName().c_str(), desc->GetType().c_str(), __FUNCTION__); GELOGE(INTERNAL_ERROR, "Failed to add input desc for created data"); return nullptr; } ret = desc->AddOutputDesc(tensor_desc); if (ret != GRAPH_SUCCESS) { + REPORT_CALL_ERROR("E19999", "Add output desc into op:%s(%s) failed when MultiBatchGraphCopyer %s", + desc->GetName().c_str(), desc->GetType().c_str(), __FUNCTION__); GELOGE(INTERNAL_ERROR, "Failed to add output desc for created data"); return nullptr; } if (!AttrUtils::SetBool(desc, ATTR_INSERT_BY_MBATCH, true)) { + REPORT_CALL_ERROR("E19999", "Set Attr:%s to node:%s(%s) failed when MultiBatchGraphCopyer %s", + ATTR_INSERT_BY_MBATCH.c_str(), desc->GetName().c_str(), desc->GetType().c_str(), __FUNCTION__); GELOGE(INTERNAL_ERROR, "Failed to add attr for created data"); return nullptr; } auto data_node = graph_->AddNode(desc); if (data_node == nullptr) { + REPORT_CALL_ERROR("E19999", "Add node:%s(%s) to graph:%s failed when MultiBatchGraphCopyer %s", + desc->GetName().c_str(), desc->GetType().c_str(), graph_->GetName().c_str(), __FUNCTION__); GELOGE(INTERNAL_ERROR, "Failed to add shape data node to graph"); return nullptr; } ret = GraphUtils::AppendInputNode(graph_, data_node); if (ret != GRAPH_SUCCESS) { + REPORT_CALL_ERROR("E19999", "Append input node:%s(%s) to graph:%s failed when MultiBatchGraphCopyer %s", + data_node->GetName().c_str(), data_node->GetType().c_str(), + graph_->GetName().c_str(), __FUNCTION__); GELOGE(INTERNAL_ERROR, "Failed to append data node %s as input to graph", data_node->GetName().c_str()); return nullptr; } @@ -1019,6 +1098,7 @@ NodePtr MultiBatchGraphCopyer::InsertGetDynamicDimsNode() { GELOGD("Start insert getdynamicdims node to get shape info."); auto desc = MakeShared(); 
if (desc == nullptr) { + REPORT_CALL_ERROR("E19999", "New OpDesc failed when MultiBatchGraphCopyer %s", __FUNCTION__); GELOGE(OUT_OF_MEMORY, "Failed to create shape data node, out of memory"); return nullptr; } @@ -1040,33 +1120,49 @@ NodePtr MultiBatchGraphCopyer::InsertGetDynamicDimsNode() { tensor_desc.SetFormat(FORMAT_ND); tensor_desc.SetDataType(DT_INT64); auto ret = desc->AddInputDesc(tensor_desc); - GE_IF_BOOL_EXEC(ret != GRAPH_SUCCESS, GELOGE(INTERNAL_ERROR, "Failed to add input desc for created data"); - return nullptr); + GE_IF_BOOL_EXEC(ret != GRAPH_SUCCESS, + REPORT_CALL_ERROR("E19999", "Add input desc to op:%s(%s) failed when MultiBatchGraphCopyer %s", + desc->GetName().c_str(), desc->GetType().c_str(), __FUNCTION__); + GELOGE(INTERNAL_ERROR, "Failed to add input desc for created data"); + return nullptr); continue; } GeTensorDesc tensor_desc(GeShape({static_cast(input_shape_dims)}), FORMAT_ND, DT_INT64); auto ret = desc->AddInputDesc(tensor_desc); - GE_IF_BOOL_EXEC(ret != GRAPH_SUCCESS, GELOGE(INTERNAL_ERROR, "Failed to add input desc for created data"); - return nullptr); + GE_IF_BOOL_EXEC(ret != GRAPH_SUCCESS, + REPORT_CALL_ERROR("E19999", "Add input desc to op:%s(%s) failed when MultiBatchGraphCopyer %s", + desc->GetName().c_str(), desc->GetType().c_str(), __FUNCTION__); + GELOGE(INTERNAL_ERROR, "Failed to add input desc for created data"); + return nullptr); } GeTensorDesc tensor_desc(GeShape({static_cast(shapes_.at(0).size())}), FORMAT_ND, DT_INT64); auto ret = desc->AddOutputDesc(tensor_desc); - GE_IF_BOOL_EXEC(ret != GRAPH_SUCCESS, GELOGE(INTERNAL_ERROR, "Failed to add output desc for created data"); - return nullptr); + GE_IF_BOOL_EXEC(ret != GRAPH_SUCCESS, + REPORT_CALL_ERROR("E19999", "Add output desc to op:%s(%s) failed when MultiBatchGraphCopyer %s", + desc->GetName().c_str(), desc->GetType().c_str(), __FUNCTION__); + GELOGE(INTERNAL_ERROR, "Failed to add output desc for created data"); + return nullptr); if (!AttrUtils::SetBool(desc, 
ATTR_INSERT_BY_MBATCH, true)) { + REPORT_CALL_ERROR("E19999", "Set Attr:%s to node:%s(%s) failed when MultiBatchGraphCopyer %s", + ATTR_INSERT_BY_MBATCH.c_str(), desc->GetName().c_str(), desc->GetType().c_str(), __FUNCTION__); GELOGE(INTERNAL_ERROR, "Failed to add attr for created data"); return nullptr; } auto data_node = graph_->AddNode(desc); if (data_node == nullptr) { + REPORT_CALL_ERROR("E19999", "Add node:%s(%s) to graph:%s failed when MultiBatchGraphCopyer %s", + desc->GetName().c_str(), desc->GetType().c_str(), graph_->GetName().c_str(), __FUNCTION__); GELOGE(INTERNAL_ERROR, "Failed to add shape data node to graph"); return nullptr; } ret = GraphUtils::AppendInputNode(graph_, data_node); if (ret != GRAPH_SUCCESS) { + REPORT_CALL_ERROR("E19999", "Append input node:%s(%s) to graph:%s failed when MultiBatchGraphCopyer %s", + data_node->GetName().c_str(), data_node->GetType().c_str(), + graph_->GetName().c_str(), __FUNCTION__); GELOGE(INTERNAL_ERROR, "Failed to append data node %s as input to graph", data_node->GetName().c_str()); return nullptr; } @@ -1076,6 +1172,7 @@ NodePtr MultiBatchGraphCopyer::InsertGetDynamicDimsNode() { Status MultiBatchGraphCopyer::CheckArguments() { if (graph_ == nullptr) { + REPORT_INNER_ERROR("E19999", "graph_ is nullptr, check invalid when MultiBatchGraphCopyer %s", __FUNCTION__); GELOGE(PARAM_INVALID, "Failed to copy graph, the graph is null"); return PARAM_INVALID; } @@ -1122,6 +1219,10 @@ Status MultiBatchGraphCopyer::LinkDataToMerge(const NodePtr &data, const NodePtr for (size_t i = 0; i < shapes_.size(); ++i) { auto ret = GraphUtils::AddEdge(switchn->GetOutDataAnchor(i), merge->GetInDataAnchor(i)); GE_IF_BOOL_EXEC(ret != GRAPH_SUCCESS, + REPORT_CALL_ERROR("E19999", "Add edge between op:%s(%s)(index:%zu) and op:%s(%s)(index:%zu) failed " + "when MultiBatchGraphCopyer %s", + switchn->GetName().c_str(), switchn->GetType().c_str(), i, + merge->GetName().c_str(), merge->GetType().c_str(), i, __FUNCTION__); GELOGE(INTERNAL_ERROR, 
"Failed to add edge between switchn %s(%zu) to merge %s(%zu), error-code %u", switchn->GetName().c_str(), i, merge->GetName().c_str(), i, ret); return INTERNAL_ERROR); @@ -1132,6 +1233,10 @@ Status MultiBatchGraphCopyer::LinkDataToMerge(const NodePtr &data, const NodePtr Status MultiBatchGraphCopyer::LinkNodeToMerge(const NodePtr &node, int out_index, const NodePtr &merge) { auto ©ed_nodes = nodes_to_batch_nodes_[node.get()]; if (copyed_nodes.size() != shapes_.size()) { + REPORT_INNER_ERROR("E19999", "Create merge node for node %s failed, " + "the copyed nodes for it count %zu different with shape %zu, " + "check invalid when MultiBatchGraphCopyer %s", + node->GetName().c_str(), copyed_nodes.size(), shapes_.size(), __FUNCTION__); GELOGE(INTERNAL_ERROR, "Failed to create merge node for node %s, the copyed nodes for it count %zu different with shape %zu", node->GetName().c_str(), copyed_nodes.size(), shapes_.size()); @@ -1153,14 +1258,23 @@ Status MultiBatchGraphCopyer::LinkNodeToMerge(const NodePtr &node, int out_index return OUT_OF_MEMORY); auto ret = GraphUtils::AddEdge(src_node->GetOutControlAnchor(), const_node->GetInControlAnchor()); - GE_IF_BOOL_EXEC(ret != GRAPH_SUCCESS, GELOGE(INTERNAL_ERROR, "Failed to add control edge from %s to %s", - src_node->GetName().c_str(), const_node->GetName().c_str()); + GE_IF_BOOL_EXEC(ret != GRAPH_SUCCESS, + REPORT_CALL_ERROR("E19999", "Add ctrl edge between op:%s(%s) and op:%s(%s) failed " + "when MultiBatchGraphCopyer %s", + src_node->GetName().c_str(), src_node->GetType().c_str(), + const_node->GetName().c_str(), const_node->GetType().c_str(), __FUNCTION__); + GELOGE(INTERNAL_ERROR, "Failed to add control edge from %s to %s", + src_node->GetName().c_str(), const_node->GetName().c_str()); return INTERNAL_ERROR); src_node = const_node; } auto ret = GraphUtils::AddEdge(src_node->GetOutDataAnchor(out_index), merge->GetInDataAnchor(i)); if (ret != GRAPH_SUCCESS) { + REPORT_CALL_ERROR("E19999", "Add edge between 
op:%s(%s)(index:%d) and op:%s(%s)(index:%zu) failed " + "when MultiBatchGraphCopyer %s", + src_node->GetName().c_str(), src_node->GetType().c_str(), out_index, + merge->GetName().c_str(), merge->GetType().c_str(), i, __FUNCTION__); GELOGE(INTERNAL_ERROR, "Failed to add edge between copyed node %s(%d) to inserted merge node %s(%zu), error-code %u", copyed_nodes[i]->GetName().c_str(), out_index, merge->GetName().c_str(), i, ret); @@ -1219,6 +1333,8 @@ Status MultiBatchGraphCopyer::UpdateShapeOfShapeNode(const NodePtr &node, size_t GeShape output_shape(output_dims); output_desc.SetShape(output_shape); if (node->GetOpDesc()->UpdateOutputDesc(shape_index, output_desc) != SUCCESS) { + REPORT_CALL_ERROR("E19999", "Update output desc to op:%s(%s) failed, index:%zu, when MultiBatchGraphCopyer %s", + node->GetName().c_str(), node->GetType().c_str(), shape_index, __FUNCTION__); GELOGE(FAILED, "Update output desc fail."); return FAILED; } @@ -1253,6 +1369,10 @@ Status MultiBatchGraphCopyer::UpdateMaxShapeToData(const NodePtr &node, size_t o int64_t size = 1; for (auto dim : data_to_dynamic_info_.at(data_name).at(i)) { if (INT64_MAX / dim < size) { + REPORT_CALL_ERROR("E19999", "Op:%s(%s)'s shape:%s size will overflow after multi, " + "check invalid when MultiBatchGraphCopyer %s", + node->GetName().c_str(), node->GetType().c_str(), + formats::ShapeToString(data_to_dynamic_info_[data_name].at(i)).c_str(), __FUNCTION__); GELOGE(PARAM_INVALID, "The shape %s size overflow", formats::ShapeToString(data_to_dynamic_info_[data_name].at(i)).c_str()); return PARAM_INVALID; @@ -1300,6 +1420,7 @@ Status MultiBatchGraphCopyer::InsertSwitchNForData(const NodePtr &node, const si auto switchn_desc = MakeShared(); GE_IF_BOOL_EXEC(switchn_desc == nullptr, + REPORT_CALL_ERROR("E19999", "New OpDesc failed when MultiBatchGraphCopyer %s", __FUNCTION__); GELOGE(OUT_OF_MEMORY, "Failed to create switchn for data %s", node->GetName().c_str()); return OUT_OF_MEMORY); string switchn_name = node->GetName() 
+ "_ascend_mbatch_switchn"; @@ -1313,10 +1434,18 @@ Status MultiBatchGraphCopyer::InsertSwitchNForData(const NodePtr &node, const si GeTensorDesc tensor(NodeUtils::GetOutputDesc(*node, out_anchor_index)); GE_IF_BOOL_EXEC(switchn_desc->AddInputDesc("data", tensor) != GRAPH_SUCCESS, + REPORT_CALL_ERROR("E19999", "Add input desc to op:%s(%s) failed, input desc name:%s, " + "when MultiBatchGraphCopyer %s", + switchn_desc->GetName().c_str(), switchn_desc->GetType().c_str(), + "data", __FUNCTION__); GELOGE(OUT_OF_MEMORY, "Failed to add input tensor desc for %s", switchn_desc->GetName().c_str()); return OUT_OF_MEMORY); GeTensorDesc pred_tensor; GE_IF_BOOL_EXEC(switchn_desc->AddInputDesc("pred_value", pred_tensor) != GRAPH_SUCCESS, + REPORT_CALL_ERROR("E19999", "Add input desc to op:%s(%s) failed, input desc name:%s, " + "when MultiBatchGraphCopyer %s", + switchn_desc->GetName().c_str(), switchn_desc->GetType().c_str(), + "pred_value", __FUNCTION__); GELOGE(OUT_OF_MEMORY, "Failed to add input pred tensor desc for %s", switchn_desc->GetName().c_str()); return OUT_OF_MEMORY); std::vector input_dims_str; @@ -1340,11 +1469,18 @@ Status MultiBatchGraphCopyer::InsertSwitchNForData(const NodePtr &node, const si formats::JoinToString(tensor.GetShape().GetDims()); input_dims_str.emplace_back(input_str); if (!AttrUtils::SetListInt(tensor, ATTR_NAME_SWITCHN_PRED_VALUE, shapes_.at(i))) { + REPORT_CALL_ERROR("E19999", "Set Attr:%s to output tensor of node:%s(%s) failed, index:%zu, " + "when MultiBatchGraphCopyer %s", ATTR_NAME_SWITCHN_PRED_VALUE.c_str(), + node->GetName().c_str(), node->GetType().c_str(), out_anchor_index, __FUNCTION__); GELOGE(INTERNAL_ERROR, "Failed to add attr value on output %zu tensor", i); return INTERNAL_ERROR; } (void) AttrUtils::SetListInt(tensor, ATTR_NAME_COMBINED_DYNAMIC_DIMS, shape.GetDims()); if (switchn_desc->AddOutputDesc("output" + std::to_string(i), tensor) != GRAPH_SUCCESS) { + REPORT_CALL_ERROR("E19999", "Add output desc to op:%s(%s) failed, output 
desc name:%s, " + "when MultiBatchGraphCopyer %s", + switchn_desc->GetName().c_str(), switchn_desc->GetType().c_str(), + ("output" + std::to_string(i)).c_str(), __FUNCTION__); GELOGE(GRAPH_FAILED, "Opdesc AddOutputDesc failed"); return GRAPH_FAILED; } @@ -1352,15 +1488,23 @@ Status MultiBatchGraphCopyer::InsertSwitchNForData(const NodePtr &node, const si } (void)AttrUtils::SetListStr(node->GetOpDesc(), "_all_origin_gears_inputs", input_dims_str); if (!AttrUtils::SetListStr(switchn_desc, ATTR_USER_DESIGNEATE_SHAPE_ORDER, data_name_order_)) { + REPORT_CALL_ERROR("E19999", "Set Attr:%s to node:%s(%s) failed when MultiBatchGraphCopyer %s", + ATTR_USER_DESIGNEATE_SHAPE_ORDER.c_str(), + switchn_desc->GetName().c_str(), switchn_desc->GetType().c_str(), __FUNCTION__); GELOGE(INTERNAL_ERROR, "Failed to add user designate shape order attr on switchn node %s", switchn_desc->GetName().c_str()); return INTERNAL_ERROR; } if (!AttrUtils::SetBool(switchn_desc, ATTR_INSERT_BY_MBATCH, true)) { + REPORT_CALL_ERROR("E19999", "Set Attr:%s to node:%s(%s) failed when MultiBatchGraphCopyer %s", + ATTR_INSERT_BY_MBATCH.c_str(), switchn_desc->GetName().c_str(), switchn_desc->GetType().c_str(), + __FUNCTION__); GELOGE(INTERNAL_ERROR, "Failed to add insert attr on switchn node %s", switchn_desc->GetName().c_str()); return INTERNAL_ERROR; } if (!AttrUtils::SetStr(node->GetOpDesc(), kMbatchSwitchnName, switchn_desc->GetName())) { + REPORT_CALL_ERROR("E19999", "Set Attr:%s to node:%s(%s) failed when MultiBatchGraphCopyer %s", + kMbatchSwitchnName, node->GetName().c_str(), node->GetType().c_str(), __FUNCTION__); GELOGE(INTERNAL_ERROR, "Failed to add switchn attr on data node %s", node->GetName().c_str()); return INTERNAL_ERROR; } @@ -1371,6 +1515,9 @@ Status MultiBatchGraphCopyer::InsertSwitchNForData(const NodePtr &node, const si auto switchn = graph_->AddNode(switchn_desc); GE_IF_BOOL_EXEC(switchn == nullptr, + REPORT_CALL_ERROR("E19999", "Add node:%s(%s) to graph:%s failed when 
MultiBatchGraphCopyer %s", + switchn_desc->GetName().c_str(), switchn_desc->GetType().c_str(), + graph_->GetName().c_str(), __FUNCTION__); GELOGE(OUT_OF_MEMORY, "Failed to create switchn %s from desc", switchn_desc->GetName().c_str()); return OUT_OF_MEMORY); if (!getnext_sink_dynamic_dims_) { @@ -1416,6 +1563,8 @@ Status MultiBatchGraphCopyer::LinkGetDynamicDimsToNetOutput(const NodePtr &node) if (node->GetType() == NETOUTPUT) { if (!GetLocalOmgContext().dynamic_node_type.empty()) { if (!AttrUtils::SetStr(node->GetOpDesc(), ATTR_ALL_GEARS_INFO, GetLocalOmgContext().dynamic_dims)) { + REPORT_CALL_ERROR("E19999", "Set Attr:%s to node:%s(%s) failed when MultiBatchGraphCopyer %s", + ATTR_ALL_GEARS_INFO.c_str(), node->GetName().c_str(), node->GetType().c_str(), __FUNCTION__); GELOGE(INTERNAL_ERROR, "Failed to set all gears info attr on netoutput %s.", node->GetName().c_str()); return INTERNAL_ERROR; } @@ -1423,15 +1572,26 @@ Status MultiBatchGraphCopyer::LinkGetDynamicDimsToNetOutput(const NodePtr &node) if (getnext_sink_dynamic_dims_) { size_t input_index = node->GetAllInDataAnchors().size(); if (NodeUtils::AppendInputAnchor(node, input_index + 1) != GRAPH_SUCCESS) { + REPORT_CALL_ERROR("E19999", "Append %zu input anchors to node:%s(%s) failed when MultiBatchGraphCopyer %s", + input_index + 1, node->GetName().c_str(), node->GetType().c_str(), __FUNCTION__); GELOGE(INTERNAL_ERROR, "Append input anchor of %s of %zu failed.", node->GetName().c_str(), input_index); return INTERNAL_ERROR; } auto ret = ge::GraphUtils::AddEdge(shape_data_->GetOutDataAnchor(kDataOutIndex), node->GetInDataAnchor(input_index)); - GE_IF_BOOL_EXEC(ret != GRAPH_SUCCESS, GELOGE(INTERNAL_ERROR, "Failed to link netoutput %s to getdynamicdims %s", - node->GetName().c_str(), shape_data_->GetName().c_str()); + GE_IF_BOOL_EXEC( + ret != GRAPH_SUCCESS, + REPORT_CALL_ERROR("E19999", "Add edge between op:%s(%s)(index:%d) and op:%s(%s)(index:%zu) failed " + "when MultiBatchGraphCopyer %s", + 
shape_data_->GetName().c_str(), shape_data_->GetType().c_str(), kDataOutIndex, + node->GetName().c_str(), node->GetType().c_str(), input_index, __FUNCTION__); + GELOGE(INTERNAL_ERROR, "Failed to link netoutput %s to getdynamicdims %s", + node->GetName().c_str(), shape_data_->GetName().c_str()); return INTERNAL_ERROR); if (!AttrUtils::SetBool(node->GetOpDesc(), ATTR_GETNEXT_SINK_DYNMAIC, true)) { + REPORT_CALL_ERROR("E19999", "Set Attr:%s to node:%s(%s) failed when MultiBatchGraphCopyer %s", + ATTR_GETNEXT_SINK_DYNMAIC.c_str(), node->GetName().c_str(), node->GetType().c_str(), + __FUNCTION__); GELOGE(INTERNAL_ERROR, "Failed to set getnext sink dynamic attr on netoutput %s.", node->GetName().c_str()); return INTERNAL_ERROR; } @@ -1459,6 +1619,9 @@ Status MultiBatchGraphCopyer::AddAttrForGetDynamicDims(const NodePtr &node) { GELOGD("Add attr for :%s, type is %s:", shape_data_->GetName().c_str(), shape_data_->GetType().c_str()); size_t data_count = node->GetAllOutDataAnchors().size() / kDivisionConst; if (!AttrUtils::SetInt(shape_data_->GetOpDesc(), ATTR_GETNEXT_SINK_DATA_COUNT, data_count)) { + REPORT_CALL_ERROR("E19999", "Set Attr:%s to node:%s(%s) failed when MultiBatchGraphCopyer %s", + ATTR_GETNEXT_SINK_DATA_COUNT.c_str(), + shape_data_->GetName().c_str(), shape_data_->GetType().c_str(), __FUNCTION__); GELOGE(INTERNAL_ERROR, "set ATTR_GETNEXT_SINK_DATA_COUNT failed"); return INTERNAL_ERROR; } @@ -1475,6 +1638,9 @@ Status MultiBatchGraphCopyer::AddAttrForGetDynamicDims(const NodePtr &node) { } } if (!AttrUtils::SetListInt(shape_data_->GetOpDesc(), ATTR_GETNEXT_SINK_SHAPE_INFO, shape_info)) { + REPORT_CALL_ERROR("E19999", "Set Attr:%s to node:%s(%s) failed when MultiBatchGraphCopyer %s", + ATTR_GETNEXT_SINK_SHAPE_INFO.c_str(), + shape_data_->GetName().c_str(), shape_data_->GetType().c_str(), __FUNCTION__); GELOGE(INTERNAL_ERROR, "set ATTR_GETNEXT_SINK_SHAPE_INFO failed"); return INTERNAL_ERROR; } @@ -1491,8 +1657,13 @@ Status 
MultiBatchGraphCopyer::AddLinkForGetDynamicDims(const NodePtr &node) { shape_data_->GetName().c_str(), input_index); auto out_data_anchor = node->GetOutDataAnchor(out_index); auto ret = GraphUtils::AddEdge(out_data_anchor, shape_data_->GetInDataAnchor(input_index)); - GE_IF_BOOL_EXEC(ret != GRAPH_SUCCESS, GELOGE(INTERNAL_ERROR, "Failed to link getnext %s to getdynamicdims %s", - node->GetName().c_str(), shape_data_->GetName().c_str()); + GE_IF_BOOL_EXEC( + ret != GRAPH_SUCCESS, + REPORT_CALL_ERROR("E19999", "Add edge between op:%s(%s)(index:%zu) and op:%s(%s)(index:%zu) failed " + "when MultiBatchGraphCopyer %s", node->GetName().c_str(), node->GetType().c_str(), out_index, + shape_data_->GetName().c_str(), shape_data_->GetType().c_str(), input_index, __FUNCTION__); + GELOGE(INTERNAL_ERROR, "Failed to link getnext %s to getdynamicdims %s", + node->GetName().c_str(), shape_data_->GetName().c_str()); return INTERNAL_ERROR); } return SUCCESS; @@ -1506,6 +1677,9 @@ Status MultiBatchGraphCopyer::LinkEdges() { if (data_nodes_to_switchn_.count(node.get()) > 0) { auto switchn = data_nodes_to_switchn_[node.get()]; GE_IF_BOOL_EXEC(switchn == nullptr, + REPORT_INNER_ERROR("E19999", "switchn in data_nodes_to_switchn_ for op:%s(%s) is nullptr, " + "check invalid when MultiBatchGraphCopyer %s", + node->GetName().c_str(), node->GetType().c_str(), __FUNCTION__); GELOGE(PARAM_INVALID, "Switchn should not be nullptr for %s.", node->GetName().c_str()); return OUT_OF_MEMORY); ret = LinkDataToSwitchN(node, switchn, kDataOutIndex); @@ -1545,14 +1719,26 @@ Status MultiBatchGraphCopyer::LinkEdges() { Status MultiBatchGraphCopyer::LinkDataToSwitchN(const NodePtr &data, const NodePtr &switchn, const int &out_index) { auto ret = GraphUtils::AddEdge(shape_data_->GetOutDataAnchor(kDataOutIndex), switchn->GetInDataAnchor(kSwitchNPredIndex)); - GE_IF_BOOL_EXEC(ret != GRAPH_SUCCESS, GELOGE(INTERNAL_ERROR, "Failed to link shape data %s to switchn %s", - shape_data_->GetName().c_str(), 
switchn->GetName().c_str()); - return INTERNAL_ERROR); + GE_IF_BOOL_EXEC( + ret != GRAPH_SUCCESS, + REPORT_CALL_ERROR("E19999", "Add edge between op:%s(%s)(index:%d) and op:%s(%s)(index:%d) failed " + "when MultiBatchGraphCopyer %s", + shape_data_->GetName().c_str(), shape_data_->GetType().c_str(), kDataOutIndex, + switchn->GetName().c_str(), switchn->GetType().c_str(), kSwitchNPredIndex, __FUNCTION__); + GELOGE(INTERNAL_ERROR, "Failed to link shape data %s to switchn %s", + shape_data_->GetName().c_str(), switchn->GetName().c_str()); + return INTERNAL_ERROR); ret = GraphUtils::AddEdge(data->GetOutDataAnchor(out_index), switchn->GetInDataAnchor(kSwitchNDataIndex)); - GE_IF_BOOL_EXEC(ret != GRAPH_SUCCESS, GELOGE(INTERNAL_ERROR, "Failed to link data %s to switchn %s", - data->GetName().c_str(), switchn->GetName().c_str()); - return INTERNAL_ERROR); + GE_IF_BOOL_EXEC( + ret != GRAPH_SUCCESS, + REPORT_CALL_ERROR("E19999", "Add edge between op:%s(%s)(index:%d) and op:%s(%s)(index:%d) failed " + "when MultiBatchGraphCopyer %s", + data->GetName().c_str(), data->GetType().c_str(), out_index, + switchn->GetName().c_str(), switchn->GetType().c_str(), kSwitchNDataIndex, __FUNCTION__); + GELOGE(INTERNAL_ERROR, "Failed to link data %s to switchn %s", + data->GetName().c_str(), switchn->GetName().c_str()); + return INTERNAL_ERROR); return SUCCESS; } @@ -1594,6 +1780,8 @@ Status MultiBatchGraphCopyer::LinkToMerge(const NodePtr &node) { } continue; } + REPORT_INNER_ERROR("E19999", "The merge node %s is created, index %zu, but can not find the src node, " + "check invalid when MultiBatchGraphCopyer %s", merge_node->GetName().c_str(), i, __FUNCTION__); GELOGE(INTERNAL_ERROR, "The merge node %s is created, index %zu, but can not find the src node", merge_node->GetName().c_str(), i); return INTERNAL_ERROR; @@ -1630,24 +1818,40 @@ Status MultiBatchGraphCopyer::LinkToNodeOutBranch(const NodePtr &node) { } auto iter = nodes_to_merge_nodes_.find(in_node.get()); if (iter == 
nodes_to_merge_nodes_.end()) { + REPORT_INNER_ERROR("E19999", "Failed to link data edge from %s(%s)(index:%d) to %s(%s)(index:%d), " + "cause no merge node found, check invalid when MultiBatchGraphCopyer %s", + in_node->GetName().c_str(), in_node->GetType().c_str(), src_out_anchor->GetIdx(), + node->GetName().c_str(), node->GetType().c_str(), in_data_anchor->GetIdx(), __FUNCTION__); GELOGE(INTERNAL_ERROR, "Failed to link IO data edge from %s(%d) to %s(%d), no merge node found", in_node->GetName().c_str(), src_out_anchor->GetIdx(), node->GetName().c_str(), in_data_anchor->GetIdx()); return INTERNAL_ERROR; } auto merge_node = iter->second[src_out_anchor->GetIdx()]; if (merge_node == nullptr) { + REPORT_INNER_ERROR("E19999", "Failed to link data edge from %s(%s)(index:%d) to %s(%s)(index:%d), " + "cause no merge node found, check invalid when MultiBatchGraphCopyer %s", + in_node->GetName().c_str(), in_node->GetType().c_str(), src_out_anchor->GetIdx(), + node->GetName().c_str(), node->GetType().c_str(), in_data_anchor->GetIdx(), __FUNCTION__); GELOGE(INTERNAL_ERROR, "Failed to link IO data edge from %s(%d) to %s(%d), no merge node found", in_node->GetName().c_str(), src_out_anchor->GetIdx(), node->GetName().c_str(), in_data_anchor->GetIdx()); return INTERNAL_ERROR; } auto ret = src_out_anchor->Unlink(in_data_anchor); if (ret != GRAPH_SUCCESS) { + REPORT_INNER_ERROR("E19999", "Unlink edge from %s(%s)(index:%d) to %s(%s)(index:%d) failed, " + "when MultiBatchGraphCopyer %s", + in_node->GetName().c_str(), in_node->GetType().c_str(), src_out_anchor->GetIdx(), + node->GetName().c_str(), node->GetType().c_str(), in_data_anchor->GetIdx(), __FUNCTION__); GELOGE(INTERNAL_ERROR, "Failed to unlink the control edge from %s(%d) to %s(%d)", in_node->GetName().c_str(), src_out_anchor->GetIdx(), node->GetName().c_str(), in_data_anchor->GetIdx()); return INTERNAL_ERROR; } ret = GraphUtils::AddEdge(merge_node->GetOutDataAnchor(kMergeDataOutIndex), in_data_anchor); if (ret != 
GRAPH_SUCCESS) { + REPORT_CALL_ERROR("E19999", "Add edge between op:%s(%s)(index:%d) and op:%s(%s)(index:%d) failed " + "when MultiBatchGraphCopyer %s", + merge_node->GetName().c_str(), merge_node->GetType().c_str(), kMergeDataOutIndex, + node->GetName().c_str(), node->GetType().c_str(), in_data_anchor->GetIdx(), __FUNCTION__); GELOGE(INTERNAL_ERROR, "Failed to add data edge from %s(%d) to %s(%d)", merge_node->GetName().c_str(), src_out_anchor->GetIdx(), node->GetName().c_str(), in_data_anchor->GetIdx()); return INTERNAL_ERROR; @@ -1662,28 +1866,49 @@ Status MultiBatchGraphCopyer::LinkToNodeOutBranch(const NodePtr &node) { } auto iter = nodes_to_merge_nodes_.find(in_node.get()); if (iter == nodes_to_merge_nodes_.end()) { + REPORT_INNER_ERROR("E19999", "Failed to link IO control edge from %s(%s) to %s(%s), no merge node found," + " check invalid when MultiBatchGraphCopyer %s", + in_node->GetName().c_str(), in_node->GetType().c_str(), + node->GetName().c_str(), node->GetType().c_str(), __FUNCTION__); GELOGE(INTERNAL_ERROR, "Failed to link IO control edge from %s to %s, no merge node found", in_node->GetName().c_str(), node->GetName().c_str()); return INTERNAL_ERROR; } auto merge_node = iter->second[0]; if (merge_node == nullptr) { + REPORT_INNER_ERROR("E19999", "Failed to link IO control edge from %s(%s) to %s(%s), no merge node found," + " check invalid when MultiBatchGraphCopyer %s", + in_node->GetName().c_str(), in_node->GetType().c_str(), + node->GetName().c_str(), node->GetType().c_str(), __FUNCTION__); GELOGE(INTERNAL_ERROR, "Failed to link IO control edge from %s to %s, no merge node found", in_node->GetName().c_str(), node->GetName().c_str()); return INTERNAL_ERROR; } GE_IF_BOOL_EXEC(in_node->GetOutControlAnchor() == nullptr, + REPORT_INNER_ERROR("E19999", "Out control anchor of op:%s(%s) is nullptr, " + "check invalid when MultiBatchGraphCopyer %s", + in_node->GetName().c_str(), in_node->GetType().c_str(), __FUNCTION__); GELOGE(INTERNAL_ERROR, "Innode 
outputControlAnchor is null"); return INTERNAL_ERROR); auto ret = in_node->GetOutControlAnchor()->Unlink(node->GetInControlAnchor()); - GE_IF_BOOL_EXEC(ret != GRAPH_SUCCESS, GELOGE(INTERNAL_ERROR, "Failed to unlink the control edge from %s to %s", - in_node->GetName().c_str(), node->GetName().c_str()); + GE_IF_BOOL_EXEC(ret != GRAPH_SUCCESS, + REPORT_INNER_ERROR("E19999", "Unlink ctrl edge from %s(%s) to %s(%s) failed, " + "when MultiBatchGraphCopyer %s", + in_node->GetName().c_str(), in_node->GetType().c_str(), + node->GetName().c_str(), node->GetType().c_str(), __FUNCTION__); + GELOGE(INTERNAL_ERROR, "Failed to unlink the control edge from %s to %s", + in_node->GetName().c_str(), node->GetName().c_str()); return INTERNAL_ERROR); ret = GraphUtils::AddEdge(merge_node->GetOutControlAnchor(), node->GetInControlAnchor()); - GE_IF_BOOL_EXEC(ret != GRAPH_SUCCESS, GELOGE(INTERNAL_ERROR, "Failed to add control edge from %s to %s", - merge_node->GetName().c_str(), node->GetName().c_str()); - return INTERNAL_ERROR); + GE_IF_BOOL_EXEC( + ret != GRAPH_SUCCESS, + REPORT_CALL_ERROR("E19999", "Add ctrl edge between op:%s(%s) and op:%s(%s) failed " + "when MultiBatchGraphCopyer %s", merge_node->GetName().c_str(), merge_node->GetType().c_str(), + node->GetName().c_str(), node->GetType().c_str(), __FUNCTION__); + GELOGE(INTERNAL_ERROR, "Failed to add control edge from %s to %s", + merge_node->GetName().c_str(), node->GetName().c_str()); + return INTERNAL_ERROR); GELOGI("Link control edge from merge %s(from %s) to %s", merge_node->GetName().c_str(), in_node->GetName().c_str(), node->GetName().c_str()); } @@ -1779,6 +2004,8 @@ void GetDynamicShapeByGraph(const ComputeGraphPtr &graph, const NodePtr &node, for (size_t j = 0; j < dynamic_branch_names.size(); ++j) { const auto &subgraph = graph->GetSubgraph(dynamic_branch_names[j]); if (subgraph == nullptr) { + REPORT_INNER_ERROR("E19999", "Get subgraph:%s from graph:%s failed when MultiBatchGraphCopyer %s", + 
dynamic_branch_names[j].c_str(), graph->GetName().c_str(), __FUNCTION__); GELOGE(GE_GRAPH_EMPTY_SUBGRAPH, "Subgraph not found, name: %s", dynamic_branch_names[j].c_str()); dynamic_output_dims.clear(); return; @@ -1786,6 +2013,8 @@ void GetDynamicShapeByGraph(const ComputeGraphPtr &graph, const NodePtr &node, const auto &out_node = subgraph->FindFirstNodeMatchType(NETOUTPUT); if (out_node == nullptr) { + REPORT_INNER_ERROR("E19999", "No netoutput node exist in subgraph:%s, check invalid " + "when MultiBatchGraphCopyer %s", subgraph->GetName().c_str(), __FUNCTION__); GELOGE(GE_GRAPH_GRAPH_NODE_NULL, "NetOutput not found, name: %s", dynamic_branch_names[j].c_str()); dynamic_output_dims.clear(); return; @@ -1794,6 +2023,9 @@ void GetDynamicShapeByGraph(const ComputeGraphPtr &graph, const NodePtr &node, GELOGI("Find the subgraph Output node %s and the index is %zu", out_node->GetName().c_str(), i); const auto &out_desc = out_node->GetOpDesc(); if (out_desc == nullptr || out_desc->GetInputsSize() <= i) { + REPORT_INNER_ERROR("E19999", "op_desc of node in subgraph:%s is nullptr or input desc size:%zu <= %zu, " + "check invalid when MultiBatchGraphCopyer %s", subgraph->GetName().c_str(), + (out_desc == nullptr ? 0 : out_desc->GetInputsSize()), i, __FUNCTION__); GELOGE(GE_GRAPH_GRAPH_NODE_NULL, "Get Input desc failed, name: %s, index: %zu", out_node->GetName().c_str(), i); dynamic_output_dims.clear(); return; @@ -1895,6 +2127,9 @@ Status GetDynamicOutputShape(ComputeGraphPtr &graph) { if ((net_output != nullptr) && !dynamic_output_dims.empty()) { GetDirectOutputShape(graph, net_output, dynamic_output_index, dynamic_output_dims); if (!AttrUtils::SetListStr(net_output->GetOpDesc(), ATTR_NAME_DYNAMIC_OUTPUT_DIMS, dynamic_output_dims)) { + REPORT_CALL_ERROR("E19999", "Set Attr:%s to node:%s(%s) failed when MultiBatchGraphCopyer %s", + ATTR_NAME_DYNAMIC_OUTPUT_DIMS.c_str(), + net_output->GetName().c_str(), net_output->GetType().c_str(), __FUNCTION__); GELOGE(FAILED, "Set dynamic output dims attr failed"); 
return FAILED; } diff --git a/ge/graph/preprocess/multi_batch_options.cc b/ge/graph/preprocess/multi_batch_options.cc index 3bde0efb..38968661 100644 --- a/ge/graph/preprocess/multi_batch_options.cc +++ b/ge/graph/preprocess/multi_batch_options.cc @@ -102,6 +102,10 @@ Status DistinguishGetNextAndData(ComputeGraphPtr &graph, vector &data_n Status CheckSequenceOfData(ComputeGraphPtr &graph, const vector &data_nodes) { GELOGD("Start check input sequence from data nodes and input shape."); if (data_nodes.size() != GetLocalOmgContext().user_input_dims.size()) { + REPORT_INNER_ERROR("E19999", "Count:%zu of data_nodes in graph:%s should be equal to " + "input_shape count:%zu from option, check invalid when %s", + data_nodes.size(), graph->GetName().c_str(), GetLocalOmgContext().user_input_dims.size(), + __FUNCTION__); GELOGE(PARAM_INVALID, "The count of input shape:%zu should be equal to the count of data num:%zu.", GetLocalOmgContext().user_input_dims.size(), data_nodes.size()); return PARAM_INVALID; @@ -119,6 +123,11 @@ Status CheckSequenceOfData(ComputeGraphPtr &graph, const vector &data_n continue; } if (dynamic_dims.size() != output_shape.size()) { + REPORT_INNER_ERROR("E19999", "The output shape of %s is %s, the input shape from options of %s is %s, graph:%s," + " check invalid when %s", data_node->GetName().c_str(), + formats::JoinToString(output_shape).c_str(), + GetLocalOmgContext().user_input_dims.at(i).first.c_str(), + formats::JoinToString(dynamic_dims).c_str(), graph->GetName().c_str(), __FUNCTION__); GELOGE(PARAM_INVALID, "The output shape of %s is %s, the input shape from options of %s is %s.", data_node->GetName().c_str(), formats::JoinToString(output_shape).c_str(), GetLocalOmgContext().user_input_dims.at(i).first.c_str(), formats::JoinToString(dynamic_dims).c_str()); @@ -126,6 +135,11 @@ Status CheckSequenceOfData(ComputeGraphPtr &graph, const vector &data_n } for (size_t j = 0; j < dynamic_dims.size(); ++j) { if (dynamic_dims.at(j) != kDynmaicDims && 
dynamic_dims.at(j) != output_shape.at(j)) { + REPORT_INNER_ERROR("E19999", "Value of input shape %s from option and output shape %s of data op:%s " + "should be equal to %d, index:%zu, graph:%s, check invalid when %s", + formats::JoinToString(dynamic_dims).c_str(), + formats::JoinToString(output_shape).c_str(), data_node->GetName().c_str(), kDynmaicDims, + j, graph->GetName().c_str(), __FUNCTION__); GELOGE(INTERNAL_ERROR, "Value of input shape %s should be equal to %s.", formats::JoinToString(dynamic_dims).c_str(), formats::JoinToString(output_shape).c_str()); return INTERNAL_ERROR; @@ -138,6 +152,9 @@ Status CheckSequenceOfData(ComputeGraphPtr &graph, const vector &data_n Status CheckSequenceOfGetnext(ComputeGraphPtr &graph, const vector &getnext_sink_node) { GELOGD("Start check input sequence from getnext sink nodes and input shape."); if (getnext_sink_node.size() != kNumOfGetnextNode) { + REPORT_INNER_ERROR("E19999", "Not support dynamic dims when a graph with multi getnext nodes, graph:%s, " + "num of getnext node:%zu, check invalid when %s", + graph->GetName().c_str(), getnext_sink_node.size(), __FUNCTION__); GELOGE(PARAM_INVALID, "Not support dynamic dims when a graph with multi getnext nodes."); return PARAM_INVALID; } @@ -147,6 +164,9 @@ Status CheckSequenceOfGetnext(ComputeGraphPtr &graph, const vector &get GE_CHECK_NOTNULL(op_desc); size_t data_count = data_node->GetAllOutDataAnchors().size() / kDivisionConst; if (data_count != GetLocalOmgContext().user_input_dims.size()) { + REPORT_INNER_ERROR("E19999", "Output desc count of %s is %zu, should be equal to count of input shape: %zu, " + "graph:%s, check invalid when %s", op_desc->GetName().c_str(), data_count, + GetLocalOmgContext().user_input_dims.size(), graph->GetName().c_str(), __FUNCTION__); GELOGE(PARAM_INVALID, "Output count of %s is %zu, should be equal to count of input shape: %zu", op_desc->GetName().c_str(), data_count, GetLocalOmgContext().user_input_dims.size()); return PARAM_INVALID; @@ 
-161,6 +181,12 @@ Status CheckSequenceOfGetnext(ComputeGraphPtr &graph, const vector &get continue; } if (dynamic_dims.size() != output_shape.size()) { + REPORT_INNER_ERROR("E19999", "The %zu output_shape of %s is %s not equal to the input_shape:%s " + "from options of %s, graph:%s, check invalid when %s", i, + data_node->GetName().c_str(), formats::JoinToString(output_shape).c_str(), + formats::JoinToString(dynamic_dims).c_str(), + GetLocalOmgContext().user_input_dims.at(i).first.c_str(), + graph->GetName().c_str(), __FUNCTION__); GELOGE(PARAM_INVALID, "the output_shape of %s is %s, the input_shape from options of %s is %s.", data_node->GetName().c_str(), formats::JoinToString(output_shape).c_str(), GetLocalOmgContext().user_input_dims.at(i).first.c_str(), formats::JoinToString(dynamic_dims).c_str()); @@ -168,6 +194,11 @@ Status CheckSequenceOfGetnext(ComputeGraphPtr &graph, const vector &get } for (size_t j = 0; j < dynamic_dims.size(); ++j) { if (dynamic_dims.at(j) != kDynmaicDims && dynamic_dims.at(j) != output_shape.at(j)) { + REPORT_INNER_ERROR("E19999", "Value of input shape %s from option and output shape %s of data op:%s " + "should be equal to %d, index:%zu, graph:%s, check invalid when %s", + formats::JoinToString(dynamic_dims).c_str(), + formats::JoinToString(output_shape).c_str(), data_node->GetName().c_str(), kDynmaicDims, + j, graph->GetName().c_str(), __FUNCTION__); GELOGE(INTERNAL_ERROR, "value of input_shape %s should be equal to %s.", formats::JoinToString(dynamic_dims).c_str(), formats::JoinToString(output_shape).c_str()); return INTERNAL_ERROR; @@ -215,6 +246,10 @@ Status CheckSequenceOfOptions(ComputeGraphPtr &graph, vector &data_node Status UpdateNameOfData(ComputeGraphPtr &graph, const vector &data_nodes) { GELOGD("Update first value of input shape by data nodes."); if (data_nodes.size() != GetLocalOmgContext().user_input_dims.size()) { + REPORT_INNER_ERROR("E19999", "Count:%zu of data_nodes in graph:%s should be equal to " + "input_shape 
count:%zu from option, check invalid when %s", + data_nodes.size(), graph->GetName().c_str(), GetLocalOmgContext().user_input_dims.size(), + __FUNCTION__); GELOGE(PARAM_INVALID, "count of data_nodes: %zu should be equal to input_shape count: %zu.", data_nodes.size(), GetLocalOmgContext().user_input_dims.size()); return PARAM_INVALID; @@ -229,6 +264,9 @@ Status UpdateNameOfData(ComputeGraphPtr &graph, const vector &data_node Status UpdateNameOfGetnext(ComputeGraphPtr &graph, const vector &getnext_sink_nodes) { GELOGD("Update first value of input shape by getnext sink nodes."); if (getnext_sink_nodes.size() != kNumOfGetnextNode) { + REPORT_INNER_ERROR("E19999", "Not support dynamic dims when a graph with multi getnext nodes, graph:%s, " + "num of getnext node:%zu, check invalid when %s", + graph->GetName().c_str(), getnext_sink_nodes.size(), __FUNCTION__); GELOGE(PARAM_INVALID, "Not support dynamic dims when a graph with multi getnext nodes."); return PARAM_INVALID; } @@ -239,6 +277,9 @@ Status UpdateNameOfGetnext(ComputeGraphPtr &graph, const vector &getnex // user want getnext dynamic, just getnext or data+getnext_sink size_t data_count = input_node->GetAllOutDataAnchors().size() / kDivisionConst; if (data_count != GetLocalOmgContext().user_input_dims.size()) { + REPORT_INNER_ERROR("E19999", "Output desc count of %s is %zu, should be equal to count of input shape: %zu, " + "graph:%s, check invalid when %s", op_desc->GetName().c_str(), data_count, + GetLocalOmgContext().user_input_dims.size(), graph->GetName().c_str(), __FUNCTION__); GELOGE(PARAM_INVALID, "Output count of %s is %zu, should be equal to count of input shape: %zu", op_desc->GetName().c_str(), data_count, GetLocalOmgContext().user_input_dims.size()); return PARAM_INVALID; @@ -299,6 +340,9 @@ Status DeleteIdentityInsertByAdapter(ComputeGraphPtr &graph) { if (dst_node->GetType() == IDENTITY) { GELOGI("Need to remove %s.", dst_node->GetName().c_str()); if (ge::GraphUtils::RemoveNodeWithoutRelink(graph, 
dst_node) != GRAPH_SUCCESS) { + REPORT_CALL_ERROR("E19999", "Remove node:%s(%s) from graph:%s failed when %s", + dst_node->GetName().c_str(), dst_node->GetType().c_str(), graph->GetName().c_str(), + __FUNCTION__); GELOGE(FAILED, "Remove Identity node %s failed.", dst_node->GetName().c_str()); return FAILED; } @@ -322,6 +366,8 @@ Status CheckNegativeCountOfOptions(const std::vector> &shap } for (size_t i = 0; i < shapes.size(); ++i) { if (shapes.at(i).size() != negative_count) { + REPORT_INNER_ERROR("E19999", "gear num of dynamic_dims is %zu should be equal to num:%zu from option, " + "check invalid when %s", shapes.at(i).size(), negative_count, __FUNCTION__); GELOGE(PARAM_INVALID, "Each gear num of dynamic_dims is %zu should be equal to %zu.", shapes.at(i).size(), negative_count); return PARAM_INVALID; @@ -533,6 +579,8 @@ Status StampDynamicType(const OpDescPtr &op_desc) { dynamic_type = static_cast(DYNAMIC_DIMS); } if (!AttrUtils::SetInt(op_desc, ATTR_DYNAMIC_TYPE, dynamic_type)) { + REPORT_CALL_ERROR("E19999", "Set Attr:%s to node:%s(%s) failed when %s", + ATTR_DYNAMIC_TYPE.c_str(), op_desc->GetName().c_str(), op_desc->GetType().c_str(), __FUNCTION__); GELOGE(INTERNAL_ERROR, "Failed to add dynamic type attr for node %s", op_desc->GetName().c_str()); return INTERNAL_ERROR; } diff --git a/inc/framework/common/util.h b/inc/framework/common/util.h index b73e7046..92cb8397 100644 --- a/inc/framework/common/util.h +++ b/inc/framework/common/util.h @@ -116,6 +116,8 @@ #define GE_CHECK_NOTNULL(val) \ do { \ if (val == nullptr) { \ + REPORT_INNER_ERROR("E19999", "Param:%s is nullptr, check invalid when %s", \ + #val, __FUNCTION__); \ DOMI_LOGE("[Check][Param:%s]null is invalid when %s.", #val, __FUNCTION__); \ return ge::PARAM_INVALID; \ } \