From 0d09bdb8903b8741d4587b5a08e3d3fe36664352 Mon Sep 17 00:00:00 2001 From: wangxiaotian22 Date: Tue, 9 Mar 2021 19:18:47 +0800 Subject: [PATCH] ERROR log modify --- ge/generator/ge_generator.cc | 34 +++-- ge/graph/build/logical_stream_allocator.cc | 36 ++--- .../build/memory/binary_block_mem_assigner.cc | 15 +- ge/graph/build/memory/block_mem_assigner.cc | 143 ++++++++++++++---- ge/graph/manager/graph_caching_allocator.cc | 20 +-- inc/framework/common/debug/ge_log.h | 12 +- metadef | 2 +- parser | 2 +- 8 files changed, 178 insertions(+), 86 deletions(-) diff --git a/ge/generator/ge_generator.cc b/ge/generator/ge_generator.cc index fd39552d..938a8bc6 100644 --- a/ge/generator/ge_generator.cc +++ b/ge/generator/ge_generator.cc @@ -52,7 +52,9 @@ constexpr char const *kAttrSupportDynamicShape = "support_dynamicshape"; const int64_t kDynamicDimValue = -2; std::map engine_type_map{ - {ge::ENGINE_SYS, kEngineNameDefault}, {ge::ENGINE_AICORE, kAIcoreEngine}, {ge::ENGINE_VECTOR, kVectorEngine}}; + {ge::ENGINE_SYS, kEngineNameDefault}, + {ge::ENGINE_AICORE, kAIcoreEngine}, + {ge::ENGINE_VECTOR, kVectorEngine}}; bool ContainsDynamicInpus(const ge::OpDesc &op_desc) { for (auto &tensor_desc : op_desc.GetAllInputsDescPtr()) { @@ -84,7 +86,7 @@ static Status CheckEngineTypeSupport(const NodePtr &node, OpEngineType engine_ty ErrorManager::GetInstance().ATCReportErrMessage("E14001", {"opname", "optype", "value", "reason"}, {op_desc->GetName(), op_desc->GetType(), "engine type", "it only support kEngineNameDefault/kAIcoreEngine/kVectorEngine"}); - GELOGE(FAILED, "CheckEngineType: engine type: %d not support", static_cast(engine_type)); + GELOGE(FAILED, "CheckEngineType: engine type: %d not support.", static_cast(engine_type)); return FAILED; } @@ -188,17 +190,17 @@ static Status AddInputs(const ComputeGraphPtr &graph, const NodePtr &node, const (void)AttrUtils::SetBool(data_op, "_is_single_op", true); - GE_CHK_BOOL_EXEC(data_op->AddInputDesc(tensor) == GRAPH_SUCCESS, return FAILED, "Add input desc fail."); - GE_CHK_BOOL_EXEC(data_op->AddOutputDesc(tensor) == GRAPH_SUCCESS, return FAILED, "Add output desc fail."); + GE_CHK_BOOL_EXEC(data_op->AddInputDesc(tensor) == GRAPH_SUCCESS, return FAILED, "Add input desc fail"); + GE_CHK_BOOL_EXEC(data_op->AddOutputDesc(tensor) == GRAPH_SUCCESS, return FAILED, "Add output desc fail"); if (attr) { - GE_CHK_BOOL_EXEC(AttrUtils::SetInt(data_op, ATTR_NAME_INDEX, index), return FAILED, "Set index fail."); + GE_CHK_BOOL_EXEC(AttrUtils::SetInt(data_op, ATTR_NAME_INDEX, index), return FAILED, "Set index fail"); } ge::NodePtr arg_node = graph->AddNode(data_op); - GE_CHK_BOOL_EXEC(arg_node != nullptr, return FAILED, "Insert Data node fail."); + GE_CHK_BOOL_EXEC(arg_node != nullptr, return FAILED, "Insert Data node fail"); GE_CHK_STATUS(GraphUtils::AddEdge(arg_node->GetOutDataAnchor(0), node->GetInDataAnchor(index)), - "Add edge[%s->%s] fail.", data_op->GetName().c_str(), node->GetName().c_str()); + "Add edge[%s->%s] fail", data_op->GetName().c_str(), node->GetName().c_str()); return SUCCESS; } @@ -213,20 +215,20 @@ static Status AddOutputs(const ComputeGraphPtr &graph, const NodePtr &node, cons for (const auto &out_desc : outputs) { GeTensorDesc tensor = out_desc.GetTensorDesc(); TensorUtils::SetInputTensor(tensor, true); - GE_CHK_BOOL_EXEC(op_desc->AddInputDesc(tensor) == GRAPH_SUCCESS, return FAILED, "Add input desc fail"); + GE_CHK_BOOL_EXEC(op_desc->AddInputDesc(tensor) == GRAPH_SUCCESS, return FAILED, "Add input desc fail."); TensorUtils::SetInputTensor(tensor, false); TensorUtils::SetOutputTensor(tensor, true); - GE_CHK_BOOL_EXEC(op_desc->AddOutputDesc(tensor) == GRAPH_SUCCESS, return FAILED, "Add output desc fail"); + GE_CHK_BOOL_EXEC(op_desc->AddOutputDesc(tensor) == GRAPH_SUCCESS, return FAILED, "Add output desc fail."); count++; } GE_CHECK_NOTNULL_EXEC(graph, return PARAM_INVALID); ge::NodePtr out_node = graph->AddNode(op_desc); - GE_CHK_BOOL_EXEC(out_node != nullptr, return FAILED, "Insert Output node fail."); + GE_CHK_BOOL_EXEC(out_node != nullptr, return FAILED, "Insert Output node fail"); GE_CHECK_NOTNULL_EXEC(node, return PARAM_INVALID); for (int32_t i = 0; i < count; ++i) { GE_CHK_STATUS(GraphUtils::AddEdge(node->GetOutDataAnchor(i), out_node->GetInDataAnchor(i)), - "Add edge[%s->%s] fail.", node->GetName().c_str(), out_node->GetName().c_str()); + "Add edge[%s->%s] fail", node->GetName().c_str(), out_node->GetName().c_str()); } return SUCCESS; @@ -246,7 +248,7 @@ static void GetOpsProtoPath(string &opsproto_path) { return; } string path_base = PluginManager::GetPath(); - GELOGI("path_base is %s", path_base.c_str()); + GELOGI("path_base is %s.", path_base.c_str()); path_base = path_base.substr(0, path_base.rfind('/')); path_base = path_base.substr(0, path_base.rfind('/') + 1); opsproto_path = (path_base + "ops/op_proto/custom/" + ":") + (path_base + "ops/op_proto/built-in/"); @@ -331,7 +333,7 @@ Status GeGenerator::Initialize(const map &options, OmgContext &o ErrorManager::GetInstance().SetStage(ErrorMessage::kInitialize, ErrorMessage::kOpsProtoInit); string opsproto_path; GetOpsProtoPath(opsproto_path); - GELOGI("Get opsproto path is %s", opsproto_path.c_str()); + GELOGI("Get opsproto path is %s.", opsproto_path.c_str()); OpsProtoManager *manager = OpsProtoManager::Instance(); map option_tmp; option_tmp.emplace(std::pair(string("ge.opsProtoLibPath"), opsproto_path)); @@ -710,7 +712,7 @@ Status GeGenerator::BuildSingleOp(OpDescPtr &op_desc, const vector &in auto node = comp_graph->FindNode(op_desc->GetName()); Status ret = CheckEngineTypeSupport(node, engine_type); if (ret != SUCCESS) { - GELOGE(ret, "check engine type failed."); + GELOGE(ret, "check engine type failed"); return ret; } } @@ -784,9 +786,9 @@ Status GeGenerator::BuildSingleOpModel(OpDescPtr &op_desc, const vector &outputs, OpEngineType engine_type, ModelBufferData &model_buff) { ErrorManager::GetInstance().SetStage(ErrorMessage::kModelCompile, ErrorMessage::kOther); - GELOGI("Start to build single op online, input size: %zu, output size: %zu", inputs.size(), outputs.size()); + GELOGI("Start to build single op online, input size: %zu, output size: %zu.", inputs.size(), outputs.size()); Status status = BuildSingleOp(op_desc, inputs, outputs, kFileNameSuffix, engine_type, model_buff, false); - GELOGI("Finish build single online model, status: %u", status); + GELOGI("Finish build single online model, status: %u.", status); return status; } diff --git a/ge/graph/build/logical_stream_allocator.cc b/ge/graph/build/logical_stream_allocator.cc index 8ea7fe71..c966c5b3 100644 --- a/ge/graph/build/logical_stream_allocator.cc +++ b/ge/graph/build/logical_stream_allocator.cc @@ -64,7 +64,7 @@ Status AssignByLabelPass::Run(ComputeGraphPtr graph, const vector & subgraph->stream_id = iter->second; } else { subgraph->stream_id = next_stream; - GELOGI("Assign new stream %ld for label %s.", next_stream, stream_label.c_str()); + GELOGI("Assign new stream %ld for label %s", next_stream, stream_label.c_str()); label_streams.emplace(stream_label, next_stream); ++next_stream; @@ -96,7 +96,7 @@ Status IndependentStreamPass::Run(ComputeGraphPtr graph, const vectorstream_id = iter->second; } else { subgraph->stream_id = next_stream; - GELOGI("Assign new independent stream %ld for engine %s (label: %s).", next_stream, engine.c_str(), + GELOGI("Assign new independent stream %ld for engine %s (label: %s)", next_stream, engine.c_str(), stream_label.c_str()); label_streams.emplace(stream_label, next_stream); @@ -127,7 +127,7 @@ Status AssignByDependencyPass::Run(ComputeGraphPtr graph, const vectorstream_id = stream_id; - GELOGI("Reusable subgraph %s has not been assigned a stream, now assign new stream %ld.", + GELOGI("Reusable subgraph %s has not been assigned a stream, now assign new stream %ld", reusable_subgraph->name.c_str(), stream_id); } @@ -137,7 +137,7 @@ Status AssignByDependencyPass::Run(ComputeGraphPtr graph, const vectorreused_subgraph = reusable_subgraph; reused_subgraphs_.emplace_back(subgraph, reusable_subgraph); - GELOGI("Subgraph %s of engine %s reuses stream of subgraph %s of engine %s.", subgraph->name.c_str(), + GELOGI("Subgraph %s of engine %s reuses stream of subgraph %s of engine %s", subgraph->name.c_str(), subgraph->engine_conf.id.c_str(), reusable_subgraph->name.c_str(), reusable_subgraph->engine_conf.id.c_str()); } else { @@ -249,7 +249,7 @@ int64_t AssignByDependencyPass::AssignNewStream(SubgraphPtr subgraph) { engine_stream_num_[engine_name] = stream_id + 1; } - GELOGI("Subgraph %s assigns new temp stream %ld (engine: %s).", subgraph->name.c_str(), stream_id, + GELOGI("Subgraph %s assigns new temp stream %ld (engine: %s)", subgraph->name.c_str(), stream_id, engine_name.c_str()); return stream_id; @@ -282,7 +282,7 @@ void AssignByDependencyPass::UpdateAssignedSubgraphs(Context &context) { GELOGI("Subgraph %s of engine %s reuses default stream %ld.", subgraph->name.c_str(), subgraph->engine_conf.id.c_str(), context.default_stream); } else { - GELOGI("Stream of subgraph %s has been updated to %ld.", subgraph->name.c_str(), subgraph->stream_id); + GELOGI("Stream of subgraph %s has been updated to %ld", subgraph->name.c_str(), subgraph->stream_id); } } } @@ -293,7 +293,7 @@ void AssignByDependencyPass::UpdateReusedSubgraphs() { auto &cur_subgraph = item.first; auto &reused_graph = item.second; cur_subgraph->stream_id = reused_graph->stream_id; - GELOGI("Stream of subgraph %s has been updated to %ld.", cur_subgraph->name.c_str(), cur_subgraph->stream_id); + GELOGI("Stream of subgraph %s has been updated to %ld", cur_subgraph->name.c_str(), cur_subgraph->stream_id); } } @@ -330,7 +330,7 @@ Status NodeStreamUpdatePass::Run(ComputeGraphPtr graph, const vectorname.c_str(), subgraph->stream_id, + GELOGI("Subgraph %s is assigned stream %ld (engine: %s)", subgraph->name.c_str(), subgraph->stream_id, engine_name.c_str()); } } @@ -353,11 +353,11 @@ Status NodeStreamUpdatePass::Run(ComputeGraphPtr graph, const vectorGetName().c_str(), node->GetType().c_str(), subgraph->name.c_str(), context.default_stream, engine_name.c_str()); } else if (IsEngineSkip(*subgraph) && node->GetInNodes().empty()) { - GELOGD("Node %s of type %s in subgraph %s doesn't need to assign a stream (engine: %s).", + GELOGD("Node %s of type %s in subgraph %s doesn't need to assign a stream (engine: %s)", node->GetName().c_str(), node->GetType().c_str(), subgraph->name.c_str(), engine_name.c_str()); } else { node->GetOpDesc()->SetStreamId(stream_id); - GELOGD("Node %s of type %s in subgraph %s is assigned stream %ld (engine: %s).", node->GetName().c_str(), + GELOGD("Node %s of type %s in subgraph %s is assigned stream %ld (engine: %s)", node->GetName().c_str(), node->GetType().c_str(), subgraph->name.c_str(), stream_id, engine_name.c_str()); } } @@ -387,7 +387,7 @@ int64_t UpdateForSkippedEnginePass::GetSingleInoutStream(const NodePtr &node) co if (stream_ids.size() == 1) { int64_t stream_id = *(stream_ids.begin()); - GELOGI("The stream of all input and output nodes of node %s (type: %s) is %ld.", node->GetName().c_str(), + GELOGI("The stream of all input and output nodes of node %s (type: %s) is %ld", node->GetName().c_str(), node->GetType().c_str(), stream_id); return stream_id; } @@ -427,7 +427,7 @@ Status UpdateForSkippedEnginePass::Run(ComputeGraphPtr graph, const vectorSetStreamId(inout_stream); - GELOGI("Node %s of type %s reassign to stream %ld from stream %ld.", node->GetName().c_str(), + GELOGI("Node %s of type %s reassign to stream %ld from stream %ld", node->GetName().c_str(), node->GetType().c_str(), inout_stream, stream_id); } } @@ -455,7 +455,7 @@ Status AllReduceParallelPass::Run(ComputeGraphPtr graph, const vectorGetName().c_str()); + GELOGD("Subgraphs of graph %s.", graph->GetName().c_str()); for (const auto &subgraph : subgraphs) { if (subgraph != nullptr) { - GELOGD("subgraph: %s", subgraph->name.c_str()); + GELOGD("subgraph: %s.", subgraph->name.c_str()); } } @@ -664,9 +664,9 @@ Status LogicalStreamAllocator::RunPasses(const ComputeGraphPtr &graph, const vec Status status = pass->Run(graph, subgraphs, context_); if (status == SUCCESS) { - GELOGD("Stream pass %s return SUCCESS.", pass->GetName().c_str()); + GELOGD("Stream pass %s return SUCCESS", pass->GetName().c_str()); } else if (status == NOT_CHANGED) { - GELOGD("Stream pass %s return NOT_CHANGED.", pass->GetName().c_str()); + GELOGD("Stream pass %s return NOT_CHANGED", pass->GetName().c_str()); } else { GELOGE(status, "Stream pass %s failed.", pass->GetName().c_str()); return status; diff --git a/ge/graph/build/memory/binary_block_mem_assigner.cc b/ge/graph/build/memory/binary_block_mem_assigner.cc index 97a0aed6..a9c7fa74 100644 --- a/ge/graph/build/memory/binary_block_mem_assigner.cc +++ b/ge/graph/build/memory/binary_block_mem_assigner.cc @@ -70,7 +70,10 @@ Status BinaryBlockMemAssigner::GetMemoryRanges(vector &range_ceils) { return SUCCESS; } if ((all_memory_size.front() <= 0) || (log(kLogBase) == 0)) { - GELOGE(FAILED, "Memory size:%ld is invalid.", all_memory_size.front()); + GELOGE(FAILED, "[check][mem_range_step]first mem_range_step:%ld less than 0,invalid," + "maybe has dynamic shape in graph", all_memory_size.front()); + REPORT_INNER_ERROR("E19999", "first mem_range_step:%ld less than 0,invalid," + "maybe has dynamic shape in graph", all_memory_size.front()); return FAILED; } // Memory size is 512 aligned, so it is not necessary to take less than 512 @@ -81,12 +84,18 @@ Status BinaryBlockMemAssigner::GetMemoryRanges(vector &range_ceils) { GELOGD("Range number: %zu", range_number); vector> ranges(range_number); - GE_CHK_BOOL_EXEC((range_number != 0), return PARAM_INVALID, "range_number can't be 0."); + GE_CHK_BOOL_EXEC((range_number != 0), + REPORT_INNER_ERROR("E19999", "inner data[range_number] is 0, judge invalid"); + return PARAM_INVALID, + "[check][range_number]inner data is 0, judge invalid."); size_t range_number_limit = all_memory_size.size() / range_number; int64_t range_ceil = min_memory_size; for (size_t i = 1; i <= range_number; i++) { GE_IF_BOOL_EXEC(TypeUtils::CheckUint64MulOverflow(static_cast(range_ceil), kRangeCeilInterval), - GELOGE(FAILED, "Multiply result is out of range."); + GELOGE(FAILED, "[check][mem_range_ceil]Multiply result is out of range," + "range_ceil:%ld, interval:%u", range_ceil, kRangeCeilInterval); + REPORT_INNER_ERROR("E19999", "process mem_range_ceil,multiply result out of range," + "range_ceil:%ld, interval:%u", range_ceil, kRangeCeilInterval); return FAILED); range_ceil *= kRangeCeilInterval; // The block size of each interval is doubled every time. for (auto iter = all_memory_size.begin(); iter != all_memory_size.end();) { diff --git a/ge/graph/build/memory/block_mem_assigner.cc b/ge/graph/build/memory/block_mem_assigner.cc index 41f24b94..3db078d6 100755 --- a/ge/graph/build/memory/block_mem_assigner.cc +++ b/ge/graph/build/memory/block_mem_assigner.cc @@ -30,6 +30,7 @@ #include "graph/utils/node_utils.h" #include "graph/utils/op_desc_utils.h" #include "graph/utils/tensor_utils.h" +#include "graph/utils/type_utils.h" #include "graph/debug/ge_attr_define.h" @@ -457,7 +458,16 @@ Status GetNoAlignSize(const ge::OpDesc &desc, uint32_t index, size_t &size) { DataType data_type = output_op_desc->GetDataType(); graphStatus graph_status = TensorUtils::CalcTensorMemSize(shape, format, data_type, tensor_size); if (graph_status != GRAPH_SUCCESS) { - GELOGE(graph_status, "CalcTensorMemSize failed!"); + GELOGE(graph_status, "[Calculate][TensorSize]shape:%s, format:%s, data_type:%s, op:%s, out_index:%u", + shape.ToString().c_str(), + TypeUtils::FormatToSerialString(format).c_str(), + TypeUtils::DataTypeToSerialString(data_type).c_str(), + desc.GetName().c_str(), index); + REPORT_CALL_ERROR("E19999", "CalcTensorMemSize fail, shape:%s, format:%s, data_type:%s, op:%s, out_index:%u", + shape.ToString().c_str(), + TypeUtils::FormatToSerialString(format).c_str(), + TypeUtils::DataTypeToSerialString(data_type).c_str(), + desc.GetName().c_str(), index); return FAILED; } size = static_cast(tensor_size); @@ -586,9 +596,12 @@ void BlockMemAssigner::GetOutAndWorkSpaceMem(vector &all_memory_size) { GeTensorDesc output_desc = node_op_desc->GetOutputDesc(out_anchor->GetIdx()); int64_t size = 0; GE_IF_BOOL_EXEC(ge::TensorUtils::GetSize(output_desc, size) != SUCCESS, GELOGI("Get size failed")); - GE_IF_BOOL_EXEC(size < 0, GELOGE(FAILED, "Node:%s size:%ld is invalid, maybe it is unknown shape node.", - node_op_desc->GetName().c_str(), size); - return;); + GE_IF_BOOL_EXEC(size < 0, + GELOGE(FAILED, "[check][TensorSize]tensor_size:%ld is invalid, maybe it is unknown shape node, Node_name:%s", + size, node_op_desc->GetName().c_str()); + REPORT_INNER_ERROR("E19999", "tensor_size:%ld is invalid, maybe it is unknown shape node, Node_name:%s", + size, node_op_desc->GetName().c_str()); + return;); batch_all_memory_size[batch_label].emplace_back(size); if (batch_total_size.find(batch_label) == batch_total_size.end()) { batch_total_size[batch_label] = size; @@ -678,22 +691,34 @@ bool BlockMemAssigner::IsOutNodeSetContinuousInput(const NodePtr &n, uint32_t ou if (static_cast(out_index) < n->GetAllOutDataAnchors().size()) { auto out_anchor = n->GetOutDataAnchor(out_index); GE_IF_BOOL_EXEC(out_anchor == nullptr, - GELOGE(FAILED, "Node[%s] output[%u] anchor is null.", n->GetName().c_str(), out_index); + GELOGE(FAILED, "[check][anchor]Node[%s] output[%u] anchor is null.", + n->GetName().c_str(), out_index); + REPORT_INNER_ERROR("E19999", "output anchor is null, node_name: %s output_index: %u.", + n->GetName().c_str(), out_index); return false;); for (auto const &peer_in_anchor : out_anchor->GetPeerInDataAnchors()) { GE_IF_BOOL_EXEC(peer_in_anchor == nullptr, - GELOGE(FAILED, "Node[%s] output[%u] peer_in_anchor 0 is null.", n->GetName().c_str(), out_index); + GELOGE(FAILED, "[check][anchor]Node[%s] output[%u] peer_in_anchor 0 is null.", + n->GetName().c_str(), out_index); + REPORT_INNER_ERROR("E19999", "output anchor peer is null, node_name: %s output_index: %u.", + n->GetName().c_str(), out_index); return false;); auto peer_node = peer_in_anchor->GetOwnerNode(); GE_IF_BOOL_EXEC(peer_node == nullptr, - GELOGE(FAILED, "Node[%s] output[%u] node is null.", n->GetName().c_str(), out_index); + GELOGE(FAILED, "[check][node]Node[%s] output[%u] peer node is null.", + n->GetName().c_str(), out_index); + REPORT_INNER_ERROR("E19999", "output anchor peer node is null, node_name: %s output_index: %u.", + n->GetName().c_str(), out_index); return false;); // Get the continuous input type of the node, default is false bool is_input_continuous = false; auto peer_in_node_desc = peer_node->GetOpDesc(); GE_IF_BOOL_EXEC(peer_in_node_desc == nullptr, - GELOGE(FAILED, "Node[%s] output[%u] nodedesc is null.", n->GetName().c_str(), out_index); + GELOGE(FAILED, "[check][op_desc]Node[%s] output[%u] nodedesc is null.", + n->GetName().c_str(), out_index); + REPORT_INNER_ERROR("E19999", "output anchor peer op_desc is null, node_name:%s output_index:%u.", + n->GetName().c_str(), out_index); return false;); // If GetBool fail, is_input_continuous is false. @@ -793,7 +818,10 @@ bool BlockMemAssigner::IsContinuousMemoryReuse(const NodePtr &n, const NodePtr & if ((in_anchor == nullptr) || (in_anchor->GetPeerOutAnchor() == nullptr) || (in_anchor->GetPeerOutAnchor()->GetOwnerNode() == nullptr) || (in_anchor->GetPeerOutAnchor()->GetOwnerNode()->GetOpDesc() == nullptr)) { - GELOGE(FAILED, "Node[%s] output[%u] peer input node desc is null.", n->GetName().c_str(), out_index); + GELOGE(FAILED, "[check][op_desc]Node[%s] output[%u] peer input node desc is null.", + n->GetName().c_str(), out_index); + REPORT_INNER_ERROR("E19999", "get output anchor peer op_desc fail, node_name: %s output_index: %u.", + n->GetName().c_str(), out_index); return false; } auto peer_out_node_desc = in_anchor->GetPeerOutAnchor()->GetOwnerNode()->GetOpDesc(); @@ -1077,7 +1105,9 @@ MemoryBlock *BlockMemAssigner::ApplyMemory(size_t block_size, size_t real_size, OpMemoryType mem_type, const NodePtr &n, uint32_t out_index, const vector &workspace_reuse_flag, const bool is_op_reuse_mem, const bool continuous, int64_t memory_type) { - GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(n == nullptr, return nullptr, "Input parameter n is null."); + GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(n == nullptr, + REPORT_INNER_ERROR("E19999", "Input parameter n(type:node_ptr) is null, apply memory failed"); + return nullptr, "[check][param]Input parameter n(type:node_ptr) is null."); auto node_op_desc = n->GetOpDesc(); GE_IF_BOOL_EXEC(node_op_desc == nullptr, return nullptr); std::string batch_label; @@ -1129,7 +1159,10 @@ MemoryBlock *BlockMemAssigner::ApplyMemory(size_t block_size, size_t real_size, } auto block = new (std::nothrow) MemoryBlock(block_size, node_op_desc->GetStreamId(), is_reuse_memory, memory_type); - GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(block == nullptr, return nullptr, "new an object failed."); + GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(block == nullptr, + REPORT_INNER_ERROR("E19999", "new a memoryblock object failed. node_name:%s out_index:%u", + n->GetName().c_str(), out_index); + return nullptr, "[new][object]new MemoryBlock failed, node_name:%s out_index:%u", n->GetName().c_str(), out_index); // Data and netoutput need zero copy block block->is_zero_copy_ = IsZeroCopyBlock(n, continuous); @@ -1188,9 +1221,13 @@ void BlockMemAssigner::ContinuousOutRefCheck(bool &isAllOutputRef, bool &isOutpu Status BlockMemAssigner::ApplyContinuousMemory(const NodePtr &n, const vector &ranges, const bool is_op_reuse_mem) { - GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(n == nullptr, return INTERNAL_ERROR, "input node is null."); + GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(n == nullptr, + REPORT_INNER_ERROR("E19999", "Input parameter n(type:node_ptr) is null"); + return INTERNAL_ERROR, "[check][param]Input parameter n(type:NodePtr) is null."); auto node_op_desc = n->GetOpDesc(); - GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(node_op_desc == nullptr, return INTERNAL_ERROR, "node_op_desc is null."); + GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(node_op_desc == nullptr, + REPORT_INNER_ERROR("E19999", "Input parameter n(type:OpDescPtr) is null"); + return INTERNAL_ERROR, "[check][param]Input parameter n(type:OpDescPtr) is null"); // continuous output support ref only when all output ref input bool isAllOutputRef = true; @@ -1204,7 +1241,9 @@ Status BlockMemAssigner::ApplyContinuousMemory(const NodePtr &n, const vectorGetName().c_str()); + GELOGE(INTERNAL_ERROR, "[Check][OutRefStatus]continuous output node ref part input, not support, node_name:%s", n->GetName().c_str()); return INTERNAL_ERROR; } @@ -1215,7 +1254,9 @@ Status BlockMemAssigner::ApplyContinuousMemory(const NodePtr &n, const vector(node_op_desc->GetOutputsSize()); index++) { auto output_op_desc = node_op_desc->GetOutputDescPtr(index); if (output_op_desc == nullptr) { - GELOGE(INTERNAL_ERROR, "Get output desc failed, node_name:%s, output_index:%u", n->GetName().c_str(), index); + REPORT_INNER_ERROR("E19999", "get output_desc failed, node_name:%s, output_index:%u", + n->GetName().c_str(), index); + GELOGE(INTERNAL_ERROR, "[Get][OutputDesc]node_name:%s, output_index:%u", n->GetName().c_str(), index); return INTERNAL_ERROR; } @@ -1226,7 +1267,9 @@ Status BlockMemAssigner::ApplyContinuousMemory(const NodePtr &n, const vectorGetName().c_str(), index); + REPORT_CALL_ERROR("E19999", "get tensor_size failed, node_name:%s, output_index:%u", + n->GetName().c_str(), index); + GELOGE(INTERNAL_ERROR, "[Get][TensorSize]node_name:%s, output_index:%u", n->GetName().c_str(), index); return INTERNAL_ERROR; } size_t align_size = static_cast(size); @@ -1266,7 +1309,9 @@ Status BlockMemAssigner::ApplyContinuousMemory(const NodePtr &n, const vectorlast_continuous_block_ = true; ++(block->ref_count_); } else { - GELOGE(INTERNAL_ERROR, "node apply continuous output memory failed. node_name:%s", n->GetName().c_str()); + REPORT_CALL_ERROR("E19999", "apply continuousMemory failed, node_name:%s, total_size:%ld", + n->GetName().c_str(), total_size); + GELOGE(INTERNAL_ERROR, "[Apply][ContinuousMemory]node_name:%s, total_size:%ld", n->GetName().c_str(), total_size); return INTERNAL_ERROR; } return SUCCESS; @@ -1274,25 +1319,37 @@ Status BlockMemAssigner::ApplyContinuousMemory(const NodePtr &n, const vector &ranges, const bool is_op_reuse_mem, const bool continuous) { - GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(n == nullptr, return nullptr, "input node is null."); + GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(n == nullptr, + REPORT_INNER_ERROR("E19999", "Input parameter n(type:NodePtr) is null"); + return nullptr, "[Check][Param]Input parameter n(type:NodePtr) is null"); auto node_op_desc = n->GetOpDesc(); - GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(node_op_desc == nullptr, return nullptr, "node_op_desc is null."); + GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(node_op_desc == nullptr, + REPORT_INNER_ERROR("E19999", "Input parameter n(type:OpDescPtr) is null"); + return nullptr, "[Check][Param]Input parameter n(type:OpDescPtr) is null"); MemoryBlock *block = nullptr; NodeIndexIO node_index_io(n, index, kOut); int64_t size = 0; auto output_op_desc = node_op_desc->GetOutputDescPtr(index); - GE_IF_BOOL_EXEC(output_op_desc == nullptr, return nullptr); + GE_IF_BOOL_EXEC(output_op_desc == nullptr, + REPORT_INNER_ERROR("E19999", "get output_desc failed, node_name:%s, output_index:%u", n->GetName().c_str(), index); + GELOGE(FAILED, "[Get][OutputDesc]node_name:%s, output_index:%u", n->GetName().c_str(), index); + return nullptr); GE_IF_BOOL_EXEC(ge::TensorUtils::GetSize(*output_op_desc, size) != SUCCESS, GELOGI("Get size failed")); size_t no_align_size = 0; GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(GetNoAlignSize(*node_op_desc, index, no_align_size) != SUCCESS, - return nullptr, "Get no align size failed"); + REPORT_CALL_ERROR("E19999", "Get no align size failed, node_name:%s, output_index:%u", n->GetName().c_str(), index); + return nullptr, "[Get][TensorSize]Get no align size, node_name:%s, output_index:%u", n->GetName().c_str(), index); std::string symbol; bool reuse_input = false; if (IsSymbolExist(node_index_io, symbol)) { block = symbol_blocks_[symbol]; - GE_IF_BOOL_EXEC(block == nullptr, GELOGE(FAILED, "Node %s ref block is nullptr.", node_op_desc->GetName().c_str()); - return nullptr); + GE_IF_BOOL_EXEC(block == nullptr, + REPORT_INNER_ERROR("E19999", "get ref block failed, node_name:%s, symbol:%s", + node_op_desc->GetName().c_str(), node_index_io.ToString().c_str()); + GELOGE(FAILED, "[Get][RefBlock]node_name:%s, symbol:%s", + node_op_desc->GetName().c_str(), node_index_io.ToString().c_str()); + return nullptr); // reduce old size size_t align_size = block->Size(); AlignMemOffset(align_size); @@ -1335,12 +1392,24 @@ MemoryBlock *BlockMemAssigner::ApplyOutMemory(const NodePtr &n, uint32_t index, vector workspace_reuse_flag; block = ApplyMemory(block_size, size, no_align_size, kOutput, n, index, workspace_reuse_flag, is_op_reuse_mem, continuous, memory_type); + GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(block == nullptr, + REPORT_CALL_ERROR("E19999", "apply out Memory failed, node_name:%s, block_size:%ld, out_index:%u", + n->GetName().c_str(), block_size, index); + return nullptr, "[Apply][Memory]node_name:%s, block_size:%ld, out_index:%u", + n->GetName().c_str(), block_size, index); } - GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(block == nullptr, return nullptr, "Block is nullptr."); int out_count = 0; - GE_IF_BOOL_EXEC(index >= n->GetAllOutDataAnchors().size(), GELOGE(FAILED, "index is out of range."); return nullptr); + GE_IF_BOOL_EXEC(index >= n->GetAllOutDataAnchors().size(), + REPORT_INNER_ERROR("E19999", "out index:%u exceed out_size:%lu, node_name:%s", + index, n->GetAllOutDataAnchors().size(), n->GetName().c_str()); + GELOGE(FAILED, "[Check][OutIndex]index:%u exceed out_size:%lu, node_name:%s", + index, n->GetAllOutDataAnchors().size(), n->GetName().c_str()); + return nullptr); auto out_data_anchor = n->GetOutDataAnchor(index); - GE_IF_BOOL_EXEC(out_data_anchor == nullptr, GELOGE(FAILED, "Out data anchor is nullptr."); return nullptr); + GE_IF_BOOL_EXEC(out_data_anchor == nullptr, + REPORT_INNER_ERROR("E19999", "out anchor is null, index:%u, node_name:%s", index, n->GetName().c_str()); + GELOGE(FAILED, "[Check][OutAnchor]is null, index:%u, node_name:%s", index, n->GetName().c_str()); + return nullptr); for (const auto &in_anchor : out_data_anchor->GetPeerInDataAnchors()) { auto owner_node = in_anchor->GetOwnerNode(); auto op_desc = owner_node->GetOpDesc(); @@ -1546,8 +1615,13 @@ Status BlockMemAssigner::AssignOutputMemoryWithReuse(const NodePtr &node, vector GELOGD("Assign memory node[%s], output size[%zu], output memory type size[%zu]", op_desc->GetName().c_str(), op_desc->GetOutputsSize(), memorys_type.size()); if (has_mem_type_attr && (memorys_type.size() != op_desc->GetOutputsSize())) { - GELOGE(INTERNAL_ERROR, "fusion: node[%s], output memory size err[outputsize:%zu, memorysize:%zu]", - op_desc->GetName().c_str(), op_desc->GetOutputsSize(), memorys_type.size()); + REPORT_INNER_ERROR("E19999", "Attr[%s] size:%zu not equal to node output size:%zu, node_name:%s", + ATTR_NAME_OUTPUT_MEM_TYPE_LIST.c_str(), memorys_type.size(), + op_desc->GetOutputsSize(), op_desc->GetName().c_str()); + GELOGE(INTERNAL_ERROR, + "[Check][MemTypeAttr]Attr %s size:%zu not equal to node output size:%zu, node_name:%s", + ATTR_NAME_OUTPUT_MEM_TYPE_LIST.c_str(), memorys_type.size(), + op_desc->GetOutputsSize(), op_desc->GetName().c_str()); return INTERNAL_ERROR; } @@ -1673,8 +1747,10 @@ void BlockMemAssigner::AssignMemoryWithReuse(vector &ranges) { temp.size(), tvm_workspace_memory_type.size()); if (has_tvm_workspace_mem_type_attr && (temp.size() != tvm_workspace_memory_type.size())) { - GELOGE(INTERNAL_ERROR, "fusion: node[%s], tvm workspace memory size error![v_temp:%zu, workspace:%zu]", - n->GetName().c_str(), temp.size(), tvm_workspace_memory_type.size()); + REPORT_INNER_ERROR("E19999", "Attr[%s]size:%zu is not equal to workspace size:%zu, node_name:%s", + TVM_ATTR_NAME_WORKSPACE_TYPE.c_str(), tvm_workspace_memory_type.size(), temp.size(), n->GetName().c_str()); + GELOGE(INTERNAL_ERROR, "[Check][Attr]Attr %s size:%zu is not equal to workspace size:%zu, node_name:%s", + TVM_ATTR_NAME_WORKSPACE_TYPE.c_str(), tvm_workspace_memory_type.size(), temp.size(), n->GetName().c_str()); return; } for (size_t i = 0; i < temp.size(); i++) { @@ -2083,8 +2159,11 @@ bool BlockMemAssigner::GetWorkSpaceMemoryType(const NodePtr &node, size_t index, bool has_workspace_mem_type_attr = ge::AttrUtils::GetListInt(op_desc, TVM_ATTR_NAME_WORKSPACE_TYPE, workspace_memory_type); if (has_workspace_mem_type_attr && (workspace_memory_type.size() <= index)) { - GELOGE(INTERNAL_ERROR, "node[%s], workspace_memory size error![index:%zu, workspace:%zu]", - node->GetName().c_str(), index, workspace_memory_type.size()); + REPORT_INNER_ERROR("E19999", "get workspace mem_type failed, " + "index %zu invalid, bigger than attr %s size:%zu, node_name:%s", + index, TVM_ATTR_NAME_WORKSPACE_TYPE.c_str(), workspace_memory_type.size(), node->GetName().c_str()); + GELOGE(INTERNAL_ERROR, "[Get][WorkspaceMemType]index %zu invalid, bigger than attr %s size:%zu, node_name:%s", + index, TVM_ATTR_NAME_WORKSPACE_TYPE.c_str(), workspace_memory_type.size(), node->GetName().c_str()); return false; } memory_type = has_workspace_mem_type_attr ? workspace_memory_type[index] : RT_MEMORY_HBM; diff --git a/ge/graph/manager/graph_caching_allocator.cc b/ge/graph/manager/graph_caching_allocator.cc index 03ca352e..10f6b498 100644 --- a/ge/graph/manager/graph_caching_allocator.cc +++ b/ge/graph/manager/graph_caching_allocator.cc @@ -101,7 +101,7 @@ CachingAllocator::CachingAllocator(rtMemType_t memory_type) : memory_type_(memor } Status CachingAllocator::Initialize(uint32_t device_id) { - GELOGI("Device id %u", device_id); + GELOGI("Device id %u.", device_id); // when redo Initialize free old memory FreeBlocks(); std::lock_guard lock(mutex_); @@ -124,14 +124,14 @@ Status CachingAllocator::Initialize(uint32_t device_id) { } void CachingAllocator::Finalize(uint32_t device_id) { - GELOGI("Device id %u", device_id); + GELOGI("Device id %u.", device_id); PrintStatics(); FreeBlocks(); FreeBlockBins(); } uint8_t *CachingAllocator::Malloc(size_t size, uint8_t *org_ptr, uint32_t device_id) { - GELOGI("Start malloc pool memory, size = %zu, device id = %u", size, device_id); + GELOGI("Start malloc pool memory, size = %zu, device id = %u.", size, device_id); uint8_t *ptr = nullptr; size = GetBlockSize(size); Block *block = FindFreeBlock(size, org_ptr, device_id); @@ -152,7 +152,7 @@ uint8_t *CachingAllocator::Malloc(size_t size, uint8_t *org_ptr, uint32_t device } Status CachingAllocator::Free(uint8_t *ptr, uint32_t device_id) { - GELOGI("Free device id = %u", device_id); + GELOGI("Free device id = %u.", device_id); if (ptr == nullptr) { GELOGE(PARAM_INVALID, "Invalid memory pointer"); return ge::PARAM_INVALID; @@ -174,7 +174,7 @@ void CachingAllocator::FreeBlock(Block *block) { if (block == nullptr || !block->allocated) { return; } - GELOGI("Free block size = %zu", block->size); + GELOGI("Free block size = %zu.", block->size); std::lock_guard lock(mutex_); block->allocated = false; @@ -227,7 +227,7 @@ Block *CachingAllocator::FindFreeBlock(size_t size, uint8_t *org_ptr, uint32_t d Block *block = *it; bin->erase(it); if (block != nullptr) { - GELOGI("Find block size = %zu", block->size); + GELOGI("Find block size = %zu.", block->size); if (ShouldSplit(block, size)) { block = SplitBlock(block, size, *bin, device_id); } @@ -235,7 +235,7 @@ Block *CachingAllocator::FindFreeBlock(size_t size, uint8_t *org_ptr, uint32_t d if (block->ptr != nullptr) { block->allocated = true; allocated_blocks_[block->ptr] = block; - GELOGI("Malloc device id = %u, size= %zu", device_id, size); + GELOGI("Malloc device id = %u, size= %zu.", device_id, size); } } @@ -265,7 +265,7 @@ Block *CachingAllocator::SplitBlock(Block *block, size_t size, BlockBin &bin, ui } Status CachingAllocator::TryExtendCache(size_t size, uint32_t device_id) { - GELOGI("Try to extend cache. size = %zu, device id = %u", size, device_id); + GELOGI("Try to extend cache. size = %zu, device id = %u.", size, device_id); auto memory_size = GetAllocationSize(size); const std::string purpose = "Memory for caching."; auto memory_addr = memory_allocator_->MallocMemory(purpose, memory_size, device_id); @@ -302,7 +302,7 @@ Status CachingAllocator::AddToBlockBin(uint8_t *ptr, size_t size, uint32_t devic return ge::FAILED; } - GELOGI("Block size = %zu", size); + GELOGI("Block size = %zu.", size); block->ptr = ptr; block->size = size; @@ -313,7 +313,7 @@ Status CachingAllocator::AddToBlockBin(uint8_t *ptr, size_t size, uint32_t devic } size_t CachingAllocator::FreeCachedBlocks() { - GELOGI("Free cached blocks"); + GELOGI("Free cached blocks."); std::lock_guard lock(mutex_); size_t free_cached_memory_size = 0; for (uint32_t i = 0; i < kNumBins; ++i) { diff --git a/inc/framework/common/debug/ge_log.h b/inc/framework/common/debug/ge_log.h index c1359a20..a80cc156 100644 --- a/inc/framework/common/debug/ge_log.h +++ b/inc/framework/common/debug/ge_log.h @@ -20,6 +20,7 @@ #include #include "framework/common/ge_inner_error_codes.h" +#include "common/util/error_manager/error_manager.h" #include "toolchain/slog.h" #ifdef __GNUC__ #include @@ -55,9 +56,10 @@ inline bool IsLogEnable(int module_name, int log_level) { return (enable == 1); } -#define GELOGE(ERROR_CODE, fmt, ...) \ - dlog_error(GE_MODULE_NAME, "%lu %s: ErrorNo: %d(%s) " fmt, GeLog::GetTid(), __FUNCTION__, ERROR_CODE, \ - ((GE_GET_ERRORNO_STR(ERROR_CODE)).c_str()), ##__VA_ARGS__) +#define GELOGE(ERROR_CODE, fmt, ...) \ + dlog_error(GE_MODULE_NAME, "%lu %s: ErrorNo: %d(%s) %s" fmt, GeLog::GetTid(), __FUNCTION__, ERROR_CODE, \ + ((GE_GET_ERRORNO_STR(ERROR_CODE)).c_str()), \ + ErrorManager::GetInstance().GetLogHeader().c_str(), ##__VA_ARGS__) #define GELOGW(fmt, ...) \ if (IsLogEnable(GE_MODULE_NAME, DLOG_WARN)) \ dlog_warn(GE_MODULE_NAME, "%lu %s:" fmt, GeLog::GetTid(), __FUNCTION__, ##__VA_ARGS__) @@ -82,8 +84,8 @@ inline bool IsLogEnable(int module_name, int log_level) { ##__VA_ARGS__); \ } while (0) -#define GE_LOG_ERROR(MOD_NAME, ERROR_CODE, fmt, ...) \ - dlog_error(MOD_NAME, "%lu %s: ErrorNo: %d(%s) " fmt, GeLog::GetTid(), __FUNCTION__, ERROR_CODE, \ +#define GE_LOG_ERROR(MOD_NAME, ERROR_CODE, fmt, ...) \ + dlog_error(MOD_NAME, "%lu %s: ErrorNo: %d(%s) " fmt, GeLog::GetTid(), __FUNCTION__, ERROR_CODE, \ ((GE_GET_ERRORNO_STR(ERROR_CODE)).c_str()), ##__VA_ARGS__) // print memory when it is greater than 1KB. diff --git a/metadef b/metadef index 6b802ec3..deebd59d 160000 --- a/metadef +++ b/metadef @@ -1 +1 @@ -Subproject commit 6b802ec3cf711e9942a7e2a74f04a53647aae473 +Subproject commit deebd59d7ea015d7907db525596213492fe021b0 diff --git a/parser b/parser index 6a07f1a8..eb4d9f3a 160000 --- a/parser +++ b/parser @@ -1 +1 @@ -Subproject commit 6a07f1a8b9b8b4630a5b60d9d8d02ec4a6314d68 +Subproject commit eb4d9f3aa4cd0b567e3af6149e48ca2b15a3339e