diff --git a/ge/CMakeLists.txt b/ge/CMakeLists.txt index 6ff9f5d9..d10d73e4 100755 --- a/ge/CMakeLists.txt +++ b/ge/CMakeLists.txt @@ -173,10 +173,12 @@ set(TRAIN_SRC_LIST "graph/manager/graph_manager_utils.cc" "graph/manager/graph_mem_allocator.cc" "graph/manager/graph_caching_allocator.cc" + "graph/manager/session_scope_mem_allocator.cc" "graph/manager/graph_var_manager.cc" "graph/manager/host_mem_manager.cc" "graph/manager/rdma_pool_allocator.cc" "graph/manager/host_mem_allocator.cc" + "graph/manager/graph_mem_manager.cc" "graph/manager/memory_api.cc" "graph/manager/model_manager/event_manager.cc" "graph/manager/trans_var_data_utils.cc" @@ -270,7 +272,6 @@ set(TRAIN_SRC_LIST "graph/passes/identity_pass.cc" "graph/passes/ref_identity_delete_op_pass.cc" "graph/passes/infershape_pass.cc" - "graph/passes/isolated_op_remove_pass.cc" "graph/passes/iterator_op_pass.cc" "graph/passes/link_gen_mask_nodes_pass.cc" "graph/passes/merge_pass.cc" @@ -317,13 +318,11 @@ set(TRAIN_SRC_LIST "graph/passes/transop_without_reshape_fusion_pass.cc" "graph/passes/transpose_transdata_pass.cc" "graph/passes/unused_const_pass.cc" - "graph/passes/unused_op_remove_pass.cc" "graph/passes/var_is_initialized_op_pass.cc" "graph/passes/parallel_concat_start_op_pass.cc" "graph/passes/cond_pass.cc" "graph/passes/cond_remove_pass.cc" "graph/passes/for_pass.cc" - "graph/passes/variable_format_pass.cc" "graph/passes/variable_op_pass.cc" "graph/passes/variable_prepare_op_pass.cc" "graph/passes/variable_ref_delete_op_pass.cc" @@ -478,6 +477,8 @@ set(INFER_SRC_LIST "graph/manager/host_mem_allocator.cc" "graph/manager/graph_mem_allocator.cc" "graph/manager/graph_caching_allocator.cc" + "graph/manager/session_scope_mem_allocator.cc" + "graph/manager/graph_mem_manager.cc" "model/ge_model.cc" "model/ge_root_model.cc" "graph/common/transop_util.cc" @@ -522,12 +523,10 @@ set(INFER_SRC_LIST "graph/passes/dimension_adjust_pass.cc" "graph/passes/get_original_format_pass.cc" 
"graph/passes/shape_operate_op_remove_pass.cc" - "graph/passes/unused_op_remove_pass.cc" "graph/passes/assert_pass.cc" "graph/passes/dropout_pass.cc" "graph/passes/infershape_pass.cc" "graph/passes/unused_const_pass.cc" - "graph/passes/isolated_op_remove_pass.cc" "graph/passes/permute_pass.cc" "graph/passes/ctrl_edge_transfer_pass.cc" "graph/passes/end_of_sequence_add_control_pass.cc" @@ -610,7 +609,6 @@ set(INFER_SRC_LIST "graph/passes/switch_logic_remove_pass.cc" "graph/passes/switch_data_edges_bypass.cc" "graph/passes/merge_pass.cc" - "graph/passes/variable_format_pass.cc" "graph/passes/variable_op_pass.cc" "graph/passes/cast_remove_pass.cc" "graph/passes/transpose_transdata_pass.cc" diff --git a/ge/common/model_parser/model_parser.cc b/ge/common/model_parser/model_parser.cc index ce654887..5c68eea8 100644 --- a/ge/common/model_parser/model_parser.cc +++ b/ge/common/model_parser/model_parser.cc @@ -62,7 +62,6 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status ModelParserBase::LoadFro char *data = new (std::nothrow) char[len]; if (data == nullptr) { - GELOGE(ACL_ERROR_GE_MEMORY_ALLOCATION, "Load model From file failed, bad memory allocation occur. (need:%u)", len); GELOGE(ACL_ERROR_GE_MEMORY_ALLOCATION, "[Load][ModelFromFile]Failed, " "bad memory allocation occur(need %u), file %s", len, model_path); REPORT_CALL_ERROR("E19999", "Load model from file %s failed, " @@ -90,33 +89,45 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status ModelParserBase::ParseMo GE_CHECK_NOTNULL(model.model_data); // Model length too small - GE_CHK_BOOL_RET_STATUS(model.model_len >= sizeof(ModelFileHeader), ACL_ERROR_GE_EXEC_MODEL_DATA_SIZE_INVALID, - "Invalid model. 
Model data size %u must be greater than or equal to %zu.", model.model_len, - sizeof(ModelFileHeader)); + GE_CHK_BOOL_EXEC(model.model_len >= sizeof(ModelFileHeader), + REPORT_INPUT_ERROR("E10003", std::vector({"parameter", "value", "reason"}), + std::vector({"om", model.om_name.c_str(), "invalid om file"})); + GELOGE(ACL_ERROR_GE_EXEC_MODEL_DATA_SIZE_INVALID, + "[Check][Param] Invalid model. Model data size %u must be greater than or equal to %zu.", + model.model_len, sizeof(ModelFileHeader)); + return ACL_ERROR_GE_EXEC_MODEL_DATA_SIZE_INVALID;); // Get file header auto file_header = reinterpret_cast(model.model_data); // Determine whether the file length and magic number match - GE_CHK_BOOL_RET_STATUS( - file_header->length == model.model_len - sizeof(ModelFileHeader) && file_header->magic == MODEL_FILE_MAGIC_NUM, - ACL_ERROR_GE_EXEC_MODEL_DATA_SIZE_INVALID, - "Invalid model. file_header->length[%u] + sizeof(ModelFileHeader)[%zu] != model->model_len[%u] || " - "MODEL_FILE_MAGIC_NUM[%u] != file_header->magic[%u]", - file_header->length, sizeof(ModelFileHeader), model.model_len, MODEL_FILE_MAGIC_NUM, file_header->magic); - + GE_CHK_BOOL_EXEC(file_header->length == model.model_len - sizeof(ModelFileHeader) && + file_header->magic == MODEL_FILE_MAGIC_NUM, + REPORT_INPUT_ERROR("E10003", std::vector({"parameter", "value", "reason"}), + std::vector({"om", model.om_name.c_str(), "invalid om file"})); + GELOGE(ACL_ERROR_GE_EXEC_MODEL_DATA_SIZE_INVALID, + "[Check][Param] Invalid model, file_header->length[%u] + sizeof(ModelFileHeader)[%zu] != " + "model->model_len[%u] || MODEL_FILE_MAGIC_NUM[%u] != file_header->magic[%u]", + file_header->length, sizeof(ModelFileHeader), model.model_len, + MODEL_FILE_MAGIC_NUM, file_header->magic); + return ACL_ERROR_GE_EXEC_MODEL_DATA_SIZE_INVALID;); Status res = SUCCESS; // Get data address uint8_t *data = reinterpret_cast(model.model_data) + sizeof(ModelFileHeader); if (file_header->is_encrypt == ModelEncryptType::UNENCRYPTED) { // 
Unencrypted model - GE_CHK_BOOL_RET_STATUS(model.key.empty(), ACL_ERROR_GE_PARAM_INVALID, - "Invalid param. model is unencrypted, but key is not empty."); - + if (!model.key.empty()) { + REPORT_INPUT_ERROR("E10003", std::vector({"parameter", "value", "reason"}), + std::vector({"om", model.om_name.c_str(), "invalid om file"})); + GELOGE(ACL_ERROR_GE_PARAM_INVALID, + "[Check][Param] Invalid param, model is unencrypted, but key is not empty."); + return ACL_ERROR_GE_PARAM_INVALID; + } model_data = data; model_len = file_header->length; GELOGD("Model_len is %u, model_file_head_len is %zu.", model_len, sizeof(ModelFileHeader)); } else { GELOGE(ACL_ERROR_GE_PARAM_INVALID, "[Check][Param]Invalid, model encrypt type not supported"); - REPORT_CALL_ERROR("E19999","Invalid model, encrypt type not supported"); + REPORT_INPUT_ERROR("E10003", std::vector({"parameter", "value", "reason"}), + std::vector({"om", model.om_name.c_str(), "invalid om file"})); res = ACL_ERROR_GE_PARAM_INVALID; } diff --git a/ge/common/profiling/profiling_manager.cc b/ge/common/profiling/profiling_manager.cc index f7015525..f1c3c87b 100644 --- a/ge/common/profiling/profiling_manager.cc +++ b/ge/common/profiling/profiling_manager.cc @@ -184,7 +184,10 @@ ge::Status ProfilingManager::ParseOptions(const std::string &options) { if (options.find(kTrainingTrace) == std::string::npos) { return ge::SUCCESS; } - const std::string training_trace = prof_options[kTrainingTrace]; + std::string training_trace; + if (prof_options.contains(kTrainingTrace)) { + training_trace = prof_options[kTrainingTrace]; + } if (training_trace.empty()) { GELOGI("Training trace will not take effect."); return ge::SUCCESS; @@ -196,8 +199,12 @@ ge::Status ProfilingManager::ParseOptions(const std::string &options) { REPORT_INNER_ERROR("E19999", "Training trace param:%s is invalid.", training_trace.c_str()); return ge::PARAM_INVALID; } - fp_point_ = prof_options[kFpPoint]; - bp_point_ = prof_options[kBpPoint]; + if 
(prof_options.contains(kFpPoint)) { + fp_point_ = prof_options[kFpPoint]; + } + if (prof_options.contains(kBpPoint)) { + bp_point_ = prof_options[kBpPoint]; + } if (!fp_point_.empty() && !bp_point_.empty()) { GELOGI("Training trace bp fp is set, bp_point:%s, fp_point:%s.", bp_point_.c_str(), fp_point_.c_str()); } @@ -1014,10 +1021,12 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY void ProfilingManager::GetFpBpP if (is_profiling_valid) { try { Json prof_options = Json::parse(profiling_options); - - fp_point_ = prof_options[kFpPoint]; - bp_point_ = prof_options[kBpPoint]; - + if (prof_options.contains(kFpPoint)) { + fp_point_ = prof_options[kFpPoint]; + } + if (prof_options.contains(kBpPoint)) { + bp_point_ = prof_options[kBpPoint]; + } fp_point = fp_point_; bp_point = bp_point_; if (!fp_point_.empty() && !bp_point_.empty()) { diff --git a/ge/common/profiling/profiling_manager.h b/ge/common/profiling/profiling_manager.h index af9fce06..12293fc5 100755 --- a/ge/common/profiling/profiling_manager.h +++ b/ge/common/profiling/profiling_manager.h @@ -81,7 +81,9 @@ class FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY ProfilingManager { Status ProfModelUnsubscribe(void *model); void StopProfiling(); bool ProfilingTrainingTraceOn() const { return is_training_trace_; } + // report model load profiling data flag, data contain task desc info, step info, model load fusion op info bool ProfilingModelLoadOn() const { return is_load_profiling_; } + // report model execute profiling data flag, data contain model execute time info bool ProfilingModelExecuteOn() const; // is_execute_profiling_ only used by ge option and env bool ProfilingOn() const { return is_load_profiling_ && is_execute_profiling_; } diff --git a/ge/executor/CMakeLists.txt b/ge/executor/CMakeLists.txt index 856e7cf1..820518ad 100644 --- a/ge/executor/CMakeLists.txt +++ b/ge/executor/CMakeLists.txt @@ -28,6 +28,8 @@ set(SRC_LIST "../graph/manager/graph_var_manager.cc" "../graph/manager/graph_mem_allocator.cc" 
"../graph/manager/graph_caching_allocator.cc" + "../graph/manager/session_scope_mem_allocator.cc" + "../graph/manager/graph_mem_manager.cc" "../graph/manager/trans_var_data_utils.cc" "../graph/manager/util/debug.cc" "../graph/manager/rdma_pool_allocator.cc" diff --git a/ge/executor/ge_executor.cc b/ge/executor/ge_executor.cc index 6d3114f4..e66dcb58 100755 --- a/ge/executor/ge_executor.cc +++ b/ge/executor/ge_executor.cc @@ -26,7 +26,7 @@ #include "graph/execute/graph_execute.h" #include "graph/load/graph_loader.h" #include "graph/load/model_manager/model_manager.h" -#include "graph/manager/graph_mem_allocator.h" +#include "graph/manager/graph_mem_manager.h" #include "single_op/single_op_manager.h" #include "graph/load/model_manager/davinci_model.h" #include "opskernel_manager/ops_kernel_builder_manager.h" diff --git a/ge/ge_inference.mk b/ge/ge_inference.mk index ae1288f5..a56eaadf 100755 --- a/ge/ge_inference.mk +++ b/ge/ge_inference.mk @@ -122,12 +122,10 @@ OMG_HOST_SRC_FILES := \ graph/passes/dimension_adjust_pass.cc \ graph/passes/get_original_format_pass.cc \ graph/passes/shape_operate_op_remove_pass.cc \ - graph/passes/unused_op_remove_pass.cc \ graph/passes/assert_pass.cc \ graph/passes/dropout_pass.cc \ graph/passes/infershape_pass.cc \ graph/passes/unused_const_pass.cc \ - graph/passes/isolated_op_remove_pass.cc \ graph/passes/permute_pass.cc \ graph/passes/ctrl_edge_transfer_pass.cc \ graph/passes/end_of_sequence_add_control_pass.cc \ @@ -209,7 +207,6 @@ OMG_HOST_SRC_FILES := \ graph/passes/switch_logic_remove_pass.cc \ graph/passes/switch_data_edges_bypass.cc \ graph/passes/merge_pass.cc \ - graph/passes/variable_format_pass.cc \ graph/passes/variable_op_pass.cc \ graph/passes/cast_remove_pass.cc \ graph/passes/transpose_transdata_pass.cc \ diff --git a/ge/ge_runner.mk b/ge/ge_runner.mk index 2aa19e7a..8ca8572c 100644 --- a/ge/ge_runner.mk +++ b/ge/ge_runner.mk @@ -187,7 +187,6 @@ LIBGE_LOCAL_SRC_FILES := \ graph/passes/identity_pass.cc \ 
graph/passes/ref_identity_delete_op_pass.cc \ graph/passes/infershape_pass.cc \ - graph/passes/isolated_op_remove_pass.cc \ graph/passes/iterator_op_pass.cc \ graph/passes/link_gen_mask_nodes_pass.cc \ graph/passes/merge_pass.cc \ @@ -233,13 +232,11 @@ LIBGE_LOCAL_SRC_FILES := \ graph/passes/transop_without_reshape_fusion_pass.cc \ graph/passes/transpose_transdata_pass.cc \ graph/passes/unused_const_pass.cc \ - graph/passes/unused_op_remove_pass.cc \ graph/passes/var_is_initialized_op_pass.cc \ graph/passes/parallel_concat_start_op_pass.cc \ graph/passes/cond_pass.cc \ graph/passes/cond_remove_pass.cc \ graph/passes/for_pass.cc \ - graph/passes/variable_format_pass.cc \ graph/passes/variable_op_pass.cc \ graph/passes/variable_prepare_op_pass.cc \ graph/passes/variable_ref_delete_op_pass.cc \ diff --git a/ge/generator/ge_generator.cc b/ge/generator/ge_generator.cc index a12f3cf7..8a94aa9b 100644 --- a/ge/generator/ge_generator.cc +++ b/ge/generator/ge_generator.cc @@ -31,6 +31,7 @@ #include "graph/ge_context.h" #include "graph/manager/graph_manager.h" #include "graph/manager/util/rt_context_util.h" +#include "graph/operator_factory_impl.h" #include "graph/opsproto_manager.h" #include "graph/utils/graph_utils.h" #include "graph/utils/type_utils.h" @@ -803,6 +804,41 @@ Status GeGenerator::CheckForSingleOp(OpDescPtr &op_desc, const vector return SUCCESS; } +Status GeGenerator::InferFormatForSingleOp(OpDescPtr &op_desc) { + GE_CHECK_NOTNULL(op_desc); + if (OperatorFactoryImpl::GetInferFormatFunc(op_desc->GetType()) != nullptr) { + auto node_op = ge::OperatorFactoryImpl::CreateOperator("node_op", op_desc->GetType()); + if (node_op.IsEmpty()) { + GELOGW("get op from OperatorFactory fail. op type: %s", op_desc->GetType().c_str()); + } else { + GELOGD("get op from OperatorFactory success. 
op type: %s", op_desc->GetType().c_str()); + auto temp_op_desc = ge::OpDescUtils::GetOpDescFromOperator(node_op); + if (temp_op_desc == nullptr) { + REPORT_INNER_ERROR("E19999", "GetOpDescFromOperator failed, as return nullptr, type:%s", + op_desc->GetType().c_str()); + GELOGE(FAILED, "[Get][OpDesc] temp op desc is null, type:%s", op_desc->GetType().c_str()); + return FAILED; + } + if (!op_desc->UpdateInputName(temp_op_desc->GetAllInputName())) { + GELOGW("InferFormatForSingleOp UpdateInputName failed"); + } + if (!op_desc->UpdateOutputName(temp_op_desc->GetAllOutputName())) { + GELOGW("InferFormatForSingleOp UpdateOutputName failed"); + } + } + node_op.BreakConnect(); + } + auto op = OpDescUtils::CreateOperatorFromOpDesc(op_desc); + auto ret = op_desc->CallInferFormatFunc(op); + if (ret != GRAPH_SUCCESS) { + REPORT_INNER_ERROR("E19999", "call InferFormatFunc for single op:%s fail", + op_desc->GetName().c_str()); + GELOGE(FAILED, "[Call][InferFormatFunc] for single op:%s fail.", op_desc->GetName().c_str()); + return FAILED; + } + return SUCCESS; +} + Status GeGenerator::BuildSingleOp(OpDescPtr &op_desc, const vector &inputs, const vector &outputs, const string &model_file_name, OpEngineType engine_type, ModelBufferData &model_buff, bool is_offline, int32_t compile_flag) { @@ -843,6 +879,7 @@ Status GeGenerator::BuildSingleOp(OpDescPtr &op_desc, const vector &in Graph graph; GE_CHK_STATUS(BuildSingleOpGraph(op_desc, inputs, outputs, name, graph), "[Build][Graph] for single op:%s fail.", op_desc->GetName().c_str()); + GE_CHK_STATUS_RET_NOLOG(InferFormatForSingleOp(op_desc)); // 2. 
check engine type when compile online if (model_file_name == kFileNameSuffix) { diff --git a/ge/graph/build/memory/block_mem_assigner.cc b/ge/graph/build/memory/block_mem_assigner.cc index 2756c6fe..9b81eae3 100755 --- a/ge/graph/build/memory/block_mem_assigner.cc +++ b/ge/graph/build/memory/block_mem_assigner.cc @@ -500,6 +500,7 @@ string MemoryBlock::String() { ss << "Block size: " << Size() << " from " << HeadOffset() << " to " << TailOffset() << " "; ss << "real_size_list: " << ToString(real_size_list_) << " "; ss << "ref_count: " << ref_count_ << " "; + ss << "reuse_mem_: " << reuse_mem_ << " "; ss << "members: "; for (auto x : NodeTypeIndexList()) { ss << "__node: " << ToString(x) << " "; @@ -513,8 +514,8 @@ string MemoryBlock::String() { BlockMemAssigner::BlockMemAssigner(ComputeGraphPtr compute_graph, const map &anchor_to_symbol, const map> &symbol_to_anchors) - : mem_offset_(0), p2p_mem_offset_(0), compute_graph_(std::move(compute_graph)), - symbol_to_anchors_(symbol_to_anchors), anchor_to_symbol_(anchor_to_symbol), life_time_(0) {} + : compute_graph_(std::move(compute_graph)), symbol_to_anchors_(symbol_to_anchors), + anchor_to_symbol_(anchor_to_symbol), life_time_(0) {} BlockMemAssigner::~BlockMemAssigner() { GELOGD("[Destruct][BlockMemAssigner]blocks_store_ size : %lu", blocks_store_.size()); @@ -1123,7 +1124,7 @@ bool BlockMemAssigner::IsZeroCopyBlock(const NodePtr &node, bool continuous) { MemoryBlock *BlockMemAssigner::ApplyMemory(size_t block_size, size_t real_size, size_t no_align_size, OpMemoryType mem_type, const NodePtr &n, uint32_t out_index, const vector &workspace_reuse_flag, const bool is_op_reuse_mem, - const bool continuous, int64_t memory_type) { + const bool continuous, uint64_t memory_type) { GE_CHK_BOOL_TRUE_EXEC_WITH_LOG( n == nullptr, REPORT_INNER_ERROR("E19999", "Input parameter n(type:node_ptr) is null, apply memory failed"); @@ -1824,8 +1825,8 @@ void BlockMemAssigner::AssignMemoryWithReuse(vector &ranges) { 
zero_memory_list_.emplace_back(n, kWorkspace, static_cast(i), false); continue; } - int64_t memory_type = RT_MEMORY_HBM; - if (!GetWorkSpaceMemoryType(n, i, memory_type)) { + uint64_t memory_type = RT_MEMORY_HBM; + if (!GetWorkSpaceMemoryType(n, i, memory_type, workspace_reuse_flag)) { GELOGW("Get workspace memory type failed."); return; } @@ -1860,7 +1861,7 @@ void BlockMemAssigner::AssignMemoryWithReuse(vector &ranges) { } void BlockMemAssigner::CheckWorkspaceReuse(const vector &workspace_reuse_flag, uint32_t index, int64_t stream_id, - MemoryBlock *mem_block, int64_t memory_type) { + MemoryBlock *mem_block, uint64_t memory_type) { bool reuse_mem_flag = ((workspace_reuse_flag.size() > index) && (workspace_reuse_flag[index] == false)) ? false : true; if (reuse_mem_flag) { @@ -1992,24 +1993,29 @@ void BlockMemAssigner::ReuseBlocksByLifeTime(size_t range_size) { } } -void AddBlockMemOffset(size_t &mem_offset, size_t &p2p_mem_offset, MemoryBlock &block) { - if (block.memory_type_ == RT_MEMORY_HBM) { - if (block.first_continuous_block_) { - mem_offset += MEM_ALIGN_SIZE; - } - block.Resize(); - block.SetHeadOffset(mem_offset); - mem_offset += block.Size(); - block.SetTailOffset(mem_offset - 1); - } else if (block.memory_type_ == RT_MEMORY_P2P_DDR) { - if (block.first_continuous_block_) { - p2p_mem_offset += MEM_ALIGN_SIZE; +void AddBlockMemOffset(std::map &mem_offsets, MemoryBlock &block) { + auto it = mem_offsets.find(block.memory_type_); + if (it == mem_offsets.end()) { + auto result = mem_offsets.insert(std::pair(block.memory_type_, 0)); + // Insert failure is unlikely + if (!result.second) { + return; } - block.Resize(); - block.SetHeadOffset(p2p_mem_offset); - p2p_mem_offset += block.Size(); - block.SetTailOffset(p2p_mem_offset - 1); + it = result.first; + } + + if (it == mem_offsets.end()) { + return; + } + + auto &mem_offset = it->second; + if (block.first_continuous_block_) { + mem_offset += MEM_ALIGN_SIZE; } + block.Resize(); + block.SetHeadOffset(mem_offset); 
+ mem_offset += block.Size(); + block.SetTailOffset(mem_offset - 1); } bool DynamicBatchBlockReuse(MemoryBlock &block) { @@ -2036,27 +2042,27 @@ void BlockMemAssigner::ResizeDynamicBatchBlocks() { } } - size_t max_mem_offset = mem_offset_; - size_t max_p2p_mem_offset = p2p_mem_offset_; + std::map max_mem_offsets = mem_offsets_; for (auto &batch_blocks : dynamic_batch_blocks) { - size_t mem_offset = mem_offset_; - size_t p2p_mem_offset = p2p_mem_offset_; + std::map mem_offsets = mem_offsets_; for (auto block : batch_blocks.second) { if (block == nullptr || block->deleted_block_ || block->is_zero_copy_) { continue; } - AddBlockMemOffset(mem_offset, p2p_mem_offset, *block); + AddBlockMemOffset(mem_offsets, *block); } - if (mem_offset > max_mem_offset) { - max_mem_offset = mem_offset; - } - if (p2p_mem_offset > max_p2p_mem_offset) { - max_p2p_mem_offset = p2p_mem_offset; + + for (auto &it : mem_offsets) { + auto itmax = max_mem_offsets.find(it.first); + if (itmax == max_mem_offsets.end()) { + max_mem_offsets[it.first] = it.second; + } else if (it.second > itmax->second) { + itmax->second = it.second; + } + GELOGI("Batch:%s memory type:%ld offset:%zu", batch_blocks.first.c_str(), it.first, it.second); } - GELOGI("Batch[%s] offset[%zu] p2p_offset[%zu]", batch_blocks.first.c_str(), mem_offset, p2p_mem_offset); } - mem_offset_ = max_mem_offset; - p2p_mem_offset_ = max_p2p_mem_offset; + mem_offsets_ = max_mem_offsets; } /// @@ -2074,11 +2080,13 @@ void BlockMemAssigner::ResizeMemoryBlocks() { continue; } - AddBlockMemOffset(mem_offset_, p2p_mem_offset_, *memory_block); + AddBlockMemOffset(mem_offsets_, *memory_block); } ResizeDynamicBatchBlocks(); - GELOGI("mem_offset_ exclude zero_copy_memory is %zu, p2p_mem_offset_ exclude zero_copy_memory is %zu," - "theory_min_memory_size %zu", mem_offset_, p2p_mem_offset_, theory_min_memory_size_); + for (auto it : mem_offsets_) { + GELOGI("Memory type:%ld mem_offset exclude zero_copy_memory:%zu, theory_min_memory_size:%zu", it.first, 
it.second, + theory_min_memory_size_); + } } /// @@ -2217,7 +2225,8 @@ bool BlockMemAssigner::CheckIsZeroMemNodeType(const string &node_type) const { (node_type == CONSTANTOP) || (node_type == HVDWAIT); } -bool BlockMemAssigner::GetWorkSpaceMemoryType(const NodePtr &node, size_t index, int64_t &memory_type) { +bool BlockMemAssigner::GetWorkSpaceMemoryType(const NodePtr &node, size_t index, uint64_t &memory_type, + vector &workspace_reuse_flag) { memory_type = RT_MEMORY_HBM; vector workspace_memory_type; auto op_desc = node->GetOpDesc(); @@ -2233,6 +2242,20 @@ bool BlockMemAssigner::GetWorkSpaceMemoryType(const NodePtr &node, size_t index, return false; } memory_type = has_workspace_mem_type_attr ? workspace_memory_type[index] : RT_MEMORY_HBM; + + vector workspace_no_reuse_scope; + bool has_workspace_no_reuse_scope = + ge::AttrUtils::GetListInt(op_desc, ATTR_NAME_WORKSPACE_MEMORY_NO_REUSE_SCOPE, workspace_no_reuse_scope); + if (has_workspace_no_reuse_scope && (index < workspace_no_reuse_scope.size()) + && (workspace_no_reuse_scope[index] == kSessionNoReuse)) { + memory_type |= kSessionScopeMemory; + if (workspace_reuse_flag.empty()) { + workspace_reuse_flag.assign(workspace_no_reuse_scope.size(), true); + } + // set to no reuse + workspace_reuse_flag[index] = false; + GELOGI("%s's workspace is session scope no reuse, memory type:%lu.", node->GetName().c_str(), memory_type); + } return true; } } // namespace ge diff --git a/ge/graph/build/memory/block_mem_assigner.h b/ge/graph/build/memory/block_mem_assigner.h index 3b1e0d31..231cce09 100755 --- a/ge/graph/build/memory/block_mem_assigner.h +++ b/ge/graph/build/memory/block_mem_assigner.h @@ -34,6 +34,10 @@ namespace ge { const size_t kMaxLifeTime = 0xffffffff; const int32_t kInvalidThreadScopeId = -1; +const uint64_t kSessionScopeMemory = 0x100000000; +const uint64_t kMemoryTypeMask = 0xffffffff; + +enum MemoryNoReuseScope { kReuse, kSessionNoReuse, kGraphNoReuse }; using DependStreamLife = std::map>; @@ -224,9 
+228,7 @@ class BlockMemAssigner : public MemAssigner { Status Assign() override; - size_t GetMemOffset() const { return mem_offset_; } - - size_t GetP2PMemOffset() const { return p2p_mem_offset_; } + const std::map &GetMemOffsets() const { return mem_offsets_; } int64_t GetAtomicAddrCleanId() const { return atomic_addr_clean_id_; } @@ -329,14 +331,10 @@ class BlockMemAssigner : public MemAssigner { /// void UpdateOpTensorMemType(std::list node_index_io_list, int64_t memory_type); - size_t mem_offset_; - size_t p2p_mem_offset_; - + std::map mem_offsets_; ge::ComputeGraphPtr compute_graph_; - std::vector memory_blocks_; std::vector blocks_store_; - std::vector zero_memory_list_; // ref mapping @@ -380,7 +378,7 @@ class BlockMemAssigner : public MemAssigner { /// MemoryBlock *ApplyMemory(size_t block_size, size_t real_size, size_t no_align_size, OpMemoryType mem_type, const ge::NodePtr &n, uint32_t out_index, const std::vector &workspace_reuse_flag, - const bool is_op_reuse_mem, const bool continuous, int64_t memory_type); + const bool is_op_reuse_mem, const bool continuous, uint64_t memory_type); /// /// @ingroup GE @@ -394,7 +392,7 @@ class BlockMemAssigner : public MemAssigner { /// @author /// void CheckWorkspaceReuse(const vector &workspace_reuse_flag, uint32_t index, int64_t stream_id, - MemoryBlock *mem_block, int64_t memory_type); + MemoryBlock *mem_block, uint64_t memory_type); /// /// @ingroup GE @@ -457,7 +455,8 @@ class BlockMemAssigner : public MemAssigner { bool IsContinuousOutput(const NodePtr &n); - bool GetWorkSpaceMemoryType(const NodePtr &node, size_t index, int64_t &memory_type); + bool GetWorkSpaceMemoryType(const NodePtr &node, size_t index, uint64_t &memory_type, + vector &workspace_reuse_flag); void ContinuousOutRefCheck(bool &isAllOutputRef, bool &isOutputHasRef, const NodePtr &n); diff --git a/ge/graph/build/memory/graph_mem_assigner.cc b/ge/graph/build/memory/graph_mem_assigner.cc index 0ac58fe2..166162aa 100755 --- 
a/ge/graph/build/memory/graph_mem_assigner.cc +++ b/ge/graph/build/memory/graph_mem_assigner.cc @@ -69,6 +69,10 @@ int64_t GetSymbolOutputOffset(const std::map &anchor_t } return ge::kInvalidOffset; } + +bool isVariableMemoryNode(const ge::NodePtr &node) { + return (node->GetType() == ge::VARIABLE) || (node->GetType() == ge::CONSTANTOP); +} } // namespace namespace ge { Status VariableMemoryAssigner::Assign() { @@ -107,11 +111,22 @@ Status GraphMemoryAssigner::AssignMemory() { compute_graph_->GetGraphID(), compute_graph_->GetName().c_str()); return ge::FAILED; } - MemoryOffset memory_offset(RT_MEMORY_HBM, mem_assigner->GetMemOffset()); - memory_offset_.emplace(RT_MEMORY_HBM, memory_offset); - if (mem_assigner->GetP2PMemOffset() >= 0) { - MemoryOffset p2p_memory_offset(RT_MEMORY_P2P_DDR, mem_assigner->GetP2PMemOffset()); + for (auto pair : mem_assigner->GetMemOffsets()) { + MemoryOffset offset(pair.first, pair.second); + memory_offset_.emplace(pair.first, offset); + } + + // base memtype offset must be exist + auto it = mem_assigner->GetMemOffsets().find(RT_MEMORY_HBM); + if (it == mem_assigner->GetMemOffsets().end()) { + MemoryOffset memory_offset(RT_MEMORY_HBM, 0); + memory_offset_.emplace(RT_MEMORY_HBM, memory_offset); + } + + it = mem_assigner->GetMemOffsets().find(RT_MEMORY_P2P_DDR); + if (it == mem_assigner->GetMemOffsets().end()) { + MemoryOffset p2p_memory_offset(RT_MEMORY_P2P_DDR, 0); memory_offset_.emplace(RT_MEMORY_P2P_DDR, p2p_memory_offset); } @@ -224,7 +239,7 @@ ge::Status CalculateTensorRealSizeAndOutSize(const ge::ConstGeTensorDescPtr &out return SUCCESS; } -Status GraphMemoryAssigner::ReAssignMemory(bool is_loop_graph, map &mem_type_to_offset) { +Status GraphMemoryAssigner::ReAssignMemory(bool is_loop_graph, map &mem_type_to_offset) { if (memory_offset_.empty()) { REPORT_INNER_ERROR("E19999", "InnerData memory_offset_ empty, not expected, graph_id:%u, graph_name:%s", compute_graph_->GetGraphID(), compute_graph_->GetName().c_str()); @@ -264,7 +279,7 
@@ Status GraphMemoryAssigner::ReAssignMemory(bool is_loop_graph, map &mem_offset, size_t &zero_mem_copy_size) { +Status GraphMemoryAssigner::AssignZeroCopyMemory(map &mem_offset, size_t &zero_mem_copy_size) { BlockMemAssignerPtr priority_assigner = std::move(mem_assigner_->GetPriorityAssinger()); if (priority_assigner == nullptr) { REPORT_INNER_ERROR("E19999", "InnerData priority_assigner nullptr, not expected, graph_id:%u, graph_name:%s", @@ -436,22 +451,31 @@ bool IsContinuousInputConflict(const ge::NodePtr &node, const OpDescPtr &peer_op /// op1 -> node -> op2 /// return true when node is ref from input, and op1 or op2 is reuse input from output bool GraphMemoryAssigner::IsRefFromInputOpCascade(const NodePtr &node) { - bool ref_from_input = false; + std::unordered_set ref_input_index; int32_t reuse_in_index = -1; for (const auto &out_anchor : node->GetAllOutDataAnchors()) { - ref_from_input = GraphUtils::IsRefFromInput(out_anchor, reuse_in_index); - if (ref_from_input) { + bool reuse_input = GraphUtils::IsRefFromInput(out_anchor, reuse_in_index); + if (reuse_input) { GELOGD("IsRefFromInputOpCascade: cur node:%s:%d is ref", node->GetName().c_str(), reuse_in_index); - break; + ref_input_index.insert(reuse_in_index); } } + bool ref_from_input = !ref_input_index.empty(); + if (!ref_from_input) { + return false; + } for (const auto &in_anchor : node->GetAllInDataAnchors()) { const auto &peer_out_anchor = in_anchor->GetPeerOutAnchor(); GE_IF_BOOL_EXEC(peer_out_anchor == nullptr, continue); + auto in_node = peer_out_anchor->GetOwnerNode(); + if (isVariableMemoryNode(in_node) && (ref_input_index.count(in_anchor->GetIdx()) > 0)) { + GELOGD("Reuse variable memory, input node:%s, type:%s.", in_node->GetName().c_str(), in_node->GetType().c_str()); + return false; + } if (ref_from_input && GraphUtils::IsRefFromInput(peer_out_anchor, reuse_in_index)) { GELOGD("IsRefFromInputOpCascade: in node[%s] is ref, reuse index is:%d", - 
peer_out_anchor->GetOwnerNode()->GetName().c_str(), reuse_in_index); + in_node->GetName().c_str(), reuse_in_index); return true; } } @@ -489,6 +513,11 @@ Status GraphMemoryAssigner::UpdateRefOpOffsetReverse(const NodePtr &node) { GE_CHECK_NOTNULL(peer_out_anchor); auto peer_node = peer_out_anchor->GetOwnerNode(); GE_CHECK_NOTNULL(peer_node); + if (isVariableMemoryNode(peer_node)) { + GELOGW("Peer node to update is %s, skip it. Node name:%s.", + peer_node->GetType().c_str(), peer_node->GetName().c_str()); + continue; + } auto peer_op_desc = peer_node->GetOpDesc(); GE_CHECK_NOTNULL(peer_op_desc); vector peer_output_list = peer_op_desc->GetOutputOffset(); @@ -1398,6 +1427,9 @@ ge::Status GraphMemoryAssigner::SetInputOffset() { "graph_id:%u, graph_name:%s", compute_graph_->GetGraphID(), compute_graph_->GetName().c_str()); } for (auto pair : memory_offset_) { + if ((pair.first != RT_MEMORY_HBM) && (pair.second.mem_offset_ == 0)) { + continue; + } GEEVENT("[IMAS]AfterAssignMemory : %s memoffset[%zu], memtype[%ld]", compute_graph_->GetName().c_str(), pair.second.mem_offset_, pair.first); } diff --git a/ge/graph/build/memory/graph_mem_assigner.h b/ge/graph/build/memory/graph_mem_assigner.h index 0d9b03e5..a6a2a686 100755 --- a/ge/graph/build/memory/graph_mem_assigner.h +++ b/ge/graph/build/memory/graph_mem_assigner.h @@ -103,9 +103,9 @@ class GraphMemoryAssigner { ge::Status AssignMemory2HasRefAttrNode(); - ge::Status ReAssignMemory(bool is_loop_graph, map &mem_type_to_offset); + ge::Status ReAssignMemory(bool is_loop_graph, map &mem_type_to_offset); - ge::Status AssignZeroCopyMemory(map &mem_offset, size_t &zero_mem_copy_size); + ge::Status AssignZeroCopyMemory(map &mem_offset, size_t &zero_mem_copy_size); ge::Status SetInputOffset(); diff --git a/ge/graph/build/memory/hybrid_mem_assigner.cc b/ge/graph/build/memory/hybrid_mem_assigner.cc index eff821bf..ccf673b3 100755 --- a/ge/graph/build/memory/hybrid_mem_assigner.cc +++ b/ge/graph/build/memory/hybrid_mem_assigner.cc @@ 
-23,7 +23,7 @@ namespace ge { HybridMemAssigner::HybridMemAssigner(ge::ComputeGraphPtr compute_graph) - : mem_offset_(0), p2p_mem_offset_(0), compute_graph_(std::move(compute_graph)), priority_assigner_(nullptr) {} + : compute_graph_(std::move(compute_graph)), priority_assigner_(nullptr) {} Status HybridMemAssigner::AssignMemory(std::unique_ptr &block_assigner, size_t &mem_size) { vector ranges; @@ -36,7 +36,10 @@ Status HybridMemAssigner::AssignMemory(std::unique_ptr &block_ block_assigner->AssignMemoryWithReuse(ranges); - mem_size = block_assigner->GetMemOffset(); + // total size + for (auto it : block_assigner->GetMemOffsets()) { + mem_size += it.second; + } return SUCCESS; } @@ -73,8 +76,7 @@ Status HybridMemAssigner::Assign() { } priority_assigner->SetOpMemOffset(false); - mem_offset_ = priority_assigner->GetMemOffset(); - p2p_mem_offset_ = priority_assigner->GetP2PMemOffset(); + mem_offsets_ = priority_assigner->GetMemOffsets(); priority_assigner_ = std::move(priority_assigner); return SUCCESS; diff --git a/ge/graph/build/memory/hybrid_mem_assigner.h b/ge/graph/build/memory/hybrid_mem_assigner.h index 7baece44..2bdfd5c5 100755 --- a/ge/graph/build/memory/hybrid_mem_assigner.h +++ b/ge/graph/build/memory/hybrid_mem_assigner.h @@ -42,16 +42,14 @@ class HybridMemAssigner : public MemAssigner { Status Assign() override; - size_t GetMemOffset() const { return mem_offset_; } - size_t GetP2PMemOffset() const { return p2p_mem_offset_; } + const std::map &GetMemOffsets() const { return mem_offsets_; } BlockMemAssignerPtr GetPriorityAssinger() const { return priority_assigner_; } private: Status AssignMemory(std::unique_ptr &block_assigner, size_t &mem_size); - size_t mem_offset_; - size_t p2p_mem_offset_; + std::map mem_offsets_; ge::ComputeGraphPtr compute_graph_; diff --git a/ge/graph/build/memory/memory_assigner.cc b/ge/graph/build/memory/memory_assigner.cc index 34b97c60..570aae07 100755 --- a/ge/graph/build/memory/memory_assigner.cc +++ 
b/ge/graph/build/memory/memory_assigner.cc @@ -20,7 +20,7 @@ #include "graph/build/memory/graph_mem_assigner.h" namespace ge { -Status MemoryAssigner::AssignMemory(bool is_loop_graph, map &mem_offset, size_t &zero_copy_mem_size) { +Status MemoryAssigner::AssignMemory(bool is_loop_graph, map &mem_offset, size_t &zero_copy_mem_size) { GraphMemoryAssigner graph_mem_assigner(compute_graph_); if (graph_mem_assigner.AssignMemory() != ge::SUCCESS) { diff --git a/ge/graph/build/model_builder.cc b/ge/graph/build/model_builder.cc index 0e625990..ce2f57f9 100755 --- a/ge/graph/build/model_builder.cc +++ b/ge/graph/build/model_builder.cc @@ -47,6 +47,7 @@ #include "omg/version.h" #include "register/op_registry.h" #include "graph/passes/set_input_output_offset_pass.h" +#include "graph/build/memory/block_mem_assigner.h" using std::map; using std::set; @@ -398,9 +399,21 @@ Status ModelBuilder::BuildModelDef(ge::Model &model) { REPORT_INNER_ERROR("E19999", "Set Attr:%s in model failed", ATTR_MODEL_MEMORY_SIZE.c_str()); GELOGE(FAILED, "[Set][Attr] %s in model failed", ATTR_MODEL_MEMORY_SIZE.c_str()); return FAILED); + auto mem_type_session_scope = (kSessionScopeMemory | RT_MEMORY_HBM); + size_t session_scope_mem_offset = 0; + auto it = mem_type_to_mem_offset_.find(mem_type_session_scope); + if (it != mem_type_to_mem_offset_.end()) { + session_scope_mem_offset = it->second; + } if (mem_type_to_mem_offset_.find(RT_MEMORY_P2P_DDR) != mem_type_to_mem_offset_.end()) { p2p_mem_offset_ = mem_type_to_mem_offset_[RT_MEMORY_P2P_DDR]; } + GE_CHK_BOOL_EXEC(ge::AttrUtils::SetInt(&model, ATTR_MODEL_SESSION_SCOPE_MEMORY_SIZE, session_scope_mem_offset), + REPORT_INNER_ERROR("E19999", "Set Attr:%s in model failed", + ATTR_MODEL_SESSION_SCOPE_MEMORY_SIZE.c_str()); + GELOGE(FAILED, "SetInt of ATTR_NAME_SESSION_SCOPE_MEMORY_SIZE failed."); + return FAILED); + GE_CHK_BOOL_EXEC(ge::AttrUtils::SetInt(&model, ATTR_MODEL_P2P_MEMORY_SIZE, p2p_mem_offset_), REPORT_INNER_ERROR("E19999", "Set Attr:%s in model 
failed", ATTR_MODEL_P2P_MEMORY_SIZE.c_str()); GELOGE(FAILED, "[Set][Attr] %s in model failed", ATTR_MODEL_P2P_MEMORY_SIZE.c_str()); @@ -434,8 +447,8 @@ Status ModelBuilder::BuildModelDef(ge::Model &model) { REPORT_INNER_ERROR("E19999", "Set Attr:%s in model failed", ATTR_MODEL_OUT_NODES_NAME.c_str()); GELOGE(FAILED, "[Set][Str] %s in model failed.", ATTR_MODEL_OUT_NODES_NAME.c_str()); return FAILED); - GELOGI("For model, max_mem_offset_: %zu, p2p_mem_size: %zu, zero_copy_mem_size_: %zu", max_mem_offset_, - p2p_mem_offset_, zero_copy_mem_size_); + GELOGI("For model, max_mem_offset: %zu, p2p_mem_size: %zu, zero_copy_mem_size: %zu, session_scope_mem_size: %zu", + max_mem_offset_, p2p_mem_offset_, zero_copy_mem_size_, session_scope_mem_offset); string fp_ceiling_mode; if (ge::GetContext().GetOption("ge.fpCeilingMode", fp_ceiling_mode) == SUCCESS) { if (!ge::AttrUtils::SetStr(&model, ATTR_FP_CEILING_MODE, fp_ceiling_mode)) { diff --git a/ge/graph/build/model_builder.h b/ge/graph/build/model_builder.h index 67def859..6f097329 100644 --- a/ge/graph/build/model_builder.h +++ b/ge/graph/build/model_builder.h @@ -93,7 +93,7 @@ class ModelBuilder { uint64_t session_id_; - map mem_type_to_mem_offset_; + map mem_type_to_mem_offset_; size_t weight_offset_; diff --git a/ge/graph/build/stream_allocator.cc b/ge/graph/build/stream_allocator.cc index 0e1a1aba..a5a1112e 100644 --- a/ge/graph/build/stream_allocator.cc +++ b/ge/graph/build/stream_allocator.cc @@ -905,6 +905,7 @@ Status StreamAllocator::SplitStreams(vector> &split_streams) { added_stream_num_vec[stream_id]++; new_stream_id_vec[stream_id] = last_stream_id; split_streams[stream_id].emplace(last_stream_id); + split_ori_stream_map_[last_stream_id] = stream_id; node_split_stream_map_[cur_node] = last_stream_id; // Add the send/recv event to the first and last nodes of the split stream. 
@@ -1104,7 +1105,7 @@ Status StreamAllocator::UpdateActiveStreamsForActiveNode(const vector(new_split_stream)); active_streams.assign(new_active_streams.begin(), new_active_streams.end()); if (!AttrUtils::SetListInt(active_op, ATTR_NAME_ACTIVE_STREAM_LIST, active_streams)) { @@ -1148,13 +1150,21 @@ Status StreamAllocator::UpdateActiveStreamsForSubgraphs() const { } bool StreamAllocator::IsActivated(int64_t stream_id) const { + const auto &iter = split_ori_stream_map_.find(stream_id); + if (iter == split_ori_stream_map_.end()) { + REPORT_INNER_ERROR("E19999", "Find original stream_id failed, split_stream_id=%ld", stream_id); + GELOGE(INTERNAL_ERROR, "[CheckActivated][Check] Find original stream_id failed, split_stream_id=%ld", stream_id); + return false; + } + int64_t ori_stream_id = iter->second; for (const auto &node : whole_graph_->GetNodes(whole_graph_->GetGraphUnknownFlag())) { auto op_desc = node->GetOpDesc(); vector active_streams; if (op_desc == nullptr || !AttrUtils::GetListInt(op_desc, ATTR_NAME_ACTIVE_STREAM_LIST, active_streams)) { continue; } - if (std::find(active_streams.begin(), active_streams.end(), stream_id) != active_streams.end()) { + if (std::find(active_streams.begin(), active_streams.end(), stream_id) != active_streams.end() || + std::find(active_streams.begin(), active_streams.end(), ori_stream_id) != active_streams.end()) { return true; } } diff --git a/ge/graph/build/stream_allocator.h b/ge/graph/build/stream_allocator.h index 44dcd673..34b2ec3a 100644 --- a/ge/graph/build/stream_allocator.h +++ b/ge/graph/build/stream_allocator.h @@ -66,7 +66,7 @@ class StreamAllocator { Status UpdateActiveStreamsForSwitchNode(NodePtr &switch_node); Status InsertActiveNodesAfterSwitch(NodePtr &switch_nodes, std::vector &switch_active_nodes); Status UpdateActiveStreamsForActiveNode(const std::vector> &split_streams, NodePtr &node); - Status UpdateActiveStreamsForSubgraphs() const; + Status UpdateActiveStreamsForSubgraphs(); bool IsActivated(int64_t 
stream_id) const; Status SetActiveStreamsForLoop(); Status CheckStreamActived() const; @@ -114,6 +114,7 @@ class StreamAllocator { std::map> specific_activated_streams_nodes_map_; std::map node_split_stream_map_; + std::map split_ori_stream_map_; std::map subgraph_first_active_node_map_; // send events corresponding to the node @@ -123,4 +124,4 @@ class StreamAllocator { std::map> node_to_recv_events_; }; } // namespace ge -#endif // GE_GRAPH_BUILD_STREAM_ALLOCATOR_H_ \ No newline at end of file +#endif // GE_GRAPH_BUILD_STREAM_ALLOCATOR_H_ diff --git a/ge/graph/common/omg_util.cc b/ge/graph/common/omg_util.cc index 1dba8c51..15fa3c47 100644 --- a/ge/graph/common/omg_util.cc +++ b/ge/graph/common/omg_util.cc @@ -272,20 +272,32 @@ bool IsUnknownShapeTensor(const GeTensorDesc &tensor_desc) { /// @brief Set Op _force_unknown_shape flag /// @param [in] node /// @param [in] force_unknown, set attribute if true +/// @param [in] group_index, condition group index of node. /// @return /// -void MarkForceUnknownShape(const NodePtr &node, bool force_unknown) { - GE_RT_VOID_CHECK_NOTNULL(node); +void MarkForceUnknownShape(const NodePtr &node, bool force_unknown, int64_t group_index) { if (!force_unknown) { return; } - GELOGD("[%s] mark as force unknown shape node", node->GetName().c_str()); - if (!AttrUtils::SetBool(node->GetOpDesc(), ATTR_NAME_FORCE_UNKNOWN_SHAPE, force_unknown)) { + GE_RT_VOID_CHECK_NOTNULL(node); + const auto &op_desc = node->GetOpDesc(); + GE_RT_VOID_CHECK_NOTNULL(op_desc); + + // op_desc as AttrHolderAdapter valid, Set attribute always success, just log for check. 
+ GELOGD("Mark [%s] as force unknown shape node, group index: %ld", node->GetName().c_str(), group_index); + if (!AttrUtils::SetBool(op_desc, ATTR_NAME_FORCE_UNKNOWN_SHAPE, force_unknown)) { REPORT_INNER_ERROR("E19999", "Set Attr:%s fail for op:%s(%s)", ATTR_NAME_FORCE_UNKNOWN_SHAPE.c_str(), node->GetName().c_str(), node->GetType().c_str()); GELOGE(FAILED, "[Set][Attr] %s fail for op:%s(%s)", ATTR_NAME_FORCE_UNKNOWN_SHAPE.c_str(), node->GetName().c_str(), node->GetType().c_str()); } + + if (!AttrUtils::SetInt(op_desc, ATTR_NAME_CONTROL_FLOW_GROUP, group_index)) { + REPORT_INNER_ERROR("E19999", "Set Attr:%s fail for op:%s(%s)", ATTR_NAME_CONTROL_FLOW_GROUP.c_str(), + node->GetName().c_str(), node->GetType().c_str()); + GELOGE(FAILED, "[Set][Attr] %s fail for op:%s(%s)", ATTR_NAME_CONTROL_FLOW_GROUP.c_str(), + node->GetName().c_str(), node->GetType().c_str()); + } } } // namespace ge diff --git a/ge/graph/common/omg_util.h b/ge/graph/common/omg_util.h index c84da7f8..fdb0e138 100644 --- a/ge/graph/common/omg_util.h +++ b/ge/graph/common/omg_util.h @@ -129,9 +129,10 @@ bool IsUnknownShapeTensor(const GeTensorDesc &tensor_desc); /// @brief Set Op _force_unknown_shape flag /// @param [in] node /// @param [in] force_unknown, set attribute if true +/// @param [in] group_index, condition group index of node. /// @return /// -void MarkForceUnknownShape(const NodePtr &node, bool force_unknown); +void MarkForceUnknownShape(const NodePtr &node, bool force_unknown, int64_t group_index); } // namespace ge #endif // GE_GRAPH_COMMON_OMG_UTIL_H_ diff --git a/ge/graph/load/graph_loader.cc b/ge/graph/load/graph_loader.cc index e4904614..7ee74d1d 100755 --- a/ge/graph/load/graph_loader.cc +++ b/ge/graph/load/graph_loader.cc @@ -33,12 +33,12 @@ Status GraphLoader::UnloadModel(uint32_t model_id) { Status ret = model_manager->Stop(model_id); if (ret != SUCCESS) { - GELOGE(ret, "UnloadModel: Stop failed. model id:%u", model_id); + GELOGE(ret, "[Stop][Model] failed. 
model id:%u", model_id); } ret = model_manager->Unload(model_id); if (ret != SUCCESS) { - GELOGE(ret, "UnloadModel: Unload failed. model id:%u", model_id); + GELOGE(ret, "[Unload][Model] failed. model id:%u", model_id); return ret; } GELOGI("UnLoad model success, model id:%u.", model_id); @@ -50,14 +50,13 @@ Status GraphLoader::LoadModelOnline(uint32_t &model_id, const std::shared_ptrLoadModelOnline(model_id, ge_root_model_ptr, listener); if (ret != SUCCESS) { - GELOGE(ret, "LoadModel: Load failed. ret = %u", ret); + GELOGE(ret, "[Load][Model] Online failed. ret = %u, model_id:%u", ret, model_id); rt_ret = rtDeviceReset(GetContext().DeviceId()); if (rt_ret != RT_ERROR_NONE) { REPORT_CALL_ERROR("E19999", "Call rtDeviceReset failed, device_id:%u, ret:0x%X", GetContext().DeviceId(), rt_ret); - GELOGE(RT_FAILED, "Call rt api failed, ret: 0x%X", rt_ret); + GELOGE(RT_FAILED, "[Call][RtDeviceReset] failed, device_id:%u, ret:0x%X", GetContext().DeviceId(), rt_ret); } return ret; } @@ -81,31 +80,31 @@ Status GraphLoader::LoadModelOnline(uint32_t &model_id, const std::shared_ptrStart(model_id); if (ret != SUCCESS) { if (model_manager->Unload(model_id) != SUCCESS) { - GELOGE(ret, "LoadModel: Unload failed while trying to unload after a failed start."); + GELOGE(ret, "[Unload][Model] failed while trying to unload after a failed start, model_id:%u.", model_id); } rt_ret = rtDeviceReset(GetContext().DeviceId()); if (rt_ret != RT_ERROR_NONE) { REPORT_CALL_ERROR("E19999", "Call rtDeviceReset failed, device_id:%u, ret:0x%X", GetContext().DeviceId(), rt_ret); - GELOGE(RT_FAILED, "Call rt api failed, ret: 0x%X", rt_ret); + GELOGE(RT_FAILED, "[Call][RtDeviceReset] failed, device_id:%u, ret:0x%X", GetContext().DeviceId(), rt_ret); } - GELOGE(ret, "LoadModel: Start failed."); + GELOGE(ret, "[Start][Model] failed, model_id:%u.", model_id); return ret; } rt_ret = rtDeviceReset(GetContext().DeviceId()); if (rt_ret != RT_ERROR_NONE) { REPORT_CALL_ERROR("E19999", "Call rtDeviceReset failed, 
device_id:%u, ret:0x%X", GetContext().DeviceId(), rt_ret); - GELOGE(RT_FAILED, "Call rt api failed, ret: 0x%X", rt_ret); + GELOGE(RT_FAILED, "[Call][RtDeviceReset] failed, device_id:%u, ret:0x%X", GetContext().DeviceId(), rt_ret); return RT_FAILED; } GELOGI("Load model online success, model_id:%u.", model_id); @@ -118,7 +117,7 @@ Status GraphLoader::GetMaxUsedMemory(uint32_t model_id, uint64_t &max_size) { GE_CHECK_NOTNULL(model_manager); Status ret = model_manager->GetMaxUsedMemory(model_id, max_size); if (ret != SUCCESS) { - GELOGE(ret, "GetMaxUsedMemory: GetMaxUsedMemory failed."); + GELOGE(ret, "[Call][GetMaxUsedMemory] failed, model_id:%u.", model_id); return ret; } return SUCCESS; @@ -127,21 +126,20 @@ Status GraphLoader::GetMaxUsedMemory(uint32_t model_id, uint64_t &max_size) { Status GraphLoader::LoadDataFromFile(const std::string &path, const std::string &key_path, int32_t priority, ModelData &model_data) { if (!CheckInputPathValid(path)) { - GELOGE(ACL_ERROR_GE_EXEC_MODEL_PATH_INVALID, "model path is invalid: %s", path.c_str()); + GELOGE(ACL_ERROR_GE_EXEC_MODEL_PATH_INVALID, "[Check][Param] model path is invalid:%s", path.c_str()); return ACL_ERROR_GE_EXEC_MODEL_PATH_INVALID; } GELOGI("Load model begin, model path is: %s", path.c_str()); if (!key_path.empty() && !CheckInputPathValid(key_path)) { - REPORT_INNER_ERROR("E19999", "Param key_path:%s empty or invalid", - key_path.c_str()); - GELOGE(ACL_ERROR_GE_PARAM_INVALID, "decrypt_key path is invalid: %s", key_path.c_str()); + REPORT_INNER_ERROR("E19999", "Param key_path:%s empty or invalid", key_path.c_str()); + GELOGE(ACL_ERROR_GE_PARAM_INVALID, "[Check][Param] decrypt_key path is invalid:%s", key_path.c_str()); return ACL_ERROR_GE_PARAM_INVALID; } Status ret = ModelParserBase::LoadFromFile(path.c_str(), key_path.c_str(), priority, model_data); if (ret != SUCCESS) { - GELOGE(ret, "LoadModelFromFile: Load failed. ret = %u", ret); + GELOGE(ret, "[Call][LoadFromFile] failed. 
ret = %u, path:%s, key path:%s", ret, path.c_str(), key_path.c_str()); if (model_data.model_data != nullptr) { delete[] static_cast(model_data.model_data); model_data.model_data = nullptr; @@ -156,18 +154,19 @@ Status GraphLoader::CommandHandle(const Command &command) { GE_CHECK_NOTNULL(model_manager); Status ret = model_manager->HandleCommand(command); if (ret != SUCCESS) { - GELOGE(ret, "CommandHandle: Command Handle failed."); + GELOGE(ret, "[Handle][Command] failed, module_index:%lu.", command.module_index); return ret; } } catch (std::bad_alloc &) { REPORT_INNER_ERROR("E19999", "Bad memory allocation occur"); - GELOGE(ACL_ERROR_GE_MEMORY_ALLOCATION, "Command handle failed, bad memory allocation occur !"); + GELOGE(ACL_ERROR_GE_MEMORY_ALLOCATION, "[Handle][Command] failed, " + "bad memory allocation occur, module_index:%lu.", command.module_index); return ACL_ERROR_GE_MEMORY_ALLOCATION; } catch (...) { REPORT_INNER_ERROR("E19999", "Some exceptions occur"); - GELOGE(FAILED, "Command handle failed, some exceptions occur !"); + GELOGE(FAILED, "[Handle][Command] failed, some exceptions occur, module_index:%lu.", command.module_index); return FAILED; } @@ -184,7 +183,7 @@ Status GraphLoader::LoadModelFromData(uint32_t &model_id, const ModelData &model Status ret = model_manager->LoadModelOffline( model_id, model_data, nullptr, dev_ptr, mem_size, weight_ptr, weight_size); if (ret != SUCCESS) { - GELOGE(ret, "Load model failed, model_id:%u.", model_id); + GELOGE(ret, "[Load][Model] failed, model_id:%u.", model_id); return ret; } GELOGI("Load model success, model_id:%u.", model_id); @@ -210,7 +209,7 @@ Status GraphLoader::LoadModelWithQ(uint32_t &model_id, const ModelData &model_da GE_CHECK_NOTNULL(model_manager); Status ret = model_manager->LoadModelWithQ(model_id, model_data, input_queue_ids, output_queue_ids); if (ret != SUCCESS) { - GELOGE(ret, "Load model with queue failed, model_id:%u.", model_id); + GELOGE(ret, "[Load][Model] with queue failed, model_id:%u.", 
model_id); return ret; } @@ -237,7 +236,7 @@ Status GraphLoader::ExecuteModel(uint32_t model_id, rtStream_t stream, bool asyn Status ret = model_manager->ExecuteModel(model_id, stream, async_mode, input_data, input_desc, output_data, output_desc); if (ret != SUCCESS) { - GELOGE(ret, "Execute model failed, model_id:%u.", model_id); + GELOGE(ret, "[Execute][Model] failed, model_id:%u.", model_id); return ret; } @@ -250,7 +249,7 @@ Status GraphLoader::GetMemoryInfo(int64_t &free) { if (rt_ret != RT_ERROR_NONE) { REPORT_CALL_ERROR("E19999", "Call rtSetDevice failed, device_id:%u, ret:0x%X", GetContext().DeviceId(), rt_ret); - GELOGE(RT_FAILED, "Call rt api failed, ret: 0x%X", rt_ret); + GELOGE(RT_FAILED, "[Call][RtSetDevice] failed, device_id:%u, ret:0x%X", GetContext().DeviceId(), rt_ret); return RT_FAILED; } size_t total_mem = 0; @@ -258,14 +257,14 @@ Status GraphLoader::GetMemoryInfo(int64_t &free) { rt_ret = rtMemGetInfo(&free_mem, &total_mem); if (rt_ret != RT_ERROR_NONE) { REPORT_CALL_ERROR("E19999", "Call rtMemGetInfo failed, ret:0x%X", rt_ret); - GELOGE(RT_FAILED, "Call rt api failed, ret: 0x%X", rt_ret); + GELOGE(RT_FAILED, "[Call][RtMemGetInfo] failed, ret:0x%X", rt_ret); return RT_FAILED; } rt_ret = rtDeviceReset(GetContext().DeviceId()); if (rt_ret != RT_ERROR_NONE) { REPORT_CALL_ERROR("E19999", "Call rtDeviceReset failed, device_id:%u, ret:0x%X", GetContext().DeviceId(), rt_ret); - GELOGE(RT_FAILED, "Call rt api failed, ret: 0x%X", rt_ret); + GELOGE(RT_FAILED, "[Call][RtDeviceReset] failed, device_id:%u, ret:0x%X", GetContext().DeviceId(), rt_ret); return RT_FAILED; } // Add small page memory size @@ -280,7 +279,8 @@ Status GraphLoader::DestroyAicpuKernel(uint64_t session_id, uint32_t model_id, u GE_CHECK_NOTNULL(model_manager); Status ret = model_manager->DestroyAicpuKernel(session_id, model_id, sub_model_id); if (ret != SUCCESS) { - GELOGE(ret, "Destroy aicpu kernel failed."); + GELOGE(ret, "[Destroy][AicpuKernel] failed, session_id:%lu, model_id:%u, 
sub_model_id:%u.", + session_id, model_id, sub_model_id); return ret; } return SUCCESS; @@ -291,7 +291,7 @@ Status GraphLoader::DestroyAicpuSessionForInfer(uint32_t model_id) { GE_CHECK_NOTNULL(model_manager); Status ret = model_manager->DestroyAicpuSessionForInfer(model_id); if (ret != SUCCESS) { - GELOGE(ret, "Destroy aicpu serrion for infer failed."); + GELOGE(ret, "[Call][DestroyAicpuSessionForInfer] failed, model_id:%u.", model_id); return ret; } return SUCCESS; diff --git a/ge/graph/load/model_manager/davinci_model.cc b/ge/graph/load/model_manager/davinci_model.cc index 5d90d5a1..b52796c8 100755 --- a/ge/graph/load/model_manager/davinci_model.cc +++ b/ge/graph/load/model_manager/davinci_model.cc @@ -40,7 +40,7 @@ #include "graph/load/model_manager/cpu_queue_schedule.h" #include "graph/load/model_manager/model_manager.h" #include "graph/load/model_manager/tbe_handle_store.h" -#include "graph/manager/graph_mem_allocator.h" +#include "graph/manager/graph_mem_manager.h" #include "graph/manager/graph_var_manager.h" #include "graph/manager/trans_var_data_utils.h" #include "graph/manager/util/debug.h" @@ -60,6 +60,8 @@ #include "graph/common/local_context.h" #include "common/formats/utils/formats_trans_utils.h" #include "graph/common/omg_util.h" +#include "graph/build/memory/block_mem_assigner.h" +#include "graph/manager/session_scope_mem_allocator.h" // create std::thread, catch exceptions using try/catch #define CREATE_STD_THREAD(thread_id, func, args) \ @@ -168,7 +170,6 @@ DavinciModel::DavinciModel(int32_t priority, const std::shared_ptr(dev_ptr); - p2p_mem_base_ = static_cast(dev_ptr); is_inner_mem_base_ = false; if (TotalMemSize() && mem_base_ == nullptr) { @@ -422,24 +421,13 @@ Status DavinciModel::InitFeatureMapAndP2PMem(void *dev_ptr, size_t mem_size) { is_inner_mem_base_ = true; } - if (p2p_data_size != 0) { - p2p_mem_base_ = MallocP2PMem(p2p_data_size); - if (p2p_mem_base_ == nullptr) { - REPORT_CALL_ERROR("E19999", "MallocFeatureMapMem fail, 
p2p_data_size:%zu, model_id:%u, check invalid", - p2p_data_size, model_id_); - GELOGE(ACL_ERROR_GE_MEMORY_ALLOCATION, "[Alloc][Memory] for p2p failed, size:%zu, model_id:%u", - p2p_data_size, model_id_); - return ACL_ERROR_GE_MEMORY_ALLOCATION; - } - GELOGI("InitFeatureMapAndP2PMem graph_%u MallocMemory type[F] memaddr[%p] mem_size[%zu]", runtime_param_.graph_id, - p2p_mem_base_, p2p_data_size); - is_inner_p2p_mem_base_ = true; + if (!runtime_param_.memory_infos.empty()) { + GE_CHK_STATUS_RET(MallocExMem(), "MallocExMem failed."); } GE_CHK_STATUS_RET(InitVariableMem(), "[Init][VariableMemory] failed, model_id:%u", model_id_); runtime_param_.mem_base = mem_base_; runtime_param_.weight_base = weights_mem_base_; - runtime_param_.memory_infos[RT_MEMORY_P2P_DDR].memory_base = p2p_mem_base_; return SUCCESS; } @@ -465,7 +453,6 @@ Status DavinciModel::InitVariableMem() { void DavinciModel::InitRuntimeParams() { int64_t value = 0; bool ret; - MemInfo p2p_mem_info; ret = ge::AttrUtils::GetInt(ge_model_, ATTR_MODEL_MEMORY_SIZE, value); runtime_param_.mem_size = ret ? (uint64_t)value : 0; ret = ge::AttrUtils::GetInt(ge_model_, ATTR_MODEL_WEIGHT_SIZE, value); @@ -490,16 +477,18 @@ void DavinciModel::InitRuntimeParams() { runtime_param_.var_size = ret ? (uint64_t)value : 0; session_id_ = runtime_param_.session_id; ret = ge::AttrUtils::GetInt(ge_model_, ATTR_MODEL_P2P_MEMORY_SIZE, value); - p2p_mem_info.memory_size = ret ? (uint64_t)value : 0; + MemInfo p2p_mem_info; + p2p_mem_info.memory_size = static_cast(ret ? 
value : 0); + p2p_mem_info.memory_type = RT_MEMORY_P2P_DDR; + p2p_mem_info.memory_key = "_p"; runtime_param_.memory_infos[RT_MEMORY_P2P_DDR] = std::move(p2p_mem_info); - GELOGI( - "InitRuntimeParams(), session_id:%lu, stream_num:%u, event_num:%u, label_num:%u, " - "logic_mem_base:0x%lx, logic_weight_base:0x%lx, logic_var_base:0x%lx, " - "memory_size:%lu, weight_size:%lu, var_size:%lu", - runtime_param_.session_id, runtime_param_.stream_num, runtime_param_.event_num, runtime_param_.label_num, - runtime_param_.logic_mem_base, runtime_param_.logic_weight_base, runtime_param_.logic_var_base, - runtime_param_.mem_size, runtime_param_.weight_size, runtime_param_.var_size); + ret = ge::AttrUtils::GetInt(ge_model_, ATTR_MODEL_SESSION_SCOPE_MEMORY_SIZE, value); + MemInfo session_scope_mem_info; + session_scope_mem_info.memory_size = static_cast(ret ? value : 0); + runtime_param_.memory_infos[kSessionScopeMemory | RT_MEMORY_HBM] = std::move(session_scope_mem_info); + + GELOGI("InitRuntimeParams(), %s.", runtime_param_.ToString().c_str()); } void DavinciModel::CheckHasHcomOp(const ComputeGraphPtr &compute_graph) { @@ -4089,14 +4078,15 @@ Status DavinciModel::InitEntryTask() { uint8_t *DavinciModel::MallocFeatureMapMem(size_t data_size) { uint8_t *mem_base = nullptr; const string purpose("feature map,used for op input and output."); - char ge_static_mem_env[MMPA_MAX_PATH] = { 0x00 }; + char ge_static_mem_env[MMPA_MAX_PATH] = {0x00}; INT32 res = mmGetEnv(kEnvGeuseStaticMemory, ge_static_mem_env, MMPA_MAX_PATH); if (res == EN_OK) { data_size = static_cast(VarManager::Instance(session_id_)->GetGraphMemoryMaxSize()); string memory_key = std::to_string(0) + "_f"; - mem_base = MemManager::Instance(RT_MEMORY_HBM)->MallocMemory(purpose, memory_key, data_size, GetDeviceId()); + mem_base = + MemManager::Instance().MemInstance(RT_MEMORY_HBM).MallocMemory(purpose, memory_key, data_size, GetDeviceId()); } else { - mem_base = MemManager::Instance(RT_MEMORY_HBM)->MallocMemory(purpose, 
data_size, GetDeviceId()); + mem_base = MemManager::Instance().MemInstance(RT_MEMORY_HBM).MallocMemory(purpose, data_size, GetDeviceId()); } if (mem_base != nullptr) { @@ -4105,83 +4095,119 @@ uint8_t *DavinciModel::MallocFeatureMapMem(size_t data_size) { return mem_base; } -uint8_t *DavinciModel::MallocP2PMem(size_t p2p_data_size) { - uint8_t *p2p_mem_base = nullptr; - const string purpose("p2p memory, used for some op related to hcom"); - if (std::getenv(kEnvGeuseStaticMemory) != nullptr) { - string p2p_memory_key = std::to_string(0) + "_p"; - p2p_mem_base = - MemManager::Instance(RT_MEMORY_P2P_DDR)->MallocMemory(purpose, p2p_memory_key, p2p_data_size, GetDeviceId()); - } else { - p2p_mem_base = MemManager::Instance(RT_MEMORY_P2P_DDR)->MallocMemory(purpose, p2p_data_size, GetDeviceId()); +Status DavinciModel::MallocExMem() { + char ge_static_mem_env[MMPA_MAX_PATH] = {0x00}; + INT32 res_static_memory = mmGetEnv(kEnvGeuseStaticMemory, ge_static_mem_env, MMPA_MAX_PATH); + for (auto it : runtime_param_.memory_infos) { + auto mem_size = it.second.memory_size; + if (mem_size == 0) { + continue; + } + bool sessoion_scope = ((kSessionScopeMemory & it.first) == kSessionScopeMemory); + auto mem_type = it.first & kMemoryTypeMask; + uint8_t *mem_base = nullptr; + const string purpose("p2p memory, used for some op related to hcom or session scope memory"); + if (sessoion_scope) { + mem_base = MemManager::Instance().SessionScopeMemInstance(mem_type).Malloc(mem_size, runtime_param_.session_id); + } else if (res_static_memory == EN_OK) { + string memory_key = std::to_string(0) + it.second.memory_key; + mem_base = + MemManager::Instance().MemInstance(mem_type).MallocMemory(purpose, memory_key, mem_size, GetDeviceId()); + } else { + mem_base = MemManager::Instance().MemInstance(mem_type).MallocMemory(purpose, mem_size, GetDeviceId()); + } + + if (mem_base == nullptr) { + REPORT_CALL_ERROR("E19999", "MallocExMem fail, type:%ld size:%zu, model_id:%u, check invalid", + mem_type, 
mem_size, model_id_); + GELOGE(ACL_ERROR_GE_MEMORY_ALLOCATION, "Alloc ex memory failed, type:%ld size: %zu", mem_type, mem_size); + return ACL_ERROR_GE_MEMORY_ALLOCATION; + } + it.second.memory_base = mem_base; + GELOGI("InitFeatureMapAndP2PMem graph_%u MallocMemory type[F] mem_type[%ld] mem_addr[%p] mem_size[%zu]", + runtime_param_.graph_id, mem_type, mem_base, mem_size); } - return p2p_mem_base; + return SUCCESS; } uint8_t *DavinciModel::MallocWeightsMem(size_t weights_size) { uint8_t *weights_mem_base = nullptr; const string purpose("weights memory in inference network."); - char ge_static_mem_env[MMPA_MAX_PATH] = { 0x00 }; + char ge_static_mem_env[MMPA_MAX_PATH] = {0x00}; INT32 res = mmGetEnv(kEnvGeuseStaticMemory, ge_static_mem_env, MMPA_MAX_PATH); if (res == EN_OK) { string weight_memory_key = std::to_string(0) + "_w"; - weights_mem_base = - MemManager::Instance(RT_MEMORY_HBM)->MallocMemory(purpose, weight_memory_key, weights_size, GetDeviceId()); + weights_mem_base = MemManager::Instance() + .MemInstance(RT_MEMORY_HBM) + .MallocMemory(purpose, weight_memory_key, weights_size, GetDeviceId()); } else { - weights_mem_base = MemManager::Instance(RT_MEMORY_HBM)->MallocMemory(purpose, weights_size, GetDeviceId()); + weights_mem_base = + MemManager::Instance().MemInstance(RT_MEMORY_HBM).MallocMemory(purpose, weights_size, GetDeviceId()); } return weights_mem_base; } void DavinciModel::FreeFeatureMapMem() { - char ge_static_mem_env[MMPA_MAX_PATH] = { 0x00 }; + char ge_static_mem_env[MMPA_MAX_PATH] = {0x00}; INT32 res = mmGetEnv(kEnvGeuseStaticMemory, ge_static_mem_env, MMPA_MAX_PATH); if (res == EN_OK && is_inner_mem_base_) { string weight_memory_key = std::to_string(0) + "_f"; - if (MemManager::Instance(RT_MEMORY_HBM)->GetMemoryAddr(weight_memory_key) != nullptr) { - GE_CHK_STATUS(MemManager::Instance(RT_MEMORY_HBM)->FreeMemory(weight_memory_key, GetDeviceId()), - "[Free][Memory] failed, model_id:%u", model_id_); + if 
(MemManager::Instance().MemInstance(RT_MEMORY_HBM).GetMemoryAddr(weight_memory_key) != nullptr) { + GE_CHK_STATUS(MemManager::Instance().MemInstance(RT_MEMORY_HBM).FreeMemory(weight_memory_key, GetDeviceId()), + "failed to free weight memory"); } mem_base_ = nullptr; } else { - GE_IF_BOOL_EXEC(mem_base_ != nullptr && is_inner_mem_base_, - GE_CHK_STATUS(MemManager::Instance(RT_MEMORY_HBM)->FreeMemory(mem_base_, GetDeviceId()), - "[Free][Memory] failed, model_id:%u", model_id_); - mem_base_ = nullptr); + GE_IF_BOOL_EXEC( + mem_base_ != nullptr && is_inner_mem_base_, + GE_CHK_STATUS(MemManager::Instance().MemInstance(RT_MEMORY_HBM).FreeMemory(mem_base_, GetDeviceId()), + "failed to free feature_map memory"); + mem_base_ = nullptr); } } -void DavinciModel::FreeP2PMem() { - if (std::getenv(kEnvGeuseStaticMemory) != nullptr) { - std::string p2p_memory_key = std::to_string(0) + "_p"; - if (MemManager::Instance(RT_MEMORY_P2P_DDR)->GetMemoryAddr(p2p_memory_key) != nullptr) { - GE_CHK_STATUS(MemManager::Instance(RT_MEMORY_P2P_DDR)->FreeMemory(p2p_memory_key, GetDeviceId()), - "[Free][Memory] failed, model_id:%u", model_id_); +void DavinciModel::FreeExMem() { + char ge_static_mem_env[MMPA_MAX_PATH] = {0x00}; + INT32 res_static_memory = mmGetEnv(kEnvGeuseStaticMemory, ge_static_mem_env, MMPA_MAX_PATH); + for (auto &it : runtime_param_.memory_infos) { + // free when session destroyed + if ((kSessionScopeMemory & it.first) == kSessionScopeMemory) { + continue; + } + auto mem_type = it.first & kMemoryTypeMask; + if (res_static_memory == EN_OK) { + std::string memory_key = std::to_string(0) + it.second.memory_key; + if (MemManager::Instance().MemInstance(mem_type).GetMemoryAddr(memory_key) != nullptr) { + GE_CHK_STATUS(MemManager::Instance().MemInstance(mem_type).FreeMemory(memory_key, GetDeviceId()), + "failed to free memory"); + } + it.second.memory_base = nullptr; + } else { + GE_IF_BOOL_EXEC( + it.second.memory_base != nullptr,
GE_CHK_STATUS(MemManager::Instance().MemInstance(mem_type).FreeMemory(it.second.memory_base, GetDeviceId()), + "failed to free memory"); + it.second.memory_base = nullptr); } - p2p_mem_base_ = nullptr; - } else { - GE_IF_BOOL_EXEC(p2p_mem_base_ != nullptr && is_inner_mem_base_, - GE_CHK_STATUS(MemManager::Instance(RT_MEMORY_P2P_DDR)->FreeMemory(p2p_mem_base_, GetDeviceId()), - "[Free][Memory] failed, model_id:%u", model_id_); - p2p_mem_base_ = nullptr); } } void DavinciModel::FreeWeightsMem() { - char ge_static_mem_env[MMPA_MAX_PATH] = { 0x00 }; + char ge_static_mem_env[MMPA_MAX_PATH] = {0x00}; INT32 res = mmGetEnv(kEnvGeuseStaticMemory, ge_static_mem_env, MMPA_MAX_PATH); if (res == EN_OK) { string memory_key = std::to_string(0) + "_w"; - if (MemManager::Instance(RT_MEMORY_HBM)->GetMemoryAddr(memory_key) != nullptr) { - GE_CHK_STATUS(MemManager::Instance(RT_MEMORY_HBM)->FreeMemory(memory_key, GetDeviceId()), - "[Free][Memory] failed, model_id:%u", model_id_); + if (MemManager::Instance().MemInstance(RT_MEMORY_HBM).GetMemoryAddr(memory_key) != nullptr) { + GE_CHK_STATUS(MemManager::Instance().MemInstance(RT_MEMORY_HBM).FreeMemory(memory_key, GetDeviceId()), + "failed to free feature_map memory"); } weights_mem_base_ = nullptr; } else { - GE_IF_BOOL_EXEC(weights_mem_base_ != nullptr && weights_mem_base_ != mem_base_ && is_inner_weight_base_, - GE_CHK_STATUS(MemManager::Instance(RT_MEMORY_HBM)->FreeMemory(weights_mem_base_, GetDeviceId()), - "[Free][Memory] failed, model_id:%u", model_id_); - weights_mem_base_ = nullptr); + GE_IF_BOOL_EXEC( + weights_mem_base_ != nullptr && weights_mem_base_ != mem_base_ && is_inner_weight_base_, + GE_CHK_STATUS(MemManager::Instance().MemInstance(RT_MEMORY_HBM).FreeMemory(weights_mem_base_, GetDeviceId()), + "failed to free weight memory"); + weights_mem_base_ = nullptr); } } diff --git a/ge/graph/load/model_manager/davinci_model.h b/ge/graph/load/model_manager/davinci_model.h index a4abcae6..e4898dec 100755 --- 
a/ge/graph/load/model_manager/davinci_model.h +++ b/ge/graph/load/model_manager/davinci_model.h @@ -248,8 +248,6 @@ class DavinciModel { // get total mem size size_t TotalMemSize() const { return runtime_param_.mem_size; } - const map &P2PMemInfos() const { return runtime_param_.memory_infos; } - // model name string Name() const { return name_; } @@ -586,10 +584,8 @@ class DavinciModel { // memory address of model uintptr_t fixed_mem_base_; // Initial of mem_base_, keep forever. uint8_t *mem_base_; - uint8_t *p2p_mem_base_; bool is_inner_mem_base_; bool is_inner_weight_base_; - bool is_inner_p2p_mem_base_; // input data manager DataInputer *data_inputer_; int64_t load_begin_time_; @@ -668,13 +664,13 @@ class DavinciModel { uint8_t *MallocWeightsMem(size_t weights_size); - uint8_t *MallocP2PMem(size_t p2p_data_size); + Status MallocExMem(); void FreeFeatureMapMem(); void FreeWeightsMem(); - void FreeP2PMem(); + void FreeExMem(); void ReleaseTask(); diff --git a/ge/graph/load/model_manager/model_manager.cc b/ge/graph/load/model_manager/model_manager.cc index 47d104f4..770fdb08 100755 --- a/ge/graph/load/model_manager/model_manager.cc +++ b/ge/graph/load/model_manager/model_manager.cc @@ -102,16 +102,14 @@ Status ModelManager::KernelLaunchEx(aicpu::FWKAdapter::FWKOperateType op_type, u auto kernel_size = sizeof(uint64_t) * (v_aicpu_kernel.size()); rtError_t rt_ret = rtMalloc(&aicpu_kernel_addr, kernel_size, RT_MEMORY_HBM); GE_IF_BOOL_EXEC(rt_ret != RT_ERROR_NONE, - REPORT_CALL_ERROR("E19999", "Call rtMalloc failed, size:%zu, ret: 0x%X", - kernel_size, rt_ret); - GELOGE(RT_FAILED, "rtMalloc error, ret: 0x%X", rt_ret); + REPORT_CALL_ERROR("E19999", "Call rtMalloc failed, size:%zu, ret:0x%X", kernel_size, rt_ret); + GELOGE(RT_FAILED, "[Call][RtMalloc] failed, size:%zu, ret:0x%X", kernel_size, rt_ret); return RT_ERROR_TO_GE_STATUS(rt_ret);) rt_ret = rtMemcpy(aicpu_kernel_addr, kernel_size, v_aicpu_kernel.data(), kernel_size, RT_MEMCPY_HOST_TO_DEVICE); 
GE_IF_BOOL_EXEC(rt_ret != RT_ERROR_NONE, - REPORT_CALL_ERROR("E19999", "Call rtMemcpy failed, size:%zu, ret: 0x%X", - kernel_size, rt_ret); - GELOGE(RT_FAILED, "rtMemcpy to input_output_addr_ error: 0x%X", rt_ret); + REPORT_CALL_ERROR("E19999", "Call rtMemcpy failed, size:%zu, ret:0x%X", kernel_size, rt_ret); + GELOGE(RT_FAILED, "[Call][RtMemcpy] failed, size:%zu, ret:0x%X", kernel_size, rt_ret); GE_CHK_RT(rtFree(aicpu_kernel_addr)); return RT_ERROR_TO_GE_STATUS(rt_ret);) uint64_t kernel_id_addr = static_cast(reinterpret_cast(aicpu_kernel_addr)); param_base.fwkKernelBase.fwk_kernel.kernelID = kernel_id_addr; @@ -122,9 +120,8 @@ Status ModelManager::KernelLaunchEx(aicpu::FWKAdapter::FWKOperateType op_type, u rtError_t rt_ret = rtMalloc(&(devicebase), sizeof(STR_FWK_OP_KERNEL), RT_MEMORY_HBM); if (rt_ret != RT_ERROR_NONE) { - REPORT_CALL_ERROR("E19999", "Call rtMalloc failed, size:%zu, ret: 0x%X", - sizeof(STR_FWK_OP_KERNEL), rt_ret); - GELOGE(RT_FAILED, "malloc device memory failed. ret: 0x%X", rt_ret); + REPORT_CALL_ERROR("E19999", "Call rtMalloc failed, size:%zu, ret:0x%X", sizeof(STR_FWK_OP_KERNEL), rt_ret); + GELOGE(RT_FAILED, "[Call][RtMalloc] failed. size:%zu, ret:0x%X", sizeof(STR_FWK_OP_KERNEL), rt_ret); GE_IF_BOOL_EXEC(aicpu_kernel_addr != nullptr, GE_CHK_RT(rtFree(aicpu_kernel_addr))); return RT_ERROR_TO_GE_STATUS(rt_ret); } @@ -132,9 +129,8 @@ Status ModelManager::KernelLaunchEx(aicpu::FWKAdapter::FWKOperateType op_type, u rt_ret = rtMemcpy(devicebase, sizeof(STR_FWK_OP_KERNEL), ¶m_base, sizeof(STR_FWK_OP_KERNEL), RT_MEMCPY_HOST_TO_DEVICE); if (rt_ret != RT_ERROR_NONE) { - REPORT_CALL_ERROR("E19999", "Call rtMemcpy failed, size:%zu, ret: 0x%X", - sizeof(STR_FWK_OP_KERNEL), rt_ret); - GELOGE(RT_FAILED, "memory copy to device failed. 
ret: 0x%X", rt_ret); + REPORT_CALL_ERROR("E19999", "Call rtMemcpy failed, size:%zu, ret:0x%X", sizeof(STR_FWK_OP_KERNEL), rt_ret); + GELOGE(RT_FAILED, "[Call][RtMemcpy] failed, size:%zu, ret:0x%X", sizeof(STR_FWK_OP_KERNEL), rt_ret); GE_IF_BOOL_EXEC(aicpu_kernel_addr != nullptr, GE_CHK_RT(rtFree(aicpu_kernel_addr))); GE_CHK_RT(rtFree(devicebase)); return RT_ERROR_TO_GE_STATUS(rt_ret); @@ -143,8 +139,8 @@ Status ModelManager::KernelLaunchEx(aicpu::FWKAdapter::FWKOperateType op_type, u rtStream_t stream = nullptr; rt_ret = rtStreamCreate(&stream, 0); if (rt_ret != RT_ERROR_NONE) { - REPORT_CALL_ERROR("E19999", "Call rtStreamCreate failed, ret: 0x%X", rt_ret); - GELOGE(RT_FAILED, "create stream failed. ret: 0x%X", rt_ret); + REPORT_CALL_ERROR("E19999", "Call rtStreamCreate failed, ret:0x%X", rt_ret); + GELOGE(RT_FAILED, "[Create][Stream] failed. ret:0x%X", rt_ret); GE_IF_BOOL_EXEC(aicpu_kernel_addr != nullptr, GE_CHK_RT(rtFree(aicpu_kernel_addr))); GE_CHK_RT(rtFree(devicebase)); return RT_ERROR_TO_GE_STATUS(rt_ret); @@ -152,8 +148,8 @@ Status ModelManager::KernelLaunchEx(aicpu::FWKAdapter::FWKOperateType op_type, u rt_ret = rtKernelLaunchEx(devicebase, sizeof(STR_FWK_OP_KERNEL), 0, stream); if (rt_ret != RT_ERROR_NONE) { - REPORT_CALL_ERROR("E19999", "Call rtKernelLaunchEx failed, ret: 0x%X", rt_ret); - GELOGE(RT_FAILED, "rtKernelLaunchEx failed. ret: 0x%X", rt_ret); + REPORT_CALL_ERROR("E19999", "Call rtKernelLaunchEx failed, ret:0x%X", rt_ret); + GELOGE(RT_FAILED, "[Call][RtKernelLaunchEx] failed. 
ret:0x%X", rt_ret); GE_IF_BOOL_EXEC(aicpu_kernel_addr != nullptr, GE_CHK_RT(rtFree(aicpu_kernel_addr))); GE_CHK_RT(rtFree(devicebase)); GE_CHK_RT(rtStreamDestroy(stream)); @@ -161,9 +157,8 @@ Status ModelManager::KernelLaunchEx(aicpu::FWKAdapter::FWKOperateType op_type, u } rt_ret = rtStreamSynchronize(stream); if (rt_ret != RT_ERROR_NONE) { - REPORT_CALL_ERROR("E19999", "Call rtStreamSynchronize failed, ret: 0x%X", - rt_ret); - GELOGE(RT_FAILED, "rtStreamSynchronize failed. ret: 0x%X", rt_ret); + REPORT_CALL_ERROR("E19999", "Call rtStreamSynchronize failed, ret:0x%X", rt_ret); + GELOGE(RT_FAILED, "[Call][RtStreamSynchronize] failed. ret:0x%X", rt_ret); GE_IF_BOOL_EXEC(aicpu_kernel_addr != nullptr, GE_CHK_RT(rtFree(aicpu_kernel_addr))); GE_CHK_RT(rtFree(devicebase)); GE_CHK_RT(rtStreamDestroy(stream)); @@ -172,8 +167,8 @@ Status ModelManager::KernelLaunchEx(aicpu::FWKAdapter::FWKOperateType op_type, u if (aicpu_kernel_addr != nullptr) { rt_ret = rtFree(aicpu_kernel_addr); if (rt_ret != RT_ERROR_NONE) { - REPORT_CALL_ERROR("E19999", "Call rtFree failed, ret: 0x%X", rt_ret); - GELOGE(RT_FAILED, "free memory failed. ret: 0x%X", rt_ret); + REPORT_CALL_ERROR("E19999", "Call rtFree failed, ret:0x%X", rt_ret); + GELOGE(RT_FAILED, "[Free][Memory] failed. ret:0x%X", rt_ret); GE_CHK_RT(rtFree(devicebase)); GE_CHK_RT(rtStreamDestroy(stream)); return RT_ERROR_TO_GE_STATUS(rt_ret); @@ -181,15 +176,15 @@ Status ModelManager::KernelLaunchEx(aicpu::FWKAdapter::FWKOperateType op_type, u } rt_ret = rtFree(devicebase); if (rt_ret != RT_ERROR_NONE) { - REPORT_CALL_ERROR("E19999", "Call rtFree failed, ret: 0x%X", rt_ret); - GELOGE(RT_FAILED, "free memory failed. ret: 0x%X", rt_ret); + REPORT_CALL_ERROR("E19999", "Call rtFree failed, ret:0x%X", rt_ret); + GELOGE(RT_FAILED, "[Free][Memory] failed. 
ret:0x%X", rt_ret); GE_CHK_RT(rtStreamDestroy(stream)); return RT_ERROR_TO_GE_STATUS(rt_ret); } rt_ret = rtStreamDestroy(stream); if (rt_ret != RT_ERROR_NONE) { - REPORT_CALL_ERROR("E19999", "Call rtStreamDestroy failed, ret: 0x%X", rt_ret); - GELOGE(RT_FAILED, "rtStreamDestroy failed. ret: 0x%X", rt_ret); + REPORT_CALL_ERROR("E19999", "Call rtStreamDestroy failed, ret:0x%X", rt_ret); + GELOGE(RT_FAILED, "[Call][RtStreamDestroy] failed. ret:0x%X", rt_ret); return RT_ERROR_TO_GE_STATUS(rt_ret); } return SUCCESS; @@ -235,9 +230,8 @@ ge::Status ModelManager::DestroyAicpuSessionForInfer(uint32_t model_id) { auto it = model_map_.find(model_id); if (it == model_map_.end()) { - REPORT_INNER_ERROR("E19999", "Param model_id:%u can't find in model_map, check invalid", - model_id); - GELOGE(ACL_ERROR_GE_EXEC_MODEL_ID_INVALID, "model id %u does not exists.", model_id); + REPORT_INNER_ERROR("E19999", "Param model_id:%u can't find in model_map, check invalid", model_id); + GELOGE(ACL_ERROR_GE_EXEC_MODEL_ID_INVALID, "[Check][Param] model id %u does not exists.", model_id); return ACL_ERROR_GE_EXEC_MODEL_ID_INVALID; } uint64_t session_id = it->second->GetSessionId(); @@ -256,7 +250,8 @@ ge::Status ModelManager::DestroyAicpuKernel(uint64_t session_id, uint32_t model_ if (ret != SUCCESS) { REPORT_CALL_ERROR("E19999", "Call KernelLaunchEx fail, model_id:%u, sub_model_id:%u, session_id:%lu", model_id, sub_model_id, session_id); - GELOGE(FAILED, "Destroy aicpu kernel failed."); + GELOGE(FAILED, "[Call][KernelLaunchEx] fail, model_id:%u, sub_model_id:%u, session_id:%lu", + model_id, sub_model_id, session_id); return FAILED; } } @@ -304,7 +299,7 @@ ge::Status ModelManager::DoLoadHybridModelOnline(uint32_t model_id, const string hybrid_model->SetModelId(model_id); hybrid_model->SetDeviceId(GetContext().DeviceId()); hybrid_model->SetOmName(om_name); - GE_CHK_STATUS_RET(hybrid_model->Init(), "Failed to init hybrid model. 
model_id = %u", model_id); + GE_CHK_STATUS_RET(hybrid_model->Init(), "[Init][HybridModel] failed. model_id = %u", model_id); auto shared_model = std::shared_ptr(hybrid_model.release()); InsertModel(model_id, shared_model); return SUCCESS; @@ -315,7 +310,8 @@ bool ModelManager::IsNeedHybridLoad(ge::GeRootModel &ge_root_model) { if (root_graph == nullptr) { REPORT_INNER_ERROR("E19999", "root graph in param ge_root_model is nullptr, model_id:%u, " "check invalid", ge_root_model.GetModelId()); - GELOGE(FAILED, "no model on root model"); + GELOGE(FAILED, "[Check][Param] root graph in param ge_root_model is nullptr, model_id:%u", + ge_root_model.GetModelId()); return false; } bool is_shape_unknown = root_graph->GetGraphUnknownFlag(); @@ -354,9 +350,11 @@ Status ModelManager::UpdateSessionId(uint32_t model_id, GeModelPtr ge_model, std::shared_ptr &davinci_model, uint64_t &session_id) { uint64_t new_session_id; Status ret = GenSessionId(new_session_id); - GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(ret != SUCCESS, return ret, "Generate session_id for infer failed."); + GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(ret != SUCCESS, return ret, + "[Generate][SessionId] for infer failed, model_id:%u.", model_id); ret = davinci_model->UpdateSessionId(new_session_id); - GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(ret != SUCCESS, return ret, "Update session_id for infer failed."); + GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(ret != SUCCESS, return ret, + "[Update][SessionId] for infer failed, model_id:%u.", model_id); ge_model->InsertSessionMap(model_id, new_session_id); GELOGD("Update new session id: %lu.", new_session_id); session_id = new_session_id; @@ -382,7 +380,7 @@ bool ModelManager::HasVarNode(ComputeGraphPtr &compute_graph) const { /// Status ModelManager::LoadModelOnline(uint32_t &model_id, const shared_ptr &ge_root_model, std::shared_ptr listener) { - GE_CHK_BOOL_RET_STATUS(listener.get() != nullptr, PARAM_INVALID, "Param incorrect, listener is null"); + GE_CHK_BOOL_RET_STATUS(listener.get() != nullptr, PARAM_INVALID, 
"[Check][Param] Param incorrect, listener is null"); if (model_id == INVALID_MODEL_ID) { GenModelId(&model_id); GELOGD("Generate new model_id:%u", model_id); @@ -444,13 +442,13 @@ Status ModelManager::LoadModelOnline(uint32_t &model_id, const shared_ptr &davinci_model) { - GE_CHK_BOOL_EXEC(davinci_model != nullptr, return, "davinci_model ptr is null, id: %u", model_id); + GE_CHK_BOOL_EXEC(davinci_model != nullptr, return, "[Check][Param] davinci_model ptr is null, id:%u", model_id); std::lock_guard lock(map_mutex_); model_map_[model_id] = davinci_model; } void ModelManager::InsertModel(uint32_t model_id, shared_ptr &hybrid_model) { - GE_CHK_BOOL_EXEC(hybrid_model != nullptr, return, "hybrid_model ptr is null, id: %u", model_id); + GE_CHK_BOOL_EXEC(hybrid_model != nullptr, return, "[Check][Param] hybrid_model ptr is null, id:%u", model_id); std::lock_guard lock(map_mutex_); hybrid_model_map_[model_id] = hybrid_model; } @@ -502,7 +500,7 @@ std::shared_ptr ModelManager::GetHybridModel(uint32_ } Status ModelManager::Unload(uint32_t model_id) { - GE_CHK_STATUS_RET(DeleteModel(model_id), "failed to unload model id: %u", model_id); + GE_CHK_STATUS_RET(DeleteModel(model_id), "[Delete][Model] failed, model id:%u", model_id); if (device_count > 0) { device_count--; GELOGI("Unload model %u success.", model_id); @@ -515,7 +513,7 @@ Status ModelManager::Unload(uint32_t model_id) { } Status ModelManager::UnloadModeldef(uint32_t model_id) { - GE_CHK_STATUS_RET(DeleteModel(model_id), "failed to unload modeldef id: %u", model_id); + GE_CHK_STATUS_RET(DeleteModel(model_id), "[Delete][Model] failed, model id: %u", model_id); return SUCCESS; } @@ -526,8 +524,8 @@ Status ModelManager::DataInput(const InputData &input_data, OutputData &output_d Status status = data_wrap->Init(input_data, output_data); if (status != SUCCESS) { - REPORT_CALL_ERROR("E19999", "Init InputDataWrapper failed, input data index: %u", input_data.index); - GELOGE(domi::PUSH_DATA_FAILED, "Init InputDataWrapper 
failed, input data index: %u.", input_data.index); + REPORT_CALL_ERROR("E19999", "Init InputDataWrapper failed, input data index:%u", input_data.index); + GELOGE(domi::PUSH_DATA_FAILED, "[Init][InputDataWrapper] failed, input data index:%u.", input_data.index); return domi::PUSH_DATA_FAILED; } @@ -536,7 +534,8 @@ Status ModelManager::DataInput(const InputData &input_data, OutputData &output_d std::shared_ptr model = GetModel(model_id); - GE_CHK_BOOL_RET_STATUS(model != nullptr, PARAM_INVALID, "Invalid model id %u in InputData! ", model_id); + GE_CHK_BOOL_RET_STATUS(model != nullptr, PARAM_INVALID, + "[Get][Model] failed, Invalid model id %u in InputData!", model_id); GE_IF_BOOL_EXEC(model->GetDataInputTid() == 0, model->SetDataInputTid(mmGetTid())); @@ -544,7 +543,7 @@ Status ModelManager::DataInput(const InputData &input_data, OutputData &output_d GE_CHECK_NOTNULL(inputer); if (inputer->Push(data_wrap) != SUCCESS) { REPORT_CALL_ERROR("E19999", "DataInputer queue is full, please call again later, model_id %u", model_id); - GELOGE(domi::DATA_QUEUE_ISFULL, "Data queue is full, please call again later, model_id %u ", model_id); + GELOGE(domi::DATA_QUEUE_ISFULL, "[Call][Push] Data queue is full, please call again later, model_id %u ", model_id); return domi::DATA_QUEUE_ISFULL; } GELOGD("Data input success, model id:%u", model_id); @@ -558,10 +557,9 @@ Status ModelManager::GetCurDynamicDims(const vector> &user_real_ GELOGD("Start get cur dynamic dims."); if (user_real_input_dims.size() != user_input_dims.size()) { REPORT_INNER_ERROR("E19999", "Param user_real_input_dims.size:%zu != user_input_dims.size:%zu, " - "check invalid", - user_real_input_dims.size(), user_input_dims.size()); + "check invalid", user_real_input_dims.size(), user_input_dims.size()); GELOGE(INTERNAL_ERROR, - "The input count of user: %zu should be equal to the data count of graph: %zu", + "[Check][Param] The input count of user:%zu should be equal to the data count of graph:%zu", 
user_real_input_dims.size(), user_input_dims.size()); return INTERNAL_ERROR; } @@ -571,8 +569,8 @@ Status ModelManager::GetCurDynamicDims(const vector> &user_real_ REPORT_INNER_ERROR("E19999", "Param user_real_input_dims[%zu].size:%zu != user_input_dims[%zu].size:%zu, " "check invalid", i, user_real_input_dims[i].size(), i, user_input_dims[i].second.size()); - GELOGE(INTERNAL_ERROR, - "The shape size: %zu of dynamic input: %s should be equal to the shape size of input shape: %zu.", + GELOGE(INTERNAL_ERROR, "[Check][Param] The shape size:%zu of dynamic input:%s " + "should be equal to the shape size of input shape:%zu.", user_real_input_dims[i].size(), user_input_dims[i].first.c_str(), user_input_dims[i].second.size()); return INTERNAL_ERROR; } @@ -594,7 +592,7 @@ Status ModelManager::GetCurDynamicDims(const vector> &user_real_ if (!cur_dynamic_dims_valid) { REPORT_INNER_ERROR("E19999", "cur dynamic dims is %s, not exist in options, check invalid", formats::JoinToString(cur_dynamic_dims).c_str()); - GELOGE(INTERNAL_ERROR, "Cur dynamic dims is %s, not exist in options.", + GELOGE(INTERNAL_ERROR, "[Check][Param] Cur dynamic dims is %s, not exist in options.", formats::JoinToString(cur_dynamic_dims).c_str()); return INTERNAL_ERROR; } @@ -632,15 +630,16 @@ Status ModelManager::DataInputTensor(uint32_t model_id, const std::vector(cur_dynamic_dims.size() * sizeof(int32_t)); - GE_CHK_BOOL_EXEC(memcpy_s(data.data, length, cur_dynamic_dims.data(), length) == EOK, return INTERNAL_ERROR, - "Failed to memcpy data."); + GE_CHK_BOOL_EXEC(memcpy_s(data.data, length, cur_dynamic_dims.data(), length) == EOK, + REPORT_CALL_ERROR("E19999", "memcpy data failed, size:%u", length); + return INTERNAL_ERROR, "[Memcpy][Data] failed, size:%u.", length); data.length = length; input_data.blobs.push_back(data); } @@ -654,21 +653,22 @@ Status ModelManager::DataInputTensor(uint32_t model_id, const std::vectorInit(input_data, output_data), return domi::PUSH_DATA_FAILED, - "Init InputDataWrapper 
failed,input data model_id is : %u.", model_id); + "[Init][InputDataWrapper] failed, input data model_id:%u.", model_id); if (hybrid_model != nullptr) { - GE_CHK_STATUS_RET(hybrid_model->EnqueueData(data_wrap), "Data queue is full, please call again later, model_id %u ", - model_id); + GE_CHK_STATUS_RET(hybrid_model->EnqueueData(data_wrap), + "[Enqueue][Data] Data queue is full, please call again later, model_id:%u", model_id); return SUCCESS; } - GE_CHK_BOOL_RET_STATUS(model != nullptr, PARAM_INVALID, "Invalid model id %u in InputData! ", model_id); + GE_CHK_BOOL_RET_STATUS(model != nullptr, PARAM_INVALID, + "[Check][Param] Invalid model id %u in InputData!", model_id); DataInputer *inputer = model->GetDataInputer(); GE_CHECK_NOTNULL(inputer); GE_CHK_STATUS_EXEC(inputer->Push(data_wrap), return domi::DATA_QUEUE_ISFULL, - "Data queue is full, please call again later, model_id %u ", model_id); + "[Call][Push] Data queue is full, please call again later, model_id %u ", model_id); GELOGD("Data input success, model id:%u", model_id); @@ -691,7 +691,8 @@ Status ModelManager::Start(uint32_t model_id) { std::shared_ptr davinci_model = GetModel(model_id); - GE_CHK_BOOL_RET_STATUS(davinci_model != nullptr, PARAM_INVALID, "Invalid model id %u to start! ", model_id); + GE_CHK_BOOL_RET_STATUS(davinci_model != nullptr, PARAM_INVALID, + "[Get][Model] failed, Invalid model id %u to start! 
", model_id); Status status = davinci_model->ModelRunStart(); if (status == SUCCESS) { @@ -718,7 +719,8 @@ Status ModelManager::Stop(uint32_t model_id) { } std::shared_ptr davinci_model = GetModel(model_id); - GE_CHK_BOOL_RET_STATUS(davinci_model != nullptr, PARAM_INVALID, "Invalid model id %u to stop!", model_id); + GE_CHK_BOOL_RET_STATUS(davinci_model != nullptr, PARAM_INVALID, + "[Get][Model] failed, Invalid model id %u to stop!", model_id); Status status = davinci_model->ModelRunStop(); if (status == SUCCESS) { @@ -746,9 +748,8 @@ Status ModelManager::HandleCommand(const Command &command) { auto iter = cmds.find(command.cmd_type); if (iter == cmds.end()) { - REPORT_INNER_ERROR("E19999", "Unsupported command:%s check", - command.cmd_type.c_str()); - GELOGE(PARAM_INVALID, "Unsupported command: %s", command.cmd_type.c_str()); + REPORT_INNER_ERROR("E19999", "Unsupported command:%s check", command.cmd_type.c_str()); + GELOGE(PARAM_INVALID, "[Check][Param] Unsupported command:%s", command.cmd_type.c_str()); return PARAM_INVALID; } else { return iter->second(command); @@ -761,8 +762,8 @@ Status ModelManager::GetModelByCmd(const Command &command, REPORT_INNER_ERROR("E19999", "command.cmd_params.size:%zu < kCmdParSize:%u, command_type:%s, " "check invalid", command.cmd_params.size(), kCmdParSize, command.cmd_type.c_str()); - GELOGE(PARAM_INVALID, "When the cmd_type is '%s', the size of cmd_params must larger than 2.", - command.cmd_type.c_str()); + GELOGE(PARAM_INVALID, "[Check][Param] When the cmd_type is '%s', the size of cmd_params must larger than 2.", + command.cmd_type.c_str()); return PARAM_INVALID; } @@ -773,19 +774,16 @@ Status ModelManager::GetModelByCmd(const Command &command, try { model_id = std::stoi(value); } catch (std::invalid_argument &) { - REPORT_INNER_ERROR("E19999", "%s param:%s, check invalid", PROFILE_MODEL_ID.c_str(), - value.c_str()); - GELOGE(PARAM_INVALID, "Model id: %s is invalid.", value.c_str()); + REPORT_INNER_ERROR("E19999", "%s 
param:%s, check invalid", PROFILE_MODEL_ID.c_str(), value.c_str()); + GELOGE(PARAM_INVALID, "[Check][Param] Model id:%s is invalid.", value.c_str()); return PARAM_INVALID; } catch (std::out_of_range &) { - REPORT_INNER_ERROR("E19999", "%s param:%s, check out of range", PROFILE_MODEL_ID.c_str(), - value.c_str()); - GELOGE(PARAM_INVALID, "Model id: %s is out of range.", value.c_str()); + REPORT_INNER_ERROR("E19999", "%s param:%s, check out of range", PROFILE_MODEL_ID.c_str(), value.c_str()); + GELOGE(PARAM_INVALID, "[Check][Param] Model id:%s is out of range.", value.c_str()); return PARAM_INVALID; } catch (...) { - REPORT_INNER_ERROR("E19999", "%s param:%s, check cannot change to int", - PROFILE_MODEL_ID.c_str(), value.c_str()); - GELOGE(FAILED, "Model id: %s cannot change to int.", value.c_str()); + REPORT_INNER_ERROR("E19999", "%s param:%s, check cannot change to int", PROFILE_MODEL_ID.c_str(), value.c_str()); + GELOGE(FAILED, "[Check][Param] Model id:%s cannot change to int.", value.c_str()); return FAILED; } @@ -793,15 +791,13 @@ Status ModelManager::GetModelByCmd(const Command &command, GE_CHECK_NOTNULL(model_manager); davinci_model = model_manager->GetModel(static_cast(model_id)); if (davinci_model == nullptr) { - REPORT_INNER_ERROR("E19999", "GetModel from model_manager fail, model_id:%u", - model_id); - GELOGE(FAILED, "Model id: %d is invaild or model is not loaded.", model_id); + REPORT_INNER_ERROR("E19999", "GetModel from model_manager fail, model_id:%u", model_id); + GELOGE(FAILED, "[Get][Model] failed, Model id:%d is invalid or model is not loaded.", model_id); return FAILED; } } else { - REPORT_INNER_ERROR("E19999", "Fisrt cmd_param not %s, check invalid", - PROFILE_MODEL_ID.c_str()); - GELOGE(FAILED, "The model_id parameter is not found in the command."); + REPORT_INNER_ERROR("E19999", "First cmd_param not %s, check invalid", PROFILE_MODEL_ID.c_str()); + GELOGE(FAILED, "[Check][Param] The model_id parameter is not found in the command."); return 
FAILED; } @@ -817,7 +813,8 @@ Status ModelManager::HandleProfModelSubscribeCommand(const Command &command) { if (ProfilingManager::Instance().ProfModelSubscribe(command.module_index, static_cast(davinci_model.get())) != SUCCESS) { - GELOGE(FAILED, "Handle prof model subscribe failed."); + GELOGE(FAILED, "[Handle][ProfModelSubscribe] failed, module_index:%lu.", + command.module_index); return FAILED; } @@ -832,7 +829,7 @@ Status ModelManager::HandleProfModelUnsubscribeCommand(const Command &command) { } if (ProfilingManager::Instance().ProfModelUnsubscribe(static_cast(davinci_model.get())) != SUCCESS) { - GELOGE(FAILED, "Handle prof model unsubscribe failed."); + GELOGE(FAILED, "[Handle][ProfModelUnsubscribe] failed."); return FAILED; } @@ -842,7 +839,7 @@ Status ModelManager::HandleProfModelUnsubscribeCommand(const Command &command) { Status ModelManager::HandleProfInitCommand(const Command &command) { uint64_t module_index = command.module_index; if (ProfilingManager::Instance().ProfInit(module_index) != SUCCESS) { - GELOGE(FAILED, "Handle prof init failed."); + GELOGE(FAILED, "[Handle][ProfInit] failed, module_index:%lu.", module_index); return FAILED; } return SUCCESS; @@ -850,7 +847,7 @@ Status ModelManager::HandleProfInitCommand(const Command &command) { Status ModelManager::HandleProfFinalizeCommand(const Command &command) { if (ProfilingManager::Instance().ProfFinalize() != SUCCESS) { - GELOGE(FAILED, "Handle prof finalize failed."); + GELOGE(FAILED, "[Handle][ProfFinalize] failed."); return FAILED; } return SUCCESS; @@ -866,13 +863,14 @@ Status ModelManager::HandleProfStartCommand(const Command &command) { if (command.cmd_params.size() < kProfStartCmdParaSize) { REPORT_INNER_ERROR("E19999", "command.cmd_params.size:%zu < %zu, check invalid", command.cmd_params.size(), kProfStartCmdParaSize); - GELOGE(PARAM_INVALID, "When the cmd_type is 'profile start', the size of cmd_params must larger than 2."); + GELOGE(PARAM_INVALID, "[Check][Param] When the cmd_type 
is 'profile start', " + "the size:%zu of cmd_params must larger than 2.", command.cmd_params.size()); return PARAM_INVALID; } if (command.cmd_params.size() > kProfCmdParaMaxSize) { REPORT_INNER_ERROR("E19999", "command.cmd_params.size:%zu > %zu, check invalid", command.cmd_params.size(), kProfCmdParaMaxSize); - GELOGE(PARAM_INVALID, "Command para size[%zu] larger than max[1000].", command.cmd_params.size()); + GELOGE(PARAM_INVALID, "[Check][Param] Command param size[%zu] larger than max[1000].", command.cmd_params.size()); return PARAM_INVALID; } @@ -886,7 +884,7 @@ Status ModelManager::HandleProfStartCommand(const Command &command) { } uint64_t module_index = command.module_index; if (ProfilingManager::Instance().ProfStartProfiling(module_index, cmd_params_map) != SUCCESS) { - GELOGE(FAILED, "Handle prof start failed."); + GELOGE(FAILED, "[Handle][ProfStartProfiling] failed, module_index:%lu.", module_index); return FAILED; } return SUCCESS; @@ -896,13 +894,14 @@ Status ModelManager::HandleProfStopCommand(const Command &command) { if (command.cmd_params.size() < kProfStartCmdParaSize) { REPORT_INNER_ERROR("E19999", "command.cmd_params.size:%zu < %zu, check invalid", command.cmd_params.size(), kProfStartCmdParaSize); - GELOGE(PARAM_INVALID, "When the cmd_type is 'profile stop', the size of cmd_params must larger than 2."); + GELOGE(PARAM_INVALID, "[Check][Param] When the cmd_type is 'profile stop', " + "the size:%zu of cmd_params must larger than 2.", command.cmd_params.size()); return PARAM_INVALID; } if (command.cmd_params.size() > kProfCmdParaMaxSize) { REPORT_INNER_ERROR("E19999", "command.cmd_params.size:%zu > %zu, check invalid", command.cmd_params.size(), kProfCmdParaMaxSize); - GELOGE(PARAM_INVALID, "Command para size[%zu] larger than max[1000].", command.cmd_params.size()); + GELOGE(PARAM_INVALID, "[Check][Param] Command param size[%zu] larger than max[1000].", command.cmd_params.size()); return PARAM_INVALID; } @@ -916,7 +915,7 @@ Status 
ModelManager::HandleProfStopCommand(const Command &command) { } uint64_t module_index = command.module_index; if (ProfilingManager::Instance().ProfStopProfiling(module_index, cmd_params_map) != SUCCESS) { - GELOGE(FAILED, "Handle prof finalize failed."); + GELOGE(FAILED, "[Handle][ProfStopProfiling] failed, module_index:%lu.", module_index); return FAILED; } return SUCCESS; @@ -927,9 +926,8 @@ static Status ParserPara(const Command &command, const string &dump_key, string if (iter != command.cmd_params.end()) { ++iter; if (iter == command.cmd_params.end()) { - REPORT_INNER_ERROR("E19999", "dump_key:%s can't find in command.param, check invalid", - dump_key.c_str()); - GELOGE(PARAM_INVALID, "Invalid access."); + REPORT_INNER_ERROR("E19999", "dump_key:%s can't find in command.param, check invalid", dump_key.c_str()); + GELOGE(PARAM_INVALID, "[Check][Param] dump_key:%s can't find in command.param, check invalid", dump_key.c_str()); return PARAM_INVALID; } dump_value = *iter; @@ -939,9 +937,9 @@ static Status ParserPara(const Command &command, const string &dump_key, string Status ModelManager::HandleDumpCommand(const Command &command) { if (command.cmd_params.size() % kDumpCmdPairSize != 0) { - REPORT_INNER_ERROR("E19999", "command.cmd_params.size:%zu MOD 2 != 0, check invalid", - command.cmd_params.size()); - GELOGE(PARAM_INVALID, "When the cmd_type is 'dump', the size of cmd_params must be a even number."); + REPORT_INNER_ERROR("E19999", "command.cmd_params.size:%zu MOD 2 != 0, check invalid", command.cmd_params.size()); + GELOGE(PARAM_INVALID, "[Check][Param] When the cmd_type is 'dump', " + "the size:%zu of cmd_params must be a even number.", command.cmd_params.size()); return PARAM_INVALID; } @@ -953,14 +951,14 @@ Status ModelManager::HandleDumpCommand(const Command &command) { auto ret = ParserPara(command, DUMP_STATUS, dump_status); if (ret != SUCCESS) { - GELOGE(PARAM_INVALID, "parser dump status failed"); + GELOGE(PARAM_INVALID, "[Parser][DumpStatus] failed, 
ret:%d", ret); return FAILED; } GELOGI("dump status = %s.", dump_status.c_str()); ret = ParserPara(command, DUMP_MODEL, dump_model); if (ret != SUCCESS) { - GELOGE(PARAM_INVALID, "parser dump model failed"); + GELOGE(PARAM_INVALID, "[Parser][DumpModel] failed, ret:%d", ret); return FAILED; } GELOGI("dump model = %s.", dump_model.c_str()); @@ -979,7 +977,7 @@ Status ModelManager::HandleDumpCommand(const Command &command) { ret = ParserPara(command, DUMP_FILE_PATH, dump_path); if (ret != SUCCESS) { - GELOGE(PARAM_INVALID, "parser dump path failed"); + GELOGE(PARAM_INVALID, "[Parser][DumpPath] failed, ret:%d", ret); return FAILED; } if (!dump_path.empty() && dump_path[dump_path.size() - 1] != '/') { @@ -990,7 +988,7 @@ Status ModelManager::HandleDumpCommand(const Command &command) { ret = ParserPara(command, DUMP_MODE, dump_mode); if (ret != SUCCESS) { - GELOGE(PARAM_INVALID, "parser dump mode failed"); + GELOGE(PARAM_INVALID, "[Parser][DumpMode] failed, ret:%d", ret); return FAILED; } GELOGI("dump mode = %s", dump_mode.c_str()); @@ -1010,8 +1008,8 @@ Status ModelManager::GetMaxUsedMemory(const uint32_t model_id, uint64_t &max_siz } std::shared_ptr davinci_model = GetModel(model_id); - GE_CHK_BOOL_RET_STATUS(davinci_model != nullptr, PARAM_INVALID, "GetMaxUsedMemory Failed, Invalid model id %u!", - model_id); + GE_CHK_BOOL_RET_STATUS(davinci_model != nullptr, PARAM_INVALID, + "[Get][Model] failed, Invalid model id:%u!", model_id); max_size = davinci_model->TotalMemSize(); return SUCCESS; @@ -1020,8 +1018,8 @@ Status ModelManager::GetMaxUsedMemory(const uint32_t model_id, uint64_t &max_siz Status ModelManager::GetInputOutputDescInfo(const uint32_t model_id, vector &input_desc, vector &output_desc) { std::shared_ptr davinci_model = GetModel(model_id); - GE_CHK_BOOL_RET_STATUS(davinci_model != nullptr, PARAM_INVALID, "GetInputOutputDescInfo Failed, Invalid model id %u!", - model_id); + GE_CHK_BOOL_RET_STATUS(davinci_model != nullptr, PARAM_INVALID, + "[Get][Model] 
failed, Invalid model id %u!", model_id); return davinci_model->GetInputOutputDescInfo(input_desc, output_desc); } @@ -1038,7 +1036,7 @@ Status ModelManager::GetInputOutputDescInfo(const uint32_t model_id, vector davinci_model = GetModel(model_id); GE_CHK_BOOL_RET_STATUS(davinci_model != nullptr, ACL_ERROR_GE_EXEC_MODEL_ID_INVALID, - "GetInputOutputDescInfo Failed, Invalid model id %u!", model_id); + "[Get][Model] Failed, Invalid model id %u!", model_id); return davinci_model->GetInputOutputDescInfo(input_desc, output_desc, inputFormats, outputFormats, new_model_desc); } @@ -1059,7 +1057,7 @@ Status ModelManager::GetDynamicBatchInfo(const uint32_t model_id, std::vector davinci_model = GetModel(model_id); GE_CHK_BOOL_RET_STATUS(davinci_model != nullptr, ACL_ERROR_GE_EXEC_MODEL_ID_INVALID, - "GetDynamicBatchInfo failed, Invalid model id %u!", model_id); + "[Get][Model] failed, Invalid model id %u!", model_id); return davinci_model->GetDynamicBatchInfo(batch_info, dynamic_type); } @@ -1074,7 +1072,7 @@ Status ModelManager::GetDynamicBatchInfo(const uint32_t model_id, std::vector> &batch_info) { std::shared_ptr davinci_model = GetModel(model_id); GE_CHK_BOOL_RET_STATUS(davinci_model != nullptr, ACL_ERROR_GE_EXEC_MODEL_ID_INVALID, - "GetCombinedDynamicDims Failed, Invalid Model ID %u!", model_id); + "[Get][Model] Failed, Invalid Model ID %u!", model_id); davinci_model->GetCombinedDynamicDims(batch_info); return SUCCESS; @@ -1097,7 +1095,7 @@ Status ModelManager::GetUserDesignateShapeOrder(const uint32_t model_id, auto davinci_model = GetModel(model_id); GE_CHK_BOOL_RET_STATUS(davinci_model != nullptr, ACL_ERROR_GE_EXEC_MODEL_ID_INVALID, - "GetUserDesignateShapeOrder Failed, Invalid Model ID %u!", model_id) + "[Get][Model] Failed, Invalid Model ID %u!", model_id) davinci_model->GetUserDesignateShapeOrder(user_input_shape_order); return SUCCESS; } @@ -1105,7 +1103,7 @@ Status ModelManager::GetUserDesignateShapeOrder(const uint32_t model_id, Status 
ModelManager::GetCurShape(const uint32_t model_id, std::vector &batch_info, int32_t &dynamic_type) { auto davinci_model = GetModel(model_id); GE_CHK_BOOL_RET_STATUS(davinci_model != nullptr, ACL_ERROR_GE_EXEC_MODEL_ID_INVALID, - "GetCurShape Failed, Invalid Model ID %u!", model_id); + "[Get][Model] Failed, Invalid Model ID %u!", model_id); davinci_model->GetCurShape(batch_info, dynamic_type); return SUCCESS; } @@ -1134,7 +1132,7 @@ Status ModelManager::GetModelAttr(uint32_t model_id, std::vector &dynami std::shared_ptr davinci_model = GetModel(model_id); GE_CHK_BOOL_RET_STATUS(davinci_model != nullptr, ACL_ERROR_GE_EXEC_MODEL_ID_INVALID, - "GetModelAttr Failed, Invalid Model ID %u!", model_id); + "[Get][Model] Failed, Invalid Model ID %u!", model_id); davinci_model->GetModelAttr(dynamic_output_shape_info); return SUCCESS; } @@ -1150,14 +1148,14 @@ Status ModelManager::GetModelAttr(uint32_t model_id, std::vector &dynami Status ModelManager::GetAippInfo(const uint32_t model_id, uint32_t index, AippConfigInfo &aipp_info) { std::shared_ptr davinci_model = GetModel(model_id); GE_CHK_BOOL_RET_STATUS(davinci_model != nullptr, ACL_ERROR_GE_EXEC_MODEL_ID_INVALID, - "GetAIPPInfo failed, invalid model_id is %u.", model_id); + "[Get][Model] failed, invalid model_id is %u.", model_id); return davinci_model->GetAippInfo(index, aipp_info); } Status ModelManager::GetAippType(uint32_t model_id, uint32_t index, InputAippType &type, size_t &aipp_index) { std::shared_ptr davinci_model = GetModel(model_id); GE_CHK_BOOL_RET_STATUS(davinci_model != nullptr, ACL_ERROR_GE_EXEC_MODEL_ID_INVALID, - "GetAIPPInfo failed, invalid model_id is %u.", model_id); + "[Get][Model] failed, invalid model_id is %u.", model_id); return davinci_model->GetAippType(index, type, aipp_index); } @@ -1173,7 +1171,7 @@ Status ModelManager::GenSessionId(uint64_t &session_id) { mmTimeval tv; if (mmGetTimeOfDay(&tv, nullptr) != 0) { REPORT_CALL_ERROR("E19999", "Call mmGetTimeOfDay fail. 
errmsg:%s", strerror(errno)); - GELOGE(INTERNAL_ERROR, "Failed to get current time."); + GELOGE(INTERNAL_ERROR, "[Call][MmGetTimeOfDay] fail. errmsg:%s", strerror(errno)); return INTERNAL_ERROR; } uint64_t timestamp = static_cast(tv.tv_sec * kTimeSpecMiro + tv.tv_usec); // 1000000us @@ -1192,7 +1190,8 @@ Status ModelManager::GenSessionId(uint64_t &session_id) { Status ModelManager::LoadModelOffline(uint32_t &model_id, const ModelData &model, shared_ptr listener, void *dev_ptr, size_t mem_size, void *weight_ptr, size_t weight_size) { GE_CHK_BOOL_RET_STATUS(model.key.empty() || mmAccess2(model.key.c_str(), M_F_OK) == EN_OK, - ACL_ERROR_GE_PARAM_INVALID, "Input key file path %s is invalid, %s", model.key.c_str(), strerror(errno)); + ACL_ERROR_GE_PARAM_INVALID, + "[Check][Param] Input key file path %s is invalid, %s", model.key.c_str(), strerror(errno)); GenModelId(&model_id); mmTimespec timespec = mmGetTickCount(); @@ -1200,14 +1199,14 @@ Status ModelManager::LoadModelOffline(uint32_t &model_id, const ModelData &model ModelHelper model_helper; Status ret = model_helper.LoadRootModel(model); if (ret != SUCCESS) { - GELOGE(ret, "load model failed."); + GELOGE(ret, "[Load][RootModel] failed, ret:%d, model_id:%u.", ret, model_id); return ret; } if (model_helper.GetModelType()) { bool is_shape_unknown = false; GE_CHK_STATUS_RET(model_helper.GetGeRootModel()->CheckIsUnknownShape(is_shape_unknown), - "CheckIsUnknownShape failed, model id:%u", model_id); + "[Check][IsUnknownShape] failed, model id:%u", model_id); if (is_shape_unknown || GetContext().GetHostExecFlag()) { return DoLoadHybridModelOnline(model_id, model.om_name, model_helper.GetGeRootModel(), listener); } @@ -1218,7 +1217,7 @@ Status ModelManager::LoadModelOffline(uint32_t &model_id, const ModelData &model shared_ptr davinci_model = MakeShared(model.priority, listener); if (davinci_model == nullptr) { REPORT_CALL_ERROR("E19999", "New DavinciModel fail"); - GELOGE(ACL_ERROR_GE_MEMORY_ALLOCATION, "Make shared 
failed"); + GELOGE(ACL_ERROR_GE_MEMORY_ALLOCATION, "[New][DavinciModel] fail"); return ACL_ERROR_GE_MEMORY_ALLOCATION; } davinci_model->SetProfileTime(MODEL_LOAD_START, (timespec.tv_sec * kTimeSpecNano + @@ -1234,7 +1233,7 @@ Status ModelManager::LoadModelOffline(uint32_t &model_id, const ModelData &model rtError_t rt_ret = rtGetDevice(&device_id); if (rt_ret != RT_ERROR_NONE || device_id < 0) { REPORT_CALL_ERROR("E19999", "Call rtGetDevice failed, ret = 0x%X", rt_ret); - GELOGE(rt_ret, "Call rtGetDevice failed, ret = 0x%X, device_id = %d.", rt_ret, device_id); + GELOGE(rt_ret, "[Call][RtGetDevice] failed, ret = 0x%X, device_id = %d.", rt_ret, device_id); return RT_ERROR_TO_GE_STATUS(rt_ret); } davinci_model->SetDeviceId(device_id); @@ -1250,12 +1249,13 @@ Status ModelManager::LoadModelOffline(uint32_t &model_id, const ModelData &model /// Update session_id for infer in load model to avoid the same session_id. uint64_t new_session_id; ret = GenSessionId(new_session_id); - GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(ret != SUCCESS, break, "Generate session_id for inference failed."); + GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(ret != SUCCESS, break, "[Generate][SessionId] for inference failed, ret:%d.", ret); ret = davinci_model->UpdateSessionId(new_session_id); - GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(ret != SUCCESS, break, "Update session_id for inference failed."); + GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(ret != SUCCESS, break, + "[Update][SessionId] for inference failed, session id:%lu.", new_session_id); ret = davinci_model->Init(dev_ptr, mem_size, weight_ptr, weight_size); - GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(ret != SUCCESS, break, "DavinciInit failed."); + GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(ret != SUCCESS, break, "[Init][DavinciModel] failed, ret:%d.", ret); InsertModel(model_id, davinci_model); @@ -1280,26 +1280,27 @@ Status ModelManager::LoadModelWithQ(uint32_t &model_id, const ModelData &model_d const std::vector &input_queue_ids, const std::vector &output_queue_ids) { 
GE_CHK_BOOL_RET_STATUS(model_data.key.empty() || mmAccess2(model_data.key.c_str(), M_F_OK) == EN_OK, - ACL_ERROR_GE_PARAM_INVALID, "input key file path %s is not valid, %s", + ACL_ERROR_GE_PARAM_INVALID, + "[Check][Param] input key file path %s is not valid, %s", model_data.key.c_str(), strerror(errno)); ModelHelper model_helper; Status ret = model_helper.LoadModel(model_data); if (ret != SUCCESS) { - GELOGE(ret, "load model failed."); + GELOGE(ret, "[Load][Model] failed."); return ret; } shared_ptr davinci_model = MakeShared(model_data.priority, nullptr); if (davinci_model == nullptr) { REPORT_CALL_ERROR("E19999", "New DavinciModel fail"); - GELOGE(ACL_ERROR_GE_MEMORY_ALLOCATION, "create model failed."); + GELOGE(ACL_ERROR_GE_MEMORY_ALLOCATION, "[Create][Model] failed."); return ACL_ERROR_GE_MEMORY_ALLOCATION; } ret = davinci_model->Assign(model_helper.GetGeModel()); if (ret != SUCCESS) { - GELOGE(ret, "assign model failed."); + GELOGE(ret, "[Assign][Model] failed, ret:%d.", ret); return ret; } @@ -1308,15 +1309,17 @@ Status ModelManager::LoadModelWithQ(uint32_t &model_id, const ModelData &model_d /// Update session_id for infer in load model to avoid the same session_id. 
uint64_t new_session_id; ret = GenSessionId(new_session_id); - GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(ret != SUCCESS, return ret, "Generate session_id for infer failed."); + GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(ret != SUCCESS, return ret, + "[Generate][SessionId] for infer failed, ret:%d.", ret); ret = davinci_model->UpdateSessionId(new_session_id); - GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(ret != SUCCESS, return ret, "Update session_id for infer failed."); + GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(ret != SUCCESS, return ret, + "[Update][SessionId] for infer failed, SessionId:%lu.", new_session_id); GenModelId(&model_id); davinci_model->SetId(model_id); ret = davinci_model->SetQueIds(input_queue_ids, output_queue_ids); if (ret != SUCCESS) { - GELOGE(ret, "set model queue ids failed."); + GELOGE(ret, "[Set][Ids] for model queue failed, ret:%d, model_id:%u.", ret, model_id); return ret; } @@ -1324,7 +1327,7 @@ Status ModelManager::LoadModelWithQ(uint32_t &model_id, const ModelData &model_d ret = davinci_model->Init(); if (ret != SUCCESS) { - GELOGE(ret, "init model failed."); + GELOGE(ret, "[Init][Model] failed, ret:%d, model_id:%u.", ret, model_id); return ret; } @@ -1362,7 +1365,7 @@ Status ModelManager::ExecuteModel(uint32_t model_id, rtStream_t stream, bool asy std::shared_ptr davinci_model = GetModel(model_id); GE_CHK_BOOL_RET_STATUS(davinci_model != nullptr, ACL_ERROR_GE_EXEC_MODEL_ID_INVALID, - "Invalid model id %u, check whether model has been loaded or not.", model_id); + "[Get][Model] Invalid model id %u, check whether model has been loaded or not.", model_id); if (davinci_model->NeedDestroyAicpuKernel()) { GELOGI("Start to destroy specified aicpu kernel."); @@ -1413,9 +1416,8 @@ Status ModelManager::LoadCustAicpuSo(const OpDescPtr &op_desc, const string &so_ rtContext_t rt_cur_ctx = nullptr; auto rt_error = rtCtxGetCurrent(&rt_cur_ctx); if (rt_error != RT_ERROR_NONE) { - REPORT_CALL_ERROR("E19999", "Call rtCtxGetCurrent failed, ret = 0x%X", - rt_error); - GELOGE(RT_FAILED, "get current 
context failed, runtime result is %d", static_cast(rt_error)); + REPORT_CALL_ERROR("E19999", "Call rtCtxGetCurrent failed, ret = 0x%X", rt_error); + GELOGE(RT_FAILED, "[Call][RtCtxGetCurrent] failed, runtime result is %d", static_cast(rt_error)); return RT_FAILED; } @@ -1450,9 +1452,8 @@ Status ModelManager::LaunchKernelCustAicpuSo(const string &kernel_name) { rtContext_t rt_cur_ctx = nullptr; auto rt_error = rtCtxGetCurrent(&rt_cur_ctx); if (rt_error != RT_ERROR_NONE) { - REPORT_CALL_ERROR("E19999", "Call rtCtxGetCurrent failed, ret = 0x%X", - rt_error); - GELOGE(RT_FAILED, "get current context failed, runtime result is %d", static_cast(rt_error)); + REPORT_CALL_ERROR("E19999", "Call rtCtxGetCurrent failed, ret = 0x%X", rt_error); + GELOGE(RT_FAILED, "[Call][RtCtxGetCurrent] failed, runtime result is %d", static_cast(rt_error)); return RT_FAILED; } uintptr_t resource_id = reinterpret_cast(rt_cur_ctx); @@ -1477,17 +1478,15 @@ Status ModelManager::LaunchKernelCustAicpuSo(const string &kernel_name) { status = rtMalloc(&d_aicpu_data, aicpu_data_length, RT_MEMORY_HBM); if (status != RT_ERROR_NONE) { - REPORT_CALL_ERROR("E19999", "Call rtMalloc failed, size:%u, ret = 0x%X", - aicpu_data_length, status); - GELOGE(RT_FAILED, "Call rt failed, status: 0x%x", status); + REPORT_CALL_ERROR("E19999", "Call rtMalloc failed, size:%u, ret = 0x%X", aicpu_data_length, status); + GELOGE(RT_FAILED, "[Call][RtMalloc] failed, size:%u, ret = 0x%X", aicpu_data_length, status); return RT_ERROR_TO_GE_STATUS(status); } allocated_mem.push_back(d_aicpu_data); status = rtMalloc(&d_so_name, so_name.size(), RT_MEMORY_HBM); if (status != RT_ERROR_NONE) { - REPORT_CALL_ERROR("E19999", "Call rtMalloc fail, size:%zu, ret = 0x%X", - so_name.size(), status); - GELOGE(RT_FAILED, "Call rt failed, status: 0x%x", status); + REPORT_CALL_ERROR("E19999", "Call rtMalloc fail, size:%zu, ret = 0x%X", so_name.size(), status); + GELOGE(RT_FAILED, "[Call][RtMalloc] fail, size:%zu, ret = 0x%X", so_name.size(), 
status); return RT_ERROR_TO_GE_STATUS(status); } allocated_mem.push_back(d_so_name); @@ -1509,9 +1508,8 @@ Status ModelManager::LaunchKernelCustAicpuSo(const string &kernel_name) { uint32_t args_size = sizeof(CustAicpuSoBuf) * v_cust_so.size(); status = rtMalloc(&args, args_size, RT_MEMORY_HBM); if (status != RT_ERROR_NONE) { - REPORT_CALL_ERROR("E19999", "Call rtMalloc fail, size:%u, ret = 0x%X", - args_size, status); - GELOGE(RT_FAILED, "Call rt failed, status: 0x%x", status); + REPORT_CALL_ERROR("E19999", "Call rtMalloc fail, size:%u, ret = 0x%X", args_size, status); + GELOGE(RT_FAILED, "[Call][RtMalloc] fail, size:%u, ret = 0x%X", args_size, status); return RT_ERROR_TO_GE_STATUS(status); } allocated_mem.push_back(args); @@ -1525,9 +1523,8 @@ Status ModelManager::LaunchKernelCustAicpuSo(const string &kernel_name) { uint32_t batch_args_size = sizeof(BatchLoadOpFromBufArgs); status = rtMalloc(&batch_args, batch_args_size, RT_MEMORY_HBM); if (status != RT_ERROR_NONE) { - REPORT_CALL_ERROR("E19999", "Call rtMalloc fail, size:%u, ret = 0x%X", - batch_args_size, status); - GELOGE(RT_FAILED, "Call rt failed, status: 0x%x", status); + REPORT_CALL_ERROR("E19999", "Call rtMalloc fail, size:%u, ret = 0x%X", batch_args_size, status); + GELOGE(RT_FAILED, "[Call][RtMalloc] fail, size:%u, ret = 0x%X", batch_args_size, status); return RT_ERROR_TO_GE_STATUS(status); } allocated_mem.push_back(batch_args); @@ -1539,9 +1536,8 @@ Status ModelManager::LaunchKernelCustAicpuSo(const string &kernel_name) { status = rtStreamSynchronize(stream); if (status != RT_ERROR_NONE) { - REPORT_CALL_ERROR("E19999", "Call rtStreamSynchronize fail, ret = 0x%X", - status); - GELOGE(RT_FAILED, "Call rt stream sync failed, status: 0x%x", status); + REPORT_CALL_ERROR("E19999", "Call rtStreamSynchronize fail, ret = 0x%X", status); + GELOGE(RT_FAILED, "[Call][RtStreamSynchronize] fail, ret = 0x%X", status); return RT_ERROR_TO_GE_STATUS(status); } std::function callback = [&]() { @@ -1556,12 +1552,14 @@ 
Status ModelManager::LaunchKernelCustAicpuSo(const string &kernel_name) { } Status ModelManager::ClearAicpuSo() { - GE_CHK_STATUS_RET(LaunchKernelCustAicpuSo(kDeleteCustOp), "delete cust op so failed."); + GE_CHK_STATUS_RET(LaunchKernelCustAicpuSo(kDeleteCustOp), + "[Call][LaunchKernelCustAicpuSo] delete cust op so failed."); return SUCCESS; } Status ModelManager::LaunchCustAicpuSo() { - GE_CHK_STATUS_RET(LaunchKernelCustAicpuSo(kBatchLoadBuf), "launch cust op so failed."); + GE_CHK_STATUS_RET(LaunchKernelCustAicpuSo(kBatchLoadBuf), + "[Call][LaunchKernelCustAicpuSo] launch cust op so failed."); return SUCCESS; } @@ -1577,21 +1575,21 @@ Status ModelManager::GetModelMemAndWeightSize(const ModelData &model, size_t &me uint8_t *model_data = nullptr; uint32_t model_len = 0; Status ret = ModelParserBase::ParseModelContent(model, model_data, model_len); - GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(ret != SUCCESS, return ACL_ERROR_GE_PARAM_INVALID, "parse model content failed!"); + GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(ret != SUCCESS, return ACL_ERROR_GE_PARAM_INVALID, "[Parse][ModelContent] failed!"); OmFileLoadHelper om_file_helper; ret = om_file_helper.Init(model_data, model_len); - GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(ret != SUCCESS, return ret, "om file helperInit failed!"); + GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(ret != SUCCESS, return ret, "[Init][OmFileHelper] failed, ret:%d", ret); auto partition_table = reinterpret_cast(model_data); if (partition_table->num == 1) { REPORT_INNER_ERROR("E19999", "partition_table num in model_data is 1, check invalid"); - GELOGE(ACL_ERROR_GE_PARAM_INVALID, "om model is error,please use executable om model"); + GELOGE(ACL_ERROR_GE_PARAM_INVALID, "[Check][Param] om model is error, please use executable om model"); return ACL_ERROR_GE_PARAM_INVALID; } ModelPartition task_partition; if (om_file_helper.GetModelPartition(ModelPartitionType::TASK_INFO, task_partition) != SUCCESS) { - GELOGE(ACL_ERROR_GE_EXEC_LOAD_TASK_PARTITION_FAILED, "get task model partition 
failed."); + GELOGE(ACL_ERROR_GE_EXEC_LOAD_TASK_PARTITION_FAILED, "[Get][ModelPartition] failed."); return ACL_ERROR_GE_EXEC_LOAD_TASK_PARTITION_FAILED; } @@ -1601,7 +1599,7 @@ Status ModelManager::GetModelMemAndWeightSize(const ModelData &model, size_t &me } if (task_partition.size != 0) { if (!ReadProtoFromArray(task_partition.data, static_cast(task_partition.size), model_task_def.get())) { - GELOGE(ACL_ERROR_GE_EXEC_LOAD_TASK_PARTITION_FAILED, "ReadProtoFromArray failed."); + GELOGE(ACL_ERROR_GE_EXEC_LOAD_TASK_PARTITION_FAILED, "[Read][Proto] From Array failed."); return ACL_ERROR_GE_EXEC_LOAD_TASK_PARTITION_FAILED; } } @@ -1609,7 +1607,7 @@ Status ModelManager::GetModelMemAndWeightSize(const ModelData &model, size_t &me ModelPartition partition_weight; ret = om_file_helper.GetModelPartition(ModelPartitionType::WEIGHTS_DATA, partition_weight); GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(ret != SUCCESS, return ACL_ERROR_GE_EXEC_LOAD_WEIGHT_PARTITION_FAILED, - "Get weight partition failed. ret = %u", ret); + "[Get][ModelPartition] failed. 
ret = %u", ret); mem_size = model_task_def->memory_size(); weight_size = partition_weight.size; @@ -1627,8 +1625,7 @@ void ModelManager::GenModelId(uint32_t *id) { Status ModelManager::GetOrigInputInfo(uint32_t model_id, uint32_t index, OriginInputInfo &orig_input_info) { std::shared_ptr davinci_model = GetModel(model_id); GE_CHK_BOOL_RET_STATUS(davinci_model != nullptr, ACL_ERROR_GE_EXEC_MODEL_ID_INVALID, - "GetOrigInputInfo failed, invalid model_id is %u.", - model_id); + "[Get][Model] failed, invalid model_id is %u.", model_id); return davinci_model->GetOrigInputInfo(index, orig_input_info); } @@ -1638,7 +1635,7 @@ Status ModelManager::GetAllAippInputOutputDims(uint32_t model_id, uint32_t index std::vector &output_dims) { std::shared_ptr davinci_model = GetModel(model_id); GE_CHK_BOOL_RET_STATUS(davinci_model != nullptr, ACL_ERROR_GE_EXEC_MODEL_ID_INVALID, - "GetAllAippInputOutputDims failed, invalid model_id is %u.", model_id); + "[Get][Model] failed, invalid model_id is %u.", model_id); return davinci_model->GetAllAippInputOutputDims(index, input_dims, output_dims); } @@ -1653,7 +1650,7 @@ ge::Status ModelManager::SyncExecuteModel(uint32_t model_id, const vector &options dump_exception_flag_ = true; rtError_t rt_ret = rtSetTaskFailCallback(reinterpret_cast(ExceptionCallback)); if (rt_ret != RT_ERROR_NONE) { - REPORT_CALL_ERROR("E19999", "Call rtSetTaskFailCallback fail, ret = 0x%X", - rt_ret); - GELOGE(RT_FAILED, "rtSetTaskFailCallback failed"); + REPORT_CALL_ERROR("E19999", "Call rtSetTaskFailCallback fail, ret = 0x%X", rt_ret); + GELOGE(RT_FAILED, "[Call][RtSetTaskFailCallback] fail, ret = 0x%X", rt_ret); return RT_ERROR_TO_GE_STATUS(rt_ret); } } else { @@ -1743,9 +1739,8 @@ Status ModelManager::LaunchKernelCheckAicpuOp(std::vector &aicpu_op // malloc sysOpInfoList in SysOpCheckInfo status = rtMalloc(&d_req_op_list, op_nums * sizeof(SysOpInfo), RT_MEMORY_HBM); if (status != RT_ERROR_NONE) { - REPORT_CALL_ERROR("E19999", "Call rtMalloc fail, size:%zu, ret = 
0x%X", - op_nums * sizeof(SysOpInfo), status); - GELOGE(RT_FAILED, "Call rt failed, status: 0x%x", status); + REPORT_CALL_ERROR("E19999", "Call rtMalloc fail, size:%zu, ret = 0x%X", op_nums * sizeof(SysOpInfo), status); + GELOGE(RT_FAILED, "[Call][RtMalloc] fail, size:%zu, ret = 0x%X", op_nums * sizeof(SysOpInfo), status); return RT_ERROR_TO_GE_STATUS(status); } allocated_mem.push_back(d_req_op_list); @@ -1753,9 +1748,8 @@ Status ModelManager::LaunchKernelCheckAicpuOp(std::vector &aicpu_op // malloc sysOpInfoList in SysOpCheckResp status = rtMalloc(&d_res_op_list, op_nums * sizeof(SysOpInfo), RT_MEMORY_HBM); if (status != RT_ERROR_NONE) { - REPORT_CALL_ERROR("E19999", "Call rtMalloc fail, size:%zu, ret = 0x%X", - op_nums * sizeof(SysOpInfo), status); - GELOGE(RT_FAILED, "Call rt failed, status: 0x%x", status); + REPORT_CALL_ERROR("E19999", "Call rtMalloc fail, size:%zu, ret = 0x%X", op_nums * sizeof(SysOpInfo), status); + GELOGE(RT_FAILED, "[Call][RtMalloc] fail, size:%zu, ret = 0x%X", op_nums * sizeof(SysOpInfo), status); return RT_ERROR_TO_GE_STATUS(status); } allocated_mem.push_back(d_res_op_list); @@ -1763,9 +1757,8 @@ Status ModelManager::LaunchKernelCheckAicpuOp(std::vector &aicpu_op // malloc returnCodeList in SysOpCheckResp status = rtMalloc(&d_ret_code_list, op_nums * sizeof(ReturnCode), RT_MEMORY_HBM); if (status != RT_ERROR_NONE) { - REPORT_CALL_ERROR("E19999", "Call rtMalloc fail, size:%zu, ret = 0x%X", - op_nums * sizeof(ReturnCode), status); - GELOGE(RT_FAILED, "Call rt failed, status: 0x%x", status); + REPORT_CALL_ERROR("E19999", "Call rtMalloc fail, size:%zu, ret = 0x%X", op_nums * sizeof(ReturnCode), status); + GELOGE(RT_FAILED, "[Call][RtMalloc] fail, size:%zu, ret = 0x%X", op_nums * sizeof(ReturnCode), status); return RT_ERROR_TO_GE_STATUS(status); } allocated_mem.push_back(d_ret_code_list); @@ -1776,9 +1769,8 @@ Status ModelManager::LaunchKernelCheckAicpuOp(std::vector &aicpu_op void *d_op_type_name = nullptr; status = rtMalloc(&d_op_type_name, 
op_type.length(), RT_MEMORY_HBM); if (status != RT_ERROR_NONE) { - REPORT_CALL_ERROR("E19999", "Call rtMalloc fail, size:%lu, ret = 0x%X", - op_type.length(), status); - GELOGE(RT_FAILED, "Call rt failed, status: 0x%x", status); + REPORT_CALL_ERROR("E19999", "Call rtMalloc fail, size:%lu, ret = 0x%X", op_type.length(), status); + GELOGE(RT_FAILED, "[Call][RtMalloc] fail, size:%lu, ret = 0x%X", op_type.length(), status); return RT_ERROR_TO_GE_STATUS(status); } allocated_mem.push_back(d_op_type_name); @@ -1795,9 +1787,8 @@ Status ModelManager::LaunchKernelCheckAicpuOp(std::vector &aicpu_op void *d_op_type_name = nullptr; status = rtMalloc(&d_op_type_name, op_type.size(), RT_MEMORY_HBM); if (status != RT_ERROR_NONE) { - REPORT_CALL_ERROR("E19999", "Call rtMalloc fail, size:%lu, ret = 0x%X", - op_type.length(), status); - GELOGE(RT_FAILED, "Call rt failed, status: 0x%x", status); + REPORT_CALL_ERROR("E19999", "Call rtMalloc fail, size:%lu, ret = 0x%X", op_type.length(), status); + GELOGE(RT_FAILED, "[Call][RtMalloc] fail, size:%lu, ret = 0x%X", op_type.size(), status); return RT_ERROR_TO_GE_STATUS(status); } allocated_mem.push_back(d_op_type_name); @@ -1825,9 +1816,8 @@ Status ModelManager::LaunchKernelCheckAicpuOp(std::vector &aicpu_op uint32_t args_size = sizeof(SysOpCheckInfo) + sizeof(SysOpCheckResp); status = rtMalloc(&args, args_size, RT_MEMORY_HBM); if (status != RT_ERROR_NONE) { - REPORT_CALL_ERROR("E19999", "Call rtMalloc fail, size:%u, ret = 0x%X", - args_size, status); - GELOGE(RT_FAILED, "Call rt failed, status: 0x%x", status); + REPORT_CALL_ERROR("E19999", "Call rtMalloc fail, size:%u, ret = 0x%X", args_size, status); + GELOGE(RT_FAILED, "[Call][RtMalloc] fail, size:%u, ret = 0x%X", args_size, status); return RT_ERROR_TO_GE_STATUS(status); } allocated_mem.push_back(args); @@ -1842,9 +1832,8 @@ Status ModelManager::LaunchKernelCheckAicpuOp(std::vector &aicpu_op status = rtStreamSynchronize(stream); if (status != RT_ERROR_NONE) { - 
REPORT_CALL_ERROR("E19999", "Call rtStreamSynchronize fail, ret = 0x%X", - status); - GELOGE(RT_FAILED, "Call rt stream sync failed, status: 0x%x", status); + REPORT_CALL_ERROR("E19999", "Call rtStreamSynchronize fail, ret = 0x%X", status); + GELOGE(RT_FAILED, "[Call][RtStreamSynchronize] failed, ret:0x%X", status); GE_CHK_RT(rtStreamDestroy(stream)); return RT_ERROR_TO_GE_STATUS(status); } @@ -1879,7 +1868,8 @@ Status ModelManager::LaunchKernelCheckAicpuOp(std::vector &aicpu_op REPORT_INNER_ERROR("E19999", "res_ret_code_list.size:%zu res_aicpu_op_info_list.size:%zu res_op_nums:%lu " "not equal, check invalid", res_ret_code_list.size(), res_aicpu_op_info_list.size(), res_op_nums); - GELOGE(FAILED, "Number of retcode is not equal to number of op type."); + GELOGE(FAILED, "[Check][Param] Number:%zu of retcode is not equal to number:%zu of op type or not equal %lu.", + res_ret_code_list.size(), res_aicpu_op_info_list.size(), res_op_nums); GE_CHK_RT(rtStreamDestroy(stream)); return FAILED; } @@ -1902,9 +1892,8 @@ Status ModelManager::LaunchKernelCheckAicpuOp(std::vector &aicpu_op "<0: op_type, 1: format, 2: datatype> \n"; } fail_reason += "not support."; - REPORT_INNER_ERROR("E19999", "Check aicpu op_type failed, details:%s", - fail_reason.c_str()); - GELOGE(FAILED, "Check aicpu op_type failed. details: %s", fail_reason.c_str()); + REPORT_INNER_ERROR("E19999", "Check aicpu op_type failed, details:%s", fail_reason.c_str()); + GELOGE(FAILED, "[Check][Param] Check aicpu op_type failed. 
details:%s", fail_reason.c_str()); GE_CHK_RT(rtStreamDestroy(stream)); return FAILED; } @@ -1924,7 +1913,7 @@ Status ModelManager::CheckAicpuOpList(GeModelPtr ge_model) { return SUCCESS; } GE_CHK_STATUS_RET(LaunchKernelCheckAicpuOp(aicpu_optype_list, aicpu_tf_optype_list), - "Launch check aicpu op type failed."); + "[Call][LaunchKernelCheckAicpuOp] failed."); return SUCCESS; } diff --git a/ge/graph/load/model_manager/model_manager.h b/ge/graph/load/model_manager/model_manager.h index c0f14934..efba54ec 100755 --- a/ge/graph/load/model_manager/model_manager.h +++ b/ge/graph/load/model_manager/model_manager.h @@ -310,7 +310,7 @@ class FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY ModelManager { std::lock_guard lock(exeception_infos_mutex_); auto instance = ModelManager::GetInstance(); if (instance == nullptr) { - GELOGE(FAILED, "Instance is nullptr"); + GELOGE(FAILED, "[Get][Instance] failed, as ret is nullptr"); return; } instance->AddExceptionInfo(*rt_exception_info); diff --git a/ge/graph/load/model_manager/model_utils.cc b/ge/graph/load/model_manager/model_utils.cc index f872791e..f1748b35 100755 --- a/ge/graph/load/model_manager/model_utils.cc +++ b/ge/graph/load/model_manager/model_utils.cc @@ -21,14 +21,15 @@ #include "graph/utils/tensor_utils.h" #include "graph/manager/graph_var_manager.h" #include "graph/types.h" +#include "graph/build/memory/block_mem_assigner.h" #define VALIDATE_MEM_RANGE(OP, SIZE, OFFSET) \ do { \ if (SIZE <= static_cast(OFFSET)) { \ - REPORT_INNER_ERROR("E19999", \ - "Node:%s(%s) offset:%ld out of range size:%lu, check invalid", \ + REPORT_INNER_ERROR("E19999", "Node:%s(%s) offset:%ld out of range size:%lu, check invalid", \ OP->GetName().c_str(), OP->GetType().c_str(), OFFSET, SIZE); \ - GELOGE(OUT_OF_MEMORY, "Node: %s, memory out of range[%lu: %ld]", OP->GetName().c_str(), SIZE, OFFSET); \ + GELOGE(OUT_OF_MEMORY, "[Check][Param]Node: %s, memory out of range[%lu: %ld]", \ + OP->GetName().c_str(), SIZE, OFFSET); \ return {}; \ } \ } 
while (0) @@ -311,8 +312,9 @@ vector ModelUtils::GetInputDataAddrs(const RuntimeParam &model_param, Co REPORT_INNER_ERROR("E19999", "Attr:%s, memory_type.size:%zu != input_desc.size:%zu, op:%s(%s), check invalid", ATTR_NAME_INPUT_MEM_TYPE_LIST.c_str(), v_memory_type.size(), inputs_size, op_desc->GetName().c_str(), op_desc->GetType().c_str()); - GELOGE(PARAM_INVALID, "Fusion: check input size failed, op: %s, input v_memory_type size: %zu input numbers: %zu", - op_desc->GetName().c_str(), v_memory_type.size(), inputs_size); + GELOGE(PARAM_INVALID, "[Check][Param] Attr:%s, memory_type.size:%zu != input_desc.size:%zu, op:%s(%s)", + ATTR_NAME_INPUT_MEM_TYPE_LIST.c_str(), v_memory_type.size(), inputs_size, + op_desc->GetName().c_str(), op_desc->GetType().c_str()); return v_input_data_addr; } for (size_t i = 0; i < op_desc->GetAllInputsSize(); ++i) { @@ -394,8 +396,7 @@ Status ModelUtils::GetVarAddr(const RuntimeParam &model_param, const ConstOpDesc case RT_MEMORY_RDMA_HBM: if (offset < 0) { REPORT_INNER_ERROR("E19999", "Param offset:%ld < 0, check invalid", offset); - GELOGE(PARAM_INVALID, "rdma var addr is invalid, addr=%p", - reinterpret_cast(static_cast(offset))); + GELOGE(PARAM_INVALID, "[Check][Param] Param offset:%ld cannot be negative", offset); return PARAM_INVALID; } var_addr = reinterpret_cast(static_cast(offset)); @@ -405,9 +406,9 @@ Status ModelUtils::GetVarAddr(const RuntimeParam &model_param, const ConstOpDesc var_addr = model_param.var_base + offset - model_param.logic_var_base; break; default: - REPORT_INNER_ERROR("E19999", "Get mem_type:%d for offset:%ld is unsupported, check invalid", - mem_type, offset); - GELOGE(PARAM_INVALID, "unsupported memory type %u", mem_type); + REPORT_INNER_ERROR("E19999", "Get mem_type:%d for offset:%ld is unsupported, check invalid", mem_type, offset); + GELOGE(PARAM_INVALID, "[Check][Param] Get mem_type:%d for offset:%ld is unsupported, check invalid", + mem_type, offset); return PARAM_INVALID; } GE_CHECK_NOTNULL(var_addr); 
@@ -435,9 +436,9 @@ vector ModelUtils::GetOutputDataAddrs(const RuntimeParam &model_param, C REPORT_INNER_ERROR("E19999", "Attr:%s, memory_type.size:%zu != output_desc.size:%zu, op:%s(%s), check invalid", ATTR_NAME_OUTPUT_MEM_TYPE_LIST.c_str(), v_memory_type.size(), outputs_size, op_desc->GetName().c_str(), op_desc->GetType().c_str()); - GELOGE(PARAM_INVALID, - "Fusion: check output size failed, op: %s, output v_memory_type size: %lu output numbers: %zu", - op_desc->GetName().c_str(), v_memory_type.size(), outputs_size); + GELOGE(PARAM_INVALID, "[Check][Param] Attr:%s, memory_type.size:%zu != output_desc.size:%zu, op:%s(%s)", + ATTR_NAME_OUTPUT_MEM_TYPE_LIST.c_str(), v_memory_type.size(), outputs_size, + op_desc->GetName().c_str(), op_desc->GetType().c_str()); return v_output_data_addr; } for (size_t i = 0; i < outputs_size; ++i) { @@ -520,10 +521,16 @@ vector ModelUtils::GetWorkspaceDataAddrs(const RuntimeParam &model_param bool has_mem_type_attr = ge::AttrUtils::GetListInt(op_desc, TVM_ATTR_NAME_WORKSPACE_TYPE, v_memory_type); bool has_mem_type_workspace = ge::AttrUtils::GetListInt(op_desc, ATTR_NAME_WORKSPACE_TYPE_LIST, workspace_memory_type); + + vector workspace_no_reuse_scope; + bool has_workspace_no_reuse_scope = + ge::AttrUtils::GetListInt(op_desc, ATTR_NAME_WORKSPACE_MEMORY_NO_REUSE_SCOPE, workspace_no_reuse_scope); + for (size_t i = 0; i < v_workspace_bytes.size(); ++i) { // Temporary solution, the aicpu workspace of multiple images cannot be shared. 
- if (has_workspace_reuse && i < workspace_reuse_flag.size() && !workspace_reuse_flag[i] && - !model_param.is_single_op) { + bool aicpu_work_space = (has_workspace_reuse && i < workspace_reuse_flag.size() && !workspace_reuse_flag[i] && + !model_param.is_single_op); + if (aicpu_work_space) { void *mem_addr = model_param.aicpu_mem_mall->Acquire(v_workspace_offset[i], v_workspace_bytes[i]); v_workspace_data_addr.push_back(mem_addr); GELOGI( @@ -554,7 +561,13 @@ vector ModelUtils::GetWorkspaceDataAddrs(const RuntimeParam &model_param model_param.graph_id, op_desc->GetName().c_str(), i, v_workspace_offset[i], v_workspace_bytes[i]); } else { VALIDATE_MEM_RANGE(op_desc, model_param.mem_size, v_workspace_offset[i]); - uint8_t *mem_addr = model_param.mem_base + v_workspace_offset[i]; + uint8_t *mem_addr = nullptr; + bool session_scope_memory = (has_workspace_no_reuse_scope) && (i < workspace_no_reuse_scope.size()); + if (session_scope_memory) { + mem_addr = model_param.memory_infos.at(kSessionScopeMemory | RT_MEMORY_HBM).memory_base + v_workspace_offset[i]; + } else { + mem_addr = model_param.mem_base + v_workspace_offset[i]; + } v_workspace_data_addr.push_back(mem_addr); GELOGI("[IMAS]GetWorkspaceDataAddrs graph_%u type[F] name[%s] workspace[%zu] offset[%ld] bytes[%ld] memaddr[%p]", model_param.graph_id, op_desc->GetName().c_str(), i, v_workspace_offset[i], v_workspace_bytes[i], @@ -587,7 +600,7 @@ Status ModelUtils::GetRtAddress(const RuntimeParam ¶m, uintptr_t logic_addr, } else if (logic_addr != 0) { mem_addr = nullptr; REPORT_INNER_ERROR("E19999", "Check param logic addr:0x%lx abnormal", logic_addr); - GELOGE(PARAM_INVALID, "The logic addr:0x%lx is abnormal", logic_addr); + GELOGE(PARAM_INVALID, "[Check][Param] The logic addr:0x%lx is abnormal", logic_addr); return PARAM_INVALID; } diff --git a/ge/graph/load/model_manager/task_info/kernel_ex_task_info.cc b/ge/graph/load/model_manager/task_info/kernel_ex_task_info.cc index 7c26d23d..356919f6 100644 --- 
a/ge/graph/load/model_manager/task_info/kernel_ex_task_info.cc +++ b/ge/graph/load/model_manager/task_info/kernel_ex_task_info.cc @@ -195,7 +195,8 @@ Status KernelExTaskInfo::Init(const domi::TaskDef &task_def, DavinciModel *davin return RT_ERROR_TO_GE_STATUS(rt_ret);) SetIoAddrs(op_desc); - InitDumpTask(input_output_addr, op_desc); + InitDumpFlag(op_desc); + InitDumpArgs(input_output_addr, op_desc); GELOGI("KernelExTaskInfo knonw node Init Success."); return SUCCESS; } @@ -237,7 +238,8 @@ Status KernelExTaskInfo::Init(const domi::TaskDef &task_def, DavinciModel *davin GELOGE(RT_FAILED, "[Call][RtMemcpy] failed, ret:0x%X, size:%lu", rt_ret, addrs_size); return RT_ERROR_TO_GE_STATUS(rt_ret);) - InitDumpTask(input_output_addr_, op_desc); + InitDumpFlag(op_desc); + InitDumpArgs(input_output_addr_, op_desc); } uint64_t input_output_addr = static_cast(reinterpret_cast(input_output_addr_)); @@ -269,10 +271,16 @@ Status KernelExTaskInfo::Init(const domi::TaskDef &task_def, DavinciModel *davin return SUCCESS; } -void KernelExTaskInfo::InitDumpTask(void *addr, const OpDescPtr &op_desc) { - if (davinci_model_->OpNeedDump(op_desc->GetName()) || davinci_model_->GetOpDugReg()) { - GELOGD("Op %s need dump in kernel ex task info", op_desc->GetName().c_str()); +void KernelExTaskInfo::InitDumpFlag(const OpDescPtr &op_desc) { + if (davinci_model_->OpNeedDump(op_desc->GetName())) { + GELOGD("Op %s need init dump flag in kernel ex task info", op_desc->GetName().c_str()); dump_flag_ = RT_KERNEL_DUMPFLAG; + } +} + +void KernelExTaskInfo::InitDumpArgs(void *addr, const OpDescPtr &op_desc) { + if (davinci_model_->OpNeedDump(op_desc->GetName())) { + GELOGD("Op %s need dump in kernel ex task info", op_desc->GetName().c_str()); dump_args_ = addr; } if (davinci_model_->GetOpDugReg()) { diff --git a/ge/graph/load/model_manager/task_info/kernel_ex_task_info.h b/ge/graph/load/model_manager/task_info/kernel_ex_task_info.h index bcc17168..1b77b715 100644 --- 
a/ge/graph/load/model_manager/task_info/kernel_ex_task_info.h +++ b/ge/graph/load/model_manager/task_info/kernel_ex_task_info.h @@ -61,7 +61,8 @@ class KernelExTaskInfo : public TaskInfo { Status CopyTaskInfo(const domi::KernelExDef &kernel_def, const RuntimeParam &rts_param, const OpDescPtr &op_desc); void SetIoAddrs(const OpDescPtr &op_desc); - void InitDumpTask(void *addr, const OpDescPtr &op_desc); + void InitDumpFlag(const OpDescPtr &op_desc); + void InitDumpArgs(void *addr, const OpDescPtr &op_desc); Status InitTaskExtInfo(const std::string &ext_info, const OpDescPtr &op_desc); uint32_t task_id_; diff --git a/ge/graph/load/model_manager/task_info/kernel_task_info.cc b/ge/graph/load/model_manager/task_info/kernel_task_info.cc index c72bfeef..d69d0a8b 100755 --- a/ge/graph/load/model_manager/task_info/kernel_task_info.cc +++ b/ge/graph/load/model_manager/task_info/kernel_task_info.cc @@ -129,6 +129,7 @@ Status KernelTaskInfo::Init(const domi::TaskDef &task_def, DavinciModel *davinci ctx_.opIndex2[i] = context.origin_op_index(i); } ctx_.opCount = context.origin_op_index_size(); + InitDumpFlag(); if (kernel_type_ == ccKernelType::TE) { ctx_.opIndex = context.op_index(); uint16_t *args_offset_tmp = reinterpret_cast(const_cast(context.args_offset().data())); @@ -660,7 +661,7 @@ Status KernelTaskInfo::InitTVMTask(uint16_t offset, const domi::KernelDef &kerne if (davinci_model_->IsKnownNode()) { args_ = l2_buffer_on_ ? davinci_model_->GetCurrentHybridArgsAddr(hybrid_args_offset_) : davinci_model_->GetCurrentArgsAddr(args_offset_); - InitDumpTask(offset); + InitDumpArgs(offset); return SUCCESS; } @@ -726,7 +727,7 @@ Status KernelTaskInfo::InitTVMTask(uint16_t offset, const domi::KernelDef &kerne return FAILED; } skt_dump_args_ = static_cast(args_) + offset; - InitDumpTask(offset); + InitDumpArgs(offset); vector virtual_io_addrs; // use virtual address for zero copy key. 
virtual_io_addrs.insert(virtual_io_addrs.end(), input_data_addrs.begin(), input_data_addrs.end()); @@ -1022,7 +1023,7 @@ Status KernelTaskInfo::InitAicpuTask(uint32_t op_index, const domi::KernelDef &k if (davinci_model_->IsKnownNode()) { args_ = davinci_model_->GetCurrentHybridArgsAddr(hybrid_args_offset_); - InitDumpTask(sizeof(aicpu::AicpuParamHead)); + InitDumpArgs(sizeof(aicpu::AicpuParamHead)); return SUCCESS; } const RuntimeParam &rts_param = davinci_model_->GetRuntimeParam(); @@ -1063,7 +1064,7 @@ Status KernelTaskInfo::InitAicpuTask(uint32_t op_index, const domi::KernelDef &k op_desc->GetName().c_str(), op_desc->GetType().c_str(), args_size_, rt_ret); return RT_ERROR_TO_GE_STATUS(rt_ret); } - InitDumpTask(sizeof(aicpu::AicpuParamHead)); + InitDumpArgs(sizeof(aicpu::AicpuParamHead)); if (kernel_type_ == ccKernelType::CUST_AI_CPU) { dump_flag_ |= RT_KERNEL_CUSTOM_AICPU; @@ -1074,14 +1075,20 @@ Status KernelTaskInfo::InitAicpuTask(uint32_t op_index, const domi::KernelDef &k return SUCCESS; } -void KernelTaskInfo::InitDumpTask(uint32_t offset) { +void KernelTaskInfo::InitDumpFlag() { if (davinci_model_->OpNeedDump(op_desc_->GetName())) { - GELOGD("Op %s need dump in task info", op_desc_->GetName().c_str()); + GELOGD("Op %s init dump flag", op_desc_->GetName().c_str()); if (IsL1FusionOp(op_desc_)) { dump_flag_ = RT_FUSION_KERNEL_DUMPFLAG; } else { dump_flag_ = RT_KERNEL_DUMPFLAG; } + } +} + +void KernelTaskInfo::InitDumpArgs(uint32_t offset) { + if (davinci_model_->OpNeedDump(op_desc_->GetName())) { + GELOGD("Op %s need dump in task info", op_desc_->GetName().c_str()); dump_args_ = static_cast(args_) + offset; } if (davinci_model_->GetOpDugReg()) { diff --git a/ge/graph/load/model_manager/task_info/kernel_task_info.h b/ge/graph/load/model_manager/task_info/kernel_task_info.h index 79347255..d9dd30bb 100644 --- a/ge/graph/load/model_manager/task_info/kernel_task_info.h +++ b/ge/graph/load/model_manager/task_info/kernel_task_info.h @@ -128,7 +128,8 @@ class 
KernelTaskInfo : public TaskInfo { Status SuperKernelDistribute(); bool IsL1FusionOp(const OpDescPtr &op_desc); void SetIoAddrs(const OpDescPtr &op_desc); - void InitDumpTask(uint32_t offset); + void InitDumpFlag(); + void InitDumpArgs(uint32_t offset); void SetContinuousArgs(uint32_t args_size, DavinciModel *davinci_model); void SetNoncontinuousArgs(uint32_t args_size, DavinciModel *davinci_model); Status CopyNoncontinuousArgs(uint16_t offset); diff --git a/ge/graph/load/model_manager/task_info/task_info.cc b/ge/graph/load/model_manager/task_info/task_info.cc index fad18d37..9f82efad 100755 --- a/ge/graph/load/model_manager/task_info/task_info.cc +++ b/ge/graph/load/model_manager/task_info/task_info.cc @@ -26,7 +26,7 @@ Status TaskInfo::SetStream(uint32_t stream_id, const std::vector &st stream_ = stream_list[stream_id]; } else { REPORT_INNER_ERROR("E19999", "stream_id:%u >= stream_list.size(): %zu, check invalid", - stream_id, stream_list.size()); + stream_id, stream_list.size()); GELOGE(FAILED, "[Check][Param] index:%u >= stream_list.size():%zu.", stream_id, stream_list.size()); return FAILED; } diff --git a/ge/graph/load/model_manager/task_info/task_info.h b/ge/graph/load/model_manager/task_info/task_info.h index 99ec3c4e..5657f003 100644 --- a/ge/graph/load/model_manager/task_info/task_info.h +++ b/ge/graph/load/model_manager/task_info/task_info.h @@ -18,6 +18,7 @@ #define GE_GRAPH_LOAD_NEW_MODEL_MANAGER_TASK_INFO_TASK_INFO_H_ #include +#include #include "cce/customize.h" #include "framework/common/taskdown_common.h" @@ -28,9 +29,11 @@ namespace ge { struct MemInfo { - uint64_t memory_size = 0; + size_t memory_size = 0; uint64_t logic_memory_base = 0; uint8_t *memory_base = nullptr; + uint32_t memory_type = RT_MEMORY_HBM; + std::string memory_key = ""; }; struct RuntimeParam { @@ -40,6 +43,19 @@ struct RuntimeParam { } ~RuntimeParam() = default; + std::string ToString() { + std::stringstream ss; + ss << "session_id:" << session_id << ", stream_num:" << 
stream_num << ", event_num:" << event_num + << ", label_num:" << label_num << ", logic_mem_base:" << logic_mem_base + << ", logic_weight_base:" << logic_weight_base << ", logic_var_base:" << logic_var_base + << ", memory_size:" << mem_size << ", weight_size:" << weight_size << ", var_size:" << var_size + << ", ex_memory_info:"; + for (auto it : memory_infos) { + ss << "[memory_type:" << it.first << ", memory_size:" << it.second.memory_size << "]"; + } + return ss.str(); + } + uint64_t mem_size = 0; uint64_t logic_mem_base = 0; uint8_t *mem_base = nullptr; @@ -49,7 +65,7 @@ struct RuntimeParam { uint64_t var_size = 0; uint64_t logic_var_base = 0; uint8_t *var_base = nullptr; - std::map memory_infos; + std::map memory_infos; uint32_t batch_num = 0; uint32_t stream_num = 0; uint32_t event_num = 0; diff --git a/ge/graph/load/model_manager/tbe_handle_store.cc b/ge/graph/load/model_manager/tbe_handle_store.cc index 6efb6190..36207aa2 100755 --- a/ge/graph/load/model_manager/tbe_handle_store.cc +++ b/ge/graph/load/model_manager/tbe_handle_store.cc @@ -24,7 +24,7 @@ namespace ge { void TbeHandleInfo::used_inc(uint32_t num) { if (used_ > std::numeric_limits::max() - num) { REPORT_INNER_ERROR("E19999", "Used:%u reach numeric max", used_); - GELOGE(INTERNAL_ERROR, "Used[%u] reach numeric max.", used_); + GELOGE(INTERNAL_ERROR, "[Check][Param] Used[%u] reach numeric max.", used_); return; } @@ -34,7 +34,7 @@ void TbeHandleInfo::used_inc(uint32_t num) { void TbeHandleInfo::used_dec(uint32_t num) { if (used_ < std::numeric_limits::min() + num) { REPORT_INNER_ERROR("E19999", "Used:%u reach numeric min", used_); - GELOGE(INTERNAL_ERROR, "Used[%u] reach numeric min.", used_); + GELOGE(INTERNAL_ERROR, "[Check][Param] Used[%u] reach numeric min.", used_); return; } @@ -107,9 +107,8 @@ void TBEHandleStore::ReferTBEHandle(const std::string &name) { std::lock_guard lock(mutex_); auto it = kernels_.find(name); if (it == kernels_.end()) { - REPORT_INNER_ERROR("E19999", "Kernel:%s not 
found in stored check invalid", - name.c_str()); - GELOGE(INTERNAL_ERROR, "Kernel[%s] not found in stored.", name.c_str()); + REPORT_INNER_ERROR("E19999", "Kernel:%s not found in stored check invalid", name.c_str()); + GELOGE(INTERNAL_ERROR, "[Check][Param] Kernel[%s] not found in stored.", name.c_str()); return; } @@ -128,9 +127,8 @@ void TBEHandleStore::EraseTBEHandle(const std::map &names for (auto &item : names) { auto it = kernels_.find(item.first); if (it == kernels_.end()) { - REPORT_INNER_ERROR("E19999", "Kernel:%s not found in stored check invalid", - item.first.c_str()); - GELOGE(INTERNAL_ERROR, "Kernel[%s] not found in stored.", item.first.c_str()); + REPORT_INNER_ERROR("E19999", "Kernel:%s not found in stored check invalid", item.first.c_str()); + GELOGE(INTERNAL_ERROR, "[Check][Param] Kernel[%s] not found in stored.", item.first.c_str()); continue; } @@ -142,7 +140,8 @@ void TBEHandleStore::EraseTBEHandle(const std::map &names if (rt_ret != RT_ERROR_NONE) { REPORT_INNER_ERROR("E19999", "Call rtDevBinaryUnRegister failed for Kernel:%s fail, ret:0x%X", item.first.c_str(), rt_ret); - GELOGE(INTERNAL_ERROR, "Kernel[%s] UnRegister handle fail:%u.", item.first.c_str(), rt_ret); + GELOGE(INTERNAL_ERROR, "[Call][RtDevBinaryUnRegister] Kernel[%s] UnRegister handle fail:%u.", + item.first.c_str(), rt_ret); } kernels_.erase(it); } diff --git a/ge/graph/load/model_manager/ts_mem_mall.h b/ge/graph/load/model_manager/ts_mem_mall.h index 74ce5a16..986b3101 100644 --- a/ge/graph/load/model_manager/ts_mem_mall.h +++ b/ge/graph/load/model_manager/ts_mem_mall.h @@ -43,7 +43,7 @@ class TsMemMall { for (auto it : mem_store_size_) { rtError_t ret = rtFree(it.second); if (ret != RT_ERROR_NONE) { - GELOGE(RT_FAILED, "Call rtFree failed, ret: 0x%X", ret); + GELOGE(RT_FAILED, "[Call][RtFree] failed, ret:0x%X", ret); } } mem_store_size_.clear(); @@ -52,7 +52,7 @@ class TsMemMall { void *Acquire(int64_t offset, uint64_t size) { if (size == 0) { - GELOGE(RT_FAILED, "Acquire mem 
block failed, size: %lu", size); + GELOGE(RT_FAILED, "[Check][Param] Acquire mem block failed, size:%lu", size); return nullptr; } @@ -71,7 +71,7 @@ class TsMemMall { void *addr = nullptr; rtError_t rt_ret = rtMalloc(&addr, bytes, mem_type_); if (rt_ret != RT_ERROR_NONE) { - GELOGE(RT_FAILED, "Call rtMalloc failed, ret: 0x%X", rt_ret); + GELOGE(RT_FAILED, "[Call][RtMalloc] failed, size:%lu, ret:0x%X", bytes, rt_ret); return nullptr; } @@ -94,7 +94,7 @@ class TsMemMall { mem_store_addr_.erase(it); rtError_t ret = rtFree(addr); if (ret != RT_ERROR_NONE) { - GELOGE(RT_FAILED, "Call rtFree failed, ret: 0x%X", ret); + GELOGE(RT_FAILED, "[Call][RtFree] failed, ret:0x%X", ret); } } diff --git a/ge/graph/load/model_manager/zero_copy_offset.cc b/ge/graph/load/model_manager/zero_copy_offset.cc index 9d6f4e4f..4a57a899 100644 --- a/ge/graph/load/model_manager/zero_copy_offset.cc +++ b/ge/graph/load/model_manager/zero_copy_offset.cc @@ -38,8 +38,13 @@ Status ZeroCopyOffset::InitInputDataInfo(int64_t output_size, void *virtual_addr op_name_ = op_desc->GetName(); (void)ge::AttrUtils::GetListInt(op_desc, ATTR_ZERO_COPY_BASIC_OFFSET, zero_copy_basic_offset_); (void)ge::AttrUtils::GetListInt(op_desc, ATTR_ZERO_COPY_RELATIVE_OFFSET, zero_copy_relative_offset_); - GE_CHK_BOOL_EXEC(zero_copy_basic_offset_.size() == zero_copy_relative_offset_.size(), return PARAM_INVALID, - "basic_offset_size should be equal to relative_offset_size"); + GE_CHK_BOOL_EXEC(zero_copy_basic_offset_.size() == zero_copy_relative_offset_.size(), + REPORT_INNER_ERROR("E19999", "basic_offset_size:%zu not equal to relative_offset_size:%zu, " + "check invalid", zero_copy_basic_offset_.size(), + zero_copy_relative_offset_.size()); + return PARAM_INVALID, + "[Check][Param] basic_offset_size:%zu should be equal to relative_offset_size:%zu", + zero_copy_basic_offset_.size(), zero_copy_relative_offset_.size()); GELOGD("[ZCPY] zero_copy_basic_offset size is %zu", zero_copy_basic_offset_.size()); int64_t 
virtual_addr_offset = op_desc->GetOutputOffset().at(kDataIndex); @@ -78,7 +83,8 @@ Status ZeroCopyOffset::InitOutputDataInfo(const vector &input_size_list if (TensorUtils::GetTensorSizeInBytes(*tensor_desc, size) != GRAPH_SUCCESS) { REPORT_INNER_ERROR("E19999", "Get input TensorSize in op:%s(%s) failed, input_index:%zu", op_desc->GetName().c_str(), op_desc->GetType().c_str(), idx); - GELOGE(FAILED, "GetTensorSizeInBytes failed!"); + GELOGE(FAILED, "[Get][InputTensorSize] in op:%s(%s) failed, input_index:%zu", + op_desc->GetName().c_str(), op_desc->GetType().c_str(), idx); return FAILED; } @@ -88,8 +94,13 @@ Status ZeroCopyOffset::InitOutputDataInfo(const vector &input_size_list op_name_ = op_desc->GetName(); (void)ge::AttrUtils::GetListInt(op_desc, ATTR_ZERO_COPY_BASIC_OFFSET, zero_copy_basic_offset_); (void)ge::AttrUtils::GetListInt(op_desc, ATTR_ZERO_COPY_RELATIVE_OFFSET, zero_copy_relative_offset_); - GE_CHK_BOOL_EXEC(zero_copy_basic_offset_.size() == zero_copy_relative_offset_.size(), return PARAM_INVALID, - "basic_offset_size should be equal to relative_offset_size"); + GE_CHK_BOOL_EXEC(zero_copy_basic_offset_.size() == zero_copy_relative_offset_.size(), + REPORT_INNER_ERROR("E19999", "basic_offset_size:%zu not equal to relative_offset_size:%zu, " + "check invalid", + zero_copy_basic_offset_.size(), zero_copy_relative_offset_.size()); + return PARAM_INVALID, + "[Check][Param] basic_offset_size:%zu should be equal to relative_offset_size:%zu", + zero_copy_basic_offset_.size(), zero_copy_relative_offset_.size()); int64_t virtual_addr_offset = op_desc->GetInputOffset().at(idx); IsL2Fusion(zero_copy_basic_offset_, virtual_addr_offset, fusion_flag); @@ -194,7 +205,8 @@ void ZeroCopyOffset::SetOutsideAddrsValue(ZeroCopyTask &zero_copy_task, void *ou for (uint32_t out_count = 0; out_count < GetAddrCount(); ++out_count) { auto args_addrs = outside_addrs_[out_count].find(outside_addr); if (args_addrs != outside_addrs_[out_count].end()) { - 
GE_CHK_STATUS(zero_copy_task.SetTaskArgsOffset(addr_val, offset), "Input args invalid."); + GE_CHK_STATUS(zero_copy_task.SetTaskArgsOffset(addr_val, offset), + "[Set][TaskArgsOffset] failed, Input args invalid, offset:%zu.", offset); void *args_val = static_cast(args) + offset; args_addrs->second.push_back(args_val); GELOGD("[ZCPY] set copy input: virtual_addr: 0x%lx, task_addr: %p, args: %p, offset: %zu.", addr_val, args_val, diff --git a/ge/graph/load/model_manager/zero_copy_task.cc b/ge/graph/load/model_manager/zero_copy_task.cc index c96dd8b7..4957f8ea 100755 --- a/ge/graph/load/model_manager/zero_copy_task.cc +++ b/ge/graph/load/model_manager/zero_copy_task.cc @@ -36,9 +36,9 @@ ZeroCopyTask::~ZeroCopyTask() { args_addr_ = nullptr; } */ Status ZeroCopyTask::SetTaskArgsOffset(uintptr_t addr, size_t offset) { if (offset + sizeof(uintptr_t) > args_size_) { - REPORT_INNER_ERROR("E19999", "Param offset:%zu + 8 > args_size_:%zu, check invalid", - offset, args_size_); - GELOGE(FAILED, "[ZCPY] %s set task args failed, args size: %zu, offset: %zu", name_.c_str(), args_size_, offset); + REPORT_INNER_ERROR("E19999", "Param offset:%zu + 8 > args_size_:%zu, check invalid", offset, args_size_); + GELOGE(FAILED, "[Check][Param] [ZCPY] %s set task args failed, args size:%zu, offset:%zu", + name_.c_str(), args_size_, offset); return FAILED; // unexpected error, need fix. 
} @@ -118,9 +118,8 @@ Status ZeroCopyTask::DistributeParam(bool async_mode, rtStream_t stream) { } if (rt_err != RT_ERROR_NONE) { - REPORT_CALL_ERROR("E19999", "Call rtMemcpyAsync or rtMemcpy failed, size:%zu, ret: 0x%X", - args_size_, rt_err); - GELOGE(RT_FAILED, "[ZCPY] %s distribute task param failed, error=0x%x", name_.c_str(), rt_err); + REPORT_CALL_ERROR("E19999", "Call rtMemcpyAsync or rtMemcpy failed, size:%zu, ret:0x%X", args_size_, rt_err); + GELOGE(RT_FAILED, "[Distribute][TaskParam] for %s failed, error = 0x%x", name_.c_str(), rt_err); return RT_ERROR_TO_GE_STATUS(rt_err); } diff --git a/ge/graph/manager/graph_caching_allocator.cc b/ge/graph/manager/graph_caching_allocator.cc index 8c8df326..82bfbda9 100644 --- a/ge/graph/manager/graph_caching_allocator.cc +++ b/ge/graph/manager/graph_caching_allocator.cc @@ -21,7 +21,7 @@ #include #include "framework/common/debug/ge_log.h" -#include "graph/manager/graph_mem_allocator.h" +#include "graph/manager/graph_mem_manager.h" namespace ge { const size_t bin_ranges[kNumBins] = {kRoundBlockSize * kKByteSize, @@ -112,12 +112,12 @@ Status CachingAllocator::Initialize(uint32_t device_id) { auto bin_ptr = new (std::nothrow) BlockBin(BlockComparator); if (bin_ptr == nullptr) { REPORT_CALL_ERROR("E19999", "New BlockBin fail, device_id:%u", device_id); - GELOGE(ACL_ERROR_GE_MEMORY_ALLOCATION, "Alloc BlockBin failed."); + GELOGE(ACL_ERROR_GE_MEMORY_ALLOCATION, "[Alloc][BlockBin] failed, device_id:%u", device_id); return ACL_ERROR_GE_MEMORY_ALLOCATION; } free_block_bins_[i] = bin_ptr; } - memory_allocator_ = MemManager::Instance(memory_type_); + memory_allocator_ = &MemManager::Instance().MemInstance(memory_type_); if (memory_allocator_ == nullptr) { return ACL_ERROR_GE_INTERNAL_ERROR; } @@ -137,6 +137,7 @@ uint8_t *CachingAllocator::Malloc(size_t size, uint8_t *org_ptr, uint32_t device uint8_t *ptr = nullptr; Block *block = FindFreeBlock(size, org_ptr, device_id); if (block == nullptr) { + std::lock_guard lock(mutex_); if 
(ge::SUCCESS == TryExtendCache(size, device_id)) { block = FindFreeBlock(size, org_ptr, device_id); if (block != nullptr) { @@ -147,9 +148,8 @@ uint8_t *CachingAllocator::Malloc(size_t size, uint8_t *org_ptr, uint32_t device ptr = block->ptr; } if (ptr == nullptr) { - REPORT_INNER_ERROR("E19999", "FindFreeBlock fail, size:%zu, device_id:%u", - size, device_id); - GELOGE(FAILED, "Malloc failed device id = %u, size= %zu", device_id, size); + REPORT_INNER_ERROR("E19999", "FindFreeBlock fail, size:%zu, device_id:%u", size, device_id); + GELOGE(FAILED, "[Check][Param] FindFreeBlock failed device id = %u, size= %zu", device_id, size); } return ptr; } @@ -157,18 +157,16 @@ uint8_t *CachingAllocator::Malloc(size_t size, uint8_t *org_ptr, uint32_t device Status CachingAllocator::Free(uint8_t *ptr, uint32_t device_id) { GELOGI("Free device id = %u", device_id); if (ptr == nullptr) { - REPORT_INNER_ERROR("E19999", "Param ptr is nullptr, device_id:%u, check invalid", - device_id); - GELOGE(PARAM_INVALID, "Invalid memory pointer"); + REPORT_INNER_ERROR("E19999", "Param ptr is nullptr, device_id:%u, check invalid", device_id); + GELOGE(PARAM_INVALID, "[Check][Param] Invalid memory pointer, device_id:%u", device_id); return ge::PARAM_INVALID; } std::lock_guard lock(mutex_); auto it = allocated_blocks_.find(ptr); if (it == allocated_blocks_.end()) { - REPORT_INNER_ERROR("E19999", "Param ptr not allocated before, device_id:%u, check invalid", - device_id); - GELOGE(PARAM_INVALID, "Invalid memory pointer: %p", ptr); + REPORT_INNER_ERROR("E19999", "Param ptr not allocated before, device_id:%u, check invalid", device_id); + GELOGE(PARAM_INVALID, "[Check][Param] Param ptr not allocated before, device_id:%u", device_id); return ge::PARAM_INVALID; } Block *block = it->second; @@ -225,9 +223,8 @@ Block *CachingAllocator::FindFreeBlock(size_t size, uint8_t *org_ptr, uint32_t d Block key(device_id, size, org_ptr); BlockBin *bin = GetBlockBin(size); if (bin == nullptr) { - 
REPORT_INNER_ERROR("E19999", "GetBlockBin fail, size:%zu, device_id:%u", - size, device_id); - GELOGE(ge::FAILED, "Get block bin failed size = %zu", size); + REPORT_INNER_ERROR("E19999", "GetBlockBin fail, size:%zu, device_id:%u", size, device_id); + GELOGE(ge::FAILED, "[Get][BlockBin] failed, size:%zu, device_id:%u", size, device_id); return nullptr; } std::lock_guard lock(mutex_); @@ -258,9 +255,8 @@ Block *CachingAllocator::SplitBlock(Block *block, size_t size, BlockBin &bin, ui Block *remaining = block; Block *new_block = new (std::nothrow) Block(device_id, size, &bin, block->ptr); if (new_block == nullptr) { - REPORT_CALL_ERROR("E19999", "New Block fail, size:%zu, device_id:%u", - size, device_id); - GELOGE(ge::FAILED, "Alloc block failed size = %zu", size); + REPORT_CALL_ERROR("E19999", "New Block fail, size:%zu, device_id:%u", size, device_id); + GELOGE(ge::FAILED, "[Alloc][Block] failed, size:%zu, device_id:%u", size, device_id); return block; } new_block->prev = remaining->prev; @@ -285,7 +281,7 @@ Status CachingAllocator::TryExtendCache(size_t size, uint32_t device_id) { size_t free_cached_memory_size = FreeCachedBlocks(); memory_addr = memory_allocator_->MallocMemory(purpose, memory_size, device_id); if (memory_addr == nullptr) { - GELOGE(ge::FAILED, "TryExtendCache failed, no enough memory for size = %zu, device_id = %u", memory_size, + GELOGE(ge::FAILED, "[Malloc][Memory] failed, no enough memory for size = %zu, device_id = %u", memory_size, device_id); return ge::FAILED; } @@ -304,16 +300,14 @@ Status CachingAllocator::TryExtendCache(size_t size, uint32_t device_id) { Status CachingAllocator::AddToBlockBin(uint8_t *ptr, size_t size, uint32_t device_id) { BlockBin *bin = GetBlockBin(size); if (bin == nullptr) { - REPORT_INNER_ERROR("E19999", "GetBlockBin fail, size:%zu, device_id:%u", - size, device_id); - GELOGE(ge::FAILED, "Get block bin failed size = %zu", size); + REPORT_INNER_ERROR("E19999", "GetBlockBin fail, size:%zu, device_id:%u", size, 
device_id); + GELOGE(ge::FAILED, "[Get][BlockBin] failed, size:%zu, device_id:%u", size, device_id); return ge::FAILED; } Block *block = new (std::nothrow) Block(device_id, size, bin, nullptr); if (block == nullptr) { - REPORT_CALL_ERROR("E19999", "New Block fail, size:%zu, device_id:%u", - size, device_id); - GELOGE(ge::FAILED, "Alloc block failed size = %zu", size); + REPORT_CALL_ERROR("E19999", "New Block fail, size:%zu, device_id:%u", size, device_id); + GELOGE(ge::FAILED, "[Alloc][Block] failed, size:%zu, device_id:%u", size, device_id); return ge::FAILED; } diff --git a/ge/graph/manager/graph_caching_allocator.h b/ge/graph/manager/graph_caching_allocator.h index a9c3202a..2db00ff2 100644 --- a/ge/graph/manager/graph_caching_allocator.h +++ b/ge/graph/manager/graph_caching_allocator.h @@ -88,8 +88,8 @@ class CachingAllocator { /// /// @ingroup ge_graph /// @brief free memory + /// @param [in] memory_ptr memory address ptr /// @param [in] device_id device id - /// @param [out] memory_ptr memory address ptr /// @return Status result of function /// Status Free(uint8_t *memory_addr, uint32_t device_id = 0); diff --git a/ge/graph/manager/graph_context.cc b/ge/graph/manager/graph_context.cc index 3a705ad9..6d202cef 100644 --- a/ge/graph/manager/graph_context.cc +++ b/ge/graph/manager/graph_context.cc @@ -33,7 +33,7 @@ GraphContext::GraphContext(const GraphNodePtr &graph_node) { if (compute_graph_ == nullptr) { std::shared_ptr graph = graph_node->GetGraph(); if (graph == nullptr) { - GELOGE(GE_GRAPH_OPTIMIZE_COMPUTE_GRAPH_NULL, "compute_graph by graphNode is NULL!"); + GELOGE(GE_GRAPH_OPTIMIZE_COMPUTE_GRAPH_NULL, "[Get][Graph] failed, compute_graph by graphNode is NULL!"); return; } @@ -45,7 +45,7 @@ GraphContext::GraphContext(const GraphNodePtr &graph_node) { Status GraphContext::SetComputeGraph(const GraphNodePtr &graph_node) { if (graph_node == nullptr) { REPORT_INNER_ERROR("E19999", "Param graph_node is nullptr, check invalid"); - GELOGE(GE_GRAPH_PARAM_NULLPTR, 
"graphNode is NULL!"); + GELOGE(GE_GRAPH_PARAM_NULLPTR, "[Check][Param] graphNode is NULL!"); return GE_GRAPH_PARAM_NULLPTR; } @@ -56,7 +56,7 @@ Status GraphContext::SetComputeGraph(const GraphNodePtr &graph_node) { std::shared_ptr graph = graph_node->GetGraph(); if (graph == nullptr) { REPORT_INNER_ERROR("E19999", "Param graph in graph_node is nullptr, check invalid"); - GELOGE(GE_GRAPH_OPTIMIZE_COMPUTE_GRAPH_NULL, "compute_graph by graphNode is NULL!"); + GELOGE(GE_GRAPH_OPTIMIZE_COMPUTE_GRAPH_NULL, "[Get][Graph] failed, compute_graph by graphNode is NULL!"); return GE_GRAPH_OPTIMIZE_COMPUTE_GRAPH_NULL; } @@ -73,14 +73,15 @@ Status GraphContext::Finalize() const { return SUCCESS; } Status GraphContext::GetVariableTensor(const std::string &var_data_name, GeTensor &returned_tensor) { if (var_data_name.empty()) { REPORT_INNER_ERROR("E19999", "Param var_data_name is empty, check invalid"); - GELOGE(GE_GRAPH_EMPTY_STRING_NAME, "Variable data name is empty!"); + GELOGE(GE_GRAPH_EMPTY_STRING_NAME, "[Check][Param] Variable data name is empty!"); return GE_GRAPH_EMPTY_STRING_NAME; } if (GetVarNodeTensorTable().empty()) { REPORT_INNER_ERROR("E19999", "VarNodeTensorTable is empty, var_data_name:%s, check invalid", var_data_name.c_str()); - GELOGE(GE_GRAPH_EMPTY_VARIABLE_TENSOR_TABLE, "VarNodeTensorTable is empty!"); + GELOGE(GE_GRAPH_EMPTY_VARIABLE_TENSOR_TABLE, "[Check][Param] VarNodeTensorTable is empty, var_data_name:%s", + var_data_name.c_str()); return GE_GRAPH_EMPTY_VARIABLE_TENSOR_TABLE; } for (auto &var_record : GetVarNodeTensorTable()) { @@ -88,9 +89,8 @@ Status GraphContext::GetVariableTensor(const std::string &var_data_name, GeTenso returned_tensor.SetTensorDesc(var_record.second.GetTensorDesc()); auto ret = returned_tensor.SetData(var_record.second.GetData()); if (ret != SUCCESS) { - REPORT_INNER_ERROR("E19999", "SetData to tensor fail, var_data_name:%s", - var_data_name.c_str()); - GELOGE(ret, "Set Tensor data failed!"); + REPORT_INNER_ERROR("E19999", "SetData 
to tensor fail, var_data_name:%s", var_data_name.c_str()); + GELOGE(ret, "[Set][Data] to Tensor failed, var_data_name:%s", var_data_name.c_str()); return ret; } @@ -100,7 +100,8 @@ Status GraphContext::GetVariableTensor(const std::string &var_data_name, GeTenso REPORT_INNER_ERROR("E19999", "VarRecord with data_name:%s does not exist, check invalid", var_data_name.c_str()); - GELOGE(GE_GRAPH_VARIABLE_DOES_NOT_EXIST, "VarRecord with data_name %s does NOT exist!", var_data_name.c_str()); + GELOGE(GE_GRAPH_VARIABLE_DOES_NOT_EXIST, "[Check][Param] VarRecord with data_name %s does NOT exist!", + var_data_name.c_str()); return GE_GRAPH_VARIABLE_DOES_NOT_EXIST; } diff --git a/ge/graph/manager/graph_manager.cc b/ge/graph/manager/graph_manager.cc index 31e5b5ab..f9d24ac4 100755 --- a/ge/graph/manager/graph_manager.cc +++ b/ge/graph/manager/graph_manager.cc @@ -65,7 +65,6 @@ #include "graph/passes/merge_pass.h" #include "graph/passes/merge_input_memcpy_pass.h" #include "graph/passes/merge_to_stream_merge_pass.h" -#include "graph/passes/mark_force_unknown_for_cond_pass.h" #include "graph/passes/multi_batch_pass.h" #include "graph/passes/next_iteration_pass.h" #include "graph/passes/permute_pass.h" @@ -108,6 +107,7 @@ #include "common/formats/utils/formats_trans_utils.h" #include "register/custom_pass_helper.h" #include "external/graph/types.h" +#include "common/util/error_manager/error_manager.h" namespace { const char *const kSummary = "Summary"; @@ -149,7 +149,8 @@ ge::Status CheckFpCeilingMode() { if (ret == ge::GRAPH_SUCCESS) { if (kValidFpCeilingMode.count(mode) == 0) { REPORT_INNER_ERROR("E19999", "Option ge.fpCeilingMode is invalid, value:%s", mode.c_str()); - GELOGE(ge::GE_GRAPH_OPTIONS_INVALID, "The fp_ceiling_mode %s is invalid, options are 0, 1, and 2.", mode.c_str()); + GELOGE(ge::GE_GRAPH_OPTIONS_INVALID, "[Get][Option] The fp_ceiling_mode %s is invalid, options are 0, 1, and 2.", + mode.c_str()); return ge::GE_GRAPH_OPTIONS_INVALID; } GELOGI("The parameter 
fp_ceiling_mode is set to %s.", mode.c_str()); @@ -178,33 +179,33 @@ Status GraphManager::Initialize(const std::map &options) { graph_run_listener_ = MakeShared(sync_run_mutex_, condition_); if (graph_run_listener_ == nullptr) { REPORT_CALL_ERROR("E19999", "New GraphModelListener fail"); - GELOGE(MEMALLOC_FAILED, "Make shared failed"); + GELOGE(MEMALLOC_FAILED, "[New][GraphModelListener] failed"); return MEMALLOC_FAILED; } // graph context graph_context_ = MakeShared(); if (graph_context_ == nullptr) { - REPORT_CALL_ERROR("E19999", "New GraphModelListener fail"); - GELOGE(MEMALLOC_FAILED, "Make shared failed."); + REPORT_CALL_ERROR("E19999", "New GraphContext fail"); + GELOGE(MEMALLOC_FAILED, "[New][GraphContext] failed."); return MEMALLOC_FAILED; } // parse option parameters Status ret = ParseOptions(options); if (ret != SUCCESS) { - GELOGE(ret, "[Initialize] parse options failed."); + GELOGE(ret, "[Parse][Options] failed."); return ret; } ret = CheckFpCeilingMode(); if (ret != SUCCESS) { - GELOGE(ret, "[Initialize] Check fp-ceiling-mode options failed."); + GELOGE(ret, "[Check][FpCeilingMode] failed."); return ret; } ret = graph_context_->Initialize(options); if (ret != SUCCESS) { - GELOGE(ret, "[Initialize] GraphContext initialize failed."); + GELOGE(ret, "[Initialize][GraphContext] failed."); return ret; } @@ -302,7 +303,7 @@ Status GraphManager::Finalize() { if (graph_context_ != nullptr) { Status ret_final = graph_context_->Finalize(); if (ret_final != SUCCESS) { - GELOGE(ret_final, "[GraphManager] graph context Finalize failed!"); + GELOGE(ret_final, "[Finalize][GraphContext] failed!"); unload_model_ret = ret_final; } } @@ -321,9 +322,8 @@ Status GraphManager::InitDynamicParams(ComputeGraphPtr &compute_graph) { std::string op_type; auto ret = GetOriginalType(node, op_type); if (ret != SUCCESS) { - REPORT_CALL_ERROR("E19999", "GetOriginalType from op:%s fail", - node->GetName().c_str()); - GELOGE(FAILED, "Failed to get node %s original type.", 
node->GetName().c_str()); + REPORT_CALL_ERROR("E19999", "GetOriginalType from op:%s fail", node->GetName().c_str()); + GELOGE(FAILED, "[Get][OriginalType] from op:%s failed.", node->GetName().c_str()); return FAILED; } if ((op_desc->GetType() == DATA) || (op_type == kGetNextName)) { @@ -335,7 +335,7 @@ Status GraphManager::InitDynamicParams(ComputeGraphPtr &compute_graph) { if (!options_.input_shape.empty() && !options_.dynamic_dims.empty()) { if (!ge::ParseInputShape(options_.input_shape, GetLocalOmgContext().input_dims, GetLocalOmgContext().user_input_dims, true)) { - GELOGE(GRAPH_PARAM_INVALID, "Failed to parse input shape: %s.", options_.input_shape.c_str()); + GELOGE(GRAPH_PARAM_INVALID, "[Parse][InputShape] %s failed.", options_.input_shape.c_str()); return GRAPH_PARAM_INVALID; } GetLocalOmgContext().dynamic_dims = options_.dynamic_dims; @@ -380,7 +380,7 @@ void GraphManager::RemoveAddGraphCondition(GraphId graph_id) { Status GraphManager::CheckRepeatAdd(uint32_t graph_id, bool &is_added) { uint32_t count = 0; if (GetGraphCount(graph_id, count) != SUCCESS) { - GELOGE(INTERNAL_ERROR, "Get graph [id:%u] count failed, graph might have not been added.", graph_id); + GELOGE(INTERNAL_ERROR, "[Get][GraphCount] failed, graph[id:%u] might have not been added.", graph_id); return INTERNAL_ERROR; } // previous thread owns same graph_id has been in the middle of the AddGraph procession @@ -393,7 +393,7 @@ Status GraphManager::CheckRepeatAdd(uint32_t graph_id, bool &is_added) { GraphNodePtr graph_node; Status ret = GetGraphNode(graph_id, graph_node); if (ret != SUCCESS) { - GELOGE(ret, "[AddGraph] GetGraphNode failed, graph_id = %u.", graph_id); + GELOGE(ret, "[Get][GraphNode] failed, graph_id = %u.", graph_id); return ret; } is_added = true; @@ -418,7 +418,7 @@ void GraphManager::SetSessionGraphId(ComputeGraphPtr compute_graph, uint32_t gra Status GraphManager::NotifyWaittingGraph(uint32_t graph_id) { uint32_t count = 0; if (GetGraphCount(graph_id, count) != SUCCESS) { - 
GELOGE(INTERNAL_ERROR, "Get graph [id:%u] count failed, graph might have not been added.", graph_id); + GELOGE(INTERNAL_ERROR, "[Get][GraphCount] failed, graph[id:%u] might have not been added.", graph_id); return INTERNAL_ERROR; } GELOGD("Add graph finished, graph_id:%u", graph_id); @@ -433,15 +433,13 @@ Status GraphManager::CreateGraphNode(uint32_t graph_id, const Graph &graph, const std::map &options) { GraphNodePtr graph_node = MakeShared(graph_id); GE_IF_BOOL_EXEC(graph_node == nullptr, - REPORT_CALL_ERROR("E19999", "New GraphNode fail, graph_id:%u", - graph_id); - GELOGE(FAILED, "GraphNode make shared failed"); + REPORT_CALL_ERROR("E19999", "New GraphNode fail, graph_id:%u", graph_id); + GELOGE(FAILED, "[New][GraphNode] fail, graph_id:%u", graph_id); return FAILED); std::shared_ptr graph_ptr = MakeShared(graph); GE_IF_BOOL_EXEC(graph_ptr == nullptr, - REPORT_CALL_ERROR("E19999", "New Graph fail, graph_id:%u", - graph_id); - GELOGE(FAILED, "GraphPtr make shared failed"); + REPORT_CALL_ERROR("E19999", "New Graph fail, graph_id:%u", graph_id); + GELOGE(FAILED, "[New][Graph] fail, graph_id:%u", graph_id); return FAILED); // update option about tuning graph ParseOption(options, BUILD_MODE, options_.build_mode); @@ -459,13 +457,55 @@ Status GraphManager::SetStagesOptions(uint32_t graph_id, const GraphManagerOptio stages.preparer.SetOptions(options_); Status status = stages.optimizer.SetOptions(options_); if (status != SUCCESS) { - GELOGE(status, "Graph optimizer set options failed."); + GELOGE(status, "[Set][Options] for Graph optimizer failed, graph id:%u.", graph_id); return status; } stages.builder.SetOptions(options_); return SUCCESS; } +Status GraphManager::ModifyDataIndex(const Graph &graph, const std::map &graph_option) { + vector data_desc; + set indexes; + auto compute_graph = GraphUtils::GetComputeGraph(graph); + GE_CHECK_NOTNULL(compute_graph); + for (auto &input_node : compute_graph->GetDirectNode()) { + GE_CHECK_NOTNULL(input_node); + auto op = 
input_node->GetOpDesc(); + GE_CHECK_NOTNULL(op); + if (op->GetType() == DATA) { + int64_t index = 0; + (void) AttrUtils::GetInt(op, ATTR_NAME_INDEX, index); + indexes.insert(index); + data_desc.emplace_back(op); + } + } + if (!indexes.empty()) { + auto first_iter = indexes.begin(); + auto end_iter = indexes.end(); + --end_iter; + auto data_size = static_cast(data_desc.size()); + // The valid index starts with 0 and increases by 1, and num is equal to data_node. + if (indexes.size() != data_desc.size() || *first_iter != 0 || *end_iter != data_size - 1) { + auto iter = graph_option.find(OPTION_EXEC_DATA_INPUTS_SHAPE_RANGE); + if (iter != graph_option.end() && !iter->second.empty()) { + // If data inputs shape range is set, user must set valid data index. + std::string failed_reason = "Data index must be set continuous from 0 when data shape range enabled!"; + REPORT_INPUT_ERROR("E10003", std::vector({"parameter", "value", "reason"}), + std::vector({"--data_index", "-", failed_reason})); + GELOGE(GRAPH_PARAM_INVALID, "[COMP][AddGraph]Input data index is invalid when data shape range enabled."); + return GRAPH_PARAM_INVALID; + } + GELOGI("Graph[%s] input data index is invalid, set data index by topo order.", compute_graph->GetName().c_str()); + int64_t index = 0; + for (auto &op : data_desc) { + (void) AttrUtils::SetInt(op, ATTR_NAME_INDEX, index++); + } + } + } + return SUCCESS; +} + Status GraphManager::AddGraph(const GraphId &graph_id, const Graph &graph, const std::map &options, const OmgContext &omg_context) { @@ -475,7 +515,8 @@ Status GraphManager::AddGraph(const GraphId &graph_id, const Graph &graph, if (GetAddGraphCondition(graph_id) == kDoneAdded) { GraphNodePtr graph_node; if (GetGraphNode(graph_id, graph_node) != SUCCESS) { - GELOGE(GE_GRAPH_GRAPH_NOT_EXIST, "Graph not exist while done adding previously, graph_id = %u.", graph_id); + GELOGE(GE_GRAPH_GRAPH_NOT_EXIST, "[Get][GraphNode] failed, Graph not exist while done adding previously, " + "graph_id = 
%u.", graph_id); return GE_GRAPH_GRAPH_NOT_EXIST; } graph_node->IncreaseLoadCount(); @@ -486,7 +527,7 @@ Status GraphManager::AddGraph(const GraphId &graph_id, const Graph &graph, // done adding graph of the former graph, avoiding repeatively adding same graph. bool is_added = false; if (CheckRepeatAdd(graph_id, is_added) != SUCCESS) { - GELOGE(INTERNAL_ERROR, "CheckRepeatAdd for graph[id:%u] failed.", graph_id); + GELOGE(INTERNAL_ERROR, "[Check][RepeatAdd] for graph[id:%u] failed.", graph_id); return INTERNAL_ERROR; } // The former graph (from different thread) owns same graph id has been successfully added. @@ -496,16 +537,17 @@ Status GraphManager::AddGraph(const GraphId &graph_id, const Graph &graph, // Do add graph SetAddGraphCondition(graph_id, kStartAdd); if (CheckGraphAdded(graph_id, graph) != SUCCESS) { - GELOGE(FAILED, "AddGraph failed."); + GELOGE(FAILED, "[Check][GraphAdded] failed, graph id:%u.", graph_id); return FAILED; } + GE_CHK_STATUS_RET(ModifyDataIndex(graph, options)); auto compute_graph = GraphUtils::GetComputeGraph(graph); GE_CHECK_NOTNULL(compute_graph); (void)AttrUtils::SetBool(*compute_graph, ATTR_NAME_GRAPH_HAS_BEEN_ADDED, true); SetSessionGraphId(compute_graph, graph_id); if (CreateGraphNode(graph_id, graph, options) != SUCCESS) { - GELOGE(FAILED, "Failed to create graph_node."); + GELOGE(FAILED, "[Create][GraphNode] failed, graph id:%u.", graph_id); return FAILED; } @@ -514,12 +556,12 @@ Status GraphManager::AddGraph(const GraphId &graph_id, const Graph &graph, GetLocalOmgContext().output_type = options_.output_datatype; } if (InitDynamicParams(compute_graph) != SUCCESS) { - GELOGE(GRAPH_PARAM_INVALID, "Failed to init params when online infer is dynamic."); + GELOGE(GRAPH_PARAM_INVALID, "[Init][Params] failed, when online infer is dynamic, graph id:%u.", graph_id); return GRAPH_PARAM_INVALID; } if (SetStagesOptions(graph_id, options_) != SUCCESS) { - GELOGE(INTERNAL_ERROR, "Set stage options failed."); + GELOGE(INTERNAL_ERROR, 
"[Set][StagesOptions] failed, graph id:%u.", graph_id); return INTERNAL_ERROR; } @@ -527,7 +569,7 @@ Status GraphManager::AddGraph(const GraphId &graph_id, const Graph &graph, SetAddGraphCondition(graph_id, kDoneAdded); // There are threads waitting for adding same graph if (NotifyWaittingGraph(graph_id) != SUCCESS) { - GELOGE(INTERNAL_ERROR, "NotifyWaittingGraph failed."); + GELOGE(INTERNAL_ERROR, "[Notify][WaittingGraph] failed, graph id:%u.", graph_id); return INTERNAL_ERROR; } return SUCCESS; @@ -542,14 +584,13 @@ Status GraphManager::CheckGraphAdded(const GraphId &graph_id, const Graph &graph && graph_has_been_added) { REPORT_INNER_ERROR("E19999", "Get Attr:%s from graph:%u fail.", ATTR_NAME_GRAPH_HAS_BEEN_ADDED.c_str(), graph_id); - GELOGE(GE_GRAPH_GRAPH_ALREADY_EXIST, - "[GraphManager] same graph object can not be added again, graph_id = %u.", graph_id); + GELOGE(GE_GRAPH_GRAPH_ALREADY_EXIST, "[Get][Attr] %s from graph:%u fail.", + ATTR_NAME_GRAPH_HAS_BEEN_ADDED.c_str(), graph_id); return GE_GRAPH_GRAPH_ALREADY_EXIST; } } else { - REPORT_INNER_ERROR("E19999", "compute_graph from graph:%u is nullptr, check invalid", - graph_id); - GELOGE(FAILED, "compute graph is null"); + REPORT_INNER_ERROR("E19999", "compute_graph from graph:%u is nullptr, check invalid", graph_id); + GELOGE(FAILED, "[Get][ComputeGraph] failed, compute graph from graph:%u is nullptr", graph_id); return FAILED; } return SUCCESS; @@ -559,11 +600,11 @@ Status GraphManager::AddGraphWithCopy(const GraphId &graph_id, const Graph &grap const std::map &options, const OmgContext &omg_context) { if (HasGraphNode(graph_id)) { - GELOGE(GE_GRAPH_GRAPH_ALREADY_EXIST, "[GraphManager] graph exists, graph_id = %u", graph_id); + GELOGE(GE_GRAPH_GRAPH_ALREADY_EXIST, "[Has][GraphNode] graph exists, graph_id = %u", graph_id); return GE_GRAPH_GRAPH_ALREADY_EXIST; } if (CheckGraphAdded(graph_id, graph) != SUCCESS) { - GELOGE(FAILED, "AddGraphWithCopy failed."); + GELOGE(FAILED, "[Check][GraphAdded] failed, 
graph_id = %u", graph_id); return FAILED; } IncreaseGraphCount(graph_id); @@ -577,7 +618,7 @@ Status GraphManager::AddGraphWithCopy(const GraphId &graph_id, const Graph &grap SetSessionGraphId(new_compute_graph, graph_id); std::shared_ptr new_graph_ptr = GraphUtils::CreateGraphPtrFromComputeGraph(new_compute_graph); if (CreateGraphNode(graph_id, *new_graph_ptr, options) != SUCCESS) { - GELOGE(FAILED, "Failed to create graph_node."); + GELOGE(FAILED, "[Create][GraphNode] failed, graph_id = %u", graph_id); return FAILED; } @@ -586,12 +627,12 @@ Status GraphManager::AddGraphWithCopy(const GraphId &graph_id, const Graph &grap GetLocalOmgContext().output_type = options_.output_datatype; } if (InitDynamicParams(new_compute_graph) != SUCCESS) { - GELOGE(GRAPH_PARAM_INVALID, "Failed to init params when online infer is dynamic."); + GELOGE(GRAPH_PARAM_INVALID, "[Init][Params] failed, when online infer is dynamic, graph_id = %u", graph_id); return GRAPH_PARAM_INVALID; } if (SetStagesOptions(graph_id, options_) != SUCCESS) { - GELOGE(INTERNAL_ERROR, "Set stage options failed."); + GELOGE(INTERNAL_ERROR, "[Set][StagesOptions] failed, graph_id = %u", graph_id); return INTERNAL_ERROR; } @@ -612,9 +653,9 @@ Status GraphManager::MergeSubGraph(ComputeGraphPtr &compute_graph, const ge::Com Status ret_topo = compute_graph->TopologicalSorting(); if (ret_topo != SUCCESS) { - REPORT_CALL_ERROR("E19999", "TopologicalSorting fail, graph_id:%u", - compute_graph->GetGraphID()); - GELOGE(ret_topo, "[GraphManager]: TopologicalSorting the merged graph failed."); + REPORT_CALL_ERROR("E19999", "TopologicalSorting fail, graph_id:%u", compute_graph->GetGraphID()); + GELOGE(ret_topo, "[Call][TopologicalSorting] for the merged graph failed, graph_id:%u", + compute_graph->GetGraphID()); return ret_topo; } } else { @@ -649,16 +690,16 @@ Status GraphManager::CopySubGraphAndMarkFusion(const ComputeGraphPtr &compute_gr std::vector output_nodes; ComputeGraphPtr new_compute_graph = 
GraphUtils::CloneGraph(old_compute_graph, "", input_nodes, output_nodes); if (new_compute_graph == nullptr) { - REPORT_CALL_ERROR("E19999", "CloneGraph fail, graph_id:%u", - compute_graph->GetGraphID()); - GELOGE(INTERNAL_ERROR, "Clone graph failed."); + REPORT_CALL_ERROR("E19999", "CloneGraph fail, graph_id:%u", compute_graph->GetGraphID()); + GELOGE(INTERNAL_ERROR, "[Clone][Graph] failed, graph_id:%u", compute_graph->GetGraphID()); return INTERNAL_ERROR; } copy_graphs.emplace(old_compute_graph->GetName(), new_compute_graph); if (!AttrUtils::SetBool(old_compute_graph, ATTR_NAME_NEED_LX_FUSION, true)) { REPORT_INNER_ERROR("E19999", "Set Attr:%s to graph:%u fail", ATTR_NAME_NEED_LX_FUSION.c_str(), old_compute_graph->GetGraphID()); - GELOGE(INTERNAL_ERROR, "Set attr lx_fusion to graph failed."); + GELOGE(INTERNAL_ERROR, "[Set][Attr] %s to graph:%u failed.", + ATTR_NAME_NEED_LX_FUSION.c_str(), old_compute_graph->GetGraphID()); return INTERNAL_ERROR; } } @@ -696,7 +737,7 @@ Status GraphManager::OptimizeSubGraphWithMultiThreads(ComputeGraphPtr compute_gr ErrorManager::GetInstance().GetErrorManagerContext(), GetThreadLocalContext()); if (!f.valid()) { - GELOGE(FAILED, "Future is invalid"); + GELOGE(FAILED, "[Call][Commit] failed, Future is invalid, session_id:%lu", session_id); return FAILED; } vector_future.emplace_back(std::move(f)); @@ -713,7 +754,7 @@ Status GraphManager::OptimizeSubGraphWithMultiThreads(ComputeGraphPtr compute_gr ErrorManager::GetInstance().GetErrorManagerContext(), GetThreadLocalContext()); if (!f.valid()) { - GELOGE(FAILED, "Future is invalid"); + GELOGE(FAILED, "[Call][Commit] failed, Future is invalid, session_id:%lu", session_id); return FAILED; } vector_future.emplace_back(std::move(f)); @@ -724,7 +765,7 @@ Status GraphManager::OptimizeSubGraphWithMultiThreads(ComputeGraphPtr compute_gr Status ret_status = vector_future[i].get(); if (ret_status != SUCCESS) { REPORT_CALL_ERROR("E19999", "subgraph %zu optimize failed", i); - GELOGE(ret_status, 
"subgraph %zu optimize failed", i); + GELOGE(ret_status, "[Check][Param] subgraph %zu optimize failed", i); return ret_status; } } @@ -735,7 +776,7 @@ bool GraphManager::CheckAllFusionOptimizeSuccess(const ComputeGraphPtr &compute_ Graph2SubGraphInfoList &sub_graph_map) { if (compute_graph == nullptr) { REPORT_INNER_ERROR("E19999", "Param compute_graph is nullptr, check invalid"); - GELOGE(PARAM_INVALID, "Input param compute_graph is nullptr."); + GELOGE(PARAM_INVALID, "[Check][Param] Input param compute_graph is nullptr."); return false; } @@ -776,7 +817,8 @@ Status GraphManager::ReplaceSubgraphWithOriGraph(const ComputeGraphPtr &compute_ if (iter == copy_graphs.end()) { REPORT_INNER_ERROR("E19999", "Can not find subgraph:%s in copy graphs, check invalid", subgraph->GetSubGraph()->GetName().c_str()); - GELOGE(FAILED, "Can not find subgraph:%s in copy graphs.", subgraph->GetSubGraph()->GetName().c_str()); + GELOGE(FAILED, "[Check][Param] Can not find subgraph:%s in copy graphs.", + subgraph->GetSubGraph()->GetName().c_str()); return FAILED; } subgraph->SetSubGraph(iter->second); @@ -789,7 +831,8 @@ Status GraphManager::ReplaceSubgraphWithOriGraph(const ComputeGraphPtr &compute_ if (iter == copy_graphs.end()) { REPORT_INNER_ERROR("E19999", "Can not find subgraph:%s in copy graphs, check invalid", subgraph->GetSubGraph()->GetName().c_str()); - GELOGE(FAILED, "Can not find subgraph:%s in copy graphs.", subgraph->GetSubGraph()->GetName().c_str()); + GELOGE(FAILED, "[Check][Param] Can not find subgraph:%s in copy graphs.", + subgraph->GetSubGraph()->GetName().c_str()); return FAILED; } subgraph->SetSubGraph(iter->second); @@ -807,7 +850,7 @@ Status GraphManager::SetSubgraph(uint64_t session_id, ComputeGraphPtr compute_gr options_.build_step.c_str()); Status ret = OptimizeSubGraphWithMultiThreads(compute_graph, sub_graph_map, session_id); if (ret != SUCCESS) { - GELOGE(ret, "Multiply optimize subgraph failed"); + GELOGE(ret, "[Call][OptimizeSubGraphWithMultiThreads] 
failed, ret:%d, session_id:%lu", ret, session_id); return ret; } return SUCCESS; @@ -849,7 +892,8 @@ Status GraphManager::PreRunOptimizeOriginalGraph(const GraphNodePtr &graph_node, GE_CHK_STATUS_RET(graph_pass.AddPass("PreRun::CtrlEdgeTransferPass", new (std::nothrow) CtrlEdgeTransferPass)) GE_CHK_STATUS_RET(graph_pass.Run(compute_graph)); - GE_CHK_STATUS_RET(stages.optimizer.IdentifyReference(compute_graph), "Identify reference failed."); + GE_CHK_STATUS_RET(stages.optimizer.IdentifyReference(compute_graph), + "[Identify][Reference] failed, graph:%s.", compute_graph->GetName().c_str()); GELOGD("PreRun:PreRunOptimizeOriginalGraph success."); return SUCCESS; } @@ -887,9 +931,8 @@ Status GraphManager::PreRunAfterOptimizeSubGraph(const GraphNodePtr &graph_node, Status ret = compute_graph->TopologicalSorting(); if (ret != SUCCESS) { - REPORT_CALL_ERROR("E19999", "TopologicalSorting fail, graph_id:%u", - compute_graph->GetGraphID()); - GELOGE(ret, "Graph topological sort failed, ret:%d.", ret); + REPORT_CALL_ERROR("E19999", "TopologicalSorting fail, graph_id:%u", compute_graph->GetGraphID()); + GELOGE(ret, "[Call][TopologicalSorting] fail, graph_id:%u", compute_graph->GetGraphID()); return ret; } @@ -906,14 +949,14 @@ Status GraphManager::SetRtContext(rtContext_t rt_context, rtCtxMode_t mode, uint if (rt_ret != RT_ERROR_NONE) { REPORT_CALL_ERROR("E19999", "Call rtCtxCreate faileded, session_id:%lu, graph_id:%u, mode:%d", session_id, graph_id, mode); - GELOGE(FAILED, "Call rt api failed, ret: 0x%X", rt_ret); + GELOGE(FAILED, "[Call][RtCtxCreate] faileded, session_id:%lu, graph_id:%u, mode:%d", session_id, graph_id, mode); return FAILED; } rt_ret = rtCtxSetCurrent(rt_context); if (rt_ret != RT_ERROR_NONE) { REPORT_CALL_ERROR("E19999", "Call rtCtxSetCurrent failed, session_id:%lu, graph_id:%u, mode:%d", session_id, graph_id, mode); - GELOGE(FAILED, "Call rt api failed, ret: 0x%X", rt_ret); + GELOGE(FAILED, "[Call][RtCtxSetCurrent] failed, session_id:%lu, graph_id:%u, 
mode:%d", session_id, graph_id, mode); return FAILED; } RtContextUtil::GetInstance().AddRtContext(session_id, graph_id, rt_context); @@ -927,7 +970,7 @@ Status GraphManager::RunCustomPass(const GraphNodePtr &graph_node) { GE_TIMESTAMP_START(RunCustomPass); GraphPtr graph = std::const_pointer_cast(const_graph); - GE_CHK_STATUS_RET(CustomPassHelper::Instance().Run(graph), "Graph[%s] run custom pass fail.", + GE_CHK_STATUS_RET(CustomPassHelper::Instance().Run(graph), "[Call][Run] for Graph[%s] fail.", comp_graph->GetName().c_str()); GE_TIMESTAMP_END(RunCustomPass, "GraphBuilder::RunCustomPass"); return SUCCESS; @@ -943,7 +986,7 @@ Status GraphManager::PreRun(const GraphNodePtr &graph_node, const std::vectorSetSessionID(session_id); auto analyzer_instance = Analyzer::GetInstance(); GE_CHK_STATUS_RET(analyzer_instance->BuildJsonObject(session_id, compute_graph->GetGraphID()), - "BuildJsonObject Failed") + "[Build][JsonObject] Failed, session_id:%lu", session_id) GEEVENT("PreRun start: graph node size %zu, session id %lu, graph id %u, graph name %s.", compute_graph->GetDirectNodesSize(), session_id, compute_graph->GetGraphID(), @@ -952,7 +995,7 @@ Status GraphManager::PreRun(const GraphNodePtr &graph_node, const std::vectorGetGraphID()); if (ret != SUCCESS) { - GELOGE(ret, "Set rt context failed."); + GELOGE(ret, "[Set][RtContext] failed, session_id:%lu, graph_id:%u.", session_id, compute_graph->GetGraphID()); return ret; } @@ -966,17 +1009,20 @@ Status GraphManager::PreRun(const GraphNodePtr &graph_node, const std::vectorGetName().c_str()); + GELOGE(ret, "[Run][PreRunOptimizeOriginalGraph] failed for graph:%s, session_id:%lu", + compute_graph->GetName().c_str(), session_id); return ret; } } ErrorManager::GetInstance().SetStage(error_message::kModelCompile, error_message::kSubGraphOptimize); // set fuzz compile flag after origin graph optimize - GE_CHK_STATUS_RET(SetFuzzCompileFlag(compute_graph), "Set fuzz compile flag failed."); + 
GE_CHK_STATUS_RET(SetFuzzCompileFlag(compute_graph), + "[Set][FuzzCompileFlag] failed for graph:%s.", compute_graph->GetName().c_str()); ret = PreRunOptimizeSubGraph(graph_node, compute_graph, session_id); if (ret != SUCCESS) { - GELOGE(ret, "Run PreRunOptimizeSubGraph failed for graph:%s.", compute_graph->GetName().c_str()); + GELOGE(ret, "[Run][PreRunOptimizeSubGraph] failed for graph:%s, session_id:%lu.", + compute_graph->GetName().c_str(), session_id); return ret; } @@ -990,7 +1036,8 @@ Status GraphManager::PreRun(const GraphNodePtr &graph_node, const std::vectorGetName().c_str()); + GELOGE(ret, "[Run][PreRunAfterOptimizeSubGraph] failed for graph:%s, session_id:%lu.", + compute_graph->GetName().c_str(), session_id); return ret; } } @@ -1015,7 +1062,7 @@ Status GraphManager::SetFuzzCompileFlag(ComputeGraphPtr &compute_graph) { GE_CHECK_NOTNULL(op_desc); GELOGD("Fuzz compile flag is %d.", GetLocalOmgContext().fuzz_compile_flag); if (!AttrUtils::SetBool(op_desc, ATTR_NAME_FUZZ_BUILD, GetLocalOmgContext().fuzz_compile_flag)) { - GELOGE(FAILED, "[Set][ATTR_NAME_FUZZ_BUILD]Failed to set fuzz build attr to %s.", op_desc->GetName().c_str()); + GELOGE(FAILED, "[Set][ATTR] %s to %s failed.", ATTR_NAME_FUZZ_BUILD.c_str(), op_desc->GetName().c_str()); return FAILED; } } @@ -1031,7 +1078,7 @@ Status GraphManager::SubexpressionMigration(ComputeGraphPtr &compute_graph) { auto ret = pass_manager.Run(compute_graph); GE_TIMESTAMP_END(SubexpressionMigrationPass, "GraphManager::SubexpressionMigration"); if (ret != SUCCESS && ret != NOT_CHANGED) { - GELOGE(ret, "Run SubexpressionMigrationPass failed, ret:%u.", ret); + GELOGE(ret, "[Run][SubexpressionMigrationPass] failed, ret:%u.", ret); return ret; } @@ -1048,7 +1095,7 @@ Status GraphManager::StartForRunGraph(const GraphNodePtr &graph_node, const std: REPORT_INNER_ERROR("E19999", "Graph:%u has not build before, can't run directly, " "check invalid", graph_node->GetGraphId()); GELOGE(PARAM_INVALID, - "The graph %u need to 
re-build, you should remove it from GE " + "[Get][BuildFlag] The graph %u need to re-build, you should remove it from GE " "first, then AddGraph again and rebuild it.", graph_node->GetGraphId()); return PARAM_INVALID; @@ -1061,7 +1108,7 @@ Status GraphManager::StartForRunGraph(const GraphNodePtr &graph_node, const std: // release rts generate context RtContextUtil::GetInstance().DestroyRtContexts(session_id, graph_node->GetGraphId()); if (ret != SUCCESS) { - GELOGE(ret, "PreRun Failed, graph_id:%u.", graph_node->GetGraphId()); + GELOGE(ret, "[Call][PreRun] Failed, graph_id:%u, session_id:%lu.", graph_node->GetGraphId(), session_id); return ret; } } @@ -1072,7 +1119,7 @@ Status GraphManager::StartForRunGraph(const GraphNodePtr &graph_node, const std: ret = LoadGraphAsync(ge_root_model, graph_node); } if (ret != SUCCESS) { - GELOGE(ret, "LoadGraph Failed."); + GELOGE(ret, "[Load][Graph] Failed, graph_id:%u.", graph_node->GetGraphId()); return ret; } graph_node->SetBuildFlag(true); @@ -1086,7 +1133,7 @@ Status GraphManager::StartForRunGraph(const GraphNodePtr &graph_node, const std: ret = LoadGraphAsync(ge_root_model_ptr, graph_node); } if (ret != SUCCESS) { - GELOGE(ret, "LoadGraph Failed."); + GELOGE(ret, "[Load][Graph] Failed, graph_id:%u.", graph_node->GetGraphId()); return ret; } } @@ -1117,7 +1164,7 @@ Status GraphManager::LoadGraph(const GeRootModelPtr &ge_root_model, const GraphN Status ret = GraphLoader::LoadModelOnline(model_id_info.model_id, ge_root_model, model_listener); GE_TIMESTAMP_EVENT_END(LoadGraph, "GraphManager::LoadGraph"); if (ret != SUCCESS) { - GELOGE(ret, "[StartForRunGraph] LoadGraph Failed"); + GELOGE(ret, "[Load][Model] failed, ret:%d", ret); graph_node->SetRunFlag(false); return ret; } @@ -1217,7 +1264,7 @@ Status GraphManager::InnerRunGraph(GraphNodePtr &graph_node, const GraphId &grap const std::vector &inputs, std::vector &outputs) { Status ret = graph_executor_.SetCondition(&sync_run_mutex_, &condition_, graph_run_listener_); if (ret 
!= SUCCESS) { - GELOGE(GE_GRAPH_RUNGRAPH_FAILED, "[RunGraph] set condition failed, graph_id = %u.", graph_id); + GELOGE(GE_GRAPH_RUNGRAPH_FAILED, "[Set][Condition] failed, graph_id = %u.", graph_id); graph_node->SetRunFlag(false); return GE_GRAPH_RUNGRAPH_FAILED; } @@ -1230,7 +1277,7 @@ Status GraphManager::InnerRunGraph(GraphNodePtr &graph_node, const GraphId &grap graph_node->SetRunFlag(false); if (ret != SUCCESS) { - GELOGE(ret, "[RunGraph] execute graph failed, graph_id = %u.", graph_id); + GELOGE(ret, "[Execute][Graph] failed, graph_id = %u.", graph_id); return ret; } return SUCCESS; @@ -1240,8 +1287,7 @@ Status GraphManager::InnerRunGraphWithStream(GraphNodePtr &graph_node, const Gra const std::vector &inputs, std::vector &outputs) { auto ret = graph_executor_.SetCondition(&sync_run_mutex_, &condition_, graph_run_listener_); if (ret != SUCCESS) { - GELOGE(GE_GRAPH_RUNGRAPH_FAILED, "[Run][GraphWithStreamAsync] set condition failed, " - "graph id = %u, stream = %p.", graph_id, stream); + GELOGE(GE_GRAPH_RUNGRAPH_FAILED, "[Set][Condition] failed, graph id = %u, stream = %p.", graph_id, stream); graph_node->SetRunFlag(false); return GE_GRAPH_RUNGRAPH_FAILED; } @@ -1250,7 +1296,7 @@ Status GraphManager::InnerRunGraphWithStream(GraphNodePtr &graph_node, const Gra graph_node->SetRunFlag(false); graph_node->SetIsSpecificStream(false); if (ret != SUCCESS) { - GELOGE(ret, "[Run][GraphWithStreamAsync] execute graph failed, graph id = %u, stream = %p.", graph_id, stream); + GELOGE(ret, "[Execute][Graph] With Stream failed, graph id = %u, stream = %p.", graph_id, stream); return ret; } GELOGI("[Run][GraphWithStreamAsync] run graph success, graph id = %u, stream = %p.", graph_id, stream); @@ -1272,18 +1318,20 @@ Status GraphManager::RunGraphWithStreamAsync(const GraphId &graph_id, rtStream_t Status ret = GetGraphNode(graph_id, graph_node); if (ret != SUCCESS) { REPORT_INNER_ERROR("E19999", "graph id = %u not exist in graph_map, check invalid.", graph_id); - GELOGE(ret, 
"Run graph with stream async graph not exist, graph id = %u.", graph_id); + GELOGE(ret, "[Get][GraphNode] failed, Run graph with stream async, graph not exist, graph id = %u.", graph_id); return ret; } if (graph_node == nullptr) { REPORT_INNER_ERROR("E19999", "Graph node is nullptr in graph_map, graph id = %u, check invalid.", graph_id); - GELOGE(GE_GRAPH_GRAPH_NODE_NULL, "Run graph with stream async graph node is NULL, graph id = %u.", graph_id); + GELOGE(GE_GRAPH_GRAPH_NODE_NULL, "[Check][Param] Run graph with stream async, graph node is NULL, " + "graph id = %u.", graph_id); return GE_GRAPH_GRAPH_NODE_NULL; } if (graph_node->GetRunFlag()) { REPORT_INNER_ERROR("E19999", "Graph is already running, can't be run again, graph id = %u, " "check invalid.", graph_id); - GELOGE(GE_GRAPH_ALREADY_RUNNING, "Run graph with stream async graph already running, graph id = %u.", graph_id); + GELOGE(GE_GRAPH_ALREADY_RUNNING, "[Get][RunFlag] Run graph with stream async graph already running, " + "graph id = %u.", graph_id); return GE_GRAPH_ALREADY_RUNNING; } @@ -1301,7 +1349,7 @@ Status GraphManager::RunGraphWithStreamAsync(const GraphId &graph_id, rtStream_t GeRootModelPtr ge_root_model = nullptr; ret = StartForRunGraph(graph_node, inputs, ge_root_model, session_id); if (ret != SUCCESS) { - GELOGE(ret, "[Run][GraphWithStreamAsync] StartForRunGraph failed!"); + GELOGE(ret, "[Call][StartForRunGraph] failed, session_id:%lu", session_id); graph_node->SetRunFlag(false); return ret; } @@ -1322,23 +1370,20 @@ Status GraphManager::RunGraph(const GraphId &graph_id, const std::vectorGetRunFlag()) { - REPORT_INNER_ERROR("E19999", "Graph is already running, can't be run again, graph_id:%u, " - "check invalid", graph_id); - GELOGE(GE_GRAPH_ALREADY_RUNNING, "[RunGraph] graph already running, graph id = %u", graph_id); + REPORT_INNER_ERROR("E19999", "Graph is already running, can't be run again, graph_id:%u, check invalid", graph_id); + GELOGE(GE_GRAPH_ALREADY_RUNNING, "[Get][RunFlag] graph 
already running, graph id = %u", graph_id); return GE_GRAPH_ALREADY_RUNNING; } @@ -1352,8 +1397,8 @@ Status GraphManager::RunGraph(const GraphId &graph_id, const std::vectorSetRunFlag(false); return ret; } @@ -1382,7 +1427,7 @@ Status GraphManager::RunGraph(const GraphId &graph_id, const std::vectorIsSummaryGraph()) { ret = SummaryHandle(graph_id, outputs); if (ret != SUCCESS) { - GELOGE(ret, "[RunGraph] SummaryHandle failed!"); + GELOGE(ret, "[Call][SummaryHandle] failed, graph_id:%u", graph_id); } } @@ -1393,7 +1438,7 @@ Status GraphManager::RunGraph(const GraphId &graph_id, const std::vectorInitFlag()) { - REPORT_INNER_ERROR("E19999", "GELib is not init before, graph_id:%u, check invalid", - graph_id); - GELOGE(GE_CLI_GE_NOT_INITIALIZED, "GE is not initialized"); + REPORT_INNER_ERROR("E19999", "GELib is not init before, graph_id:%u, check invalid", graph_id); + GELOGE(GE_CLI_GE_NOT_INITIALIZED, "[Get][GELib] GELib is not init before, graph_id:%u", graph_id); return GE_CLI_GE_NOT_INITIALIZED; } @@ -1481,7 +1523,9 @@ Status GraphManager::BuildGraphForUnregisteredOp(const GraphId &graph_id, const REPORT_INNER_ERROR("E19999", "GetOpsKernelInfoStore fail for op:%s(%s), kernel_lib_name:%s, graph_id:%u, " "check invalid", op_desc->GetName().c_str(), op_desc->GetType().c_str(), op_desc->GetOpKernelLibName().c_str(), graph_id); - GELOGE(FAILED, "Get op kernel info store failed"); + GELOGE(FAILED, "[Get][OpsKernelInfoStore] fail for op:%s(%s), kernel_lib_name:%s, graph_id:%u", + op_desc->GetName().c_str(), op_desc->GetType().c_str(), + op_desc->GetOpKernelLibName().c_str(), graph_id); return FAILED; } @@ -1490,8 +1534,7 @@ Status GraphManager::BuildGraphForUnregisteredOp(const GraphId &graph_id, const REPORT_CALL_ERROR("E19999", "Call CompileOp fail for op:%s(%s), kernel_lib_name:%s, graph_id:%u, " "check invalid", op_desc->GetName().c_str(), op_desc->GetType().c_str(), op_desc->GetOpKernelLibName().c_str(), graph_id); - GELOGE(FAILED, "Get op kernel info store failed"); 
- GELOGE(ret, "Compile op failed, op = %s, graph_id = %u.", op_desc->GetName().c_str(), graph_id); + GELOGE(ret, "[Compile][Op] failed, op = %s, graph_id = %u.", op_desc->GetName().c_str(), graph_id); return ret; } } @@ -1514,23 +1557,21 @@ Status GraphManager::BuildGraph(const GraphId &graph_id, const std::vectorGetRunFlag()) { REPORT_INNER_ERROR("E19999", "Graph is already running, can't be run again, graph_id:%u, " "check invalid", graph_id); - GELOGE(GE_GRAPH_ALREADY_RUNNING, "[BuildGraph] graph already running, graph id = %u", graph_node->GetGraphId()); + GELOGE(GE_GRAPH_ALREADY_RUNNING, "[Get][RunFlag] graph already running, graph id = %u", graph_node->GetGraphId()); return GE_GRAPH_ALREADY_RUNNING; } @@ -1543,7 +1584,7 @@ Status GraphManager::BuildGraph(const GraphId &graph_id, const std::vectorSetRunFlag(false); if (ret != SUCCESS) { - GELOGE(GE_GRAPH_PRERUN_FAILED, "[BuildGraph] StartForRunGraph failed! graph_id:%u.", graph_id); + GELOGE(GE_GRAPH_PRERUN_FAILED, "[Call][StartForRunGraph] failed! 
graph_id:%u.", graph_id); return GE_GRAPH_PRERUN_FAILED; } @@ -1563,18 +1604,18 @@ Status GraphManager::BuildGraph(const GraphId &graph_id, const std::vector &attrs, const std::vector &inputs, const std::vector &outputs) { - GE_CHK_BOOL_EXEC(ge::AttrUtils::SetStr(&model, "ATTR_MODEL_OP_TYPE", type), return FAILED, "Set Op[%s] type fail", - type.c_str()); + GE_CHK_BOOL_EXEC(ge::AttrUtils::SetStr(&model, "ATTR_MODEL_OP_TYPE", type), return FAILED, + "[Set][Str] model type[%s] fail", type.c_str()); for (const auto &it : attrs) { GE_CHK_BOOL_EXEC(model.SetAttr("ATTR_MODEL_" + it.first, it.second) == GRAPH_SUCCESS, return FAILED, - "Set OpDesc attribute[%s] fail", it.first.c_str()); + "[Set][Attr] OpDesc attribute[%s] fail", it.first.c_str()); } GE_CHK_BOOL_EXEC(ge::AttrUtils::SetListTensor(&model, "ATTR_MODEL_TENSOR_INPUTS", inputs), return FAILED, - "Set Inputs tensor list fail"); + "[Set][InputsTensor] list fail"); GE_CHK_BOOL_EXEC(ge::AttrUtils::SetListTensor(&model, "ATTR_MODEL_TENSOR_OUTPUTS", outputs), return FAILED, - "Set Outputs tensor list fail"); + "[Set][OutputsTensor] list fail"); return SUCCESS; } @@ -1601,9 +1642,8 @@ Status GraphManager::RemoveGraph(const GraphId &graph_id) { GraphNodePtr graph_node = nullptr; Status ret = GetGraphNode(graph_id, graph_node); if (ret != SUCCESS || graph_node == nullptr) { - REPORT_INNER_ERROR("E19999", "Graph:%u not exist in graph_map, check invalid when GraphManager %s", - graph_id, __FUNCTION__); - GELOGE(GE_GRAPH_GRAPH_NOT_EXIST, "[GraphManager] Id %u does not exists.", graph_id); + REPORT_INNER_ERROR("E19999", "Graph:%u not exist in graph_map, check invalid", graph_id); + GELOGE(GE_GRAPH_GRAPH_NOT_EXIST, "[Get][GraphNode] Id %u does not exists.", graph_id); return GE_GRAPH_GRAPH_NOT_EXIST; } if (graph_node->GetRunFlag()) { @@ -1628,7 +1668,7 @@ Status GraphManager::RemoveGraph(const GraphId &graph_id) { if (rt_ret != RT_ERROR_NONE) { REPORT_CALL_ERROR("E19999", "Call rtSetDevice failed, device_id:%u, graph_id:%u", 
GetContext().DeviceId(), graph_id); - GELOGE(RT_FAILED, "[GraphManager:] rtSetDevice failed, modelId=%u, graphId=%u.", ge_root_model->GetModelId(), + GELOGE(RT_FAILED, "[Call][RtSetDevice] failed, modelId=%u, graphId=%u.", ge_root_model->GetModelId(), graph_id); return FAILED; } @@ -1636,16 +1676,15 @@ Status GraphManager::RemoveGraph(const GraphId &graph_id) { // unload them respectively. middle_ret = UnloadModel(ge_root_model, graph_id); if (middle_ret != SUCCESS) { - REPORT_INNER_ERROR("E19999", "UnloadModel for graph:%u failed, check unload detail in GraphLoader %s", - graph_id, __FUNCTION__); - GELOGE(middle_ret, "[GraphManager:] unload model failed, graph_id=%u.", graph_id); + REPORT_INNER_ERROR("E19999", "UnloadModel for graph:%u failed, check invalid", graph_id); + GELOGE(middle_ret, "[Unload][Model] model failed, graph_id=%u.", graph_id); ret = middle_ret; } rt_ret = rtDeviceReset(GetContext().DeviceId()); if (rt_ret != RT_ERROR_NONE) { - REPORT_CALL_ERROR("E19999", "Call rtDeviceReset failed, device_id:%u, graph_id:%u, when GraphManager %s", - GetContext().DeviceId(), graph_id, __FUNCTION__); - GELOGE(RT_FAILED, "[GraphManager:] rtDeviceReset failed, graphId=%u.", graph_id); + REPORT_CALL_ERROR("E19999", "Call rtDeviceReset failed, device_id:%u, graph_id:%u", + GetContext().DeviceId(), graph_id); + GELOGE(RT_FAILED, "[Call][RtDeviceReset] failed, device_id:%u, graph_id:%u", GetContext().DeviceId(), graph_id); ret = FAILED; } } @@ -1654,7 +1693,7 @@ Status GraphManager::RemoveGraph(const GraphId &graph_id) { RemoveGraphCount(graph_id); RemoveAddGraphCondition(graph_id); - GE_CHK_STATUS_RET(ret, "[GraphManager:] Remove graph failed, graph_id=%u.", graph_id); + GE_CHK_STATUS_RET(ret, "[Remove][Graph] failed, graph_id=%u.", graph_id); GELOGI("[GraphManager] remove graph success, graph_id=%u.", graph_id); return SUCCESS; } @@ -1667,8 +1706,7 @@ Status GraphManager::ParseOptions(const std::map &opti ret = ParseOption(options, STREAM_MAX_PARALLEL_NUM, 
options_.stream_max_parallel_num); if (ret != SUCCESS) { GELOGE(GE_GRAPH_OPTIONS_INVALID, - "parse Key:%s value failed, it must be same format as " - "DNN_V100:2,DNN_HCCL:3", + "[Parse][Option] %s value failed, it must be same format as DNN_V100:2,DNN_HCCL:3", STREAM_MAX_PARALLEL_NUM.c_str()); return GE_GRAPH_OPTIONS_INVALID; } @@ -1676,23 +1714,23 @@ Status GraphManager::ParseOptions(const std::map &opti // get stream num ret = ParseOption(options, STREAM_NUM, options_.stream_num); if ((ret != SUCCESS) || (options_.stream_num == 0)) { - GELOGE(GE_GRAPH_OPTIONS_INVALID, "Key:ge.stream_num, its value %d is invalid, must be not equal zero.", - options_.stream_num); + GELOGE(GE_GRAPH_OPTIONS_INVALID, "[Parse][Option] Key:ge.stream_num, its value %d is invalid, " + "must be not equal zero.", options_.stream_num); return GE_GRAPH_OPTIONS_INVALID; } // get perf level, its value please see enum PerfLevel ret = ParseOption(options, PERF_LEVEL, options_.perf_level); if ((ret != SUCCESS) || IsPerfLevelInvalid(options_.perf_level)) { - GELOGE(GE_GRAPH_OPTIONS_INVALID, "Key:ge.perfLevel, its value %d is invalid, must be enum PerfLevel type.", - options_.perf_level); + GELOGE(GE_GRAPH_OPTIONS_INVALID, "[Parse][Option] Key:ge.perfLevel, its value %d is invalid, " + "must be enum PerfLevel type.", options_.perf_level); return GE_GRAPH_OPTIONS_INVALID; } // get encrypt mode ret = ParseOption(options, ENCRYPT_MODE, options_.encrypt_mode); GE_IF_BOOL_EXEC(ret != SUCCESS, - GELOGE(GE_GRAPH_OPTIONS_INVALID, "Key:ge.encryptMode value invalid."); + GELOGE(GE_GRAPH_OPTIONS_INVALID, "[Parse][Option] Key:ge.encryptMode value invalid."); return GE_GRAPH_OPTIONS_INVALID); // get ek file @@ -1732,7 +1770,8 @@ Status GraphManager::ParseOptions(const std::map &opti // get weight compress flag ret = ParseOption(options, COMPRESS_FLAG, options_.compress_flag); GE_IF_BOOL_EXEC(ret != SUCCESS, - GELOGE(GE_GRAPH_OPTIONS_INVALID, "Key:ge.compressFlag value is invalid, must be 0 or 1."); + 
GELOGE(GE_GRAPH_OPTIONS_INVALID, "[Parse][Option] Key:ge.compressFlag value is invalid, " + "must be 0 or 1."); return GE_GRAPH_OPTIONS_INVALID); // Set Build model and step ParseOption(options, BUILD_MODE, options_.build_mode); @@ -1743,21 +1782,22 @@ Status GraphManager::ParseOptions(const std::map &opti options_.run_graph_flag = true; ret = ParseOption(options, RUN_FLAG, options_.run_graph_flag); GE_IF_BOOL_EXEC(ret != SUCCESS, - GELOGE(GE_GRAPH_OPTIONS_INVALID, "Key:ge.runFlag value is invalid, must be 0 or 1."); + GELOGE(GE_GRAPH_OPTIONS_INVALID, "[Parse][Option] Key:ge.runFlag value is invalid, must be 0 or 1."); return GE_GRAPH_OPTIONS_INVALID); // ge.graphType ret = ParseTrainGraphFlag(options_.run_graph_flag, options_.train_graph_flag); GE_IF_BOOL_EXEC(ret != SUCCESS, - GELOGE(GE_GRAPH_OPTIONS_INVALID, "Key:ge.runFlag value is invalid"); + GELOGE(GE_GRAPH_OPTIONS_INVALID, "[Parse][TrainGraphFlag] Key:ge.runFlag value is invalid"); return GE_GRAPH_OPTIONS_INVALID); // parse FmkOp options_.local_fmk_op_flag = false; ret = ParseOption(options, LOCAL_FMKOP_FLAG, options_.local_fmk_op_flag); GE_IF_BOOL_EXEC(ret != SUCCESS, - GELOGE(GE_GRAPH_OPTIONS_INVALID, "Key:ge.localFmkopFlag value is invalid, must be 0 or 1."); + GELOGE(GE_GRAPH_OPTIONS_INVALID, "[Parse][Option] Key:ge.localFmkopFlag value is invalid, " + "must be 0 or 1."); return GE_GRAPH_OPTIONS_INVALID); options_.enable_print_op_pass = true; ret = ParseOption(options, ENABLE_PRINT_OP_PASS, options_.enable_print_op_pass); @@ -1765,13 +1805,15 @@ Status GraphManager::ParseOptions(const std::map &opti options_.is_single_op = false; ret = ParseOption(options, SINGLE_OP_FLAG, options_.is_single_op); GE_IF_BOOL_EXEC(ret != SUCCESS, - GELOGE(GE_GRAPH_OPTIONS_INVALID, "Key:ge.enablePrintOpPass value is invalid, must be 0 or 1."); + GELOGE(GE_GRAPH_OPTIONS_INVALID, "[Parse][Option] Key:ge.enablePrintOpPass value is invalid, " + "must be 0 or 1."); return GE_GRAPH_OPTIONS_INVALID); // parse hcom parallel 
options_.hcom_parallel = false; ret = ParseOption(options, HCOM_PARALLEL, options_.hcom_parallel); GE_IF_BOOL_EXEC(ret != SUCCESS, - GELOGE(GE_GRAPH_OPTIONS_INVALID, "Key:ge.hcomParallel value is invalid, must be 0 or 1."); + GELOGE(GE_GRAPH_OPTIONS_INVALID, "[Parse][Option] Key:ge.hcomParallel value is invalid, " + "must be 0 or 1."); return GE_GRAPH_OPTIONS_INVALID); // net output node dataType ParseOption(options, OUTPUT_DATATYPE, options_.output_datatype); @@ -1831,10 +1873,9 @@ Status GraphManager::ParseOption(const std::map &optio } else if (flag == "1") { option = true; } else { - REPORT_INNER_ERROR("E19999", "Option:%s value:%s must be 0 or 1, check invalid", - key.c_str(), flag.c_str()); - GELOGE(GE_GRAPH_OPTIONS_INVALID, "Key:%s, its value %s is invalid, it must be 0 or 1.", key.c_str(), - flag.c_str()); + REPORT_INNER_ERROR("E19999", "Option:%s value:%s must be 0 or 1, check invalid", key.c_str(), flag.c_str()); + GELOGE(GE_GRAPH_OPTIONS_INVALID, "[Check][Param] Key:%s, its value %s is invalid, it must be 0 or 1.", + key.c_str(), flag.c_str()); return GE_GRAPH_OPTIONS_INVALID; } } @@ -1851,8 +1892,8 @@ Status GraphManager::ParseOption(const std::map &optio if (ptr != nullptr && *ptr != '\0') { REPORT_INNER_ERROR("E19999", "Option:%s value:%s must be int32_t type, check invalid", key.c_str(), iter->second.c_str()); - GELOGE(GE_GRAPH_OPTIONS_INVALID, "Key:%s, its value %s is invalid, must be int32_t type.", key.c_str(), - iter->second.c_str()); + GELOGE(GE_GRAPH_OPTIONS_INVALID, "[Check][Param] Key:%s, its value %s is invalid, must be int32_t type.", + key.c_str(), iter->second.c_str()); return GE_GRAPH_OPTIONS_INVALID; } } @@ -1896,10 +1937,8 @@ Status GraphManager::ParseOption(const std::map &optio if (pos == string::npos) { REPORT_INNER_ERROR("E19999", "Option:%s, value:%s, engine and num must be connected by :, check invalid", key.c_str(), engine_parallel.c_str()); - GELOGE(GE_GRAPH_OPTIONS_INVALID, - "engine and num must be connected by :, " - "while 
your input is %s", - engine_parallel.c_str()); + GELOGE(GE_GRAPH_OPTIONS_INVALID, "[Check][Param] engine and num must be connected by :, " + "while your input is %s", engine_parallel.c_str()); return GE_GRAPH_OPTIONS_INVALID; } std::string engine_name = engine_parallel.substr(0, pos); @@ -1909,14 +1948,14 @@ Status GraphManager::ParseOption(const std::map &optio Status ret = CheckEngineName(engine_name, key, option); if (ret != SUCCESS) { - GELOGE(GE_GRAPH_OPTIONS_INVALID, "check engine name : %s failed, ", engine_name.c_str()); + GELOGE(GE_GRAPH_OPTIONS_INVALID, "[Check][EngineName] %s failed", engine_name.c_str()); return GE_GRAPH_OPTIONS_INVALID; } int num = 0; ret = ParseParallelNum(parallel_num, key, num); if (ret != SUCCESS) { - GELOGE(GE_GRAPH_OPTIONS_INVALID, "parse parallel num failed"); + GELOGE(GE_GRAPH_OPTIONS_INVALID, "[Parse][ParallelNum] %s failed", parallel_num.c_str()); return GE_GRAPH_OPTIONS_INVALID; } @@ -1931,7 +1970,7 @@ Status GraphManager::CheckEngineName(const std::string &engine_name, const std:: if (engine_name.empty()) { REPORT_INNER_ERROR("E19999", "Option:%s, param engine_name:%s is empty, check invalid", key.c_str(), engine_name.c_str()); - GELOGE(GE_GRAPH_OPTIONS_INVALID, "engine name of %s is empty", key.c_str()); + GELOGE(GE_GRAPH_OPTIONS_INVALID, "[Check][Param] engine name of %s is empty", key.c_str()); return GE_GRAPH_OPTIONS_INVALID; } // judge whether exist in engine list @@ -1943,7 +1982,7 @@ Status GraphManager::CheckEngineName(const std::string &engine_name, const std:: if (it_stream_repeat != option.end()) { REPORT_INNER_ERROR("E19999", "Option:%s, param engine_name:%s is repeated, check invalid", key.c_str(), engine_name.c_str()); - GELOGE(GE_GRAPH_OPTIONS_INVALID, "engine : %s of %s is repeated", engine_name.c_str(), key.c_str()); + GELOGE(GE_GRAPH_OPTIONS_INVALID, "[Check][Param] engine:%s of %s is repeated", engine_name.c_str(), key.c_str()); return GE_GRAPH_OPTIONS_INVALID; } return SUCCESS; @@ -1953,14 +1992,15 @@ 
Status GraphManager::ParseParallelNum(const std::string ¶llel_num, const std if (parallel_num.empty()) { REPORT_INNER_ERROR("E19999", "Option:%s, param parallel num:%s is empty, check invalid", key.c_str(), parallel_num.c_str()); - GELOGE(GE_GRAPH_OPTIONS_INVALID, "parallel num of %s is empty", key.c_str()); + GELOGE(GE_GRAPH_OPTIONS_INVALID, "[Check][Param] parallel num of %s is empty", key.c_str()); return GE_GRAPH_OPTIONS_INVALID; } for (char c : parallel_num) { if (!isdigit(c)) { REPORT_INNER_ERROR("E19999", "Option:%s, param parallel num:%s is not digit, check invalid", key.c_str(), parallel_num.c_str()); - GELOGE(GE_GRAPH_OPTIONS_INVALID, "%s input is invalid ", key.c_str()); + GELOGE(GE_GRAPH_OPTIONS_INVALID, "[Check][Param] Option:%s, param parallel num:%s is not digit, check invalid", + key.c_str(), parallel_num.c_str()); return GE_GRAPH_OPTIONS_INVALID; } } @@ -1970,24 +2010,28 @@ Status GraphManager::ParseParallelNum(const std::string ¶llel_num, const std } catch (std::invalid_argument &) { REPORT_INNER_ERROR("E19999", "Option:%s, param parallel num:%s is invalid argument, check", key.c_str(), parallel_num.c_str()); - GELOGE(GE_GRAPH_OPTIONS_INVALID, "parallel num : %s of %s is invalid argument", parallel_num.c_str(), key.c_str()); + GELOGE(GE_GRAPH_OPTIONS_INVALID, "[Check][Param] parallel num:%s of %s is invalid argument", + parallel_num.c_str(), key.c_str()); return GE_GRAPH_OPTIONS_INVALID; } catch (std::out_of_range &) { REPORT_INNER_ERROR("E19999", "Option:%s, param parallel num:%s is out of range, check", key.c_str(), parallel_num.c_str()); - GELOGE(GE_GRAPH_OPTIONS_INVALID, "parallel num : %s of %s is out of range", parallel_num.c_str(), key.c_str()); + GELOGE(GE_GRAPH_OPTIONS_INVALID, "[Check][Param] parallel num:%s of %s is out of range", + parallel_num.c_str(), key.c_str()); return GE_GRAPH_OPTIONS_INVALID; } catch (...) 
{ REPORT_INNER_ERROR("E19999", "Option:%s, param parallel num:%s is invalid argument, check", key.c_str(), parallel_num.c_str()); - GELOGE(GE_GRAPH_OPTIONS_INVALID, "parallel num : %s of %s is invalid argument", parallel_num.c_str(), key.c_str()); + GELOGE(GE_GRAPH_OPTIONS_INVALID, "[Check][Param] parallel num:%s of %s is invalid argument", + parallel_num.c_str(), key.c_str()); return GE_GRAPH_OPTIONS_INVALID; } if (num < 1) { REPORT_INNER_ERROR("E19999", "Option:%s, param parallel num:%s < 1, check invalid", key.c_str(), parallel_num.c_str()); - GELOGE(GE_GRAPH_OPTIONS_INVALID, "parallel num : %s of %s must bigger than 0", parallel_num.c_str(), key.c_str()); + GELOGE(GE_GRAPH_OPTIONS_INVALID, "[Check][Param] parallel num:%s of %s must bigger than 0", + parallel_num.c_str(), key.c_str()); return GE_GRAPH_OPTIONS_INVALID; } return SUCCESS; @@ -2014,9 +2058,8 @@ Status GraphManager::GetGraphNode(const GraphId &graph_id, GraphNodePtr &out) { auto iter = graph_map_.find(graph_id); if (iter == graph_map_.end()) { out = nullptr; - REPORT_INNER_ERROR("E19999", "Graph:%u not exist in graph_map, check invalid", - graph_id); - GELOGE(GE_GRAPH_GRAPH_NOT_EXIST, "[GraphManager] graph not exist, graph_id= %u.", graph_id); + REPORT_INNER_ERROR("E19999", "Graph:%u not exist in graph_map, check invalid", graph_id); + GELOGE(GE_GRAPH_GRAPH_NOT_EXIST, "[Check][Param] graph not exist, graph_id= %u.", graph_id); return GE_GRAPH_GRAPH_NOT_EXIST; } out = iter->second; @@ -2037,7 +2080,7 @@ Status GraphManager::SummaryHandle(const GraphId &graph_id, std::vector &summary_output_indexes = whole_summary_output_indexes.at(graph_id); @@ -2082,9 +2125,8 @@ Status GraphManager::CheckpointHandle(const GraphId &graph_id, const ComputeGrap } } if (netoutput == nullptr) { - REPORT_INNER_ERROR("E19999", "No netoutput node in graph:%u, check invalid", - graph_id); - GELOGE(FAILED, "Netoutput is null."); + REPORT_INNER_ERROR("E19999", "No netoutput node in graph:%u, check invalid", graph_id); + 
GELOGE(FAILED, "[Check][Param] No netoutput node in graph:%u", graph_id); return FAILED; } for (const auto &in : netoutput->GetAllInDataAnchors()) { @@ -2092,9 +2134,9 @@ Status GraphManager::CheckpointHandle(const GraphId &graph_id, const ComputeGrap auto out_anchor = in->GetPeerOutAnchor(); if (out_anchor == nullptr) { REPORT_INNER_ERROR("E19999", "Peer anchor of op:%s(%s), in_index:%u is nullptr, graph_id:%u, check invalid", - netoutput->GetName().c_str(), netoutput->GetType().c_str(), - in->GetIdx(), graph_id); - GELOGE(FAILED, "out_anchor is null."); + netoutput->GetName().c_str(), netoutput->GetType().c_str(), in->GetIdx(), graph_id); + GELOGE(FAILED, "[Get][PeerOutAnchor] Peer anchor of op:%s(%s), in_index:%u is nullptr, graph_id:%u", + netoutput->GetName().c_str(), netoutput->GetType().c_str(), in->GetIdx(), graph_id); return FAILED; } ge::NodePtr peer_node = out_anchor->GetOwnerNode(); @@ -2103,7 +2145,8 @@ Status GraphManager::CheckpointHandle(const GraphId &graph_id, const ComputeGrap if (peer_node->GetAllInDataAnchors().size() != 1) { REPORT_INNER_ERROR("E19999", "More than one prior nodes of peer_node:%s(%s) in checkpoint Graph:%u, " "check invalid", peer_node->GetName().c_str(), peer_node->GetType().c_str(), graph_id); - GELOGE(FAILED, "More than one prior nodes of peer_node %s in checkpoint Graph.", peer_node->GetName().c_str()); + GELOGE(FAILED, "[Check][Param] More than one prior nodes of peer_node:%s(%s) in checkpoint Graph:%u.", + peer_node->GetName().c_str(), peer_node->GetType().c_str(), graph_id); return FAILED; } auto peer_node_in = peer_node->GetAllInDataAnchors().at(0); @@ -2117,9 +2160,9 @@ Status GraphManager::CheckpointHandle(const GraphId &graph_id, const ComputeGrap } if (peer_node == nullptr) { REPORT_INNER_ERROR("E19999", "Peer anchor node of op:%s(%s), in_index:%u is nullptr, graph_id:%u, check invalid", - netoutput->GetName().c_str(), netoutput->GetType().c_str(), - in->GetIdx(), graph_id); - GELOGE(FAILED, "No variable op found in 
one branch, checkpoint graph illegal."); + netoutput->GetName().c_str(), netoutput->GetType().c_str(), in->GetIdx(), graph_id); + GELOGE(FAILED, "[Check][Param] Peer anchor node of op:%s(%s), in_index:%u is nullptr, graph_id:%u", + netoutput->GetName().c_str(), netoutput->GetType().c_str(), in->GetIdx(), graph_id); return FAILED; } desc_name = peer_node->GetName(); @@ -2128,7 +2171,8 @@ Status GraphManager::CheckpointHandle(const GraphId &graph_id, const ComputeGrap REPORT_INNER_ERROR("E19999", "in index:%u of op:%s(%s) is out of outputs.size:%zu range, graph_id:%u, " "check invalid", in->GetIdx(), netoutput->GetName().c_str(), netoutput->GetType().c_str(), outputs.size(), graph_id); - GELOGE(FAILED, "variable index out of range."); + GELOGE(FAILED, "[Check][Param] in index:%u of op:%s(%s) is out of outputs.size:%zu range, graph_id:%u", + in->GetIdx(), netoutput->GetName().c_str(), netoutput->GetType().c_str(), outputs.size(), graph_id); return FAILED; } save_results.emplace(desc_name, TensorAdapter::AsTensor(outputs.at(in->GetIdx()))); @@ -2174,9 +2218,8 @@ Status GraphManager::PushSummaryData2ME(const GraphId &graph_id, } return iter->second(graph_id, tmp_summary_data); } - REPORT_INNER_ERROR("E19999", "No summary callback found, graph_id:%u, check invalid", - graph_id); - GELOGE(FAILED, "[GraphManager] PushSummaryData2ME failed, not found summary callback."); + REPORT_INNER_ERROR("E19999", "No summary callback found, graph_id:%u, check invalid", graph_id); + GELOGE(FAILED, "[Check][Param] No summary callback found, graph_id:%u", graph_id); return FAILED; } return itr->second(graph_id, summary_data); @@ -2196,9 +2239,8 @@ Status GraphManager::PushSaveData2ME(const GraphId &graph_id, const std::mapsecond(graph_id, tmp_save_data); } - REPORT_INNER_ERROR("E19999", "No checkpoint callback found, graph_id:%u, check invalid", - graph_id); - GELOGE(FAILED, "[GraphManager] PushSaveData2ME failed, not found checkpoint callback."); + REPORT_INNER_ERROR("E19999", "No 
checkpoint callback found, graph_id:%u, check invalid", graph_id); + GELOGE(FAILED, "[Check][Param] No checkpoint callback found, graph_id:%u", graph_id); return FAILED; } return itr->second(graph_id, save_data); @@ -2228,7 +2270,8 @@ bool GraphManager::CheckVariableForCheckpointGraph(NodePtr &node) { if (out == nullptr) { REPORT_INNER_ERROR("E19999", "anchor index:0 of op:%s(%s) is nullptr, check invalid", node->GetName().c_str(), node->GetType().c_str()); - GELOGE(GE_GRAPH_PARAM_NULLPTR, "out is nullptr."); + GELOGE(GE_GRAPH_PARAM_NULLPTR, "[Get][OutDataAnchor] anchor index:0 of op:%s(%s) is nullptr", + node->GetName().c_str(), node->GetType().c_str()); return false; } auto peer_out = out->GetPeerInDataAnchors(); @@ -2261,7 +2304,7 @@ static inline bool CheckConstanOpForCheckpointGraph(NodePtr &node) { return node bool GraphManager::IsCheckpointGraph(ComputeGraphPtr &compute_graph) { if (compute_graph == nullptr) { REPORT_INNER_ERROR("E19999", "Param compute_graph is nullptr, check invalid"); - GELOGE(GE_GRAPH_PARAM_NULLPTR, "[IsCheckpointGraph] computeGraph is nullptr."); + GELOGE(GE_GRAPH_PARAM_NULLPTR, "[Check][Param] computeGraph is nullptr."); return false; } for (auto &node : compute_graph->GetAllNodes()) { @@ -2395,9 +2438,8 @@ Status GraphManager::RemoveIsolatedConstInThisGraph(ge::ComputeGraphPtr &compute if (n->GetOutAllNodes().empty() && n->GetInAllNodes().empty()) { // it is an isolated constant, just remove it if (GraphUtils::RemoveJustNode(compute_graph, n) != GRAPH_SUCCESS) { - REPORT_CALL_ERROR("E19999", "Remove constant op:%s(%s) failed", - n->GetName().c_str(), n->GetType().c_str()); - GELOGE(FAILED, "remove constant %s failed.", n->GetName().c_str()); + REPORT_CALL_ERROR("E19999", "Remove constant op:%s(%s) failed", n->GetName().c_str(), n->GetType().c_str()); + GELOGE(FAILED, "[Call][RemoveJustNode] remove constant %s failed.", n->GetName().c_str()); return FAILED; } } @@ -2476,7 +2518,7 @@ Status 
GraphManager::OptimizeStage1(ge::ComputeGraphPtr &compute_graph) { auto ret = after_merge_passes.Run(compute_graph); GE_TIMESTAMP_END(after_merge_passes, "GraphManager::OptimizeStage1_1"); if (ret != SUCCESS && ret != NOT_CHANGED) { - GELOGE(ret, "Run passes when OptimizeStage1_1 failed, ret:%u.", ret); + GELOGE(ret, "[Run][Passes] when OptimizeStage1_1 failed, ret:%u.", ret); return ret; } @@ -2515,7 +2557,7 @@ Status GraphManager::OptimizeStage1(ge::ComputeGraphPtr &compute_graph) { ret = GEPass(compute_graph).Run(names_to_passes); GE_TIMESTAMP_END(names_to_passes, "GraphManager::OptimizeStage1_2"); if (ret != SUCCESS) { - GELOGE(ret, "Run passes when OptimizeStage1_2 failed, ret:%u.", ret); + GELOGE(ret, "[Run][Passes] when OptimizeStage1_2 failed, ret:%u.", ret); return ret; } // Calculate Op/Fe constantfolding cost @@ -2538,8 +2580,6 @@ Status GraphManager::OptimizeStage1(ge::ComputeGraphPtr &compute_graph) { GE_CHK_STATUS_RET(graph_pass.AddPass("OptimizeStage1_3::Migration", new (std::nothrow) SubgraphConstMigrationPass)); GE_CHK_STATUS_RET(graph_pass.AddPass("OptimizeStage1_3::ArgsClean", new (std::nothrow) UnusedArgsCleanPass)); GE_CHK_STATUS_RET(graph_pass.AddPass("OptimizeStage1_3::PrunePass", new (std::nothrow) PrunePass)); - auto mark_force_unknown_pass = new (std::nothrow) MarkForceUnknownForCondPass; - GE_CHK_STATUS_RET(graph_pass.AddPass("OptimizeStage1_3::MarkForceUnknownForCondPass", mark_force_unknown_pass)); GE_CHK_STATUS_RET(graph_pass.AddPass("OptimizeStage1_3::NextIterationPass", new (std::nothrow) NextIterationPass)) GE_CHK_STATUS_RET(graph_pass.AddPass("OptimizeStage1_3::ControlTriggerPass", new (std::nothrow) ControlTriggerPass)) GE_CHK_STATUS_RET( @@ -2573,7 +2613,7 @@ Status GraphManager::OptimizeStage1(ge::ComputeGraphPtr &compute_graph) { ret = graph_pass.Run(compute_graph); GE_TIMESTAMP_END(graph_pass, "GraphManager::OptimizeStage1_3"); if (ret != SUCCESS && ret != NOT_CHANGED) { - GELOGE(ret, "Run passes when OptimizeStage1_3 failed, 
ret:%u.", ret); + GELOGE(ret, "[Run][Passes] when OptimizeStage1_3 failed, ret:%u.", ret); return ret; } NamesToPass node_pass; @@ -2583,7 +2623,7 @@ Status GraphManager::OptimizeStage1(ge::ComputeGraphPtr &compute_graph) { ret = GEPass(compute_graph).Run(node_pass); GE_TIMESTAMP_END(node_pass, "GraphPrepare::node_pass"); if (ret != SUCCESS) { - GELOGE(ret, "Run identity remove pass for preprocess failed, ret:%u.", ret); + GELOGE(ret, "[Run][Identity] remove pass for preprocess failed, ret:%u.", ret); return ret; } return SUCCESS; @@ -2603,7 +2643,7 @@ Status GraphManager::OptimizeStage2(ge::ComputeGraphPtr &compute_graph) { auto ret = after_merge_passes.Run(compute_graph); GE_TIMESTAMP_END(after_merge_passes, "OptimizeStage2::AfterMergePasses"); if (ret != SUCCESS && ret != NOT_CHANGED) { - GELOGE(ret, "Run passes after merge sub graph failed, ret:%d.", ret); + GELOGE(ret, "[Run][Passes] after merge sub graph failed, ret:%d.", ret); return ret; } SetAttrForHcomBroadCastOp(compute_graph); @@ -2627,13 +2667,13 @@ Status GraphManager::OptimizeStage2(ge::ComputeGraphPtr &compute_graph) { ret = GEPass(compute_graph).Run(names_to_passes); GE_TIMESTAMP_END(names_to_passes, "OptimizeStage2::MergedGraphNameToPasses"); if (ret != SUCCESS) { - GELOGE(ret, "Run ge_passes optimize for OptimizeAfterMergeSubGraph failed, ret:%d.", ret); + GELOGE(ret, "[Run][GEPasses] optimize for OptimizeAfterMergeSubGraph failed, ret:%d.", ret); return ret; } ret = RemoveIsolatedConst(compute_graph); if (ret != SUCCESS) { - GELOGE(ret, "Remove isolated Constant failed, ret:%d.", ret); + GELOGE(ret, "[Remove][IsolatedConst] failed, ret:%d.", ret); return ret; } @@ -2684,32 +2724,33 @@ Status GraphManager::OptimizeStage2(ge::ComputeGraphPtr &compute_graph) { ret = pass_for_control_attr_optimize.Run(compute_graph); GE_TIMESTAMP_END(pass_for_control_attr_optimize, "OptimizeStage2::ControlAttrOptimize"); if (ret != SUCCESS && ret != NOT_CHANGED) { - GELOGE(ret, "Run passes when optimize stage 2 
failed"); + GELOGE(ret, "[Run][Passes] when optimize stage 2 failed"); return ret; } // Assign functional op labels. GE_TIMESTAMP_START(AssignFunctionalLabels); LabelAllocator label_allocator(compute_graph); - GE_CHK_STATUS_RET(label_allocator.AssignFunctionalLabels(), "Assign label failed."); + GE_CHK_STATUS_RET(label_allocator.AssignFunctionalLabels(), "[Assign][Label] failed."); GE_TIMESTAMP_END(AssignFunctionalLabels, "ModelBuilder::AssignFunctionalLabels"); // Add memcpy addr asynchronous node. GE_TIMESTAMP_START(AddMemcpyAddrAsyncNode); MemcpyAddrAsyncPass memcpy_addr; - GE_CHK_STATUS_RET(memcpy_addr.Run(compute_graph), "Add memcpy_addr_async node failed."); + GE_CHK_STATUS_RET(memcpy_addr.Run(compute_graph), "[Call][Run] Add memcpy_addr_async node failed."); GE_TIMESTAMP_END(AddMemcpyAddrAsyncNode, "MemcpyAddrAsyncPass::Run."); // Process offset and dependency for buffer pool memory assigner. GE_TIMESTAMP_START(BufferPoolMemoryPass); BufferPoolMemoryPass buffer_pool_mem_pass; - GE_CHK_STATUS_RET(buffer_pool_mem_pass.Run(compute_graph), "Failed to process for buffer pool allocator."); + GE_CHK_STATUS_RET(buffer_pool_mem_pass.Run(compute_graph), + "[Call][Run] Failed to process for buffer pool allocator."); GE_TIMESTAMP_END(BufferPoolMemoryPass, "BufferPoolMemoryPass::Run."); // Handle parallel group . 
GE_TIMESTAMP_START(ParallelGroup); ParallelGroupPass parallel_group_pass; - GE_CHK_STATUS_RET(parallel_group_pass.Run(compute_graph), "Handle parallel group failed."); + GE_CHK_STATUS_RET(parallel_group_pass.Run(compute_graph), "[Handle][ParallelGroup] failed."); GE_TIMESTAMP_END(ParallelGroup, "ParallelGroupPass::Run."); // After while sub graph handle, mark all node rw type @@ -2762,7 +2803,7 @@ Status GraphManager::LoadGraphAsync(const GeRootModelPtr &ge_root_model, const G Status ret = GraphLoader::LoadModelOnline(model_id_info.model_id, ge_root_model, listener); GE_TIMESTAMP_EVENT_END(LoadGraph, "GraphManager::LoadGraphAsync"); if (ret != SUCCESS) { - GELOGE(ret, "[LoadGraphAsync] LoadGraphAsync Failed"); + GELOGE(ret, "[Load][ModelOnline] Failed, model_id:%u", model_id_info.model_id); graph_node->SetRunFlag(false); return ret; } @@ -2777,9 +2818,8 @@ void GraphManager::ReleaseMemory(const GeModelPtr &ge_model, GraphNodePtr &graph const std::vector &model_ids, uint32_t graph_id, uint64_t session_id) { rtError_t rt_ret = rtSetDevice(GetContext().DeviceId()); if (rt_ret != RT_ERROR_NONE) { - REPORT_CALL_ERROR("E19999", "Call rtSetDevice failed, device_id:%u, when GraphManager %s", - GetContext().DeviceId(), __FUNCTION__); - GELOGE(RT_FAILED, "[GraphManager:] rtSetDevice failed, graphId=%u.", graph_id); + REPORT_CALL_ERROR("E19999", "Call rtSetDevice failed, device_id:%u", GetContext().DeviceId()); + GELOGE(RT_FAILED, "[Call][RtSetDevice] failed, device_id=%u.", GetContext().DeviceId()); return; } for (auto model_id : model_ids) { @@ -2821,9 +2861,8 @@ void GraphManager::ReleaseMemory(const GeModelPtr &ge_model, GraphNodePtr &graph ge_root_model->ClearAllModelId(); rt_ret = rtDeviceReset(GetContext().DeviceId()); if (rt_ret != RT_ERROR_NONE) { - REPORT_CALL_ERROR("E19999", "Call rtDeviceReset failed, device_id:%u, when GraphManager %s", - GetContext().DeviceId(), __FUNCTION__); - GELOGE(RT_FAILED, "[GraphManager:] rtDeviceReset failed, graphId=%u.", graph_id); + 
REPORT_CALL_ERROR("E19999", "Call rtDeviceReset failed, device_id:%u", GetContext().DeviceId()); + GELOGE(RT_FAILED, "[Call][RtDeviceReset] failed, device_id:%u.", GetContext().DeviceId()); return; } } @@ -2851,7 +2890,8 @@ Status GraphManager::CheckAndReleaseMemory(const GeModelPtr &ge_model, const Gra if (ge::CheckInt64AddOverflow(memory_size, weight_size) != SUCCESS) { REPORT_INNER_ERROR("E19999", "memory_size:%ld and weight_size:%ld will overflow after add, check invalid", memory_size, weight_size); - GELOGE(INTERNAL_ERROR, "The sum of Memory size and weight size exceeds INT64_MAX"); + GELOGE(INTERNAL_ERROR, "[Check][Param] memory_size:%ld and weight_size:%ld will overflow after add", + memory_size, weight_size); return INTERNAL_ERROR; } if (free_memory >= (memory_size + weight_size)) { @@ -2911,23 +2951,25 @@ Status GraphManager::ProcessSubGraphWithMultiThreads(GraphManager *graph_manager GE_DUMP(compute_graph_tmp, "OptimizeSubGraphBefore"); GE_CHECK_NOTNULL(compute_graph_tmp); if (!AttrUtils::SetInt(*compute_graph_tmp, ATTR_NAME_ROOT_GRAPH_ID, root_graph_id)) { - REPORT_CALL_ERROR("E19999", "Set Attr:%s to graph:%u", ATTR_NAME_ROOT_GRAPH_ID.c_str(), + REPORT_CALL_ERROR("E19999", "Set Attr:%s to graph:%u failed", ATTR_NAME_ROOT_GRAPH_ID.c_str(), compute_graph_tmp->GetGraphID()); - GELOGE(FAILED, "Failed to set attr ATTR_NAME_ROOT_GRAPH_ID for subgraph, graph_id: %u.", root_graph_id); + GELOGE(FAILED, "[Set][Attr] %s to graph:%u failed", ATTR_NAME_ROOT_GRAPH_ID.c_str(), + compute_graph_tmp->GetGraphID()); return FAILED; } if (!AttrUtils::SetStr(*compute_graph_tmp, ATTR_NAME_ROOT_GRAPH_NAME, root_graph_name)) { - REPORT_CALL_ERROR("E19999", "Set Attr:%s to graph:%u", ATTR_NAME_ROOT_GRAPH_NAME.c_str(), + REPORT_CALL_ERROR("E19999", "Set Attr:%s to graph:%u failed", ATTR_NAME_ROOT_GRAPH_NAME.c_str(), compute_graph_tmp->GetGraphID()); - GELOGE(FAILED, "Failed to set attr ATTR_NAME_ROOT_GRAPH_NAME for subgraph, \ - root_graph_name: %s.", root_graph_name.c_str()); + 
GELOGE(FAILED, "[Set][Attr] %s to graph:%u failed", ATTR_NAME_ROOT_GRAPH_NAME.c_str(), + compute_graph_tmp->GetGraphID()); return FAILED; } compute_graph_tmp->SetSessionID(session_id); Status ret = graph_manager->GetCompilerStages(root_graph_id).optimizer.OptimizeSubGraph(compute_graph_tmp, engine_name); if (ret != SUCCESS) { - GELOGE(ret, "SubGraph optimize Failed %s", engine_name.c_str()); + GELOGE(ret, "[Optimize][SubGraph] Failed, engine:%s, graph:%s", + engine_name.c_str(), compute_graph_tmp->GetName().c_str()); return ret; } else { GELOGD("SubGraph optimize success %s", engine_name.c_str()); @@ -2939,7 +2981,7 @@ Status GraphManager::ProcessSubGraphWithMultiThreads(GraphManager *graph_manager pthread_self()); } else { REPORT_INNER_ERROR("E19999", "Param sub_graph_info_ptr or graph_manager is nullptr"); - GELOGE(FAILED, "graph_manager or sub_graph_info_ptr is nullptr"); + GELOGE(FAILED, "[Check][Param] graph_manager or sub_graph_info_ptr is nullptr"); return FAILED; } @@ -2956,7 +2998,7 @@ Status GraphManager::RunGraphAsync(const GraphId &graph_id, const std::vectorGetGraphId()) + " need to re-build, you should remove it" " from GE first, then AddGraph again and rebuild it."); - graph_node->Unlock(); return PARAM_INVALID; } // check need incre build. 
@@ -3084,7 +3125,7 @@ void GraphManager::PreRunThread(GraphManager *graph_manager) { // more than one graph owns same graph_id uint32_t count = 0; if (graph_manager->GetGraphCount(args.graph_id, count) != SUCCESS) { - GELOGE(INTERNAL_ERROR, "Get graph [id:%u] count failed.", args.graph_id); + GELOGE(INTERNAL_ERROR, "[Get][GraphCount] failed, graph id:%u.", args.graph_id); return; } // Avoid repeatively prerun for graphs owns same graph_id in online inference concurrency @@ -3135,12 +3176,11 @@ void GraphManager::PreRunThread(GraphManager *graph_manager) { if (ret != SUCCESS) { graph_node->SetRunFlag(false); if (!ge::Analyzer::GetInstance()->IsEnableNetAnalyzeDebug()) { - ReturnError(graph_manager, args.callback, ret, "CheckIncreBuildAndPreRun Failed, thread exit.."); + GELOGE(ret, "CheckIncreBuildAndPreRun Failed, thread exit.."); graph_node->Unlock(); return; } else { - ReturnError(graph_manager, graph_node, args.callback, ret, - "CheckIncreBuildAndPreRun Failed, keep geop continue!"); + GELOGE(ret, "CheckIncreBuildAndPreRun Failed, keep geop continue!"); graph_node->Unlock(); continue; } @@ -3174,7 +3214,8 @@ Status GraphManager::ParseInputsDimsForGetNexNosinkAndData(const vector if (!(AttrUtils::GetInt(op_desc, ATTR_NAME_INDEX, index))) { REPORT_CALL_ERROR("E19999", "Get Attr:%s from op:%s(%s) fail", ATTR_NAME_INDEX.c_str(), op_desc->GetName().c_str(), op_desc->GetType().c_str()); - GELOGE(PARAM_INVALID, "Get index from attr failed"); + GELOGE(PARAM_INVALID, "[Get][Attr] %s from op:%s(%s) fail", ATTR_NAME_INDEX.c_str(), + op_desc->GetName().c_str(), op_desc->GetType().c_str()); return PARAM_INVALID; } if (static_cast(index) > input_tensor.size()) { @@ -3182,7 +3223,9 @@ Status GraphManager::ParseInputsDimsForGetNexNosinkAndData(const vector "check invalid", ATTR_NAME_INDEX.c_str(), op_desc->GetName().c_str(), op_desc->GetType().c_str(), index, input_tensor.size()); - GELOGE(PARAM_INVALID, "The count of input tensor should be equal to the count of data."); + 
GELOGE(PARAM_INVALID, "[Check][Param] Attr:%s in op:%s(%s) value:%ld > param input_tensor.size:%zu", + ATTR_NAME_INDEX.c_str(), op_desc->GetName().c_str(), op_desc->GetType().c_str(), + index, input_tensor.size()); return PARAM_INVALID; } @@ -3212,7 +3255,7 @@ Status GraphManager::ParseInputsDims(const std::vector &input_tensor } else { // data+getnext_nosink, but only need to get shape_dims of data if (ParseInputsDimsForGetNexNosinkAndData(data_nodes, input_tensor) != SUCCESS) { - GELOGE(PARAM_INVALID, "Failed to parse dims from data, when data coexist with getnext nosink."); + GELOGE(PARAM_INVALID, "[Parse][Dims] from data failed, when data coexist with getnext nosink."); return PARAM_INVALID; } } @@ -3228,7 +3271,7 @@ Status GraphManager::ParseInputsDims(const std::vector &input_tensor } else { // getnext_nosink + data, but only need to get shape_dims of getnext_nosink if (ParseInputsDimsForGetNexNosinkAndData(getnext_nosink_nodes, input_tensor) != SUCCESS) { - GELOGE(PARAM_INVALID, "Failed to parse dims from getnext nosink, when data coexist with getnext nosink"); + GELOGE(PARAM_INVALID, "[Parse][Dims] from getnext nosink failed, when data coexist with getnext nosink"); return PARAM_INVALID; } } @@ -3335,9 +3378,8 @@ void GraphManager::ReturnError(GraphManager *graph_manager, GraphNodePtr &graph_ std::vector outputs; auto compute_graph = GraphUtils::GetComputeGraph(*graph_node->GetGraph()); if (graph_manager == nullptr || compute_graph == nullptr) { - REPORT_INNER_ERROR("E19999", "Param graph_manager or compute_graph in graph_node is nullptr, " - "check invalid"); - GELOGE(GRAPH_FAILED, "[Analyze Mode] compute graph is null!"); + REPORT_INNER_ERROR("E19999", "Param graph_manager or compute_graph in graph_node is nullptr, check invalid"); + GELOGE(GRAPH_FAILED, "[Check][Param] compute graph or graph manager is nullptr"); callback(GRAPH_FAILED, outputs); return; } @@ -3360,7 +3402,8 @@ void GraphManager::ReturnError(GraphManager *graph_manager, GraphNodePtr 
&graph_ REPORT_INNER_ERROR("E19999", "InputIndex:%zu ShapeSize:%ld of op:%s(%s) < 0, unknown shape is not support, " "check invalid", i, len, node->GetName().c_str(), node->GetType().c_str()); - GELOGE(GRAPH_FAILED, "Analyze Mode does not support GEOP output unknown shape!"); + GELOGE(GRAPH_FAILED, "[Check][Param] InputIndex:%zu ShapeSize:%ld of op:%s(%s) < 0, " + "unknown shape is not support", i, len, node->GetName().c_str(), node->GetType().c_str()); callback(GRAPH_FAILED, outputs); return; } else if (len == 0) { @@ -3370,8 +3413,8 @@ void GraphManager::ReturnError(GraphManager *graph_manager, GraphNodePtr &graph_ auto length = GetSizeInBytes(len, input_desc->GetDataType()); auto aligned_ptr = MakeShared(length, kAlignment); if (aligned_ptr == nullptr) { - REPORT_INNER_ERROR("E19999", "Aligned_ptr is nullptr"); - GELOGE(GRAPH_FAILED, "[Analyze Mode] Aligned_ptr is nullptr"); + REPORT_CALL_ERROR("E19999", "New AlignedPtr failed, len:%ld", length); + GELOGE(GRAPH_FAILED, "[Create][AlignedPtr] failed, len:%ld", length); return; } ge_tensor.SetData(aligned_ptr, length); @@ -3393,16 +3436,14 @@ bool GraphManager::IsGraphNeedRebuild(uint32_t graph_id) { GraphNodePtr graph_node = nullptr; Status ret = GetGraphNode(graph_id, graph_node); if (ret != SUCCESS) { - REPORT_INNER_ERROR("E19999", "Graph:%u not exist in graph_map, check invalid", - graph_id); - GELOGE(ret, "[RunGraph] graph not exist, graph_id=%u.", graph_id); + REPORT_INNER_ERROR("E19999", "Graph:%u not exist in graph_map, check invalid", graph_id); + GELOGE(ret, "[Get][GraphNode] failed, graph not exist, graph_id:%u.", graph_id); return true; } if (graph_node == nullptr) { - REPORT_INNER_ERROR("E19999", "Graph node is nullptr in graph_map, graph_id:%u, check invalid", - graph_id); - GELOGE(GE_GRAPH_GRAPH_NODE_NULL, "[RunGraph] graph node is NULL, graphId=%u.", graph_id); + REPORT_INNER_ERROR("E19999", "Graph node is nullptr in graph_map, graph_id:%u, check invalid", graph_id); + 
GELOGE(GE_GRAPH_GRAPH_NODE_NULL, "[Check][Param] graph node is NULL, graph_id:%u.", graph_id); return true; } @@ -3416,16 +3457,14 @@ const map *GraphManager::GetGraphOptions(uint32_t grap GraphNodePtr graph_node = nullptr; Status ret = GetGraphNode(graph_id, graph_node); if (ret != SUCCESS) { - REPORT_INNER_ERROR("E19999", "Graph:%u not exist in graph_map, check invalid", - graph_id); - GELOGE(ret, "[RunGraph] graph not exist, graph_id=%u.", graph_id); + REPORT_INNER_ERROR("E19999", "Graph:%u not exist in graph_map, check invalid", graph_id); + GELOGE(ret, "[Get][GraphNode] failed, graph not exist, graph_id:%u.", graph_id); return nullptr; } if (!graph_node) { - REPORT_INNER_ERROR("E19999", "Graph node is nullptr in graph_map, graph_id:%u, check invalid", - graph_id); - GELOGE(GE_GRAPH_GRAPH_NODE_NULL, "[RunGraph] graph node is NULL, graph_id=%u.", graph_id); + REPORT_INNER_ERROR("E19999", "Graph node is nullptr in graph_map, graph_id:%u, check invalid", graph_id); + GELOGE(GE_GRAPH_GRAPH_NODE_NULL, "[Check][Param] graph node is NULL, graph_id:%u.", graph_id); return nullptr; } return &(graph_node->GetOptions()); @@ -3441,7 +3480,7 @@ Status GraphManager::OptimizeSubgraph(const GraphNodePtr &graph_node, ComputeGra StagePartitioner stage_partitioner(compute_graph); auto ret = stage_partitioner.Partition(); if (ret != SUCCESS) { - GELOGE(ret, "Graph partition by stage Failed"); + GELOGE(ret, "[Call][Partition] for Graph:%s by stage Failed", compute_graph->GetName().c_str()); return ret; } GE_TIMESTAMP_EVENT_END(StagePartition, "OptimizeSubgraph::StagePartition"); @@ -3450,14 +3489,16 @@ Status GraphManager::OptimizeSubgraph(const GraphNodePtr &graph_node, ComputeGra DynamicShapePartitioner dynamic_shape_partitioner(compute_graph); ret = dynamic_shape_partitioner.Partition(); if (ret != SUCCESS) { - GELOGE(ret, "Graph partition by dynamic shape Failed"); + GELOGE(ret, "[Call][Partition] for Graph:%s by dynamic shape Failed", compute_graph->GetName().c_str()); return 
ret; } bool dynamic_shape_partitioned = false; if (!AttrUtils::GetBool(*compute_graph, ATTR_NAME_DYNAMIC_SHAPE_PARTITIONED, dynamic_shape_partitioned)) { - REPORT_INNER_ERROR("E19999", "Get Attr:%s from graph:%u fail", - ATTR_NAME_DYNAMIC_SHAPE_PARTITIONED.c_str(), compute_graph->GetGraphID()); - GELOGE(FAILED, "failed get dynamic shape partitioned flag on partitioned graph."); + REPORT_INNER_ERROR("E19999", "Get Attr:%s from graph:%s(id:%u) fail", + ATTR_NAME_DYNAMIC_SHAPE_PARTITIONED.c_str(), compute_graph->GetName().c_str(), + compute_graph->GetGraphID()); + GELOGE(FAILED, "[Get][Attr] %s from graph:%u failed", + ATTR_NAME_DYNAMIC_SHAPE_PARTITIONED.c_str(), compute_graph->GetGraphID()); return FAILED; } GE_TIMESTAMP_EVENT_END(GraphPartitionDynamicShape, "OptimizeSubgraph::GraphPartitionDynamicShape"); @@ -3466,14 +3507,14 @@ Status GraphManager::OptimizeSubgraph(const GraphNodePtr &graph_node, ComputeGra GraphPartitioner &partitioner = GetCompilerStages(graph_node->GetGraphId()).partitioner; ret = partitioner.Partition(compute_graph, GraphPartitioner::kPartitioning); if (ret != SUCCESS) { - GELOGE(ret, "Graph partition Failed"); + GELOGE(ret, "[Call][Partition] for Graph:%s Failed", compute_graph->GetName().c_str()); return ret; } GE_TIMESTAMP_EVENT_END(GraphPartition, "OptimizeSubgraph::Partition1"); GE_TIMESTAMP_START(SetSubgraph); ret = SetSubgraph(session_id, compute_graph, partitioner); if (ret != SUCCESS) { - GELOGE(ret, "Graph set subgraph Failed"); + GELOGE(ret, "[Set][Subgraph] failed for graph:%s, session_id:%lu", compute_graph->GetName().c_str(), session_id); return ret; } GE_TIMESTAMP_EVENT_END(SetSubgraph, "OptimizeSubgraph::SetSubGraph"); @@ -3486,7 +3527,7 @@ Status GraphManager::OptimizeSubgraph(const GraphNodePtr &graph_node, ComputeGra Status ret = ConvertGraphToFile(compute_graph, partitioner, tuning_path, (options_.build_step == BUILD_STEP_AFTER_BUILDER)); if (ret != SUCCESS) { - GELOGE(ret, "Convert graph[%s] to file failed", 
compute_graph->GetName().c_str()); + GELOGE(ret, "[Convert][Graph] [%s] to file failed", compute_graph->GetName().c_str()); return ret; } GE_TIMESTAMP_EVENT_END(ConvertGraphToFile, "OptimizeSubgraph::ConvertGraphToFile"); @@ -3499,7 +3540,8 @@ Status GraphManager::OptimizeSubgraph(const GraphNodePtr &graph_node, ComputeGra GE_TIMESTAMP_START(MergeSubgraph); ret = MergeSubGraph(merged_compute_graph, compute_graph, graph_node->GetGraphId()); if (ret != SUCCESS) { - GELOGE(ret, "Merge SubGraph Failed"); + GELOGE(ret, "[Merge][SubGraph] Failed, graph:%s(id:%u)", + compute_graph->GetName().c_str(), graph_node->GetGraphId()); return ret; } GE_CHECK_NOTNULL(merged_compute_graph); @@ -3516,8 +3558,8 @@ Status GraphManager::OptimizeSubgraph(const GraphNodePtr &graph_node, ComputeGra if (!AttrUtils::SetBool(merged_compute_graph, ATTR_NAME_OFF_SUPERKERNEL_ATTR, off_superkernel)) { REPORT_INNER_ERROR("E19999", "Set Attr:%s to graph:%u fail", ATTR_NAME_OFF_SUPERKERNEL_ATTR.c_str(), compute_graph->GetGraphID()); - GELOGE(FAILED, "Compute graph %s set superkernel flag %d failed", merged_compute_graph->GetName().c_str(), - off_superkernel); + GELOGE(FAILED, "[Set][Attr] %s to graph:%u fail", + ATTR_NAME_OFF_SUPERKERNEL_ATTR.c_str(), compute_graph->GetGraphID()); return FAILED; } } @@ -3527,7 +3569,8 @@ Status GraphManager::OptimizeSubgraph(const GraphNodePtr &graph_node, ComputeGra if (!AttrUtils::SetBool(*compute_graph, ATTR_NAME_DYNAMIC_SHAPE_PARTITIONED, dynamic_shape_partitioned)) { REPORT_INNER_ERROR("E19999", "Set Attr:%s to graph:%u fail", ATTR_NAME_DYNAMIC_SHAPE_PARTITIONED.c_str(), compute_graph->GetGraphID()); - GELOGE(FAILED, "failed set dynamic shape partitioned flag on partitioned graph."); + GELOGE(FAILED, "[Set][Attr] %s to graph:%u fail", + ATTR_NAME_DYNAMIC_SHAPE_PARTITIONED.c_str(), compute_graph->GetGraphID()); return FAILED; } return SUCCESS; @@ -3587,7 +3630,7 @@ Status GraphManager::Build(const GraphNodePtr &graph_node, ComputeGraphPtr &comp auto ret = 
GetCompilerStages(graph_node->GetGraphId()).builder.Build(compute_graph, ge_root_model, session_id); if (ret != SUCCESS) { - GELOGE(ret, "SubGraph build Failed."); + GELOGE(ret, "[Call][Build] failed, session_id:%lu.", session_id); return ret; } @@ -3625,7 +3668,7 @@ Status GraphManager::GenCheckPointGraph(const std::mapAddOutputDesc(iter->second)); NodePtr var_node = compute_graph->AddNode(var_desc); GE_CHK_STATUS(GraphUtils::AddEdge(var_node->GetOutDataAnchor(0), save_node->GetInDataAnchor(index)), - "Add edge[%s->%s] fail.", var_node->GetName().c_str(), save_node->GetName().c_str()); + "[Add][Edge][%s->%s] fail.", var_node->GetName().c_str(), save_node->GetName().c_str()); index++; } compute_graph->Dump(); @@ -3636,13 +3679,12 @@ Status GraphManager::GenCheckPointGraph(const std::map &var_names, const std::vector &outputs, std::vector &var_values) { map var_results; - GE_CHK_STATUS_RET(SaveCheckPointResult(graph, outputs, var_results), "Save check point result failed."); + GE_CHK_STATUS_RET(SaveCheckPointResult(graph, outputs, var_results), "[Save][CheckPointResult] failed."); if (!var_names.empty()) { for (const auto &var_name : var_names) { if (var_results.count(var_name) == 0) { - REPORT_INNER_ERROR("E19999", "Fetch Var:%s result value fail", - var_name.c_str()); - GELOGE(FAILED, "Fetch var[%s] value failed.", var_name.c_str()); + REPORT_INNER_ERROR("E19999", "Fetch Var:%s result value fail", var_name.c_str()); + GELOGE(FAILED, "[Check][Param] Fetch var[%s] value failed.", var_name.c_str()); return FAILED; } else { auto var_tensor = var_results[var_name].GetTensorDesc(); @@ -3683,7 +3725,8 @@ Status GraphManager::SaveCheckPointResult(const Graph &graph, const std::vector< REPORT_INNER_ERROR("E19999", "peer node:%s(%s) of netoutput has more than 1 input in checkpoint Graph, " "check invalid", peer_node->GetName().c_str(), peer_node->GetType().c_str()); - GELOGE(FAILED, "peer_node [%s] has more than 1 input in checkpoint Graph.", peer_node->GetName().c_str()); 
+ GELOGE(FAILED, "[Check][Param] peer_node [%s] has more than 1 input in checkpoint Graph.", + peer_node->GetName().c_str()); return FAILED; } auto peer_node_in_anchor = peer_node->GetAllInDataAnchors().at(0); @@ -3699,7 +3742,7 @@ Status GraphManager::SaveCheckPointResult(const Graph &graph, const std::vector< REPORT_INNER_ERROR("E19999", "peer node:%s(%s) of netoutput is not variable in checkpoint Graph, " "check invalid", peer_node->GetName().c_str(), peer_node->GetType().c_str()); - GELOGE(FAILED, " peer_node %s is not variable in checkpoint Graph.", peer_node->GetName().c_str()); + GELOGE(FAILED, "[Check][Param] peer_node %s is not variable in checkpoint Graph.", peer_node->GetName().c_str()); return FAILED; } auto var_name = peer_node->GetName(); @@ -3707,7 +3750,7 @@ Status GraphManager::SaveCheckPointResult(const Graph &graph, const std::vector< if (in->GetIdx() >= static_cast(outputs.size())) { REPORT_INNER_ERROR("E19999", "In index:%u of netoutput is out of outputs.size:%zu range in checkpoint Graph, " "check invalid", in->GetIdx(), outputs.size()); - GELOGE(FAILED, "variable index[%d] out of range[%zu].", in->GetIdx(), outputs.size()); + GELOGE(FAILED, "[Check][Param] variable index[%d] out of range[%zu].", in->GetIdx(), outputs.size()); return FAILED; } var_results.emplace(var_name, outputs.at(in->GetIdx())); diff --git a/ge/graph/manager/graph_manager.h b/ge/graph/manager/graph_manager.h index 36c1143f..945a5e5d 100644 --- a/ge/graph/manager/graph_manager.h +++ b/ge/graph/manager/graph_manager.h @@ -427,6 +427,8 @@ class GraphManager { void SetSessionGraphId(ComputeGraphPtr compute_graph, uint32_t graph_id); + Status ModifyDataIndex(const Graph &graph, const std::map &graph_option); + static Status CheckGraphAdded(const GraphId &graph_id, const Graph &graph); std::atomic_bool thread_run_flag_; diff --git a/ge/graph/manager/graph_manager_utils.cc b/ge/graph/manager/graph_manager_utils.cc index 0f93654c..a70b15a6 100644 --- 
a/ge/graph/manager/graph_manager_utils.cc +++ b/ge/graph/manager/graph_manager_utils.cc @@ -46,7 +46,7 @@ GraphNode::GraphNode(GraphId graph_id) sem_(1) { graph_run_async_listener_ = MakeShared(); if (graph_run_async_listener_ == nullptr) { - GELOGE(MEMALLOC_FAILED, "Make shared failed"); + GELOGE(MEMALLOC_FAILED, "[New][RunAsyncListener] failed"); } } @@ -82,7 +82,8 @@ SubGraphInfo::~SubGraphInfo() { rt_ret = rtFreeHost(buffer_addr); buffer_addr = nullptr; if (rt_ret != RT_ERROR_NONE) { - GELOGE(rt_ret, "[GraphManager] subgraph free buffer failed, modelId = %u", model_id_info_.model_id); + GELOGE(rt_ret, "[Call][RtFreeHost] subgraph free buffer failed, modelId = %u", + model_id_info_.model_id); } } } @@ -94,8 +95,8 @@ Status SubGraphInfo::FreeInOutBuffer() { rtError_t rt_ret; rt_ret = rtFreeHost(*iter); if (rt_ret != RT_ERROR_NONE) { - REPORT_CALL_ERROR("E19999", "Call rtFreeHost fail"); - GELOGE(rt_ret, "[GraphManager] subgraph free buffer failed, modelId = %u", model_id_info_.model_id); + REPORT_CALL_ERROR("E19999", "Call rtFreeHost fail, ret:%d", rt_ret); + GELOGE(rt_ret, "[Call][RtFreeHost] subgraph free buffer failed, modelId = %u", model_id_info_.model_id); buffer_addr_.erase(buffer_addr_.begin(), iter); return GE_GRAPH_FREE_FAILED; } @@ -131,7 +132,7 @@ Status GraphModelListener::OnComputeDone(uint32_t model_id, uint32_t task_id, ui uint32_t GraphModelListener::GetResultCode() const { if (!is_finished_) { REPORT_CALL_ERROR("E19999", "Model not run finish"); - GELOGE(INTERNAL_ERROR, "[GraphManager] model not run finish."); + GELOGE(INTERNAL_ERROR, "[Check][Param] model not run finish."); return INTERNAL_ERROR; } return result_code_; @@ -170,7 +171,9 @@ bool HasCalcOp(const ComputeGraphPtr &graph) { for (const auto &node : graph->GetAllNodes()) { OpDescPtr op_desc = node->GetOpDesc(); - GE_IF_BOOL_EXEC(op_desc == nullptr, GELOGE(FAILED, "Node GetOpDesc is nullptr"); return false); + GE_IF_BOOL_EXEC(op_desc == nullptr, + REPORT_INNER_ERROR("E19999", "GetOpDesc 
failed, Node GetOpDesc is nullptr"); + GELOGE(FAILED, "[Get][OpDesc] failed, Node GetOpDesc is nullptr"); return false); if (calc_op_type.find(op_desc->GetType()) != calc_op_type.end()) { return true; } diff --git a/ge/graph/manager/graph_mem_allocator.cc b/ge/graph/manager/graph_mem_allocator.cc index 24e75356..dd38274e 100755 --- a/ge/graph/manager/graph_mem_allocator.cc +++ b/ge/graph/manager/graph_mem_allocator.cc @@ -17,11 +17,9 @@ #include "graph/manager/graph_mem_allocator.h" #include -#include "graph/manager/graph_caching_allocator.h" -#include "graph/manager/rdma_pool_allocator.h" -#include "graph/manager/host_mem_allocator.h" + namespace ge { -void MemoryAllocator::Initialize(uint32_t device_id) { +Status MemoryAllocator::Initialize(uint32_t device_id) { GELOGI("MemoryAllocator::Initialize"); // when redo Initialize free memory @@ -31,6 +29,7 @@ void MemoryAllocator::Initialize(uint32_t device_id) { } } memory_base_map_.clear(); + return SUCCESS; } void MemoryAllocator::Finalize(uint32_t device_id) { @@ -51,9 +50,7 @@ uint8_t *MemoryAllocator::MallocMemory(const string &purpose, size_t memory_size if (rtMalloc(reinterpret_cast(&memory_addr), memory_size, memory_type_) != RT_ERROR_NONE) { REPORT_CALL_ERROR("E19999", "Call rtMalloc fail, purpose:%s, size:%zu, device_id:%u", purpose.c_str(), memory_size, device_id); - GELOGE(ge::INTERNAL_ERROR, - "MemoryAllocator::MallocMemory device_id = %u," - " size= %lu", + GELOGE(ge::INTERNAL_ERROR, "[Malloc][Memory] failed, device_id = %u, size= %lu", device_id, memory_size); return nullptr; @@ -69,7 +66,7 @@ Status MemoryAllocator::FreeMemory(uint8_t *memory_addr, uint32_t device_id) con auto rtRet = rtFree(memory_addr); if (rtRet != RT_ERROR_NONE) { REPORT_CALL_ERROR("E19999", "Call rtFree fail, device_id:%u", device_id); - GELOGE(rtRet, "MemoryAllocator::MallocMemory device_id = %u", device_id); + GELOGE(rtRet, "[Call][RtFree] failed, device_id = %u", device_id); return RT_ERROR_TO_GE_STATUS(rtRet); } memory_addr = 
nullptr; @@ -89,10 +86,8 @@ uint8_t *MemoryAllocator::MallocMemory(const string &purpose, const string &memo if (memory_addr == nullptr) { REPORT_CALL_ERROR("E19999", "Malloc Memory fail, purpose:%s, memory_key:%s, memory_size:%zu, device_id:%u", purpose.c_str(), memory_key.c_str(), memory_size, device_id); - GELOGE(ge::INTERNAL_ERROR, - "MemoryAllocator::MallocMemory failed," - " memory_key[%s], size = %lu.", - memory_key.c_str(), memory_size); + GELOGE(ge::INTERNAL_ERROR, "[Malloc][Memory] failed, memory_key[%s], size = %lu, device_id:%u.", + memory_key.c_str(), memory_size, device_id); return nullptr; } @@ -127,10 +122,8 @@ Status MemoryAllocator::FreeMemory(const string &memory_key, uint32_t device_id) if (FreeMemory(it->second.memory_addr_, device_id) != ge::SUCCESS) { REPORT_CALL_ERROR("E19999", "Free Memory fail, memory_key:%s, device_id:%u", memory_key.c_str(), device_id); - GELOGE(ge::INTERNAL_ERROR, - "MemoryAllocator::FreeMemory rtFree failed," - " memory_key[%s]", - memory_key.c_str()); + GELOGE(ge::INTERNAL_ERROR, "[Free][Memory] failed, memory_key[%s], device_id:%u", + memory_key.c_str(), device_id); return ge::INTERNAL_ERROR; } @@ -152,113 +145,4 @@ uint8_t *MemoryAllocator::GetMemoryAddr(const string &memory_key, uint32_t devic return it->second.memory_addr_; } - -MemManager::MemManager() {} - -MemManager::~MemManager() { Finalize(); } - -MemManager &MemManager::Instance() { - static MemManager mem_manager; - return mem_manager; -} - -MemoryAllocator *MemManager::Instance(rtMemType_t memory_type) { return Instance().GetMemoryAllocator(memory_type); } - -Status MemManager::Initialize(const std::vector &memory_type) { - std::lock_guard lock(allocator_mutex_); - MemoryAllocator *memory_allocator = nullptr; - for (unsigned int index : memory_type) { - auto it = memory_allocator_map_.find(index); - if (it == memory_allocator_map_.end()) { - memory_allocator = new (std::nothrow) MemoryAllocator(index); - - if (memory_allocator != nullptr) { - 
memory_allocator_map_[index] = memory_allocator; - GELOGI("Create MemoryAllocator memory type[%u] success.", index); - } else { - REPORT_CALL_ERROR("E19999", "New MemoryAllocator fail, index:%u", index); - GELOGE(ACL_ERROR_GE_MEMORY_ALLOCATION, "Alloc MemoryAllocator failed."); - } - } else { - memory_allocator = it->second; - } - - if (memory_allocator == nullptr) { - GELOGE(ACL_ERROR_GE_MEMORY_ALLOCATION, "Create MemoryAllocator failed."); - return ACL_ERROR_GE_MEMORY_ALLOCATION; - } else { - memory_allocator->Initialize(0); - } - } - - auto ret = InitAllocator(memory_type, caching_allocator_map_); - if (ret != SUCCESS) { - GELOGE(ret, "Create CachingAllocator failed."); - return ret; - } - - ret = InitAllocator(memory_type, rdma_allocator_map_); - if (ret != SUCCESS) { - GELOGE(ret, "Create RdmaAllocator failed."); - return ret; - } - - ret = InitAllocator(memory_type, host_allocator_map_); - if (ret != SUCCESS) { - GELOGE(ret, "Create HostMemAllocator failed."); - return ret; - } - return SUCCESS; -} - -template -void FinalizeAllocatorMap(std::map &allocate_map) { - for (auto &allocator : allocate_map) { - if (allocator.second != nullptr) { - allocator.second->Finalize(); - delete allocator.second; - allocator.second = nullptr; - } - } - allocate_map.clear(); -} - -void MemManager::Finalize() noexcept { - GELOGI("Finalize."); - std::lock_guard lock(allocator_mutex_); - // caching and rdma allocator use memory allocator, so finalize them first - FinalizeAllocatorMap(caching_allocator_map_); - FinalizeAllocatorMap(rdma_allocator_map_); - FinalizeAllocatorMap(host_allocator_map_); - FinalizeAllocatorMap(memory_allocator_map_); -} - -MemoryAllocator *MemManager::GetMemoryAllocator(rtMemType_t memory_type) { - std::lock_guard lock(allocator_mutex_); - MemoryAllocator *memory_allocator = nullptr; - auto it = memory_allocator_map_.find(memory_type); - if (it != memory_allocator_map_.end()) { - memory_allocator = it->second; - } - - // Usually impossible - if 
(memory_allocator == nullptr) { - GELOGE(ACL_ERROR_GE_INTERNAL_ERROR, "GetMemoryAllocator failed, memory type is %u.", memory_type); - static MemoryAllocator default_memory_allocator(RT_MEMORY_RESERVED); - return &default_memory_allocator; - } - - return memory_allocator; -} - -CachingAllocator &MemManager::CachingInstance(rtMemType_t memory_type) { - return Instance().GetAllocator(memory_type, caching_allocator_map_); -} - -RdmaPoolAllocator &MemManager::RdmaPoolInstance(rtMemType_t memory_type) { - return Instance().GetAllocator(memory_type, rdma_allocator_map_); -} -HostMemAllocator &MemManager::HostMemInstance(rtMemType_t memory_type) { - return Instance().GetAllocator(memory_type, host_allocator_map_); -} } // namespace ge diff --git a/ge/graph/manager/graph_mem_allocator.h b/ge/graph/manager/graph_mem_allocator.h index 9f8b86b2..b6d73f0a 100644 --- a/ge/graph/manager/graph_mem_allocator.h +++ b/ge/graph/manager/graph_mem_allocator.h @@ -26,7 +26,6 @@ #include "framework/common/debug/ge_log.h" #include "framework/common/ge_inner_error_codes.h" -#include "graph/manager/host_mem_allocator.h" #include "graph/node.h" #include "runtime/mem.h" @@ -71,9 +70,9 @@ class MemoryAllocator { /// @ingroup ge_graph /// @brief memory allocator init /// @param [in] options user config params - /// @return void + /// @return Status of init /// - void Initialize(uint32_t device_id = 0); + Status Initialize(uint32_t device_id = 0); /// /// @ingroup ge_graph @@ -136,109 +135,6 @@ class MemoryAllocator { bool mem_malloced_; map memory_base_map_; }; - -using MemoryAllocatorPtr = std::shared_ptr; -class CachingAllocator; -class RdmaPoolAllocator; -class MemManager { - public: - MemManager(); - virtual ~MemManager(); - static MemManager &Instance(); - static MemoryAllocator *Instance(rtMemType_t memory_type); - CachingAllocator &CachingInstance(rtMemType_t memory_type); - RdmaPoolAllocator &RdmaPoolInstance(rtMemType_t memory_type); - HostMemAllocator &HostMemInstance(rtMemType_t 
memory_type); - MemManager(const MemManager &) = delete; - MemManager &operator=(const MemManager &) = delete; - /// - /// @ingroup ge_graph - /// @brief memory allocator manager init - /// @param [in] options user config params - /// @return Status result of function - /// - Status Initialize(const std::vector &memory_type); - - /// - /// @ingroup ge_graph - /// @brief memory allocator finalize - /// @return void - /// - void Finalize() noexcept; - - private: - /// - /// @ingroup ge_graph - /// @brief ge memory allocator - /// @param [in] memory_type memory type - /// @return MemoryAllocator ptr - /// - MemoryAllocator *GetMemoryAllocator(rtMemType_t memory_type); - - /// - /// @ingroup ge_graph - /// @param [in] memory_type memory type - /// @param [in] allocate_map memory allocator map - /// @return Status result of function - /// - template - Status InitAllocator(const std::vector &memory_type, std::map &allocate_map) { - T *allocator = nullptr; - for (unsigned int index : memory_type) { - auto it = allocate_map.find(index); - if (it == allocate_map.end()) { - allocator = new (std::nothrow) T(index); - if (allocator != nullptr) { - allocate_map[index] = allocator; - GELOGI("Create Allocator memory type[%u] success.", index); - } else { - GELOGE(ACL_ERROR_GE_MEMORY_ALLOCATION, "Alloc Allocator failed."); - } - } else { - allocator = it->second; - } - - if (allocator == nullptr) { - GELOGE(ACL_ERROR_GE_MEMORY_ALLOCATION, "Create Allocator failed."); - return ACL_ERROR_GE_MEMORY_ALLOCATION; - } else { - if (allocator->Initialize() != SUCCESS) { - return ACL_ERROR_GE_INTERNAL_ERROR; - } - } - } - return SUCCESS; - } - /// - /// @ingroup ge_graph - /// @param [in] memory_type memory type - /// @param [in] allocate_map memory allocator map - /// @return Allocator ptr - /// - template - T &GetAllocator(rtMemType_t memory_type, std::map allocate_map) { - std::lock_guard lock(allocator_mutex_); - T *allocator = nullptr; - auto it = allocate_map.find(memory_type); - if 
(it != allocate_map.end()) { - allocator = it->second; - } - - // Usually impossible - if (allocator == nullptr) { - GELOGW("Get allocator failed, memory type is %u.", memory_type); - static T default_allocator(RT_MEMORY_RESERVED); - return default_allocator; - } - return *allocator; - } - - std::map memory_allocator_map_; - std::map caching_allocator_map_; - std::map rdma_allocator_map_; - std::map host_allocator_map_; - std::recursive_mutex allocator_mutex_; -}; } // namespace ge #endif // GE_GRAPH_MANAGER_GRAPH_MEM_ALLOCATOR_H_ diff --git a/ge/graph/manager/graph_mem_manager.cc b/ge/graph/manager/graph_mem_manager.cc new file mode 100644 index 00000000..21eaf302 --- /dev/null +++ b/ge/graph/manager/graph_mem_manager.cc @@ -0,0 +1,116 @@ +/** + * Copyright 2019-2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "graph/manager/graph_mem_manager.h" + +#include + +namespace ge { +MemManager::MemManager() {} + +MemManager::~MemManager() { Finalize(); } + +MemManager &MemManager::Instance() { + static MemManager mem_manager; + return mem_manager; +} + +Status MemManager::Initialize(const std::vector &memory_type) { + std::lock_guard lock(allocator_mutex_); + if (init_) { + GELOGW("MemManager has been inited."); + return SUCCESS; + } + + auto ret = InitAllocator(memory_type, memory_allocator_map_); + if (ret != SUCCESS) { + GELOGE(ret, "Create MemoryAllocator failed."); + return ret; + } + + ret = InitAllocator(memory_type, caching_allocator_map_); + if (ret != SUCCESS) { + GELOGE(ret, "Create CachingAllocator failed."); + return ret; + } + + ret = InitAllocator(memory_type, rdma_allocator_map_); + if (ret != SUCCESS) { + GELOGE(ret, "Create RdmaAllocator failed."); + return ret; + } + + ret = InitAllocator(memory_type, host_allocator_map_); + if (ret != SUCCESS) { + GELOGE(ret, "Create HostMemAllocator failed."); + return ret; + } + + ret = InitAllocator(memory_type, session_scope_allocator_map_); + if (ret != SUCCESS) { + GELOGE(ret, "Create SessionScopeMemAllocator failed."); + return ret; + } + init_ = true; + memory_type_ = memory_type; + return SUCCESS; +} + +template +void FinalizeAllocatorMap(std::map &allocate_map) { + for (auto &allocator : allocate_map) { + if (allocator.second != nullptr) { + allocator.second->Finalize(); + delete allocator.second; + allocator.second = nullptr; + } + } + allocate_map.clear(); +} + +void MemManager::Finalize() noexcept { + GELOGI("Finalize."); + std::lock_guard lock(allocator_mutex_); + // caching and rdma allocator use memory allocator, so finalize them first + FinalizeAllocatorMap(session_scope_allocator_map_); + FinalizeAllocatorMap(caching_allocator_map_); + FinalizeAllocatorMap(rdma_allocator_map_); + FinalizeAllocatorMap(host_allocator_map_); + FinalizeAllocatorMap(memory_allocator_map_); + init_ = false; +
memory_type_.clear(); +} + +MemoryAllocator &MemManager::MemInstance(rtMemType_t memory_type) { + return GetAllocator(memory_type, memory_allocator_map_); +} + +CachingAllocator &MemManager::CachingInstance(rtMemType_t memory_type) { + return GetAllocator(memory_type, caching_allocator_map_); +} + +RdmaPoolAllocator &MemManager::RdmaPoolInstance(rtMemType_t memory_type) { + return GetAllocator(memory_type, rdma_allocator_map_); +} + +HostMemAllocator &MemManager::HostMemInstance(rtMemType_t memory_type) { + return GetAllocator(memory_type, host_allocator_map_); +} + +SessionScopeMemAllocator &MemManager::SessionScopeMemInstance(rtMemType_t memory_type) { + return GetAllocator(memory_type, session_scope_allocator_map_); +} +} // namespace ge diff --git a/ge/graph/manager/graph_mem_manager.h b/ge/graph/manager/graph_mem_manager.h new file mode 100644 index 00000000..d7993ed4 --- /dev/null +++ b/ge/graph/manager/graph_mem_manager.h @@ -0,0 +1,141 @@ +/** + * Copyright 2019-2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef GE_GRAPH_MANAGER_GRAPH_MEM_MANAGER_H_ +#define GE_GRAPH_MANAGER_GRAPH_MEM_MANAGER_H_ + +#include +#include +#include +#include +#include +#include + +#include "framework/common/debug/ge_log.h" +#include "framework/common/ge_inner_error_codes.h" +#include "graph/manager/graph_mem_allocator.h" +#include "graph/manager/graph_caching_allocator.h" +#include "graph/manager/host_mem_allocator.h" +#include "graph/manager/rdma_pool_allocator.h" +#include "graph/manager/host_mem_allocator.h" +#include "graph/manager/session_scope_mem_allocator.h" +#include "graph/node.h" +#include "runtime/mem.h" + +namespace ge { +using MemoryAllocatorPtr = std::shared_ptr; + +class MemManager { + public: + MemManager(); + virtual ~MemManager(); + static MemManager &Instance(); + MemoryAllocator &MemInstance(rtMemType_t memory_type); + CachingAllocator &CachingInstance(rtMemType_t memory_type); + RdmaPoolAllocator &RdmaPoolInstance(rtMemType_t memory_type); + HostMemAllocator &HostMemInstance(rtMemType_t memory_type); + SessionScopeMemAllocator &SessionScopeMemInstance(rtMemType_t memory_type); + MemManager(const MemManager &) = delete; + MemManager &operator=(const MemManager &) = delete; + /// + /// @ingroup ge_graph + /// @brief memory allocator manager init + /// @param [in] options user config params + /// @return Status result of function + /// + Status Initialize(const std::vector &memory_type); + + /// + /// @ingroup ge_graph + /// @brief memory allocator finalize + /// @return void + /// + void Finalize() noexcept; + + const std::vector &GetAllMemoryType() const { return memory_type_; } + + private: + /// + /// @ingroup ge_graph + /// @param [in] memory_type memory type + /// @param [in] allocate_map memory allocator map + /// @return Status result of function + /// + template + Status InitAllocator(const std::vector &memory_type, std::map &allocate_map) { + T *allocator = nullptr; + for (unsigned int index : memory_type) { + auto it = allocate_map.find(index); + if 
(it == allocate_map.end()) { + allocator = new (std::nothrow) T(index); + if (allocator != nullptr) { + allocate_map[index] = allocator; + GELOGI("Create Allocator memory type[%u] success.", index); + } else { + REPORT_CALL_ERROR("E19999", "New MemoryAllocator fail, index:%u", index); + GELOGE(ACL_ERROR_GE_MEMORY_ALLOCATION, "Alloc Allocator failed."); + } + } else { + allocator = it->second; + } + + if (allocator == nullptr) { + GELOGE(ACL_ERROR_GE_MEMORY_ALLOCATION, "Create Allocator failed."); + return ACL_ERROR_GE_MEMORY_ALLOCATION; + } else { + if (allocator->Initialize() != SUCCESS) { + return ACL_ERROR_GE_INTERNAL_ERROR; + } + } + } + return SUCCESS; + } + /// + /// @ingroup ge_graph + /// @param [in] memory_type memory type + /// @param [in] allocate_map memory allocator map + /// @return Allocator ptr + /// + template + T &GetAllocator(rtMemType_t memory_type, std::map allocate_map) { + std::lock_guard lock(allocator_mutex_); + T *allocator = nullptr; + auto it = allocate_map.find(memory_type); + if (it != allocate_map.end()) { + allocator = it->second; + } + + // Usually impossible + if (allocator == nullptr) { + GELOGW("Get allocator failed, memory type is %u.", memory_type); + static T default_allocator(RT_MEMORY_RESERVED); + return default_allocator; + } + return *allocator; + } + + std::map memory_allocator_map_; + std::map caching_allocator_map_; + std::map rdma_allocator_map_; + std::map host_allocator_map_; + std::map session_scope_allocator_map_; + std::recursive_mutex allocator_mutex_; + std::vector memory_type_; + bool init_ = false; +}; +} // namespace ge + +#endif // GE_GRAPH_MANAGER_GRAPH_MEM_MANAGER_H_ diff --git a/ge/graph/manager/graph_var_manager.cc b/ge/graph/manager/graph_var_manager.cc index 79103b88..64918aab 100755 --- a/ge/graph/manager/graph_var_manager.cc +++ b/ge/graph/manager/graph_var_manager.cc @@ -17,8 +17,7 @@ #include "graph/manager/graph_var_manager.h" #include "graph/debug/ge_attr_define.h" -#include
"graph/manager/graph_mem_allocator.h" -#include "graph/manager/rdma_pool_allocator.h" +#include "graph/manager/graph_mem_manager.h" #include "graph/manager/trans_var_data_utils.h" #include "graph/utils/type_utils.h" @@ -41,7 +40,8 @@ ge::Status VarResource::GetVarAddr(const std::string &var_name, const ge::GeTens if (dev_ptr == nullptr) { REPORT_INNER_ERROR("E19999", "Param dev_ptr is nullptr, var_name:%s, session_id:%lu, " "check invalid", var_name.c_str(), session_id_); - GELOGE(FAILED, "[GetVarAddr] dev_ptr is null!"); + GELOGE(FAILED, "[Check][Param] Param dev_ptr is nullptr, var_name:%s, session_id:%lu", + var_name.c_str(), session_id_); return FAILED; } std::string var_key = VarKey(var_name, tensor_desc); @@ -52,7 +52,8 @@ ge::Status VarResource::GetVarAddr(const std::string &var_name, const ge::GeTens REPORT_INNER_ERROR("E19999", "var_key:%s can't find in var_addr_mgr_map_, var_name:%s, session_id:%lu, " "check invalid", var_key.c_str(), var_name.c_str(), session_id_); - GELOGE(FAILED, "VarResource::GetVarAddr failed, var_key %s", var_key.c_str()); + GELOGE(FAILED, "[Check][Param] var_key:%s can't find in var_addr_mgr_map_, var_name:%s, session_id:%lu", + var_key.c_str(), var_name.c_str(), session_id_); return FAILED; } @@ -110,7 +111,8 @@ ge::Status VarResource::SaveVarAddr(const std::string &var_name, const ge::GeTen REPORT_INNER_ERROR("E19999", "var_key:%s conflict in var_addr_mgr_map_, var_name:%s, session_id:%lu, " "check invalid", var_key.c_str(), var_name.c_str(), session_id_); - GELOGE(FAILED, "VarResource::SaveVarAddr, var_key %s save addr conflict", var_key.c_str()); + GELOGE(FAILED, "[Check][Param] var_key:%s conflict in var_addr_mgr_map_, var_name:%s, session_id:%lu", + var_key.c_str(), var_name.c_str(), session_id_); return FAILED; } @@ -146,14 +148,15 @@ ge::Status VarResource::RenewCurVarDesc(const std::string &var_name, const ge::O if (op_desc == nullptr) { REPORT_INNER_ERROR("E19999", "Param op_desc is nullptr, var_name:%s, session_id:%lu, 
check invalid", var_name.c_str(), session_id_); - GELOGE(FAILED, "[RenewCurVarDesc] renew var desc fail! input opdesc is null!"); + GELOGE(FAILED, "[Check][Param] input opdesc is nullptr, var_name:%s, session_id:%lu", + var_name.c_str(), session_id_); return FAILED; } ge::GeTensorDesc curr_desc; ge::Status ret = GetCurVarDesc(var_name, curr_desc); if (ret != SUCCESS) { - GELOGE(FAILED, "[RenewCurVarDesc] Get var desc fail!"); + GELOGE(FAILED, "[Get][CurVarDesc] fail, var_name:%s, session_id:%lu", var_name.c_str(), session_id_); return FAILED; } std::string key = VarKey(var_name, curr_desc); @@ -165,7 +168,8 @@ ge::Status VarResource::RenewCurVarDesc(const std::string &var_name, const ge::O REPORT_INNER_ERROR("E19999", "var_key:%s can't find in var_addr_mgr_map_, var_name:%s, session_id:%lu, op:%s(%s), " "check invalid", key.c_str(), var_name.c_str(), session_id_, op_desc->GetName().c_str(), op_desc->GetType().c_str()); - GELOGE(FAILED, "[RenewCurVarDesc] can't find ele with key [%s]", key.c_str()); + GELOGE(FAILED, "[Check][Param] var_key:%s can't find in var_addr_mgr_map_, var_name:%s, session_id:%lu, op:%s(%s)", + key.c_str(), var_name.c_str(), session_id_, op_desc->GetName().c_str(), op_desc->GetType().c_str()); return FAILED; } auto val = iter->second; @@ -286,14 +290,15 @@ Status HbmMemResource::AssignVarMem(const std::string &var_name, uint64_t size, if (total_size_ < var_mem_size_) { REPORT_INNER_ERROR("E19999", "VarMemMaxSize:%lu < var_mem_size_:%lu, var_size:%lu, var_name:%s, check invalid" "", total_size_, var_mem_size_, size, var_name.c_str()); - GELOGE(PARAM_INVALID, "total_size_: %lu is smaller than var_mem_size_: %lu", total_size_, var_mem_size_); + GELOGE(PARAM_INVALID, "[Check][Param] total_size_:%lu is smaller than var_mem_size_:%lu, var_name:%s", + total_size_, var_mem_size_, var_name.c_str()); return PARAM_INVALID; } uint64_t free_size = total_size_ - var_mem_size_; if (free_size < (size + kSessionMemAlignSize * kSessionMemAlignUnit)) { 
REPORT_INNER_ERROR("E19999", "free_size:%lu not enough, var_align_size:%lu, var_name:%s, check invalid", free_size, size, var_name.c_str()); - GELOGE(PARAM_INVALID, "Out of memory : current var size[%lu] exceeds total var size[%lu]", + GELOGE(PARAM_INVALID, "[Check][Param] Out of memory: current var size[%lu] exceeds total var size[%lu]", size + kSessionMemAlignSize * kSessionMemAlignUnit + var_mem_size_, total_size_); return PARAM_INVALID; } @@ -318,7 +323,7 @@ Status RdmaMemResource::AssignVarMem(const std::string &var_name, uint64_t size, if (buffer == nullptr) { REPORT_CALL_ERROR("E19999", "malloc rdma memory fail, var_size:%lu, var_name:%s", size, var_name.c_str()); - GELOGE(MEMALLOC_FAILED, "Failed to malloc rdma memory for node %s, size = %lu", var_name.c_str(), size); + GELOGE(MEMALLOC_FAILED, "[Malloc][RdmaMemory] for node %s failed, size = %lu", var_name.c_str(), size); return MEMALLOC_FAILED; } address = static_cast(reinterpret_cast(buffer)); @@ -469,7 +474,8 @@ int64_t VarManager::GetVarMemSize(rtMemType_t memory_type) { if (mem_resource == nullptr) { REPORT_INNER_ERROR("E19999", "Find no mem_resource in map, memory_type:%d, session_id:%lu", memory_type, session_id_); - GELOGE(ge::INTERNAL_ERROR, "MemResource is invalid."); + GELOGE(ge::INTERNAL_ERROR, "[Check][Param] MemResource is invalid, memory_type:%d, session_id:%lu", + memory_type, session_id_); return 0; } return mem_resource->GetVarMemSize(); @@ -484,7 +490,8 @@ Status VarManager::UpdateVarMemSize(rtMemType_t memory_type, int64_t mem_size) { if (mem_resource == nullptr) { REPORT_CALL_ERROR("E19999", "memory_type:%d invalid or New MemResource fail, session_id:%lu", memory_type, session_id_); - GELOGE(ge::INTERNAL_ERROR, "Alloc MemResource failed, memory_type = %u.", memory_type); + GELOGE(ge::INTERNAL_ERROR, "[Alloc][MemResource] failed, memory_type:%u, session_id:%lu", + memory_type, session_id_); return ge::INTERNAL_ERROR; } else { mem_resource_map_[memory_type] = mem_resource; @@ -496,7 
+503,8 @@ Status VarManager::UpdateVarMemSize(rtMemType_t memory_type, int64_t mem_size) { if (mem_resource == nullptr) { REPORT_INNER_ERROR("E19999", "MemResource is invalid, memory_type:%d, session_id:%lu", memory_type, session_id_); - GELOGE(ge::INTERNAL_ERROR, "MemResource is invalid."); + GELOGE(ge::INTERNAL_ERROR, "[Check][Param] MemResource is invalid, memory_type:%u, session_id:%lu", + memory_type, session_id_); return FAILED; } mem_resource->UpdateVarMemSize(mem_size); @@ -516,7 +524,8 @@ ge::Status VarManager::AssignVarMem(const std::string &var_name, const ge::GeTen if (result != ge::SUCCESS) { REPORT_CALL_ERROR("E19999", "Get size from tensor fail, var_name:%s, memory_type:%d, session_id:%lu", var_name.c_str(), memory_type, session_id_); - GELOGE(result, "get size from TensorDesc failed"); + GELOGE(result, "[Get][Size] from tensor fail, var_name:%s, memory_type:%u, session_id:%lu", + var_name.c_str(), memory_type, session_id_); return result; } @@ -527,7 +536,8 @@ ge::Status VarManager::AssignVarMem(const std::string &var_name, const ge::GeTen if (mem_resource == nullptr) { REPORT_CALL_ERROR("E19999", "memory_type:%d invalid or New MemResource fail, session_id:%lu", memory_type, session_id_); - GELOGE(ge::INTERNAL_ERROR, "Alloc MemResource failed, memory_type = %u.", memory_type); + GELOGE(ge::INTERNAL_ERROR, "[Alloc][MemResource] failed, memory_type:%u, session_id:%lu.", + memory_type, session_id_); return ge::INTERNAL_ERROR; } else { mem_resource_map_[memory_type] = mem_resource; @@ -539,7 +549,8 @@ ge::Status VarManager::AssignVarMem(const std::string &var_name, const ge::GeTen if (mem_resource == nullptr) { REPORT_INNER_ERROR("E19999", "MemResource is invalid, memory_type:%d, session_id:%lu", memory_type, session_id_); - GELOGE(ge::INTERNAL_ERROR, "MemResource is invalid, memory_type = %u.", memory_type); + GELOGE(ge::INTERNAL_ERROR, "[Check][Param] MemResource is invalid, memory_type:%u, session_id:%lu.", + memory_type, session_id_); return 
ge::INTERNAL_ERROR; } @@ -568,14 +579,15 @@ ge::Status VarManager::AssignVarMem(const std::string &var_name, const ge::GeTen if (can_not_reuse_old_memory) { result = mem_resource->AssignVarMem(var_name, tensor_desc_size, session_id_, mem_offset); if (result != SUCCESS) { - GELOGE(ge::INTERNAL_ERROR, "AssignVarMem by offset failed."); + GELOGE(ge::INTERNAL_ERROR, "[Assign][VarMem] by offset failed, session_id:%lu.", session_id_); return ge::INTERNAL_ERROR; } result = var_resource_->SaveVarAddr( var_name, tensor_desc, reinterpret_cast(static_cast(mem_offset)), memory_type); if (result != SUCCESS) { - GELOGE(ge::INTERNAL_ERROR, "AssignVarMem by offset failed."); + GELOGE(ge::INTERNAL_ERROR, "[Save][VarAddr] by offset failed, memory type:%u, session_id:%lu.", + memory_type, session_id_); return ge::INTERNAL_ERROR; } } @@ -682,7 +694,8 @@ ge::Status VarManager::RenewCurVarDesc(const std::string &var_name, ge::OpDescPt REPORT_INNER_ERROR("E19999", "VarManager has not been init, op:%s(%s), session_id:%lu, check invalid", op_desc->GetName().c_str(), op_desc->GetType().c_str(), session_id_); - GELOGE(ge::INTERNAL_ERROR, "VarManager has not been init."); + GELOGE(ge::INTERNAL_ERROR, "[Check][Param] VarManager has not been init, op:%s(%s), session_id:%lu", + op_desc->GetName().c_str(), op_desc->GetType().c_str(), session_id_); return ge::INTERNAL_ERROR; } return var_resource_->RenewCurVarDesc(var_name, std::move(op_desc)); @@ -728,12 +741,10 @@ ge::Status VarManager::MallocVarMemory(size_t memory_size) { var_memory_size = (var_memory_size + kSessionMemAlignSize - 1) / kSessionMemAlignSize * kSessionMemAlignSize; const string purpose("variables and constant op memory in training network."); - var_mem_base = MemManager::Instance(RT_MEMORY_HBM)->MallocMemory(purpose, memory_key, var_memory_size); + var_mem_base = MemManager::Instance().MemInstance(RT_MEMORY_HBM).MallocMemory(purpose, memory_key, var_memory_size); if (var_mem_base == nullptr) { - GELOGE(ge::INTERNAL_ERROR, - 
"VarManager::MallocVarMemory failed " - "session_id = %s", - memory_key.c_str()); + GELOGE(ge::INTERNAL_ERROR, "[Malloc][VarMemory] failed, size:%zu, session_id:%s", + var_memory_size, memory_key.c_str()); return ge::INTERNAL_ERROR; } return SUCCESS; @@ -745,7 +756,7 @@ uint8_t *VarManager::GetVarMemoryBase(rtMemType_t memory_type) { return MemManager::Instance().RdmaPoolInstance(RT_MEMORY_HBM).GetRdmaBaseAddr(); } string memory_key = std::to_string(session_id_); - return MemManager::Instance(memory_type)->GetMemoryAddr(memory_key); + return MemManager::Instance().MemInstance(memory_type).GetMemoryAddr(memory_key); } uint8_t *VarManager::GetVarMemoryAddr(uint8_t *logic_addr, rtMemType_t memory_type) { @@ -754,7 +765,7 @@ uint8_t *VarManager::GetVarMemoryAddr(uint8_t *logic_addr, rtMemType_t memory_ty return logic_addr; } string mem_key = std::to_string(session_id_); - uint8_t *mem_base = MemManager::Instance(memory_type)->GetMemoryAddr(mem_key); + uint8_t *mem_base = MemManager::Instance().MemInstance(memory_type).GetMemoryAddr(mem_key); if (mem_base == nullptr) { return nullptr; } @@ -766,7 +777,7 @@ uint8_t *VarManager::GetVarMemoryAddr(uint8_t *logic_addr, rtMemType_t memory_ty ge::Status VarManager::FreeVarMemory() { std::lock_guard lock(mutex_); string memory_key = std::to_string(SessionId()); - return MemManager::Instance(RT_MEMORY_HBM)->FreeMemory(memory_key); + return MemManager::Instance().MemInstance(RT_MEMORY_HBM).FreeMemory(memory_key); } ge::Status VarManager::SetTransRoad(const std::string &var_name, const VarTransRoad &trans_road) { @@ -813,7 +824,7 @@ Status VarManager::SetMemoryMallocSize(const map &options) { string graph_memory_manager_malloc_max_size = it->second; ge::Status ret = ParseMemoryMallocSize(graph_memory_manager_malloc_max_size, graph_mem_max_size_); if (ret != SUCCESS) { - GELOGE(ge::GE_GRAPH_OPTIONS_INVALID, "Parse graph memory manager malloc max size failed."); + GELOGE(ge::GE_GRAPH_OPTIONS_INVALID, "[Call][ParseMemoryMallocSize] 
failed, session id:%lu.", session_id_); return ge::GE_GRAPH_OPTIONS_INVALID; } GELOGI("The max size for graph mem is set to %zu", graph_mem_max_size_); @@ -826,7 +837,7 @@ Status VarManager::SetMemoryMallocSize(const map &options) { string memory_var_manager_malloc_size = it->second; ge::Status ret = ParseMemoryMallocSize(memory_var_manager_malloc_size, var_mem_max_size_); if (ret != SUCCESS) { - GELOGE(ge::GE_GRAPH_OPTIONS_INVALID, "Parse memory var manager malloc size failed."); + GELOGE(ge::GE_GRAPH_OPTIONS_INVALID, "[Call][ParseMemoryMallocSize] failed, session id:%lu.", session_id_); return ge::GE_GRAPH_OPTIONS_INVALID; } } @@ -835,8 +846,8 @@ Status VarManager::SetMemoryMallocSize(const map &options) { if (var_mem_logic_base_ > kMaxMemorySize) { REPORT_INNER_ERROR("E19999", "var_login_base:%zu can not exeed limit:%zu, session_id:%lu, check invalid", var_mem_logic_base_, kMaxMemorySize, session_id_); - GELOGE(ge::GE_GRAPH_OPTIONS_INVALID, "kMemoryVarLogicBase : %zu can not exceed max memory size : %zu.", - var_mem_logic_base_, kMaxMemorySize); + GELOGE(ge::GE_GRAPH_OPTIONS_INVALID, "[Check][Param] kMemoryVarLogicBase:%zu can not exceed " + "max memory size:%zu, session_id:%lu.", var_mem_logic_base_, kMaxMemorySize, session_id_); return ge::GE_GRAPH_OPTIONS_INVALID; } @@ -844,8 +855,8 @@ Status VarManager::SetMemoryMallocSize(const map &options) { if (use_max_mem_size_ > kMaxMemorySize) { REPORT_INNER_ERROR("E19999", "all mem_use size:%zu can not exeed limit:%zu, session_id:%lu, check invalid", use_max_mem_size_, kMaxMemorySize, session_id_); - GELOGE(ge::GE_GRAPH_OPTIONS_INVALID, "kUseMaxMemorySize : %zu can not exceed max memory size : %zu.", - use_max_mem_size_, kMaxMemorySize); + GELOGE(ge::GE_GRAPH_OPTIONS_INVALID, "[Check][Param] kUseMaxMemorySize:%zu can not exceed " + "max memory size:%zu, session_id:%lu.", use_max_mem_size_, kMaxMemorySize, session_id_); return ge::GE_GRAPH_OPTIONS_INVALID; } GELOGI("Set memory malloc size successfully"); @@ -856,7 
+867,7 @@ Status VarManager::ParseMemoryMallocSize(string &memory_size, size_t &result) { if (memory_size.empty()) { REPORT_INNER_ERROR("E19999", "Param memory_size is empty, session_id:%lu, check invalid", session_id_); - GELOGE(GE_GRAPH_OPTIONS_INVALID, "Memory malloc size input is empty."); + GELOGE(GE_GRAPH_OPTIONS_INVALID, "[Check][Param] Memory malloc size input is empty, session_id:%lu.", session_id_); return GE_GRAPH_OPTIONS_INVALID; } // split string by '*' @@ -883,7 +894,9 @@ Status VarManager::ParseMemoryMallocSize(string &memory_size, size_t &result) { if (!isdigit(c)) { REPORT_INNER_ERROR("E19999", "Param memory_size:%s contains non digit, session_id:%lu, check invalid", memory_size.c_str(), session_id_); - GELOGE(GE_GRAPH_OPTIONS_INVALID, "Memory malloc size input contains non digit."); + GELOGE(GE_GRAPH_OPTIONS_INVALID, + "[Check][Param] Memory malloc size:%s input contains non digit, session_id:%lu.", + memory_size.c_str(), session_id_); return GE_GRAPH_OPTIONS_INVALID; } } @@ -892,13 +905,15 @@ Status VarManager::ParseMemoryMallocSize(string &memory_size, size_t &result) { REPORT_INNER_ERROR("E19999", "Param memory_size:%s will overflow after multi all, session_id:%lu, " "check invalid", memory_size.c_str(), session_id_); - GELOGE(FAILED, "Input memory size is out of range."); + GELOGE(FAILED, "[Check][Param] Param memory_size:%s will overflow after multi all, session_id:%lu", + memory_size.c_str(), session_id_); return FAILED); if ((num > kMaxMemorySize) || (result * static_cast(num) > kMaxMemorySize)) { REPORT_INNER_ERROR("E19999", "Param memory_size:%s after multi will exceed limit:%lu, session_id:%lu, " "check invalid", memory_size.c_str(), kMaxMemorySize, session_id_); - GELOGE(FAILED, "Input memory size can not exceed max memory size : %zu.", kMaxMemorySize); + GELOGE(FAILED, "[Check][Param] Input memory size can not exceed max memory size:%zu, session_id:%lu.", + kMaxMemorySize, session_id_); return FAILED; } result *= static_cast(num); @@ 
-1002,10 +1017,7 @@ VarManager *VarManagerPool::GetVarManager(uint64_t session_id) { VarManager *var_manager = new (std::nothrow) VarManager(session_id); if (var_manager == nullptr) { REPORT_INNER_ERROR("E19999", "New VarManager fail, session_id:%lu", session_id); - GELOGE(INTERNAL_ERROR, - "VarManager::Instance find session by " - "session_id[%lu] failed.", - session_id); + GELOGE(INTERNAL_ERROR, "[New][VarManager] fail, session_id:%lu", session_id); static VarManager new_var_manager(0); return &new_var_manager; } diff --git a/ge/graph/manager/host_mem_allocator.cc b/ge/graph/manager/host_mem_allocator.cc index 98f9a313..e349719c 100644 --- a/ge/graph/manager/host_mem_allocator.cc +++ b/ge/graph/manager/host_mem_allocator.cc @@ -21,7 +21,10 @@ namespace ge { const void *HostMemAllocator::Malloc(const std::shared_ptr &aligned_ptr, size_t size) { if (aligned_ptr == nullptr) { - GELOGW("Insert a null aligned_ptr"); + GELOGW("Insert a null aligned_ptr, size=%zu", size); + if (size == 0) { + allocated_blocks_[nullptr] = { size, nullptr }; + } return nullptr; } GELOGD("allocate existed host memory succ, size=%zu", size); @@ -34,8 +37,8 @@ uint8_t *HostMemAllocator::Malloc(size_t size) { std::lock_guard lock(mutex_); std::shared_ptr aligned_ptr = MakeShared(size); if (aligned_ptr == nullptr) { - REPORT_INNER_ERROR("E19999", "New AlignedPtr fail"); - GELOGE(INTERNAL_ERROR, "make shared_ptr for AlignedPtr failed"); + REPORT_INNER_ERROR("E19999", "New AlignedPtr fail, size:%zu", size); + GELOGE(INTERNAL_ERROR, "[Call][MakeShared] for AlignedPtr failed, size:%zu", size); return nullptr; } allocated_blocks_[aligned_ptr->Get()] = { size, aligned_ptr }; @@ -46,7 +49,7 @@ uint8_t *HostMemAllocator::Malloc(size_t size) { Status HostMemAllocator::Free(const void *memory_addr) { if (memory_addr == nullptr) { REPORT_INNER_ERROR("E19999", "Param memory_addr is nullptr, check invalid"); - GELOGE(GE_GRAPH_FREE_FAILED, "Invalid memory pointer"); + GELOGE(GE_GRAPH_FREE_FAILED, 
"[Check][Param] Invalid memory pointer"); return GE_GRAPH_FREE_FAILED; } @@ -54,7 +57,7 @@ Status HostMemAllocator::Free(const void *memory_addr) { auto it = allocated_blocks_.find(memory_addr); if (it == allocated_blocks_.end()) { REPORT_INNER_ERROR("E19999", "Memory_addr is not alloc before, check invalid"); - GELOGE(PARAM_INVALID, "Invalid memory pointer"); + GELOGE(PARAM_INVALID, "[Check][Param] Invalid memory pointer:%p", memory_addr); return PARAM_INVALID; } it->second.second.reset(); diff --git a/ge/graph/manager/host_mem_manager.cc b/ge/graph/manager/host_mem_manager.cc index 2908df39..63265bf4 100644 --- a/ge/graph/manager/host_mem_manager.cc +++ b/ge/graph/manager/host_mem_manager.cc @@ -39,9 +39,8 @@ Status SharedMemAllocator::Allocate(SharedMemInfo &mem_info) { rtMallocHostSharedMemoryOut output_para; rtError_t rt_ret = rtMallocHostSharedMemory(&input_para, &output_para); if (rt_ret != RT_ERROR_NONE) { - REPORT_CALL_ERROR("E19999", "Call rtMallocHostSharedMemory fail, ret:0x%X", - rt_ret); - GELOGE(RT_FAILED, "Call rt api(rtMallocHostSharedMemory) failed, devid:[%u].", device_id); + REPORT_CALL_ERROR("E19999", "Call rtMallocHostSharedMemory fail, ret:0x%X", rt_ret); + GELOGE(RT_FAILED, "[Call][RtMallocHostSharedMemory] failed, devid:[%u].", device_id); return GE_GRAPH_MEMORY_ALLOC_FAILED; } mem_info.fd = output_para.fd; @@ -60,9 +59,8 @@ Status SharedMemAllocator::DeAllocate(SharedMemInfo &mem_info) { mem_info.host_aligned_ptr->MutableGet(), mem_info.device_address}; rtError_t rt_ret = rtFreeHostSharedMemory(&free_para); if (rt_ret != RT_ERROR_NONE) { - REPORT_CALL_ERROR("E19999", "Call rtFreeHostSharedMemory fail, ret:0x%X", - rt_ret); - GELOGE(RT_FAILED, "Call rt api(rtFreeHostSharedMemory) failed, ret: 0x%X.", rt_ret); + REPORT_CALL_ERROR("E19999", "Call rtFreeHostSharedMemory fail, ret:0x%X", rt_ret); + GELOGE(RT_FAILED, "[Call][RtFreeHostSharedMemory] failed, ret:0x%X.", rt_ret); return RT_FAILED; } return ge::SUCCESS; @@ -78,7 +76,7 @@ Status 
HostMemManager::Initialize() { allocator_ = std::unique_ptr(new (std::nothrow) SharedMemAllocator()); if (allocator_ == nullptr) { REPORT_CALL_ERROR("E19999", "New SharedMemAllocator fail"); - GELOGE(GE_GRAPH_MALLOC_FAILED, "Shared memory allocator init failed!"); + GELOGE(GE_GRAPH_MALLOC_FAILED, "[New][SharedMemAllocator] failed!"); return GE_GRAPH_MALLOC_FAILED; } return SUCCESS; @@ -98,9 +96,8 @@ Status HostMemManager::MallocSharedMemory(SharedMemInfo &mem_info) { std::lock_guard lock(mutex_); auto iter = var_memory_base_map_.find(mem_info.op_name); if (iter != var_memory_base_map_.end()) { - REPORT_INNER_ERROR("E19999", "MemInfo.op_name:%s can't find in var_memory_base_map_", - mem_info.op_name.c_str()); - GELOGE(FAILED, "Host shared memory for op %s has been malloced", mem_info.op_name.c_str()); + REPORT_INNER_ERROR("E19999", "Host shared memory for op %s has been malloced", mem_info.op_name.c_str()); + GELOGE(FAILED, "[Check][Param] Host shared memory for op %s has been malloced", mem_info.op_name.c_str()); return FAILED; } mem_info.shm_name = OpNameToShmName(mem_info.op_name); @@ -113,9 +110,8 @@ Status HostMemManager::MallocSharedMemory(SharedMemInfo &mem_info) { Status HostMemManager::QueryVarMemInfo(const string &op_name, uint64_t &base_addr, uint64_t &data_size) { std::lock_guard lock(mutex_); if (var_memory_base_map_.find(op_name) == var_memory_base_map_.end()) { - REPORT_INNER_ERROR("E19999", "MemInfo.op_name:%s can't find in var_memory_base_map_", - op_name.c_str()); - GELOGE(INTERNAL_ERROR, "Find host base base_addr failed,node name:%s!", op_name.c_str()); + REPORT_INNER_ERROR("E19999", "MemInfo.op_name:%s can't find in var_memory_base_map_", op_name.c_str()); + GELOGE(INTERNAL_ERROR, "[Check][Param] Find host base base_addr failed, node name:%s!", op_name.c_str()); return INTERNAL_ERROR; } base_addr = static_cast(reinterpret_cast(var_memory_base_map_[op_name].device_address)); diff --git a/ge/graph/manager/memory_api.cc 
b/ge/graph/manager/memory_api.cc index 415f8088..8ea0594b 100644 --- a/ge/graph/manager/memory_api.cc +++ b/ge/graph/manager/memory_api.cc @@ -19,7 +19,7 @@ #include #include "common/ge/plugin_manager.h" -#include "graph/manager/graph_mem_allocator.h" +#include "graph/manager/graph_mem_manager.h" #include "graph/manager/host_mem_manager.h" #include "graph/manager/rdma_pool_allocator.h" #include "graph/utils/type_utils.h" @@ -50,9 +50,8 @@ Status RdmaRemoteRegister(const std::vector &var_info, rtMemType_t path.append(file_name); string canonical_path = RealPath(path.c_str()); if (canonical_path.empty()) { - REPORT_INNER_ERROR("E19999", "canonical_path:%s is empty, check invalid", - canonical_path.c_str()); - GELOGE(FAILED, "Failed to get realpath of %s", path.c_str()); + REPORT_INNER_ERROR("E19999", "canonical_path:%s is empty, check invalid", canonical_path.c_str()); + GELOGE(FAILED, "[Call][RealPath] Failed to get realpath of %s", path.c_str()); return FAILED; } GELOGI("FileName:%s, Path:%s.", file_name.c_str(), canonical_path.c_str()); @@ -69,15 +68,14 @@ Status RdmaRemoteRegister(const std::vector &var_info, rtMemType_t if (hcom_remote_mem_register == nullptr) { REPORT_CALL_ERROR("E19999", "Symbol HcomRegRemoteAccessMem can't find in %s, check invalid", canonical_path.c_str()); - GELOGE(FAILED, "Failed to invoke hcom_remote_mem_register function."); + GELOGE(FAILED, "[Check][Param] Symbol HcomRegRemoteAccessMem can't find in %s", canonical_path.c_str()); return FAILED; } HcclResult hccl_ret = hcom_remote_mem_register(reg_addrs.get(), table_len); if (hccl_ret != HCCL_SUCCESS) { - REPORT_CALL_ERROR("E19999", "Call hcom_remote_mem_register failed, ret:%d,", - hccl_ret); - GELOGE(HCCL_E_INTERNAL, "Rdma mem register failed, ret: 0x%X", hccl_ret); + REPORT_CALL_ERROR("E19999", "Call hcom_remote_mem_register failed, ret:%d,", hccl_ret); + GELOGE(HCCL_E_INTERNAL, "[Call][HcomRemoteMemRegister] Rdma mem register failed, ret:0x%X", hccl_ret); return HCCL_E_INTERNAL; } 
return SUCCESS; @@ -88,14 +86,14 @@ Status MallocSharedMemory(const TensorInfo &tensor_info, uint64_t &dev_addr, uin uint32_t type_size = 0; bool result = TypeUtils::GetDataTypeLength(tensor_info.data_type, type_size); if (!result) { - GELOGE(GRAPH_FAILED, "GetDataTypeLength failed, data_type=(%s).", + GELOGE(GRAPH_FAILED, "[Get][DataTypeLength] failed, data_type=(%s).", TypeUtils::DataTypeToSerialString(tensor_info.data_type).c_str()); return GRAPH_FAILED; } memory_size = type_size; for (auto dim : tensor_info.dims) { if (dim <= 0) { - GELOGE(GRAPH_FAILED, "Tensor dims should be positive"); + GELOGE(GRAPH_FAILED, "[Check][Param] Tensor dims should be positive"); return GRAPH_FAILED; } memory_size *= dim; @@ -103,7 +101,7 @@ Status MallocSharedMemory(const TensorInfo &tensor_info, uint64_t &dev_addr, uin SharedMemInfo mem_info(tensor_info.var_name, memory_size); Status ret = HostMemManager::Instance().MallocSharedMemory(mem_info); if (ret != SUCCESS) { - GELOGE(GRAPH_FAILED, "MallocSharedMemory failed op name [%s]", tensor_info.var_name.c_str()); + GELOGE(GRAPH_FAILED, "[Malloc][SharedMemory] failed, op name [%s]", tensor_info.var_name.c_str()); return GRAPH_FAILED; } dev_addr = reinterpret_cast(reinterpret_cast(mem_info.device_address)); diff --git a/ge/graph/manager/model_manager/event_manager.cc b/ge/graph/manager/model_manager/event_manager.cc index 69a946f9..339e9894 100644 --- a/ge/graph/manager/model_manager/event_manager.cc +++ b/ge/graph/manager/model_manager/event_manager.cc @@ -45,7 +45,7 @@ Status EventManager::Init(size_t event_num) { void EventManager::Release() noexcept { for (size_t i = 0; i < this->event_list_.size(); ++i) { rtError_t rt_ret = rtEventDestroy(this->event_list_[i]); - RETURN_IF_COND_NOT_MET(rt_ret == RT_ERROR_NONE, "Destroy event failed, idx is %zu, ret is 0x%x.", i, rt_ret); + RETURN_IF_COND_NOT_MET(rt_ret == RT_ERROR_NONE, "[Destroy][Event] failed, idx is %zu, ret is 0x%x.", i, rt_ret); } this->event_list_.clear(); diff --git 
a/ge/graph/manager/rdma_pool_allocator.cc b/ge/graph/manager/rdma_pool_allocator.cc index c19a2159..4297be95 100644 --- a/ge/graph/manager/rdma_pool_allocator.cc +++ b/ge/graph/manager/rdma_pool_allocator.cc @@ -20,6 +20,7 @@ #include "framework/common/debug/ge_log.h" #include "graph/ge_context.h" #include "runtime/dev.h" +#include "graph/manager/graph_mem_manager.h" namespace { const size_t kAlignedSize = 512; @@ -49,7 +50,7 @@ RdmaPoolAllocator::RdmaPoolAllocator(rtMemType_t memory_type) })) {} Status RdmaPoolAllocator::Initialize() { - memory_allocator_ = MemManager::Instance(memory_type_); + memory_allocator_ = &MemManager::Instance().MemInstance(memory_type_); if (memory_allocator_ == nullptr) { return ACL_ERROR_GE_INTERNAL_ERROR; } @@ -81,8 +82,8 @@ Status RdmaPoolAllocator::InitMemory(size_t mem_size) { auto device_id = GetContext().DeviceId(); GELOGD("Init Rdma Memory with size [%zu] for devid:[%u]", mem_size, device_id); if (rdma_base_addr_ != nullptr) { - REPORT_INNER_ERROR("E19999", "Param rdma_base_addr_ is nullptr, check invalid"); - GELOGE(GE_MULTI_INIT, "Rdma pool has been malloced"); + REPORT_INNER_ERROR("E19999", "Param rdma_base_addr_ is not nullptr, devid:%u, check invalid", device_id); + GELOGE(GE_MULTI_INIT, "[Check][Param] Rdma pool has been malloced, devid:%u", device_id); return GE_MULTI_INIT; } const std::string purpose = "Memory for rdma pool."; @@ -94,15 +95,15 @@ Status RdmaPoolAllocator::InitMemory(size_t mem_size) { rdma_base_addr_ = memory_allocator_->MallocMemory(purpose, mem_size, device_id); if (rdma_base_addr_ == nullptr) { - GELOGE(GE_GRAPH_MALLOC_FAILED, "Rdma pool memory malloc failed"); + GELOGE(GE_GRAPH_MALLOC_FAILED, "[Malloc][Memory] failed, size:%zu, device_id:%u", mem_size, device_id); return GE_GRAPH_MALLOC_FAILED; } rdma_mem_size_ = mem_size; // Init with a base block. 
auto *base_block = new (std::nothrow) Block(device_id, mem_size, rdma_base_addr_); if (base_block == nullptr) { - REPORT_CALL_ERROR("E19999", "New Block failed, device_id:%u", device_id); - GELOGE(GE_GRAPH_MALLOC_FAILED, "Block malloc failed"); + REPORT_CALL_ERROR("E19999", "New Block failed, size:%zu, device_id:%u", mem_size, device_id); + GELOGE(GE_GRAPH_MALLOC_FAILED, "[New][Block] failed, size:%zu, device_id:%u", mem_size, device_id); return GE_GRAPH_MALLOC_FAILED; } block_bin_.insert(base_block); @@ -122,7 +123,7 @@ uint8_t *RdmaPoolAllocator::Malloc(size_t size, uint32_t device_id) { if (block->ptr == nullptr) { REPORT_INNER_ERROR("E19999", "Rdmapool memory address is nullptr, device_id:%u, check invalid", device_id); - GELOGE(INTERNAL_ERROR, "Rdmapool memory address is nullptr."); + GELOGE(INTERNAL_ERROR, "[Check][Param] Rdmapool memory address is nullptr, device_id:%u", device_id); return nullptr; } allocated_blocks_.emplace(block->ptr, block); @@ -154,9 +155,8 @@ uint8_t *RdmaPoolAllocator::Malloc(size_t size, uint32_t device_id) { Status RdmaPoolAllocator::Free(uint8_t *memory_addr, uint32_t device_id) { GELOGI("Free rdma memory, device id = %u", device_id); if (memory_addr == nullptr) { - REPORT_INNER_ERROR("E19999", "Param memory_addr is nullptr, device_id:%u, check invalid", - device_id); - GELOGE(GE_GRAPH_FREE_FAILED, "Invalid memory pointer"); + REPORT_INNER_ERROR("E19999", "Param memory_addr is nullptr, device_id:%u, check invalid", device_id); + GELOGE(GE_GRAPH_FREE_FAILED, "[Check][Param] Invalid memory pointer, device id:%u", device_id); return GE_GRAPH_FREE_FAILED; } @@ -165,7 +165,7 @@ Status RdmaPoolAllocator::Free(uint8_t *memory_addr, uint32_t device_id) { if (it == allocated_blocks_.end()) { REPORT_INNER_ERROR("E19999", "Param memory_addr is not allocated before, device_id:%u, " "check invalid", device_id); - GELOGE(PARAM_INVALID, "Invalid memory pointer"); + GELOGE(PARAM_INVALID, "[Check][Param] Invalid memory pointer, device id:%u", 
device_id); return PARAM_INVALID; } @@ -208,7 +208,7 @@ void RdmaPoolAllocator::MergeBlocks(Block *dst, Block *src) { Status RdmaPoolAllocator::GetBaseAddr(uint64_t &base_addr, uint64_t &mem_size) { if (rdma_base_addr_ == nullptr) { REPORT_INNER_ERROR("E19999", "Param rdma_base_addr_ is nullptr, check invalid"); - GELOGE(INTERNAL_ERROR, "Rdma base addr is nullptr."); + GELOGE(INTERNAL_ERROR, "[Check][Param] Rdma base addr is nullptr."); return INTERNAL_ERROR; } base_addr = static_cast(reinterpret_cast(rdma_base_addr_)); diff --git a/ge/graph/manager/session_scope_mem_allocator.cc b/ge/graph/manager/session_scope_mem_allocator.cc new file mode 100644 index 00000000..aedc2e92 --- /dev/null +++ b/ge/graph/manager/session_scope_mem_allocator.cc @@ -0,0 +1,83 @@ +/** + * Copyright 2019-2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "graph/manager/session_scope_mem_allocator.h" + +#include +#include +#include + +#include "framework/common/debug/ge_log.h" +#include "graph/manager/graph_mem_manager.h" + +namespace ge { + +SessionScopeMemAllocator::SessionScopeMemAllocator(rtMemType_t memory_type) + : memory_type_(memory_type), memory_allocator_(nullptr) {} + +Status SessionScopeMemAllocator::Initialize(uint32_t device_id) { + GELOGI("Device id %u", device_id); + // when redo Initialize free old memory + FreeAllMemory(); + std::lock_guard lock(mutex_); + memory_allocator_ = &MemManager::Instance().MemInstance(memory_type_); + if (memory_allocator_ == nullptr) { + return ACL_ERROR_GE_INTERNAL_ERROR; + } + return ge::SUCCESS; +} + +void SessionScopeMemAllocator::Finalize(uint32_t device_id) { + GELOGI("Device id %u", device_id); + FreeAllMemory(); +} + +uint8_t *SessionScopeMemAllocator::Malloc(size_t size, uint64_t session_id, uint32_t device_id) { + GELOGI("Start malloc memory, size:%zu, session id:%lu device id:%u", size, session_id, device_id); + const std::string purpose = "Memory for session scope."; + auto ptr = memory_allocator_->MallocMemory(purpose, size, device_id); + if (ptr == nullptr) { + GELOGE(ge::FAILED, "Malloc failed, no enough memory for size:%zu, session_id:%lu device_id:%u", size, + session_id, device_id); + return nullptr; + } + std::lock_guard lock(mutex_); + std::shared_ptr mem_ptr(ptr, [&](uint8_t *p) { (void)memory_allocator_->FreeMemory(p); }); + allocated_memory_[session_id].emplace_back(size, mem_ptr); + return ptr; +} + +Status SessionScopeMemAllocator::Free(uint64_t session_id, uint32_t device_id) { + GELOGI("Free session:%lu memory, device id:%u.", session_id, device_id); + std::lock_guard lock(mutex_); + auto it = allocated_memory_.find(session_id); + if (it == allocated_memory_.end()) { + GELOGW("Invalid session_id"); + return ge::PARAM_INVALID; + } + allocated_memory_.erase(it); + return ge::SUCCESS; +} + +void 
SessionScopeMemAllocator::FreeAllMemory() { + GELOGI("Free all memory"); + std::lock_guard lock(mutex_); + for (auto &session_mem : allocated_memory_) { + session_mem.second.clear(); + } + allocated_memory_.clear(); +} +} // namespace ge diff --git a/ge/graph/manager/session_scope_mem_allocator.h b/ge/graph/manager/session_scope_mem_allocator.h new file mode 100644 index 00000000..3dbf3cb0 --- /dev/null +++ b/ge/graph/manager/session_scope_mem_allocator.h @@ -0,0 +1,124 @@ +/** + * Copyright 2019-2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef GE_GRAPH_MANAGER_SESSION_SCOPE_MEM_ALLOCATOR_H_ +#define GE_GRAPH_MANAGER_SESSION_SCOPE_MEM_ALLOCATOR_H_ + +#include +#include +#include +#include +#include +#include +#include +#include + +#include "framework/common/ge_inner_error_codes.h" +#include "graph/node.h" +#include "graph/manager/block_memory.h" +#include "runtime/mem.h" +#include "graph/manager/graph_mem_allocator.h" + +namespace ge { +class SessionScopeMemoryInfo { + public: + SessionScopeMemoryInfo(size_t size, const std::shared_ptr &ptr) : size(size), ptr(ptr) {} + SessionScopeMemoryInfo() = delete; + virtual ~SessionScopeMemoryInfo() = default; + + SessionScopeMemoryInfo(const SessionScopeMemoryInfo &other) { + if (&other == this) { + return; + } + size = other.size; + ptr = other.ptr; + }; + + SessionScopeMemoryInfo &operator=(const SessionScopeMemoryInfo &other) { + if (&other == this) { + return *this; + } + size = other.size; + ptr = other.ptr; + return *this; + }; + + private: + size_t size = 0; + std::shared_ptr ptr = nullptr; +}; + +class SessionScopeMemAllocator { + public: + explicit SessionScopeMemAllocator(rtMemType_t memory_type); + + SessionScopeMemAllocator(const SessionScopeMemAllocator &) = delete; + + SessionScopeMemAllocator &operator=(const SessionScopeMemAllocator &) = delete; + + virtual ~SessionScopeMemAllocator() = default; + + /// + /// @ingroup ge_graph + /// @brief caching allocator init + /// @param [in] device id + /// @return Status of init + /// + Status Initialize(uint32_t device_id = 0); + + /// + /// @ingroup ge_graph + /// @brief memory allocator finalize, release all memory + /// @return void + /// + void Finalize(uint32_t device_id = 0); + + /// + /// @ingroup ge_graph + /// @brief malloc memory + /// @param [in] size memory size + /// @param [in] session_id session id + /// @param [in] device id + /// @return memory address + /// + uint8_t *Malloc(size_t size, uint64_t session_id, uint32_t device_id = 0); + + /// + /// @ingroup ge_graph + /// 
@brief free memory + /// @param [in] session_id session id + /// @param [in] device_id device id + /// @return Status result of function + /// + Status Free(uint64_t session_id, uint32_t device_id = 0); + + private: + void FreeAllMemory(); + + private: + rtMemType_t memory_type_; + + // device memory allocator + MemoryAllocator *memory_allocator_; + + // lock around all operations + mutable std::recursive_mutex mutex_; + + // allocated blocks by memory pointer + std::unordered_map> allocated_memory_; +}; +} // namespace ge +#endif // GE_GRAPH_MANAGER_SESSION_SCOPE_MEM_ALLOCATOR_H_ diff --git a/ge/graph/manager/trans_var_data_utils.cc b/ge/graph/manager/trans_var_data_utils.cc index 9c1290fa..621eba79 100644 --- a/ge/graph/manager/trans_var_data_utils.cc +++ b/ge/graph/manager/trans_var_data_utils.cc @@ -37,7 +37,8 @@ class RtContextSwitchGuard { if (ret != RT_ERROR_NONE) { REPORT_CALL_ERROR("E19999", "Call rtCtxGetCurrent failed, device_id:%u, ret:0x%X,", device_id, ret); - GELOGE(RT_FAILED, "Failed to get current context from rt, error-code %d", ret); + GELOGE(RT_FAILED, "[Call][RtCtxGetCurrent] Failed to get current context, device_id:%u, ret:0x%X", + device_id, ret); return; } @@ -45,15 +46,14 @@ class RtContextSwitchGuard { if (ret != RT_ERROR_NONE) { REPORT_CALL_ERROR("E19999", "Call rtCtxCreate failed, device_id:%u, ret:0x%X,", device_id, ret); - GELOGE(RT_FAILED, "Failed to create new context for device %u, error-code %d", device_id, ret); + GELOGE(RT_FAILED, "[Call][RtCtxCreate] Failed to create new context for device:%u, ret:%d", device_id, ret); return; } ret = rtCtxSetCurrent(current_); if (ret != RT_ERROR_NONE) { - REPORT_CALL_ERROR("E19999", "Call rtCtxSetCurrent failed, device_id:%u, ret:0x%X,", - device_id, ret); - GELOGE(RT_FAILED, "Failed to switch context to normal, context %p, device %u", current_, device_id); + REPORT_CALL_ERROR("E19999", "Call rtCtxSetCurrent failed, device_id:%u, ret:0x%X", device_id, ret); + GELOGE(RT_FAILED, 
"[Call][RtCtxSetCurrent] failed, device_id:%u, ret:0x%X", device_id, ret); return; } GELOGD("Create and switch rt context %p type %d for device %u, backup last %p.", current_, mode, device_id, last_); @@ -80,7 +80,7 @@ int64_t CalcVarSizeInBytes(const GeTensorDesc &desc) { if (var_size <= 0) { REPORT_INNER_ERROR("E19999", "Data type:%s in desc, it's size:%ld < 0, check invalid", TypeUtils::DataTypeToSerialString(desc.GetDataType()).c_str(), var_size); - GELOGE(PARAM_INVALID, "Failed to calc var data size from data type %s", + GELOGE(PARAM_INVALID, "[Calc][VarDataSize] by data type %s failed.", TypeUtils::DataTypeToSerialString(desc.GetDataType()).c_str()); return -1; } @@ -99,7 +99,8 @@ Status CopyVarToDevice(const NodePtr &var, const formats::TransResult &trans_res if (ret != RT_ERROR_NONE) { REPORT_CALL_ERROR("E19999", "Call rtMemcpy failed, op:%s(%s), size:%lu, ret:0x%X,", var->GetName().c_str(), var->GetType().c_str(), trans_result.length, ret); - GELOGE(RT_FAILED, "Failed to copy memory to device, size %zu", trans_result.length); + GELOGE(RT_FAILED, "[Call][RtMemcpy] failed, op:%s(%s), size:%lu, ret:0x%X,", var->GetName().c_str(), + var->GetType().c_str(), trans_result.length, ret); return RT_FAILED; } return SUCCESS; @@ -111,21 +112,17 @@ Status CopyVarFromDevice(uint64_t session_id, const NodePtr &var, std::unique_pt GE_CHECK_NOTNULL(var); auto ret = VarManager::Instance(session_id)->GetVarAddr(var->GetName(), input_desc, &var_logic); if (ret != SUCCESS) { - GELOGE(INTERNAL_ERROR, - "Failed to copy var %s from device, can not find it" - " from var manager %u", - var->GetName().c_str(), ret); + GELOGE(INTERNAL_ERROR, "[Get][VarAddr] failed, node:%s, session_id:%lu, ret:%d", + var->GetName().c_str(), session_id, ret); return INTERNAL_ERROR; } uint8_t *var_addr = VarManager::Instance(session_id)->GetVarMemoryAddr(var_logic, RT_MEMORY_HBM); if (var_addr == nullptr) { - REPORT_CALL_ERROR("E19999", "Get variable memory addr failed, mem_type:%d, op:%s(%s), 
session_id:%lu,", + REPORT_CALL_ERROR("E19999", "Get variable memory addr failed, mem_type:%d, op:%s(%s), session_id:%lu", RT_MEMORY_HBM, var->GetName().c_str(), var->GetType().c_str(), session_id); - GELOGE(INTERNAL_ERROR, - "Failed to copy var %s from device, cant not get " - "var addr from logic addr %p", - var->GetName().c_str(), var_logic); + GELOGE(INTERNAL_ERROR, "[Get][VarMemoryAddr] failed, mem_type:%d, op:%s(%s), session_id:%lu", + RT_MEMORY_HBM, var->GetName().c_str(), var->GetType().c_str(), session_id); return INTERNAL_ERROR; } @@ -136,9 +133,10 @@ Status CopyVarFromDevice(uint64_t session_id, const NodePtr &var, std::unique_pt std::unique_ptr var_host(new(std::nothrow) uint8_t[var_size_bytes]); if (var_host == nullptr) { - REPORT_CALL_ERROR("E19999", "New host memory failed, size:%ld, op:%s(%s), session_id:%lu,", + REPORT_CALL_ERROR("E19999", "New host memory failed, size:%ld, op:%s(%s), session_id:%lu", var_size_bytes, var->GetName().c_str(), var->GetType().c_str(), session_id); - GELOGE(OUT_OF_MEMORY, "Failed to malloc rt-host memory, size %ld", var_size_bytes); + GELOGE(OUT_OF_MEMORY, "[New][Memory] for rt-host failed, size:%ld, op:%s(%s), session_id:%lu", + var_size_bytes, var->GetName().c_str(), var->GetType().c_str(), session_id); return OUT_OF_MEMORY; } @@ -147,10 +145,8 @@ Status CopyVarFromDevice(uint64_t session_id, const NodePtr &var, std::unique_pt if (ret != RT_ERROR_NONE) { REPORT_CALL_ERROR("E19999", "Call rtMemcpy failed, size:%ld, op:%s(%s), session_id:%lu, ret:0x%X", var_size_bytes, var->GetName().c_str(), var->GetType().c_str(), session_id, ret); - GELOGE(RT_FAILED, - "Failed to copy var memory from device, var %s, size %ld," - " rt-error-code %u", - var->GetName().c_str(), var_size_bytes, ret); + GELOGE(RT_FAILED, "[Call][RtMemcpy] failed, size:%ld, op:%s(%s), session_id:%lu, ret:0x%X", + var_size_bytes, var->GetName().c_str(), var->GetType().c_str(), session_id, ret); return RT_FAILED; } @@ -197,9 +193,7 @@ Status 
TransVarOnHost(uint8_t *var_data, const VarTransRoad &trans_road, formats formats::ShapeToString(src_shape).c_str(), formats::ShapeToString(dst_shape).c_str(), TypeUtils::DataTypeToSerialString(data_type).c_str(), ret); - GELOGE(INTERNAL_ERROR, - "Failed to trans format from %s to %s, shape %s to %s, " - "data type %s error code %u", + GELOGE(INTERNAL_ERROR, "[Trans][Format] from %s to %s, shape %s to %s failed, data type %s error code %u", TypeUtils::FormatToSerialString(src_format).c_str(), TypeUtils::FormatToSerialString(dst_format).c_str(), formats::ShapeToString(src_shape).c_str(), formats::ShapeToString(dst_shape).c_str(), TypeUtils::DataTypeToSerialString(data_type).c_str(), ret); @@ -221,7 +215,7 @@ Status TransVarOnHost(uint8_t *var_data, const VarTransRoad &trans_road, formats TypeUtils::DataTypeToSerialString(src_data_type).c_str(), TypeUtils::DataTypeToSerialString(dst_data_type).c_str(), formats::ShapeToString(input_shape).c_str(), src_data_size, ret); - GELOGE(INTERNAL_ERROR, "Failed to trans data type from %s to %s, input shape %s, data size %ld, error code %u", + GELOGE(INTERNAL_ERROR, "[Trans][DataType] from %s to %s failed, input shape %s, data size %ld, error code %u", TypeUtils::DataTypeToSerialString(src_data_type).c_str(), TypeUtils::DataTypeToSerialString(dst_data_type).c_str(), formats::ShapeToString(input_shape).c_str(), src_data_size, ret); @@ -230,7 +224,7 @@ Status TransVarOnHost(uint8_t *var_data, const VarTransRoad &trans_road, formats } else { REPORT_INNER_ERROR("E19999", "Trans var data failed, the trans type %s does not supported, check invalid", trans_info.node_type.c_str()); - GELOGE(UNSUPPORTED, "Failed to trans var data, the trans type %s does not supported", + GELOGE(UNSUPPORTED, "[Trans][VarData] failed, the trans type %s does not supported", trans_info.node_type.c_str()); return UNSUPPORTED; } @@ -255,10 +249,8 @@ Status ReAssignVarAddr(uint64_t session_id, uint8_t *var_logic = nullptr; Status ret = 
VarManager::Instance(session_id)->GetVarAddr(var_name, tensor_desc, &var_logic); if (ret != SUCCESS) { - GELOGE(INTERNAL_ERROR, - "Failed to get var %s device addr, can not find it" - " from var manager %u", - var_name.c_str(), ret); + GELOGE(INTERNAL_ERROR, "[Get][VarAddr] failed, var name:%s, session_id:%lu, ret:%u", + var_name.c_str(), session_id, ret); return INTERNAL_ERROR; } @@ -266,7 +258,8 @@ Status ReAssignVarAddr(uint64_t session_id, if (var_addr == nullptr) { REPORT_CALL_ERROR("E19999", "Get variable memory addr failed, mem_type:%d, var_name:%s, session_id:%lu,", RT_MEMORY_HBM, var_name.c_str(), session_id); - GELOGE(INTERNAL_ERROR, "Failed to convert var %s logic addr to real addr", var_name.c_str()); + GELOGE(INTERNAL_ERROR, "[Get][VarMemoryAddr] failed, mem_type:%d, var_name:%s, session_id:%lu", + RT_MEMORY_HBM, var_name.c_str(), session_id); return INTERNAL_ERROR; } *var_device = var_addr; @@ -293,9 +286,8 @@ Status TransVarData(const NodePtr &var, const VarTransRoad &trans_road, uint64_t // Sync var data from device std::unique_ptr var_data; if (trans_road.empty()) { - REPORT_INNER_ERROR("E19999", "Param trans_road is empty, session_id:%lu, check invalid", - session_id); - GELOGE(INTERNAL_ERROR, "Failed to get trans_road, trans_road is empty."); + REPORT_INNER_ERROR("E19999", "Param trans_road is empty, session_id:%lu, check invalid", session_id); + GELOGE(INTERNAL_ERROR, "[Check][Param] trans_road is empty, session_id:%lu", session_id); return INTERNAL_ERROR; } const GeTensorDesc &input_desc = trans_road.begin()->input; @@ -307,7 +299,7 @@ Status TransVarData(const NodePtr &var, const VarTransRoad &trans_road, uint64_t formats::TransResult trans_result{}; ret = TransVarOnHost(var_data.get(), trans_road, trans_result); if (ret != SUCCESS) { - GELOGE(ret, "Failed to trans var data on host, error code %u", ret); + GELOGE(ret, "[Call][TransVarOnHost] failed, session_id:%lu, ret:%u", session_id, ret); return ret; } @@ -319,14 +311,15 @@ Status 
TransVarData(const NodePtr &var, const VarTransRoad &trans_road, uint64_t /// TensorDesc needs to be removed. This change is large and needs to be performed step by step. ret = ReAssignVarAddr(session_id, var->GetName(), trans_road.rbegin()->output, &var_device); if (ret != SUCCESS) { - GELOGE(ret, "Failed to re-assign memory on device, size %zu", trans_result.length); + GELOGE(ret, "[Call][ReAssignVarAddr] failed, session id:%lu, op:%s, ret:%u", + session_id, var->GetName().c_str(), ret); return ret; } // sync new data to device ret = CopyVarToDevice(var, trans_result, var_device); if (ret != SUCCESS) { - GELOGE(ret, "Failed to send var data to device"); + GELOGE(ret, "[Call][CopyVarToDevice] failed, var:%s, ret:%u", var->GetName().c_str(), ret); return ret; } @@ -350,7 +343,10 @@ Status TransTensor(uint8_t *var_data, const NodePtr &var_src, const NodePtr &var TypeUtils::DataTypeToSerialString(src_data_datatype).c_str(), TypeUtils::DataTypeToSerialString(dst_data_datatype).c_str(), src_data_shape_size, ret); - GELOGE(INTERNAL_ERROR, "trans var data on host failed"); + GELOGE(INTERNAL_ERROR, "[Trans][DataType] from %s to %s failed, data size %ld, ret:%u", + TypeUtils::DataTypeToSerialString(src_data_datatype).c_str(), + TypeUtils::DataTypeToSerialString(dst_data_datatype).c_str(), + src_data_shape_size, ret); return ret; }); return SUCCESS; @@ -366,9 +362,11 @@ Status CopyTensorFromSrcVarNode(const NodePtr &var_src, /// need copy value from var_fp32 to var_fp16. 
/// [opdesc of var_src and var_dst are checked before passed in, no need to check if they are nullptr] GE_IF_BOOL_EXEC(var_src == nullptr || var_dst == nullptr, - REPORT_INNER_ERROR("E19999", "Param var_src or var_dst is empty, session_id:%lu, device_id:%u, " + REPORT_INNER_ERROR("E19999", "Param var_src or var_dst is nullptr, session_id:%lu, device_id:%u, " "check invalid", session_id, device_id); - GELOGE(FAILED, "node var is nullptr"); return FAILED); + GELOGE(FAILED, "[Check][Param] Param var_src or var_dst is nullptr, session_id:%lu, device_id:%u", + session_id, device_id); + return FAILED); // src_node output_desc (fp32) GeTensorDesc output_desc = var_src->GetOpDesc()->GetOutputDesc(0); auto src_data_type = output_desc.GetDataType(); @@ -390,31 +388,45 @@ Status CopyTensorFromSrcVarNode(const NodePtr &var_src, RtContextSwitchGuard switch_context(RT_CTX_NORMAL_MODE, device_id); // copy from src_node auto ret = CopyVarFromDevice(session_id, var_src, var_src_data, output_desc); - GE_IF_BOOL_EXEC(ret != SUCCESS, GELOGE(FAILED, "Copy Var From Device failed"); return ret); + GE_IF_BOOL_EXEC(ret != SUCCESS, + GELOGE(FAILED, "[Call][CopyVarFromDevice] failed, session id:%lu, var_src:%s", + session_id, var_src->GetName().c_str()); + return ret); // trans dtype formats::TransResult trans_result{}; ret = TransTensor(var_src_data.get(), var_src, var_dst, trans_result); - GE_IF_BOOL_EXEC(ret != SUCCESS, GELOGE(INTERNAL_ERROR, "trans var data on host failed"); return ret); + GE_IF_BOOL_EXEC(ret != SUCCESS, + GELOGE(INTERNAL_ERROR, "[Trans][Tensor] failed, var_src:%s, var_dst:%s", + var_src->GetName().c_str(), var_dst->GetName().c_str()); + return ret); // reset src value. 
void *var_device = nullptr; ret = ReAssignVarAddr(session_id, var_dst->GetName(), dst_tensor_desc, &var_device); - GE_IF_BOOL_EXEC(ret != SUCCESS, GELOGE(INTERNAL_ERROR, "assign mem failed"); return ret); + GE_IF_BOOL_EXEC(ret != SUCCESS, + GELOGE(INTERNAL_ERROR, "[Call][ReAssignVarAddr] failed, session id:%lu, var_dst:%s", + session_id, var_dst->GetName().c_str()); + return ret); // copy to device ret = CopyVarToDevice(var_dst, trans_result, var_device); - GE_IF_BOOL_EXEC(ret != SUCCESS, GELOGE(ret, "Failed to send var data to device"); return ret); + GE_IF_BOOL_EXEC(ret != SUCCESS, + GELOGE(ret, "[Call][CopyVarToDevice] failed, var_dst:%s, ret:%u", + var_dst->GetName().c_str(), ret); + return ret); return SUCCESS; } } // namespace Status TransVarDataUtils::SyncVarData2BroadCast(const string &var_name, const ge::GeTensorDesc &src_tensor_desc, uint8_t *dst_addr, int64_t dst_addr_size, uint64_t session_id) { - GE_CHK_BOOL_RET_STATUS(dst_addr != nullptr, FAILED, "dst addr is null. "); + GE_CHK_BOOL_RET_STATUS(dst_addr != nullptr, FAILED, "[Check][Param] dst addr is nullptr."); uint8_t *src_host_addr = nullptr; int64_t src_addr_size = 0; GE_MAKE_GUARD_RTMEM(src_host_addr); GE_CHK_STATUS_RET(SyncTensorToHost(var_name, src_tensor_desc, &src_host_addr, src_addr_size, session_id)); GELOGI("src_addr_size: %ld, dst_addr_size: %ld", src_addr_size, dst_addr_size); - GE_CHK_BOOL_RET_STATUS(src_addr_size == dst_addr_size, FAILED, "var data size is not equal broadcast "); + GE_CHK_BOOL_RET_STATUS(src_addr_size == dst_addr_size, FAILED, + "[Check][Param] src_addr_size:%ld not equal to dst_addr_size:%ld", + src_addr_size, dst_addr_size); GE_CHK_RT_RET(rtMemcpy(dst_addr, dst_addr_size, src_host_addr, src_addr_size, RT_MEMCPY_HOST_TO_DEVICE)); return SUCCESS; @@ -422,7 +434,7 @@ Status TransVarDataUtils::SyncVarData2BroadCast(const string &var_name, const ge Status TransVarDataUtils::SyncBroadCastData2Var(uint8_t *src_addr, int64_t src_addr_size, const string &var_name, const 
ge::GeTensorDesc &dst_tensor_desc, uint64_t session_id) { - GE_CHK_BOOL_RET_STATUS(src_addr != nullptr, FAILED, "src addr is null. "); + GE_CHK_BOOL_RET_STATUS(src_addr != nullptr, FAILED, "[Check][Param] src addr is nullptr. "); uint8_t *host_addr = nullptr; GE_MAKE_GUARD_RTMEM(host_addr); GE_CHK_RT_RET(rtMallocHost(reinterpret_cast(&host_addr), src_addr_size)); @@ -436,7 +448,7 @@ Status TransVarDataUtils::SyncBroadCastData2Var(uint8_t *src_addr, int64_t src_a Status TransVarDataUtils::SyncTensorToHost(const string &var_name, const ge::GeTensorDesc &src_tensor_desc, uint8_t **host_addr, int64_t &src_tensor_size, uint64_t session_id) { - GE_CHK_STATUS_RET(ge::TensorUtils::GetSize(src_tensor_desc, src_tensor_size), "get size from TensorDesc failed"); + GE_CHK_STATUS_RET(ge::TensorUtils::GetSize(src_tensor_desc, src_tensor_size), "[Get][Size] from TensorDesc failed"); uint8_t *src_addr = nullptr; GE_CHK_STATUS_RET(VarManager::Instance(session_id)->GetVarAddr(var_name, src_tensor_desc, &src_addr)); @@ -493,7 +505,8 @@ Status TransVarDataUtils::TransAllVarData(const vector &variable_nodes, if (rt_ret != RT_ERROR_NONE) { REPORT_CALL_ERROR("E19999", "Call rtCtxSetCurrent failed, session_id:%lu, graph_id:%u, ret:0x%X,", session_id, graph_id, rt_ret); - GELOGE(RT_FAILED, "Failed to set context, error_code is: 0x%X.", rt_ret); + GELOGE(RT_FAILED, "[Call][RtCtxSetCurrent] failed, session_id:%lu, graph_id:%u, ret:0x%X,", + session_id, graph_id, rt_ret); return RT_ERROR_TO_GE_STATUS(rt_ret); } uint32_t allocated_graph_id = 0; @@ -501,8 +514,8 @@ Status TransVarDataUtils::TransAllVarData(const vector &variable_nodes, if (ret != SUCCESS) { REPORT_CALL_ERROR("E19999", "Get allocated GraphId failed, session_id:%lu, graph_id:%u, ret:0x%X,", session_id, graph_id, ret); - GELOGE(INTERNAL_ERROR, "var has not been allocated, node:%s, graph_id:%u.", node->GetName().c_str(), - graph_id); + GELOGE(INTERNAL_ERROR, "[Get][AllocatedGraphId] failed, node:%s, graph_id:%u.", + 
node->GetName().c_str(), graph_id); return INTERNAL_ERROR; } uint32_t changed_graph_id = 0; @@ -518,7 +531,8 @@ Status TransVarDataUtils::TransAllVarData(const vector &variable_nodes, } ret = TransVarData(node, *trans_road, session_id); if (ret != SUCCESS) { - GELOGE(INTERNAL_ERROR, "TransVarData failed, node:%s, graph_id:%u.", node->GetName().c_str(), graph_id); + GELOGE(INTERNAL_ERROR, "[Trans][VarData] failed, node:%s, graph_id:%u, session_id:%lu.", + node->GetName().c_str(), graph_id, session_id); return INTERNAL_ERROR; } VarManager::Instance(session_id)->RemoveChangedGraphId(node->GetName()); @@ -527,7 +541,7 @@ Status TransVarDataUtils::TransAllVarData(const vector &variable_nodes, }, node, session_id, context, graph_id, ErrorManager::GetInstance().GetErrorManagerContext()); if (!f.valid()) { - GELOGE(FAILED, "Future is invalid"); + GELOGE(FAILED, "[Check][Param] Future is invalid, session id:%lu, graph id:%u", session_id, graph_id); return FAILED; } vector_future.push_back(std::move(f)); @@ -537,7 +551,7 @@ Status TransVarDataUtils::TransAllVarData(const vector &variable_nodes, for (size_t i = 0; i < vector_future.size(); ++i) { ret_status = vector_future[i].get(); if (ret_status != SUCCESS) { - GELOGE(ret_status, "TransAllVarData:: trans %zu vardata failed", i); + GELOGE(ret_status, "[Check][Param] trans %zu vardata failed", i); return ret_status; } } @@ -550,7 +564,8 @@ Status TransVarDataUtils::CopyVarData(const ComputeGraphPtr &compute_graph, uint if (compute_graph == nullptr) { REPORT_INNER_ERROR("E19999", "Param compute_graph is nullptr, session_id:%lu, device_id:%u, check invalid", session_id, device_id); - GELOGE(FAILED, "compute_graph is nullptr"); + GELOGE(FAILED, "[Check][Param] compute_graph is nullptr, session_id:%lu, device_id:%u", + session_id, device_id); return FAILED; } @@ -568,7 +583,10 @@ Status TransVarDataUtils::CopyVarData(const ComputeGraphPtr &compute_graph, uint GELOGI("current_var_node__: [%s] copy_from_var_node__: [%s].", 
node->GetName().c_str(), src_node->GetName().c_str()); auto ret = CopyTensorFromSrcVarNode(src_node, node, session_id, device_id); - GE_IF_BOOL_EXEC(ret != SUCCESS, GELOGE(FAILED, "copy tensor failed!"); return FAILED); + GE_IF_BOOL_EXEC(ret != SUCCESS, + GELOGE(FAILED, "[Copy][Tensor] failed, src_node:%s, node:%s, session_id:%lu, device_id:%u", + src_node->GetName().c_str(), node->GetName().c_str(), session_id, device_id); + return FAILED); // only copy once (void) ge::AttrUtils::SetBool(node->GetOpDesc(), "_copy_value", true); // no need to check value } diff --git a/ge/graph/manager/util/debug.cc b/ge/graph/manager/util/debug.cc index 1dd97bc1..d20280fb 100644 --- a/ge/graph/manager/util/debug.cc +++ b/ge/graph/manager/util/debug.cc @@ -63,17 +63,15 @@ Status Debug::DumpDevMem(const char *file, const void *addr, int64_t size) { uint8_t *host_addr = nullptr; rtError_t ret = rtMallocHost(reinterpret_cast(&host_addr), size); if (ret != RT_ERROR_NONE) { - REPORT_CALL_ERROR("E19999", "Call rtMallocHost failed, size:%zu, ret: 0x%X", - size, ret); - GELOGE(FAILED, "Call rt api rtMallocHost failed, ret: 0x%X", ret); + REPORT_CALL_ERROR("E19999", "Call rtMallocHost failed, size:%zu, ret:0x%X", size, ret); + GELOGE(FAILED, "[Call][RtMallocHost] failed, size:%zu, ret:0x%X", size, ret); return FAILED; } GE_MAKE_GUARD_RTMEM(host_addr); ret = rtMemcpy(host_addr, size, addr, size, RT_MEMCPY_DEVICE_TO_HOST); if (ret != RT_ERROR_NONE) { - REPORT_CALL_ERROR("E19999", "Call rtMemcpy failed, size:%zu, ret: 0x%X", - size, ret); - GELOGE(FAILED, "Call rt api rtMemcpy failed, ret: 0x%X", ret); + REPORT_CALL_ERROR("E19999", "Call rtMemcpy failed, size:%zu, ret:0x%X", size, ret); + GELOGE(FAILED, "[Call][RtMemcpy] failed, size:%zu, ret:0x%X", size, ret); return FAILED; } diff --git a/ge/graph/manager/util/hcom_util.cc b/ge/graph/manager/util/hcom_util.cc index a30321f9..1843127f 100644 --- a/ge/graph/manager/util/hcom_util.cc +++ b/ge/graph/manager/util/hcom_util.cc @@ -28,7 +28,8 @@ 
Status HcomOmeUtil::GetHcclDataType(const ge::ConstOpDescPtr &op_desc, std::vector &kernel_hccl_infos) { GE_CHECK_NOTNULL(op_desc); if (CheckKernelHcclInfo(op_desc, kernel_hccl_infos) != SUCCESS) { - GELOGE(PARAM_INVALID, "HcomOmeUtil:: the number of GETaskKernelHcclInfo is invalid."); + GELOGE(PARAM_INVALID, "[Check][KernelHcclInfo] failed, op:%s(%s).", + op_desc->GetName().c_str(), op_desc->GetType().c_str()); return PARAM_INVALID; } GELOGI("GetHcclDataType start, node[%s], opType[%s].", op_desc->GetName().c_str(), op_desc->GetType().c_str()); @@ -40,10 +41,10 @@ Status HcomOmeUtil::GetHcclDataType(const ge::ConstOpDescPtr &op_desc, if (op_desc->GetType() == HCOMRECEIVE) { bool ret = ge::AttrUtils::GetDataType(op_desc, HCOM_ATTR_DATA_TYPE, src_data_type); if (ret == false) { - REPORT_INNER_ERROR("E19999", "Get Attr:%s in op:%s(%s) fail", - HCOM_ATTR_DATA_TYPE.c_str(), + REPORT_INNER_ERROR("E19999", "Get Attr:%s in op:%s(%s) fail", HCOM_ATTR_DATA_TYPE.c_str(), op_desc->GetName().c_str(), op_desc->GetType().c_str()); - GELOGE(PARAM_INVALID, "op:HcomReceive, op desc no attr: dtype."); + GELOGE(PARAM_INVALID, "[Get][Attr] %s in op:%s(%s) fail", HCOM_ATTR_DATA_TYPE.c_str(), + op_desc->GetName().c_str(), op_desc->GetType().c_str()); return PARAM_INVALID; } } else { @@ -55,13 +56,11 @@ Status HcomOmeUtil::GetHcclDataType(const ge::ConstOpDescPtr &op_desc, auto iter = kConstOpHcclDataType.find(static_cast(src_data_type)); if (iter == kConstOpHcclDataType.end()) { REPORT_INNER_ERROR("E19999", "Attr:%s in op:%s(%s), value data_type:%s, not support in kConstOpHcclDataType now, " - "check invalid", HCOM_ATTR_DATA_TYPE.c_str(), - op_desc->GetName().c_str(), op_desc->GetType().c_str(), - ge::TypeUtils::DataTypeToSerialString(src_data_type).c_str()); - GELOGE(PARAM_INVALID, - "HcomOmeUtil:: Node: %s Optype: %s HcomDataType cann't support! 
Current Davinci Data Type : %s", - op_desc->GetName().c_str(), op_desc->GetType().c_str(), - ge::TypeUtils::DataTypeToSerialString(src_data_type).c_str()); + "check invalid", HCOM_ATTR_DATA_TYPE.c_str(), op_desc->GetName().c_str(), + op_desc->GetType().c_str(), ge::TypeUtils::DataTypeToSerialString(src_data_type).c_str()); + GELOGE(PARAM_INVALID, "[Check][Param] Attr:%s in op:%s(%s), value data_type:%s, " + "not support in kConstOpHcclDataType now", HCOM_ATTR_DATA_TYPE.c_str(), op_desc->GetName().c_str(), + op_desc->GetType().c_str(), ge::TypeUtils::DataTypeToSerialString(src_data_type).c_str()); return PARAM_INVALID; } @@ -73,7 +72,7 @@ Status HcomOmeUtil::GetHcclDataType(const ge::ConstOpDescPtr &op_desc, Status HcomOmeUtil::GetHcclTypeSize(HcclDataType data_type, int32_t &size) { auto iter = kConstOpHcclDataTypeSize.find(data_type); GE_CHK_BOOL_EXEC(iter != kConstOpHcclDataTypeSize.end(), return PARAM_INVALID, - "HcomOmeUtil::HcomDataTypeSize , No DataTypeSize!"); + "[Check][Param] param data_type:%d not find", data_type); size = iter->second; return SUCCESS; @@ -83,21 +82,22 @@ Status HcomOmeUtil::GetHcomCount(const ge::ConstOpDescPtr &op_desc, HcclDataType int &count) { GE_CHECK_NOTNULL(op_desc); if (!IsHCOMOp(op_desc->GetType())) { - REPORT_INNER_ERROR("E19999", "Op:%s(%s) is not hcom op, check invalid", - op_desc->GetName().c_str(), op_desc->GetType().c_str()); - GELOGE(PARAM_INVALID, "HcomOmeUtil:: operator is not Hcom operator."); + REPORT_INNER_ERROR("E19999", "Op:%s(%s) is not hcom op, check invalid", op_desc->GetName().c_str(), + op_desc->GetType().c_str()); + GELOGE(PARAM_INVALID, "[Check][Param] Op:%s(%s) is not hcom op", op_desc->GetName().c_str(), + op_desc->GetType().c_str()); return PARAM_INVALID; } int64_t total_size = 0; int64_t align_size = 512; int32_t size = 0; - GE_CHK_STATUS_RET(HcomOmeUtil::GetHcclTypeSize(data_type, size), "GetHcomCount: GetHcclTypeSize fail!"); + GE_CHK_STATUS_RET(HcomOmeUtil::GetHcclTypeSize(data_type, size), 
"[Get][HcclTypeSize] fail, datatype:%d", data_type); if (op_desc->GetType() == HCOMRECEIVE) { for (size_t i = 0; i < op_desc->GetOutputsSize(); ++i) { int64_t output_size = 0; GE_CHECK_NOTNULL(op_desc->GetOutputDescPtr(i)); GE_CHK_STATUS_RET(ge::TensorUtils::GetSize(*op_desc->GetOutputDescPtr(i), output_size), - "Get size from TensorDesc failed, op: %s, output index: %zu.", op_desc->GetName().c_str(), i); + "[Get][Size] from TensorDesc failed, op:%s, output index:%zu.", op_desc->GetName().c_str(), i); output_size = (output_size + align_size - 1) / align_size * align_size; total_size += output_size; } @@ -107,42 +107,48 @@ Status HcomOmeUtil::GetHcomCount(const ge::ConstOpDescPtr &op_desc, HcclDataType int64_t block_size = 0; GE_CHECK_NOTNULL(op_desc->GetInputDescPtr(i)); GE_CHK_STATUS_RET(ge::TensorUtils::GetSize(*op_desc->GetInputDescPtr(i), input_size), - "get size from TensorDesc failed, op : %s, input index : %zu", op_desc->GetName().c_str(), i); + "[Get][Size] from TensorDesc failed, op:%s, input index:%zu", op_desc->GetName().c_str(), i); // dynamic shape hccl op get size from output tensor desc if (op_desc->HasAttr(ATTR_NAME_IS_UNKNOWN_SHAPE)) { GE_CHECK_NOTNULL(op_desc->GetOutputDescPtr(i)); GE_CHK_STATUS_RET(ge::TensorUtils::GetSize(*op_desc->GetOutputDescPtr(i), input_size), - "get size from TensorDesc failed, op : %s, input index : %zu", op_desc->GetName().c_str(), i); + "[Get][Size] from TensorDesc failed, op:%s, input index:%zu", op_desc->GetName().c_str(), i); } GE_IF_BOOL_EXEC( op_desc->GetType() == HCOMREDUCESCATTER, int32_t rank_size = 0; GE_CHK_BOOL_RET_STATUS(ge::AttrUtils::GetInt(op_desc, HCOM_ATTR_RANK_SIZE, rank_size), PARAM_INVALID, - "get HCOM_ATTR_RANK_SIZE failed"); - GE_CHK_BOOL_RET_STATUS(rank_size != 0, PARAM_INVALID, "rank size is zero"); - int64_t shape_size = op_desc->GetInputDescPtr(i)->GetShape().GetShapeSize(); GE_CHK_STATUS_RET( - ge::CheckInt64Uint32MulOverflow(shape_size, size), "Product of shape size and size beyond 
INT64_MAX"); + "[Get][Attr] %s in op:%s(%s) failed", HCOM_ATTR_RANK_SIZE.c_str(), + op_desc->GetName().c_str(), op_desc->GetType().c_str()); + GE_CHK_BOOL_RET_STATUS(rank_size != 0, PARAM_INVALID, "[Check][Param] rank size is zero"); + int64_t shape_size = op_desc->GetInputDescPtr(i)->GetShape().GetShapeSize(); + GE_CHK_STATUS_RET(ge::CheckInt64Uint32MulOverflow(shape_size, size), + "[Check][Param] Product of shape size:%ld and size:%d beyond INT64_MAX, op:%s(%s)", + shape_size, size, op_desc->GetName().c_str(), op_desc->GetType().c_str()); block_size = (shape_size * size) / rank_size; - GE_CHK_STATUS_RET(ge::CheckInt64AddOverflow(total_size, block_size), "Total size is beyond the INT64_MAX"); + GE_CHK_STATUS_RET(ge::CheckInt64AddOverflow(total_size, block_size), + "[Check][Param] Total size:%ld is beyond the INT64_MAX, op:%s(%s)", + total_size, op_desc->GetName().c_str(), op_desc->GetType().c_str()); total_size = total_size + block_size; continue;); int64_t shape_size = op_desc->GetInputDescPtr(i)->GetShape().GetShapeSize(); GELOGD("hcom util node %s inputsize %ld, shapesize %ld, datasize %d.", op_desc->GetName().c_str(), input_size, shape_size, size); GE_CHK_STATUS_RET(ge::CheckInt64Int32MulOverflow(shape_size, size), - "Product of shape size and size beyond INT64_MAX"); + "[Check][Param] Product of shape size:%ld and size:%d beyond INT64_MAX", shape_size, size); GE_IF_BOOL_EXEC(is_allgather, block_size = shape_size * size;); GE_IF_BOOL_EXEC(!is_allgather, block_size = (input_size + align_size - 1) / align_size * align_size;); - GE_CHK_STATUS_RET(ge::CheckInt64AddOverflow(total_size, block_size), "Total size is beyond the INT64_MAX"); + GE_CHK_STATUS_RET(ge::CheckInt64AddOverflow(total_size, block_size), + "[Check][Param] Total size:%ld is beyond the INT64_MAX", total_size); total_size = total_size + block_size; } } - GE_CHK_BOOL_RET_STATUS(size != 0, PARAM_INVALID, "Size is zero"); + GE_CHK_BOOL_RET_STATUS(size != 0, PARAM_INVALID, "[Check][Param] Size is 
zero"); count = static_cast(total_size / size); - GE_CHK_BOOL_EXEC(total_size % size == 0, return PARAM_INVALID, "total_size:%ld is not divisiable by size:%d.", - total_size, size); + GE_CHK_BOOL_EXEC(total_size % size == 0, return PARAM_INVALID, + "[Check][Param] total_size:%ld is not divisiable by size:%d.", total_size, size); return SUCCESS; } @@ -153,32 +159,34 @@ Status HcomOmeUtil::GetHorovodCount(const ge::ConstOpDescPtr &op_desc, if (!IsHorovodOp(op_desc->GetType())) { REPORT_INNER_ERROR("E19999", "Op:%s(%s) is not horovod op, check invalid", op_desc->GetName().c_str(), op_desc->GetType().c_str()); - GELOGE(PARAM_INVALID, "HcomOmeUtil:: operator is not Horovod operator."); + GELOGE(PARAM_INVALID, "[Call][IsHorovodOp] failed, Op:%s(%s) is not horovod op", + op_desc->GetName().c_str(), op_desc->GetType().c_str()); return PARAM_INVALID; } int64_t align_size = 512; int32_t size = 0; for (size_t i = 0; i < op_desc->GetInputsSize(); i++) { GE_CHK_STATUS_RET(HcomOmeUtil::GetHcclTypeSize(static_cast(kernel_hccl_infos[i].dataType), size), - "GetHorovodCount: GetHcclTypeSize fail!"); + "[Call][GetHcclTypeSize] fail, op:%s(%s)", + op_desc->GetName().c_str(), op_desc->GetType().c_str()); int64_t input_size = 0; int64_t block_size = 0; GE_CHECK_NOTNULL(op_desc->GetInputDescPtr(i)); GE_CHK_STATUS_RET(ge::TensorUtils::GetSize(*op_desc->GetInputDescPtr(i), input_size), - "get size from TensorDesc failed, op : %s, input index : %zu", op_desc->GetName().c_str(), i); + "[Get][Size] from TensorDesc failed, op:%s, input index:%zu", op_desc->GetName().c_str(), i); int64_t shape_size = op_desc->GetInputDescPtr(i)->GetShape().GetShapeSize(); GE_CHK_STATUS_RET(ge::CheckInt64Int32MulOverflow(shape_size, size), - "Product of shape size and size beyond INT64_MAX"); + "[Check][Param] Product of shape size:%ld and size:%d beyond INT64_MAX", shape_size, size); if (kernel_hccl_infos[0].hccl_type == HVDCALLBACKALLGATHER) { block_size = shape_size * size; } else { block_size = (input_size + 
align_size - 1) / align_size * align_size; } - GE_CHK_BOOL_RET_STATUS(size != 0, PARAM_INVALID, "Size is zero"); - GE_CHK_BOOL_EXEC(block_size % size == 0, return PARAM_INVALID, "block_size:%ld is not divisiable by size:%d.", - block_size, size); + GE_CHK_BOOL_RET_STATUS(size != 0, PARAM_INVALID, "[Check][Param] Size is zero"); + GE_CHK_BOOL_EXEC(block_size % size == 0, return PARAM_INVALID, + "[Check][Param] block_size:%ld is not divisiable by size:%d.", block_size, size); kernel_hccl_infos[i].count = static_cast(block_size / size); } @@ -191,7 +199,8 @@ Status HcomOmeUtil::GetHcclCount(const ge::ConstOpDescPtr &op_desc, Status ret; ret = CheckKernelHcclInfo(op_desc, kernel_hccl_infos); if (ret != SUCCESS) { - GELOGE(PARAM_INVALID, "HcomOmeUtil:: the number of GETaskKernelHcclInfo is invalid."); + GELOGE(PARAM_INVALID, "[Check][KernelHcclInfo] failed, the number of GETaskKernelHcclInfo is invalid, op:%s(%s).", + op_desc->GetName().c_str(), op_desc->GetType().c_str()); return PARAM_INVALID; } GELOGI("GetHcclCount start, node[%s], opType[%s].", op_desc->GetName().c_str(), op_desc->GetType().c_str()); @@ -200,7 +209,7 @@ Status HcomOmeUtil::GetHcclCount(const ge::ConstOpDescPtr &op_desc, ret = GetHcomCount(op_desc, static_cast(kernel_hccl_infos[0].dataType), kernel_hccl_infos[0].hccl_type == HCOMALLGATHER, count); if (ret != SUCCESS) { - GELOGE(ret, "HcomOmeUtil:: Node: %s Optype: %s get the Hcom operator hccl count fail.", + GELOGE(ret, "[Call][GetHcomCount] Node:%s Optype:%s get the Hcom operator hccl count fail.", op_desc->GetName().c_str(), op_desc->GetType().c_str()); return PARAM_INVALID; } @@ -210,7 +219,7 @@ Status HcomOmeUtil::GetHcclCount(const ge::ConstOpDescPtr &op_desc, if (IsHorovodOp(op_desc->GetType())) { ret = GetHorovodCount(op_desc, kernel_hccl_infos); if (ret != SUCCESS) { - GELOGE(PARAM_INVALID, "HcomOmeUtil:: Node: %s Optype: %s get the Horovod hccl operator count fail.", + GELOGE(PARAM_INVALID, "[Call][GetHorovodCount] Node:%s Optype:%s get the 
Horovod hccl operator count fail.", op_desc->GetName().c_str(), op_desc->GetType().c_str()); return PARAM_INVALID; } @@ -225,11 +234,10 @@ Status HcomOmeUtil::GetHcclOperationType(const ge::ConstOpDescPtr &op_desc, Hccl if (IsHCOMOp(op_desc->GetType())) { std::string hcom_op_type; GE_CHK_BOOL_EXEC(ge::AttrUtils::GetStr(op_desc, HCOM_ATTR_REDUCE_TYPE, hcom_op_type), - REPORT_INNER_ERROR("E19999", "Get Attr:%s in op:%s(%s) fail", - HCOM_ATTR_REDUCE_TYPE.c_str(), + REPORT_INNER_ERROR("E19999", "Get Attr:%s in op:%s(%s) fail", HCOM_ATTR_REDUCE_TYPE.c_str(), op_desc->GetName().c_str(), op_desc->GetType().c_str()); return PARAM_INVALID, - "HcomOmeUtil:: Node: %s Optype: %s Get HCOM_ATTR_REDUCE_TYPE fail, not support!", + "[Get][Attr] %s in op:%s(%s) fail", HCOM_ATTR_REDUCE_TYPE.c_str(), op_desc->GetName().c_str(), op_desc->GetType().c_str()); if (hcom_op_type == "min") { @@ -244,7 +252,9 @@ Status HcomOmeUtil::GetHcclOperationType(const ge::ConstOpDescPtr &op_desc, Hccl REPORT_INNER_ERROR("E19999", "Attr:%s in Op:%s(%s), hcom_op_type value:%s is not support now, " "check invalid", HCOM_ATTR_REDUCE_TYPE.c_str(), op_desc->GetName().c_str(), op_desc->GetType().c_str(), hcom_op_type.c_str()); - GELOGE(PARAM_INVALID, "HcomOmeUtil::Get HCOM_ATTR_REDUCE_TYPE fail, [%s] not support!", hcom_op_type.c_str()); + GELOGE(PARAM_INVALID, "[Check][Param] Attr:%s in Op:%s(%s), hcom_op_type value:%s is not support now", + HCOM_ATTR_REDUCE_TYPE.c_str(), op_desc->GetName().c_str(), + op_desc->GetType().c_str(), hcom_op_type.c_str()); return PARAM_INVALID; } } @@ -256,7 +266,7 @@ Status HcomOmeUtil::GetHcclOperationType(const ge::ConstOpDescPtr &op_desc, Hccl ATTR_HOROVOD_ATTR_REDUCE_TYPE.c_str(), op_desc->GetName().c_str(), op_desc->GetType().c_str()); return PARAM_INVALID, - "HcomOmeUtil:: Node: %s Optype: %s Get ATTR_HOROVOD_ATTR_REDUCE_TYPE fail, not support!", + "[Get][Attr] %s in op:%s(%s) fail", ATTR_HOROVOD_ATTR_REDUCE_TYPE.c_str(), op_desc->GetName().c_str(), 
op_desc->GetType().c_str()); auto iter = kHorovodRedOpToHcclRedOp.find(static_cast(horovod_op_type)); @@ -264,8 +274,8 @@ Status HcomOmeUtil::GetHcclOperationType(const ge::ConstOpDescPtr &op_desc, Hccl REPORT_INNER_ERROR("E19999", "Attr:%s in Op:%s(%s), horovod_op_type value:%ld is not support now, " "check invalid", ATTR_HOROVOD_ATTR_REDUCE_TYPE.c_str(), op_desc->GetName().c_str(), op_desc->GetType().c_str(), horovod_op_type); - GELOGE(PARAM_INVALID, "HcomOmeUtil:: Node: %s Optype: %s HcomOpType cann't support! Current HcomOpType : %ld", - op_desc->GetName().c_str(), op_desc->GetType().c_str(), horovod_op_type); + GELOGE(PARAM_INVALID, "[Check][Param] Attr:%s in Op:%s(%s), horovod_op_type value:%ld is not support now", + ATTR_HOROVOD_ATTR_REDUCE_TYPE.c_str(), op_desc->GetName().c_str(), op_desc->GetType().c_str(), horovod_op_type); return PARAM_INVALID; } op_type = iter->second; @@ -281,7 +291,7 @@ Status HcomOmeUtil::GetHcclRootId(const ge::ConstOpDescPtr &op_desc, int64_t &ro HCOM_ATTR_ROOT_RANK.c_str(), op_desc->GetName().c_str(), op_desc->GetType().c_str()); return PARAM_INVALID, - "HcomOmeUtil::Node %s Optype: %s Get HCOM_ATTR_ROOT_INDEX fail, not support!", + "[Get][Attr] %s in op:%s(%s) fail", HCOM_ATTR_ROOT_RANK.c_str(), op_desc->GetName().c_str(), op_desc->GetType().c_str()); return SUCCESS; @@ -296,7 +306,7 @@ Status HcomOmeUtil::GetAllRootId(const ge::ConstOpDescPtr &op_desc, int64_t root_id = 0; Status dmrt = GetHcclRootId(op_desc, root_id); if (dmrt != SUCCESS) { - GELOGE(FAILED, "davinci_model: GetHcomRootId fail! domi error: %u", dmrt); + GELOGE(FAILED, "[Get][HcclRootId] fail! 
domi error: %u", dmrt); return FAILED; } @@ -324,7 +334,8 @@ Status HcomOmeUtil::CheckKernelHcclInfo(const ge::ConstOpDescPtr &op_desc, REPORT_INNER_ERROR("E19999", "Op:%s(%s) is not hcom op or param kernel_hccl_infos.size:%zu != 1, " "check invalid", op_desc->GetName().c_str(), op_desc->GetType().c_str(), kernel_hccl_infos.size()); - GELOGE(PARAM_INVALID, "HcomOmeUtil:: in Hcom scenario, the number of GETaskKernelHcclInfo is invalid."); + GELOGE(PARAM_INVALID, "[Check][Param] Op:%s(%s) is not hcom op or param kernel_hccl_infos.size:%zu != 1", + op_desc->GetName().c_str(), op_desc->GetType().c_str(), kernel_hccl_infos.size()); return PARAM_INVALID; } @@ -337,7 +348,9 @@ Status HcomOmeUtil::CheckKernelHcclInfo(const ge::ConstOpDescPtr &op_desc, "in op:%s(%s), check invalid", kernel_hccl_infos.size(), op_desc->GetInputsSize(), op_desc->GetName().c_str(), op_desc->GetType().c_str()); - GELOGE(PARAM_INVALID, "HcomOmeUtil:: in Horovod scenario, the number of GETaskKernelHcclInfo is invalid."); + GELOGE(PARAM_INVALID, "Param kernel_hccl_infos.size:%zu is empty or not equal to " + "input_desc size:%zu in op:%s(%s)", kernel_hccl_infos.size(), op_desc->GetInputsSize(), + op_desc->GetName().c_str(), op_desc->GetType().c_str()); return PARAM_INVALID; } } @@ -360,7 +373,7 @@ Status HcomOmeUtil::GetHorovodInputs(const ge::ConstOpDescPtr &op_desc, } if (CheckKernelHcclInfo(op_desc, kernel_hccl_infos) != SUCCESS) { - GELOGE(PARAM_INVALID, "HcomOmeUtil:: Node: %s Optype: %s the number of GETaskKernelHcclInfo is invalid.", + GELOGE(PARAM_INVALID, "[Check][KernelHcclInfo] Node:%s Optype:%s the number of GETaskKernelHcclInfo is invalid.", op_desc->GetName().c_str(), op_desc->GetType().c_str()); return PARAM_INVALID; } diff --git a/ge/graph/manager/util/variable_accelerate_ctrl.cc b/ge/graph/manager/util/variable_accelerate_ctrl.cc index 22f9169c..12ec5a49 100644 --- a/ge/graph/manager/util/variable_accelerate_ctrl.cc +++ b/ge/graph/manager/util/variable_accelerate_ctrl.cc @@ -54,7 
+54,7 @@ void VarAccelerateCtrl::SetVarChanged(const std::string &var_name) { void VarAccelerateCtrl::AddGraph(uint32_t graph_id, const ComputeGraphPtr &compute_graph) { std::lock_guard lock(mutex_); if (compute_graph == nullptr) { - GELOGE(PARAM_INVALID, "Failed to add graph %u, the compute graph is null", graph_id); + GELOGE(PARAM_INVALID, "[Check][Param] Failed to add graph %u, the compute graph is null", graph_id); return; } auto &var_names = graph_ids_to_var_names_[graph_id]; diff --git a/ge/graph/partition/dynamic_shape_partition.cc b/ge/graph/partition/dynamic_shape_partition.cc index 516d06d1..8fee1eb5 100755 --- a/ge/graph/partition/dynamic_shape_partition.cc +++ b/ge/graph/partition/dynamic_shape_partition.cc @@ -46,11 +46,6 @@ #define REQUIRE_GRAPH_SUCCESS(cond, ...) REQUIRE(((cond) == GRAPH_SUCCESS), __VA_ARGS__) namespace ge { -namespace { -const std::set kControlFlowOps{ - STREAMACTIVE, STREAMSWITCH, STREAMMERGE, ENTER, REFENTER, LOOPCOND, NEXTITERATION, REFNEXTITERATION, EXIT, REFEXIT -}; -} using Cluster = DynamicShapePartitioner::Cluster; using ClusterPtr = std::shared_ptr; @@ -279,9 +274,17 @@ Status DynamicShapePartitioner::InitClusters() { auto cluster = MakeShared(rank++, type, node, this); REQUIRE_NOT_NULL(cluster, "Failed new memory for cluster."); node_2_cluster_[node] = cluster; - if (cluster->IsUnknownShape() && !cluster->IsControlFlow()) { + if (cluster->IsUnknownShape()) { ordered_cluster_.push_back(cluster); } + + int64_t group_index = -1; + if (AttrUtils::GetInt(node->GetOpDesc(), ATTR_NAME_CONTROL_FLOW_GROUP, group_index)) { + GELOGD("[%s] is rts control flow Op, group index: %ld", node->GetName().c_str(), group_index); + auto &control_cluster = control_clusters_[group_index]; + control_cluster.emplace_back(cluster); + } + // Already sorted topologically, so access to the parent cluster is safe for (const auto &parent : node->GetInAllNodes()) { cluster->AddInput(node_2_cluster_[parent]); @@ -350,14 +353,38 @@ static std::string 
ToString(const std::vector &clusters) { } } +void DynamicShapePartitioner::MergeClustersControlFlow() { + for (const auto &item : control_clusters_) { + const auto &control_cluster = item.second; + auto rit = control_cluster.rbegin(); + if (rit == control_cluster.rend()) { + GELOGW("Invalid empty control flow cluster."); + continue; + } + + const auto &cluster = *rit; + for (++rit; rit != control_cluster.rend(); ++rit) { + const auto &cluster_from = *rit; + auto merged_clusters = cluster->MergeAllPathFrom(cluster_from); + GELOGD("Merge all path cluster from %lu to %lu %s.", cluster_from->Id(), cluster->Id(), + ToString(merged_clusters).c_str()); + for (const auto &merged_cluster : merged_clusters) { + for (const auto &node : merged_cluster->Nodes()) { + node_2_cluster_[node] = cluster; + } + } + } + } +} + void DynamicShapePartitioner::MergeClustersUnknownShape() { // Merge unknown shape clusters for (const auto &cluster : ordered_cluster_) { - if (cluster->IsIndependent() || cluster->IsControlFlow()) { + if (cluster->IsIndependent()) { continue; } for (const auto &in_cluster : cluster->Inputs()) { - if (!in_cluster->IsUnknownShape() || in_cluster->IsControlFlow()) { + if (!in_cluster->IsUnknownShape()) { continue; } auto merged_clusters = cluster->MergeAllPathFrom(in_cluster); @@ -419,6 +446,7 @@ void DynamicShapePartitioner::MergeClustersInputData() { } Status DynamicShapePartitioner::MergeClusters() { + MergeClustersControlFlow(); MergeClustersUnknownShape(); REQUIRE_SUCCESS(TopologicalSortClusters(), "Failed topological sort clusters after merge unknown shape clusters."); MergeClustersKnownShape(); @@ -608,13 +636,6 @@ bool Cluster::IsRefVariable() const { return false; } -bool Cluster::IsControlFlow() const { - const auto &op_desc = nodes_[0]->GetOpDesc(); - bool is_ctrl_flow = kControlFlowOps.count(op_desc->GetType()) > 0 && op_desc->HasAttr(ATTR_NAME_FORCE_UNKNOWN_SHAPE); - GELOGD("[%s] %s rts control flow Op ", op_desc->GetName().c_str(), is_ctrl_flow ? 
"Is" : "Not"); - return is_ctrl_flow; -} - void Cluster::AddInput(ClusterPtr in) { if (std::find(in_clusters_.begin(), in_clusters_.end(), in) != in_clusters_.end()) return; in_clusters_.insert(in_clusters_.end(), in); @@ -694,10 +715,7 @@ std::vector Cluster::MergeAllPathFrom(ClusterPtr other) { if (other->IsIndependent()) { return path_clusters; } - if (std::find(other->out_clusters_.begin(), other->out_clusters_.end(), shared_from_this()) == - other->out_clusters_.end()) { - return path_clusters; - } + path_clusters.push_back(other); forward_reached_queue.push(other); backward_reached_queue.push(shared_from_this()); @@ -761,7 +779,7 @@ InControlAnchorPtr Cluster::GetFrameInControlAnchor() { return partition_node_-> OutControlAnchorPtr Cluster::GetFrameOutControlAnchor() { return partition_node_->GetOutControlAnchor(); }; Status Cluster::BuildFrame() { - if ((IsUnknownShape() || IsKnownShape() || IsInputNode()) && !IsControlFlow()) { + if (IsUnknownShape() || IsKnownShape() || IsInputNode()) { return BuildPartitionFrame(); } else { auto node = nodes_.front(); @@ -896,7 +914,7 @@ Status Cluster::CombinePartitionFrame() { } Status Cluster::BuildPartitionSubgraph() { - if (IsData() || IsNetOutput() || IsIndependent() || IsControlFlow()) { + if (IsData() || IsNetOutput() || IsIndependent()) { return SUCCESS; } int64_t parent_node_index = 0; diff --git a/ge/graph/partition/dynamic_shape_partition.h b/ge/graph/partition/dynamic_shape_partition.h index 93f86d82..f1d711eb 100644 --- a/ge/graph/partition/dynamic_shape_partition.h +++ b/ge/graph/partition/dynamic_shape_partition.h @@ -47,7 +47,6 @@ class DynamicShapePartitioner { bool IsUnknownShape() const; bool IsIndependent() const; bool IsNetOutput() const; - bool IsControlFlow() const; std::vector> Inputs() const; std::vector> Outputs() const; bool IsInputNode() const; @@ -126,13 +125,15 @@ class DynamicShapePartitioner { // and there's only one path between the two clusters , merge the two clusters // 3) Iterate 
through the INPUT_DATA clusters, merge all INPUT_DATA Status MergeClusters(); + // Merge clusters step0 + void MergeClustersControlFlow(); // Merge clusters step1 void MergeClustersUnknownShape(); // Merge clusters step2 void MergeClustersKnownShape(); // Merge clusters step3 void MergeClustersInputData(); - // Topological sort clusters after merge unknow shape clusters. + // Topological sort clusters after merge unknown shape clusters. Status TopologicalSortClusters(); // Deduplicate merged clusters void PruneUniqueClusters(); @@ -140,7 +141,7 @@ class DynamicShapePartitioner { Status BuildPartitionFrame(); // Establish connection between corresponding partitioned of clusters Status CombinePartitionFrame(); - // Convert the nodes in cluster into a complete ComputeGraoh + // Convert the nodes in cluster into a complete ComputeGraph Status BuildPartitionSubgraph(); // Clear resource and break circular dependency void ClearResource(); @@ -155,6 +156,8 @@ class DynamicShapePartitioner { Status CtrlEdgeTransfer(); ge::ComputeGraphPtr root_graph_; // The original graph to partition std::unordered_map> node_2_cluster_; // Record nodes and the cluster it belongs to + // V1 control flow cluster, need merge to one Graph. + std::unordered_map>> control_clusters_; // topological sorted clusters, this field will change with the splitting. 
// When partitioning UNKNOWN_SHAPE cluster, it is a collection of all topological sorted UNKNOWN_SHAPE clusters // When partitioning KNOWN_SHAPE cluster, it is a collection of all topological sorted KNOWN_SHAPE clusters diff --git a/ge/graph/passes/base_pass.cc b/ge/graph/passes/base_pass.cc index 0868b729..2f94c6ad 100755 --- a/ge/graph/passes/base_pass.cc +++ b/ge/graph/passes/base_pass.cc @@ -36,6 +36,8 @@ struct DuringPassNodeSets { std::unordered_set nodes_re_pass; std::unordered_set nodes_re_pass_immediately; std::unordered_set nodes_last; + std::unordered_set nodes_suspend; + std::unordered_set nodes_resume; }; void GetAllNodesNoInputEdge(const ComputeGraphPtr &graph, std::deque &input_edge_nodes, @@ -55,8 +57,15 @@ void GetAllNodesNoInputEdge(const ComputeGraphPtr &graph, std::deque &i } } +bool IsAllInNodesAlive(const Node::Vistor &nodes, const std::unordered_set &nodes_suspend) { + return !std::any_of(nodes.begin(), nodes.end(), [&](const NodePtr &n) { return nodes_suspend.count(n) > 0; }); +} + void AddNextIterNodes(const Node::Vistor &nodes, std::deque &nodes_to_pass, - std::unordered_set &nodes_seen, std::unordered_set &nodes_last) { + DuringPassNodeSets &during_pass_node_set) { + auto &nodes_seen = during_pass_node_set.nodes_seen; + const auto &nodes_last = during_pass_node_set.nodes_last; + const auto &nodes_suspend = during_pass_node_set.nodes_suspend; for (auto &node : nodes) { if (node == nullptr) { continue; @@ -64,16 +73,57 @@ void AddNextIterNodes(const Node::Vistor &nodes, std::deque &n if (nodes_last.count(node) != 0) { continue; } + if (nodes_suspend.count(node) > 0) { + GELOGD("The node %s has suspend by pass, skip it.", node->GetName().c_str()); + continue; + } + bool all_in_nodes_alive = IsAllInNodesAlive(node->GetInAllNodes(), nodes_suspend); bool all_in_nodes_seen = node->IsAllInNodesSeen(nodes_seen); - if (all_in_nodes_seen && nodes_seen.insert(node.get()).second) { + if (all_in_nodes_seen && all_in_nodes_alive && 
nodes_seen.insert(node.get()).second) { nodes_to_pass.push_back(node); } } } +void AddRepassNodes(DuringPassNodeSets &during_pass_node_set, std::deque &nodes) { + for (const auto &node : during_pass_node_set.nodes_re_pass_immediately) { + GELOGD("The node %s will be re-pass immediately.", node->GetName().c_str()); + nodes.push_front(node); + } + during_pass_node_set.nodes_re_pass_immediately.clear(); +} + +void AddResumeNodes(DuringPassNodeSets &during_pass_node_set, std::deque &nodes) { + for (auto &node : during_pass_node_set.nodes_resume) { + const auto &it = during_pass_node_set.nodes_suspend.find(node); + if (it != during_pass_node_set.nodes_suspend.end()) { + during_pass_node_set.nodes_suspend.erase(node); + GELOGD("The node %s resumed by pass.", node->GetName().c_str()); + nodes.push_back(node); + } else { + GELOGW("The node %s not suspend, drop from resumed", node->GetName().c_str()); + } + } + during_pass_node_set.nodes_resume.clear(); +} + +void PushToSuspendNodes(DuringPassNodeSets &during_pass_node_set, const std::string &pass_name, + const std::unordered_set &nodes_suspend, + const std::unordered_set &nodes_resume) { + for (const auto &node : nodes_suspend) { + GELOGD("The iteration suspend of node %s has been set by pass %s", node->GetName().c_str(), pass_name.c_str()); + during_pass_node_set.nodes_suspend.emplace(node); + } + + for (const auto &node : nodes_resume) { + GELOGD("The iteration suspend of node %s has been resumed by pass %s", node->GetName().c_str(), pass_name.c_str()); + during_pass_node_set.nodes_resume.emplace(node); + } +} + void PushToRePassIfSeen(NodePtr &node, const std::pair &name_to_pass, - std::unordered_set &nodes_seen, std::unordered_set &nodes_to_re_pass, + std::unordered_set &nodes_seen, const std::unordered_set &nodes_to_re_pass, std::unordered_set &nodes_re_pass) { for (const auto &node_to_re_pass : nodes_to_re_pass) { if (node_to_re_pass == nullptr) { @@ -113,15 +163,18 @@ Status RunPasses(NodePtr &node, const 
NamesToPass &names_to_passes, DuringPassNo return result; } - auto nodes_to_re_pass = name_to_pass.second->GetNodesNeedRePass(); + const auto &nodes_to_re_pass = name_to_pass.second->GetNodesNeedRePass(); PushToRePassIfSeen(node, name_to_pass, during_pass_node_set.nodes_seen, nodes_to_re_pass, during_pass_node_set.nodes_re_pass); - auto nodes_to_re_pass_immediately = name_to_pass.second->GetNodesNeedRePassImmediately(); + const auto &nodes_to_re_pass_immediately = name_to_pass.second->GetNodesNeedRePassImmediately(); PushToRePassIfSeen(node, name_to_pass, during_pass_node_set.nodes_seen, nodes_to_re_pass_immediately, during_pass_node_set.nodes_re_pass_immediately); - auto nodes_deleted_by_pass = name_to_pass.second->GetNodesDeleted(); + PushToSuspendNodes(during_pass_node_set, name_to_pass.first, + name_to_pass.second->GetNodesSuspend(), name_to_pass.second->GetNodesResume()); + + const auto &nodes_deleted_by_pass = name_to_pass.second->GetNodesDeleted(); during_pass_node_set.nodes_deleted.insert(nodes_deleted_by_pass.begin(), nodes_deleted_by_pass.end()); if (nodes_deleted_by_pass.count(node) > 0) { GELOGD("The node %s was deleted by pass %s, stop the remain passes", node->GetName().c_str(), @@ -221,8 +274,13 @@ Status GEPass::RunPassesOneGraph(const NamesToPass &names_to_passes) { GELOGD("The node %s was deleted before, skip it.", node->GetName().c_str()); continue; } + if (during_pass_node_set.nodes_suspend.count(node) > 0) { + GELOGD("The node %s has been added to suspend-iteration nodes list, the iteration of it will be suspend.", + node->GetName().c_str()); + continue; + } - AddNextIterNodes(node->GetOutNodes(), nodes, during_pass_node_set.nodes_seen, during_pass_node_set.nodes_last); + AddNextIterNodes(node->GetOutNodes(), nodes, during_pass_node_set); auto ret = RunPasses(node, names_to_passes, during_pass_node_set); if (ret != SUCCESS) { @@ -253,11 +311,9 @@ Status GEPass::RunPassesOneGraph(const NamesToPass &names_to_passes) { // should be called each 
time at the begin of the iteration ClearOption(names_to_passes); } - for (const auto &node : during_pass_node_set.nodes_re_pass_immediately) { - GELOGD("The node %s will be re-pass immediately.", node->GetName().c_str()); - nodes.push_front(node); - } - during_pass_node_set.nodes_re_pass_immediately.clear(); + + AddRepassNodes(during_pass_node_set, nodes); + AddResumeNodes(during_pass_node_set, nodes); } for (auto &node : during_pass_node_set.nodes_last) { diff --git a/ge/graph/passes/base_pass.h b/ge/graph/passes/base_pass.h index a9f4f000..d0f125b2 100644 --- a/ge/graph/passes/base_pass.h +++ b/ge/graph/passes/base_pass.h @@ -51,11 +51,15 @@ class BaseNodePass { virtual ~BaseNodePass() = default; - std::unordered_set GetNodesNeedRePass() { return nodes_need_re_pass_; } + const std::unordered_set &GetNodesNeedRePass() { return nodes_need_re_pass_; } - std::unordered_set GetNodesNeedRePassImmediately() { return nodes_need_re_pass_immediately_; } + const std::unordered_set &GetNodesNeedRePassImmediately() { return nodes_need_re_pass_immediately_; } - std::unordered_set GetNodesDeleted() { return nodes_deleted_; } + const std::unordered_set &GetNodesDeleted() { return nodes_deleted_; } + + const std::unordered_set &GetNodesSuspend() { return nodes_suspend_; } + + const std::unordered_set &GetNodesResume() { return nodes_resume_; } void SetOption(NodePassOption option, const std::string &value) { options_[option] = value; } @@ -65,6 +69,8 @@ class BaseNodePass { nodes_need_re_pass_.clear(); nodes_deleted_.clear(); nodes_need_re_pass_immediately_.clear(); + nodes_suspend_.clear(); + nodes_resume_.clear(); } protected: @@ -80,7 +86,7 @@ class BaseNodePass { /// optimized by other passes, call this function. /// @param node /// - void AddRePassNode(NodePtr &node) { nodes_need_re_pass_.insert(node); } + void AddRePassNode(const NodePtr &node) { nodes_need_re_pass_.insert(node); } /// /// Add a node to be optimized immediately again. 
If you add a new node to the graph, or @@ -88,13 +94,13 @@ class BaseNodePass { /// optimized by other passes, call this function. /// @param node /// - void AddImmediateRePassNode(NodePtr &node) { nodes_need_re_pass_immediately_.insert(node); } + void AddImmediateRePassNode(const NodePtr &node) { nodes_need_re_pass_immediately_.insert(node); } /// /// Add a node and it's input/output data nodes to be optimized again. /// @param node /// - void AddRePassNodesWithInOut(NodePtr &node) { + void AddRePassNodesWithInOut(const NodePtr &node) { AddRePassNode(node); auto out_nodes = node->GetOutNodes(); for (auto &out_node : out_nodes) { @@ -116,12 +122,34 @@ class BaseNodePass { /// void AddNodeDeleted(const NodePtr &node) { nodes_deleted_.insert(node); } + /// + /// If you suspend a node from the graph, especially following node. The remain + /// iterate passes will stop process on the suspend node(if it can be + /// reached by edge connections) till the last one. Obviously it is a waste of + /// time. You can add the suspend nodes by calling this function, to stop the + /// next iterations. + /// @param node + /// + void AddNodeSuspend(const NodePtr &node) { nodes_suspend_.insert(node); } + + /// + /// If you resume a node from the graph, especially following node. The remain + /// iterate passes will continue process on the resume node(if it can be + /// reached by edge connections) till the last one. + /// You can add the resume nodes by calling this function, to resume the + /// next iterations. 
+ /// @param node + /// + void AddNodeResume(const NodePtr &node) { nodes_resume_.insert(node); } + bool OptionExists(NodePassOption option) { return options_.count(option) > 0; } private: std::unordered_set nodes_need_re_pass_; std::unordered_set nodes_need_re_pass_immediately_; std::unordered_set nodes_deleted_; + std::unordered_set nodes_suspend_; + std::unordered_set nodes_resume_; std::map options_; }; diff --git a/ge/graph/passes/infershape_pass.cc b/ge/graph/passes/infershape_pass.cc index 46026023..cb649240 100755 --- a/ge/graph/passes/infershape_pass.cc +++ b/ge/graph/passes/infershape_pass.cc @@ -21,6 +21,8 @@ #include "framework/common/util.h" #include "graph/shape_refiner.h" #include "graph/utils/graph_utils.h" +#include "graph/utils/node_utils.h" +#include "graph/common/omg_util.h" #include "graph/debug/ge_attr_define.h" #include "utils/tensor_utils.h" #include "utils/type_utils.h" @@ -117,7 +119,9 @@ Status InferShapePass::RePassLoopNode(const NodePtr &node) { const auto RePassNode = [&](const std::set &re_pass_types) { for (auto &n : node->GetOutDataNodes()) { GE_CHECK_NOTNULL(n); - if (re_pass_types.count(n->GetType()) > 0) { + std::string node_type; + GE_CHK_STATUS_RET(GetOriginalType(n, node_type), "Get original node type failed."); + if (re_pass_types.count(node_type) > 0) { AddImmediateRePassNode(n); (void)AttrUtils::SetBool(n->GetOpDesc(), ATTR_NAME_NEED_INFER_AGAIN, false); GELOGD("Node %s need repass immediately after %s.", n->GetName().c_str(), node->GetName().c_str()); @@ -126,17 +130,44 @@ Status InferShapePass::RePassLoopNode(const NodePtr &node) { return SUCCESS; }; - if (node->GetType() == NEXTITERATION || node->GetType() == REFNEXTITERATION) { - return RePassNode({MERGE, REFMERGE}); // Re-Pass Merge + const auto ExProcNode = [&](const std::set &proc_types, + const std::function &proc_func, + const std::string &info) { + for (auto &n : node->GetOutDataNodes()) { + GE_CHECK_NOTNULL(n); + std::string node_type; + 
GE_CHK_STATUS_RET(GetOriginalType(n, node_type), "Get original node type failed."); + if (proc_types.count(node_type) > 0) { + proc_func(this, n); + GELOGD("Node %s %s after %s.", n->GetName().c_str(), info.c_str(), node->GetName().c_str()); + } + } + return SUCCESS; + }; + + std::string node_type; + GE_CHK_STATUS_RET(GetOriginalType(node, node_type), "Get original node type failed."); + if (kNextIterationOpTypes.count(node_type) > 0) { + return RePassNode(kMergeOpTypes); // Re-Pass Merge } - if (node->GetType() == MERGE || node->GetType() == REFMERGE) { + if (kMergeOpTypes.count(node_type) > 0) { if (node->GetOpDesc()->HasAttr(ATTR_NAME_NEED_INFER_AGAIN)) { node->GetOpDesc()->DelAttr(ATTR_NAME_NEED_INFER_AGAIN); + return RePassNode(kSwitchOpTypes); // Re-Pass Switch } return SUCCESS; } + if (kSwitchOpTypes.count(node_type) > 0) { + if (node->GetOpDesc()->HasAttr(ATTR_NAME_NEED_INFER_AGAIN)) { + node->GetOpDesc()->DelAttr(ATTR_NAME_NEED_INFER_AGAIN); + return ExProcNode(kExitOpTypes, &InferShapePass::AddNodeResume, "need resume"); // Resume Exit + } else { + return ExProcNode(kExitOpTypes, &InferShapePass::AddNodeSuspend, "need suspend"); // Suspend Exit + } + } + return SUCCESS; } } // namespace ge diff --git a/ge/graph/passes/isolated_op_remove_pass.cc b/ge/graph/passes/isolated_op_remove_pass.cc deleted file mode 100644 index 5c9093e9..00000000 --- a/ge/graph/passes/isolated_op_remove_pass.cc +++ /dev/null @@ -1,37 +0,0 @@ -/** - * Copyright 2020 Huawei Technologies Co., Ltd - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
- * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "graph/passes/isolated_op_remove_pass.h" - -#include "common/debug/log.h" -#include "common/types.h" -#include "common/util.h" - -namespace ge { -Status IsolatedOpRemovePass::Run(ge::ComputeGraphPtr graph) { - GE_CHECK_NOTNULL(graph); - for (NodePtr &node_ptr : graph->GetDirectNode()) { - GE_IF_BOOL_EXEC(node_ptr->GetOpDesc() == nullptr, continue); - if (node_ptr->GetInDataNodes().size() == 0 && node_ptr->GetOutAllNodes().size() == 0 && - !(node_ptr->GetOpDesc()->HasAttr(TO_BE_OUTPUT))) { - GE_RETURN_WITH_LOG_IF_ERROR(graph->RemoveNode(node_ptr), "remove graph node [%s] fail", - node_ptr->GetOpDesc()->GetName().c_str()); - } - } - - return SUCCESS; -} -} // namespace ge diff --git a/ge/graph/passes/isolated_op_remove_pass.h b/ge/graph/passes/isolated_op_remove_pass.h deleted file mode 100755 index 3b7fe7d1..00000000 --- a/ge/graph/passes/isolated_op_remove_pass.h +++ /dev/null @@ -1,28 +0,0 @@ -/** - * Copyright 2020 Huawei Technologies Co., Ltd - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#ifndef GE_GRAPH_PASSES_ISOLATED_OP_REMOVE_PASS_H_ -#define GE_GRAPH_PASSES_ISOLATED_OP_REMOVE_PASS_H_ - -#include "inc/graph_pass.h" - -namespace ge { -class IsolatedOpRemovePass : public GraphPass { - public: - Status Run(ge::ComputeGraphPtr graph); -}; -} // namespace ge -#endif // GE_GRAPH_PASSES_ISOLATED_OP_REMOVE_PASS_H_ diff --git a/ge/graph/passes/mark_force_unknown_for_cond_pass.cc b/ge/graph/passes/mark_force_unknown_for_cond_pass.cc index 6729a647..f6c87d58 100644 --- a/ge/graph/passes/mark_force_unknown_for_cond_pass.cc +++ b/ge/graph/passes/mark_force_unknown_for_cond_pass.cc @@ -18,20 +18,25 @@ #include +#include "graph/utils/node_utils.h" #include "graph/common/omg_util.h" namespace ge { namespace { -const std::set kMergeOpTypes{ MERGE, REFMERGE }; +inline bool IsMergeInLoop(const NodePtr &node) { + const static std::set kLoopMergeInputs{ ENTER, REFENTER, NEXTITERATION, REFNEXTITERATION }; -const std::set kSwitchOpTypes{ SWITCH, REFSWITCH }; + std::string node_type; + (void)GetOriginalType(node, node_type); + return kLoopMergeInputs.count(node_type) > 0; +} -const std::set kLoopMergeInputs{ ENTER, REFENTER, NEXTITERATION, REFNEXTITERATION }; +inline bool IsSwitchInLoop(const NodePtr &node) { + const static std::set kLoopSwitchInputs{ MERGE, REFMERGE, LOOPCOND }; -inline bool IsMergeInLoop(const NodePtr &node) { std::string node_type; (void)GetOriginalType(node, node_type); - return kLoopMergeInputs.count(node_type) > 0; + return kLoopSwitchInputs.count(node_type) > 0; } } @@ -103,7 +108,13 @@ void MarkForceUnknownForCondPass::MarkUnknownForSwitch(const NodePtr &node, std: if (dst_span > 0) { search_queue.push({in_node, dst_span - 1}); } else { - switch_group.emplace_back(in_node); + const auto &all_in_nodes = in_node->GetInDataNodes(); + if (std::any_of(all_in_nodes.begin(), all_in_nodes.end(), IsSwitchInLoop)) { + GELOGW("Travel node: %s, %s node: %s, Skip LoopCond switch", dst_node->GetName().c_str(), node_type.c_str(), + 
in_node->GetName().c_str()); + } else { + switch_group.emplace_back(in_node); + } } } else if (kMergeOpTypes.count(node_type) > 0) { // Merge input node. search_queue.push({in_node, dst_span + 1}); @@ -121,19 +132,37 @@ void MarkForceUnknownForCondPass::MarkUnknownForSwitch(const NodePtr &node, std: /// void MarkForceUnknownForCondPass::MarkUnknownForSwitch(const std::map> &switch_groups) { std::function callback = [](const NodePtr &n) { - return n->GetOpDesc()->HasAttr(ATTR_NAME_FORCE_UNKNOWN_SHAPE); + return n->GetOpDesc()->HasAttr(ATTR_NAME_CONTROL_FLOW_GROUP); }; - for (const auto &group : switch_groups) { - const auto &node = group.first; - const auto &switch_group = group.second; - const auto &op_desc = node->GetOpDesc(); - if (IsUnknownShapeTensor(op_desc->GetOutputDesc(0)) || op_desc->HasAttr(ATTR_NAME_FORCE_UNKNOWN_SHAPE) || - std::any_of(switch_group.begin(), switch_group.end(), callback)) { - GELOGI("Mark [%s] as force unknown shape", node->GetName().c_str()); - MarkForceUnknownShape(node, true); - for (const auto &n : switch_group) { - MarkForceUnknownShape(n, true); + for (auto it1 = switch_groups.begin(); it1 != switch_groups.end(); ++it1) { + const auto &op_node1 = it1->first; + const auto &op_desc1 = op_node1->GetOpDesc(); + if (op_desc1->HasAttr(ATTR_NAME_CONTROL_FLOW_GROUP)) { + continue; + } + + if (IsUnknownShapeTensor(op_desc1->GetOutputDesc(0))) { + int64_t group_index = op_desc1->GetId(); + GELOGI("Mark %s as unknown shape control flow, group index: %ld", op_desc1->GetName().c_str(), group_index); + MarkForceUnknownShape(op_node1, true, group_index); + for (const auto &n : it1->second) { + MarkForceUnknownShape(n, true, group_index); + } + + for (auto it2 = switch_groups.begin(); it2 != switch_groups.end(); ++it2) { + const auto &op_node2 = it2->first; + const auto &op_desc2 = op_node2->GetOpDesc(); + if (op_desc2->HasAttr(ATTR_NAME_CONTROL_FLOW_GROUP)) { + continue; + } + + if (std::any_of(it2->second.begin(), it2->second.end(), callback)) { 
+ MarkForceUnknownShape(op_node2, true, group_index); + for (const auto &n : it2->second) { + MarkForceUnknownShape(n, true, group_index); + } + } } } } diff --git a/ge/graph/passes/memcpy_addr_async_pass.cc b/ge/graph/passes/memcpy_addr_async_pass.cc index aff89f35..e8e4ebd8 100755 --- a/ge/graph/passes/memcpy_addr_async_pass.cc +++ b/ge/graph/passes/memcpy_addr_async_pass.cc @@ -25,15 +25,15 @@ namespace ge { Status MemcpyAddrAsyncPass::Run(ComputeGraphPtr graph) { GE_CHECK_NOTNULL(graph); - for (const auto &node : graph->GetAllNodes()) { - if (node->GetType() == STREAMSWITCH) { - auto sub_graph = node->GetOwnerComputeGraph(); - if (sub_graph != nullptr && !sub_graph->GetGraphUnknownFlag()) { - GE_CHK_STATUS_RET(AddMemcpyAsyncNode(node), "Add memcpyasync node failed in known subgraph."); + if (graph->GetGraphUnknownFlag()) { + for (const auto &node : graph->GetAllNodes()) { + if (node->GetType() == STREAMSWITCH) { + auto sub_graph = node->GetOwnerComputeGraph(); + if (sub_graph != nullptr && !sub_graph->GetGraphUnknownFlag()) { + GE_CHK_STATUS_RET(AddMemcpyAsyncNode(node), "Add memcpyasync node failed in known subgraph."); + } } } - } - if (graph->GetGraphUnknownFlag()) { GELOGD("Graph[%s] is unknown graph, skip.", graph->GetName().c_str()); return SUCCESS; } diff --git a/ge/graph/passes/merge_to_stream_merge_pass.cc b/ge/graph/passes/merge_to_stream_merge_pass.cc index f3a437a6..4c1ad1ae 100644 --- a/ge/graph/passes/merge_to_stream_merge_pass.cc +++ b/ge/graph/passes/merge_to_stream_merge_pass.cc @@ -84,8 +84,9 @@ Status MergeToStreamMergePass::AddActiveNodes(const ComputeGraphPtr &graph, cons GE_CHK_BOOL_EXEC(node != nullptr, REPORT_INNER_ERROR("E19999", "Param node is nullptr, check invalid"); return FAILED, "Param of pre node is null."); - bool force_unknown = node->GetOpDesc()->HasAttr(ATTR_NAME_FORCE_UNKNOWN_SHAPE); - MarkForceUnknownShape(node, force_unknown); + int64_t group_index = -1; + bool force_unknown = AttrUtils::GetInt(node->GetOpDesc(), 
ATTR_NAME_CONTROL_FLOW_GROUP, group_index); + MarkForceUnknownShape(node, force_unknown, group_index); for (const InDataAnchorPtr &in_data_anchor : node->GetAllInDataAnchors()) { OutDataAnchorPtr peer_out_anchor = in_data_anchor->GetPeerOutAnchor(); GE_IF_BOOL_EXEC(peer_out_anchor == nullptr, continue); @@ -102,7 +103,7 @@ Status MergeToStreamMergePass::AddActiveNodes(const ComputeGraphPtr &graph, cons GELOGE(FAILED, "SetActiveLabelList for node %s failed.", active_node->GetName().c_str()); return FAILED; } - MarkForceUnknownShape(active_node, force_unknown); + MarkForceUnknownShape(active_node, force_unknown, group_index); } return SUCCESS; diff --git a/ge/graph/passes/next_iteration_pass.cc b/ge/graph/passes/next_iteration_pass.cc index 5f4fc4d0..7128b3dc 100644 --- a/ge/graph/passes/next_iteration_pass.cc +++ b/ge/graph/passes/next_iteration_pass.cc @@ -18,6 +18,7 @@ #include "common/ge/ge_util.h" #include "graph/common/omg_util.h" +#include "graph/utils/node_utils.h" using std::string; @@ -203,6 +204,7 @@ Status NextIterationPass::HandleWhileGroup(ComputeGraphPtr &graph) { for (const auto &loop_cond_iter : loop_group_map_) { const LoopCondGroup &loop_group = *loop_cond_iter.second; const std::string &cond_name = loop_cond_iter.second->loop_cond->GetName(); + const int64_t group_index = loop_group.loop_cond->GetOpDesc()->GetId(); GELOGI("Handle while group, LoopCond node: %s.", cond_name.c_str()); // Create Active node, Enter->Active->Merge, NextIteration->Active->Merge @@ -223,7 +225,7 @@ Status NextIterationPass::HandleWhileGroup(ComputeGraphPtr &graph) { enter_active->GetName().c_str()); return INTERNAL_ERROR; } - MarkForceUnknownShape(enter_node, loop_group.is_unknown_shape); + MarkForceUnknownShape(enter_node, loop_group.is_unknown_shape, group_index); } for (const auto &pair : loop_cond_iter.second->merge_next_pairs) { @@ -253,8 +255,8 @@ Status NextIterationPass::HandleWhileGroup(ComputeGraphPtr &graph) { return INTERNAL_ERROR; } - 
MarkForceUnknownShape(next_node, loop_group.is_unknown_shape); - MarkForceUnknownShape(merge_node, loop_group.is_unknown_shape); + MarkForceUnknownShape(next_node, loop_group.is_unknown_shape, group_index); + MarkForceUnknownShape(merge_node, loop_group.is_unknown_shape, group_index); } if ((SetActiveLabelList(enter_active, {cond_name}) != SUCCESS) || @@ -263,10 +265,10 @@ Status NextIterationPass::HandleWhileGroup(ComputeGraphPtr &graph) { return INTERNAL_ERROR; } - MarkForceUnknownShape(loop_group.loop_cond, loop_group.is_unknown_shape); - MarkForceUnknownShape(enter_active, loop_group.is_unknown_shape); - MarkForceUnknownShape(next_active, loop_group.is_unknown_shape); - HandleSwitchExitNodes(loop_group); + MarkForceUnknownShape(loop_group.loop_cond, loop_group.is_unknown_shape, group_index); + MarkForceUnknownShape(enter_active, loop_group.is_unknown_shape, group_index); + MarkForceUnknownShape(next_active, loop_group.is_unknown_shape, group_index); + HandleSwitchExitNodes(loop_group, group_index); } return SUCCESS; @@ -275,20 +277,21 @@ Status NextIterationPass::HandleWhileGroup(ComputeGraphPtr &graph) { /// /// @brief Mark force unknown for Exit node /// @param [in] group of LoopCond +/// @param [in] index of LoopCond Node /// @return void /// -void NextIterationPass::HandleSwitchExitNodes(const LoopCondGroup &loop_group) { +void NextIterationPass::HandleSwitchExitNodes(const LoopCondGroup &loop_group, int64_t group_index) { if (!loop_group.is_unknown_shape) { return; } for (const auto &switch_node : loop_group.switch_nodes) { - MarkForceUnknownShape(switch_node, loop_group.is_unknown_shape); + MarkForceUnknownShape(switch_node, loop_group.is_unknown_shape, group_index); for (const auto &node : switch_node->GetOutDataNodes()) { std::string node_type; (void)GetOriginalType(node, node_type); - if (node_type == EXIT || node_type == REFEXIT) { - MarkForceUnknownShape(node, loop_group.is_unknown_shape); + if (kExitOpTypes.count(node_type) > 0) { + 
MarkForceUnknownShape(node, loop_group.is_unknown_shape, group_index); } } } diff --git a/ge/graph/passes/next_iteration_pass.h b/ge/graph/passes/next_iteration_pass.h index e8786516..b6a0846d 100755 --- a/ge/graph/passes/next_iteration_pass.h +++ b/ge/graph/passes/next_iteration_pass.h @@ -96,9 +96,10 @@ class NextIterationPass : public GraphPass { /// /// @brief Mark force unknown for Exit node /// @param [in] group of LoopCond + /// @param [in] index of LoopCond Node /// @return void /// - void HandleSwitchExitNodes(const LoopCondGroup &loop_group); + void HandleSwitchExitNodes(const LoopCondGroup &loop_group, int64_t group_index); // map std::unordered_map loop_group_map_; diff --git a/ge/graph/passes/remove_nodes_pass.cc b/ge/graph/passes/remove_nodes_pass.cc deleted file mode 100644 index c238f003..00000000 --- a/ge/graph/passes/remove_nodes_pass.cc +++ /dev/null @@ -1,47 +0,0 @@ -/** - * Copyright 2020 Huawei Technologies Co., Ltd - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -#include "remove_nodes_pass.h" -#include "debug/ge_log.h" -#include "inc/framework/common/util.h" -#include "inc/graph/utils/node_utils.h" - -namespace ge { -Status RemoveNodesPass::Run(NodePtr &node) { - GE_CHECK_NOTNULL(node); - auto node_type = NodeUtils::GetNodeType(*node); - auto type_iter = remove_node_types_to_arg_.find(node_type); - if (type_iter != remove_node_types_to_arg_.end()) { - GELOGI("Remove node %s by type %s", node->GetName().c_str(), node_type.c_str()); - return IsolateAndDeleteNode(node, type_iter->second); - } - for (const auto &attr_name_to_arg : remove_node_attr_names_to_arg_) { - if (AttrUtils::HasAttr(node->GetOpDesc(), attr_name_to_arg.first)) { - GELOGI("Remove node %s by attr name %s", node->GetName().c_str(), attr_name_to_arg.first.c_str()); - return IsolateAndDeleteNode(node, attr_name_to_arg.second); - } - } - - return SUCCESS; -} -RemoveNodesPass &RemoveNodesPass::AddNodeType(const string &node_type, std::initializer_list arg) { - remove_node_types_to_arg_[node_type] = std::move(arg); - return *this; -} -RemoveNodesPass &RemoveNodesPass::AddAttrName(const string &attr_name, std::initializer_list arg) { - remove_node_attr_names_to_arg_[attr_name] = std::move(arg); - return *this; -} -} // namespace ge \ No newline at end of file diff --git a/ge/graph/passes/remove_nodes_pass.h b/ge/graph/passes/remove_nodes_pass.h deleted file mode 100644 index 1d4fced9..00000000 --- a/ge/graph/passes/remove_nodes_pass.h +++ /dev/null @@ -1,32 +0,0 @@ -/** - * Copyright 2020 Huawei Technologies Co., Ltd - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -#ifndef GE_REMOVE_NODES_PASS_H_ -#define GE_REMOVE_NODES_PASS_H_ -#include "graph/passes/base_pass.h" - -namespace ge { -class RemoveNodesPass : public BaseNodePass { - public: - Status Run(NodePtr &node) override; - RemoveNodesPass &AddNodeType(const std::string &node_type, std::initializer_list arg = {0}); - RemoveNodesPass &AddAttrName(const std::string &attr_name, std::initializer_list arg = {0}); - - private: - std::map> remove_node_types_to_arg_; - std::map> remove_node_attr_names_to_arg_; -}; -} // namespace ge -#endif //GE_REMOVE_NODES_PASS_H_ diff --git a/ge/graph/passes/subgraph_pass.cc b/ge/graph/passes/subgraph_pass.cc index b931eea8..401dee54 100755 --- a/ge/graph/passes/subgraph_pass.cc +++ b/ge/graph/passes/subgraph_pass.cc @@ -464,8 +464,8 @@ Status SubgraphPass::InsertMemcpyNode(const ComputeGraphPtr &graph, const OutDat GE_CHECK_NOTNULL(out_anchor); NodePtr in_node = out_anchor->GetOwnerNode(); OpDescBuilder op_desc_builder(name, IDENTITY); - OpDescPtr op_desc = op_desc_builder.AddInput("x", in_node->GetOpDesc()->GetOutputDesc(0)) - .AddOutput("y", in_node->GetOpDesc()->GetOutputDesc(0)) + OpDescPtr op_desc = op_desc_builder.AddInput("x", in_node->GetOpDesc()->GetOutputDesc(out_anchor->GetIdx())) + .AddOutput("y", in_node->GetOpDesc()->GetOutputDesc(out_anchor->GetIdx())) .Build(); (void)AttrUtils::SetBool(op_desc, ATTR_NO_NEED_CONSTANT_FOLDING, false); (void)AttrUtils::SetBool(op_desc, ATTR_NAME_CANNOT_BE_DELETED, true); diff --git a/ge/graph/passes/switch_to_stream_switch_pass.cc 
b/ge/graph/passes/switch_to_stream_switch_pass.cc index 949fff41..66a60ab9 100644 --- a/ge/graph/passes/switch_to_stream_switch_pass.cc +++ b/ge/graph/passes/switch_to_stream_switch_pass.cc @@ -369,7 +369,9 @@ NodePtr SwitchToStreamSwitchPass::CreateStreamSwitchNode(const ComputeGraphPtr & GE_CHK_STATUS(GraphUtils::AddEdge(peer_cond_anchor, stream_switch->GetInDataAnchor(0)), "StreamSwitch node add cond edge failed."); - MarkForceUnknownShape(stream_switch, switch_node->GetOpDesc()->HasAttr(ATTR_NAME_FORCE_UNKNOWN_SHAPE)); + int64_t group_index = -1; + bool force_unknown = AttrUtils::GetInt(switch_node->GetOpDesc(), ATTR_NAME_CONTROL_FLOW_GROUP, group_index); + MarkForceUnknownShape(stream_switch, force_unknown, group_index); return stream_switch; } @@ -488,11 +490,12 @@ Status SwitchToStreamSwitchPass::CombineSwitchNode(const ComputeGraphPtr &graph) return FAILED; } - std::function callback = [](const NodePtr &n) { - return n->GetOpDesc()->HasAttr(ATTR_NAME_FORCE_UNKNOWN_SHAPE); + int64_t group_index = -1; + std::function callback = [&group_index](const NodePtr &n) { + return AttrUtils::GetInt(n->GetOpDesc(), ATTR_NAME_CONTROL_FLOW_GROUP, group_index); }; bool is_unknown_shape = std::any_of(same_cond_switch.begin(), same_cond_switch.end(), callback); - MarkForceUnknownShape(active_node, is_unknown_shape); + MarkForceUnknownShape(active_node, is_unknown_shape, group_index); const std::string &cond_group = cond_node->GetName(); for (uint32_t i = 0; i < SWITCH_OUTPUT_NUM; ++i) { @@ -522,7 +525,7 @@ Status SwitchToStreamSwitchPass::CombineSwitchNode(const ComputeGraphPtr &graph) GE_CHK_STATUS(GraphUtils::AddEdge(cast_node->GetOutDataAnchor(0), stream_switch->GetInDataAnchor(0)), "Cast add data edge failed."); - MarkForceUnknownShape(stream_switch, is_unknown_shape); + MarkForceUnknownShape(stream_switch, is_unknown_shape, group_index); for (const NodePtr &node : switch_list) { GE_IF_BOOL_EXEC(node != stream_switch, { 
GE_CHK_STATUS(GraphUtils::RemoveEdge(peer_cond_anchor, node->GetInDataAnchor(0)), diff --git a/ge/graph/passes/unused_op_remove_pass.cc b/ge/graph/passes/unused_op_remove_pass.cc deleted file mode 100644 index 41f7c828..00000000 --- a/ge/graph/passes/unused_op_remove_pass.cc +++ /dev/null @@ -1,134 +0,0 @@ -/** - * Copyright 2020 Huawei Technologies Co., Ltd - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "graph/passes/unused_op_remove_pass.h" -#include -#include -#include -#include -#include "common/debug/log.h" -#include "common/op/ge_op_utils.h" -#include "common/types.h" -#include "common/util.h" -#include "graph/utils/attr_utils.h" -#include "graph/utils/graph_utils.h" -#include "graph/utils/op_desc_utils.h" -#include "inc/pass_manager.h" -#include "graph/passes/isolated_op_remove_pass.h" - -using domi::SUCCESS; - -namespace ge { -const std::set kRemoveOpSet = {DROPOUT, PERMUTE, UNUSEDCONST, ASSERT}; -const std::set kOtherRemoveOpSet = {DROPOUT}; - -Status UnusedOpRemovePass::Run(ComputeGraphPtr graph) { - GE_CHECK_NOTNULL(graph); - std::set remove_op_set; - vector nodes_to_be_deleted; - if (fmktype_ == TENSORFLOW) { - remove_op_set = kRemoveOpSet; - } else { - remove_op_set = kOtherRemoveOpSet; - } - - for (auto &node : graph->GetDirectNode()) { - GE_CHECK_NOTNULL(node->GetOpDesc()); - std::string op_type_str = node->GetOpDesc()->GetType(); - if (remove_op_set.count(op_type_str)) { - if (IsExceptions(node)) { - continue; - } - for 
(auto &out_anchor : node->GetAllOutDataAnchors()) { - for (auto &in_anchor : out_anchor->GetPeerInDataAnchors()) { - NodePtr dst_node = in_anchor->GetOwnerNode(); - GE_CHECK_NOTNULL(dst_node->GetOpDesc()); - int dst_index = in_anchor->GetIdx(); - std::vector list_bool; - GE_CHECK_NOTNULL(dst_node->GetOpDesc()); - list_bool = dst_node->GetOpDesc()->GetIsInputConst(); - GE_IF_BOOL_EXEC(list_bool.size() == 0, continue); - list_bool.erase(list_bool.begin() + dst_index); - dst_node->GetOpDesc()->SetIsInputConst(list_bool); - } - } - if (op_type_str == ASSERT) { - GE_CHK_STATUS_RET(CollectParentNode(graph, node, nodes_to_be_deleted), "remove node failed"); - } else { - GE_CHK_STATUS_RET(graph->RemoveNode(node), "remove node failed"); - } - } - } - for (auto &node : nodes_to_be_deleted) { - for (InDataAnchorPtr &inAnchor : node->GetAllInDataAnchors()) { - inAnchor->UnlinkAll(); - } - for (OutDataAnchorPtr &outAnchorPtr : node->GetAllOutDataAnchors()) { - outAnchorPtr->UnlinkAll(); - } - if (node->GetOutControlAnchor() != nullptr) { - node->GetOutControlAnchor()->UnlinkAll(); - } - GE_CHK_STATUS_RET(graph->RemoveNode(node), "remove node:%s failed", node->GetName().c_str()); - } - - return SUCCESS; -} - -Status UnusedOpRemovePass::CollectParentNode(const ComputeGraphPtr &graph, const NodePtr &node, - vector &node_vec) { - GE_CHECK_NOTNULL(graph); - GE_CHECK_NOTNULL(node); - node_vec.push_back(node); - std::queue node_queue; - - for (auto &src_node : node->GetInDataNodes()) { - if (src_node->GetOutDataNodesSize() == 1) { - node_queue.push(src_node); - } - } - - while (!node_queue.empty()) { - NodePtr temp = node_queue.front(); - node_queue.pop(); - - for (auto &src_node : temp->GetInDataNodes()) { - if (src_node->GetOutDataNodesSize() == 1) { - node_queue.push(src_node); - } - } - node_vec.push_back(temp); - } - - return SUCCESS; -} - -bool UnusedOpRemovePass::IsExceptions(const NodePtr &node) { - GE_CHK_BOOL_EXEC(node != nullptr, return false, "node is nullptr"); - auto 
op_def = node->GetOpDesc(); - GE_CHK_BOOL_EXEC(op_def != nullptr, return false, "opdesc is nullptr"); - // permute optimised in permute_pass.cpp - if (op_def->GetType() == PERMUTE) { - GE_IF_BOOL_EXEC( - (node->GetInDataNodes().size() != 0 && - (node->GetInDataNodes().at(0) != nullptr && node->GetInDataNodes().at(0)->GetOpDesc() != nullptr && - node->GetInDataNodes().at(0)->GetOpDesc()->GetType() == ATTENTIONDECODER)), - return false); - return true; - } - return false; -} -} // namespace ge diff --git a/ge/graph/passes/unused_op_remove_pass.h b/ge/graph/passes/unused_op_remove_pass.h deleted file mode 100755 index b9429cfd..00000000 --- a/ge/graph/passes/unused_op_remove_pass.h +++ /dev/null @@ -1,41 +0,0 @@ -/** - * Copyright 2020 Huawei Technologies Co., Ltd - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#ifndef GE_GRAPH_PASSES_UNUSED_OP_REMOVE_PASS_H_ -#define GE_GRAPH_PASSES_UNUSED_OP_REMOVE_PASS_H_ - -#include -#include -#include "framework/common/ge_types.h" -#include "inc/graph_pass.h" - -namespace ge { -class UnusedOpRemovePass : public GraphPass { - public: - explicit UnusedOpRemovePass(FrameworkType type) : fmktype_(type) {} - ~UnusedOpRemovePass() {} - Status Run(ge::ComputeGraphPtr graph) override; - bool IsExceptions(const ge::NodePtr &node); - - private: - Status CollectParentNode(const ge::ComputeGraphPtr &graph, const ge::NodePtr &node, - std::vector &node_vec); - std::vector v_remove_ops; - FrameworkType fmktype_; -}; -} // namespace ge - -#endif // GE_GRAPH_PASSES_UNUSED_OP_REMOVE_PASS_H_ diff --git a/ge/graph/passes/variable_format_pass.cc b/ge/graph/passes/variable_format_pass.cc deleted file mode 100644 index bd5300a5..00000000 --- a/ge/graph/passes/variable_format_pass.cc +++ /dev/null @@ -1,119 +0,0 @@ -/** - * Copyright 2020 Huawei Technologies Co., Ltd - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "graph/passes/variable_format_pass.h" -#include -#include -#include -#include "framework/common/debug/ge_log.h" - -namespace ge { -Status VariableFormatPass::Run(ge::ComputeGraphPtr graph) { - GE_CHECK_NOTNULL(graph); - - for (auto &node : graph->GetDirectNode()) { - GE_IF_BOOL_EXEC(node->GetOpDesc() == nullptr, continue); - GE_IF_BOOL_EXEC(node->GetOpDesc()->GetType() != VARIABLE, continue); - - ge::NodePtr use_node = nullptr; - if (GetApplyMomentumOpByVariableInput(node, use_node)) { - GE_CHK_STATUS_RET(UpdateVariableOutFormat(node, use_node), "update variable out format failed"); - GE_CHK_STATUS_RET(UpdateApplyMomentumInputFormat(use_node), "update apply momentum input format failed"); - } - } - - return domi::SUCCESS; -} - -bool VariableFormatPass::GetApplyMomentumOpByVariableInput(const ge::NodePtr &var_node, ge::NodePtr &use_node) { - GE_IF_BOOL_EXEC(var_node == nullptr, return false); - - std::map> confirm_ops = {{"ApplyMomentum", {1}}}; - for (auto &out_anchor : var_node->GetAllOutDataAnchors()) { - for (auto &in_anchor : out_anchor->GetPeerInDataAnchors()) { - GE_IF_BOOL_EXEC(ConfirmUseOpAndIndexByAnchor(in_anchor, confirm_ops, use_node), return true); - } - } - - return false; -} - -bool VariableFormatPass::ConfirmUseOpAndIndexByAnchor(const ge::InDataAnchorPtr &in_anchor, - const map> &confirm_ops, - ge::NodePtr &use_node) { - GE_IF_BOOL_EXEC(in_anchor == nullptr, return false); - ge::NodePtr dst_node = in_anchor->GetOwnerNode(); - ge::OpDescPtr dst_op_desc = dst_node->GetOpDesc(); - GE_IF_BOOL_EXEC(dst_op_desc == nullptr, return false); - const string &dst_type = dst_op_desc->GetType(); - int input_index = in_anchor->GetIdx(); - - GELOGD("ConfirmUseOpAndIndex, var name %s, dst_type = %s, input index %d", dst_node->GetName().c_str(), - dst_type.c_str(), input_index); - - GE_IF_BOOL_EXEC(confirm_ops.count(dst_type) > 0, - GE_IF_BOOL_EXEC(confirm_ops.at(dst_type).count(input_index) > 0, use_node = dst_node; return true);); - return false; 
-} - -Status VariableFormatPass::UpdateVariableOutFormat(const ge::NodePtr &var_node, ge::NodePtr &use_node) { - GE_CHECK_NOTNULL(var_node); - GE_CHECK_NOTNULL(use_node); - ge::OpDescPtr op_desc_ptr = use_node->GetOpDesc(); - GE_CHECK_NOTNULL(op_desc_ptr); - GE_CHECK_NOTNULL(use_node->GetInDataAnchor(0)); - GE_CHECK_NOTNULL(use_node->GetInDataAnchor(0)->GetPeerOutAnchor()); - NodePtr in_node = use_node->GetInDataAnchor(0)->GetPeerOutAnchor()->GetOwnerNode(); - if (in_node != nullptr) { - string in_op_type = in_node->GetType(); - if ((in_op_type == VARIABLE) && (in_node->GetOpDesc() != nullptr) && - (in_node->GetOpDesc()->MutableOutputDesc(0) != nullptr)) { - ge::Format format = in_node->GetOpDesc()->MutableOutputDesc(0)->GetFormat(); - ge::OpDescPtr cur_op_desc_ptr = var_node->GetOpDesc(); - if (cur_op_desc_ptr != nullptr) { - cur_op_desc_ptr->MutableOutputDesc(0)->SetFormat(format); - cur_op_desc_ptr->MutableOutputDesc(0)->SetOriginFormat(format); - } - } - } - return domi::SUCCESS; -} - -Status VariableFormatPass::UpdateApplyMomentumInputFormat(const ge::NodePtr &node) { - GE_CHECK_NOTNULL(node); - ge::OpDescPtr op_desc_ptr = node->GetOpDesc(); - GE_CHECK_NOTNULL(op_desc_ptr); - GE_CHECK_NOTNULL(node->GetInDataAnchor(0)); - GE_CHECK_NOTNULL(node->GetInDataAnchor(0)->GetPeerOutAnchor()); - GE_CHECK_NOTNULL(op_desc_ptr->MutableInputDesc(0)); - GE_CHECK_NOTNULL(op_desc_ptr->MutableInputDesc(1)); - GE_CHECK_NOTNULL(op_desc_ptr->MutableOutputDesc(0)); - NodePtr in_node = node->GetInDataAnchor(0)->GetPeerOutAnchor()->GetOwnerNode(); - if (in_node != nullptr) { - string in_op_type = in_node->GetType(); - if ((in_op_type == VARIABLE) && (in_node->GetOpDesc() != nullptr)) { - ge::Format format = in_node->GetOpDesc()->MutableOutputDesc(0)->GetFormat(); - op_desc_ptr->MutableInputDesc(0)->SetFormat(format); - op_desc_ptr->MutableInputDesc(0)->SetOriginFormat(format); - op_desc_ptr->MutableInputDesc(1)->SetFormat(format); - 
op_desc_ptr->MutableInputDesc(1)->SetOriginFormat(format); - op_desc_ptr->MutableOutputDesc(0)->SetFormat(format); - op_desc_ptr->MutableOutputDesc(0)->SetOriginFormat(format); - } - } - return domi::SUCCESS; -} -} // namespace ge diff --git a/ge/graph/passes/variable_format_pass.h b/ge/graph/passes/variable_format_pass.h deleted file mode 100755 index e2c32903..00000000 --- a/ge/graph/passes/variable_format_pass.h +++ /dev/null @@ -1,44 +0,0 @@ -/** - * Copyright 2020 Huawei Technologies Co., Ltd - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#ifndef GE_GRAPH_PASSES_VARIABLE_FORMAT_PASS_H_ -#define GE_GRAPH_PASSES_VARIABLE_FORMAT_PASS_H_ - -#include -#include -#include -#include "graph/types.h" -#include "graph/utils/op_desc_utils.h" -#include "inc/graph_pass.h" - -namespace ge { -class VariableFormatPass : public GraphPass { - public: - Status Run(ge::ComputeGraphPtr graph) override; - - private: - bool GetApplyMomentumOpByVariableInput(const ge::NodePtr &var_node, ge::NodePtr &use_node); - - bool ConfirmUseOpAndIndexByAnchor(const ge::InDataAnchorPtr &in_anchor, - const map > &confirm_ops, ge::NodePtr &use_node); - - Status UpdateApplyMomentumInputFormat(const ge::NodePtr &node); - - Status UpdateVariableOutFormat(const ge::NodePtr &var_node, ge::NodePtr &use_node); -}; -} // namespace ge - -#endif // GE_GRAPH_PASSES_VARIABLE_FORMAT_PASS_H_ diff --git a/ge/graph/preprocess/graph_preprocess.cc b/ge/graph/preprocess/graph_preprocess.cc index 4e9046e4..8597cc61 100644 --- a/ge/graph/preprocess/graph_preprocess.cc +++ b/ge/graph/preprocess/graph_preprocess.cc @@ -74,6 +74,7 @@ #include "graph/passes/unused_const_pass.h" #include "graph/passes/var_is_initialized_op_pass.h" #include "graph/passes/variable_prepare_op_pass.h" +#include "graph/passes/mark_force_unknown_for_cond_pass.h" #include "graph/preprocess/insert_op/util_insert_aipp_op.h" #include "graph/utils/type_utils.h" #include "inc/pass_manager.h" @@ -1675,6 +1676,7 @@ Status GraphPrepare::PrepareDynShape(const GraphNodePtr &graph_node, const std:: PP_RUN_AND_DUMP("InsertAipp", TryDoAipp); PP_RUN_AND_DUMP("ProcessBeforeInfershape", ProcessBeforeInfershape); PP_RUN_AND_DUMP("InferFormatAndShape", FormatAndShapeProcess); + PP_RUN_AND_DUMP("CtrlFlowPreProcess", CtrlFlowPreProcess); PP_RUN_AND_DUMP("GetDynamicOutputShape", multibatch::GetDynamicOutputShape, compute_graph_); PP_RUN_AND_DUMP("ProcessAippStage2", InsertNewOpUtil::Instance().UpdateDataNodeByAipp, compute_graph_); PP_RUN("SaveOriginalGraphToOmModel", SaveOriginalGraphToOmModel); @@ 
-1683,6 +1685,17 @@ Status GraphPrepare::PrepareDynShape(const GraphNodePtr &graph_node, const std:: return SUCCESS; } +Status GraphPrepare::CtrlFlowPreProcess() { + PassManager graph_pass; + + // After InferShape Mark v1 control flow for unknown shape. + auto mark_force_unknown_pass = new (std::nothrow) MarkForceUnknownForCondPass; + GE_CHK_STATUS_RET(graph_pass.AddPass("PreRun::MarkForceUnknownForCondPass", mark_force_unknown_pass)); + + GE_CHK_STATUS_RET(graph_pass.Run(compute_graph_)); + return SUCCESS; +} + Status GraphPrepare::RecordAIPPInfo(ge::ComputeGraphPtr &compute_graph) { PP_RUN("RecordAIPPInfo", InsertNewOpUtil::Instance().RecordAIPPInfoToData, compute_graph_); return SUCCESS; diff --git a/ge/graph/preprocess/graph_preprocess.h b/ge/graph/preprocess/graph_preprocess.h index 9dc3e679..3eb5e03a 100755 --- a/ge/graph/preprocess/graph_preprocess.h +++ b/ge/graph/preprocess/graph_preprocess.h @@ -79,6 +79,7 @@ class GraphPrepare { Status ProcessNetOutput(); Status ProcessBeforeInfershape(); Status UpdateInputOutputByOptions(); + Status CtrlFlowPreProcess(); bool IsTansDataOpData(const ge::NodePtr &var_node); diff --git a/ge/graph/preprocess/multi_batch_options.cc b/ge/graph/preprocess/multi_batch_options.cc index b82d1034..6cb57e6b 100644 --- a/ge/graph/preprocess/multi_batch_options.cc +++ b/ge/graph/preprocess/multi_batch_options.cc @@ -335,9 +335,9 @@ Status DeleteIdentityInsertByAdapter(ComputeGraphPtr &graph) { GE_IF_BOOL_EXEC(peer_in_anchor == nullptr, continue); auto dst_node = peer_in_anchor->GetOwnerNode(); GE_IF_BOOL_EXEC(dst_node == nullptr, continue); - if (dst_node->GetType() == IDENTITY) { + if (dst_node->GetType() == IDENTITY && dst_node->GetAllOutDataAnchors().empty()) { GELOGI("Need to remove %s.", dst_node->GetName().c_str()); - if (ge::GraphUtils::RemoveNodeWithoutRelink(graph, dst_node) != GRAPH_SUCCESS) { + if (GraphUtils::RemoveNodeWithoutRelink(graph, dst_node) != GRAPH_SUCCESS) { REPORT_CALL_ERROR("E19999", "Remove node:%s(%s) from 
graph:%s failed", dst_node->GetName().c_str(), dst_node->GetType().c_str(), graph->GetName().c_str()); GELOGE(FAILED, "Remove Identity node %s failed.", dst_node->GetName().c_str()); diff --git a/ge/hybrid/common/npu_memory_allocator.cc b/ge/hybrid/common/npu_memory_allocator.cc index 5a04c461..b66038d9 100644 --- a/ge/hybrid/common/npu_memory_allocator.cc +++ b/ge/hybrid/common/npu_memory_allocator.cc @@ -17,10 +17,7 @@ #include "npu_memory_allocator.h" #include #include "framework/common/debug/log.h" -#include "graph/manager/graph_caching_allocator.h" -#include "graph/manager/graph_mem_allocator.h" -#include "graph/manager/rdma_pool_allocator.h" -#include "graph/manager/host_mem_allocator.h" +#include "graph/manager/graph_mem_manager.h" namespace ge { namespace hybrid { diff --git a/ge/hybrid/common/tensor_value.h b/ge/hybrid/common/tensor_value.h index 19e1ba27..348e4e6d 100644 --- a/ge/hybrid/common/tensor_value.h +++ b/ge/hybrid/common/tensor_value.h @@ -40,6 +40,12 @@ class TensorBuffer { TensorBuffer &operator = (const TensorBuffer &) = delete; ~TensorBuffer(); + void* Release() { + auto ret = buffer_; + buffer_ = nullptr; + return ret; + } + void *GetData() { return buffer_; } @@ -48,6 +54,10 @@ class TensorBuffer { return size_; } + MemStorageType GetMemType() const { + return mem_type_; + } + private: TensorBuffer(NpuMemoryAllocator *allocator, void *buffer, size_t size, MemStorageType mem_type = HBM); @@ -69,6 +79,10 @@ class TensorValue { void Destroy(); + void *Release() { + return buffer_->Release(); + } + bool IsEmpty() { return ref_buffer_ == nullptr && buffer_ == nullptr; } @@ -80,6 +94,10 @@ class TensorValue { void SetName(const std::string &name) { name_ = name; } + + MemStorageType GetMemType() const { + return buffer_->GetMemType(); + } void *MutableData(); diff --git a/ge/hybrid/executor/hybrid_execution_context.h b/ge/hybrid/executor/hybrid_execution_context.h index f2628409..489d6d99 100644 --- 
a/ge/hybrid/executor/hybrid_execution_context.h +++ b/ge/hybrid/executor/hybrid_execution_context.h @@ -62,6 +62,7 @@ struct GraphExecutionContext { const HybridModel *model = nullptr; const GEThreadLocalContext *ge_context = nullptr; rtStream_t stream = nullptr; + rtStream_t hccl_stream = nullptr; rtContext_t rt_context = nullptr; rtContext_t rt_gen_context = nullptr; std::unique_ptr callback_manager = nullptr; diff --git a/ge/hybrid/executor/hybrid_model_async_executor.cc b/ge/hybrid/executor/hybrid_model_async_executor.cc index af06e27b..930412e3 100644 --- a/ge/hybrid/executor/hybrid_model_async_executor.cc +++ b/ge/hybrid/executor/hybrid_model_async_executor.cc @@ -19,6 +19,13 @@ #include "graph/utils/tensor_utils.h" #include "graph/utils/type_utils.h" #include "graph/ge_context.h" +#include "graph/types.h" +#include "graph/debug/ge_attr_define.h" +#include "graph/manager/graph_caching_allocator.h" +#include "graph/manager/graph_mem_allocator.h" +#include "graph/manager/rdma_pool_allocator.h" +#include "graph/manager/host_mem_allocator.h" +#include "graph/manager/graph_mem_manager.h" namespace ge { namespace hybrid { @@ -440,22 +447,31 @@ Status HybridModelAsyncExecutor::CopyOutputs(HybridModelExecutor::ExecuteArgs &a GeShape ge_shape(tensor_desc->GetShape().GetDims()); GeTensorDesc ge_tensor_desc; ge_tensor_desc.SetShape(ge_shape); - GeTensor ge_tensor(ge_tensor_desc); if (output_size > 0) { - auto aligned_ptr = MakeShared(output_size, kAlignment); - GE_CHECK_NOTNULL(aligned_ptr); - auto data_buf = aligned_ptr->MutableGet(); - GE_CHECK_NOTNULL(data_buf); - GE_CHK_RT_RET(rtMemcpy(data_buf, output_size, output_tensor.GetData(), output_size, RT_MEMCPY_DEVICE_TO_HOST)); - ge_tensor.SetData(aligned_ptr, output_size); - output_data->blobs.emplace_back(data_buf, static_cast(output_size), false); + if (execute_mode != kLazyRecompile) { + auto aligned_ptr = MakeShared(output_size, kAlignment); + GE_CHECK_NOTNULL(aligned_ptr); + auto data_buf = 
aligned_ptr->MutableGet(); + GE_CHECK_NOTNULL(data_buf); + GE_CHK_RT_RET(rtMemcpy(data_buf, output_size, output_tensor.GetData(), output_size, RT_MEMCPY_DEVICE_TO_HOST)); + GeTensor ge_tensor(ge_tensor_desc); + ge_tensor.SetData(aligned_ptr, output_size); + output_data->blobs.emplace_back(data_buf, static_cast(output_size), false); + auto tensor = TensorAdapter::AsTensor(ge_tensor); + outputs.emplace_back(std::move(tensor)); + } else { + BuildDeviceTensor(output_tensor, ge_tensor_desc, output_size, outputs); + output_data->blobs.emplace_back(output_tensor.Release(), static_cast(output_size), false, + static_cast(kPlacementDevice)); + } } else { - GELOGW("Output[%zu] is empty. shape = [%s]", i, tensor_desc->GetShape().ToString().c_str()); + GELOGW("Output [%zu] is empty. shape = [%s]", i, tensor_desc->GetShape().ToString().c_str()); + GeTensor ge_tensor(ge_tensor_desc); ge_tensor.SetData(nullptr, 0U); output_data->blobs.emplace_back(nullptr, 0U, false); + auto tensor = TensorAdapter::AsTensor(ge_tensor); + outputs.emplace_back(std::move(tensor)); } - auto tensor = TensorAdapter::AsTensor(ge_tensor); - outputs.emplace_back(std::move(tensor)); GELOGD("Output[%zu] added, type = %s, shape = [%s], size = %ld", i, TypeUtils::DataTypeToSerialString(tensor_desc->GetDataType()).c_str(), tensor_desc->GetShape().ToString().c_str(), output_size); @@ -464,6 +480,29 @@ Status HybridModelAsyncExecutor::CopyOutputs(HybridModelExecutor::ExecuteArgs &a return SUCCESS; } +void HybridModelAsyncExecutor::BuildDeviceTensor(TensorValue &output_tensor, GeTensorDesc &ge_tensor_desc, + int64_t output_size, std::vector &outputs) { + GELOGD("Start to build device tensor"); + auto mem_type = output_tensor.GetMemType(); + GELOGD("Mem type is %d", static_cast(mem_type)); + auto deleter = [=](uint8_t *device_data) { + if (device_data != nullptr) { + if (mem_type == RDMA_HBM) { + MemManager::Instance().RdmaPoolInstance(RT_MEMORY_HBM).Free(device_data, device_id_); + } else if (mem_type == HOST_DDR) 
{ + MemManager::Instance().HostMemInstance(RT_MEMORY_HBM).Free(device_data); + } else { + MemManager::Instance().CachingInstance(RT_MEMORY_HBM).Free(device_data, device_id_); + } + } + }; + ge_tensor_desc.SetPlacement(kPlacementDevice); + GeTensor ge_tensor(ge_tensor_desc); + auto tensor = TensorAdapter::AsTensor(ge_tensor); + tensor.SetData(reinterpret_cast(output_tensor.Release()), static_cast(output_size), deleter); + outputs.emplace_back(std::move(tensor)); +} + Status HybridModelAsyncExecutor::Execute(const std::vector &inputs, const std::vector &input_desc, std::vector &outputs, diff --git a/ge/hybrid/executor/hybrid_model_async_executor.h b/ge/hybrid/executor/hybrid_model_async_executor.h index c6d99c7c..5ae1a222 100644 --- a/ge/hybrid/executor/hybrid_model_async_executor.h +++ b/ge/hybrid/executor/hybrid_model_async_executor.h @@ -75,9 +75,9 @@ class HybridModelAsyncExecutor { HybridModelExecutor::ExecuteArgs &args, OutputData *output_data); - Status CopyOutputs(HybridModelExecutor::ExecuteArgs &args, - OutputData *output_data, - std::vector &outputs); + Status CopyOutputs(HybridModelExecutor::ExecuteArgs &args, OutputData *output_data, std::vector &outputs); + void BuildDeviceTensor(TensorValue &output_tensor, GeTensorDesc &ge_tensor_desc, int64_t output_size, + std::vector &outputs); Status OnComputeDone(uint32_t data_index, uint32_t result_code, std::vector &outputs); diff --git a/ge/hybrid/executor/hybrid_model_executor.cc b/ge/hybrid/executor/hybrid_model_executor.cc index f8635a97..d4d97840 100755 --- a/ge/hybrid/executor/hybrid_model_executor.cc +++ b/ge/hybrid/executor/hybrid_model_executor.cc @@ -50,7 +50,7 @@ Status HybridModelExecutor::Execute(HybridModelExecutor::ExecuteArgs &args) { auto root_graph_item = model_->GetRootGraphItem(); GE_CHECK_NOTNULL(root_graph_item); - if (root_graph_item->IsDynamic()) { + if (root_graph_item->IsDynamic() && !model_->IsSingleOp()) { GE_CHK_STATUS_RET(CheckInputShapeByShapeRange(root_graph_item, args), "[%s] 
check input node shape by shape range failed.", root_graph_item->GetName().c_str()); diff --git a/ge/hybrid/executor/hybrid_model_pipeline_executor.cc b/ge/hybrid/executor/hybrid_model_pipeline_executor.cc index ba24d78d..c0bd5c7d 100644 --- a/ge/hybrid/executor/hybrid_model_pipeline_executor.cc +++ b/ge/hybrid/executor/hybrid_model_pipeline_executor.cc @@ -18,14 +18,26 @@ const char *const kEnvProfilingLevel = "HYBRID_PROFILING_LEVEL"; StageExecutor::StageExecutor(int id, HybridModel *model, PipeExecutionConfig *config) : id_(id), model_(model), pipe_config_(config) {} -StageExecutor::~StageExecutor() { GELOGD("~StageExecutor(), id = %d", id_); } +StageExecutor::~StageExecutor() { + GELOGD("~StageExecutor(), id = %d", id_); + if (stream_ != nullptr) { + GE_CHK_RT(rtStreamDestroy(stream_)); + stream_ = nullptr; + } + if (hccl_stream_ != nullptr) { + GE_CHK_RT(rtStreamDestroy(hccl_stream_)); + hccl_stream_ = nullptr; + } +} Status StageExecutor::Init() { GELOGD("[Executor: %d] Start to init StateExecutor", id_); context_.rt_context = pipe_config_->rt_context; GE_CHK_STATUS_RET_NOLOG(InitExecutionContext()); GE_CHK_RT_RET(rtStreamCreate(&stream_, RT_STREAM_PRIORITY_DEFAULT)); + GE_CHK_RT_RET(rtStreamCreate(&hccl_stream_, RT_STREAM_PRIORITY_DEFAULT)); context_.stream = stream_; + context_.hccl_stream = hccl_stream_; root_graph_executor_.reset(new (std::nothrow) SubgraphExecutor(model_->GetRootGraphItem(), &context_)); GE_CHECK_NOTNULL(root_graph_executor_); @@ -78,11 +90,11 @@ Status StageExecutor::Start(const std::vector &inputs, const std::v if (task_info.event != nullptr) { GELOGD("[%d] Add StreamWaitEvent", id_); GE_CHK_RT_RET(rtStreamWaitEvent(stream_, task_info.event)); - RECORD_MODEL_EXECUTION_EVENT(&context_, "[iteration = %ld] [Stage = %d] End", task_info.iteration - 1, + RECORD_MODEL_EXECUTION_EVENT(&context_, "[iteration = %ld] [Stage = %d] EventWait End", task_info.iteration, task_info.stage); } - RECORD_MODEL_EXECUTION_EVENT(&context_, "[iteration = %lld] 
[Stage = %d] Start", task_info.iteration, + RECORD_MODEL_EXECUTION_EVENT(&context_, "[iteration = %ld] [Stage = %d] Start", task_info.iteration, task_info.stage); if (task_info.stage == 0) { @@ -102,6 +114,10 @@ Status StageExecutor::Start(const std::vector &inputs, const std::v StageTask next_task; next_task.stage = task_info.stage; next_task.iteration = task_info.iteration + 1; + if ((task_info.iteration + 1) % iteration_count > 0) { + GE_CHK_RT_RET(rtEventCreate(&next_task.event)); + GE_CHK_RT_RET(rtEventRecord(next_task.event, context_.hccl_stream)); + } auto sync_result = Synchronize(); if (sync_result != SUCCESS) { @@ -110,15 +126,22 @@ Status StageExecutor::Start(const std::vector &inputs, const std::v id_, sync_result, task_info.iteration); REPORT_CALL_ERROR("E19999", "[Executor: %d] Failed to sync result:%d. iteration = %ld", id_, sync_result, task_info.iteration); - context_.profiler->Dump(std::cout); + if (context_.profiler != nullptr) { + context_.profiler->Dump(std::cout); + } context_.callback_manager->Destroy(); RuntimeInferenceContext::DestroyContext(std::to_string(context_.context_id)); return sync_result; } + if (task_info.event != nullptr) { + GE_CHK_RT_RET(rtEventDestroy(task_info.event)); + RECORD_MODEL_EXECUTION_EVENT(&context_, "[iteration = %ld] [Stage = %d] EventDestroy End", task_info.iteration, + task_info.stage); + } RECORD_MODEL_EXECUTION_EVENT(&context_, "[iteration = %ld] [Stage = %d] End", task_info.iteration, task_info.stage); - // if not end stage + // if end stage if (task_info.stage >= pipe_config_->num_stages - 1) { RECORD_MODEL_EXECUTION_EVENT(&context_, "[iteration = %ld] Schedule End", task_info.iteration); GELOGD("[Executor: %d] End of iteration [%ld]", id_, task_info.iteration); @@ -163,6 +186,7 @@ Status StageExecutor::InitExecutionContext() { context_.callback_manager = std::unique_ptr(new (std::nothrow) CallbackManager()); GE_CHECK_NOTNULL(context_.callback_manager); context_.dump_properties = 
DumpManager::GetInstance().GetDumpProperties(context_.session_id); + context_.is_eos_ = false; if (IsLogEnable(GE_MODULE_NAME, DLOG_DEBUG)) { context_.trace_enabled = true; } diff --git a/ge/hybrid/executor/hybrid_model_pipeline_executor.h b/ge/hybrid/executor/hybrid_model_pipeline_executor.h index cb08d872..c59e1462 100644 --- a/ge/hybrid/executor/hybrid_model_pipeline_executor.h +++ b/ge/hybrid/executor/hybrid_model_pipeline_executor.h @@ -63,6 +63,7 @@ class StageExecutor { StageExecutor *next_executor_ = nullptr; rtStream_t stream_ = nullptr; + rtStream_t hccl_stream_ = nullptr; }; class HybridModelPipelineExecutor { diff --git a/ge/hybrid/executor/node_done_manager.cc b/ge/hybrid/executor/node_done_manager.cc index d31765c2..0ea04661 100644 --- a/ge/hybrid/executor/node_done_manager.cc +++ b/ge/hybrid/executor/node_done_manager.cc @@ -121,5 +121,10 @@ void NodeDoneManager::Reset(const NodePtr &node) { GELOGD("[%s] Node reset.", node->GetName().c_str()); } } + +void NodeDoneManager::Reset() { + subjects_.clear(); + destroyed_ = false; +} } // namespace hybrid } // namespace ge diff --git a/ge/hybrid/executor/node_done_manager.h b/ge/hybrid/executor/node_done_manager.h index 292d1369..bedbff3d 100644 --- a/ge/hybrid/executor/node_done_manager.h +++ b/ge/hybrid/executor/node_done_manager.h @@ -35,6 +35,8 @@ class NodeDoneManager { void Destroy(); + void Reset(); + private: class Cond { public: diff --git a/ge/hybrid/executor/node_state.cc b/ge/hybrid/executor/node_state.cc index aaa7801f..9ec5431a 100644 --- a/ge/hybrid/executor/node_state.cc +++ b/ge/hybrid/executor/node_state.cc @@ -104,11 +104,47 @@ void ShapeInferenceState::UpdateInputShapeFuture(int idx, ShapeFuture &&future) } } +Status ShapeInferenceState::UpdateInputForMerge(const GraphExecutionContext &context) { + int merge_index = -1; + const auto &guard = node_item.MutexGuard("UpdateInputForMerge"); + if (!AttrUtils::GetInt(node_item.op_desc, ATTR_NAME_MERGE_INPUT_INDEX, merge_index)) { + 
GELOGE(FAILED, "[%s] Get attr %s failed", node_item.NodeName().c_str(), ATTR_NAME_MERGE_INPUT_INDEX.c_str()); + return FAILED; + } + + if (merge_index < 0 || static_cast(merge_index) >= input_tensor_desc.size()) { + GELOGE(FAILED, "[%s] merge index: %d invalid, should in range[0, %zu)", + node_item.NodeName().c_str(), merge_index, input_tensor_desc.size()); + return FAILED; + } + + auto dst_tensor_desc = node_item.MutableInputDesc(merge_index); + GE_CHECK_NOTNULL(dst_tensor_desc); + + int64_t tensor_size = -1; + auto &tensor_desc = input_tensor_desc[merge_index]; + (void)TensorUtils::GetSize(tensor_desc, tensor_size); + + dst_tensor_desc->SetShape(tensor_desc.MutableShape()); + dst_tensor_desc->SetOriginShape(tensor_desc.GetOriginShape()); + (void)TensorUtils::SetSize(*dst_tensor_desc, tensor_size); + (void)guard; + GELOGD("[%s] Update input shape [%u] with shape: [%s] and ori_shape: [%s], tensor size = %ld", + node_item.NodeName().c_str(), merge_index, dst_tensor_desc->GetShape().ToString().c_str(), + dst_tensor_desc->GetOriginShape().ToString().c_str(), tensor_size); + + return SUCCESS; +} + Status ShapeInferenceState::AwaitShapesReady(const GraphExecutionContext &context) { if (!node_item.is_dynamic) { return SUCCESS; } std::unique_lock lk(mu_); + if (node_item.IsMergeOp()) { + return UpdateInputForMerge(context); + } + if (num_pending_shapes_ > 0) { GELOGD("[%s] Await pending shape or shape future start.", node_item.NodeName().c_str()); int try_count = 0; @@ -169,7 +205,7 @@ Status ShapeInferenceState::AwaitShapesReady(const GraphExecutionContext &contex int64_t tensor_size = -1; (void) TensorUtils::GetSize(*src_tensor_desc, tensor_size); - GELOGD("[%s] Update input shape [%u] with shape: [%s] and ori_shape: [%s], index = %zu", + GELOGD("[%s] Update input shape [%u] with shape: [%s] and ori_shape: [%s], tensor size = %ld", node_item.NodeName().c_str(), idx, src_tensor_desc->GetShape().ToString().c_str(), @@ -283,11 +319,8 @@ void NodeState::ResetContext(int 
group) { } switch_index_ = -1; - const auto &guard = node_item_->MutexGuard("ResetContext"); - shape_inference_state_.InitShapeState(); subgraph_context_->ResetContext(node_item_->node); GELOGD("Node[%s] in while loop, current loop: %lu, merge index: %d", GetName().c_str(), loop_count_, merge_index_); - (void)guard; } void NodeState::ResetSchedule() { diff --git a/ge/hybrid/executor/node_state.h b/ge/hybrid/executor/node_state.h index 49861611..d3f176ce 100644 --- a/ge/hybrid/executor/node_state.h +++ b/ge/hybrid/executor/node_state.h @@ -67,6 +67,8 @@ struct ShapeInferenceState { const NodeItem &node_item; private: + Status UpdateInputForMerge(const GraphExecutionContext &context); + friend struct NodeState; std::vector> shape_futures; // do not directly update op_desc, in case race condition across pipelines diff --git a/ge/hybrid/executor/subgraph_context.cc b/ge/hybrid/executor/subgraph_context.cc index 9a9a97c2..5de0828f 100644 --- a/ge/hybrid/executor/subgraph_context.cc +++ b/ge/hybrid/executor/subgraph_context.cc @@ -15,8 +15,6 @@ */ #include "subgraph_context.h" - -#include "common/debug/log.h" #include "hybrid/executor/hybrid_model_executor.h" namespace ge { @@ -25,6 +23,13 @@ SubgraphContext::SubgraphContext(const GraphItem *graph_item, const GraphExecuti : graph_item_(graph_item), execution_context_(execution_context) { } +SubgraphContext::~SubgraphContext() { + if (mmRWLockDestroy(&rw_lock_) != EN_OK) { + REPORT_CALL_ERROR("E19999", "Destroy rw_lock failed"); + GELOGE(INTERNAL_ERROR, "[RWLock][Destroy] Destroy rw_lock failed"); + } +} + Status SubgraphContext::Init() { GE_CHECK_NOTNULL(graph_item_); GELOGD("[%s] Start to init subgraph context. 
total inputs = %d, total outputs = %d", @@ -33,7 +38,11 @@ Status SubgraphContext::Init() { graph_item_->TotalOutputs()); all_inputs_.resize(static_cast(graph_item_->TotalInputs())); all_outputs_.resize(static_cast(graph_item_->TotalOutputs())); - + if (mmRWLockInit(&rw_lock_) != EN_OK) { + REPORT_CALL_ERROR("E19999", "Init rw_lock failed"); + GELOGE(INTERNAL_ERROR, "[RWLock][Init] Init rw_lock failed"); + return INTERNAL_ERROR; + } return SUCCESS; } @@ -42,13 +51,48 @@ void SubgraphContext::ResetContext(const NodePtr &node) { } NodeStatePtr SubgraphContext::GetOrCreateNodeState(const NodeItem *node_item) { - std::lock_guard lk(mu_); + GELOGD("[%s] lock for read", node_item->NodeName().c_str()); + if (mmRWLockRDLock(&rw_lock_) != EN_OK) { + REPORT_CALL_ERROR("E19999", "[Node:%s] Lock for read failed", node_item->NodeName().c_str()); + GELOGE(INTERNAL_ERROR, "[RWLock][Lock][Node:%s] Lock for read failed", node_item->NodeName().c_str()); + return nullptr; + } + const auto &iter = node_states_.find(node_item); + if (iter != node_states_.end()) { + auto state = iter->second; + GELOGD("[%s] unlock for read", node_item->NodeName().c_str()); + if (mmRDLockUnLock(&rw_lock_) != EN_OK) { + REPORT_CALL_ERROR("E19999", "[Node:%s] Unlock for read failed", node_item->NodeName().c_str()); + GELOGE(INTERNAL_ERROR, "[RWLock][Unlock][Node:%s] Unlock for read failed", node_item->NodeName().c_str()); + return nullptr; + } + return state; + } + GELOGD("[%s] unlock for read", node_item->NodeName().c_str()); + if (mmRDLockUnLock(&rw_lock_) != EN_OK) { + REPORT_CALL_ERROR("E19999", "[Node:%s] Unlock for read failed", node_item->NodeName().c_str()); + GELOGE(INTERNAL_ERROR, "[RWLock][Unlock][Node:%s] Unlock for read failed", node_item->NodeName().c_str()); + return nullptr; + } + + GELOGD("[%s] lock for write", node_item->NodeName().c_str()); + if (mmRWLockWRLock(&rw_lock_) != EN_OK) { + REPORT_CALL_ERROR("E19999", "[Node:%s] Lock for write failed", node_item->NodeName().c_str()); + 
GELOGE(INTERNAL_ERROR, "[RWLock][Lock][Node:%s] Lock for write failed", node_item->NodeName().c_str()); + return nullptr; + } auto &node_state = node_states_[node_item]; if (node_state == nullptr) { const auto &guard = node_item->MutexGuard("GetOrCreateNodeState"); - node_state.reset(new(std::nothrow)NodeState(*node_item, this)); + node_state = std::move(std::unique_ptr(new(std::nothrow)NodeState(*node_item, this))); (void)guard; } + GELOGD("[%s] unlock for write", node_item->NodeName().c_str()); + if (mmWRLockUnLock(&rw_lock_) != EN_OK) { + REPORT_CALL_ERROR("E19999", "[Node:%s] Unlock for write failed", node_item->NodeName().c_str()); + GELOGE(INTERNAL_ERROR, "[RWLock][Unlock][Node:%s] Unlock for write failed", node_item->NodeName().c_str()); + return nullptr; + } return node_state; } @@ -144,5 +188,13 @@ void SubgraphContext::OnError(Status error) { void SubgraphContext::NodeDone(const NodePtr &node) { node_done_manager_.NodeDone(node); } + +void SubgraphContext::Reset() { + node_done_manager_.Reset(); + if (mmRWLockWRLock(&rw_lock_) == EN_OK) { + node_states_.clear(); + (void)mmWRLockUnLock(&rw_lock_); + } +} } // namespace hybrid } // namespace ge diff --git a/ge/hybrid/executor/subgraph_context.h b/ge/hybrid/executor/subgraph_context.h index ff692ed9..7a99e324 100644 --- a/ge/hybrid/executor/subgraph_context.h +++ b/ge/hybrid/executor/subgraph_context.h @@ -18,7 +18,7 @@ #define GE_HYBRID_EXECUTOR_ITERATION_CONTEXT_H_ #include - +#include "mmpa/mmpa_api.h" #include "hybrid/common/tensor_value.h" #include "hybrid/executor/hybrid_execution_context.h" #include "hybrid/executor/node_state.h" @@ -31,10 +31,11 @@ namespace hybrid { class SubgraphContext { public: explicit SubgraphContext(const GraphItem *graph_item, const GraphExecutionContext *execution_context); - ~SubgraphContext() = default; + ~SubgraphContext(); Status Init(); void ResetContext(const NodePtr &node); + void Reset(); NodeStatePtr GetOrCreateNodeState(const NodeItem *node_item); void 
OnError(Status error); @@ -52,7 +53,7 @@ class SubgraphContext { friend class TaskContext; const GraphItem *graph_item_; const GraphExecutionContext *execution_context_; - std::mutex mu_; + mmRWLock_t rw_lock_; std::vector all_inputs_; std::vector all_outputs_; NodeDoneManager node_done_manager_; diff --git a/ge/hybrid/executor/subgraph_executor.cc b/ge/hybrid/executor/subgraph_executor.cc index 60895c7e..3536f295 100644 --- a/ge/hybrid/executor/subgraph_executor.cc +++ b/ge/hybrid/executor/subgraph_executor.cc @@ -704,7 +704,21 @@ Status SubgraphExecutor::PartialExecuteAsync(int task_group) { Status SubgraphExecutor::InitForPartialExecution(const vector &inputs, const vector &input_desc) { - return Init(inputs, input_desc); + if (subgraph_context_ == nullptr) { + return Init(inputs, input_desc); + } + subgraph_context_->Reset(); + if (graph_item_->IsDynamic()) { + GE_CHK_STATUS_RET(InitInputsForUnknownShape(inputs, input_desc), + "[%s] Failed to set inputs.", + graph_item_->GetName().c_str()); + } else { + GE_CHK_STATUS_RET(InitInputsForKnownShape(inputs), + "[Invoke][InitInputsForKnownShape][%s] Failed to init subgraph executor for known shape subgraph", + graph_item_->GetName().c_str()); + } + + return SUCCESS; } } // namespace hybrid } // namespace ge diff --git a/ge/hybrid/executor/worker/execution_engine.cc b/ge/hybrid/executor/worker/execution_engine.cc index 678e5c63..32758f61 100755 --- a/ge/hybrid/executor/worker/execution_engine.cc +++ b/ge/hybrid/executor/worker/execution_engine.cc @@ -300,7 +300,7 @@ Status NodeDoneCallback::OnNodeDone() { GE_CHK_STATUS_RET(SaveDumpOpInfo(), "[Save][DumpOpInfo] Failed to dump op info."); } - if (ProfilingManager::Instance().ProfilingModelExecuteOn()) { + if (ProfilingManager::Instance().ProfilingModelLoadOn()) { GE_CHK_STATUS_RET(ProfilingReport(), "[Report][Profiling] of node[%s] failed.", node_item.NodeName().c_str()); } diff --git a/ge/hybrid/model/hybrid_model_builder.cc b/ge/hybrid/model/hybrid_model_builder.cc 
index e108dddf..91188326 100755 --- a/ge/hybrid/model/hybrid_model_builder.cc +++ b/ge/hybrid/model/hybrid_model_builder.cc @@ -26,8 +26,7 @@ #include "graph/manager/graph_var_manager.h" #include "graph/manager/host_mem_manager.h" #include "graph/manager/trans_var_data_utils.h" -#include "graph/manager/graph_mem_allocator.h" -#include "graph/manager/host_mem_allocator.h" +#include "graph/manager/graph_mem_manager.h" #include "graph/utils/graph_utils.h" #include "hybrid/common/npu_memory_allocator.h" #include "hybrid/node_executor/node_executor.h" @@ -260,6 +259,10 @@ Status HybridModelBuilder::GetOrCreateNodeItem(const NodePtr &node, NodeItem **n return SUCCESS; } + if (node->GetType() == MEMCPYASYNC) { // Convert MemcpyAsync to Identity. + node->GetOpDesc()->SetType(IDENTITY); + } + std::unique_ptr new_node; GE_CHK_STATUS_RET(NodeItem::Create(node, new_node), "[Invoke][Create] failed, model_name_:[%s]", GetGraphName()); GE_CHK_STATUS_RET_NOLOG(NodeExecutorManager::GetInstance().GetExecutor(*node, &new_node->node_executor)); @@ -1002,14 +1005,18 @@ Status HybridModelBuilder::InitConstantOps() { // Tensors return by api GetWeights share data with proto, whose addr is not confirmed to be aligned GeTensor aligned_tensor = ge_tensor->Clone(); GELOGD("Init tensor with host constant %s size = %zu", var_name.c_str(), aligned_tensor.MutableData().GetSize()); - if (MemManager::Instance().HostMemInstance(RT_MEMORY_HBM).Malloc(aligned_tensor.GetAlignedPtr(), - aligned_tensor.GetData().size()) == nullptr) { - GELOGE(MEMALLOC_FAILED, "[Malloc][HostMemory] for an existed GeTensor failed, model_name_:%s.", - GetGraphName()); - return MEMALLOC_FAILED; + if (aligned_tensor.GetData().size() > 0) { + if (MemManager::Instance().HostMemInstance(RT_MEMORY_HBM).Malloc(aligned_tensor.GetAlignedPtr(), + aligned_tensor.GetData().size()) == nullptr) { + GELOGE(MEMALLOC_FAILED, "[Malloc][HostMemory] for an existed GeTensor failed, model_name_:%s.", + GetGraphName()); + return MEMALLOC_FAILED; 
+ } + var_tensor.reset(new(std::nothrow)TensorValue(aligned_tensor.MutableData().data(), + aligned_tensor.GetData().size())); + } else { + var_tensor.reset(new(std::nothrow)TensorValue(nullptr, 0)); } - var_tensor.reset(new(std::nothrow)TensorValue(aligned_tensor.MutableData().data(), - aligned_tensor.GetData().size())); } else { GE_CHK_STATUS_RET_NOLOG(VarNodeToTensor(var_node, var_tensor)); GELOGD("Init const op tensor. name = %s, size = %ld", var_name.c_str(), var_tensor->GetSize()); diff --git a/ge/hybrid/node_executor/aicore/aicore_node_executor.cc b/ge/hybrid/node_executor/aicore/aicore_node_executor.cc index 29ae831c..7ebb9e39 100755 --- a/ge/hybrid/node_executor/aicore/aicore_node_executor.cc +++ b/ge/hybrid/node_executor/aicore/aicore_node_executor.cc @@ -41,9 +41,6 @@ AiCoreNodeTask::AiCoreNodeTask(std::vector> &&task Status AiCoreNodeExecutor::Initialize() { compiler_ = TaskCompilerFactory::GetInstance().GetTaskCompiler(); - if (compiler_ != nullptr) { - GE_CHK_STATUS_RET(compiler_->Initialize(), "[Init][TaskCompiler] failed."); - } return SUCCESS; } diff --git a/ge/hybrid/node_executor/aicore/aicore_op_task.cc b/ge/hybrid/node_executor/aicore/aicore_op_task.cc index 61942d51..40118af3 100644 --- a/ge/hybrid/node_executor/aicore/aicore_op_task.cc +++ b/ge/hybrid/node_executor/aicore/aicore_op_task.cc @@ -23,6 +23,7 @@ #include "graph/load/model_manager/tbe_handle_store.h" #include "graph/types.h" #include "single_op/task/build_task_utils.h" +#include "single_op/task/tbe_task_builder.h" using optiling::OpRunInfo; @@ -131,8 +132,8 @@ Status AiCoreOpTask::RegisterTbeHandle(const OpDesc &op_desc) { GE_IF_BOOL_EXEC(AttrUtils::GetStr(op_desc_ptr, GetKeyForKernelName(op_desc), kernel_name), GELOGI("Get original type of kernel_name")); GELOGI("TBE: binfile_key=%s, kernel_name=%s", stub_name_.c_str(), kernel_name.c_str()); - GE_CHK_RT_RET(rtFunctionRegister(bin_handle, stub_name_.c_str(), - stub_name_.c_str(), kernel_name.c_str(), 0)); + auto stub_func = 
KernelBinRegistry::GetInstance().GetUnique(stub_name_); + GE_CHK_RT_RET(rtFunctionRegister(bin_handle, stub_func, stub_name_.c_str(), kernel_name.c_str(), 0)); } return SUCCESS; } diff --git a/ge/hybrid/node_executor/aicore/aicore_task_compiler.cc b/ge/hybrid/node_executor/aicore/aicore_task_compiler.cc index 742b3ca2..0cdea5d5 100755 --- a/ge/hybrid/node_executor/aicore/aicore_task_compiler.cc +++ b/ge/hybrid/node_executor/aicore/aicore_task_compiler.cc @@ -31,6 +31,11 @@ REGISTER_TASK_COMPILER(AiCoreTaskCompiler); std::mutex AiCoreTaskCompiler::mu_; Status AiCoreTaskCompiler::Initialize() { + std::lock_guard lk(mu_); + if (is_initialized_) { + return SUCCESS; + } + auto ge_lib = GELib::GetInstance(); GE_CHECK_NOTNULL(ge_lib); if (!ge_lib->InitFlag()) { @@ -41,6 +46,7 @@ Status AiCoreTaskCompiler::Initialize() { auto &kernel_manager = ge_lib->OpsKernelManagerObj(); aic_kernel_store_ = kernel_manager.GetOpsKernelInfoStore("AIcoreEngine"); GE_CHECK_NOTNULL(aic_kernel_store_); + is_initialized_ = true; return SUCCESS; } @@ -57,6 +63,13 @@ Status AiCoreTaskCompiler::DoCompileOp(const NodePtr &node) const { } Status AiCoreTaskCompiler::CompileOp(const NodePtr &node, std::vector &tasks) { + Status ret = Initialize(); + if (ret != SUCCESS) { + GELOGE(FAILED, "[Check][State][%s] Offline inference not support online compile.", node->GetName().c_str()); + REPORT_INNER_ERROR("E19999", "[%s] Offline inference not support online compile.", node->GetName().c_str()); + return ret; + } + GE_CHECK_NOTNULL(node); GELOGI("AiCoreTaskCompiler(%s) CompileOp Start.", node->GetName().c_str()); diff --git a/ge/hybrid/node_executor/aicore/aicore_task_compiler.h b/ge/hybrid/node_executor/aicore/aicore_task_compiler.h index b6dfd82b..4cb4dc58 100755 --- a/ge/hybrid/node_executor/aicore/aicore_task_compiler.h +++ b/ge/hybrid/node_executor/aicore/aicore_task_compiler.h @@ -34,6 +34,7 @@ class AiCoreTaskCompiler : public TaskCompiler { Status DoCompileOp(const NodePtr &node) const; Status 
DoGenerateTask(const Node &node, std::vector &tasks); OpsKernelInfoStorePtr aic_kernel_store_; + bool is_initialized_ = false; static std::mutex mu_; }; } // namespace hybrid diff --git a/ge/hybrid/node_executor/hccl/hccl_node_executor.cc b/ge/hybrid/node_executor/hccl/hccl_node_executor.cc index 20684194..c46d5080 100644 --- a/ge/hybrid/node_executor/hccl/hccl_node_executor.cc +++ b/ge/hybrid/node_executor/hccl/hccl_node_executor.cc @@ -314,21 +314,26 @@ Status RdmaNodeTask::ExecuteAsync(TaskContext &context, std::function do return SUCCESS; } + rtEvent_t evt = nullptr; + if (context.GetExecutionContext()->hccl_stream != nullptr) { + GE_CHK_RT_RET(rtEventCreateWithFlag(&evt, 0x01)); + GE_CHK_RT_RET(rtStreamWaitEvent(context.GetExecutionContext()->hccl_stream, evt)); + } TaskContext *p_ctx = &context; - auto callback = [p_ctx, done_callback](HcclResult status) { + auto callback = [p_ctx, done_callback, evt](HcclResult status) { if (status != HCCL_SUCCESS) { GELOGE(HCCL_E_INTERNAL, "[Call][HcomExcutorInitialize] failed for node:%s(%s), ret: 0x%X", p_ctx->GetNodeName(), p_ctx->GetNodeItem().NodeType().c_str(), status); p_ctx->SetStatus(FAILED); } done_callback(); + if (evt != nullptr) { + GE_CHK_RT_RET(rtEventRecord(evt, nullptr)); + GE_CHK_RT_RET(rtEventDestroy(evt)); + } GELOGI("rdma callback success."); }; - std::string executor_type = context.GetNodeItem().NodeType(); - if (kRdmaScatterTypes.count(context.GetNodeItem().NodeType()) > 0) { - executor_type = context.GetNodeItem().NodeType() == HCOMREMOTEREFREAD ? 
HCOMREMOTEREAD : HCOMREMOTEWRITE; - } HcclResult hccl_ret = HcomExecEnqueueRemoteAccess(context.GetNodeItem().NodeType(), addr_infos, callback); if (hccl_ret != HCCL_SUCCESS) { GELOGE(HCCL_E_INTERNAL, "[Call][HcomExecEnqueueRemoteAccess] failed for node:%s(%s), ret: 0x%X", diff --git a/ge/hybrid/node_executor/host_cpu/host_cpu_node_executor.cc b/ge/hybrid/node_executor/host_cpu/host_cpu_node_executor.cc index 6e8a1eb9..d35989a1 100755 --- a/ge/hybrid/node_executor/host_cpu/host_cpu_node_executor.cc +++ b/ge/hybrid/node_executor/host_cpu/host_cpu_node_executor.cc @@ -18,8 +18,7 @@ #include "hybrid/node_executor/host_cpu/kernel_factory.h" #include "graph/passes/folding_pass.h" #include "hybrid/model/hybrid_model.h" -#include "graph/manager/graph_mem_allocator.h" -#include "graph/manager/host_mem_allocator.h" +#include "graph/manager/graph_mem_manager.h" #include "ge_local_engine/engine/host_cpu_engine.h" namespace ge { diff --git a/ge/hybrid/node_executor/task_context.cc b/ge/hybrid/node_executor/task_context.cc index 0468930a..59250d8c 100644 --- a/ge/hybrid/node_executor/task_context.cc +++ b/ge/hybrid/node_executor/task_context.cc @@ -563,7 +563,7 @@ const DumpProperties &TaskContext::GetDumpProperties() const { bool TaskContext::NeedCallback() { return node_item_->has_observer || IsDumpEnabled() || GraphExecutionContext::profiling_level > 0 || - !execution_context_->model->IsSingleOp(); + !execution_context_->model->IsSingleOp() || ProfilingManager::Instance().ProfilingModelLoadOn(); } Status TaskContext::Synchronize() { @@ -572,7 +572,7 @@ Status TaskContext::Synchronize() { Status TaskContext::SaveProfilingTaskDescInfo(uint32_t task_id, uint32_t stream_id, const std::string &task_type, uint32_t block_dim) { - if (ProfilingManager::Instance().ProfilingModelExecuteOn()) { + if (ProfilingManager::Instance().ProfilingModelLoadOn()) { const NodeItem &node_item = GetNodeItem(); auto op_desc = node_item.GetOpDesc(); GE_CHECK_NOTNULL(op_desc); diff --git 
a/ge/init/gelib.cc b/ge/init/gelib.cc index 96ed1b9c..2374e75f 100644 --- a/ge/init/gelib.cc +++ b/ge/init/gelib.cc @@ -39,7 +39,7 @@ #include "graph/ge_context.h" #include "graph/ge_global_options.h" #include "graph/load/model_manager/model_manager.h" -#include "graph/manager/graph_mem_allocator.h" +#include "graph/manager/graph_mem_manager.h" #include "graph/manager/host_mem_manager.h" #include "graph/manager/graph_var_manager.h" #include "runtime/kernel.h" diff --git a/ge/ir_build/ge_ir_build.cc b/ge/ir_build/ge_ir_build.cc index c9dfac07..bd6a2d3a 100644 --- a/ge/ir_build/ge_ir_build.cc +++ b/ge/ir_build/ge_ir_build.cc @@ -251,17 +251,24 @@ class Impl { omg_context_.dynamic_batch_size.clear(); omg_context_.dynamic_image_size.clear(); omg_context_.dynamic_dims.clear(); + omg_context_.user_attr_index_valid = false; }; ~Impl() { (void)generator_.Finalize(); }; graphStatus CheckOptions(const std::map &options); graphStatus CreateInputsForIRBuild(const ge::Graph &graph, vector &inputs); graphStatus UpdateDataOpAttr(const Graph &graph); + graphStatus CheckDataOpAttrIndexValid(const Graph &graph, const std::string &input_shape_range); graphStatus Init(const Graph &graph, const std::map &options); graphStatus BuildModel(const Graph &graph, const std::map &options, ModelBufferData &ge_models); graphStatus InitDomiOmgContext(const string &input_shape, const string &input_format, const string &net_format, bool is_dynamic_input); + graphStatus GetInputShapeRange(const string &input_shape_range, + std::map>> &name_shape_range_map, + std::vector>> &index_shape_range_map); static graphStatus InferShapePrepare(const ComputeGraphPtr &compute_graph); + bool GetUsrAttrIndexValidFlag(); + bool IsAttrIndexSetByUser(const ComputeGraphPtr &compute_graph, size_t &data_num, vector &attr_index); void SetRtSocVersion(); void UpdateThreadContext(); void LoadOpsProto(); @@ -288,11 +295,113 @@ graphStatus Impl::InferShapePrepare(const ComputeGraphPtr &compute_graph) { return GRAPH_SUCCESS; 
} +bool Impl::GetUsrAttrIndexValidFlag() { + return omg_context_.user_attr_index_valid; +} + +bool Impl::IsAttrIndexSetByUser(const ComputeGraphPtr &compute_graph, + size_t &data_num, + vector &attr_index) { + bool all_zero_flag = true; + for (ge::NodePtr &input_node : compute_graph->GetDirectNode()) { + GE_CHECK_NOTNULL(input_node); + ge::OpDescPtr op = input_node->GetOpDesc(); + GE_CHECK_NOTNULL(op); + if (op->GetType() == DATA) { + data_num++; + GeAttrValue::INT index = 0; + if (AttrUtils::GetInt(op, ATTR_NAME_INDEX, index)) { + if (index != 0) { + all_zero_flag = false; + } + attr_index.push_back(index); + } else { + GELOGW("[Get][AttrIndex] Get index[%ld] failed for op[%s].", index, op->GetName().c_str()); + } + } + } + if (data_num > 1 && attr_index.size() == data_num && all_zero_flag) { + GELOGI("Attr indexes are not set by user."); + return false; + } + return true; +} + +graphStatus Impl::GetInputShapeRange(const string &input_shape_range, + std::map>> &name_shape_range_map, + std::vector>> &index_shape_range_map) { + if (input_shape_range.empty()) { + GELOGI("Input shape range is empty."); + return GRAPH_SUCCESS; + } + Status ret = GRAPH_PARAM_INVALID; + if (input_shape_range.find(":") != string::npos) { + ret = ParseInputShapeRange(input_shape_range, name_shape_range_map); + } else { + ret = ParseInputShapeRange(input_shape_range, index_shape_range_map); + } + if (ret != SUCCESS) { + GELOGE(GRAPH_PARAM_INVALID, "[Parse][InputShapeRange] parse shape range[%s] failed.", input_shape_range.c_str()); + return GRAPH_PARAM_INVALID; + } + return GRAPH_SUCCESS; +} + +graphStatus Impl::CheckDataOpAttrIndexValid(const Graph &graph, const std::string &input_shape_range) { + auto compute_graph = ge::GraphUtils::GetComputeGraph(graph); + GE_CHECK_NOTNULL(compute_graph); + + // when set input shape range by index, user must set data attr index, eg. 
"[1, 3, 3, -1],[1, 3~5, 6, -1]" + bool index_input_shape_range_flag = !input_shape_range.empty() && (input_shape_range.find(":") == string::npos); + size_t data_num = 0; + vector attr_index; + if (!IsAttrIndexSetByUser(compute_graph, data_num, attr_index)) { + if (index_input_shape_range_flag) { + std::string situation = "Data op index"; + std::string reason = "it must be set by user, total data op num[" + std::to_string(data_num) + "], " + "when set input shape range by index."; + REPORT_INPUT_ERROR("E19025", std::vector({"situation", "reason"}), + std::vector({situation, reason})); + GELOGE(GRAPH_FAILED, "[Check][AttrIndex] Data op index is not set, total data op num[%ld], " + "when set input shape range by index.", data_num); + return GRAPH_FAILED; + } + return GRAPH_SUCCESS; + } + + omg_context_.user_attr_index_valid = true; + for (size_t i = 0; i < data_num; ++i) { + if (std::find(attr_index.begin(), attr_index.end(), i) == attr_index.end()) { + omg_context_.user_attr_index_valid = false; + if (index_input_shape_range_flag) { + std::string situation = "Data op index[" + std::to_string(i) + "]"; + std::string reason = "it must be set by user, total data op num[" + std::to_string(data_num) + "], " + "when set input shape range by index"; + REPORT_INPUT_ERROR("E19025", std::vector({"situation", "reason"}), + std::vector({situation, reason})); + GELOGE(GRAPH_FAILED, "[Check][AttrIndex] Attr index [%ld] is not set, total data op num[%ld], " + "when set input shape range by index", i, data_num); + return GRAPH_FAILED; + } else { + GELOGW("[Check][AttrIndex] Attr index [%ld] is not set, total data op num[%ld].", i, data_num); + } + } + } + GELOGI("Data op attr indexes are set by user and valid."); + return GRAPH_SUCCESS; +} + graphStatus Impl::UpdateDataOpAttr(const Graph &graph) { GELOGD("Enter Update Data Attr Process!"); std::string input_shape = (options_.find(kInputShape) == options_.end()) ? 
"" : options_[kInputShape]; std::string input_shape_range = (options_.find(kInputShapeRange) == options_.end()) ? "" : options_[kInputShapeRange]; + graphStatus ret = CheckDataOpAttrIndexValid(graph, input_shape_range); + if (ret != GRAPH_SUCCESS) { + GELOGE(GRAPH_FAILED, "[Check][DataOpAttrIndex] fail, shape range[%s].", input_shape_range.c_str()); + return GRAPH_FAILED; + } + map> shape_map; vector>> user_shape_map; if (!input_shape.empty()) { @@ -301,20 +410,13 @@ graphStatus Impl::UpdateDataOpAttr(const Graph &graph) { } std::map>> name_shape_range_map; std::vector>> index_shape_range_map; - if (!input_shape_range.empty()) { - Status ret = GRAPH_PARAM_INVALID; - if (input_shape_range.find(":") != string::npos) { - ret = ParseInputShapeRange(input_shape_range, name_shape_range_map); - } else { - ret = ParseInputShapeRange(input_shape_range, index_shape_range_map); - } - if (ret != SUCCESS) { - GELOGE(GRAPH_PARAM_INVALID, "[Parse][InputShapeRange] parse shape range[%s] failed.", input_shape_range.c_str()); - return GRAPH_PARAM_INVALID; - } - } auto compute_graph = ge::GraphUtils::GetComputeGraph(graph); GE_CHECK_NOTNULL(compute_graph); + ret = GetInputShapeRange(input_shape_range, name_shape_range_map, index_shape_range_map); + if (ret != GRAPH_SUCCESS) { + GELOGE(GRAPH_FAILED, "[Get][InputShapeRange] fail, shape range[%s].", input_shape_range.c_str()); + return GRAPH_FAILED; + } for (ge::NodePtr &input_node : compute_graph->GetDirectNode()) { GE_CHECK_NOTNULL(input_node); ge::OpDescPtr op = input_node->GetOpDesc(); @@ -495,7 +597,9 @@ graphStatus Impl::CreateInputsForIRBuild(const ge::Graph &graph, vectorGetOpDesc(); GE_CHECK_NOTNULL(op); if (op->GetType() == DATA) { - (void)AttrUtils::SetInt(op, ATTR_NAME_INDEX, index++); + if (!GetUsrAttrIndexValidFlag()) { + (void)AttrUtils::SetInt(op, ATTR_NAME_INDEX, index++); + } GELOGD("Data op inputDesc size: %zu", op->GetAllInputsDesc().size()); ge::GeTensorDesc tensor = op->GetInputDesc(0); string data_op_name = 
op->GetName(); diff --git a/ge/ir_build/option_utils.cc b/ge/ir_build/option_utils.cc index 2ad1577b..9943834e 100755 --- a/ge/ir_build/option_utils.cc +++ b/ge/ir_build/option_utils.cc @@ -253,8 +253,7 @@ bool CheckDynamicImagesizeInputShapeValid(map> shape_map for (auto str : split_set) { split_dim = StringUtils::Split(str, ','); if (split_dim.size() != static_cast(kDynamicImageSizeNum)) { - ErrorManager::GetInstance().ATCReportErrMessage("E10020", {"DynamicImageSizeNum"}, - {std::to_string(kDynamicImageSizeNum)}); + ErrorManager::GetInstance().ATCReportErrMessage("E10020"); GELOGE(ge::PARAM_INVALID, "[Check][DynamicImagesizeInputShape] invalid value:%s number of dimensions of each group must be %ld.", dynamic_image_size.c_str(), kDynamicImageSizeNum); @@ -847,18 +846,23 @@ Status UpdateDataOpShapeRange(const OpDescPtr &op, GELOGE(PARAM_INVALID, "[Check][OpDescPtr] Check shape by shape range failed for op:%s.", data_op_name.c_str()); return PARAM_INVALID; } - for (size_t idx = 0; idx < cur_shape_range.size(); idx++) { + std::vector dims; + for (size_t idx = 0; idx < cur_shape_range.size(); ++idx) { auto left_range = cur_shape_range[idx].first; auto right_range = cur_shape_range[idx].second; if (left_range != right_range) { - origin_shape.SetDim(idx, UNKNOWN_DIM); + dims.push_back(UNKNOWN_DIM); + } else { + dims.push_back(left_range); } } + origin_shape = GeShape(dims); tensor_input->SetShape(origin_shape); tensor_input->SetShapeRange(cur_shape_range); tensor_output->SetShape(origin_shape); tensor_output->SetShapeRange(cur_shape_range); - GELOGI("Update input [%s] shape range info", data_op_name.c_str()); + GELOGI("Update input [%s] shape range and shape [%s] info success.", + data_op_name.c_str(), origin_shape.ToString().c_str()); } else { GELOGI("No need to update input [%s] attr because not found from input_shape_range.", data_op_name.c_str()); } @@ -900,18 +904,23 @@ Status UpdateDataOpShapeRange(const OpDescPtr &op, GELOGE(PARAM_INVALID, "[Check][OpDescPtr] 
Check shape by shape range failed for op:%s.", data_op_name.c_str()); return PARAM_INVALID; } + std::vector dims; for (size_t idx = 0; idx < cur_shape_range.size(); ++idx) { auto left_range = cur_shape_range[idx].first; auto right_range = cur_shape_range[idx].second; if (left_range != right_range) { - origin_shape.SetDim(idx, UNKNOWN_DIM); + dims.push_back(UNKNOWN_DIM); + } else { + dims.push_back(left_range); } } + origin_shape = GeShape(dims); tensor_input->SetShape(origin_shape); tensor_input->SetShapeRange(cur_shape_range); tensor_output->SetShape(origin_shape); tensor_output->SetShapeRange(cur_shape_range); - GELOGI("Update input [%s] shape range info success.", data_op_name.c_str()); + GELOGI("Update input [%s] shape range and shape [%s] info success.", + data_op_name.c_str(), origin_shape.ToString().c_str()); return SUCCESS; } diff --git a/ge/session/inner_session.cc b/ge/session/inner_session.cc index 9308e267..39c87107 100755 --- a/ge/session/inner_session.cc +++ b/ge/session/inner_session.cc @@ -32,6 +32,7 @@ #include "graph/common/local_context.h" #include "graph/load/model_manager/model_manager.h" #include "graph/manager/graph_var_manager.h" +#include "graph/manager/graph_mem_manager.h" #include "graph/utils/tensor_adapter.h" #include "runtime/mem.h" @@ -155,6 +156,11 @@ Status InnerSession::Finalize() { // release var memory GELOGI("VarManager free var memory."); (void)VarManager::Instance(session_id_)->FreeVarMemory(); + + for (auto memory_type : MemManager::Instance().GetAllMemoryType()) { + (void)MemManager::Instance().SessionScopeMemInstance(memory_type).Free(session_id_); + } + // release analyzer saved info(Session Level) Analyzer::GetInstance()->DestroySessionJsonObject(session_id_); diff --git a/ge/single_op/single_op.cc b/ge/single_op/single_op.cc index 36ca1850..d09e8398 100755 --- a/ge/single_op/single_op.cc +++ b/ge/single_op/single_op.cc @@ -113,6 +113,30 @@ Status UpdateInputsBufferAddr(StreamResource *stream_resource, rtStream_t stream 
return SUCCESS; } +Status ModifyTensorDesc(GeTensorDesc &tensor) { + int64_t storage_format_val = static_cast(FORMAT_RESERVED); + (void)AttrUtils::GetInt(tensor, ge::ATTR_NAME_STORAGE_FORMAT, storage_format_val); + auto storage_format = static_cast(storage_format_val); + auto format = tensor.GetFormat(); + if (storage_format != FORMAT_RESERVED && storage_format != format) { + std::vector storage_shape; + if (!AttrUtils::GetListInt(tensor, ge::ATTR_NAME_STORAGE_SHAPE, storage_shape)) { + GELOGE(ACL_ERROR_GE_INTERNAL_ERROR, "[Get][storage_shape]failed while storage_format was set."); + REPORT_INNER_ERROR("E19999", "Get storage_shape failed while storage_format was set."); + return ACL_ERROR_GE_INTERNAL_ERROR; + } + + GELOGD("Storage format set. update shape to [%s], and original shape to [%s]", + GeShape(storage_shape).ToString().c_str(), tensor.GetShape().ToString().c_str()); + tensor.SetOriginShape(tensor.GetShape()); + tensor.SetOriginFormat(format); + tensor.SetShape(GeShape(storage_shape)); + tensor.SetFormat(storage_format); + } + + return SUCCESS; +} + Status InitHybridModelArgs(const std::vector &input_buffers, const std::vector &output_buffers, const std::vector &inputs_desc, @@ -126,6 +150,7 @@ Status InitHybridModelArgs(const std::vector &input_buffers, for (auto &tensor_desc : inputs_desc) { auto desc = MakeShared(tensor_desc); GE_CHECK_NOTNULL(desc); + GE_CHK_STATUS_RET_NOLOG(ModifyTensorDesc(*desc)); args.input_desc.emplace_back(desc); } return SUCCESS; diff --git a/ge/single_op/single_op_manager.cc b/ge/single_op/single_op_manager.cc index 180b50c1..d09dd802 100644 --- a/ge/single_op/single_op_manager.cc +++ b/ge/single_op/single_op_manager.cc @@ -19,8 +19,7 @@ #include #include -#include "graph/manager/graph_mem_allocator.h" -#include "graph/manager/graph_caching_allocator.h" +#include "graph/manager/graph_mem_manager.h" namespace ge { FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY SingleOpManager::~SingleOpManager() { diff --git 
a/ge/single_op/single_op_model.cc b/ge/single_op/single_op_model.cc index 6959c6b3..5b5f24a2 100755 --- a/ge/single_op/single_op_model.cc +++ b/ge/single_op/single_op_model.cc @@ -613,7 +613,7 @@ Status SingleOpModel::BuildDynamicOp(StreamResource &resource, DynamicSingleOp & single_op.num_inputs_ = data_ops_.size(); single_op.num_outputs_ = netoutput_op_->GetAllInputsSize(); GE_CHK_STATUS_RET_NOLOG(InitModelMem(resource)); - model_params_.memory_size = UINT_MAX; + model_params_.memory_size = UINT64_MAX; model_params_.graph_is_dynamic = true; auto ge_model = model_helper_.GetGeModel(); diff --git a/inc/framework/common/ge_types.h b/inc/framework/common/ge_types.h index 01fc7468..64231b8c 100644 --- a/inc/framework/common/ge_types.h +++ b/inc/framework/common/ge_types.h @@ -61,6 +61,9 @@ const std::string kTaskTypeAicore = "AI_CORE"; const std::string kTaskTypeAicpu = "AI_CPU"; const std::string kTaskTypeInvalid = "TASK_TYPE_INVALID"; +// dynamic execute mode +const char *const kLazyRecompile = "lazy_recompile"; + // Data cache, including data address and length struct DataBuffer { public: diff --git a/inc/framework/generator/ge_generator.h b/inc/framework/generator/ge_generator.h index 24f969dd..ee51d29d 100644 --- a/inc/framework/generator/ge_generator.h +++ b/inc/framework/generator/ge_generator.h @@ -106,6 +106,7 @@ class GE_FUNC_VISIBILITY GeGenerator { bool CheckNoAicore(const ComputeGraphPtr &graph); void RemoveConst(const vector &inputs, vector &outputs); Status CheckForSingleOp(OpDescPtr &op_desc, const vector &inputs, const vector &outputs); + Status InferFormatForSingleOp(OpDescPtr &op_desc); using GeRootModelPtr = std::shared_ptr; Status SetModelNameForDump(const GeRootModelPtr &ge_root_model); diff --git a/inc/framework/memory/memory_assigner.h b/inc/framework/memory/memory_assigner.h index f5837b3a..173cc64e 100644 --- a/inc/framework/memory/memory_assigner.h +++ b/inc/framework/memory/memory_assigner.h @@ -33,7 +33,7 @@ class GE_FUNC_VISIBILITY 
MemoryAssigner { MemoryAssigner &operator=(const MemoryAssigner &) = delete; - Status AssignMemory(bool is_loop_graph, map &mem_offset, size_t &zero_copy_mem_size); + Status AssignMemory(bool is_loop_graph, map &mem_offset, size_t &zero_copy_mem_size); private: ge::ComputeGraphPtr compute_graph_; diff --git a/inc/framework/omg/omg_inner_types.h b/inc/framework/omg/omg_inner_types.h index 1024f7e6..0b799bf2 100644 --- a/inc/framework/omg/omg_inner_types.h +++ b/inc/framework/omg/omg_inner_types.h @@ -125,6 +125,7 @@ struct OmgContext { std::vector getnext_nosink_nodes; bool fuzz_compile_flag = false; std::string atc_cmdline; + bool user_attr_index_valid = false; }; } // namespace ge diff --git a/metadef b/metadef index 8dd3448e..23718da6 160000 --- a/metadef +++ b/metadef @@ -1 +1 @@ -Subproject commit 8dd3448e2f0150c51266bc120bdd5d171a003e6b +Subproject commit 23718da69af64f8a57051ee64d5515ae1e103c70 diff --git a/parser b/parser index 7773435b..9bb03f21 160000 --- a/parser +++ b/parser @@ -1 +1 @@ -Subproject commit 7773435b776fb37231abcef2bbcf972814b01dd1 +Subproject commit 9bb03f21773f028b07d5a912db6f176268962c7d diff --git a/tests/depends/mmpa/src/mmpa_stub.cc b/tests/depends/mmpa/src/mmpa_stub.cc index 62499ca1..a82621ef 100644 --- a/tests/depends/mmpa/src/mmpa_stub.cc +++ b/tests/depends/mmpa/src/mmpa_stub.cc @@ -242,6 +242,90 @@ INT32 mmRealPath(const CHAR *path, CHAR *realPath, INT32 realPathLen) return ret; } +INT32 mmRWLockInit(mmRWLock_t *rwLock) +{ + if (rwLock == NULL) { + return EN_INVALID_PARAM; + } + + INT32 ret = pthread_rwlock_init(rwLock, NULL); + if (ret != MMPA_ZERO) { + return EN_ERROR; + } + + return EN_OK; +} + +INT32 mmRWLockRDLock(mmRWLock_t *rwLock) +{ + if (rwLock == NULL) { + return EN_INVALID_PARAM; + } + + INT32 ret = pthread_rwlock_rdlock(rwLock); + if (ret != MMPA_ZERO) { + return EN_ERROR; + } + + return EN_OK; +} + +INT32 mmRWLockWRLock(mmRWLock_t *rwLock) +{ + if (rwLock == NULL) { + return EN_INVALID_PARAM; + } + + INT32 ret = 
pthread_rwlock_wrlock(rwLock); + if (ret != MMPA_ZERO) { + return EN_ERROR; + } + + return EN_OK; +} + +INT32 mmRDLockUnLock(mmRWLock_t *rwLock) +{ + if (rwLock == NULL) { + return EN_INVALID_PARAM; + } + + INT32 ret = pthread_rwlock_unlock(rwLock); + if (ret != MMPA_ZERO) { + return EN_ERROR; + } + + return EN_OK; +} + +INT32 mmWRLockUnLock(mmRWLock_t *rwLock) +{ + if (rwLock == NULL) { + return EN_INVALID_PARAM; + } + + INT32 ret = pthread_rwlock_unlock(rwLock); + if (ret != MMPA_ZERO) { + return EN_ERROR; + } + + return EN_OK; +} + +INT32 mmRWLockDestroy(mmRWLock_t *rwLock) +{ + if (rwLock == NULL) { + return EN_INVALID_PARAM; + } + + INT32 ret = pthread_rwlock_destroy(rwLock); + if (ret != MMPA_ZERO) { + return EN_ERROR; + } + + return EN_OK; +} + INT32 mmGetErrorCode() { return 0; diff --git a/tests/depends/runtime/src/runtime_stub.cc b/tests/depends/runtime/src/runtime_stub.cc index 59a98978..2b1af23c 100644 --- a/tests/depends/runtime/src/runtime_stub.cc +++ b/tests/depends/runtime/src/runtime_stub.cc @@ -434,6 +434,8 @@ rtError_t rtGetRtCapability(rtFeatureType_t featureType, int32_t featureInfo, in rtError_t rtGetMaxStreamAndTask(uint32_t streamType, uint32_t *maxStrCount, uint32_t *maxTaskCount) { + *maxStrCount = 1024; + *maxTaskCount = 1024; return RT_ERROR_NONE; } @@ -456,4 +458,4 @@ rtError_t rtDebugUnRegisterForStream(rtStream_t stream) { } #ifdef __cplusplus } -#endif \ No newline at end of file +#endif diff --git a/tests/ut/ge/CMakeLists.txt b/tests/ut/ge/CMakeLists.txt index 7cdec968..16f3672b 100755 --- a/tests/ut/ge/CMakeLists.txt +++ b/tests/ut/ge/CMakeLists.txt @@ -216,12 +216,10 @@ set(COMMON_SRC_FILES "${GE_CODE_DIR}/ge/graph/passes/dimension_adjust_pass.cc" "${GE_CODE_DIR}/ge/graph/passes/get_original_format_pass.cc" "${GE_CODE_DIR}/ge/graph/passes/shape_operate_op_remove_pass.cc" - "${GE_CODE_DIR}/ge/graph/passes/unused_op_remove_pass.cc" "${GE_CODE_DIR}/ge/graph/passes/assert_pass.cc" "${GE_CODE_DIR}/ge/graph/passes/dropout_pass.cc" 
"${GE_CODE_DIR}/ge/graph/passes/infershape_pass.cc" "${GE_CODE_DIR}/ge/graph/passes/unused_const_pass.cc" - "${GE_CODE_DIR}/ge/graph/passes/isolated_op_remove_pass.cc" "${GE_CODE_DIR}/ge/graph/passes/permute_pass.cc" "${GE_CODE_DIR}/ge/graph/passes/ctrl_edge_transfer_pass.cc" "${GE_CODE_DIR}/ge/graph/passes/end_of_sequence_add_control_pass.cc" @@ -263,7 +261,6 @@ set(COMMON_SRC_FILES "${GE_CODE_DIR}/ge/graph/passes/switch_logic_remove_pass.cc" "${GE_CODE_DIR}/ge/graph/passes/switch_data_edges_bypass.cc" "${GE_CODE_DIR}/ge/graph/passes/merge_pass.cc" - "${GE_CODE_DIR}/ge/graph/passes/variable_format_pass.cc" "${GE_CODE_DIR}/ge/graph/passes/variable_op_pass.cc" "${GE_CODE_DIR}/ge/graph/passes/cast_remove_pass.cc" "${GE_CODE_DIR}/ge/graph/passes/transpose_transdata_pass.cc" @@ -337,8 +334,10 @@ set(COMMON_SRC_FILES "${GE_CODE_DIR}/ge/graph/manager/trans_var_data_utils.cc" "${GE_CODE_DIR}/ge/graph/common/local_context.cc" "${GE_CODE_DIR}/ge/graph/manager/graph_caching_allocator.cc" + "${GE_CODE_DIR}/ge/graph/manager/session_scope_mem_allocator.cc" "${GE_CODE_DIR}/ge/graph/manager/rdma_pool_allocator.cc" "${GE_CODE_DIR}/ge/graph/manager/host_mem_allocator.cc" + "${GE_CODE_DIR}/ge/graph/manager/graph_mem_manager.cc" "${GE_CODE_DIR}/ge/common/dump/dump_op.cc" "${GE_CODE_DIR}/ge/common/model_saver.cc" "${GE_CODE_DIR}/ge/hybrid/node_executor/aicpu/aicpu_ext_info.cc" @@ -396,8 +395,10 @@ set(GRAPH_LOAD_COMMON_SRC_FILES "${GE_CODE_DIR}/ge/graph/manager/graph_var_manager.cc" "${GE_CODE_DIR}/ge/graph/manager/trans_var_data_utils.cc" "${GE_CODE_DIR}/ge/graph/manager/graph_caching_allocator.cc" + "${GE_CODE_DIR}/ge/graph/manager/session_scope_mem_allocator.cc" "${GE_CODE_DIR}/ge/graph/manager/rdma_pool_allocator.cc" "${GE_CODE_DIR}/ge/graph/manager/host_mem_allocator.cc" + "${GE_CODE_DIR}/ge/graph/manager/graph_mem_manager.cc" "${GE_CODE_DIR}/ge/common/thread_pool.cc" ) @@ -495,8 +496,6 @@ set(GRAPH_PASS_COMMON_SRC_FILES 
"${GE_CODE_DIR}/ge/graph/passes/placeholder_with_default_pass.cc" "${GE_CODE_DIR}/ge/graph/passes/snapshot_pass.cc" "${GE_CODE_DIR}/ge/graph/passes/shape_operate_op_remove_pass.cc" - "${GE_CODE_DIR}/ge/graph/passes/unused_op_remove_pass.cc" - "${GE_CODE_DIR}/ge/graph/passes/isolated_op_remove_pass.cc" "${GE_CODE_DIR}/ge/graph/passes/permute_pass.cc" "${GE_CODE_DIR}/ge/graph/passes/var_is_initialized_op_pass.cc" "${GE_CODE_DIR}/ge/graph/passes/cast_translate_pass.cc" @@ -670,7 +669,6 @@ set(PASS_TEST_FILES "graph/passes/permute_pass_unittest.cc" "graph/passes/print_op_pass_unittest.cc" "graph/passes/shape_operate_op_remove_pass_unittest.cc" - "graph/passes/unused_and_isolated_op_remove_pass_unittest.cc" "graph/passes/variable_op_pass_unittest.cc" "graph/passes/base_pass_unittest.cc" "graph/passes/addn_pass_unittest.cc" @@ -680,6 +678,7 @@ set(PASS_TEST_FILES "graph/passes/cond_branch_v1_unittest.cc" "graph/passes/loop_branch_v1_unittest.cc" "graph/passes/switch_dead_branch_elimination_unittest.cc" + "graph/passes/subgraph_pass_unittest.cc" "graph/passes/assert_pass_unittest.cc" "graph/passes/dropout_pass_unittest.cc" "graph/passes/unused_const_pass_unittest.cc" @@ -716,6 +715,7 @@ set(PASS_TEST_FILES "graph/passes/mark_node_unknown_shape_pass_unittest.cc" "graph/passes/reshape_recovery_pass_unittest.cc" "graph/passes/cast_remove_pass_unittest.cc" + "graph/passes/memcpy_addr_async_unittest.cc" ) set(KERNEL_TEST_FILES @@ -784,6 +784,7 @@ set(MULTI_PARTS_TEST_FILES "common/ge_auth_file_saver_unittest.cc" "graph/variable_accelerate_ctrl_unittest.cc" "graph/build/logical_stream_allocator_unittest.cc" + "graph/build/stream_allocator_unittest.cc" "graph/build/model_builder_unittest.cc" "graph/build/mem_assigner_unittest.cc" "graph/build/task_generator_unittest.cc" @@ -792,6 +793,8 @@ set(MULTI_PARTS_TEST_FILES "graph/preprocess/graph_preprocess_unittest.cc" "graph/manager/hcom_util_unittest.cc" "graph/manager/graph_caching_allocator_unittest.cc" + 
"graph/manager/host_mem_allocator_unittest.cc" + "graph/manager/session_scope_mem_allocator_unittest.cc" "graph/manager/run_graph_unittest.cc" "graph/partition/dynamic_shape_partition_unittest.cc" "graph/manager/graph_manager_unittest.cc" @@ -824,10 +827,14 @@ set(PROFILING_MNG_TEST_FILES set(HYBRID_TEST_FILES "hybrid/ge_hybrid_unittest.cc" "hybrid/known_node_executor_unittest.cc" + "hybrid/executor/node_state_unittest.cc" "hybrid/executor/subgraph_executor_unittest.cc" "hybrid/executor/worker/execution_engine_unittest.cc" "hybrid/model/hybrid_model_builder_unittest.cc" "hybrid/node_executor/rts/rts_node_task_unittest.cc" + "hybrid/executor/hybrid_model_async_executor_unittest.cc" + "hybrid/executor/hybrid_model_pipeline_executor_unittest.cc" + "hybrid/node_executor/aicore/aicore_task_compiler_unittest.cc" ) set(OTHERS_TEST_FILES diff --git a/tests/ut/ge/generator/ge_generator_unittest.cc b/tests/ut/ge/generator/ge_generator_unittest.cc index fb256c7c..1bb4430f 100644 --- a/tests/ut/ge/generator/ge_generator_unittest.cc +++ b/tests/ut/ge/generator/ge_generator_unittest.cc @@ -23,6 +23,7 @@ #include "graph/attr_value.h" #include "graph/debug/ge_attr_define.h" #include "graph/utils/graph_utils.h" +#include "graph/operator_factory_impl.h" #include "../graph/passes/graph_builder_utils.h" #include "../graph/manager/graph_manager.h" #include "all_ops.h" @@ -79,6 +80,27 @@ TEST_F(UtestGeGenerator, test_build_single_op_offline) { EXPECT_EQ(generator.BuildSingleOpModel(op_desc, inputs, outputs, "offline_"), GE_GENERATOR_GRAPH_MANAGER_BUILD_GRAPH_FAILED); } */ +graphStatus TestFunc(Operator &op) { return 0; } +graphStatus TestFunc1(Operator &op) { return 1; } +TEST_F(UtestGeGenerator, test_infer_format_for_single_op) { + OperatorFactoryImpl::RegisterInferFormatFunc("Add", TestFunc); + shared_ptr op_desc = make_shared("add", "add"); + GeGenerator generator; + EXPECT_EQ(generator.InferFormatForSingleOp(op_desc), SUCCESS); + shared_ptr op_desc1 = make_shared("Add", "Add"); + 
EXPECT_EQ(generator.InferFormatForSingleOp(op_desc1), SUCCESS); + OperatorFactoryImpl::RegisterInferFormatFunc("MatMulV2", TestFunc1); + shared_ptr op_desc2 = make_shared("MatMulV2", "MatMulV2"); + GeTensorDesc tensor_desc; + EXPECT_EQ(op_desc2->AddInputDesc(tensor_desc), GRAPH_SUCCESS); + EXPECT_EQ(op_desc2->AddInputDesc(tensor_desc), GRAPH_SUCCESS); + EXPECT_EQ(op_desc2->AddInputDesc(tensor_desc), GRAPH_SUCCESS); + EXPECT_EQ(op_desc2->AddInputDesc(tensor_desc), GRAPH_SUCCESS); + EXPECT_EQ(op_desc2->AddInputDesc(tensor_desc), GRAPH_SUCCESS); + EXPECT_EQ(op_desc2->AddOutputDesc(tensor_desc), GRAPH_SUCCESS); + EXPECT_EQ(op_desc2->AddOutputDesc(tensor_desc), GRAPH_SUCCESS); + EXPECT_EQ(generator.InferFormatForSingleOp(op_desc2), FAILED); +} TEST_F(UtestGeGenerator, test_build_single_op_online) { GeTensorDesc tensor_desc; diff --git a/tests/ut/ge/graph/build/mem_assigner_unittest.cc b/tests/ut/ge/graph/build/mem_assigner_unittest.cc index 2a0f2405..785af2ef 100644 --- a/tests/ut/ge/graph/build/mem_assigner_unittest.cc +++ b/tests/ut/ge/graph/build/mem_assigner_unittest.cc @@ -44,7 +44,8 @@ using domi::GetContext; class UtestMemoryAssignerTest : public testing::Test { public: - ge::OpDescPtr CreateOpWithWsSize(const string &name, int64_t wsByte, const string &type = "some", int64_t size = 1024) { + ge::OpDescPtr CreateOpWithWsSize(const string &name, int64_t wsByte, const string &type = "some", + int64_t size = 1024) { ge::OpDescPtr op_def = make_shared(name, type); auto desc_temp_ptr = make_shared(); auto desc_temp = *desc_temp_ptr; @@ -214,7 +215,8 @@ class UtestMemoryAssignerTest : public testing::Test { return builder.GetGraph(); } - void make_ffts_reuse_graph(ge::ComputeGraphPtr graph, int32_t thread_scope_id_1 = kInvalidThreadScopeId, + + void MakeFftsReuseGraph(ge::ComputeGraphPtr graph, int32_t thread_scope_id_1 = kInvalidThreadScopeId, int32_t thread_scope_id_2 = kInvalidThreadScopeId) { ge::OpDescPtr op_def_a = CreateOpWithWsSize("A", 512); ge::OpDescPtr 
op_def_b = CreateOpWithWsSize("B", 0); @@ -253,28 +255,119 @@ class UtestMemoryAssignerTest : public testing::Test { graph->TopologicalSorting(); } + void MakeSessionScopeReuseGraph(ge::ComputeGraphPtr graph) { + ge::OpDescPtr op_def_a = CreateOpWithWsSize("A", 512); + ge::OpDescPtr op_def_b = CreateOpWithWsSize("B", 0); + ge::OpDescPtr op_def_c = CreateOpWithWsSize("C", 512); + ge::OpDescPtr op_def_d = CreateOpWithWsSize("D", 512); + ge::OpDescPtr op_def_e = CreateOpWithWsSize("E", 1024); + ge::OpDescPtr op_def_f = CreateOpWithWsSize("F", 512, "some", 2048UL); + ge::OpDescPtr op_def_g = CreateOpWithWsSize("G", 0); + + std::vector workspace_bytes; + workspace_bytes.push_back(1024); + workspace_bytes.push_back(512); + op_def_c->SetWorkspaceBytes(workspace_bytes); + vector workspace_no_reuse_scope = { 0 , 1 }; + (void)ge::AttrUtils::SetListInt(op_def_c, ATTR_NAME_WORKSPACE_MEMORY_NO_REUSE_SCOPE, workspace_no_reuse_scope); + + vector workspace_no_reuse_scope_e = { 1 }; + (void)ge::AttrUtils::SetListInt(op_def_e, ATTR_NAME_WORKSPACE_MEMORY_NO_REUSE_SCOPE, workspace_no_reuse_scope_e); + + ge::NodePtr node_a = graph->AddNode(op_def_a); + ge::NodePtr node_b = graph->AddNode(op_def_b); + ge::NodePtr node_c = graph->AddNode(op_def_c); + ge::NodePtr node_d = graph->AddNode(op_def_d); + ge::NodePtr node_e = graph->AddNode(op_def_e); + ge::NodePtr node_f = graph->AddNode(op_def_f); + ge::NodePtr node_g = graph->AddNode(op_def_g); + + ge::GraphUtils::AddEdge(node_a->GetOutDataAnchor(0), node_b->GetInDataAnchor(0)); + ge::GraphUtils::AddEdge(node_b->GetOutDataAnchor(0), node_c->GetInDataAnchor(0)); + ge::GraphUtils::AddEdge(node_c->GetOutDataAnchor(0), node_d->GetInDataAnchor(0)); + ge::GraphUtils::AddEdge(node_d->GetOutDataAnchor(0), node_e->GetInDataAnchor(0)); + ge::GraphUtils::AddEdge(node_e->GetOutDataAnchor(0), node_f->GetInDataAnchor(0)); + ge::GraphUtils::AddEdge(node_f->GetOutDataAnchor(0), node_g->GetInDataAnchor(0)); + graph->TopologicalSorting(); + } + + void 
MakeContinuousReuseGraph(ge::ComputeGraphPtr graph, bool nopading = false) { + ge::OpDescPtr op_def_a = CreateOpWithWsSize("A", 512); + ge::OpDescPtr op_def_b = CreateOpWithWsSize("B", 0); + ge::OpDescPtr op_def_c = CreateOpWithWsSize("C", 512); + ge::OpDescPtr op_def_d = CreateOpWithWsSize("D", 512); + ge::OpDescPtr op_def_e = CreateOpWithWsSize("E", 1024); + ge::OpDescPtr op_def_f = CreateOpWithWsSize("F", 512, "some", 2048UL); + ge::OpDescPtr op_def_g = CreateOpWithWsSize("G", 0); + + if (nopading) { + (void)ge::AttrUtils::SetBool(op_def_d, ATTR_NAME_NOPADDING_CONTINUOUS_INPUT, true); + (void)ge::AttrUtils::SetBool(op_def_d, ATTR_NAME_NOPADDING_CONTINUOUS_OUTPUT, true); + (void)ge::AttrUtils::SetBool(op_def_d, ATTR_NAME_OUTPUT_REUSE_INPUT, true); + (void)ge::AttrUtils::SetInt(op_def_d, ATTR_NAME_REUSE_INPUT_ON_DIM_INDEX, 0); + } else { + (void)ge::AttrUtils::SetBool(op_def_d, ATTR_NAME_CONTINUOUS_INPUT, true); + (void)ge::AttrUtils::SetBool(op_def_d, ATTR_NAME_CONTINUOUS_OUTPUT, true); + } + + ge::NodePtr node_a = graph->AddNode(op_def_a); + ge::NodePtr node_b = graph->AddNode(op_def_b); + ge::NodePtr node_c = graph->AddNode(op_def_c); + ge::NodePtr node_d = graph->AddNode(op_def_d); + ge::NodePtr node_e = graph->AddNode(op_def_e); + ge::NodePtr node_f = graph->AddNode(op_def_f); + ge::NodePtr node_g = graph->AddNode(op_def_g); + + ge::GraphUtils::AddEdge(node_a->GetOutDataAnchor(0), node_d->GetInDataAnchor(0)); + ge::GraphUtils::AddEdge(node_b->GetOutDataAnchor(0), node_d->GetInDataAnchor(0)); + ge::GraphUtils::AddEdge(node_c->GetOutDataAnchor(0), node_d->GetInDataAnchor(0)); + ge::GraphUtils::AddEdge(node_d->GetOutDataAnchor(0), node_e->GetInDataAnchor(0)); + ge::GraphUtils::AddEdge(node_d->GetOutDataAnchor(0), node_f->GetInDataAnchor(0)); + ge::GraphUtils::AddEdge(node_d->GetOutDataAnchor(0), node_g->GetInDataAnchor(0)); + graph->TopologicalSorting(); + } + + void MakeMultiBatchReuseGraph(ge::ComputeGraphPtr graph) { + ge::OpDescPtr op_def_a = 
CreateOpWithWsSize("A", 512); + ge::OpDescPtr op_def_b = CreateOpWithWsSize("B", 0); + ge::OpDescPtr op_def_c = CreateOpWithWsSize("C", 512); + ge::OpDescPtr op_def_d = CreateOpWithWsSize("D", 512); + ge::OpDescPtr op_def_e = CreateOpWithWsSize("E", 1024); + ge::OpDescPtr op_def_f = CreateOpWithWsSize("F", 512, "some", 2048UL); + ge::OpDescPtr op_def_g = CreateOpWithWsSize("G", 0); + + (void)ge::AttrUtils::SetStr(op_def_b, ATTR_NAME_BATCH_LABEL, "Batch_0"); + (void)ge::AttrUtils::SetStr(op_def_c, ATTR_NAME_BATCH_LABEL, "Batch_0"); + (void)ge::AttrUtils::SetStr(op_def_e, ATTR_NAME_BATCH_LABEL, "Batch_1"); + (void)ge::AttrUtils::SetStr(op_def_f, ATTR_NAME_BATCH_LABEL, "Batch_1"); + vector workspace_no_reuse_scope = { 1 }; + (void)ge::AttrUtils::SetListInt(op_def_c, ATTR_NAME_WORKSPACE_MEMORY_NO_REUSE_SCOPE, workspace_no_reuse_scope); + (void)ge::AttrUtils::SetListInt(op_def_e, ATTR_NAME_WORKSPACE_MEMORY_NO_REUSE_SCOPE, workspace_no_reuse_scope); + + ge::NodePtr node_a = graph->AddNode(op_def_a); + ge::NodePtr node_b = graph->AddNode(op_def_b); + ge::NodePtr node_c = graph->AddNode(op_def_c); + ge::NodePtr node_d = graph->AddNode(op_def_d); + ge::NodePtr node_e = graph->AddNode(op_def_e); + ge::NodePtr node_f = graph->AddNode(op_def_f); + ge::NodePtr node_g = graph->AddNode(op_def_g); + + ge::GraphUtils::AddEdge(node_a->GetOutDataAnchor(0), node_b->GetInDataAnchor(0)); + ge::GraphUtils::AddEdge(node_b->GetOutDataAnchor(0), node_c->GetInDataAnchor(0)); + ge::GraphUtils::AddEdge(node_c->GetOutDataAnchor(0), node_d->GetInDataAnchor(0)); + ge::GraphUtils::AddEdge(node_a->GetOutDataAnchor(0), node_e->GetInDataAnchor(0)); + ge::GraphUtils::AddEdge(node_e->GetOutDataAnchor(0), node_f->GetInDataAnchor(0)); + ge::GraphUtils::AddEdge(node_f->GetOutDataAnchor(0), node_d->GetInDataAnchor(0)); + ge::GraphUtils::AddEdge(node_d->GetOutDataAnchor(0), node_g->GetInDataAnchor(0)); + graph->TopologicalSorting(); + } + protected: void SetUp() {} void TearDown() { 
GetContext().out_nodes_map.clear(); } }; -/* -TEST_F(UtestMemoryAssignerTest, MemoryBlock_Resize_RealSizeList_is_empty) { - ge::ComputeGraphPtr graph = make_shared(""); - ge::OpDescPtr op_def_a = CreateOpWithWsSize("A", 6000); - ge::NodePtr node_a = graph->AddNode(op_def_a); - MemoryBlock* memory_block = new MemoryBlock(0); - memory_block->Init(1, kOutput, node_a, 0, 1); - memory_block->real_size_list_.clear(); - memory_block->Resize(); - - EXPECT_EQ(memory_block->Size(), 0); - - delete memory_block; -} -*/ - namespace ge { class MockBlockMemAssigner : public BlockMemAssigner { @@ -313,12 +406,44 @@ TEST_F(UtestMemoryAssignerTest, graph_memory_assign_continuous_input) { EXPECT_EQ(addn2->GetOpDesc()->GetOutputOffset()[0], 600); } +TEST_F(UtestMemoryAssignerTest, block_memory_assign_nopading_continuous_memory) { + ge::ComputeGraphPtr graph = make_shared(""); + MakeContinuousReuseGraph(graph, true); + HybridMemAssigner hybridMemAssigner(graph); + ge::Status ret = hybridMemAssigner.Assign(); + size_t offset = 0; + auto it = hybridMemAssigner.GetMemOffsets().find(RT_MEMORY_HBM); + if (it != hybridMemAssigner.GetMemOffsets().end()) { + offset = it->second; + } + + EXPECT_EQ(offset, 8192); + EXPECT_EQ(ret, SUCCESS); +} + +TEST_F(UtestMemoryAssignerTest, block_memory_assign_continuous_memory) { + ge::ComputeGraphPtr graph = make_shared(""); + MakeContinuousReuseGraph(graph); + map mem_offset; + size_t zero_copy_mem_size = 0; + MemoryAssigner memoryAssigner(graph); + ge::Status ret = memoryAssigner.AssignMemory(false, mem_offset, zero_copy_mem_size); + size_t offset = 0; + auto it = mem_offset.find(RT_MEMORY_HBM); + if (it != mem_offset.end()) { + offset = it->second; + } + + EXPECT_EQ(offset, 11264); + EXPECT_EQ(ret, SUCCESS); +} + TEST_F(UtestMemoryAssignerTest, graph_memory_set_last_used_attr) { ge::ComputeGraphPtr graph = make_shared(""); MakeGraph(graph); auto node_f = graph->FindNode("F"); MemoryAssigner memory_assigner(graph); - map mem_offset; + map mem_offset; 
size_t zero_memory_size = 0; EXPECT_EQ(memory_assigner.AssignMemory(false, mem_offset, zero_memory_size), GRAPH_SUCCESS); @@ -335,7 +460,7 @@ TEST_F(UtestMemoryAssignerTest, graph_memory_assign_ref_var) { std::string value = "A"; (void) ge::AttrUtils::SetStr(node_b->GetOpDesc()->MutableOutputDesc(0), REF_VAR_SRC_VAR_NAME, value); MemoryAssigner memory_assigner(graph); - map mem_offset; + map mem_offset; size_t zero_memory_size = 0; VarManager::Instance(0)->Init(0, 0, 0, 0); EXPECT_EQ(memory_assigner.AssignMemory(false, mem_offset, zero_memory_size), GRAPH_SUCCESS); @@ -356,7 +481,7 @@ TEST_F(UtestMemoryAssignerTest, graph_memory_assign_ref_var_not_found) { std::string value = "M"; (void) ge::AttrUtils::SetStr(node_b->GetOpDesc()->MutableOutputDesc(0), REF_VAR_SRC_VAR_NAME, value); MemoryAssigner memory_assigner(graph); - map mem_offset; + map mem_offset; size_t zero_memory_size = 0; VarManager::Instance(0)->Init(0, 0, 0, 0); EXPECT_NE(memory_assigner.AssignMemory(false, mem_offset, zero_memory_size), GRAPH_SUCCESS); @@ -400,6 +525,34 @@ TEST_F(UtestMemoryAssignerTest, graph_memory_assign_update_ref_op_offset_reverse EXPECT_EQ(memoryAssigner.UpdateRefOpOffsetReverse(add), SUCCESS); } +TEST_F(UtestMemoryAssignerTest, graph_memory_assign_var_input_ref_cascade_false) { + ge::ut::GraphBuilder builder("graph"); + auto var = builder.AddNode("var", VARIABLE, 1, 1); + auto broadcast = builder.AddNode("broadcast", HCOMBROADCAST, 1, 1); + auto assign = builder.AddNode("assign", "Assign", 2, 1); + // add link + builder.AddDataEdge(var, 0, assign, 0); + builder.AddDataEdge(var, 0, broadcast, 0); + builder.AddDataEdge(broadcast, 0, assign, 1); + + int reuse_input_index = 0; + auto broadcast_desc = broadcast->GetOpDesc()->MutableOutputDesc(0); + ge::TensorUtils::SetReuseInput(*broadcast_desc, true); + ge::TensorUtils::SetReuseInputIndex(*broadcast_desc, reuse_input_index); + + ge::ComputeGraphPtr graph = builder.GetGraph(); + + GraphMemoryAssigner memory_assigner(graph); + bool 
ref_cascade = memory_assigner.IsRefFromInputOpCascade(broadcast); + EXPECT_EQ(ref_cascade, false); + ref_cascade = memory_assigner.IsRefFromInputOpCascade(assign); + EXPECT_EQ(ref_cascade, false); + auto ret = memory_assigner.UpdateRefOpOffsetReverse(broadcast); + EXPECT_EQ(ret, SUCCESS); + ret = memory_assigner.UpdateRefOpOffsetReverse(assign); + EXPECT_EQ(ret, SUCCESS); +} + TEST_F(UtestMemoryAssignerTest, graph_memory_assign_atomic_output_and_workspace) { ge::ut::GraphBuilder builder("graph"); auto data_input = builder.AddNode("data", "Data", 1, 1); @@ -460,30 +613,86 @@ TEST_F(UtestMemoryAssignerTest, graph_memory_assign_atomic_output_and_workspace) TEST_F(UtestMemoryAssignerTest, Mock_ffts_reuse_no_functinon_op) { ge::ComputeGraphPtr graph = make_shared(""); - make_ffts_reuse_graph(graph, kInvalidThreadScopeId, kInvalidThreadScopeId); + MakeFftsReuseGraph(graph, kInvalidThreadScopeId, kInvalidThreadScopeId); HybridMemAssigner hybridMemAssigner(graph); ge::Status ret = hybridMemAssigner.Assign(); - size_t offset = hybridMemAssigner.GetMemOffset(); + size_t offset = 0; + auto it = hybridMemAssigner.GetMemOffsets().find(RT_MEMORY_HBM); + if (it != hybridMemAssigner.GetMemOffsets().end()) { + offset = it->second; + } EXPECT_EQ(offset, 5120); EXPECT_EQ(ret, SUCCESS); } TEST_F(UtestMemoryAssignerTest, Mock_ffts_reuse_two_functinon_op) { ge::ComputeGraphPtr graph = make_shared(""); - make_ffts_reuse_graph(graph, 0, 1); + MakeFftsReuseGraph(graph, 0, 1); HybridMemAssigner hybridMemAssigner(graph); ge::Status ret = hybridMemAssigner.Assign(); - size_t offset = hybridMemAssigner.GetMemOffset(); + size_t offset = 0; + auto it = hybridMemAssigner.GetMemOffsets().find(RT_MEMORY_HBM); + if (it != hybridMemAssigner.GetMemOffsets().end()) { + offset = it->second; + } EXPECT_EQ(offset, 6656); EXPECT_EQ(ret, SUCCESS); } TEST_F(UtestMemoryAssignerTest, Mock_ffts_reuse_one_functinon_op) { ge::ComputeGraphPtr graph = make_shared(""); - make_ffts_reuse_graph(graph, 0, 
kInvalidThreadScopeId); + MakeFftsReuseGraph(graph, 0, kInvalidThreadScopeId); HybridMemAssigner hybridMemAssigner(graph); ge::Status ret = hybridMemAssigner.Assign(); - size_t offset = hybridMemAssigner.GetMemOffset(); + size_t offset = 0; + auto it = hybridMemAssigner.GetMemOffsets().find(RT_MEMORY_HBM); + if (it != hybridMemAssigner.GetMemOffsets().end()) { + offset = it->second; + } EXPECT_EQ(offset, 5632); EXPECT_EQ(ret, SUCCESS); +} + +TEST_F(UtestMemoryAssignerTest, one_session_scope_op) { + ge::ComputeGraphPtr graph = make_shared(""); + MakeSessionScopeReuseGraph(graph); + HybridMemAssigner hybridMemAssigner(graph); + ge::Status ret = hybridMemAssigner.Assign(); + size_t offset = 0; + auto it = hybridMemAssigner.GetMemOffsets().find(RT_MEMORY_HBM); + if (it != hybridMemAssigner.GetMemOffsets().end()) { + offset = it->second; + } + + auto mem_type_session_scope = (kSessionScopeMemory | RT_MEMORY_HBM); + size_t session_scope_offset = 0; + it = hybridMemAssigner.GetMemOffsets().find(mem_type_session_scope); + if (it != hybridMemAssigner.GetMemOffsets().end()) { + session_scope_offset = it->second; + } + EXPECT_EQ(offset, 5120); + EXPECT_EQ(session_scope_offset, 1536); + EXPECT_EQ(ret, SUCCESS); +} + +TEST_F(UtestMemoryAssignerTest, multi_batch_reuse) { + ge::ComputeGraphPtr graph = make_shared(""); + MakeMultiBatchReuseGraph(graph); + HybridMemAssigner hybridMemAssigner(graph); + ge::Status ret = hybridMemAssigner.Assign(); + size_t offset = 0; + auto it = hybridMemAssigner.GetMemOffsets().find(RT_MEMORY_HBM); + if (it != hybridMemAssigner.GetMemOffsets().end()) { + offset = it->second; + } + + auto mem_type_session_scope = (kSessionScopeMemory | RT_MEMORY_HBM); + size_t session_scope_offset = 0; + it = hybridMemAssigner.GetMemOffsets().find(mem_type_session_scope); + if (it != hybridMemAssigner.GetMemOffsets().end()) { + session_scope_offset = it->second; + } + EXPECT_EQ(offset, 6656); + EXPECT_EQ(session_scope_offset, 1536); + EXPECT_EQ(ret, SUCCESS); } \ No 
newline at end of file diff --git a/tests/ut/ge/graph/build/model_builder_unittest.cc b/tests/ut/ge/graph/build/model_builder_unittest.cc index d5efc9bb..d544e1a3 100644 --- a/tests/ut/ge/graph/build/model_builder_unittest.cc +++ b/tests/ut/ge/graph/build/model_builder_unittest.cc @@ -17,6 +17,7 @@ #include #include +#include "graph/common/local_context.h" #include "graph/anchor.h" #include "graph/attr_value.h" #include "graph/debug/ge_attr_define.h" @@ -30,6 +31,7 @@ #define protected public #define private public #include "graph/build/model_builder.h" +#include "memory/memory_assigner.h" #undef protected #undef private @@ -127,9 +129,46 @@ class UtestModelBuilderTest : public testing::Test { graph->TopologicalSorting(); } +void MakeSessionScopeReuseGraph(ge::ComputeGraphPtr graph) { + ge::OpDescPtr op_def_a = CreateOpWithWsSize("A", 512); + ge::OpDescPtr op_def_b = CreateOpWithWsSize("B", 0); + ge::OpDescPtr op_def_c = CreateOpWithWsSize("C", 512); + ge::OpDescPtr op_def_d = CreateOpWithWsSize("D", 512); + ge::OpDescPtr op_def_e = CreateOpWithWsSize("E", 1024); + ge::OpDescPtr op_def_f = CreateOpWithWsSize("F", 512); + ge::OpDescPtr op_def_g = CreateOpWithWsSize("G", 0); + + std::vector workspace_bytes; + workspace_bytes.push_back(1024); + workspace_bytes.push_back(512); + op_def_c->SetWorkspaceBytes(workspace_bytes); + vector workspace_no_reuse_scope = { 0 , 1 }; + (void)ge::AttrUtils::SetListInt(op_def_c, ATTR_NAME_WORKSPACE_MEMORY_NO_REUSE_SCOPE, workspace_no_reuse_scope); + + vector workspace_no_reuse_scope_e = { 1 }; + (void)ge::AttrUtils::SetListInt(op_def_e, ATTR_NAME_WORKSPACE_MEMORY_NO_REUSE_SCOPE, workspace_no_reuse_scope_e); + + ge::NodePtr node_a = graph->AddNode(op_def_a); + ge::NodePtr node_b = graph->AddNode(op_def_b); + ge::NodePtr node_c = graph->AddNode(op_def_c); + ge::NodePtr node_d = graph->AddNode(op_def_d); + ge::NodePtr node_e = graph->AddNode(op_def_e); + ge::NodePtr node_f = graph->AddNode(op_def_f); + ge::NodePtr node_g = 
graph->AddNode(op_def_g); + + ge::GraphUtils::AddEdge(node_a->GetOutDataAnchor(0), node_b->GetInDataAnchor(0)); + ge::GraphUtils::AddEdge(node_b->GetOutDataAnchor(0), node_c->GetInDataAnchor(0)); + ge::GraphUtils::AddEdge(node_c->GetOutDataAnchor(0), node_d->GetInDataAnchor(0)); + ge::GraphUtils::AddEdge(node_d->GetOutDataAnchor(0), node_e->GetInDataAnchor(0)); + ge::GraphUtils::AddEdge(node_e->GetOutDataAnchor(0), node_f->GetInDataAnchor(0)); + ge::GraphUtils::AddEdge(node_f->GetOutDataAnchor(0), node_g->GetInDataAnchor(0)); + graph->TopologicalSorting(); + } protected: - void SetUp() {} + void SetUp() { + SetLocalOmgContext(domi::GetContext()); + } void TearDown() { GetContext().out_nodes_map.clear(); } }; @@ -162,6 +201,24 @@ TEST_F(UtestModelBuilderTest, test_save_atomic_bin) { EXPECT_EQ(builder.SaveAtomicTBEKernel(op_desc), SUCCESS); } +TEST_F(UtestModelBuilderTest, build_model_for_get_task) { + Graph2SubGraphInfoList subgraphs; + std::map stream_max_parallel_num; + ge::ComputeGraphPtr graph = make_shared(""); + MakeSessionScopeReuseGraph(graph); + std::map option; + ge::ModelBuilder builder(0, graph, subgraphs, stream_max_parallel_num, false); + + MemoryAssigner mem_assigner(graph); + EXPECT_EQ(mem_assigner.AssignMemory(false, builder.mem_type_to_mem_offset_, builder.zero_copy_mem_size_), SUCCESS); + + ge::Model model; + EXPECT_EQ(builder.BuildModelDef(model), SUCCESS); + int64_t session_scope_mem_offset = 0; + ge::AttrUtils::GetInt(&model, ATTR_MODEL_SESSION_SCOPE_MEMORY_SIZE, session_scope_mem_offset); + EXPECT_EQ(session_scope_mem_offset, 1536); +} + TEST_F(UtestModelBuilderTest, test_model_save) { Graph2SubGraphInfoList subgraphs; std::map stream_max_parallel_num; diff --git a/tests/ut/ge/graph/build/stream_allocator_unittest.cc b/tests/ut/ge/graph/build/stream_allocator_unittest.cc new file mode 100644 index 00000000..019e75d1 --- /dev/null +++ b/tests/ut/ge/graph/build/stream_allocator_unittest.cc @@ -0,0 +1,190 @@ +/** + * Copyright 2021 Huawei 
Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include +#include + +#define protected public +#define private public +#include "graph/build/stream_allocator.h" +#undef protected +#undef private + +#include "graph/debug/ge_attr_define.h" +#include "graph/utils/graph_utils.h" + +namespace ge { +class UtestStreamAllocator : public testing::Test { + protected: + void SetUp() {} + void TearDown() {} + public: + + /// + /// A + /// / \ + /// B C + /// | | + /// D 400 + /// | | + /// | E + /// \ / + /// F + /// + void make_graph_active(const ComputeGraphPtr &graph) { + const auto &a_desc = std::make_shared("A", DATA); + a_desc->AddInputDesc(GeTensorDesc()); + a_desc->AddOutputDesc(GeTensorDesc()); + a_desc->SetStreamId(0); + const auto &a_node = graph->AddNode(a_desc); + + const auto &b_desc = std::make_shared("B", "testa"); + b_desc->AddInputDesc(GeTensorDesc()); + b_desc->AddOutputDesc(GeTensorDesc()); + b_desc->SetStreamId(1); + AttrUtils::SetListStr(b_desc, ATTR_NAME_ACTIVE_LABEL_LIST, {"1"}); + const auto &b_node = graph->AddNode(b_desc); + + const auto &c_desc = std::make_shared("C", "testa"); + c_desc->AddInputDesc(GeTensorDesc()); + c_desc->AddOutputDesc(GeTensorDesc()); + c_desc->SetStreamId(2); + AttrUtils::SetStr(c_desc, ATTR_NAME_STREAM_LABEL, "1"); + const auto &c_node = graph->AddNode(c_desc); + + const auto &d_desc = std::make_shared("D", "testa"); + d_desc->AddInputDesc(GeTensorDesc()); + 
d_desc->AddOutputDesc(GeTensorDesc()); + d_desc->SetStreamId(1); + const auto &d_node = graph->AddNode(d_desc); + + const auto &e_desc = std::make_shared("E", "testa"); + e_desc->AddInputDesc(GeTensorDesc()); + e_desc->AddOutputDesc(GeTensorDesc()); + e_desc->SetStreamId(2); + const auto &e_node = graph->AddNode(e_desc); + + const auto &f_desc = std::make_shared("F", "testa"); + f_desc->AddInputDesc(GeTensorDesc()); + f_desc->AddInputDesc(GeTensorDesc()); + f_desc->AddOutputDesc(GeTensorDesc()); + f_desc->SetStreamId(2); + const auto &f_node = graph->AddNode(f_desc); + + std::vector node_list(400); + for (int i = 0; i < 400; i++) { + const auto &op_desc = std::make_shared("X", DATA); + op_desc->AddInputDesc(GeTensorDesc()); + op_desc->AddOutputDesc(GeTensorDesc()); + op_desc->SetStreamId(2); + node_list[i] = graph->AddNode(op_desc); + } + + GraphUtils::AddEdge(a_node->GetOutDataAnchor(0), b_node->GetInDataAnchor(0)); + GraphUtils::AddEdge(a_node->GetOutDataAnchor(0), c_node->GetInDataAnchor(0)); + GraphUtils::AddEdge(b_node->GetOutDataAnchor(0), d_node->GetInDataAnchor(0)); + GraphUtils::AddEdge(d_node->GetOutDataAnchor(0), f_node->GetInDataAnchor(0)); + GraphUtils::AddEdge(c_node->GetOutDataAnchor(0), node_list[0]->GetInDataAnchor(0)); + for (uint32_t i = 0; i < 399; i++) { + GraphUtils::AddEdge(node_list[i]->GetOutDataAnchor(0), node_list[i + 1]->GetInDataAnchor(0)); + } + GraphUtils::AddEdge(node_list[399]->GetOutDataAnchor(0), e_node->GetInDataAnchor(0)); + GraphUtils::AddEdge(e_node->GetOutDataAnchor(0), f_node->GetInDataAnchor(1)); + } +}; + +TEST_F(UtestStreamAllocator, test_split_streams_active) { + const auto &graph = std::make_shared("test_split_streams_active_graph"); + EXPECT_NE(graph, nullptr); + make_graph_active(graph); + + StreamAllocator allocator(graph, Graph2SubGraphInfoList()); + allocator.stream_num_ = 3; + EXPECT_EQ(allocator.SetActiveStreamsByLabel(), SUCCESS); + std::vector> split_stream(3); + EXPECT_EQ(allocator.SplitStreams(split_stream), 
SUCCESS); + EXPECT_EQ(allocator.UpdateActiveStreams(split_stream), SUCCESS); + EXPECT_EQ(allocator.SetActiveStreamsForLoop(), SUCCESS); + EXPECT_EQ(allocator.specific_activated_streams_.count(3), 1); + + const auto &node_b = graph->FindNode("B"); + EXPECT_NE(node_b, nullptr); + std::vector active_stream_list; + EXPECT_TRUE(AttrUtils::GetListInt(node_b->GetOpDesc(), ATTR_NAME_ACTIVE_STREAM_LIST, active_stream_list)); + EXPECT_EQ(active_stream_list.size(), 2); + const auto &node_e = graph->FindNode("E"); + EXPECT_NE(node_e, nullptr); + EXPECT_EQ(active_stream_list[0], node_e->GetOpDesc()->GetStreamId()); + EXPECT_EQ(active_stream_list[1], 3); +} + +TEST_F(UtestStreamAllocator, test_update_active_streams_for_subgraph) { + const auto &root_graph = std::make_shared("test_update_active_streams_for_subgraph_root_graph"); + EXPECT_NE(root_graph, nullptr); + root_graph->SetGraphUnknownFlag(false); + const auto &sub_graph1 = std::make_shared("test_update_active_streams_for_subgraph_sub_graph1"); + EXPECT_NE(sub_graph1, nullptr); + root_graph->AddSubGraph(sub_graph1); + const auto &sub_graph2 = std::make_shared("test_update_active_streams_for_subgraph_sub_graph2"); + EXPECT_NE(sub_graph2, nullptr); + root_graph->AddSubGraph(sub_graph2); + + const auto &case_desc = std::make_shared("case", CASE); + EXPECT_NE(case_desc, nullptr); + EXPECT_EQ(case_desc->AddInputDesc(GeTensorDesc()), GRAPH_SUCCESS); + EXPECT_EQ(case_desc->AddOutputDesc(GeTensorDesc()), GRAPH_SUCCESS); + case_desc->AddSubgraphName("branch1"); + case_desc->SetSubgraphInstanceName(0, "test_update_active_streams_for_subgraph_sub_graph1"); + case_desc->AddSubgraphName("branch2"); + case_desc->SetSubgraphInstanceName(1, "test_update_active_streams_for_subgraph_sub_graph2"); + const auto &case_node = root_graph->AddNode(case_desc); + EXPECT_NE(case_node, nullptr); + sub_graph1->SetParentNode(case_node); + sub_graph2->SetParentNode(case_node); + + const auto &active_desc1 = std::make_shared("active1", STREAMACTIVE); + 
EXPECT_NE(active_desc1, nullptr); + EXPECT_TRUE(AttrUtils::SetListInt(active_desc1, ATTR_NAME_ACTIVE_STREAM_LIST, {0})); + const auto &active_node1 = sub_graph1->AddNode(active_desc1); + EXPECT_NE(active_node1, nullptr); + + const auto &active_desc2 = std::make_shared("active2", STREAMACTIVE); + EXPECT_NE(active_desc2, nullptr); + EXPECT_TRUE(AttrUtils::SetListInt(active_desc2, ATTR_NAME_ACTIVE_STREAM_LIST, {1})); + const auto &active_node2 = sub_graph2->AddNode(active_desc2); + EXPECT_NE(active_node2, nullptr); + + StreamAllocator allocator(root_graph, Graph2SubGraphInfoList()); + allocator.node_split_stream_map_[active_node1] = 2; + allocator.node_split_stream_map_[active_node2] = 3; + allocator.split_ori_stream_map_[2] = 0; + allocator.subgraph_first_active_node_map_[sub_graph1] = active_node1; + allocator.subgraph_first_active_node_map_[sub_graph2] = active_node2; + EXPECT_EQ(allocator.UpdateActiveStreamsForSubgraphs(), SUCCESS); + std::vector active_stream_list1; + EXPECT_TRUE(AttrUtils::GetListInt(active_node1->GetOpDesc(), ATTR_NAME_ACTIVE_STREAM_LIST, active_stream_list1)); + EXPECT_EQ(active_stream_list1.size(), 1); + EXPECT_EQ(active_stream_list1[0], 0); + std::vector active_stream_list2; + EXPECT_TRUE(AttrUtils::GetListInt(active_node2->GetOpDesc(), ATTR_NAME_ACTIVE_STREAM_LIST, active_stream_list2)); + EXPECT_EQ(active_stream_list2.size(), 2); + EXPECT_EQ(active_stream_list2[0], 1); + EXPECT_EQ(active_stream_list2[1], 3); + EXPECT_EQ(allocator.specific_activated_streams_.size(), 1); + EXPECT_EQ(allocator.specific_activated_streams_.count(3), 1); +} +} diff --git a/tests/ut/ge/graph/ge_executor_unittest.cc b/tests/ut/ge/graph/ge_executor_unittest.cc index 3969ad9c..bbe29007 100644 --- a/tests/ut/ge/graph/ge_executor_unittest.cc +++ b/tests/ut/ge/graph/ge_executor_unittest.cc @@ -43,6 +43,7 @@ #include "graph/manager/graph_mem_allocator.h" #include "graph/utils/graph_utils.h" #include "proto/ge_ir.pb.h" +#include "graph/manager/graph_var_manager.h" #undef 
private #undef protected @@ -156,7 +157,7 @@ TEST_F(UtestGeExecutor, InitFeatureMapAndP2PMem_failed) { EXPECT_EQ(model.InitFeatureMapAndP2PMem(nullptr, 0), PARAM_INVALID); } -TEST_F(UtestGeExecutor, kernel_InitDumpTask) { +TEST_F(UtestGeExecutor, kernel_InitDumpArgs) { DavinciModel model(0, g_label_call_back); model.om_name_ = "testom"; model.name_ = "test"; @@ -172,10 +173,10 @@ TEST_F(UtestGeExecutor, kernel_InitDumpTask) { KernelTaskInfo kernel_task_info; kernel_task_info.davinci_model_ = &model; kernel_task_info.op_desc_ = op_desc; - kernel_task_info.InitDumpTask(0); + kernel_task_info.InitDumpArgs(0); } -TEST_F(UtestGeExecutor, kernel_ex_InitDumpTask) { +TEST_F(UtestGeExecutor, kernel_ex_InitDumpArgs) { DavinciModel model(0, g_label_call_back); model.om_name_ = "testom"; model.name_ = "test"; @@ -190,10 +191,33 @@ TEST_F(UtestGeExecutor, kernel_ex_InitDumpTask) { KernelExTaskInfo kernel_ex_task_info; kernel_ex_task_info.davinci_model_ = &model; - kernel_ex_task_info.InitDumpTask(nullptr, op_desc); + kernel_ex_task_info.InitDumpArgs(nullptr, op_desc); +} + +TEST_F(UtestGeExecutor, kernel_ex_InitDumpFlag) { + DavinciModel model(0, g_label_call_back); + model.om_name_ = "testom"; + model.name_ = "test"; + OpDescPtr op_desc = CreateOpDesc("test", "test"); + + std::map> model_dump_properties_map; + std::set s; + model_dump_properties_map[DUMP_ALL_MODEL] = s; + DumpProperties dp; + dp.model_dump_properties_map_ = model_dump_properties_map; + model.SetDumpProperties(dp); + + KernelExTaskInfo kernel_ex_task_info; + kernel_ex_task_info.davinci_model_ = &model; + kernel_ex_task_info.InitDumpFlag(op_desc); } TEST_F(UtestGeExecutor, execute_graph_with_stream) { + VarManager::Instance(0)->Init(0, 0, 0, 0); + map options; + options[GRAPH_MEMORY_MAX_SIZE] = "1048576"; + VarManager::Instance(0)->SetMemoryMallocSize(options); + DavinciModel model(0, nullptr); ComputeGraphPtr graph = make_shared("default"); @@ -278,7 +302,6 @@ TEST_F(UtestGeExecutor, execute_graph_with_stream) 
{ OutputData output_data; vector outputs; EXPECT_EQ(model.GenOutputTensorInfo(&output_data, outputs), SUCCESS); - GraphExecutor graph_executer; graph_executer.init_flag_ = true; diff --git a/tests/ut/ge/graph/manager/graph_caching_allocator_unittest.cc b/tests/ut/ge/graph/manager/graph_caching_allocator_unittest.cc index 7863a70f..5833a13a 100644 --- a/tests/ut/ge/graph/manager/graph_caching_allocator_unittest.cc +++ b/tests/ut/ge/graph/manager/graph_caching_allocator_unittest.cc @@ -28,8 +28,7 @@ #define protected public #define private public -#include "graph/manager/graph_caching_allocator.h" -#include "graph/manager/graph_mem_allocator.h" +#include "graph/manager/graph_mem_manager.h" #undef protected #undef private diff --git a/tests/ut/ge/graph/manager/graph_manager_unittest.cc b/tests/ut/ge/graph/manager/graph_manager_unittest.cc index fafd7168..f68b5080 100644 --- a/tests/ut/ge/graph/manager/graph_manager_unittest.cc +++ b/tests/ut/ge/graph/manager/graph_manager_unittest.cc @@ -16,6 +16,7 @@ #include #include +#include #define protected public #define private public #include "graph/manager/graph_manager.h" @@ -115,6 +116,7 @@ #include "common/formats/utils/formats_trans_utils.h" #include "register/custom_pass_helper.h" #include "graph/ops_stub.h" +#include "ge_attr_value.h" using namespace std; using namespace testing; @@ -221,6 +223,21 @@ TEST_F(UtestGraphManagerTest, test_add_graph_4) { EXPECT_NE(status, ge::SUCCESS); } +TEST_F(UtestGraphManagerTest, test_add_graph_5) { + Graph graph("test_graph"); + auto data = op::Data("Data").set_attr_index(1); + auto flatten = op::Flatten("Flatten").set_input_x(data, data.name_out_out()); + std::vector inputs{data}; + std::vector outputs{flatten}; + graph.SetInputs(inputs).SetOutputs(outputs); + + std::map options = {{"ge.exec.dataInputsShapeRange", "0:[-1]"}}; + OmgContext context; + GraphId graph_id = 1; + GraphManager graph_manager; + EXPECT_EQ(graph_manager.AddGraph(graph_id, graph, options, context), 
GRAPH_PARAM_INVALID); +} + TEST_F(UtestGraphManagerTest, test_add_graph_with_copy_1) { GraphId graph_id = 1; GraphManager graph_manager; @@ -443,6 +460,82 @@ TEST_F(UtestGraphManagerTest, ParseInputsDimsForData_success) { graph_manager.ParseInputsDimsForData(input_tensors); } +TEST_F(UtestGraphManagerTest, test_prerunthread_failed_1) { + GraphId graph_id = 1; + GraphManager graph_manager; + graph_manager.thread_run_flag_ = true; + ComputeGraphPtr compute_graph = MakeShared("test_graph"); + GeRootModelPtr ge_root_model = MakeShared(compute_graph); + GraphManager::PreRunArgs args; + error_message::Context error_ctx{1, "1st_stage", "2nd_stage", "log_header"}; + Status st = 0; + args.callback = [&st](Status st_return, std::vector &) { st = st_return; }; + args.graph_id = graph_id; + args.session_id = 1; + args.error_context = error_ctx; + args.context = GetThreadLocalContext(); + // create graph + Graph graph = GraphUtils::CreateGraphFromComputeGraph(compute_graph); + std::shared_ptr graph_ptr = MakeShared(graph); + GraphNodePtr graph_node = MakeShared(graph_id); + graph_node->SetGraph(graph_ptr); + + graph_manager.options_.local_fmk_op_flag = false; + // need build while buildflag is true, var format changed + graph_node->SetBuildFlag(true); + graph_manager.var_acc_ctrl_.graph_ids_need_rebuild_.insert(graph_id); + + graph_manager.graph_map_.insert({graph_id, graph_node}); + graph_manager.graph_count_.insert({graph_id, 1}); + graph_node->SetRunFlag(false); + // function return. 
+ graph_manager.prerun_args_q_.Push(args); + auto t1 = std::thread(GraphManager::PreRunThread, &graph_manager); + if (t1.joinable()) { + t1.join(); + } + EXPECT_EQ(st, ge::PARAM_INVALID); +} + +TEST_F(UtestGraphManagerTest, test_prerunthread_failed_2) { + GraphId graph_id = 1; + GraphManager graph_manager; + graph_manager.thread_run_flag_ = true; + ComputeGraphPtr compute_graph = MakeShared("test_graph"); + GeRootModelPtr ge_root_model = MakeShared(compute_graph); + GraphManager::PreRunArgs args; + error_message::Context error_ctx{1, "1st_stage", "2nd_stage", "log_header"}; + Status st; + args.callback = [&st, &graph_manager](Status st_return, std::vector &) { st = st_return; + graph_manager.thread_run_flag_ = false;}; + args.graph_id = graph_id; + args.session_id = 1; + args.error_context = error_ctx; + args.context = GetThreadLocalContext(); + // create graph + Graph graph = GraphUtils::CreateGraphFromComputeGraph(compute_graph); + std::shared_ptr graph_ptr = MakeShared(graph); + GraphNodePtr graph_node = MakeShared(graph_id); + graph_node->SetGraph(graph_ptr); + + graph_manager.options_.local_fmk_op_flag = false; + // need build while buildflag is true, var format changed + graph_node->SetBuildFlag(true); + graph_manager.var_acc_ctrl_.graph_ids_need_rebuild_.insert(graph_id); + + graph_manager.graph_map_.insert({graph_id, graph_node}); + graph_manager.graph_count_.insert({graph_id, 1}); + graph_node->SetRunFlag(false); + // function continue + int ret = setenv("ENABLE_NETWORK_ANALYSIS_DEBUG", "1", 1); + EXPECT_EQ(ret, 0); + graph_manager.prerun_args_q_.Push(args); + auto t1 = std::thread(GraphManager::PreRunThread, &graph_manager); + if (t1.joinable()) { + t1.join(); + } + EXPECT_EQ(st, ge::PARAM_INVALID); +} // TEST_F(UtestGraphManagerTest, ParseInputsDimsForGetNexNosinkAndData_success) { // GraphManager graph_manager; diff --git a/tests/ut/ge/graph/manager/host_mem_allocator_unittest.cc b/tests/ut/ge/graph/manager/host_mem_allocator_unittest.cc new file mode 
100644 index 00000000..3d8e4890 --- /dev/null +++ b/tests/ut/ge/graph/manager/host_mem_allocator_unittest.cc @@ -0,0 +1,40 @@ +/** + * Copyright 2021 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#include +#include + +#define protected public +#define private public +#include "graph/manager/host_mem_allocator.h" +#undef protected +#undef private + +namespace ge { +class UtestHostMemManagerTest : public testing::Test { + protected: + void SetUp() {} + void TearDown() {} +}; + +TEST_F(UtestHostMemManagerTest, malloc_zero_size) { + HostMemAllocator allocator(RT_MEMORY_HBM); + EXPECT_EQ(allocator.allocated_blocks_.size(), 0); + EXPECT_EQ(allocator.Malloc(nullptr, 0), nullptr); + EXPECT_EQ(allocator.allocated_blocks_.size(), 1); + EXPECT_EQ(allocator.Malloc(nullptr, 1), nullptr); + EXPECT_EQ(allocator.allocated_blocks_.size(), 1); +} +} // namespace ge diff --git a/tests/ut/ge/graph/manager/session_scope_mem_allocator_unittest.cc b/tests/ut/ge/graph/manager/session_scope_mem_allocator_unittest.cc new file mode 100644 index 00000000..87af585a --- /dev/null +++ b/tests/ut/ge/graph/manager/session_scope_mem_allocator_unittest.cc @@ -0,0 +1,94 @@ +/** + * Copyright 2019-2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include + +#include "graph/anchor.h" +#include "graph/attr_value.h" +#include "graph/debug/ge_attr_define.h" +#include "graph/utils/graph_utils.h" +#include "graph/utils/node_utils.h" +#include "graph/utils/op_desc_utils.h" +#include "graph/utils/tensor_utils.h" +#include "omg/omg_inner_types.h" + +#define protected public +#define private public +#include "graph/manager/graph_mem_manager.h" +#undef protected +#undef private + +using namespace std; +using namespace testing; +using namespace ge; +using domi::GetContext; + +class UtestSessionScopeMemAllocator : public testing::Test { + protected: + void SetUp() {} + + void TearDown() { GetContext().out_nodes_map.clear(); } +}; + +TEST_F(UtestSessionScopeMemAllocator, initialize_success) { + std::vector mem_type; + mem_type.push_back(RT_MEMORY_HBM); + EXPECT_EQ(MemManager::Instance().Initialize(mem_type), SUCCESS); + MemManager::Instance().Finalize(); +} + +TEST_F(UtestSessionScopeMemAllocator, malloc_success) { + std::vector mem_type; + mem_type.push_back(RT_MEMORY_HBM); + EXPECT_EQ(MemManager::Instance().Initialize(mem_type), SUCCESS); + uint8_t *ptr = MemManager::Instance().SessionScopeMemInstance(RT_MEMORY_HBM).Malloc(1000, 0); + EXPECT_NE(nullptr, ptr); + MemManager::Instance().Finalize(); +} + +TEST_F(UtestSessionScopeMemAllocator, free_success) { + std::vector mem_type; + mem_type.push_back(RT_MEMORY_HBM); + EXPECT_EQ(MemManager::Instance().Initialize(mem_type), SUCCESS); + uint8_t *ptr = MemManager::Instance().SessionScopeMemInstance(RT_MEMORY_HBM).Malloc(100, 0); + 
EXPECT_NE(nullptr, ptr); + ptr = MemManager::Instance().SessionScopeMemInstance(RT_MEMORY_HBM).Malloc(100, 0); + EXPECT_NE(nullptr, ptr); + + EXPECT_EQ(SUCCESS, MemManager::Instance().SessionScopeMemInstance(RT_MEMORY_HBM).Free(0)); + EXPECT_NE(SUCCESS, MemManager::Instance().SessionScopeMemInstance(RT_MEMORY_HBM).Free(0)); + MemManager::Instance().Finalize(); +} + +TEST_F(UtestSessionScopeMemAllocator, free_success_session) { + std::vector mem_type; + mem_type.push_back(RT_MEMORY_HBM); + mem_type.push_back(RT_MEMORY_P2P_DDR); + EXPECT_EQ(MemManager::Instance().Initialize(mem_type), SUCCESS); + uint8_t *ptr = MemManager::Instance().SessionScopeMemInstance(RT_MEMORY_HBM).Malloc(100, 0); + EXPECT_NE(nullptr, ptr); + ptr = MemManager::Instance().SessionScopeMemInstance(RT_MEMORY_HBM).Malloc(100, 0); + EXPECT_NE(nullptr, ptr); + for (auto memory_type : MemManager::Instance().GetAllMemoryType()) { + if (RT_MEMORY_P2P_DDR == memory_type) { + EXPECT_NE(MemManager::Instance().SessionScopeMemInstance(memory_type).Free(0), SUCCESS); + } else { + EXPECT_EQ(MemManager::Instance().SessionScopeMemInstance(memory_type).Free(0), SUCCESS); + } + } + MemManager::Instance().Finalize(); +} diff --git a/tests/ut/ge/graph/ops_stub.h b/tests/ut/ge/graph/ops_stub.h index 2a71d80a..c122befa 100644 --- a/tests/ut/ge/graph/ops_stub.h +++ b/tests/ut/ge/graph/ops_stub.h @@ -144,6 +144,17 @@ REG_OP(Data) DT_UINT64, DT_BOOL, DT_DOUBLE})) .OP_END_FACTORY_REG(GuaranteeConst) + REG_OP(MatMulV2) + .INPUT(x1, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT32, DT_INT8})) + .INPUT(x2, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT32, DT_INT8})) + .OPTIONAL_INPUT(bias, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT32})) + .OUTPUT(y, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT32})) + .OPTIONAL_INPUT(offset_w, TensorType({DT_INT8})) + .ATTR(transpose_x1, Bool, false) + .ATTR(transpose_x2, Bool, false) + .ATTR(offset_x, Int, 0) + .OP_END_FACTORY_REG(MatMulV2) + IMPLEMT_INFERFUNC(GuaranteeConst, GuaranteeConstInfer) { TensorDesc 
tensorDesc = op.GetInputDesc("x"); (void)op.UpdateOutputDesc("y", tensorDesc); diff --git a/tests/ut/ge/graph/partition/dynamic_shape_partition_unittest.cc b/tests/ut/ge/graph/partition/dynamic_shape_partition_unittest.cc index b60e0ddd..c8abadb5 100644 --- a/tests/ut/ge/graph/partition/dynamic_shape_partition_unittest.cc +++ b/tests/ut/ge/graph/partition/dynamic_shape_partition_unittest.cc @@ -15,20 +15,17 @@ */ #include + +#define private public +#define protected public #include "graph/partition/dynamic_shape_partition.h" #include "compute_graph.h" #include "inc/framework/common/types.h" #include "utils/graph_utils.h" #include "graph/debug/ge_attr_define.h" - -#define private public -#define protected public - namespace ge { - namespace { - GeTensorDescPtr CreateTensorDesc(std::initializer_list shape, Format format = FORMAT_NCHW, DataType data_type = DT_FLOAT) { GeShape ge_shape{vector(shape)}; @@ -94,4 +91,29 @@ TEST_F(UtestDynamicShapePartition, single_op_scene_success) { DynamicShapePartitioner partitioner(graph); EXPECT_EQ(partitioner.Partition(), SUCCESS); } + +TEST_F(UtestDynamicShapePartition, merge_control_flow_group) { + ComputeGraphPtr graph = std::make_shared("default"); + AttrUtils::SetStr(*graph, ATTR_NAME_SESSION_GRAPH_ID, "session_graph_id"); + + NodePtr data1 = NodeBuilder("data1", DATA).AddInputDesc({1}).AddOutputDesc({1}).Build(graph); + NodePtr data2 = NodeBuilder("data2", DATA).AddInputDesc({1}).AddOutputDesc({1}).Build(graph); + NodePtr merge = NodeBuilder("node2", MERGE).AddInputDesc({1}).AddInputDesc({1}) + .AddOutputDesc({1}).AddOutputDesc({}).Build(graph); + + GraphUtils::AddEdge(data1->GetOutDataAnchor(0), merge->GetInDataAnchor(0)); + GraphUtils::AddEdge(data2->GetOutDataAnchor(0), merge->GetInDataAnchor(1)); + + (void)AttrUtils::SetBool(data1->GetOpDesc(), ATTR_NAME_FORCE_UNKNOWN_SHAPE, true); + (void)AttrUtils::SetInt(data1->GetOpDesc(), ATTR_NAME_CONTROL_FLOW_GROUP, 3); + (void)AttrUtils::SetBool(data2->GetOpDesc(), 
ATTR_NAME_FORCE_UNKNOWN_SHAPE, true); + (void)AttrUtils::SetInt(data2->GetOpDesc(), ATTR_NAME_CONTROL_FLOW_GROUP, 3); + (void)AttrUtils::SetBool(merge->GetOpDesc(), ATTR_NAME_FORCE_UNKNOWN_SHAPE, true); + (void)AttrUtils::SetInt(merge->GetOpDesc(), ATTR_NAME_CONTROL_FLOW_GROUP, 3); + + EXPECT_EQ(graph->sub_graph_.size(), 0); + DynamicShapePartitioner partitioner(graph); + EXPECT_EQ(partitioner.Partition(), SUCCESS); + EXPECT_EQ(graph->sub_graph_.size(), 1); +} } // namespace ge \ No newline at end of file diff --git a/tests/ut/ge/graph/passes/memcpy_addr_async_unittest.cc b/tests/ut/ge/graph/passes/memcpy_addr_async_unittest.cc new file mode 100644 index 00000000..e5bc450e --- /dev/null +++ b/tests/ut/ge/graph/passes/memcpy_addr_async_unittest.cc @@ -0,0 +1,47 @@ +/** + * Copyright 2019-2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include +#include +#include +#include + +#define private public +#include "graph/passes/memcpy_addr_async_pass.h" +#include "common/ge_inner_error_codes.h" +#include "inc/pass_manager.h" +#undef private + +namespace ge { +class UtestMemcpyAddrAsyncPass : public testing::Test { + protected: + void SetUp() {} + void TearDown() {} +}; + +TEST_F(UtestMemcpyAddrAsyncPass, run) { + ge::ComputeGraphPtr graph = std::make_shared("default"); + ge::OpDescPtr op = std::make_shared(); + op->SetType(STREAMSWITCH); + op->SetName("stream_switch"); + op->AddOutputDesc(ge::GeTensorDesc()); + ge::NodePtr node = graph->AddNode(op); + graph->SetGraphUnknownFlag(true); + MemcpyAddrAsyncPass pass; + Status ret = pass.Run(graph); + EXPECT_EQ(ret, SUCCESS); +} +} // namespace ge diff --git a/tests/ut/ge/graph/passes/subgraph_pass_unittest.cc b/tests/ut/ge/graph/passes/subgraph_pass_unittest.cc new file mode 100644 index 00000000..f11882e1 --- /dev/null +++ b/tests/ut/ge/graph/passes/subgraph_pass_unittest.cc @@ -0,0 +1,129 @@ +/** + * Copyright 2021 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include +#include +#include + +#include "graph/passes/subgraph_pass.h" +#include "inc/pass_manager.h" + +namespace ge { +namespace { +class UtestGraphPassesSubgraphPass : public testing::Test { + protected: + void SetUp() {} + void TearDown() {} +}; + +OpDescPtr CreateOpDesc(const std::string name, const std::string type, uint32_t input_num, uint32_t output_num) { + OpDescPtr op_desc = std::shared_ptr(new (std::nothrow) OpDesc(name, type)); + if (op_desc == nullptr) { + return nullptr; + } + for (uint32_t i = 0; i < input_num; i++) { + op_desc->AddInputDesc(GeTensorDesc()); + } + for (uint32_t i = 0; i < output_num; i++) { + op_desc->AddOutputDesc(GeTensorDesc()); + } + return op_desc; +} + +bool CheckMemcpyExist(const ComputeGraphPtr &graph) { + for (const auto &node : graph->GetDirectNode()) { + if (node->GetType() == IDENTITY) { + return true; + } + } + return false; +} + +uint32_t CheckMemcpyNum(const ComputeGraphPtr &graph) { + uint32_t num = 0; + for (const auto &node : graph->GetDirectNode()) { + if (node->GetType() == IDENTITY) { + num++; + } + } + return num; +} +} // namespace + +/// +/// ****** root_graph ****** * ****** subgraph branch1 ***** * ****** subgraph branch2 ***** +/// * * +/// Case * Const * Data +/// / \ * | * | +/// data_0 data_1 * NetOutput * NetOutput +/// * * +/// ****** root_graph ****** * ****** subgraph branch1 ***** * ****** subgraph branch2 ***** +/// +TEST(UtestGraphPassesSubgraphPass, add_memcpy_success) { + ComputeGraphPtr graph = std::make_shared("add_memcpy_success"); + NodePtr func_node = graph->AddNode(CreateOpDesc("Case", CASE, 2, 1)); + NodePtr data_node_0 = graph->AddNode(CreateOpDesc("data_0", DATA, 1, 1)); + NodePtr data_node_1 = graph->AddNode(CreateOpDesc("data_1", DATA, 1, 1)); + EXPECT_EQ(GraphUtils::AddEdge(data_node_0->GetOutDataAnchor(0), func_node->GetInDataAnchor(0)), GRAPH_SUCCESS); + EXPECT_EQ(GraphUtils::AddEdge(data_node_1->GetOutDataAnchor(0), func_node->GetInDataAnchor(1)), GRAPH_SUCCESS); + + 
std::string subgraph_name_1 = "instance_branch_1"; + ComputeGraphPtr subgraph_1 = std::make_shared(subgraph_name_1); + subgraph_1->SetParentNode(func_node); + subgraph_1->SetParentGraph(graph); + size_t index = func_node->GetOpDesc()->GetSubgraphInstanceNames().size(); + EXPECT_EQ(index, 0); + func_node->GetOpDesc()->AddSubgraphName("branch1"); + EXPECT_EQ(func_node->GetOpDesc()->GetSubgraphInstanceNames().size(), 1); + func_node->GetOpDesc()->SetSubgraphInstanceName(index, subgraph_name_1); + EXPECT_EQ(func_node->GetOpDesc()->GetSubgraphInstanceNames().size(), 1); + + std::string subgraph_name_2 = "instance_branch_2"; + ComputeGraphPtr subgraph_2 = std::make_shared(subgraph_name_2); + subgraph_2->SetParentNode(func_node); + subgraph_2->SetParentGraph(graph); + index = func_node->GetOpDesc()->GetSubgraphInstanceNames().size(); + EXPECT_EQ(index, 1); + func_node->GetOpDesc()->AddSubgraphName("branch2"); + EXPECT_EQ(func_node->GetOpDesc()->GetSubgraphInstanceNames().size(), 2); + func_node->GetOpDesc()->SetSubgraphInstanceName(index, subgraph_name_2); + EXPECT_EQ(func_node->GetOpDesc()->GetSubgraphInstanceNames().size(), 2); + + { + // Const->NetOutput in subgraph + NodePtr const_node = subgraph_1->AddNode(CreateOpDesc("const", CONSTANTOP, 0, 1)); + NodePtr output_node = subgraph_1->AddNode(CreateOpDesc(NODE_NAME_NET_OUTPUT, NETOUTPUT, 1, 1)); + EXPECT_EQ(GraphUtils::AddEdge(const_node->GetOutDataAnchor(0), output_node->GetInDataAnchor(0)), SUCCESS); + } + + { + // Data->NetOutput in subgraph but not while body + NodePtr data_node = subgraph_2->AddNode(CreateOpDesc("sata", DATA, 1, 1)); + NodePtr output_node = subgraph_2->AddNode(CreateOpDesc(NODE_NAME_NET_OUTPUT, NETOUTPUT, 1, 1)); + EXPECT_EQ(GraphUtils::AddEdge(data_node->GetOutDataAnchor(0), output_node->GetInDataAnchor(0)), SUCCESS); + EXPECT_TRUE(AttrUtils::SetInt(data_node->GetOpDesc(), ATTR_NAME_PARENT_NODE_INDEX, 1)); + } + + PassManager pass_manager; + pass_manager.AddPass("SubgraphPass", new (std::nothrow) 
SubgraphPass); + EXPECT_EQ(pass_manager.Run(graph), SUCCESS); + EXPECT_FALSE(CheckMemcpyExist(graph)); + EXPECT_EQ(pass_manager.Run(subgraph_1), SUCCESS); + EXPECT_EQ(CheckMemcpyNum(subgraph_1), 1); + EXPECT_EQ(pass_manager.Run(subgraph_2), SUCCESS); + EXPECT_EQ(CheckMemcpyNum(subgraph_2), 1); +} +} // namespace ge diff --git a/tests/ut/ge/graph/passes/unused_and_isolated_op_remove_pass_unittest.cc b/tests/ut/ge/graph/passes/unused_and_isolated_op_remove_pass_unittest.cc deleted file mode 100644 index 21b5d7e3..00000000 --- a/tests/ut/ge/graph/passes/unused_and_isolated_op_remove_pass_unittest.cc +++ /dev/null @@ -1,191 +0,0 @@ -/** - * Copyright 2019-2020 Huawei Technologies Co., Ltd - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "graph/passes/unused_op_remove_pass.h" - -#include -#include "graph/passes/isolated_op_remove_pass.h" -#include "pass_manager.h" - -using namespace ge; - -class UtestGraphPassesUnusedAndIsolatedOpRemovePass : public testing::Test { - protected: - void SetUp() {} - - void TearDown() {} - - NodePtr AddNode(ComputeGraphPtr graph, const string &name, const string &type, int32_t in_anchors_num = 1, - int32_t out_anchors_num = 1) { - GeTensorDesc tensor_desc; - OpDescPtr op_desc = make_shared(name, type); - for (int32_t i = 0; i < in_anchors_num; i++) { - op_desc->AddInputDesc(tensor_desc); - } - for (int32_t i = 0; i < out_anchors_num; i++) { - op_desc->AddOutputDesc(tensor_desc); - } - - NodePtr node = graph->AddNode(op_desc); - return node; - } -}; - -TEST_F(UtestGraphPassesUnusedAndIsolatedOpRemovePass, transpose_and_reshape) { - ComputeGraphPtr graph = std::make_shared("test"); - - NodePtr data_node = AddNode(graph, "DATA", DATA); - NodePtr transpose_node = AddNode(graph, "transpose1", PERMUTE); - NodePtr reshape_node = AddNode(graph, "reshape1", RESHAPE); - - GraphUtils::AddEdge(data_node->GetOutDataAnchor(0), transpose_node->GetInDataAnchor(0)); - GraphUtils::AddEdge(transpose_node->GetOutDataAnchor(0), reshape_node->GetInDataAnchor(0)); - - ge::UnusedOpRemovePass unused_pass(TENSORFLOW); - ge::IsolatedOpRemovePass isolate_pass; - std::vector> passes; - passes.emplace_back("", &isolate_pass); - passes.emplace_back("", &unused_pass); - Status status = PassManager::Run(graph, passes); - EXPECT_EQ(SUCCESS, status); - NodePtr found_node = graph->FindNode("transpose1"); - EXPECT_EQ(transpose_node, found_node); -} - -TEST_F(UtestGraphPassesUnusedAndIsolatedOpRemovePass, transpose_and_squeeze) { - ComputeGraphPtr graph = std::make_shared("test"); - - NodePtr data_node = AddNode(graph, "DATA", DATA); - NodePtr transpose_node = AddNode(graph, "transpose1", PERMUTE); - NodePtr squeeze_node = AddNode(graph, "squeeze1", SQUEEZE); - - 
GraphUtils::AddEdge(data_node->GetOutDataAnchor(0), transpose_node->GetInDataAnchor(0)); - GraphUtils::AddEdge(transpose_node->GetOutDataAnchor(0), squeeze_node->GetInDataAnchor(0)); - - ge::UnusedOpRemovePass unused_pass(TENSORFLOW); - ge::IsolatedOpRemovePass isolate_pass; - std::vector> passes; - passes.emplace_back("", &isolate_pass); - passes.emplace_back("", &unused_pass); - Status status = PassManager::Run(graph, passes); - EXPECT_EQ(SUCCESS, status); - NodePtr found_node = graph->FindNode("transpose1"); - EXPECT_EQ(transpose_node, found_node); -} - -TEST_F(UtestGraphPassesUnusedAndIsolatedOpRemovePass, transpose_and_conv) { - ComputeGraphPtr graph = std::make_shared("test"); - - NodePtr data_node = AddNode(graph, "DATA", DATA); - - NodePtr transpose_node = AddNode(graph, "transpose1", PERMUTE); - vector order_list = {0, 2, 3, 1}; - AttrUtils::SetListInt(transpose_node->GetOpDesc(), PERMUTE_ATTR_ORDER, order_list); - AttrUtils::SetInt(transpose_node->GetOpDesc(), ATTR_NAME_FORMAT, (int64_t)DT_FLOAT); - - NodePtr conv_node = AddNode(graph, "conv1", CONVOLUTION); - - GraphUtils::AddEdge(data_node->GetOutDataAnchor(0), transpose_node->GetInDataAnchor(0)); - GraphUtils::AddEdge(transpose_node->GetOutDataAnchor(0), conv_node->GetInDataAnchor(0)); - - NodePtr conv2_node = AddNode(graph, "conv2", CONVOLUTION); - GraphUtils::AddEdge(conv_node->GetOutDataAnchor(0), conv2_node->GetInDataAnchor(0)); - - ge::UnusedOpRemovePass unused_pass(TENSORFLOW); - ge::IsolatedOpRemovePass isolate_pass; - std::vector> passes; - passes.emplace_back("", &isolate_pass); - passes.emplace_back("", &unused_pass); - Status status = PassManager::Run(graph, passes); - EXPECT_EQ(SUCCESS, status); - NodePtr found_node0 = graph->FindNode("transpose1"); - NodePtr found_node = graph->FindNode("conv1"); - EXPECT_EQ(conv_node, found_node); -} - -TEST_F(UtestGraphPassesUnusedAndIsolatedOpRemovePass, transpose_and_conv3) { - ComputeGraphPtr graph = std::make_shared("test"); - - NodePtr data_node = 
AddNode(graph, "DATA", DATA); - - NodePtr transpose_node = AddNode(graph, "transpose1", PERMUTE); - vector order_list = {0, 1, 3, 2}; - AttrUtils::SetListInt(transpose_node->GetOpDesc(), PERMUTE_ATTR_ORDER, order_list); - AttrUtils::SetInt(transpose_node->GetOpDesc(), ATTR_NAME_FORMAT, (int64_t)DT_FLOAT); - - NodePtr conv_node = AddNode(graph, "conv1", CONVOLUTION); - - GraphUtils::AddEdge(data_node->GetOutDataAnchor(0), transpose_node->GetInDataAnchor(0)); - GraphUtils::AddEdge(transpose_node->GetOutDataAnchor(0), conv_node->GetInDataAnchor(0)); - - NodePtr conv2_node = AddNode(graph, "conv2", CONVOLUTION); - GraphUtils::AddEdge(conv_node->GetOutDataAnchor(0), conv2_node->GetInDataAnchor(0)); - - ge::UnusedOpRemovePass unused_pass(TENSORFLOW); - ge::IsolatedOpRemovePass isolate_pass; - std::vector> passes; - passes.emplace_back("", &isolate_pass); - passes.emplace_back("", &unused_pass); - Status status = PassManager::Run(graph, passes); - EXPECT_EQ(SUCCESS, status); - NodePtr found_node0 = graph->FindNode("transpose1"); - EXPECT_EQ(transpose_node, found_node0); - NodePtr found_node = graph->FindNode("conv1"); - EXPECT_EQ(conv_node, found_node); -} - -TEST_F(UtestGraphPassesUnusedAndIsolatedOpRemovePass, cast_and_cast) { - ComputeGraphPtr graph = std::make_shared("test"); - - NodePtr data_node = AddNode(graph, "DATA", DATA); - NodePtr conv3_node = AddNode(graph, "cast3", CAST); - NodePtr transpose_node = AddNode(graph, "cast1", CAST); - NodePtr transpose_node_1 = AddNode(graph, "cast2", CAST); - - GraphUtils::AddEdge(data_node->GetOutDataAnchor(0), conv3_node->GetInDataAnchor(0)); - GraphUtils::AddEdge(conv3_node->GetOutDataAnchor(0), transpose_node->GetInDataAnchor(0)); - GraphUtils::AddEdge(transpose_node->GetOutDataAnchor(0), transpose_node_1->GetInDataAnchor(0)); - - ge::UnusedOpRemovePass unused_pass(TENSORFLOW); - ge::IsolatedOpRemovePass isolate_pass; - std::vector> passes; - passes.emplace_back("", &isolate_pass); - passes.emplace_back("", &unused_pass); - 
Status status = PassManager::Run(graph, passes); - EXPECT_EQ(SUCCESS, status); -} - -TEST_F(UtestGraphPassesUnusedAndIsolatedOpRemovePass, remove_parent_node) { - ComputeGraphPtr graph = std::make_shared("test"); - vector node_vec; - - NodePtr data_node = AddNode(graph, "DATA", DATA); - NodePtr conv3_node = AddNode(graph, "cast3", CAST); - NodePtr transpose_node = AddNode(graph, "cast1", CAST); - NodePtr transpose_node_1 = AddNode(graph, "cast2", CAST); - - GraphUtils::AddEdge(data_node->GetOutDataAnchor(0), conv3_node->GetInDataAnchor(0)); - GraphUtils::AddEdge(conv3_node->GetOutDataAnchor(0), transpose_node->GetInDataAnchor(0)); - GraphUtils::AddEdge(transpose_node->GetOutDataAnchor(0), transpose_node_1->GetInDataAnchor(0)); - - ge::UnusedOpRemovePass unused_pass(TENSORFLOW); - ge::IsolatedOpRemovePass isolate_pass; - std::vector> passes; - passes.emplace_back("", &isolate_pass); - passes.emplace_back("", &unused_pass); - Status status = PassManager::Run(graph, passes); - EXPECT_EQ(SUCCESS, status); -} diff --git a/tests/ut/ge/graph/passes/variable_op_pass_unittest.cc b/tests/ut/ge/graph/passes/variable_op_pass_unittest.cc index d6af6de9..f1ea7a27 100644 --- a/tests/ut/ge/graph/passes/variable_op_pass_unittest.cc +++ b/tests/ut/ge/graph/passes/variable_op_pass_unittest.cc @@ -35,7 +35,7 @@ #include "graph/manager/graph_context.h" #include "graph/optimize/graph_optimize.h" #include "graph/manager/util/variable_accelerate_ctrl.h" -#include "graph/manager/graph_mem_allocator.h" +#include "graph/manager/graph_mem_manager.h" #include "graph/manager/graph_var_manager.h" #include "graph_builder_utils.h" #include "cce/dnn.h" diff --git a/tests/ut/ge/graph/preprocess/graph_preprocess_unittest.cc b/tests/ut/ge/graph/preprocess/graph_preprocess_unittest.cc index 6c5babfc..8ece7564 100644 --- a/tests/ut/ge/graph/preprocess/graph_preprocess_unittest.cc +++ b/tests/ut/ge/graph/preprocess/graph_preprocess_unittest.cc @@ -223,4 +223,17 @@ TEST_F(UtestGraphPreproces, 
test_update_dtype_mbatch_case) { auto data1_output = data1_desc->MutableOutputDesc(0); EXPECT_EQ(data1_output->GetDataType(), 1); } + +TEST_F(UtestGraphPreproces, test_prepare_dyn_shape) { + ComputeGraphPtr compute_graph = BuildGraph5(); + GraphPtr graph_ptr = std::make_shared(GraphUtils::CreateGraphFromComputeGraph(compute_graph)); + + GraphNodePtr graph_node = make_shared(0); + graph_node->SetComputeGraph(compute_graph); + graph_node->SetGraph(graph_ptr); + + std::vector user_input; + GraphPrepare graph_prepare; + EXPECT_EQ(graph_prepare.PrepareDynShape(graph_node, user_input, compute_graph, 0), SUCCESS); +} } \ No newline at end of file diff --git a/tests/ut/ge/graph_ir/ge_ir_build_unittest.cc b/tests/ut/ge/graph_ir/ge_ir_build_unittest.cc index ec7b9488..fb4a5a8d 100644 --- a/tests/ut/ge/graph_ir/ge_ir_build_unittest.cc +++ b/tests/ut/ge/graph_ir/ge_ir_build_unittest.cc @@ -18,6 +18,9 @@ #include "ir_build/option_utils.h" #include "graph/testcase/ge_graph/graph_builder_utils.h" #include "graph/debug/ge_attr_define.h" +#include "graph/utils/graph_utils.h" +#include "ge/ge_ir_build.h" +#include "graph/ops_stub.h" #define protected public #define private public @@ -37,6 +40,13 @@ class UtestIrCommon : public testing::Test { void TearDown() {} }; +class UtestIrBuild : public testing::Test { + protected: + void SetUp() {} + + void TearDown() {} +}; + static ge::OpDescPtr CreateOpDesc(const std::string &name, const std::string &type) { OpDescPtr op_desc = std::make_shared(name, type); ge::GeTensorDesc ge_tensor_desc; @@ -60,6 +70,59 @@ static ComputeGraphPtr BuildComputeGraph() { return builder.GetGraph(); } +// data not set attr index; +// but becasue of op proto, register attr index. 
so all data index is zero; +static Graph BuildIrGraph() { + auto data1 = op::Data("data1"); + auto data2 = op::Data("data2"); + auto data3 = op::Data("data3"); + std::vector inputs {data1, data2, data3}; + std::vector outputs; + + Graph graph("test_graph"); + graph.SetInputs(inputs).SetOutputs(outputs); + return graph; +} + +// data set attr index, but is not valid +static Graph BuildIrGraph1() { + auto data1 = op::Data("data1").set_attr_index(0); + auto data2 = op::Data("data2").set_attr_index(1); + auto data3 = op::Data("data3"); + std::vector inputs {data1, data2, data3}; + std::vector outputs; + + Graph graph("test_graph"); + graph.SetInputs(inputs).SetOutputs(outputs); + return graph; +} + +// data set attr index, but is not valid +static Graph BuildIrGraph2() { + auto data1 = op::Data("data1").set_attr_index(0); + auto data2 = op::Data("data2"); + auto data3 = op::Data("data3").set_attr_index(2); + std::vector inputs {data1, data2, data3}; + std::vector outputs; + + Graph graph("test_graph"); + graph.SetInputs(inputs).SetOutputs(outputs); + return graph; +} + +// data set attr index +static Graph BuildIrGraph3() { + auto data1 = op::Data("data1").set_attr_index(0); + auto data2 = op::Data("data2").set_attr_index(1); + auto data3 = op::Data("data3").set_attr_index(2); + std::vector inputs {data1, data2, data3}; + std::vector outputs; + + Graph graph("test_graph"); + graph.SetInputs(inputs).SetOutputs(outputs); + return graph; +} + TEST(UtestIrCommon, update_data_op_shape) { ge::OpDescPtr op_desc = CreateOpDesc("Data", "Data"); map> shape_map; @@ -227,3 +290,63 @@ TEST(UtestIrCommon, check_param_failed) { ret = CheckLogParamValidAndSetLogLevel(param_invalid); } + +// Get attr index failed, when set input shape range +TEST(UtestIrBuild, check_data_op_attr_index_invalid_0) { + ComputeGraphPtr compute_graph = BuildComputeGraph(); + Graph graph = GraphUtils::CreateGraphFromComputeGraph(compute_graph); + const map build_options = { + {"input_shape_range", "[1, 2~3, 
-1],[4~5, 3~5, 10],[1, 2~3, -1]"} + }; + ModelBufferData model; + graphStatus ret = aclgrphBuildModel(graph, build_options, model); + EXPECT_EQ(ret, GRAPH_FAILED); +} + +// not set attr index, when set input shape range +TEST(UtestIrBuild, check_data_op_attr_index_invalid_1) { + Graph graph = BuildIrGraph(); + const map build_options = { + {"input_shape_range", "[1, 2~3, -1],[4~5, 3~5, 10],[1, 2~3, -1]"} + }; + ModelBufferData model; + graphStatus ret = aclgrphBuildModel(graph, build_options, model); + EXPECT_EQ(ret, GRAPH_FAILED); +} + +// set attr index, but not valid, when set input shape range +TEST(UtestIrBuild, check_data_op_attr_index_invalid_2) { + Graph graph = BuildIrGraph1(); + const map build_options = { + {"input_shape_range", "[1, 2~3, -1],[4~5, 3~5, 10],[1, 2~3, -1]"} + }; + ModelBufferData model; + graphStatus ret = aclgrphBuildModel(graph, build_options, model); + EXPECT_EQ(ret, GRAPH_FAILED); + + Graph graph2 = BuildIrGraph2(); + ret = aclgrphBuildModel(graph2, build_options, model); + EXPECT_EQ(ret, GRAPH_FAILED); +} + +// set attr index valid, when set input shape range +// only check data op attr index valid func. +TEST(UtestIrBuild, check_data_op_attr_index_valid) { + Graph graph = BuildIrGraph3(); + const map build_options = { + {"input_shape_range", "[1, 2~3, -1],[4~5, 3~5, 10],[1, 2~3, -1]"} + }; + ModelBufferData model; + graphStatus ret = aclgrphBuildModel(graph, build_options, model); + EXPECT_EQ(ret, GE_GENERATOR_GRAPH_MANAGER_BUILD_GRAPH_FAILED); +} + +// set attr index invalid, when not set input shape range +// only check data op attr index valid func. 
+TEST(UtestIrBuild, check_data_attr_index_succ_no_input_range) { + Graph graph = BuildIrGraph1(); + const map build_options; + ModelBufferData model; + graphStatus ret = aclgrphBuildModel(graph, build_options, model); + EXPECT_EQ(ret, GE_GENERATOR_GRAPH_MANAGER_BUILD_GRAPH_FAILED); +} \ No newline at end of file diff --git a/tests/ut/ge/hybrid/executor/hybrid_model_async_executor_unittest.cc b/tests/ut/ge/hybrid/executor/hybrid_model_async_executor_unittest.cc new file mode 100644 index 00000000..b4091a50 --- /dev/null +++ b/tests/ut/ge/hybrid/executor/hybrid_model_async_executor_unittest.cc @@ -0,0 +1,89 @@ +/** + * Copyright 2019-2021 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include +#include +#include + +#define private public +#define protected public +#include "hybrid/executor/hybrid_model_async_executor.h" +#include "hybrid/executor/hybrid_model_executor.h" +#include "graph/utils/tensor_utils.h" +#include "graph/utils/graph_utils.h" +#include "graph/debug/ge_attr_define.h" + + +using namespace std; +using namespace testing; + + +namespace ge { +using namespace hybrid; + +class UtestHybridModelAsyncExecutor : public testing::Test { + protected: + void SetUp() {} + + void TearDown() { } +}; + +TEST_F(UtestHybridModelAsyncExecutor, CopyOutputs_success) { + ComputeGraphPtr graph = std::make_shared("test"); + GeRootModelPtr ge_root_model = make_shared(graph); + ge_root_model->SetModelName("test_name"); + GeModelPtr ge_sub_model = make_shared(); + HybridModel hybrid_model(ge_root_model); + HybridModelAsyncExecutor executor(&hybrid_model); + + TensorValue input_tensor; + HybridModelExecutor::ExecuteArgs args; + args.inputs.emplace_back(input_tensor); + auto desc = MakeShared(); + GeShape geshape({2,2,2,2}); + desc->SetShape(geshape); + + auto allocator = NpuMemoryAllocator::GetAllocator(); + auto tensor_buffer = TensorBuffer::Create(allocator, 100); + auto output_tensor = TensorValue(shared_ptr(tensor_buffer.release())); + args.outputs.emplace_back(output_tensor); + args.output_desc.emplace_back(desc); + + OutputData output_data; + std::vector outputs; + auto ret = executor.CopyOutputs(args, &output_data, outputs); + ASSERT_EQ(ret,SUCCESS); +} + +TEST_F(UtestHybridModelAsyncExecutor, BuildDeviceTensor) { + ComputeGraphPtr graph = std::make_shared("test"); + GeRootModelPtr ge_root_model = make_shared(graph); + ge_root_model->SetModelName("test_name"); + GeModelPtr ge_sub_model = make_shared(); + HybridModel hybrid_model(ge_root_model); + HybridModelAsyncExecutor executor(&hybrid_model); + + auto allocator = NpuMemoryAllocator::GetAllocator(); + auto tensor_buffer = TensorBuffer::Create(allocator, 100); + auto tensor = 
TensorValue(shared_ptr(tensor_buffer.release())); + GeTensorDesc ge_tensor_desc; + int64_t output_size = 100; + std::vector outputs; + executor.BuildDeviceTensor(tensor, ge_tensor_desc, output_size, outputs); + auto size = tensor.GetSize(); + ASSERT_EQ(size, 100); +} +} // namespace ge \ No newline at end of file diff --git a/tests/ut/ge/hybrid/executor/hybrid_model_pipeline_executor_unittest.cc b/tests/ut/ge/hybrid/executor/hybrid_model_pipeline_executor_unittest.cc new file mode 100644 index 00000000..7dc5a191 --- /dev/null +++ b/tests/ut/ge/hybrid/executor/hybrid_model_pipeline_executor_unittest.cc @@ -0,0 +1,68 @@ +/** + * Copyright 2021 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include +#include + +#define private public +#define protected public +#include "hybrid/executor/hybrid_model_pipeline_executor.h" +#include "graph/ge_context.h" + +namespace ge { +using namespace hybrid; + +class UtestStageExecutor : public testing::Test { + protected: + void SetUp() {} + void TearDown() { } +}; + +TEST_F(UtestStageExecutor, run_success) { + ComputeGraphPtr graph = std::make_shared("test"); + GeRootModelPtr ge_root_model = std::make_shared(graph); + HybridModel hybrid_model(ge_root_model); + hybrid_model.root_graph_item_ = std::unique_ptr(new(std::nothrow)GraphItem()); + + PipeExecutionConfig config; + config.device_id = 0; + config.num_executors = 2; + config.num_stages = 1; + config.iteration_end = 2; + rtCtxGetCurrent(&config.rt_context); + StageExecutor executor(0, &hybrid_model, &config); + StageExecutor next_executor(1, &hybrid_model, &config); + executor.SetNext(&next_executor); + EXPECT_EQ(executor.Init(), SUCCESS); + + auto allocator = NpuMemoryAllocator::GetAllocator(config.device_id); + EXPECT_NE(allocator, nullptr); + StageExecutor::StageTask task_info_1; + task_info_1.stage = 0; + task_info_1.iteration = 0; + EXPECT_EQ(rtEventCreate(&task_info_1.event), RT_ERROR_NONE); + EXPECT_EQ(executor.ExecuteAsync(task_info_1), SUCCESS); + EXPECT_EQ(executor.Start({}, {}, 2), SUCCESS); + + StageExecutor::StageTask task_info_2; + task_info_2.stage = 0; + task_info_2.iteration = 1; + EXPECT_EQ(rtEventCreate(&task_info_2.event), RT_ERROR_NONE); + EXPECT_EQ(executor.ExecuteAsync(task_info_2), SUCCESS); + EXPECT_EQ(executor.Start({}, {}, 2), SUCCESS); + executor.Reset(); +} +} // namespace ge diff --git a/tests/ut/ge/hybrid/executor/node_state_unittest.cc b/tests/ut/ge/hybrid/executor/node_state_unittest.cc new file mode 100644 index 00000000..f33fc601 --- /dev/null +++ b/tests/ut/ge/hybrid/executor/node_state_unittest.cc @@ -0,0 +1,106 @@ +/** + * Copyright 2021 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 
2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include +#include + +#define private public +#define protected public +#include "hybrid/executor/node_state.h" +#include "hybrid/executor/subgraph_context.h" +#include "hybrid/model/graph_item.h" +#include "graph/utils/graph_utils.h" + +using namespace std; +using namespace testing; + +namespace ge { +using namespace hybrid; + +class UtestNodeState : public testing::Test { + protected: + void SetUp() { + } + void TearDown() { + } +}; + +static NodePtr CreateNode(ComputeGraph &graph, const string &name, const string &type, int in_num, int out_num) { + OpDescPtr op_desc = std::make_shared(name, type); + op_desc->SetStreamId(0); + static int32_t index = 0; + op_desc->SetId(index++); + + GeTensorDesc tensor(GeShape(), FORMAT_ND, DT_INT64); + TensorUtils::SetSize(tensor, 64); + vector input_offset; + for (int i = 0; i < in_num; i++) { + op_desc->AddInputDesc(tensor); + input_offset.emplace_back(index * 64 + i * 64); + } + op_desc->SetInputOffset(input_offset); + + vector output_offset; + for (int i = 0; i < out_num; i++) { + op_desc->AddOutputDesc(tensor); + output_offset.emplace_back(index * 64 + in_num * 64 + i * 64); + } + op_desc->SetOutputOffset(output_offset); + + op_desc->SetWorkspace({}); + op_desc->SetWorkspaceBytes({}); + op_desc->SetOpKernelLibName("DNN_VM_RTS_OP_STORE"); + + return graph.AddNode(op_desc); +} + +TEST_F(UtestNodeState, merge_await_shapes_ready) { + ComputeGraphPtr graph = std::make_shared("test"); + + const auto data0 = 
CreateNode(*graph, "data", DATA, 1, 1); + const auto data1 = CreateNode(*graph, "data1", DATA, 1, 1); + const auto merge1 = CreateNode(*graph, "merge", STREAMMERGE, 2, 2); + const auto output1 = CreateNode(*graph, "net_output", NETOUTPUT, 1, 1); + + GraphUtils::AddEdge(data0->GetOutDataAnchor(0), merge1->GetInDataAnchor(0)); + GraphUtils::AddEdge(data1->GetOutDataAnchor(0), merge1->GetInDataAnchor(1)); + GraphUtils::AddEdge(merge1->GetOutDataAnchor(0), output1->GetInDataAnchor(0)); + + GraphItem graph_item; + GraphExecutionContext graph_context; + SubgraphContext subgraph_context(&graph_item, &graph_context); + + std::unique_ptr node_item; + NodeItem::Create(merge1, node_item); + NodeState node_state(*node_item, &subgraph_context); + + // Not dynamic. + ASSERT_EQ(node_state.shape_inference_state_.AwaitShapesReady(graph_context), SUCCESS); + + // Not set merge index. + node_item->is_dynamic = true; + ASSERT_EQ(node_state.shape_inference_state_.AwaitShapesReady(graph_context), FAILED); + + // merge index out of bound. 
+ AttrUtils::SetInt(merge1->GetOpDesc(), ATTR_NAME_MERGE_INPUT_INDEX, 3); + ASSERT_EQ(node_state.shape_inference_state_.AwaitShapesReady(graph_context), FAILED); + + AttrUtils::SetInt(merge1->GetOpDesc(), ATTR_NAME_MERGE_INPUT_INDEX, 1); + ASSERT_EQ(node_state.shape_inference_state_.AwaitShapesReady(graph_context), SUCCESS); +} + +} // namespace ge \ No newline at end of file diff --git a/tests/ut/ge/hybrid/executor/subgraph_executor_unittest.cc b/tests/ut/ge/hybrid/executor/subgraph_executor_unittest.cc index fbda3776..445382bc 100644 --- a/tests/ut/ge/hybrid/executor/subgraph_executor_unittest.cc +++ b/tests/ut/ge/hybrid/executor/subgraph_executor_unittest.cc @@ -264,4 +264,19 @@ TEST_F(UtestSubgraphExecutor, cond_graph_schedule_tasks) { ASSERT_EQ(state_it_f->second->GetSwitchIndex(), 0); ASSERT_EQ(graph_context.callback_manager->Destroy(), SUCCESS); } -} // namespace ge \ No newline at end of file + +TEST_F(UtestSubgraphExecutor, partial_execution_init) { + ComputeGraphPtr graph = std::make_shared("test"); + ASSERT_NE(graph, nullptr); + GeRootModelPtr ge_root_model = std::make_shared(graph); + ASSERT_NE(ge_root_model, nullptr); + HybridModel hybrid_model(ge_root_model); + hybrid_model.root_graph_item_ = std::unique_ptr(new(std::nothrow)GraphItem()); + hybrid_model.root_graph_item_->is_dynamic_ = false; + GraphExecutionContext graph_context; + SubgraphExecutor executor(hybrid_model.GetRootGraphItem(), &graph_context); + + ASSERT_EQ(executor.Init({}, {}), SUCCESS); + ASSERT_EQ(executor.InitForPartialExecution({}, {}), SUCCESS); +} +} // namespace ge diff --git a/tests/ut/ge/hybrid/executor/worker/execution_engine_unittest.cc b/tests/ut/ge/hybrid/executor/worker/execution_engine_unittest.cc index 92315448..07022230 100644 --- a/tests/ut/ge/hybrid/executor/worker/execution_engine_unittest.cc +++ b/tests/ut/ge/hybrid/executor/worker/execution_engine_unittest.cc @@ -120,6 +120,11 @@ TEST_F(UtestExecutionEngine, ExecuteAsync_without_callback_and_kernel_task) { 
NodeState node_state(*node_item, &subgraph_context); auto task_context = TaskContext::Create(&node_state, &execution_context, &subgraph_context); + uint32_t task_id = 0; + uint32_t stream_id = 1; + std::string task_type = "rts"; + uint32_t block_dim = 0; + task_context->SaveProfilingTaskDescInfo(task_id, stream_id, task_type, block_dim); auto shared_task_context = std::shared_ptr(task_context.release()); node_state.SetTaskContext(shared_task_context); diff --git a/tests/ut/ge/hybrid/ge_hybrid_unittest.cc b/tests/ut/ge/hybrid/ge_hybrid_unittest.cc index 7cd8a30a..7a2a5dfe 100644 --- a/tests/ut/ge/hybrid/ge_hybrid_unittest.cc +++ b/tests/ut/ge/hybrid/ge_hybrid_unittest.cc @@ -109,6 +109,39 @@ TEST_F(UtestGeHybrid, aicore_op_task_init_success) { ASSERT_EQ(aicore_task->LaunchKernel(stream), SUCCESS); } +TEST_F(UtestGeHybrid, aicore_op_task_init_success2) { + // build aicore task + auto aicore_task = std::unique_ptr(new(std::nothrow)hybrid::AiCoreOpTask()); + aicore_task->is_single_op_ = true; + domi::TaskDef task_def; + task_def.set_type(RT_MODEL_TASK_KERNEL); + domi::KernelDef *kernel = task_def.mutable_kernel(); + kernel->set_block_dim(32); + kernel->set_args_size(64); + string args(64, '1'); + kernel->set_args(args.data(), 64); + domi::KernelContext *context = kernel->mutable_context(); + context->set_op_index(1); + context->set_kernel_type(2); // ccKernelType::TE + uint16_t args_offset[9] = {0}; + context->set_args_offset(args_offset, 9 * sizeof(uint16_t)); + + OpDescPtr op_desc = CreateOpDesc("Add", "Add"); + std::vector kernelBin; + TBEKernelPtr tbe_kernel = std::make_shared("name/Add", std::move(kernelBin)); + op_desc->SetExtAttr(ge::OP_EXTATTR_NAME_TBE_KERNEL, tbe_kernel); + std::string kernel_name("kernel/Add"); + AttrUtils::SetStr(op_desc, op_desc->GetName() + "_kernelname", kernel_name); + ASSERT_EQ(aicore_task->InitWithTaskDef(*op_desc.get(), task_def), SUCCESS); + rtStream_t stream = nullptr; + rtStreamCreate(&stream, 0); + 
ASSERT_EQ(aicore_task->LaunchKernel(stream), SUCCESS); + char *handle = ""; + aicore_task->handle_ = handle; + aicore_task->tiling_key_ = 1; + ASSERT_EQ(aicore_task->LaunchKernel(stream), SUCCESS); +} + TEST_F(UtestGeHybrid, task_update_tiling_info) { auto aicore_task = std::unique_ptr<hybrid::AiCoreOpTask>(new(std::nothrow)hybrid::AiCoreOpTask()); auto graph = make_shared<ComputeGraph>("graph"); diff --git a/tests/ut/ge/hybrid/model/hybrid_model_builder_unittest.cc b/tests/ut/ge/hybrid/model/hybrid_model_builder_unittest.cc index 9630b193..9c9dab17 100644 --- a/tests/ut/ge/hybrid/model/hybrid_model_builder_unittest.cc +++ b/tests/ut/ge/hybrid/model/hybrid_model_builder_unittest.cc @@ -27,6 +27,7 @@ #include "graph/utils/tensor_utils.h" #include "graph/utils/graph_utils.h" #include "graph/debug/ge_attr_define.h" +#include "graph/ge_local_context.h" using namespace std; using namespace testing; @@ -70,6 +71,15 @@ static NodePtr CreateNode(ComputeGraph &graph, const string &name, const string return graph.AddNode(op_desc); } +static NodePtr CreateConstantNode(const ComputeGraphPtr &graph, const string &name, size_t size) { + OpDescPtr op_desc = std::make_shared<OpDesc>(name, CONSTANTOP); + op_desc->AddOutputDesc(GeTensorDesc()); + GeTensorPtr value = std::make_shared<GeTensor>(GeTensorDesc(), size); + (void)AttrUtils::SetTensor(op_desc, ATTR_NAME_WEIGHTS, value); + + return graph->AddNode(op_desc); +} + TEST_F(UtestHybridModelBuilder, normal_hybrid_model_build) { /******************************************************************************* * Exit Identify @@ -230,4 +240,23 @@ TEST_F(UtestHybridModelBuilder, stream_switch_n_group) { AttrUtils::SetInt(switch_n->GetOpDesc(), ATTR_NAME_BATCH_NUM, batch_num); ASSERT_EQ(hybrid_model_builder.CreateStreamSwitchNGroup(switch_n, &node_item), SUCCESS); } -} // namespace ge \ No newline at end of file + +TEST_F(UtestHybridModelBuilder, init_constant_op_host_) { + ComputeGraphPtr graph = std::make_shared<ComputeGraph>("test"); + GeRootModelPtr ge_root_model = make_shared<GeRootModel>(graph); + HybridModel 
hybrid_model(ge_root_model); + HybridModelBuilder hybrid_model_builder(hybrid_model); + + auto const_1 = CreateConstantNode(graph, "const_1", 0); + hybrid_model_builder.constant_op_nodes_.emplace(const_1->GetName(), const_1); + auto const_2 = CreateConstantNode(graph, "const_2", 10); + hybrid_model_builder.constant_op_nodes_.emplace(const_2->GetName(), const_2); + + std::map<std::string, std::string> options; + options["ge.exec.placement"] = "HOST"; + GetThreadLocalContext().SetGraphOption(options); + + EXPECT_EQ(hybrid_model_builder.InitConstantOps(), SUCCESS); + EXPECT_EQ(hybrid_model_builder.hybrid_model_.variable_tensors_.size(), 2); +} +} // namespace ge diff --git a/tests/ut/ge/hybrid/node_executor/aicore/aicore_task_compiler_unittest.cc b/tests/ut/ge/hybrid/node_executor/aicore/aicore_task_compiler_unittest.cc new file mode 100644 index 00000000..3371cd5c --- /dev/null +++ b/tests/ut/ge/hybrid/node_executor/aicore/aicore_task_compiler_unittest.cc @@ -0,0 +1,48 @@ +/** + * Copyright 2021-2021 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include <gtest/gtest.h> +#include <gmock/gmock.h> +#include <vector> + +#define private public +#define protected public +#include "init/gelib.h" +#include "hybrid/node_executor/aicore/aicore_task_compiler.h" +#undef private +#undef protected + +using namespace std; +using namespace testing; + +namespace ge { +using namespace hybrid; + +class UtestAiCoreTaskCompiler : public testing::Test { + protected: + void SetUp() {} + void TearDown() {} +}; + +TEST_F(UtestAiCoreTaskCompiler, test_aicore_task_compiler_init) { + ge::hybrid::AiCoreTaskCompiler aicore_task_compiler; + NodePtr node = MakeShared<Node>(); + std::vector<domi::TaskDef> tasks{}; + EXPECT_EQ(aicore_task_compiler.Initialize(), ge::PARAM_INVALID); // cause: ge lib is nullptr + EXPECT_EQ(aicore_task_compiler.CompileOp(node, tasks), ge::PARAM_INVALID); // cause: aicore task compiler init failed. +} +} // namespace ge + diff --git a/tests/ut/ge/profiling/ge_profiling_manager_unittest.cc b/tests/ut/ge/profiling/ge_profiling_manager_unittest.cc index 2da80b32..9c615317 100644 --- a/tests/ut/ge/profiling/ge_profiling_manager_unittest.cc +++ b/tests/ut/ge/profiling/ge_profiling_manager_unittest.cc @@ -24,6 +24,7 @@ #define protected public #define private public #include "common/profiling/profiling_manager.h" +#include "graph/ge_local_context.h" #undef protected #undef private @@ -64,11 +65,12 @@ TEST_F(UtestGeProfilinganager, ParseOptions) { options.profiling_mode = "1"; options.profiling_options = R"({"result_path":"/data/profiling","training_trace":"on","task_trace":"on","aicpu_trace":"on","fp_point":"Data_0","bp_point":"addn","ai_core_metrics":"ResourceConflictRatio"})"; - struct MsprofGeOptions prof_conf = {{ 0 }}; - Status ret = ProfilingManager::Instance().ParseOptions(options.profiling_options); EXPECT_EQ(ret, ge::SUCCESS); + EXPECT_EQ(ProfilingManager::Instance().is_training_trace_, true); + EXPECT_EQ(ProfilingManager::Instance().fp_point_, "Data_0"); + EXPECT_EQ(ProfilingManager::Instance().bp_point_, "addn"); } TEST_F(UtestGeProfilinganager, 
plungin_init_) { @@ -83,4 +85,34 @@ TEST_F(UtestGeProfilinganager, report_data_) { std::string data = "ge is better than tensorflow."; std::string tag_name = "fmk"; ProfilingManager::Instance().ReportData(0, data, tag_name); +} + +TEST_F(UtestGeProfilinganager, get_fp_bp_point_) { + map<std::string, std::string> options_map = { + {OPTION_EXEC_PROFILING_OPTIONS, + R"({"result_path":"/data/profiling","training_trace":"on","task_trace":"on","aicpu_trace":"on","fp_point":"Data_0","bp_point":"addn","ai_core_metrics":"ResourceConflictRatio"})"}}; + GEThreadLocalContext &context = GetThreadLocalContext(); + context.SetGraphOption(options_map); + + std::string fp_point; + std::string bp_point; + ProfilingManager::Instance().GetFpBpPoint(fp_point, bp_point); + EXPECT_EQ(fp_point, "Data_0"); + EXPECT_EQ(bp_point, "addn"); +} + +TEST_F(UtestGeProfilinganager, get_fp_bp_point_empty) { + // fp bp empty + map<std::string, std::string> options_map = { + { OPTION_EXEC_PROFILING_OPTIONS, + R"({"result_path":"/data/profiling","training_trace":"on","task_trace":"on","aicpu_trace":"on","ai_core_metrics":"ResourceConflictRatio"})"}}; + GEThreadLocalContext &context = GetThreadLocalContext(); + context.SetGraphOption(options_map); + std::string fp_point = "fp"; + std::string bp_point = "bp"; + ProfilingManager::Instance().bp_point_ = ""; + ProfilingManager::Instance().fp_point_ = ""; + ProfilingManager::Instance().GetFpBpPoint(fp_point, bp_point); + EXPECT_EQ(fp_point, ""); + EXPECT_EQ(bp_point, ""); } \ No newline at end of file diff --git a/tests/ut/ge/single_op/single_op_unittest.cc b/tests/ut/ge/single_op/single_op_unittest.cc index 3519811b..db3de7ec 100644 --- a/tests/ut/ge/single_op/single_op_unittest.cc +++ b/tests/ut/ge/single_op/single_op_unittest.cc @@ -159,6 +159,13 @@ TEST_F(UtestSingleOp, test_singleop_execute_async2) { single_op.hybrid_model_executor_.reset(new (std::nothrow)hybrid::HybridModelExecutor(single_op.hybrid_model_.get(), 0, stream)); EXPECT_EQ(single_op.running_param_->mem_base, nullptr); 
EXPECT_EQ(single_op.tasks_.size(), 0); + + GeTensorDesc tensor; + int64_t storage_format_val = static_cast<int64_t>(FORMAT_NCHW); + AttrUtils::SetInt(tensor, "storage_format", storage_format_val); + std::vector<int64_t> storage_shape{1, 1, 1, 1}; + AttrUtils::SetListInt(tensor, "storage_shape", storage_shape); + single_op.inputs_desc_.emplace_back(tensor); EXPECT_EQ(single_op.ExecuteAsync(input_buffers, output_buffers), PARAM_INVALID); }