From 2fbf65155003e7fe801577351c29387f677f1d94 Mon Sep 17 00:00:00 2001 From: wjm Date: Wed, 2 Jun 2021 14:41:21 +0800 Subject: [PATCH 01/18] fix sc --- ge/common/profiling/profiling_manager.cc | 10 ++--- ge/generator/ge_generator.cc | 8 +--- ge/graph/build/memory/graph_mem_assigner.cc | 7 +-- ge/graph/build/stream_allocator.cc | 3 +- ge/graph/load/model_manager/model_manager.cc | 45 ++++--------------- ge/graph/manager/graph_manager.cc | 5 +-- ge/graph/optimize/mem_rw_conflict_optimize.cc | 10 ++--- ge/graph/partition/graph_partition.cc | 14 ++---- ge/graph/passes/base_pass.cc | 5 +-- ge/graph/passes/flow_ctrl_pass.cc | 8 +--- .../passes/subgraph_const_migration_pass.cc | 3 +- .../passes/transop_breadth_fusion_pass.cc | 7 ++- ge/host_kernels/slice_kernel.cc | 29 +++++++----- ge/host_kernels/slice_kernel.h | 1 + .../node_executor/hccl/hccl_node_executor.cc | 4 +- ge/ir_build/ge_ir_build.cc | 34 +++++++------- 16 files changed, 76 insertions(+), 117 deletions(-) diff --git a/ge/common/profiling/profiling_manager.cc b/ge/common/profiling/profiling_manager.cc index d615187f..8e984bb3 100644 --- a/ge/common/profiling/profiling_manager.cc +++ b/ge/common/profiling/profiling_manager.cc @@ -961,9 +961,8 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY void ProfilingManager::GetOpInp std::vector input_data_type; for (size_t i = 0; i < op->GetAllInputsSize(); ++i) { GeTensorDescPtr input_tensor_desc = op->MutableInputDesc(i); - if (input_tensor_desc == nullptr) { - continue; - } + GE_IF_BOOL_EXEC(input_tensor_desc == nullptr, continue); + input_format.emplace_back(input_tensor_desc->GetFormat()); input_shape.emplace_back(input_tensor_desc->GetShape().GetDims()); input_data_type.emplace_back(input_tensor_desc->GetDataType()); @@ -973,9 +972,8 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY void ProfilingManager::GetOpInp std::vector output_data_type; for (size_t j = 0; j < op->GetOutputsSize(); ++j) { GeTensorDescPtr output_tensor_desc = op->MutableOutputDesc(j); - if (output_tensor_desc == nullptr) { - continue; - } + GE_IF_BOOL_EXEC(output_tensor_desc == nullptr, continue); + output_format.emplace_back(output_tensor_desc->GetFormat()); output_shape.emplace_back(output_tensor_desc->GetShape().GetDims()); output_data_type.emplace_back(output_tensor_desc->GetDataType()); diff --git a/ge/generator/ge_generator.cc b/ge/generator/ge_generator.cc index 1796d424..575afb35 100644 --- a/ge/generator/ge_generator.cc +++ b/ge/generator/ge_generator.cc @@ -854,7 +854,7 @@ Status GeGenerator::BuildSingleOp(OpDescPtr &op_desc, const vector &in op_desc->GetName().c_str()); return PARAM_INVALID; } - OmgContext &omg_context = (impl_ == nullptr) ? domi::GetContext() : impl_->omg_context_; + OmgContext &omg_context = impl_->omg_context_; omg_context.is_dynamic_input = ContainsDynamicInpus(*op_desc); if (op_desc->HasAttr(ATTR_NAME_UNREGST_OPPATH)) { @@ -869,11 +869,7 @@ Status GeGenerator::BuildSingleOp(OpDescPtr &op_desc, const vector &in if (!HasShapeRange(inputs) && compile_flag == kFuzzBuildPattern) { fuzz_compile_flag = true; } - if (!AttrUtils::SetBool(op_desc, ATTR_NAME_FUZZ_BUILD, fuzz_compile_flag)) { - REPORT_CALL_ERROR("E19999", "set ATTR_NAME_FUZZ_BUILD failed for %s.", op_desc->GetName().c_str()); - GELOGE(FAILED, "[Set][ATTR_NAME_FUZZ_BUILD] Failed to set attr for %s.", op_desc->GetName().c_str()); - return FAILED; - } + (void)AttrUtils::SetBool(op_desc, ATTR_NAME_FUZZ_BUILD, fuzz_compile_flag); impl_->omg_context_.fuzz_compile_flag = fuzz_compile_flag; // 1. Create ComputeGraph. diff --git a/ge/graph/build/memory/graph_mem_assigner.cc b/ge/graph/build/memory/graph_mem_assigner.cc index f72efe02..db7258d6 100755 --- a/ge/graph/build/memory/graph_mem_assigner.cc +++ b/ge/graph/build/memory/graph_mem_assigner.cc @@ -579,11 +579,8 @@ Status GraphMemoryAssigner::ReAssignContinuousMemory(bool is_loop_graph) { if (continuous_output) { GE_CHK_STATUS_RET(GetNodeMemoryType(node, memory_type, "output"), "[Get][MemType]fail for node:%s", node->GetName().c_str()); - ret = AssignContinuousOutputMemory(node, memory_type, continuous_type); - if (ret != ge::SUCCESS) { - GELOGE(ret, "[Assign][Memory:Continuous:Ouput]fail for node:%s", node->GetName().c_str()); - return ret; - } + GE_CHK_STATUS_RET(AssignContinuousOutputMemory(node, memory_type, continuous_type), + "[Assign][Memory:Continuous:Output]fail for node:%s", node->GetName().c_str()); } } // Assign continuous input memory in `reverse topo order` which stored before diff --git a/ge/graph/build/stream_allocator.cc b/ge/graph/build/stream_allocator.cc index a5a1112e..dae36b83 100644 --- a/ge/graph/build/stream_allocator.cc +++ b/ge/graph/build/stream_allocator.cc @@ -1212,7 +1212,8 @@ Status StreamAllocator::SetActiveStreamsForLoop() { for (const auto &node : whole_graph_->GetNodes(whole_graph_->GetGraphUnknownFlag())) { GE_CHECK_NOTNULL(node->GetOpDesc()); bool is_loop_active = false; - if (AttrUtils::GetBool(node->GetOpDesc(), ATTR_NAME_IS_LOOP_ACTIVE, is_loop_active) && is_loop_active) { + (void)AttrUtils::GetBool(node->GetOpDesc(), ATTR_NAME_IS_LOOP_ACTIVE, is_loop_active); + if (is_loop_active) { vector activated_label_list; NodePtr pre_switch_node = FindSwitchNodeBeforeLoopActiveNode(node); diff --git a/ge/graph/load/model_manager/model_manager.cc b/ge/graph/load/model_manager/model_manager.cc index d7a6ca25..6a563d2f 100755 --- a/ge/graph/load/model_manager/model_manager.cc +++ b/ge/graph/load/model_manager/model_manager.cc @@ -1668,42 +1668,23 @@ Status ModelManager::LaunchKernelCheckAicpuOp(std::vector &aicpu_op }; GE_MAKE_GUARD(release, callback); // malloc sysOpInfoList in SysOpCheckInfo - status = rtMalloc(&d_req_op_list, op_nums * sizeof(SysOpInfo), RT_MEMORY_HBM); - if (status != RT_ERROR_NONE) { - REPORT_CALL_ERROR("E19999", "Call rtMalloc fail, size:%zu, ret = 0x%X", op_nums * sizeof(SysOpInfo), status); - GELOGE(RT_FAILED, "[Call][RtMalloc] fail, size:%zu, ret = 0x%X", op_nums * sizeof(SysOpInfo), status); - return RT_ERROR_TO_GE_STATUS(status); - } + GE_CHK_RT_RET(rtMalloc(&d_req_op_list, op_nums * sizeof(SysOpInfo), RT_MEMORY_HBM)); allocated_mem.push_back(d_req_op_list); // malloc sysOpInfoList in SysOpCheckResp - status = rtMalloc(&d_res_op_list, op_nums * sizeof(SysOpInfo), RT_MEMORY_HBM); - if (status != RT_ERROR_NONE) { - REPORT_CALL_ERROR("E19999", "Call rtMalloc fail, size:%zu, ret = 0x%X", op_nums * sizeof(SysOpInfo), status); - GELOGE(RT_FAILED, "[Call][RtMalloc] fail, size:%zu, ret = 0x%X", op_nums * sizeof(SysOpInfo), status); - return RT_ERROR_TO_GE_STATUS(status); - } + GE_CHK_RT_RET(rtMalloc(&d_res_op_list, op_nums * sizeof(SysOpInfo), RT_MEMORY_HBM)); allocated_mem.push_back(d_res_op_list); // malloc returnCodeList in SysOpCheckResp - status = rtMalloc(&d_ret_code_list, op_nums * sizeof(ReturnCode), RT_MEMORY_HBM); - if (status != RT_ERROR_NONE) { - REPORT_CALL_ERROR("E19999", "Call rtMalloc fail, size:%zu, ret = 0x%X", op_nums * sizeof(ReturnCode), status); - GELOGE(RT_FAILED, "[Call][RtMalloc] fail, size:%zu, ret = 0x%X", op_nums * sizeof(ReturnCode), status); - return RT_ERROR_TO_GE_STATUS(status); - } + GE_CHK_RT_RET(rtMalloc(&d_ret_code_list, op_nums * sizeof(ReturnCode), RT_MEMORY_HBM)); allocated_mem.push_back(d_ret_code_list); for (const auto &op_type : aicpu_optype_list) { SysOpInfo op_info; // malloc op_type name in SysOpInfo void *d_op_type_name = nullptr; - status = rtMalloc(&d_op_type_name, op_type.length(), RT_MEMORY_HBM); - if (status != RT_ERROR_NONE) { - REPORT_CALL_ERROR("E19999", "Call rtMalloc fail, size:%lu, ret = 0x%X", op_type.length(), status); - GELOGE(RT_FAILED, "[Call][RtMalloc] fail, size:%lu, ret = 0x%X", op_type.length(), status); - return RT_ERROR_TO_GE_STATUS(status); - } + GE_CHK_RT_RET(rtMalloc(&d_op_type_name, op_type.length(), RT_MEMORY_HBM)); + allocated_mem.push_back(d_op_type_name); GE_CHK_RT(rtMemcpy(d_op_type_name, op_type.length(), op_type.c_str(), op_type.length(), RT_MEMCPY_HOST_TO_DEVICE)); op_info.opType = static_cast(reinterpret_cast(d_op_type_name)); @@ -1716,12 +1697,8 @@ Status ModelManager::LaunchKernelCheckAicpuOp(std::vector &aicpu_op SysOpInfo op_info; // malloc op_type name in SysOpInfo void *d_op_type_name = nullptr; - status = rtMalloc(&d_op_type_name, op_type.size(), RT_MEMORY_HBM); - if (status != RT_ERROR_NONE) { - REPORT_CALL_ERROR("E19999", "Call rtMalloc fail, size:%lu, ret = 0x%X", op_type.length(), status); - GELOGE(RT_FAILED, "[Call][RtMalloc] fail, size:%lu, ret = 0x%X", op_type.size(), status); - return RT_ERROR_TO_GE_STATUS(status); - } + GE_CHK_RT_RET(rtMalloc(&d_op_type_name, op_type.length(), RT_MEMORY_HBM)); + allocated_mem.push_back(d_op_type_name); GE_CHK_RT(rtMemcpy(d_op_type_name, op_type.size(), op_type.c_str(), op_type.size(), RT_MEMCPY_HOST_TO_DEVICE)); op_info.opType = static_cast(reinterpret_cast(d_op_type_name)); @@ -1745,12 +1722,8 @@ Status ModelManager::LaunchKernelCheckAicpuOp(std::vector &aicpu_op op_check_info_res.sysOpInfoList = static_cast(reinterpret_cast(d_res_op_list)); uint32_t args_size = sizeof(SysOpCheckInfo) + sizeof(SysOpCheckResp); - status = rtMalloc(&args, args_size, RT_MEMORY_HBM); - if (status != RT_ERROR_NONE) { - REPORT_CALL_ERROR("E19999", "Call rtMalloc fail, size:%u, ret = 0x%X", args_size, status); - GELOGE(RT_FAILED, "[Call][RtMalloc] fail, size:%u, ret = 0x%X", args_size, status); - return RT_ERROR_TO_GE_STATUS(status); - } + GE_CHK_RT_RET(rtMalloc(&args, args_size, RT_MEMORY_HBM)); + allocated_mem.push_back(args); GE_CHK_RT(rtMemcpy(args, sizeof(SysOpCheckInfo), reinterpret_cast(&op_check_info_req), sizeof(SysOpCheckInfo), RT_MEMCPY_HOST_TO_DEVICE)); diff --git a/ge/graph/manager/graph_manager.cc b/ge/graph/manager/graph_manager.cc index 40a7cf60..ad04fa48 100755 --- a/ge/graph/manager/graph_manager.cc +++ b/ge/graph/manager/graph_manager.cc @@ -3532,9 +3532,8 @@ Status GraphManager::OptimizeSubgraph(const GraphNodePtr &graph_node, ComputeGra return ret; } GE_TIMESTAMP_EVENT_END(SetSubgraph, "OptimizeSubgraph::SetSubGraph"); - if ((options_.build_mode == BUILD_MODE_TUNING) && - (options_.build_step == BUILD_STEP_BEFORE_UB_MATCH || options_.build_step == BUILD_STEP_AFTER_BUILDER || - options_.build_step == BUILD_STEP_AFTER_BUILDER_SUB)) { + std::set build_steps = {BUILD_STEP_BEFORE_UB_MATCH, BUILD_STEP_AFTER_BUILDER, BUILD_STEP_AFTER_BUILDER_SUB}; + if ((options_.build_mode == BUILD_MODE_TUNING) && (build_steps.count(options_.build_step) > 0)) { GE_TIMESTAMP_START(ConvertGraphToFile); std::string tuning_path; (void) GetContext().GetOption(TUNING_PATH, tuning_path); diff --git a/ge/graph/optimize/mem_rw_conflict_optimize.cc b/ge/graph/optimize/mem_rw_conflict_optimize.cc index 6ffd099d..30fcabef 100644 --- a/ge/graph/optimize/mem_rw_conflict_optimize.cc +++ b/ge/graph/optimize/mem_rw_conflict_optimize.cc @@ -743,12 +743,12 @@ Status GraphOptimize::HandleMemoryRWConflict(ComputeGraphPtr &compute_graph) { continue; } // ignore data / netoutput of subgraph - if (node->GetType() == DATA && AttrUtils::HasAttr(node->GetOpDesc(), ATTR_NAME_PARENT_NODE_INDEX)) { - continue; - } - if (node->GetType() == NETOUTPUT && AttrUtils::HasAttr(node->GetOpDesc(), ATTR_NAME_PARENT_NODE_INDEX)) { - continue; + if (AttrUtils::HasAttr(node->GetOpDesc(), ATTR_NAME_PARENT_NODE_INDEX)) { + if (node->GetType() == DATA || node->GetType() == NETOUTPUT) { + continue; + } } + bool identity_reserved = false; AttrUtils::GetBool(node->GetOpDesc(), ATTR_NAME_CANNOT_BE_DELETED, identity_reserved); if (identity_reserved) { diff --git a/ge/graph/partition/graph_partition.cc b/ge/graph/partition/graph_partition.cc index 503979cb..7df23f89 100755 --- a/ge/graph/partition/graph_partition.cc +++ b/ge/graph/partition/graph_partition.cc @@ -366,11 +366,8 @@ graphStatus ge::GraphPartitioner::AddPlaceHolderEndInSrcDstGraph(const AnchorPtr // link input -> end string end_name = kEndType + std::to_string(graph_info_.num_of_pld_end_); auto end_op_desc = MakeShared(end_graph->GetName() + "_" + end_name, END); - if (end_op_desc == nullptr) { - REPORT_CALL_ERROR("E19999", "New Memory for OpDesc failed."); - GELOGE(GRAPH_PARAM_INVALID, "[New][Memory] for OpDesc failed, pld_op_desc is nullptr."); - return FAILED; - } + GE_CHECK_NOTNULL(end_op_desc); + GE_IF_BOOL_EXEC(!AttrUtils::SetInt(end_op_desc, "peerIndex", graph_info_.num_of_pld_end_), GELOGW("SetInt peerIndex failed");) GE_IF_BOOL_EXEC(!AttrUtils::SetStr(end_op_desc, "parentOpType", dst_node->GetType()), @@ -429,11 +426,8 @@ graphStatus ge::GraphPartitioner::AddPlaceHolderEndInSrcDstGraph(const AnchorPtr int64_t node_id = src_node_opdesc->GetId(); const string pld_name = kPlaceHolderType + std::to_string(graph_info_.num_of_pld_end_); auto pld_op_desc = MakeShared(pld_graph->GetName() + "_" + pld_name, PLACEHOLDER); - if (pld_op_desc == nullptr) { - REPORT_CALL_ERROR("E19999", "New Memory for OpDesc failed."); - GELOGE(GRAPH_PARAM_INVALID, "[New][Memory] for OpDesc failed."); - return FAILED; - } + GE_CHECK_NOTNULL(pld_op_desc); + GE_IF_BOOL_EXEC(!AttrUtils::SetInt(pld_op_desc, "peerIndex", graph_info_.num_of_pld_end_), GELOGW("SetInt peerIndex failed");) GE_IF_BOOL_EXEC(!AttrUtils::SetStr(pld_op_desc, "_peerNodeName", new_end_node->GetName()), diff --git a/ge/graph/passes/base_pass.cc b/ge/graph/passes/base_pass.cc index 6d933295..16c05647 100755 --- a/ge/graph/passes/base_pass.cc +++ b/ge/graph/passes/base_pass.cc @@ -333,11 +333,8 @@ Status GEPass::RunPassesOneGraph(const NamesToPass &names_to_passes) { during_pass_node_set.nodes_last.clear(); } while ((!during_pass_node_set.nodes_re_pass.empty() || !nodes.empty()) && ++re_pass_times < kMaxRePassTimes); - if (re_pass_times == kMaxRePassTimes) { - GELOGW("re_pass_times should not come to %d", kMaxRePassTimes); - } + GE_IF_BOOL_EXEC(re_pass_times == kMaxRePassTimes, GELOGW("re_pass_times should not come to %d", kMaxRePassTimes)); GELOGD("All passes runs end"); - return SUCCESS; } Status GEPass::RunPassesOnSubGraph(const NodePtr &node, const NamesToPass &names_to_passes, bool &has_sub_graph) { diff --git a/ge/graph/passes/flow_ctrl_pass.cc b/ge/graph/passes/flow_ctrl_pass.cc index fe0cb9ea..469d2696 100755 --- a/ge/graph/passes/flow_ctrl_pass.cc +++ b/ge/graph/passes/flow_ctrl_pass.cc @@ -41,9 +41,7 @@ Status FlowCtrlPass::Run(ComputeGraphPtr compute_graph) { bool graph_change = false; // 1. Add FP/BP flow ctrl (big cycle) for (auto &node : compute_graph->GetDirectNode()) { - if (node == nullptr) { - continue; - } + GE_IF_BOOL_EXEC(node == nullptr, continue); GE_IF_BOOL_EXEC(node->GetOpDesc() == nullptr, continue); uint32_t true_stream_id = 0; bool is_found = AttrUtils::GetInt(node->GetOpDesc(), ATTR_NAME_TRUE_BRANCH_STREAM, true_stream_id); @@ -65,9 +63,7 @@ Status FlowCtrlPass::Run(ComputeGraphPtr compute_graph) { // 2. Add special node flow ctrl. eg, IteratorGetNext. (small cycle) // NOTE: Small cycle share the variables with big cycle. for (auto &node : compute_graph->GetDirectNode()) { - if (node == nullptr) { - continue; - } + GE_IF_BOOL_EXEC(node == nullptr, continue); GE_IF_BOOL_EXEC(node->GetOpDesc() == nullptr, continue); bool need_cycle_flag = false; bool is_found = AttrUtils::GetBool(node->GetOpDesc(), ATTR_NAME_STREAM_CYCLE_EVENT_FLAG, need_cycle_flag); diff --git a/ge/graph/passes/subgraph_const_migration_pass.cc b/ge/graph/passes/subgraph_const_migration_pass.cc index 7993ba85..7cf75661 100644 --- a/ge/graph/passes/subgraph_const_migration_pass.cc +++ b/ge/graph/passes/subgraph_const_migration_pass.cc @@ -164,9 +164,10 @@ Status SubgraphConstMigrationPass::ClassifyGraphNodes(const ComputeGraphPtr &gra data_nodes[parent_index] = node; GELOGD("%s, index: %u, Data: %s", subgraph->GetName().c_str(), parent_index, node->GetName().c_str()); - } else if ((node->GetType() == CONSTANT) && (node->GetOutDataAnchor(kZeroIndex) != nullptr)) { + } else if (node->GetType() == CONSTANT) { set peer_name_list; const auto &out_anchor = node->GetOutDataAnchor(kZeroIndex); + GE_IF_BOOL_EXEC(out_anchor == nullptr, continue); for (const auto &in_anchor : out_anchor->GetPeerInDataAnchors()) { const auto &peer_node = in_anchor->GetOwnerNode(); // Trim subgraph node name prefix. diff --git a/ge/graph/passes/transop_breadth_fusion_pass.cc b/ge/graph/passes/transop_breadth_fusion_pass.cc index d52c3a18..58b40a5f 100644 --- a/ge/graph/passes/transop_breadth_fusion_pass.cc +++ b/ge/graph/passes/transop_breadth_fusion_pass.cc @@ -64,16 +64,19 @@ std::string TransOpBreadthFusionPass::GetNodeId(const int anchor_index, const No GE_IF_BOOL_EXEC(node == nullptr || node->GetOpDesc() == nullptr, REPORT_INNER_ERROR("E19999", "Param node or its op_desc is nullptr, check invalid"); GELOGE(FAILED, "[Check][Param] Param node or its op_desc is nullptr"); return ""); + + std::set trans_shapes = { RESHAPE, EXPANDDIMS, SQUEEZE }; + std::set trans_shape_and_format = { TRANSPOSE, TRANSPOSED, EXPANDDIMS }; if (node->GetType() == CAST) { trans_data_type = true; - } else if (node->GetType() == TRANSPOSE || node->GetType() == TRANSPOSED || node->GetType() == EXPANDDIMS) { + } else if (trans_shape_and_format.count(node->GetType()) > 0) { trans_format = true; trans_shape = true; } else if (node->GetType() == TRANSDATA) { trans_data_type = true; trans_format = true; trans_shape = true; - } else if (node->GetType() == RESHAPE || node->GetType() == EXPANDDIMS || node->GetType() == SQUEEZE) { + } else if (trans_shapes.count(node->GetType()) > 0) { trans_shape = true; } else if (node->GetType() == REFORMAT) { trans_format = true; diff --git a/ge/host_kernels/slice_kernel.cc b/ge/host_kernels/slice_kernel.cc index 025d7f66..83b98111 100644 --- a/ge/host_kernels/slice_kernel.cc +++ b/ge/host_kernels/slice_kernel.cc @@ -71,15 +71,13 @@ Status SliceKernel::Compute(const OpDescPtr attr, const std::vectorGetTensorDesc().GetDataType(); // check supported @@ -92,11 +90,7 @@ Status SliceKernel::Compute(const OpDescPtr attr, const std::vectorGetTensorDesc().GetDataType() != DT_INT32 || size->GetTensorDesc().GetDataType() != DT_INT32) { - GELOGW("Data type of begin and size for slice are not DT_INT32."); - return NOT_CHANGED; - } + void *data = reinterpret_cast(const_cast(x_->GetData().data())); int32_t *begin_data = const_cast(reinterpret_cast(begin->GetData().GetData())); @@ -145,7 +139,7 @@ Status SliceKernel::Compute(const OpDescPtr attr, const std::vectorGetTensorDesc().GetDataType() != DT_INT32 || size->GetTensorDesc().GetDataType() != DT_INT32) { + GELOGW("Data type of begin and size for slice are not DT_INT32."); + return NOT_CHANGED; + } + return SUCCESS; +} + Status SliceKernel::CheckOutputDims(const std::vector &output_dims, const OpDescPtr attr) { // check dim not all less than 0 for (auto dim : output_dims) { diff --git a/ge/host_kernels/slice_kernel.h b/ge/host_kernels/slice_kernel.h index 4c059b18..efe85316 100755 --- a/ge/host_kernels/slice_kernel.h +++ b/ge/host_kernels/slice_kernel.h @@ -28,6 +28,7 @@ class SliceKernel : public Kernel { vector &v_output) override; Status CheckOutputDims(const std::vector &output_dims, const OpDescPtr attr); + Status CheckInput(const ConstGeTensorPtr x_, const ConstGeTensorPtr begin, const ConstGeTensorPtr size); }; } // namespace ge diff --git a/ge/hybrid/node_executor/hccl/hccl_node_executor.cc b/ge/hybrid/node_executor/hccl/hccl_node_executor.cc index c46d5080..0aee5122 100644 --- a/ge/hybrid/node_executor/hccl/hccl_node_executor.cc +++ b/ge/hybrid/node_executor/hccl/hccl_node_executor.cc @@ -95,8 +95,8 @@ Status HcclNodeTask::ExecuteAsync(TaskContext &context, std::function do } op_info.dataType = iter->second; HcclReduceOp op_type = HCCL_REDUCE_SUM; - if (op_desc->GetType() == HCOMALLREDUCE || op_desc->GetType() == HCOMREDUCESCATTER || - op_desc->GetType() == HVDCALLBACKALLREDUCE || op_desc->GetType() == HCOMREDUCE) { + std::set hccl_types = { HCOMALLREDUCE, HCOMREDUCESCATTER, HVDCALLBACKALLREDUCE, HCOMREDUCE }; + if (hccl_types.count(op_desc->GetType()) > 0) { GE_CHK_STATUS_RET(HcomOmeUtil::GetHcclOperationType(op_desc, op_type), "[Get][HcclOperationType] failed for %s type:%s", op_desc->GetName().c_str(), op_desc->GetType().c_str()); diff --git a/ge/ir_build/ge_ir_build.cc b/ge/ir_build/ge_ir_build.cc index c7e9522b..6b6fd1f5 100644 --- a/ge/ir_build/ge_ir_build.cc +++ b/ge/ir_build/ge_ir_build.cc @@ -283,6 +283,7 @@ class Impl { void SetRtSocVersion(); void UpdateThreadContext(); void LoadOpsProto(); + std::string GetParam(const std::string ¶m); public: ge::GeGenerator generator_; std::map options_; @@ -512,6 +513,10 @@ graphStatus Impl::CheckOptions(const std::map &options return GRAPH_SUCCESS; } +std::string Impl::GetParam(const std::string ¶m) { + return options_.find(param) == options_.end() ? "" : options_[param]; +} + graphStatus Impl::Init(const Graph &graph, const std::map &options) { // 1. check options graphStatus ret = CheckOptions(options); @@ -533,20 +538,14 @@ graphStatus Impl::Init(const Graph &graph, const std::map Date: Wed, 2 Jun 2021 14:41:21 +0800 Subject: [PATCH 02/18] fix sc --- ge/graph/partition/graph_partition.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ge/graph/partition/graph_partition.cc b/ge/graph/partition/graph_partition.cc index 7df23f89..c3f9480d 100755 --- a/ge/graph/partition/graph_partition.cc +++ b/ge/graph/partition/graph_partition.cc @@ -427,7 +427,7 @@ graphStatus ge::GraphPartitioner::AddPlaceHolderEndInSrcDstGraph(const AnchorPtr const string pld_name = kPlaceHolderType + std::to_string(graph_info_.num_of_pld_end_); auto pld_op_desc = MakeShared(pld_graph->GetName() + "_" + pld_name, PLACEHOLDER); GE_CHECK_NOTNULL(pld_op_desc); - + GE_IF_BOOL_EXEC(!AttrUtils::SetInt(pld_op_desc, "peerIndex", graph_info_.num_of_pld_end_), GELOGW("SetInt peerIndex failed");) GE_IF_BOOL_EXEC(!AttrUtils::SetStr(pld_op_desc, "_peerNodeName", new_end_node->GetName()), From c12e327607fe3a8a043a0ecef66d13845afe63c4 Mon Sep 17 00:00:00 2001 From: wjm Date: Wed, 2 Jun 2021 15:44:35 +0800 Subject: [PATCH 03/18] fix --- ge/graph/build/memory/graph_mem_assigner.cc | 1 - 1 file changed, 1 deletion(-) diff --git a/ge/graph/build/memory/graph_mem_assigner.cc b/ge/graph/build/memory/graph_mem_assigner.cc index db7258d6..61819f8f 100755 --- a/ge/graph/build/memory/graph_mem_assigner.cc +++ b/ge/graph/build/memory/graph_mem_assigner.cc @@ -543,7 +543,6 @@ Status GraphMemoryAssigner::UpdateRefOpOffsetReverse(const NodePtr &node) { } Status GraphMemoryAssigner::ReAssignContinuousMemory(bool is_loop_graph) { - Status ret; // Stored nodes which need assign continuous input memory in `reverse topo order` std::vector nodes_stack; std::map node_2_continuous_type; From 6dbe0472bb3d8a03783cc7bf4407555be281b954 Mon Sep 17 00:00:00 2001 From: wjm Date: Wed, 2 Jun 2021 17:37:07 +0800 Subject: [PATCH 04/18] fix more sc --- ge/graph/preprocess/graph_preprocess.cc | 36 ++++++++++++++----------- ge/graph/preprocess/graph_preprocess.h | 1 + 2 files changed, 22 insertions(+), 15 deletions(-) diff --git a/ge/graph/preprocess/graph_preprocess.cc b/ge/graph/preprocess/graph_preprocess.cc index 2eae6023..5a491f19 100644 --- a/ge/graph/preprocess/graph_preprocess.cc +++ b/ge/graph/preprocess/graph_preprocess.cc @@ -1423,6 +1423,25 @@ Status GraphPrepare::AdjustDataOpOutput(const NodePtr &node) { return SUCCESS; } +Status GraphPrepare::CheckInternalFormat(const NodePtr &input_node, const GeTensorDesc &desc, bool tune_flag) { + auto format = desc.GetFormat(); + auto origin_format = desc.GetOriginFormat(); + bool need_check_internal_format = (!IsTansDataOpData(input_node)) && (!options_.is_single_op) && (!tune_flag); + if (need_check_internal_format) { + bool is_internal = TypeUtils::IsInternalFormat(format) || TypeUtils::IsInternalFormat(origin_format); + if (is_internal) { + ErrorManager::GetInstance().ATCReportErrMessage("E19025", {"situation", "reason"}, + {"Input format[" + TypeUtils::FormatToSerialString(format) + "] or origin_format[" + + TypeUtils::FormatToSerialString(origin_format) + "]", "it is not support"}); + GELOGE(PARAM_INVALID, "[Check][Param] Input format %s or origin_format %s is not support.", + TypeUtils::FormatToSerialString(format).c_str(), + TypeUtils::FormatToSerialString(origin_format).c_str()); + return FAILED; + } + } + return SUCCESS; +} + Status GraphPrepare::UpdateInput(const std::vector &user_input, const std::map &graph_option) { // Get shape range of input in dynamic_execute mode @@ -1454,23 +1473,10 @@ Status GraphPrepare::UpdateInput(const std::vector &user_input, continue; } GeTensorDesc desc(user_input[index].GetTensorDesc()); - auto format = desc.GetFormat(); - auto origin_format = desc.GetOriginFormat(); // data maybe internal format [FRACTAL_NZ] at singleop process such as GEMM. auto tune_flag = (options_.build_mode == BUILD_MODE_TUNING) && (options_.build_step == BUILD_STEP_AFTER_BUILDER); - bool need_check_internal_format = (!IsTansDataOpData(input_node)) && (!options_.is_single_op) && (!tune_flag); - if (need_check_internal_format) { - bool is_internal = TypeUtils::IsInternalFormat(format) || TypeUtils::IsInternalFormat(origin_format); - if (is_internal) { - ErrorManager::GetInstance().ATCReportErrMessage("E19025", {"situation", "reason"}, - {"Input format[" + TypeUtils::FormatToSerialString(format) + "] or origin_format[" + - TypeUtils::FormatToSerialString(origin_format) + "]", "it is not support"}); - GELOGE(PARAM_INVALID, "[Check][Param] Input format %s or origin_format %s is not support.", - TypeUtils::FormatToSerialString(format).c_str(), - TypeUtils::FormatToSerialString(origin_format).c_str()); - return FAILED; - } - } + GE_CHK_STATUS_RET(CheckInternalFormat(input_node, desc, tune_flag), "[Check][InternalFormat] on %s failed.", + op->GetName().c_str()); auto data_type = desc.GetDataType(); uint32_t length = 1; diff --git a/ge/graph/preprocess/graph_preprocess.h b/ge/graph/preprocess/graph_preprocess.h index 3eb5e03a..584f4d16 100755 --- a/ge/graph/preprocess/graph_preprocess.h +++ b/ge/graph/preprocess/graph_preprocess.h @@ -63,6 +63,7 @@ class GraphPrepare { Status CheckRefOp(); Status SetRtContext(rtContext_t rt_context, rtCtxMode_t mode); Status AdjustDataOpOutput(const NodePtr &node); + Status CheckInternalFormat(const NodePtr &input_node, const GeTensorDesc &desc, bool tune_flag); Status UpdateInput(const std::vector &user_input, const std::map &graph_option); Status CheckAndUpdateInput(const std::vector &user_input, const std::map &graph_option); Status CheckConstOp(); From d30e88ab79fe9cf47f88d7d91d02736a84a01913 Mon Sep 17 00:00:00 2001 From: wjm Date: Wed, 2 Jun 2021 22:41:37 +0800 Subject: [PATCH 05/18] fix more sc --- ge/ge_runtime/task/hccl_task.cc | 6 +- .../node_executor/hccl/hccl_node_executor.cc | 122 +++++++++--------- .../node_executor/hccl/hccl_node_executor.h | 3 + 3 files changed, 68 insertions(+), 63 deletions(-) diff --git a/ge/ge_runtime/task/hccl_task.cc b/ge/ge_runtime/task/hccl_task.cc index 2169f96a..65690683 100644 --- a/ge/ge_runtime/task/hccl_task.cc +++ b/ge/ge_runtime/task/hccl_task.cc @@ -154,10 +154,8 @@ bool HcclTask::SetSecondaryStream() { return false; } stream = std::make_shared(rt_model_handle_, new_stream); - if (stream == nullptr) { - GELOGE(FAILED, "MakeShared failed."); - return false; - } + GE_IF_BOOL_EXEC(stream == nullptr, return false); + secondary_stream_vec[index] = stream; } secondary_stream_list_.push_back(stream); diff --git a/ge/hybrid/node_executor/hccl/hccl_node_executor.cc b/ge/hybrid/node_executor/hccl/hccl_node_executor.cc index 0aee5122..0c6d4eaf 100644 --- a/ge/hybrid/node_executor/hccl/hccl_node_executor.cc +++ b/ge/hybrid/node_executor/hccl/hccl_node_executor.cc @@ -20,7 +20,6 @@ #include "graph/attr_value.h" #include "graph/debug/ge_attr_define.h" #include "graph/manager/util/hcom_util.h" -#include "graph/runtime_inference_context.h" #include "graph/utils/type_utils.h" #include "graph/types.h" #include "hccl/hcom.h" @@ -177,61 +176,8 @@ Status RdmaNodeTask::Init(TaskContext &context) { return SUCCESS; } -Status RdmaNodeTask::ExtractTensor(TaskContext &context, vector &addr_infos) { - RuntimeInferenceContext *ctx = nullptr; - GE_CHK_STATUS_RET( - RuntimeInferenceContext::GetContext(std::to_string(context.GetExecutionContext()->context_id), &ctx)); - - ge::Tensor remote_tensor; - GE_CHK_STATUS_RET(ctx->GetTensor(remote_index_.first, remote_index_.second, remote_tensor)); - auto data = reinterpret_cast(remote_tensor.GetData()); - if (data == nullptr) { - if (kRdmaScatterTypes.count(context.GetNodeItem().NodeType()) > 0) { - GELOGD("data is null, no need to do rdma read/write, node=%s", context.GetNodeName()); - return SUCCESS; - } else { - REPORT_INNER_ERROR("E19999", "Tensor data is nullptr. and kRdmaScatterTypes not contain %s", - context.GetNodeItem().NodeType().c_str()); - GELOGE(FAILED, "[Find][NodeType]Tensor data is nullptr. and kRdmaScatterTypes not contain %s", - context.GetNodeItem().NodeType().c_str()); - return FAILED; - } - } - auto dims = remote_tensor.GetTensorDesc().GetShape().GetDims(); - if (dims.size() != kVarTableDims && dims.back() != kVarTableRowCnt) { - REPORT_INNER_ERROR("E19999", "Variable table shape check failed, number of shape dims:%zu not equal expect:%zu" - "and shape dims back:%zu not equal expect:%zu, node:%s(%s)", - dims.size(), kVarTableDims, dims.back(), kVarTableRowCnt, - context.GetNodeName(), context.GetNodeItem().NodeType().c_str()); - GELOGE(PARAM_INVALID, "[Check][Param]Variable table shape check failed," - "number of shape dims:%zu not equal expect:%zu and shape dims back:%zu not equal expect:%zu, node:%s(%s)", - dims.size(), kVarTableDims, dims.back(), kVarTableRowCnt, - context.GetNodeName(), context.GetNodeItem().NodeType().c_str()); - return PARAM_INVALID; - } - - if (context.GetNodeItem().NodeType() == HCOMREMOTEREAD) { - size_t remote_size = 0; - for (auto idx = 0; idx < dims.front(); ++idx) { - FMK_INT64_MULCHECK(idx, kVarTableRowCnt); - auto line_idx = idx * kVarTableRowCnt; - remote_size += data[line_idx + kVarTableIdxLen]; - } - auto allocator = NpuMemoryAllocator::GetAllocator(); - GE_CHECK_NOTNULL(allocator); - AllocationAttr attr; - attr.SetMemType(RDMA_HBM); - for (auto i = 0; i < context.NumOutputs(); ++i) { - GELOGD("Allocate rdma memory for node %s, size: %zu", context.GetNodeName(), remote_size); - auto tensor_buffer = TensorBuffer::Create(allocator, remote_size, &attr); - GE_CHK_STATUS_RET(context.SetOutput(i, TensorValue(std::shared_ptr(tensor_buffer.release())))); - } - } else if (context.GetNodeItem().NodeType() == HCOMREMOTEREFREAD) { - AllocationAttr attr; - attr.SetMemType(RDMA_HBM); - GE_CHK_STATUS_RET(context.AllocateOutputs(&attr)) - } - +Status RdmaNodeTask::SetAddrInfo(TaskContext &context, RuntimeInferenceContext *ctx, uint64_t *data, int64_t row_num, + vector &addr_infos) { TensorValue *tv; if (kRdmaReadTypes.count(context.GetNodeItem().NodeType()) > 0) { tv = context.MutableOutput(local_index_); @@ -239,7 +185,6 @@ Status RdmaNodeTask::ExtractTensor(TaskContext &context, vectorGetInputIndexByName("local_offset"); @@ -250,10 +195,10 @@ Status RdmaNodeTask::ExtractTensor(TaskContext &context, vectorGetTensor(offset_index_.first, offset_index_.second, offset_tensor)) if (static_cast(offset_tensor.GetSize() / GetSizeByDataType(data_type)) != row_num) { REPORT_INNER_ERROR("E19999", "num of offset and remote addr mismatch, check invalid" - "offset size=%zu, remote_addr size=%ld, dtype=%s", offset_tensor.GetSize(), row_num, + "offset size=%zu, remote_addr size=%ld, dtype=%s", offset_tensor.GetSize(), row_num, TypeUtils::DataTypeToSerialString(data_type).c_str()); GELOGE(PARAM_INVALID, "[Check][Size]num of offset and remote addr mismatch," - "offset size=%zu, remote_addr size=%ld, dtype=%s", + "offset size=%zu, remote_addr size=%ld, dtype=%s", offset_tensor.GetSize(), row_num, TypeUtils::DataTypeToSerialString(data_type).c_str()); return PARAM_INVALID; } @@ -294,6 +239,65 @@ Status RdmaNodeTask::ExtractTensor(TaskContext &context, vector &addr_infos) { + RuntimeInferenceContext *ctx = nullptr; + GE_CHK_STATUS_RET( + RuntimeInferenceContext::GetContext(std::to_string(context.GetExecutionContext()->context_id), &ctx)); + + ge::Tensor remote_tensor; + GE_CHK_STATUS_RET(ctx->GetTensor(remote_index_.first, remote_index_.second, remote_tensor)); + auto data = reinterpret_cast(remote_tensor.GetData()); + if (data == nullptr) { + if (kRdmaScatterTypes.count(context.GetNodeItem().NodeType()) > 0) { + GELOGD("data is null, no need to do rdma read/write, node=%s", context.GetNodeName()); + return SUCCESS; + } else { + REPORT_INNER_ERROR("E19999", "Tensor data is nullptr. and kRdmaScatterTypes not contain %s", + context.GetNodeItem().NodeType().c_str()); + GELOGE(FAILED, "[Find][NodeType]Tensor data is nullptr. and kRdmaScatterTypes not contain %s", + context.GetNodeItem().NodeType().c_str()); + return FAILED; + } + } + auto dims = remote_tensor.GetTensorDesc().GetShape().GetDims(); + if (dims.size() != kVarTableDims && dims.back() != kVarTableRowCnt) { + REPORT_INNER_ERROR("E19999", "Variable table shape check failed, number of shape dims:%zu not equal expect:%zu" + "and shape dims back:%zu not equal expect:%zu, node:%s(%s)", + dims.size(), kVarTableDims, dims.back(), kVarTableRowCnt, + context.GetNodeName(), context.GetNodeItem().NodeType().c_str()); + GELOGE(PARAM_INVALID, "[Check][Param]Variable table shape check failed," + "number of shape dims:%zu not equal expect:%zu and shape dims back:%zu not equal expect:%zu, node:%s(%s)", + dims.size(), kVarTableDims, dims.back(), kVarTableRowCnt, + context.GetNodeName(), context.GetNodeItem().NodeType().c_str()); + return PARAM_INVALID; + } + + if (context.GetNodeItem().NodeType() == HCOMREMOTEREAD) { + size_t remote_size = 0; + for (auto idx = 0; idx < dims.front(); ++idx) { + FMK_INT64_MULCHECK(idx, kVarTableRowCnt); + auto line_idx = idx * kVarTableRowCnt; + remote_size += data[line_idx + kVarTableIdxLen]; + } + auto allocator = NpuMemoryAllocator::GetAllocator(); + GE_CHECK_NOTNULL(allocator); + AllocationAttr attr; + attr.SetMemType(RDMA_HBM); + for (auto i = 0; i < context.NumOutputs(); ++i) { + GELOGD("Allocate rdma memory for node %s, size: %zu", context.GetNodeName(), remote_size); + auto tensor_buffer = TensorBuffer::Create(allocator, remote_size, &attr); + GE_CHK_STATUS_RET(context.SetOutput(i, TensorValue(std::shared_ptr(tensor_buffer.release())))); + } + } else if (context.GetNodeItem().NodeType() == HCOMREMOTEREFREAD) { + AllocationAttr attr; + attr.SetMemType(RDMA_HBM); + GE_CHK_STATUS_RET(context.AllocateOutputs(&attr)) + } + + auto row_num = dims.front(); + return SetAddrInfo(context, ctx, data, row_num, addr_infos); +} + Status RdmaNodeTask::ExecuteAsync(TaskContext &context, std::function done_callback) { GELOGI("[%s] RdmaNodeTask::ExecuteAsync in.", context.GetNodeName()); auto HcomExecEnqueueRemoteAccess = diff --git a/ge/hybrid/node_executor/hccl/hccl_node_executor.h b/ge/hybrid/node_executor/hccl/hccl_node_executor.h index 873f259f..9e6d41a4 100644 --- a/ge/hybrid/node_executor/hccl/hccl_node_executor.h +++ b/ge/hybrid/node_executor/hccl/hccl_node_executor.h @@ -18,6 +18,7 @@ #define HYBRID_HCCL_NODE_EXECUTOR_H_ #include "common/opskernel/ge_task_info.h" #include "graph/op_desc.h" +#include "graph/runtime_inference_context.h" #include "hybrid/model/hybrid_model.h" #include "hybrid/node_executor/node_executor.h" @@ -53,6 +54,8 @@ class RdmaNodeTask : public NodeTask { Status Init(TaskContext &context) override; private: + Status SetAddrInfo(TaskContext &context, RuntimeInferenceContext *ctx, uint64_t *data, int64_t row_num, + vector &addr_infos); Status ExtractTensor(TaskContext &context, vector &addr_infos); std::pair remote_index_; std::pair offset_index_; From ad7234276f5b7de94383448ab0227cb783a05f80 Mon Sep 17 00:00:00 2001 From: wjm Date: Wed, 2 Jun 2021 23:04:22 +0800 Subject: [PATCH 06/18] fix more sc --- ge/hybrid/node_executor/hccl/hccl_node_executor.cc | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/ge/hybrid/node_executor/hccl/hccl_node_executor.cc b/ge/hybrid/node_executor/hccl/hccl_node_executor.cc index 0c6d4eaf..e01d20fd 100644 --- a/ge/hybrid/node_executor/hccl/hccl_node_executor.cc +++ b/ge/hybrid/node_executor/hccl/hccl_node_executor.cc @@ -195,10 +195,10 @@ Status RdmaNodeTask::SetAddrInfo(TaskContext &context, RuntimeInferenceContext * GE_CHK_STATUS_RET(ctx->GetTensor(offset_index_.first, offset_index_.second, offset_tensor)) if (static_cast(offset_tensor.GetSize() / GetSizeByDataType(data_type)) != row_num) { REPORT_INNER_ERROR("E19999", "num of offset and remote addr mismatch, check invalid" - "offset size=%zu, remote_addr size=%ld, dtype=%s", offset_tensor.GetSize(), row_num, + "offset size=%zu, remote_addr size=%ld, dtype=%s", offset_tensor.GetSize(), row_num, TypeUtils::DataTypeToSerialString(data_type).c_str()); GELOGE(PARAM_INVALID, "[Check][Size]num of offset and remote addr mismatch," - "offset size=%zu, remote_addr size=%ld, dtype=%s", + "offset size=%zu, remote_addr size=%ld, dtype=%s", offset_tensor.GetSize(), row_num, TypeUtils::DataTypeToSerialString(data_type).c_str()); return PARAM_INVALID; } From cc6e9349633c98aa81682581cd25d9f16ee96940 Mon Sep 17 00:00:00 2001 From: wjm Date: Thu, 3 Jun 2021 05:15:51 +0800 Subject: [PATCH 07/18] add mem --- ge/graph/build/memory/graph_mem_assigner.cc | 101 +++++++++++--------- ge/graph/build/memory/graph_mem_assigner.h | 3 + 2 files changed, 61 insertions(+), 43 deletions(-) diff --git a/ge/graph/build/memory/graph_mem_assigner.cc b/ge/graph/build/memory/graph_mem_assigner.cc index 61819f8f..340bfe3e 100755 --- a/ge/graph/build/memory/graph_mem_assigner.cc +++ b/ge/graph/build/memory/graph_mem_assigner.cc @@ -608,6 +608,61 @@ Status GraphMemoryAssigner::ReAssignContinuousMemory(bool is_loop_graph) { return ge::SUCCESS; } +Status GraphMemoryAssigner::SetMemOffset(const ge::NodePtr &node, InDataAnchorPtr in_data_anchor, bool reverse_refresh, + int64_t &mem_offset, int64_t &continuous_mem_start) { + auto op_desc = node->GetOpDesc(); + GE_CHECK_NOTNULL(op_desc); + vector output_list_this = op_desc->GetOutputOffset(); + if (output_list_this.empty()) { + REPORT_INNER_ERROR("E19999", "No output offset in node :%s, not expected", + node->GetName().c_str()); + GELOGE(FAILED, "[Get][OutputOffset] empty is invalid, node:%s", node->GetName().c_str()); + return FAILED; + } + + auto peer_out_data_anchor = in_data_anchor->GetPeerOutAnchor(); + auto peer_op_desc = peer_out_data_anchor->GetOwnerNode()->GetOpDesc(); + vector output_list = peer_op_desc->GetOutputOffset(); + if (peer_out_data_anchor->GetIdx() >= static_cast(output_list.size())) { + std::string error = "peer node:" + FmtToStr(peer_op_desc->GetName()) + + " anchor_index:" + FmtToStr(peer_out_data_anchor->GetIdx()) + + " is out of range:" + FmtToStr(output_list.size()); + GE_ERRORLOG_AND_ERRORMSG(FAILED, error.c_str()); + return FAILED; + } + + // when continuous input has been allocated first input is beginning offset + bool is_continuous_input_allocated = false; + (void) ge::AttrUtils::GetBool(op_desc, ATTR_NAME_CONTINUOUS_INPUT_ALLOC, is_continuous_input_allocated); + bool is_allocated_first_input = is_continuous_input_allocated && (in_data_anchor->GetIdx() == 0); + if (is_allocated_first_input) { + std::map out2ins; + GE_CHK_STATUS_RET(TryGetNodeRefIndexes(node, out2ins), "[Get][RefIndexes]fail for node: %s", + node->GetName().c_str()); + // output is beginning offset, set offset for input; only support this case now + if ((out2ins.size() == 1) && (out2ins.begin()->second == 0) && (reverse_refresh)) { + auto peer_output_offset = output_list.at(peer_out_data_anchor->GetIdx()); + output_list.at(peer_out_data_anchor->GetIdx()) = output_list_this.at(out2ins.begin()->first); + peer_op_desc->SetOutputOffset(output_list); + GELOGI("[Update][Offset]Node %s out %d ref in %d input node %s, use output offset %ld update %ld", + node->GetName().c_str(), out2ins.begin()->first, out2ins.begin()->second, + peer_op_desc->GetName().c_str(), output_list_this.at(out2ins.begin()->first), peer_output_offset); + } else { + GELOGD("Node %s out %d ref in %d input node %s with total ref numbers %zu.", node->GetName().c_str(), + out2ins.begin()->first, out2ins.begin()->second, peer_op_desc->GetName().c_str(), out2ins.size()); + } + // first input is beginning offset + mem_offset = output_list.at(peer_out_data_anchor->GetIdx()); + continuous_mem_start = output_list.at(peer_out_data_anchor->GetIdx()); + } else { + // set offset for input + output_list.at(peer_out_data_anchor->GetIdx()) = mem_offset; + peer_op_desc->SetOutputOffset(output_list); + } + + return SUCCESS; +} + Status GraphMemoryAssigner::AssignContinuousInputMemory(const ge::NodePtr &node, int64_t &continuous_mem_start, int64_t &continuous_mem_size, int64_t memory_type, uint32_t continuous_type, bool reverse_refresh) { GELOGI("[Assign][Memory:Input:Continuous]start for Current node %s", node->GetName().c_str()); @@ -627,13 +682,6 @@ Status GraphMemoryAssigner::AssignContinuousInputMemory(const ge::NodePtr &node, bool is_continuous_input_allocated = false; auto op_desc = node->GetOpDesc(); GE_CHECK_NOTNULL(op_desc); - vector output_list_this = op_desc->GetOutputOffset(); - if (output_list_this.empty()) { - REPORT_INNER_ERROR("E19999", "No output offset in node :%s, not expected", - node->GetName().c_str()); - GELOGE(FAILED, "[Get][OutputOffset] empty is invalid, node:%s", node->GetName().c_str()); - return FAILED; - } (void) ge::AttrUtils::GetBool(op_desc, ATTR_NAME_CONTINUOUS_INPUT_ALLOC, is_continuous_input_allocated); for (auto &in_data_anchor : node->GetAllInDataAnchors()) { GE_IF_BOOL_EXEC(in_data_anchor == nullptr, continue); @@ -665,45 +713,12 @@ Status GraphMemoryAssigner::AssignContinuousInputMemory(const ge::NodePtr &node, return FAILED; } } - - bool is_nopadding = ((continuous_type & kTypeInputNoPadding) != 0) || lx_fusion; - vector output_list = peer_op_desc->GetOutputOffset(); - if (peer_out_data_anchor->GetIdx() >= static_cast(output_list.size())) { - std::string error = "peer node:" + FmtToStr(peer_op_desc->GetName()) + - " anchor_index:" + FmtToStr(peer_out_data_anchor->GetIdx()) + - " is out of range:" + FmtToStr(output_list.size()); - GE_ERRORLOG_AND_ERRORMSG(FAILED, error.c_str()); + if (SetMemOffset(node, in_data_anchor, reverse_refresh, mem_offset, continuous_mem_start) != ge::SUCCESS) { return FAILED; } - // when continuous input has been allocated first input is beginning offset - bool is_allocated_first_input = is_continuous_input_allocated && (in_data_anchor->GetIdx() == 0); - if (is_allocated_first_input) { - std::map out2ins; - GE_CHK_STATUS_RET(TryGetNodeRefIndexes(node, out2ins), "[Get][RefIndexes]fail for node: %s", - node->GetName().c_str()); - // output is beginning offset, set offset for input; only support this case now - if ((out2ins.size() == 1) && (out2ins.begin()->second == 0) && (reverse_refresh)) { - auto peer_output_offset = output_list.at(peer_out_data_anchor->GetIdx()); - output_list.at(peer_out_data_anchor->GetIdx()) = output_list_this.at(out2ins.begin()->first); - peer_op_desc->SetOutputOffset(output_list); - GELOGI("[Update][Offset]Node %s out %d ref in %d input node %s, use output offset %ld update %ld", - node->GetName().c_str(), out2ins.begin()->first, out2ins.begin()->second, - peer_op_desc->GetName().c_str(), output_list_this.at(out2ins.begin()->first), peer_output_offset); - } else { - GELOGD("Node %s out %d ref in %d input node %s with total ref numbers %zu.", node->GetName().c_str(), - out2ins.begin()->first, out2ins.begin()->second, peer_op_desc->GetName().c_str(), out2ins.size()); - } - // first input is beginning offset - mem_offset = output_list.at(peer_out_data_anchor->GetIdx()); - continuous_mem_start = output_list.at(peer_out_data_anchor->GetIdx()); - } else { - // set offset for input - output_list.at(peer_out_data_anchor->GetIdx()) = mem_offset; - peer_op_desc->SetOutputOffset(output_list); - } - int64_t align_size = tensor_desc_size; + bool is_nopadding = ((continuous_type & kTypeInputNoPadding) != 0) || lx_fusion; if (is_nopadding) { mem_offset += nopadding_size; extra_memory_size += (tensor_desc_size - nopadding_size); @@ -715,7 +730,7 @@ Status GraphMemoryAssigner::AssignContinuousInputMemory(const ge::NodePtr &node, extra_memory_size = MEM_ALIGN_SIZE; real_size = tensor_desc_size; } - + vector output_list = peer_op_desc->GetOutputOffset(); GELOGI("[IMAS]Continuous input : Set %s name[%s] optype[%s] output[%d] offset to [%zu] stream_id[%ld] memtype[%ld] " "size[%zu] realsize[%ld] nopadding size[%d]", node->GetOwnerComputeGraph()->GetName().c_str(), peer_op_desc->GetName().c_str(), node->GetType().c_str(), peer_out_data_anchor->GetIdx(), diff --git a/ge/graph/build/memory/graph_mem_assigner.h b/ge/graph/build/memory/graph_mem_assigner.h index a6a2a686..b237b5b3 100755 --- a/ge/graph/build/memory/graph_mem_assigner.h +++ b/ge/graph/build/memory/graph_mem_assigner.h @@ -146,6 +146,9 @@ class GraphMemoryAssigner { ge::Status FilterAtomicNodesForMemoryAssign(map>> &normal_atomic_nodes_map, map> &connecting_output_atomic_nodes); + Status SetMemOffset(const ge::NodePtr &node, InDataAnchorPtr in_data_anchor, bool reverse_refresh, + int64_t &mem_offset, int64_t &continuous_mem_start); + ge::Status AssignContinuousInputMemory(const ge::NodePtr &node, int64_t &continuous_mem_start, int64_t &continuous_mem_size, int64_t memory_type, uint32_t continuous_type, bool reverse_refresh = false); From 69f5eb0be5d42e05cbc2f00a500ff428c5408f53 Mon Sep 17 00:00:00 2001 From: wjm Date: Thu, 3 Jun 2021 05:50:18 +0800 Subject: [PATCH 08/18] remove hccl --- .../node_executor/hccl/hccl_node_executor.cc | 118 +++++++++--------- .../node_executor/hccl/hccl_node_executor.h | 3 - 2 files changed, 57 insertions(+), 64 deletions(-) diff --git a/ge/hybrid/node_executor/hccl/hccl_node_executor.cc b/ge/hybrid/node_executor/hccl/hccl_node_executor.cc index e01d20fd..0aee5122 100644 --- a/ge/hybrid/node_executor/hccl/hccl_node_executor.cc +++ b/ge/hybrid/node_executor/hccl/hccl_node_executor.cc @@ -20,6 +20,7 @@ #include "graph/attr_value.h" #include "graph/debug/ge_attr_define.h" #include "graph/manager/util/hcom_util.h" +#include "graph/runtime_inference_context.h" #include "graph/utils/type_utils.h" #include "graph/types.h" #include "hccl/hcom.h" @@ -176,8 +177,61 @@ Status RdmaNodeTask::Init(TaskContext &context) { return SUCCESS; } -Status RdmaNodeTask::SetAddrInfo(TaskContext &context, RuntimeInferenceContext *ctx, uint64_t *data, int64_t row_num, - vector &addr_infos) { +Status RdmaNodeTask::ExtractTensor(TaskContext &context, vector &addr_infos) { + RuntimeInferenceContext *ctx = nullptr; + GE_CHK_STATUS_RET( + RuntimeInferenceContext::GetContext(std::to_string(context.GetExecutionContext()->context_id), &ctx)); + + ge::Tensor remote_tensor; + GE_CHK_STATUS_RET(ctx->GetTensor(remote_index_.first, remote_index_.second, remote_tensor)); + auto data = reinterpret_cast(remote_tensor.GetData()); + if (data == nullptr) { + if (kRdmaScatterTypes.count(context.GetNodeItem().NodeType()) > 0) { + GELOGD("data is null, no need to do rdma read/write, node=%s", context.GetNodeName()); + return SUCCESS; + } else { + REPORT_INNER_ERROR("E19999", "Tensor data is nullptr. and kRdmaScatterTypes not contain %s", + context.GetNodeItem().NodeType().c_str()); + GELOGE(FAILED, "[Find][NodeType]Tensor data is nullptr. and kRdmaScatterTypes not contain %s", + context.GetNodeItem().NodeType().c_str()); + return FAILED; + } + } + auto dims = remote_tensor.GetTensorDesc().GetShape().GetDims(); + if (dims.size() != kVarTableDims && dims.back() != kVarTableRowCnt) { + REPORT_INNER_ERROR("E19999", "Variable table shape check failed, number of shape dims:%zu not equal expect:%zu" + "and shape dims back:%zu not equal expect:%zu, node:%s(%s)", + dims.size(), kVarTableDims, dims.back(), kVarTableRowCnt, + context.GetNodeName(), context.GetNodeItem().NodeType().c_str()); + GELOGE(PARAM_INVALID, "[Check][Param]Variable table shape check failed," + "number of shape dims:%zu not equal expect:%zu and shape dims back:%zu not equal expect:%zu, node:%s(%s)", + dims.size(), kVarTableDims, dims.back(), kVarTableRowCnt, + context.GetNodeName(), context.GetNodeItem().NodeType().c_str()); + return PARAM_INVALID; + } + + if (context.GetNodeItem().NodeType() == HCOMREMOTEREAD) { + size_t remote_size = 0; + for (auto idx = 0; idx < dims.front(); ++idx) { + FMK_INT64_MULCHECK(idx, kVarTableRowCnt); + auto line_idx = idx * kVarTableRowCnt; + remote_size += data[line_idx + kVarTableIdxLen]; + } + auto allocator = NpuMemoryAllocator::GetAllocator(); + GE_CHECK_NOTNULL(allocator); + AllocationAttr attr; + attr.SetMemType(RDMA_HBM); + for (auto i = 0; i < context.NumOutputs(); ++i) { + GELOGD("Allocate rdma memory for node %s, size: %zu", context.GetNodeName(), remote_size); + auto tensor_buffer = TensorBuffer::Create(allocator, remote_size, &attr); + GE_CHK_STATUS_RET(context.SetOutput(i, TensorValue(std::shared_ptr(tensor_buffer.release())))); + } + } else if (context.GetNodeItem().NodeType() == HCOMREMOTEREFREAD) { + AllocationAttr attr; + attr.SetMemType(RDMA_HBM); + GE_CHK_STATUS_RET(context.AllocateOutputs(&attr)) + } + TensorValue *tv; if (kRdmaReadTypes.count(context.GetNodeItem().NodeType()) > 0) { tv = context.MutableOutput(local_index_); @@ -185,6 +239,7 @@ Status RdmaNodeTask::SetAddrInfo(TaskContext &context, RuntimeInferenceContext * tv = context.MutableInput(local_index_); } GE_CHECK_NOTNULL(tv); + auto row_num = dims.front(); addr_infos.resize(row_num); if (skip_flag_) { int32_t offset_idx = context.GetNodeItem().op_desc->GetInputIndexByName("local_offset"); @@ -239,65 +294,6 @@ Status RdmaNodeTask::SetAddrInfo(TaskContext &context, RuntimeInferenceContext * return SUCCESS; } -Status RdmaNodeTask::ExtractTensor(TaskContext &context, vector &addr_infos) { - RuntimeInferenceContext *ctx = nullptr; - GE_CHK_STATUS_RET( - RuntimeInferenceContext::GetContext(std::to_string(context.GetExecutionContext()->context_id), &ctx)); - - ge::Tensor remote_tensor; - GE_CHK_STATUS_RET(ctx->GetTensor(remote_index_.first, remote_index_.second, remote_tensor)); - auto data = reinterpret_cast(remote_tensor.GetData()); - if (data == nullptr) { - if (kRdmaScatterTypes.count(context.GetNodeItem().NodeType()) > 0) { - GELOGD("data is null, no need to do rdma read/write, node=%s", context.GetNodeName()); - return SUCCESS; - } else { - REPORT_INNER_ERROR("E19999", "Tensor data is nullptr. and kRdmaScatterTypes not contain %s", - context.GetNodeItem().NodeType().c_str()); - GELOGE(FAILED, "[Find][NodeType]Tensor data is nullptr. and kRdmaScatterTypes not contain %s", - context.GetNodeItem().NodeType().c_str()); - return FAILED; - } - } - auto dims = remote_tensor.GetTensorDesc().GetShape().GetDims(); - if (dims.size() != kVarTableDims && dims.back() != kVarTableRowCnt) { - REPORT_INNER_ERROR("E19999", "Variable table shape check failed, number of shape dims:%zu not equal expect:%zu" - "and shape dims back:%zu not equal expect:%zu, node:%s(%s)", - dims.size(), kVarTableDims, dims.back(), kVarTableRowCnt, - context.GetNodeName(), context.GetNodeItem().NodeType().c_str()); - GELOGE(PARAM_INVALID, "[Check][Param]Variable table shape check failed," - "number of shape dims:%zu not equal expect:%zu and shape dims back:%zu not equal expect:%zu, node:%s(%s)", - dims.size(), kVarTableDims, dims.back(), kVarTableRowCnt, - context.GetNodeName(), context.GetNodeItem().NodeType().c_str()); - return PARAM_INVALID; - } - - if (context.GetNodeItem().NodeType() == HCOMREMOTEREAD) { - size_t remote_size = 0; - for (auto idx = 0; idx < dims.front(); ++idx) { - FMK_INT64_MULCHECK(idx, kVarTableRowCnt); - auto line_idx = idx * kVarTableRowCnt; - remote_size += data[line_idx + kVarTableIdxLen]; - } - auto allocator = NpuMemoryAllocator::GetAllocator(); - GE_CHECK_NOTNULL(allocator); - AllocationAttr attr; - attr.SetMemType(RDMA_HBM); - for (auto i = 0; i < context.NumOutputs(); ++i) { - GELOGD("Allocate rdma memory for node %s, size: %zu", context.GetNodeName(), remote_size); - auto tensor_buffer = TensorBuffer::Create(allocator, remote_size, &attr); - GE_CHK_STATUS_RET(context.SetOutput(i, TensorValue(std::shared_ptr(tensor_buffer.release())))); - } - } else if (context.GetNodeItem().NodeType() == HCOMREMOTEREFREAD) { - AllocationAttr attr; - attr.SetMemType(RDMA_HBM); - GE_CHK_STATUS_RET(context.AllocateOutputs(&attr)) - } - - auto row_num = dims.front(); - return SetAddrInfo(context, ctx, data, row_num, addr_infos); -} - Status RdmaNodeTask::ExecuteAsync(TaskContext &context, std::function done_callback) { GELOGI("[%s] RdmaNodeTask::ExecuteAsync in.", context.GetNodeName()); auto HcomExecEnqueueRemoteAccess = diff --git a/ge/hybrid/node_executor/hccl/hccl_node_executor.h b/ge/hybrid/node_executor/hccl/hccl_node_executor.h index 9e6d41a4..873f259f 100644 --- a/ge/hybrid/node_executor/hccl/hccl_node_executor.h +++ b/ge/hybrid/node_executor/hccl/hccl_node_executor.h @@ -18,7 +18,6 @@ #define HYBRID_HCCL_NODE_EXECUTOR_H_ #include "common/opskernel/ge_task_info.h" #include "graph/op_desc.h" -#include "graph/runtime_inference_context.h" #include "hybrid/model/hybrid_model.h" #include "hybrid/node_executor/node_executor.h" @@ -54,8 +53,6 @@ class RdmaNodeTask : public NodeTask { Status Init(TaskContext &context) override; private: - Status SetAddrInfo(TaskContext &context, RuntimeInferenceContext *ctx, uint64_t *data, int64_t row_num, - vector &addr_infos); Status ExtractTensor(TaskContext &context, vector &addr_infos); std::pair remote_index_; std::pair offset_index_; From e6253a449b870d967a0636ad8f84b708d24c0b1f Mon Sep 17 00:00:00 2001 From: wjm Date: Thu, 3 Jun 2021 06:25:53 +0800 Subject: [PATCH 09/18] add pass --- .../passes/subgraph_const_migration_pass.cc | 46 ++++++++++--------- .../passes/subgraph_const_migration_pass.h | 2 + 2 files changed, 26 insertions(+), 22 deletions(-) diff --git a/ge/graph/passes/subgraph_const_migration_pass.cc b/ge/graph/passes/subgraph_const_migration_pass.cc index 7cf75661..d8ad41e1 100644 --- a/ge/graph/passes/subgraph_const_migration_pass.cc +++ b/ge/graph/passes/subgraph_const_migration_pass.cc @@ -164,29 +164,9 @@ Status SubgraphConstMigrationPass::ClassifyGraphNodes(const ComputeGraphPtr &gra data_nodes[parent_index] = node; GELOGD("%s, index: %u, Data: %s", subgraph->GetName().c_str(), parent_index, node->GetName().c_str()); - } else if (node->GetType() == CONSTANT) { + } else if (node->GetType() == CONSTANT && (node->GetOutDataAnchor(kZeroIndex) != nullptr)) { set peer_name_list; - const auto &out_anchor = node->GetOutDataAnchor(kZeroIndex); - GE_IF_BOOL_EXEC(out_anchor == nullptr, continue); - for (const auto &in_anchor : out_anchor->GetPeerInDataAnchors()) { - const auto &peer_node = in_anchor->GetOwnerNode(); - // Trim subgraph node name prefix. - string node_full_name = peer_node->GetName(); - size_t pos = node_full_name.find(kMbatchNodeNameMark); - if (pos == string::npos) { - GELOGI("Can not find: %s of multi-batch in node: %s", kMbatchNodeNameMark.c_str(), node_full_name.c_str()); - continue; - } - - string fixed_name = node_full_name.substr(0, pos); - pos = node_full_name.find("_", pos + kMbatchNodeNameMark.length()); - if (pos != string::npos) { - fixed_name += node_full_name.substr(pos); - } - - peer_name_list.insert(fixed_name + ":" + std::to_string(in_anchor->GetIdx())); - } - + GetPeerNameList(node, peer_name_list); if (peer_name_list.empty()) { GELOGI("%s, Const: %s, no data output", subgraph->GetName().c_str(), node->GetName().c_str()); const auto in_all_nodes = node->GetInAllNodes(); @@ -217,6 +197,28 @@ Status SubgraphConstMigrationPass::ClassifyGraphNodes(const ComputeGraphPtr &gra return SUCCESS; } +void SubgraphConstMigrationPass::GetPeerNameList(const NodePtr &node, set &peer_name_list) { + const auto &out_anchor = node->GetOutDataAnchor(kZeroIndex); + for (const auto &in_anchor : out_anchor->GetPeerInDataAnchors()) { + const auto &peer_node = in_anchor->GetOwnerNode(); + // Trim subgraph node name prefix. + string node_full_name = peer_node->GetName(); + size_t pos = node_full_name.find(kMbatchNodeNameMark); + if (pos == string::npos) { + GELOGI("Can not find: %s of multi-batch in node: %s", kMbatchNodeNameMark.c_str(), node_full_name.c_str()); + continue; + } + + string fixed_name = node_full_name.substr(0, pos); + pos = node_full_name.find("_", pos + kMbatchNodeNameMark.length()); + if (pos != string::npos) { + fixed_name += node_full_name.substr(pos); + } + + peer_name_list.insert(fixed_name + ":" + std::to_string(in_anchor->GetIdx())); + } +} + /// /// @ingroup ge /// @brief Get parent_index for Const node migration. diff --git a/ge/graph/passes/subgraph_const_migration_pass.h b/ge/graph/passes/subgraph_const_migration_pass.h index d93da839..2834fd66 100755 --- a/ge/graph/passes/subgraph_const_migration_pass.h +++ b/ge/graph/passes/subgraph_const_migration_pass.h @@ -133,6 +133,8 @@ class SubgraphConstMigrationPass : public GraphPass { /// Status AttachParallelNode(const ComputeGraphPtr &graph, const NodePtr &func_node, const NodePtr &const_node, uint32_t parent_index); + + void GetPeerNameList(const NodePtr &node, set &peer_name_list); }; } // namespace ge #endif // GE_COMMON_SUBGRAPH_CONST_MIGRATION_H_ \ No newline at end of file From a7c1cd37bf197b0f32ef6439b7d4718bf617c3da Mon Sep 17 00:00:00 2001 From: wjm Date: Thu, 3 Jun 2021 06:27:49 +0800 Subject: [PATCH 10/18] add pass --- ge/graph/passes/subgraph_const_migration_pass.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ge/graph/passes/subgraph_const_migration_pass.cc b/ge/graph/passes/subgraph_const_migration_pass.cc index d8ad41e1..d15e60cf 100644 --- a/ge/graph/passes/subgraph_const_migration_pass.cc +++ b/ge/graph/passes/subgraph_const_migration_pass.cc @@ -164,7 +164,7 @@ Status SubgraphConstMigrationPass::ClassifyGraphNodes(const ComputeGraphPtr &gra data_nodes[parent_index] = node; GELOGD("%s, index: %u, Data: %s", subgraph->GetName().c_str(), parent_index, node->GetName().c_str()); - } else if (node->GetType() == CONSTANT && (node->GetOutDataAnchor(kZeroIndex) != nullptr)) { + } else if ((node->GetType() == CONSTANT) && (node->GetOutDataAnchor(kZeroIndex) != nullptr)) { set peer_name_list; GetPeerNameList(node, peer_name_list); if (peer_name_list.empty()) { From bb79263adc652657ded9c7000b8e462d71bb958b Mon Sep 17 00:00:00 2001 From: wjm Date: Fri, 4 Jun 2021 00:39:38 +0800 Subject: [PATCH 11/18] cov --- .../node_executor/hccl/hccl_node_executor.cc | 118 +++++++++-------- .../node_executor/hccl/hccl_node_executor.h | 3 + tests/ut/ge/CMakeLists.txt | 4 +- .../subgraph_const_migration_pass_unittest.cc | 125 ++++++++++++++++++ .../hccl/hccl_node_executor_unittest.cc | 108 +++++++++++++++ 5 files changed, 300 insertions(+), 58 deletions(-) create mode 100644 tests/ut/ge/graph/passes/subgraph_const_migration_pass_unittest.cc create mode 100644 tests/ut/ge/hybrid/node_executor/hccl/hccl_node_executor_unittest.cc diff --git a/ge/hybrid/node_executor/hccl/hccl_node_executor.cc b/ge/hybrid/node_executor/hccl/hccl_node_executor.cc index 0aee5122..e01d20fd 100644 --- a/ge/hybrid/node_executor/hccl/hccl_node_executor.cc +++ b/ge/hybrid/node_executor/hccl/hccl_node_executor.cc @@ -20,7 +20,6 @@ #include "graph/attr_value.h" #include "graph/debug/ge_attr_define.h" #include "graph/manager/util/hcom_util.h" -#include "graph/runtime_inference_context.h" #include "graph/utils/type_utils.h" #include "graph/types.h" #include "hccl/hcom.h" @@ -177,61 +176,8 @@ Status RdmaNodeTask::Init(TaskContext &context) { return SUCCESS; } -Status RdmaNodeTask::ExtractTensor(TaskContext &context, vector &addr_infos) { - RuntimeInferenceContext *ctx = nullptr; - GE_CHK_STATUS_RET( - RuntimeInferenceContext::GetContext(std::to_string(context.GetExecutionContext()->context_id), &ctx)); - - ge::Tensor remote_tensor; - GE_CHK_STATUS_RET(ctx->GetTensor(remote_index_.first, remote_index_.second, remote_tensor)); - auto data = reinterpret_cast(remote_tensor.GetData()); - if (data == nullptr) { - if (kRdmaScatterTypes.count(context.GetNodeItem().NodeType()) > 0) { - GELOGD("data is null, no need to do rdma read/write, node=%s", context.GetNodeName()); - return SUCCESS; - } else { - REPORT_INNER_ERROR("E19999", "Tensor data is nullptr. and kRdmaScatterTypes not contain %s", - context.GetNodeItem().NodeType().c_str()); - GELOGE(FAILED, "[Find][NodeType]Tensor data is nullptr. and kRdmaScatterTypes not contain %s", - context.GetNodeItem().NodeType().c_str()); - return FAILED; - } - } - auto dims = remote_tensor.GetTensorDesc().GetShape().GetDims(); - if (dims.size() != kVarTableDims && dims.back() != kVarTableRowCnt) { - REPORT_INNER_ERROR("E19999", "Variable table shape check failed, number of shape dims:%zu not equal expect:%zu" - "and shape dims back:%zu not equal expect:%zu, node:%s(%s)", - dims.size(), kVarTableDims, dims.back(), kVarTableRowCnt, - context.GetNodeName(), context.GetNodeItem().NodeType().c_str()); - GELOGE(PARAM_INVALID, "[Check][Param]Variable table shape check failed," - "number of shape dims:%zu not equal expect:%zu and shape dims back:%zu not equal expect:%zu, node:%s(%s)", - dims.size(), kVarTableDims, dims.back(), kVarTableRowCnt, - context.GetNodeName(), context.GetNodeItem().NodeType().c_str()); - return PARAM_INVALID; - } - - if (context.GetNodeItem().NodeType() == HCOMREMOTEREAD) { - size_t remote_size = 0; - for (auto idx = 0; idx < dims.front(); ++idx) { - FMK_INT64_MULCHECK(idx, kVarTableRowCnt); - auto line_idx = idx * kVarTableRowCnt; - remote_size += data[line_idx + kVarTableIdxLen]; - } - auto allocator = NpuMemoryAllocator::GetAllocator(); - GE_CHECK_NOTNULL(allocator); - AllocationAttr attr; - attr.SetMemType(RDMA_HBM); - for (auto i = 0; i < context.NumOutputs(); ++i) { - GELOGD("Allocate rdma memory for node %s, size: %zu", context.GetNodeName(), remote_size); - auto tensor_buffer = TensorBuffer::Create(allocator, remote_size, &attr); - GE_CHK_STATUS_RET(context.SetOutput(i, TensorValue(std::shared_ptr(tensor_buffer.release())))); - } - } else if (context.GetNodeItem().NodeType() == HCOMREMOTEREFREAD) { - AllocationAttr attr; - attr.SetMemType(RDMA_HBM); - GE_CHK_STATUS_RET(context.AllocateOutputs(&attr)) - } - +Status RdmaNodeTask::SetAddrInfo(TaskContext &context, RuntimeInferenceContext *ctx, uint64_t *data, int64_t row_num, + vector &addr_infos) { TensorValue *tv; if (kRdmaReadTypes.count(context.GetNodeItem().NodeType()) > 0) { tv = context.MutableOutput(local_index_); @@ -239,7 +185,6 @@ Status RdmaNodeTask::ExtractTensor(TaskContext &context, vectorGetInputIndexByName("local_offset"); @@ -294,6 +239,65 @@ Status RdmaNodeTask::ExtractTensor(TaskContext &context, vector &addr_infos) { + RuntimeInferenceContext *ctx = nullptr; + GE_CHK_STATUS_RET( + RuntimeInferenceContext::GetContext(std::to_string(context.GetExecutionContext()->context_id), &ctx)); + + ge::Tensor remote_tensor; + GE_CHK_STATUS_RET(ctx->GetTensor(remote_index_.first, remote_index_.second, remote_tensor)); + auto data = reinterpret_cast(remote_tensor.GetData()); + if (data == nullptr) { + if (kRdmaScatterTypes.count(context.GetNodeItem().NodeType()) > 0) { + GELOGD("data is null, no need to do rdma read/write, node=%s", context.GetNodeName()); + return SUCCESS; + } else { + REPORT_INNER_ERROR("E19999", "Tensor data is nullptr. and kRdmaScatterTypes not contain %s", + context.GetNodeItem().NodeType().c_str()); + GELOGE(FAILED, "[Find][NodeType]Tensor data is nullptr. and kRdmaScatterTypes not contain %s", + context.GetNodeItem().NodeType().c_str()); + return FAILED; + } + } + auto dims = remote_tensor.GetTensorDesc().GetShape().GetDims(); + if (dims.size() != kVarTableDims && dims.back() != kVarTableRowCnt) { + REPORT_INNER_ERROR("E19999", "Variable table shape check failed, number of shape dims:%zu not equal expect:%zu" + "and shape dims back:%zu not equal expect:%zu, node:%s(%s)", + dims.size(), kVarTableDims, dims.back(), kVarTableRowCnt, + context.GetNodeName(), context.GetNodeItem().NodeType().c_str()); + GELOGE(PARAM_INVALID, "[Check][Param]Variable table shape check failed," + "number of shape dims:%zu not equal expect:%zu and shape dims back:%zu not equal expect:%zu, node:%s(%s)", + dims.size(), kVarTableDims, dims.back(), kVarTableRowCnt, + context.GetNodeName(), context.GetNodeItem().NodeType().c_str()); + return PARAM_INVALID; + } + + if (context.GetNodeItem().NodeType() == HCOMREMOTEREAD) { + size_t remote_size = 0; + for (auto idx = 0; idx < dims.front(); ++idx) { + FMK_INT64_MULCHECK(idx, kVarTableRowCnt); + auto line_idx = idx * kVarTableRowCnt; + remote_size += data[line_idx + kVarTableIdxLen]; + } + auto allocator = NpuMemoryAllocator::GetAllocator(); + GE_CHECK_NOTNULL(allocator); + AllocationAttr attr; + attr.SetMemType(RDMA_HBM); + for (auto i = 0; i < context.NumOutputs(); ++i) { + GELOGD("Allocate rdma memory for node %s, size: %zu", context.GetNodeName(), remote_size); + auto tensor_buffer = TensorBuffer::Create(allocator, remote_size, &attr); + GE_CHK_STATUS_RET(context.SetOutput(i, TensorValue(std::shared_ptr(tensor_buffer.release())))); + } + } else if (context.GetNodeItem().NodeType() == HCOMREMOTEREFREAD) { + AllocationAttr attr; + attr.SetMemType(RDMA_HBM); + GE_CHK_STATUS_RET(context.AllocateOutputs(&attr)) + } + + auto row_num = dims.front(); + return SetAddrInfo(context, ctx, data, row_num, addr_infos); +} + Status RdmaNodeTask::ExecuteAsync(TaskContext &context, std::function done_callback) { GELOGI("[%s] RdmaNodeTask::ExecuteAsync in.", context.GetNodeName()); auto HcomExecEnqueueRemoteAccess = diff --git a/ge/hybrid/node_executor/hccl/hccl_node_executor.h b/ge/hybrid/node_executor/hccl/hccl_node_executor.h index 873f259f..9e6d41a4 100644 --- a/ge/hybrid/node_executor/hccl/hccl_node_executor.h +++ b/ge/hybrid/node_executor/hccl/hccl_node_executor.h @@ -18,6 +18,7 @@ #define HYBRID_HCCL_NODE_EXECUTOR_H_ #include "common/opskernel/ge_task_info.h" #include "graph/op_desc.h" +#include "graph/runtime_inference_context.h" #include "hybrid/model/hybrid_model.h" #include "hybrid/node_executor/node_executor.h" @@ -53,6 +54,8 @@ class RdmaNodeTask : public NodeTask { Status Init(TaskContext &context) override; private: + Status SetAddrInfo(TaskContext &context, RuntimeInferenceContext *ctx, uint64_t *data, int64_t row_num, + vector &addr_infos); Status ExtractTensor(TaskContext &context, vector &addr_infos); std::pair remote_index_; std::pair offset_index_; diff --git a/tests/ut/ge/CMakeLists.txt b/tests/ut/ge/CMakeLists.txt index 5bff0f98..fe95bd85 100755 --- a/tests/ut/ge/CMakeLists.txt +++ b/tests/ut/ge/CMakeLists.txt @@ -710,6 +710,7 @@ set(PASS_TEST_FILES "graph/passes/infershape_pass_unittest.cc" "graph/passes/mark_force_unknown_for_cond_pass_unittest.cc" "graph/passes/multi_batch_clone_pass_unittest.cc" + "graph/passes/subgraph_const_migration_pass_unittest.cc" "graph/passes/replace_with_empty_const_pass_unittest.cc" "graph/passes/link_gen_mask_nodes_pass_unittest.cc" "graph/passes/transpose_transdata_pass_unittest.cc" @@ -718,7 +719,7 @@ set(PASS_TEST_FILES "graph/passes/mark_node_unknown_shape_pass_unittest.cc" "graph/passes/reshape_recovery_pass_unittest.cc" "graph/passes/cast_remove_pass_unittest.cc" - "graph/passes/memcpy_addr_async_unittest.cc" + "graph/passes/memcpy_addr_async_unittest.cc" "graph/passes/hccl_continuous_pass_unittest.cc" "graph/passes/hccl_memcpy_pass_unittest.cc" @@ -843,6 +844,7 @@ set(HYBRID_TEST_FILES "hybrid/model/hybrid_model_builder_unittest.cc" "hybrid/node_executor/rts/rts_node_task_unittest.cc" "hybrid/node_executor/ge_local/ge_local_node_executor_unittest.cc" + "hybrid/node_executor/hccl/hccl_node_executor_unittest.cc" "hybrid/executor/hybrid_model_async_executor_unittest.cc" "hybrid/executor/hybrid_model_pipeline_executor_unittest.cc" "hybrid/node_executor/aicore/aicore_task_compiler_unittest.cc" diff --git a/tests/ut/ge/graph/passes/subgraph_const_migration_pass_unittest.cc b/tests/ut/ge/graph/passes/subgraph_const_migration_pass_unittest.cc new file mode 100644 index 00000000..7d3a754d --- /dev/null +++ b/tests/ut/ge/graph/passes/subgraph_const_migration_pass_unittest.cc @@ -0,0 +1,125 @@ +/** + * Copyright 2021 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include + +#include +#include + +#include "framework/omg/omg_inner_types.h" +#include "graph/common/local_context.h" +#include "graph/passes/subgraph_const_migration_pass.h" +#include "inc/pass_manager.h" +#include "register/op_registry.h" + +namespace ge { +class UtestSubgraphConstMigrationPass : public testing::Test { + protected: + void SetUp() {} + void TearDown() {} + + public: + NodePtr MakeNode(const ComputeGraphPtr &graph, uint32_t in_num, uint32_t out_num, string name, string type) { + GeTensorDesc test_desc(GeShape(), FORMAT_NCHW, DT_FLOAT); + auto op_desc = std::make_shared(name, type); + for (auto i = 0; i < in_num; ++i) { + op_desc->AddInputDesc(test_desc); + } + for (auto i = 0; i < out_num; ++i) { + op_desc->AddOutputDesc(test_desc); + } + if (type == "Const") { + uint64_t const_value = 101; + auto weight = make_shared(op_desc->GetOutputDesc(0), (uint8_t *)&const_value, sizeof(uint64_t)); + AttrUtils::SetTensor(op_desc, ge::ATTR_NAME_WEIGHTS, weight); + } + return graph->AddNode(op_desc); + } + + void make_original_graph(const ComputeGraphPtr &graph) { + auto data = MakeNode(graph, 1, 1, "data", "Data"); + { + AttrUtils::SetInt(data->GetOpDesc(), ATTR_NAME_INDEX, 0); + AttrUtils::SetInt(data->GetOpDesc(), ATTR_NAME_PARENT_NODE_INDEX, 1); + } + auto const1 = MakeNode(graph, 0, 1, "const1", "Const"); + { + auto data1 = MakeNode(graph, 1, 1, "data1", "Data"); + AttrUtils::SetInt(data1->GetOpDesc(), ATTR_NAME_INDEX, 1); + AttrUtils::SetInt(data1->GetOpDesc(), ATTR_NAME_PARENT_NODE_INDEX, 2); + GraphUtils::AddEdge(data1->GetOutControlAnchor(), const1->GetInControlAnchor()); + } + + auto const2 = MakeNode(graph, 0, 1, "const2", "Const"); + { + auto data2 = MakeNode(graph, 1, 1, "data2", "Data"); + AttrUtils::SetInt(data2->GetOpDesc(), ATTR_NAME_INDEX, 2); + AttrUtils::SetInt(data2->GetOpDesc(), ATTR_NAME_PARENT_NODE_INDEX, 3); + GraphUtils::AddEdge(data2->GetOutControlAnchor(), const2->GetInControlAnchor()); + } + + auto conv2d_node = MakeNode(graph, 3, 1, "conv1", "Conv2D"); + GraphUtils::AddEdge(data->GetOutDataAnchor(0), conv2d_node->GetInDataAnchor(0)); + GraphUtils::AddEdge(const1->GetOutDataAnchor(0), conv2d_node->GetInDataAnchor(1)); + GraphUtils::AddEdge(const2->GetOutDataAnchor(0), conv2d_node->GetInDataAnchor(2)); + } + + void make_multibatch_graph(const ComputeGraphPtr &graph) { + auto index = MakeNode(graph, 1, 1, "index", "Data"); + auto data = MakeNode(graph, 1, 1, "data", "Data"); + auto data1 = MakeNode(graph, 1, 1, "data1", "Data"); + auto data2 = MakeNode(graph, 1, 1, "data2", "Data"); + AttrUtils::SetInt(data->GetOpDesc(), ATTR_NAME_INDEX, 0); + AttrUtils::SetInt(data1->GetOpDesc(), ATTR_NAME_INDEX, 1); + AttrUtils::SetInt(data2->GetOpDesc(), ATTR_NAME_INDEX, 2); + + auto case1 = MakeNode(graph, 4, 1, "case", "Case"); + GraphUtils::AddEdge(index->GetOutDataAnchor(0), case1->GetInDataAnchor(0)); + GraphUtils::AddEdge(data->GetOutDataAnchor(0), case1->GetInDataAnchor(1)); + GraphUtils::AddEdge(data1->GetOutDataAnchor(0), case1->GetInDataAnchor(2)); + GraphUtils::AddEdge(data2->GetOutDataAnchor(0), case1->GetInDataAnchor(3)); + auto output_node = MakeNode(graph, 1, 0, "output", "NetOutput"); + GraphUtils::AddEdge(case1->GetOutDataAnchor(0), output_node->GetInDataAnchor(0)); + + AttrUtils::SetInt(case1->GetOpDesc(), ATTR_NAME_BATCH_NUM, 2); + case1->GetOpDesc()->RegisterSubgraphIrName("branches", kDynamic); + ComputeGraphPtr branch = std::make_shared("test_branch"); + make_original_graph(branch); + for (int i = 0; i < 2; ++i) { + std::string name("_ascend_mbatch_batch_" + std::to_string(i)); + std::vector input_nodes; + std::vector output_nodes; + ComputeGraphPtr subgraph = GraphUtils::CloneGraph(branch, name, input_nodes, output_nodes); + + subgraph->SetName(name); + subgraph->SetParentNode(case1); + subgraph->SetParentGraph(graph); + graph->AddSubgraph(subgraph->GetName(), subgraph); + + case1->GetOpDesc()->AddSubgraphName(name); + case1->GetOpDesc()->SetSubgraphInstanceName(i, subgraph->GetName()); + } + } +}; + +TEST_F(UtestSubgraphConstMigrationPass, graph_nullptr) { + PassManager pass_manager; + pass_manager.AddPass("SubgraphConstMigrationPass", new (std::nothrow) SubgraphConstMigrationPass); + ComputeGraphPtr graph = std::make_shared("test_graph"); + make_multibatch_graph(graph); + pass_manager.Run(graph); +} +} // namespace ge \ No newline at end of file diff --git a/tests/ut/ge/hybrid/node_executor/hccl/hccl_node_executor_unittest.cc b/tests/ut/ge/hybrid/node_executor/hccl/hccl_node_executor_unittest.cc new file mode 100644 index 00000000..c36d6ea5 --- /dev/null +++ b/tests/ut/ge/hybrid/node_executor/hccl/hccl_node_executor_unittest.cc @@ -0,0 +1,108 @@ +/** + * Copyright 2021 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include + +#include + +#define private public +#define protected public +#include "graph/runtime_inference_context.h" +#include "hybrid/executor/subgraph_context.h" +#include "hybrid/node_executor/hccl/hccl_node_executor.h" +#undef protected +#undef private + +using namespace std; +using namespace testing; +namespace ge { +using namespace hybrid; + +class UtestHcclNodeExecutor : public testing::Test { + protected: + void SetUp() {} + void TearDown() {} +}; + +static NodePtr CreateNode(ComputeGraph &graph, const string &name, const string &type, int in_num, int out_num) { + OpDescPtr op_desc = std::make_shared(name, type); + op_desc->SetStreamId(0); + static int32_t index = 0; + op_desc->SetId(index++); + + GeTensorDesc tensor(GeShape(), FORMAT_ND, DT_INT64); + TensorUtils::SetSize(tensor, 64); + vector input_offset; + for (int i = 0; i < in_num; i++) { + op_desc->AddInputDesc(tensor); + input_offset.emplace_back(i * 64); + } + op_desc->SetInputOffset(input_offset); + + vector output_offset; + for (int i = 0; i < out_num; i++) { + op_desc->AddOutputDesc(tensor); + output_offset.emplace_back(in_num * 64 + i * 64); + } + op_desc->SetOutputOffset(output_offset); + + return graph.AddNode(op_desc); +} + +TEST_F(UtestHcclNodeExecutor, test_rdmatask_extract_tensor) { + ComputeGraphPtr graph = std::make_shared("test"); + NodePtr node = CreateNode(*graph, "hcom", HCOMREMOTEREAD, 0, 0); + std::unique_ptr new_node; + ASSERT_EQ(NodeItem::Create(node, new_node), SUCCESS); + NodeItem *node_item = new_node.get(); + node_item->input_start = 0; + node_item->output_start = 0; + + GraphItem graph_item; + GraphExecutionContext graph_context; + SubgraphContext subgraph_context(&graph_item, &graph_context); + ASSERT_EQ(subgraph_context.Init(), SUCCESS); + + auto node_state = subgraph_context.GetOrCreateNodeState(node_item); + ASSERT_NE(node_state, nullptr); + + RuntimeInferenceContext::CreateContext(std::to_string(graph_context.context_id)); + RuntimeInferenceContext *ctx = nullptr; + RuntimeInferenceContext::GetContext(std::to_string(graph_context.context_id), &ctx); + + Shape s({1, 3}); + TensorDesc tensor_desc(s); + Tensor tensor(tensor_desc); + std::vector data = {1, 2, 3, 4}; + tensor.SetData(data); + ctx->SetTensor(1, 0, tensor.Clone()); + + auto unique_task_context = TaskContext::Create(node_state.get(), &graph_context, &subgraph_context); + vector addr_infos; + shared_ptr task = MakeShared(); + task->remote_index_ = {1, 0}; + ASSERT_EQ(task->ExtractTensor(*unique_task_context, addr_infos), PARAM_INVALID); + + Shape s2({1}); + TensorDesc tensor_desc2(s2); + Tensor tensor2(tensor_desc2); + ctx->SetTensor(1, 0, tensor2.Clone()); + task->ExtractTensor(*unique_task_context, addr_infos); + ASSERT_EQ(task->ExtractTensor(*unique_task_context, addr_infos), PARAM_INVALID); + RuntimeInferenceContext::DestroyContext(std::to_string(graph_context.context_id)); +} +} // namespace ge \ No newline at end of file From 0de08b01b59d7b57275b086be1111028faf682f6 Mon Sep 17 00:00:00 2001 From: wjm Date: Fri, 4 Jun 2021 01:34:46 +0800 Subject: [PATCH 12/18] fix format --- ge/graph/build/memory/graph_mem_assigner.cc | 4 ++-- ge/graph/build/memory/graph_mem_assigner.h | 2 +- ge/graph/preprocess/graph_preprocess.cc | 10 +++++----- ge/host_kernels/slice_kernel.cc | 3 ++- ge/host_kernels/slice_kernel.h | 2 +- ge/ir_build/ge_ir_build.cc | 4 ++-- 6 files changed, 13 insertions(+), 12 deletions(-) diff --git a/ge/graph/build/memory/graph_mem_assigner.cc b/ge/graph/build/memory/graph_mem_assigner.cc index 340bfe3e..ae023c96 100755 --- a/ge/graph/build/memory/graph_mem_assigner.cc +++ b/ge/graph/build/memory/graph_mem_assigner.cc @@ -608,8 +608,8 @@ Status GraphMemoryAssigner::ReAssignContinuousMemory(bool is_loop_graph) { return ge::SUCCESS; } -Status GraphMemoryAssigner::SetMemOffset(const ge::NodePtr &node, InDataAnchorPtr in_data_anchor, bool reverse_refresh, - int64_t &mem_offset, int64_t &continuous_mem_start) { +Status GraphMemoryAssigner::SetMemOffset(const ge::NodePtr &node, const InDataAnchorPtr &in_data_anchor, + bool reverse_refresh, int64_t &mem_offset, int64_t &continuous_mem_start) { auto op_desc = node->GetOpDesc(); GE_CHECK_NOTNULL(op_desc); vector output_list_this = op_desc->GetOutputOffset(); diff --git a/ge/graph/build/memory/graph_mem_assigner.h b/ge/graph/build/memory/graph_mem_assigner.h index b237b5b3..46e27b32 100755 --- a/ge/graph/build/memory/graph_mem_assigner.h +++ b/ge/graph/build/memory/graph_mem_assigner.h @@ -146,7 +146,7 @@ class GraphMemoryAssigner { ge::Status FilterAtomicNodesForMemoryAssign(map>> &normal_atomic_nodes_map, map> &connecting_output_atomic_nodes); - Status SetMemOffset(const ge::NodePtr &node, InDataAnchorPtr in_data_anchor, bool reverse_refresh, + Status SetMemOffset(const ge::NodePtr &node, const InDataAnchorPtr &in_data_anchor, bool reverse_refresh, int64_t &mem_offset, int64_t &continuous_mem_start); ge::Status AssignContinuousInputMemory(const ge::NodePtr &node, int64_t &continuous_mem_start, diff --git a/ge/graph/preprocess/graph_preprocess.cc b/ge/graph/preprocess/graph_preprocess.cc index 5a491f19..d5c261f5 100644 --- a/ge/graph/preprocess/graph_preprocess.cc +++ b/ge/graph/preprocess/graph_preprocess.cc @@ -1430,12 +1430,12 @@ Status GraphPrepare::CheckInternalFormat(const NodePtr &input_node, const GeTens if (need_check_internal_format) { bool is_internal = TypeUtils::IsInternalFormat(format) || TypeUtils::IsInternalFormat(origin_format); if (is_internal) { - ErrorManager::GetInstance().ATCReportErrMessage("E19025", {"situation", "reason"}, - {"Input format[" + TypeUtils::FormatToSerialString(format) + "] or origin_format[" + - TypeUtils::FormatToSerialString(origin_format) + "]", "it is not support"}); + ErrorManager::GetInstance().ATCReportErrMessage("E19025", {"situation", "reason"}, {"Input format[" + + TypeUtils::FormatToSerialString(format) + "] or origin_format[" + + TypeUtils::FormatToSerialString(origin_format) + "]", + "it is not support"}); GELOGE(PARAM_INVALID, "[Check][Param] Input format %s or origin_format %s is not support.", - TypeUtils::FormatToSerialString(format).c_str(), - TypeUtils::FormatToSerialString(origin_format).c_str()); + TypeUtils::FormatToSerialString(format).c_str(), TypeUtils::FormatToSerialString(origin_format).c_str()); return FAILED; } } diff --git a/ge/host_kernels/slice_kernel.cc b/ge/host_kernels/slice_kernel.cc index 83b98111..6e398e96 100644 --- a/ge/host_kernels/slice_kernel.cc +++ b/ge/host_kernels/slice_kernel.cc @@ -155,7 +155,8 @@ Status SliceKernel::Compute(const OpDescPtr attr, const std::vector &v_output) override; Status CheckOutputDims(const std::vector &output_dims, const OpDescPtr attr); - Status CheckInput(const ConstGeTensorPtr x_, const ConstGeTensorPtr begin, const ConstGeTensorPtr size); + Status CheckInput(const ConstGeTensorPtr &x_, const ConstGeTensorPtr &begin, const ConstGeTensorPtr &size); }; } // namespace ge diff --git a/ge/ir_build/ge_ir_build.cc b/ge/ir_build/ge_ir_build.cc index 6b6fd1f5..bce72c78 100644 --- a/ge/ir_build/ge_ir_build.cc +++ b/ge/ir_build/ge_ir_build.cc @@ -538,8 +538,8 @@ graphStatus Impl::Init(const Graph &graph, const std::map Date: Fri, 4 Jun 2021 02:28:19 +0800 Subject: [PATCH 13/18] fix format --- ge/hybrid/node_executor/hccl/hccl_node_executor.cc | 2 +- ge/ir_build/ge_ir_build.cc | 9 +++------ 2 files changed, 4 insertions(+), 7 deletions(-) diff --git a/ge/hybrid/node_executor/hccl/hccl_node_executor.cc b/ge/hybrid/node_executor/hccl/hccl_node_executor.cc index e01d20fd..150e8ed2 100644 --- a/ge/hybrid/node_executor/hccl/hccl_node_executor.cc +++ b/ge/hybrid/node_executor/hccl/hccl_node_executor.cc @@ -178,7 +178,7 @@ Status RdmaNodeTask::Init(TaskContext &context) { Status RdmaNodeTask::SetAddrInfo(TaskContext &context, RuntimeInferenceContext *ctx, uint64_t *data, int64_t row_num, vector &addr_infos) { - TensorValue *tv; + TensorValue *tv = nullptr; if (kRdmaReadTypes.count(context.GetNodeItem().NodeType()) > 0) { tv = context.MutableOutput(local_index_); } else { diff --git a/ge/ir_build/ge_ir_build.cc b/ge/ir_build/ge_ir_build.cc index bce72c78..0c8ff6dd 100644 --- a/ge/ir_build/ge_ir_build.cc +++ b/ge/ir_build/ge_ir_build.cc @@ -272,8 +272,7 @@ class Impl { graphStatus Init(const Graph &graph, const std::map &options); graphStatus BuildModel(const Graph &graph, const std::map &options, ModelBufferData &ge_models); - graphStatus InitDomiOmgContext(const string &input_shape, const string &input_format, const string &net_format, - bool is_dynamic_input); + graphStatus InitDomiOmgContext(const string &input_shape, const string &input_format, bool is_dynamic_input); graphStatus GetInputShapeRange(const string &input_shape_range, std::map>> &name_shape_range_map, std::vector>> &index_shape_range_map); @@ -540,7 +539,6 @@ graphStatus Impl::Init(const Graph &graph, const std::mapInitDomiOmgContext(input_shape, input_format, net_format, is_dynamic_input_); + return this->InitDomiOmgContext(input_shape, input_format, is_dynamic_input_); } void Impl::SetRtSocVersion() { @@ -687,8 +685,7 @@ graphStatus Impl::BuildModel(const Graph &graph, const std::map Date: Fri, 4 Jun 2021 03:16:26 +0800 Subject: [PATCH 14/18] fix format --- .../subgraph_const_migration_pass_unittest.cc | 248 +++++++++--------- 1 file changed, 124 insertions(+), 124 deletions(-) diff --git a/tests/ut/ge/graph/passes/subgraph_const_migration_pass_unittest.cc b/tests/ut/ge/graph/passes/subgraph_const_migration_pass_unittest.cc index 7d3a754d..00157395 100644 --- a/tests/ut/ge/graph/passes/subgraph_const_migration_pass_unittest.cc +++ b/tests/ut/ge/graph/passes/subgraph_const_migration_pass_unittest.cc @@ -1,125 +1,125 @@ -/** - * Copyright 2021 Huawei Technologies Co., Ltd - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include - -#include -#include - -#include "framework/omg/omg_inner_types.h" -#include "graph/common/local_context.h" -#include "graph/passes/subgraph_const_migration_pass.h" -#include "inc/pass_manager.h" -#include "register/op_registry.h" - -namespace ge { -class UtestSubgraphConstMigrationPass : public testing::Test { - protected: - void SetUp() {} - void TearDown() {} - - public: - NodePtr MakeNode(const ComputeGraphPtr &graph, uint32_t in_num, uint32_t out_num, string name, string type) { - GeTensorDesc test_desc(GeShape(), FORMAT_NCHW, DT_FLOAT); - auto op_desc = std::make_shared(name, type); - for (auto i = 0; i < in_num; ++i) { - op_desc->AddInputDesc(test_desc); - } - for (auto i = 0; i < out_num; ++i) { - op_desc->AddOutputDesc(test_desc); - } - if (type == "Const") { - uint64_t const_value = 101; - auto weight = make_shared(op_desc->GetOutputDesc(0), (uint8_t *)&const_value, sizeof(uint64_t)); - AttrUtils::SetTensor(op_desc, ge::ATTR_NAME_WEIGHTS, weight); - } - return graph->AddNode(op_desc); - } - - void make_original_graph(const ComputeGraphPtr &graph) { - auto data = MakeNode(graph, 1, 1, "data", "Data"); - { - AttrUtils::SetInt(data->GetOpDesc(), ATTR_NAME_INDEX, 0); - AttrUtils::SetInt(data->GetOpDesc(), ATTR_NAME_PARENT_NODE_INDEX, 1); - } - auto const1 = MakeNode(graph, 0, 1, "const1", "Const"); - { - auto data1 = MakeNode(graph, 1, 1, "data1", "Data"); - AttrUtils::SetInt(data1->GetOpDesc(), ATTR_NAME_INDEX, 1); - AttrUtils::SetInt(data1->GetOpDesc(), ATTR_NAME_PARENT_NODE_INDEX, 2); - GraphUtils::AddEdge(data1->GetOutControlAnchor(), const1->GetInControlAnchor()); - } - - auto const2 = MakeNode(graph, 0, 1, "const2", "Const"); - { - auto data2 = MakeNode(graph, 1, 1, "data2", "Data"); - AttrUtils::SetInt(data2->GetOpDesc(), ATTR_NAME_INDEX, 2); - AttrUtils::SetInt(data2->GetOpDesc(), ATTR_NAME_PARENT_NODE_INDEX, 3); - GraphUtils::AddEdge(data2->GetOutControlAnchor(), const2->GetInControlAnchor()); - } - - auto conv2d_node = MakeNode(graph, 3, 1, "conv1", "Conv2D"); - GraphUtils::AddEdge(data->GetOutDataAnchor(0), conv2d_node->GetInDataAnchor(0)); - GraphUtils::AddEdge(const1->GetOutDataAnchor(0), conv2d_node->GetInDataAnchor(1)); - GraphUtils::AddEdge(const2->GetOutDataAnchor(0), conv2d_node->GetInDataAnchor(2)); - } - - void make_multibatch_graph(const ComputeGraphPtr &graph) { - auto index = MakeNode(graph, 1, 1, "index", "Data"); - auto data = MakeNode(graph, 1, 1, "data", "Data"); - auto data1 = MakeNode(graph, 1, 1, "data1", "Data"); - auto data2 = MakeNode(graph, 1, 1, "data2", "Data"); - AttrUtils::SetInt(data->GetOpDesc(), ATTR_NAME_INDEX, 0); - AttrUtils::SetInt(data1->GetOpDesc(), ATTR_NAME_INDEX, 1); - AttrUtils::SetInt(data2->GetOpDesc(), ATTR_NAME_INDEX, 2); - - auto case1 = MakeNode(graph, 4, 1, "case", "Case"); - GraphUtils::AddEdge(index->GetOutDataAnchor(0), case1->GetInDataAnchor(0)); - GraphUtils::AddEdge(data->GetOutDataAnchor(0), case1->GetInDataAnchor(1)); - GraphUtils::AddEdge(data1->GetOutDataAnchor(0), case1->GetInDataAnchor(2)); - GraphUtils::AddEdge(data2->GetOutDataAnchor(0), case1->GetInDataAnchor(3)); - auto output_node = MakeNode(graph, 1, 0, "output", "NetOutput"); - GraphUtils::AddEdge(case1->GetOutDataAnchor(0), output_node->GetInDataAnchor(0)); - - AttrUtils::SetInt(case1->GetOpDesc(), ATTR_NAME_BATCH_NUM, 2); - case1->GetOpDesc()->RegisterSubgraphIrName("branches", kDynamic); - ComputeGraphPtr branch = std::make_shared("test_branch"); - make_original_graph(branch); - for (int i = 0; i < 2; ++i) { - std::string name("_ascend_mbatch_batch_" + std::to_string(i)); - std::vector input_nodes; - std::vector output_nodes; - ComputeGraphPtr subgraph = GraphUtils::CloneGraph(branch, name, input_nodes, output_nodes); - - subgraph->SetName(name); - subgraph->SetParentNode(case1); - subgraph->SetParentGraph(graph); - graph->AddSubgraph(subgraph->GetName(), subgraph); - - case1->GetOpDesc()->AddSubgraphName(name); - case1->GetOpDesc()->SetSubgraphInstanceName(i, subgraph->GetName()); - } - } -}; - -TEST_F(UtestSubgraphConstMigrationPass, graph_nullptr) { - PassManager pass_manager; - pass_manager.AddPass("SubgraphConstMigrationPass", new (std::nothrow) SubgraphConstMigrationPass); - ComputeGraphPtr graph = std::make_shared("test_graph"); - make_multibatch_graph(graph); - pass_manager.Run(graph); -} +/** + * Copyright 2021 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include + +#include +#include + +#include "framework/omg/omg_inner_types.h" +#include "graph/common/local_context.h" +#include "graph/passes/subgraph_const_migration_pass.h" +#include "inc/pass_manager.h" +#include "register/op_registry.h" + +namespace ge { +class UtestSubgraphConstMigrationPass : public testing::Test { + protected: + void SetUp() {} + void TearDown() {} + + public: + NodePtr MakeNode(const ComputeGraphPtr &graph, uint32_t in_num, uint32_t out_num, string name, string type) { + GeTensorDesc test_desc(GeShape(), FORMAT_NCHW, DT_FLOAT); + auto op_desc = std::make_shared(name, type); + for (auto i = 0; i < in_num; ++i) { + op_desc->AddInputDesc(test_desc); + } + for (auto i = 0; i < out_num; ++i) { + op_desc->AddOutputDesc(test_desc); + } + if (type == "Const") { + uint64_t const_value = 101; + auto weight = make_shared(op_desc->GetOutputDesc(0), (uint8_t *)&const_value, sizeof(uint64_t)); + AttrUtils::SetTensor(op_desc, ge::ATTR_NAME_WEIGHTS, weight); + } + return graph->AddNode(op_desc); + } + + void make_original_graph(const ComputeGraphPtr &graph) { + auto data = MakeNode(graph, 1, 1, "data", "Data"); + { + AttrUtils::SetInt(data->GetOpDesc(), ATTR_NAME_INDEX, 0); + AttrUtils::SetInt(data->GetOpDesc(), ATTR_NAME_PARENT_NODE_INDEX, 1); + } + auto const1 = MakeNode(graph, 0, 1, "const1", "Const"); + { + auto data1 = MakeNode(graph, 1, 1, "data1", "Data"); + AttrUtils::SetInt(data1->GetOpDesc(), ATTR_NAME_INDEX, 1); + AttrUtils::SetInt(data1->GetOpDesc(), ATTR_NAME_PARENT_NODE_INDEX, 2); + GraphUtils::AddEdge(data1->GetOutControlAnchor(), const1->GetInControlAnchor()); + } + + auto const2 = MakeNode(graph, 0, 1, "const2", "Const"); + { + auto data2 = MakeNode(graph, 1, 1, "data2", "Data"); + AttrUtils::SetInt(data2->GetOpDesc(), ATTR_NAME_INDEX, 2); + AttrUtils::SetInt(data2->GetOpDesc(), ATTR_NAME_PARENT_NODE_INDEX, 3); + GraphUtils::AddEdge(data2->GetOutControlAnchor(), const2->GetInControlAnchor()); + } + + auto conv2d_node = MakeNode(graph, 3, 1, "conv1", "Conv2D"); + GraphUtils::AddEdge(data->GetOutDataAnchor(0), conv2d_node->GetInDataAnchor(0)); + GraphUtils::AddEdge(const1->GetOutDataAnchor(0), conv2d_node->GetInDataAnchor(1)); + GraphUtils::AddEdge(const2->GetOutDataAnchor(0), conv2d_node->GetInDataAnchor(2)); + } + + void make_multibatch_graph(const ComputeGraphPtr &graph) { + auto index = MakeNode(graph, 1, 1, "index", "Data"); + auto data = MakeNode(graph, 1, 1, "data", "Data"); + auto data1 = MakeNode(graph, 1, 1, "data1", "Data"); + auto data2 = MakeNode(graph, 1, 1, "data2", "Data"); + AttrUtils::SetInt(data->GetOpDesc(), ATTR_NAME_INDEX, 0); + AttrUtils::SetInt(data1->GetOpDesc(), ATTR_NAME_INDEX, 1); + AttrUtils::SetInt(data2->GetOpDesc(), ATTR_NAME_INDEX, 2); + + auto case1 = MakeNode(graph, 4, 1, "case", "Case"); + GraphUtils::AddEdge(index->GetOutDataAnchor(0), case1->GetInDataAnchor(0)); + GraphUtils::AddEdge(data->GetOutDataAnchor(0), case1->GetInDataAnchor(1)); + GraphUtils::AddEdge(data1->GetOutDataAnchor(0), case1->GetInDataAnchor(2)); + GraphUtils::AddEdge(data2->GetOutDataAnchor(0), case1->GetInDataAnchor(3)); + auto output_node = MakeNode(graph, 1, 0, "output", "NetOutput"); + GraphUtils::AddEdge(case1->GetOutDataAnchor(0), output_node->GetInDataAnchor(0)); + + AttrUtils::SetInt(case1->GetOpDesc(), ATTR_NAME_BATCH_NUM, 2); + case1->GetOpDesc()->RegisterSubgraphIrName("branches", kDynamic); + ComputeGraphPtr branch = std::make_shared("test_branch"); + make_original_graph(branch); + for (int i = 0; i < 2; ++i) { + std::string name("_ascend_mbatch_batch_" + std::to_string(i)); + std::vector input_nodes; + std::vector output_nodes; + ComputeGraphPtr subgraph = GraphUtils::CloneGraph(branch, name, input_nodes, output_nodes); + + subgraph->SetName(name); + subgraph->SetParentNode(case1); + subgraph->SetParentGraph(graph); + graph->AddSubgraph(subgraph->GetName(), subgraph); + + case1->GetOpDesc()->AddSubgraphName(name); + case1->GetOpDesc()->SetSubgraphInstanceName(i, subgraph->GetName()); + } + } +}; + +TEST_F(UtestSubgraphConstMigrationPass, subgraph_const_migration) { + PassManager pass_manager; + pass_manager.AddPass("SubgraphConstMigrationPass", new (std::nothrow) SubgraphConstMigrationPass); + ComputeGraphPtr graph = std::make_shared("test_graph"); + make_multibatch_graph(graph); + EXPECT_EQ(pass_manager.Run(graph), SUCCESS); +} } // namespace ge \ No newline at end of file From 015073abefdb75be07e678f12f058f7914c7a8c4 Mon Sep 17 00:00:00 2001 From: wjm Date: Sat, 5 Jun 2021 02:18:43 +0800 Subject: [PATCH 15/18] remove --- ge/common/profiling/profiling_manager.cc | 32 +++++++++++++++++------- ge/common/profiling/profiling_manager.h | 2 ++ 2 files changed, 25 insertions(+), 9 deletions(-) diff --git a/ge/common/profiling/profiling_manager.cc b/ge/common/profiling/profiling_manager.cc index 8e984bb3..61210de6 100644 --- a/ge/common/profiling/profiling_manager.cc +++ b/ge/common/profiling/profiling_manager.cc @@ -954,26 +954,37 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status ProfilingManager::CallMs static_cast(&reporter_data), sizeof(ReporterData)); } -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY void ProfilingManager::GetOpInputOutputInfo( - const OpDescPtr &op, TaskDescInfo &task_desc_info) const { +void ProfilingManager::GetOpInputInfo(const OpDescPtr &op, TaskDescInfo &task_desc_info) const { std::vector input_format; std::vector> input_shape; std::vector input_data_type; for (size_t i = 0; i < op->GetAllInputsSize(); ++i) { GeTensorDescPtr input_tensor_desc = op->MutableInputDesc(i); - GE_IF_BOOL_EXEC(input_tensor_desc == nullptr, continue); - + if (input_tensor_desc == nullptr) { + continue; + } input_format.emplace_back(input_tensor_desc->GetFormat()); input_shape.emplace_back(input_tensor_desc->GetShape().GetDims()); input_data_type.emplace_back(input_tensor_desc->GetDataType()); } + + std::vector format_default = { FORMAT_NULL }; + std::vector> shape_default = { {0} }; + std::vector data_type_default = { DT_UNDEFINED }; + task_desc_info.input_format = input_format.empty() ? format_default : input_format; + task_desc_info.input_shape = input_shape.empty() ? shape_default : input_shape; + task_desc_info.input_data_type = input_data_type.empty() ? data_type_default : input_data_type; +} + +void ProfilingManager::GetOpOutputInfo(const OpDescPtr &op, TaskDescInfo &task_desc_info) const { std::vector output_format; std::vector> output_shape; std::vector output_data_type; for (size_t j = 0; j < op->GetOutputsSize(); ++j) { GeTensorDescPtr output_tensor_desc = op->MutableOutputDesc(j); - GE_IF_BOOL_EXEC(output_tensor_desc == nullptr, continue); - + if (output_tensor_desc == nullptr) { + continue; + } output_format.emplace_back(output_tensor_desc->GetFormat()); output_shape.emplace_back(output_tensor_desc->GetShape().GetDims()); output_data_type.emplace_back(output_tensor_desc->GetDataType()); @@ -982,14 +993,17 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY void ProfilingManager::GetOpInp std::vector format_default = { FORMAT_NULL }; std::vector> shape_default = { {0} }; std::vector data_type_default = { DT_UNDEFINED }; - task_desc_info.input_format = input_format.empty() ? format_default : input_format; - task_desc_info.input_shape = input_shape.empty() ? shape_default : input_shape; - task_desc_info.input_data_type = input_data_type.empty() ? data_type_default : input_data_type; task_desc_info.output_format = output_format.empty() ? format_default : output_format; task_desc_info.output_shape = output_shape.empty() ? shape_default : output_shape; task_desc_info.output_data_type = output_data_type.empty() ? data_type_default : output_data_type; } +FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY void ProfilingManager::GetOpInputOutputInfo( + const OpDescPtr &op, TaskDescInfo &task_desc_info) const { + GetOpInputInfo(op, task_desc_info); + GetOpOutputInfo(op, task_desc_info); +} + FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY void ProfilingManager::GetFpBpPoint( std::string &fp_point, std::string &bp_point) { // Env or options mode, fp_point_/bp_point_ have initiliazed on profiling init diff --git a/ge/common/profiling/profiling_manager.h b/ge/common/profiling/profiling_manager.h index 12293fc5..049a4df4 100755 --- a/ge/common/profiling/profiling_manager.h +++ b/ge/common/profiling/profiling_manager.h @@ -111,6 +111,8 @@ class FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY ProfilingManager { uint64_t GetProfilingModule(); void UpdateDeviceIdModuleMap(string prof_type, uint64_t module, const vector &device_list); void UpdateSubscribeDeviceModuleMap(std::string prof_type, uint32_t device_id, uint64_t module); + void GetOpInputInfo(const OpDescPtr &op, TaskDescInfo &task_desc_info) const; + void GetOpOutputInfo(const OpDescPtr &op, TaskDescInfo &task_desc_info) const; bool is_load_profiling_; bool is_execute_profiling_; From 67de533b5a4c8234af83f664ed5328e5a8367f6d Mon Sep 17 00:00:00 2001 From: wjm Date: Sat, 5 Jun 2021 04:19:56 +0800 Subject: [PATCH 16/18] modify --- ge/ge_runtime/task/hccl_task.cc | 2 +- ge/graph/passes/base_pass.cc | 33 +++++++++++++++++-------- ge/graph/passes/flow_ctrl_pass.cc | 12 ++++++--- ge/graph/preprocess/graph_preprocess.cc | 8 +++--- 4 files changed, 37 insertions(+), 18 deletions(-) diff --git a/ge/ge_runtime/task/hccl_task.cc b/ge/ge_runtime/task/hccl_task.cc index 65690683..ac39412d 100644 --- a/ge/ge_runtime/task/hccl_task.cc +++ b/ge/ge_runtime/task/hccl_task.cc @@ -154,7 +154,7 @@ bool HcclTask::SetSecondaryStream() { return false; } stream = std::make_shared(rt_model_handle_, new_stream); - GE_IF_BOOL_EXEC(stream == nullptr, return false); + GE_RT_FALSE_CHECK_NOTNULL(stream); secondary_stream_vec[index] = stream; } diff --git a/ge/graph/passes/base_pass.cc b/ge/graph/passes/base_pass.cc index 16c05647..165e7e81 100755 --- a/ge/graph/passes/base_pass.cc +++ b/ge/graph/passes/base_pass.cc @@ -199,6 +199,24 @@ void ClearOption(NamesToPass names_to_pass) { name_to_pass.second->ClearOptions(); } } + +bool CheckNode(const NodePtr &node, const DuringPassNodeSets &during_pass_node_set) { + if (node == nullptr) { + GELOGW("node is null"); + return false; + } + if (during_pass_node_set.nodes_deleted.count(node) > 0) { + GELOGD("The node %s was deleted before, skip it.", node->GetName().c_str()); + return false; + } + if (during_pass_node_set.nodes_suspend.count(node) > 0) { + GELOGD("The node %s has been added to suspend-iteration nodes list, the iteration of it will be suspend.", + node->GetName().c_str()); + return false; + } + + return true; +} } // namespace Status BaseNodePass::IsolateAndDeleteNode(NodePtr &node, const std::vector &io_map) { @@ -277,17 +295,9 @@ Status GEPass::RunPassesOneGraph(const NamesToPass &names_to_passes) { nodes.pop_front(); (void)during_pass_node_set.nodes_re_pass.erase(node); - GE_IF_BOOL_EXEC(node == nullptr, GELOGW("node is null"); continue); - if (during_pass_node_set.nodes_deleted.count(node) > 0) { - GELOGD("The node %s was deleted before, skip it.", node->GetName().c_str()); - continue; - } - if (during_pass_node_set.nodes_suspend.count(node) > 0) { - GELOGD("The node %s has been added to suspend-iteration nodes list, the iteration of it will be suspend.", - node->GetName().c_str()); + if (!CheckNode(node, during_pass_node_set)) { continue; } - AddNextIterNodes(node->GetOutNodes(), nodes, during_pass_node_set); auto ret = RunPasses(node, names_to_passes, during_pass_node_set); @@ -333,8 +343,11 @@ Status GEPass::RunPassesOneGraph(const NamesToPass &names_to_passes) { during_pass_node_set.nodes_last.clear(); } while ((!during_pass_node_set.nodes_re_pass.empty() || !nodes.empty()) && ++re_pass_times < kMaxRePassTimes); - GE_IF_BOOL_EXEC(re_pass_times == kMaxRePassTimes, GELOGW("re_pass_times should not come to %d", kMaxRePassTimes)); + if (re_pass_times == kMaxRePassTimes) { + GELOGW("re_pass_times should not come to %d", kMaxRePassTimes); + } GELOGD("All passes runs end"); + return SUCCESS; } Status GEPass::RunPassesOnSubGraph(const NodePtr &node, const NamesToPass &names_to_passes, bool &has_sub_graph) { diff --git a/ge/graph/passes/flow_ctrl_pass.cc b/ge/graph/passes/flow_ctrl_pass.cc index 469d2696..87896dc3 100755 --- a/ge/graph/passes/flow_ctrl_pass.cc +++ b/ge/graph/passes/flow_ctrl_pass.cc @@ -41,7 +41,9 @@ Status FlowCtrlPass::Run(ComputeGraphPtr compute_graph) { bool graph_change = false; // 1. Add FP/BP flow ctrl (big cycle) for (auto &node : compute_graph->GetDirectNode()) { - GE_IF_BOOL_EXEC(node == nullptr, continue); + if (node == nullptr) { + continue; + } GE_IF_BOOL_EXEC(node->GetOpDesc() == nullptr, continue); uint32_t true_stream_id = 0; bool is_found = AttrUtils::GetInt(node->GetOpDesc(), ATTR_NAME_TRUE_BRANCH_STREAM, true_stream_id); @@ -63,12 +65,14 @@ Status FlowCtrlPass::Run(ComputeGraphPtr compute_graph) { // 2. Add special node flow ctrl. eg, IteratorGetNext. (small cycle) // NOTE: Small cycle share the variables with big cycle. for (auto &node : compute_graph->GetDirectNode()) { - GE_IF_BOOL_EXEC(node == nullptr, continue); + if (node == nullptr) { + continue; + } GE_IF_BOOL_EXEC(node->GetOpDesc() == nullptr, continue); bool need_cycle_flag = false; - bool is_found = AttrUtils::GetBool(node->GetOpDesc(), ATTR_NAME_STREAM_CYCLE_EVENT_FLAG, need_cycle_flag); + (void)AttrUtils::GetBool(node->GetOpDesc(), ATTR_NAME_STREAM_CYCLE_EVENT_FLAG, need_cycle_flag); // small cycle flag is need_stream_cycle_event == true - if (is_found && need_cycle_flag) { + if (need_cycle_flag) { Status ret = AddSpecialNodeIteratorCtrl(compute_graph, node); if (ret != SUCCESS) { GELOGE(ret, "[Add][SpecialNodeIteratorCtrl] failed, node:%s, graph:%s.", diff --git a/ge/graph/preprocess/graph_preprocess.cc b/ge/graph/preprocess/graph_preprocess.cc index d5c261f5..0719adc6 100644 --- a/ge/graph/preprocess/graph_preprocess.cc +++ b/ge/graph/preprocess/graph_preprocess.cc @@ -1475,9 +1475,11 @@ Status GraphPrepare::UpdateInput(const std::vector &user_input, GeTensorDesc desc(user_input[index].GetTensorDesc()); // data maybe internal format [FRACTAL_NZ] at singleop process such as GEMM. auto tune_flag = (options_.build_mode == BUILD_MODE_TUNING) && (options_.build_step == BUILD_STEP_AFTER_BUILDER); - GE_CHK_STATUS_RET(CheckInternalFormat(input_node, desc, tune_flag), "[Check][InternalFormat] on %s failed.", - op->GetName().c_str()); - + ret = CheckInternalFormat(input_node, desc, tune_flag); + if (ret != SUCCESS) { + GELOGE(INTERNAL_ERROR, "[Check][InternalFormat] on %s failed", op->GetName().c_str()); + return ret; + } auto data_type = desc.GetDataType(); uint32_t length = 1; bool type_ret = TypeUtils::GetDataTypeLength(data_type, length); From 88809a3e006e38bca0a15ca385854263d4bd1351 Mon Sep 17 00:00:00 2001 From: wjm Date: Sat, 5 Jun 2021 04:25:19 +0800 Subject: [PATCH 17/18] fix --- ge/ge_runtime/task/hccl_task.cc | 1 - 1 file changed, 1 deletion(-) diff --git a/ge/ge_runtime/task/hccl_task.cc b/ge/ge_runtime/task/hccl_task.cc index ac39412d..2ffe5185 100644 --- a/ge/ge_runtime/task/hccl_task.cc +++ b/ge/ge_runtime/task/hccl_task.cc @@ -155,7 +155,6 @@ bool HcclTask::SetSecondaryStream() { } stream = std::make_shared(rt_model_handle_, new_stream); GE_RT_FALSE_CHECK_NOTNULL(stream); - secondary_stream_vec[index] = stream; } secondary_stream_list_.push_back(stream); From 0cec9f8ddefd469380951828229f68d83d7ed697 Mon Sep 17 00:00:00 2001 From: wjm Date: Sat, 5 Jun 2021 07:25:44 +0800 Subject: [PATCH 18/18] fix --- ge/graph/optimize/mem_rw_conflict_optimize.cc | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/ge/graph/optimize/mem_rw_conflict_optimize.cc b/ge/graph/optimize/mem_rw_conflict_optimize.cc index 30fcabef..7e7ab908 100644 --- a/ge/graph/optimize/mem_rw_conflict_optimize.cc +++ b/ge/graph/optimize/mem_rw_conflict_optimize.cc @@ -743,10 +743,8 @@ Status GraphOptimize::HandleMemoryRWConflict(ComputeGraphPtr &compute_graph) { continue; } // ignore data / netoutput of subgraph - if (AttrUtils::HasAttr(node->GetOpDesc(), ATTR_NAME_PARENT_NODE_INDEX)) { - if (node->GetType() == DATA || node->GetType() == NETOUTPUT) { - continue; - } + if (IsSubgraphInputNode(node) || IsSubgraphOutputNode(node)) { + continue; } bool identity_reserved = false;