From e0a5b21daa071812c2a87111a7c24c457090a8fb Mon Sep 17 00:00:00 2001
From: lwx897429
Date: Wed, 30 Dec 2020 16:05:37 +0800
Subject: [PATCH 01/54] Memory optimization during model loading

---
 .../load/new_model_manager/data_dumper.cc     |  7 ++++
 ge/graph/load/new_model_manager/data_dumper.h | 10 +++---
 .../load/new_model_manager/davinci_model.cc   | 32 ++++++++++++-------
 .../load/new_model_manager/davinci_model.h    | 12 +++++--
 ...ew_model_manager_davinci_model_unittest.cc | 11 +++++++
 5 files changed, 54 insertions(+), 18 deletions(-)

diff --git a/ge/graph/load/new_model_manager/data_dumper.cc b/ge/graph/load/new_model_manager/data_dumper.cc
index b33a062d..f7f23dc1 100644
--- a/ge/graph/load/new_model_manager/data_dumper.cc
+++ b/ge/graph/load/new_model_manager/data_dumper.cc
@@ -830,6 +830,13 @@ Status DataDumper::UnloadDumpInfo() {
   return SUCCESS;
 }
 
+void DataDumper::DumpShrink() {
+  compute_graph_.reset();
+  input_map_.clear();
+  ref_info_.clear();
+  op_list_.clear();
+}
+
 void DataDumper::PrintCheckLog(string &dump_list_key) {
   std::set<std::string> model_list = dump_properties_.GetAllDumpModel();
   if (model_list.empty()) {
diff --git a/ge/graph/load/new_model_manager/data_dumper.h b/ge/graph/load/new_model_manager/data_dumper.h
index 46ead310..8e612688 100755
--- a/ge/graph/load/new_model_manager/data_dumper.h
+++ b/ge/graph/load/new_model_manager/data_dumper.h
@@ -83,6 +83,8 @@ class DataDumper {
 
   Status UnloadDumpInfo();
 
+  void DumpShrink();
+
   void SetDumpProperties(const DumpProperties &dump_properties) { dump_properties_ = dump_properties; }
   const DumpProperties &GetDumpProperties() const { return dump_properties_; }
   bool GetOpDescInfo(uint32_t stream_id, uint32_t task_id, OpDescInfo &op_desc_info) const;
@@ -112,18 +114,18 @@ class DataDumper {
   struct InnerInputMapping;
 
   std::vector<OpDescInfo> op_desc_info_;
-  std::vector<InnerDumpInfo> op_list_;
+  std::vector<InnerDumpInfo> op_list_;  // release after DavinciModel::Init
   uint32_t end_graph_task_id_ = 0;
   uint32_t end_graph_stream_id_ = 0;
   bool is_end_graph_ = false;
-  std::multimap<std::string, InnerInputMapping> input_map_;
+  std::multimap<std::string, InnerInputMapping> input_map_;  // release after DavinciModel::Init
   bool load_flag_;
   uint32_t device_id_;
   uintptr_t global_step_;
   uintptr_t loop_per_iter_;
   uintptr_t loop_cond_;
-  ComputeGraphPtr compute_graph_;
-  std::map<OpDescPtr, void *> ref_info_;
+  ComputeGraphPtr compute_graph_;  // release after DavinciModel::Init
+  std::map<OpDescPtr, void *> ref_info_;  // release after DavinciModel::Init
 
   void *l1_fusion_addr_ = nullptr;
 
diff --git a/ge/graph/load/new_model_manager/davinci_model.cc b/ge/graph/load/new_model_manager/davinci_model.cc
index 7721739b..7427489b 100755
--- a/ge/graph/load/new_model_manager/davinci_model.cc
+++ b/ge/graph/load/new_model_manager/davinci_model.cc
@@ -150,14 +150,7 @@ DavinciModel::~DavinciModel() {
     GELOGW("UnloadDumpInfo failed, ret: %u.", ret);
   }
 
-  for (const auto &op_and_addr : saved_task_addrs_) {
-    auto addr = op_and_addr.second;
-    if (addr != nullptr) {
-      GE_CHK_RT(rtFree(addr));
-    }
-    addr = nullptr;
-  }
-  saved_task_addrs_.clear();
+  ClearTaskAddrs();
 
   GE_CHK_STATUS(ModelRunStop());
 
@@ -221,6 +214,17 @@ DavinciModel::~DavinciModel() {
   }
 }
 
+void DavinciModel::ClearTaskAddrs() {
+  for (const auto &op_and_addr : saved_task_addrs_) {
+    auto addr = op_and_addr.second;
+    if (addr != nullptr) {
+      GE_CHK_RT(rtFree(addr));
+    }
+    addr = nullptr;
+  }
+  saved_task_addrs_.clear();
+}
+
 void DavinciModel::UnbindHcomStream() {
   if (!all_hccl_stream_list_.empty()) {
     for (size_t i = 0; i < all_hccl_stream_list_.size(); i++) {
@@ -263,7 +267,10 @@ Status DavinciModel::Assign(const GeModelPtr &ge_model) {
 ///
 void DavinciModel::Shrink() {
   skt_info_ = {0, 0, 0, 0, nullptr, nullptr, {}, {}, {}, {}, {}, RT_KERNEL_DEFAULT, -1, 0, nullptr};
+  DumperShrink();
   ge_model_.reset();  // delete object.
+  op_list_.clear();
+  ClearTaskAddrs();
 }
 
 Status DavinciModel::InitWeightMem(void *dev_ptr, void *weight_ptr, size_t weight_size) {
@@ -738,7 +745,6 @@ Status DavinciModel::ReportProfilingData() {
   }
   ProfilingManager::Instance().ReportProfilingData(model_id_, GetTaskDescInfo(), compute_graph_desc_info);
   GE_CHK_STATUS(SinkModelProfile(), "Sink model profiler failed.");
-  op_list_.clear();
 
   return SUCCESS;
 }
@@ -963,7 +969,9 @@ Status DavinciModel::InitDataOp(const ComputeGraphPtr &graph, const NodePtr &nod
   }
 
   data_by_index[data_index] = op_desc;
-  data_op_list_.push_back(op_desc);
+  auto data_op = AttrUtils::CopyOpDesc(op_desc);
+  GE_CHECK_NOTNULL(data_op);
+  data_op_list_.push_back(data_op);
   if (known_node_) {
     return SUCCESS;
   }
@@ -1019,7 +1027,9 @@ Status DavinciModel::OptInputOutputInfo(const map<uint32_t, OpDescPtr> &data_by_
   }
 
   data_op_list_.clear();
   for (auto &item : data_by_index) {
-    data_op_list_.emplace_back(item.second);
+    auto data_op = AttrUtils::CopyOpDesc(item.second);
+    GE_CHECK_NOTNULL(data_op);
+    data_op_list_.emplace_back(data_op);
     auto output_addrs = ModelUtils::GetOutputDataAddrs(runtime_param_, item.second);
     GELOGD("Data node: %s, output addr size: %zu", item.second->GetName().c_str(), output_addrs.size());
     input_addrs_list_.emplace_back(output_addrs);
diff --git a/ge/graph/load/new_model_manager/davinci_model.h b/ge/graph/load/new_model_manager/davinci_model.h
index 906c0548..10cda88d 100755
--- a/ge/graph/load/new_model_manager/davinci_model.h
+++ b/ge/graph/load/new_model_manager/davinci_model.h
@@ -481,6 +481,10 @@ class DavinciModel {
     data_dumper_.SaveDumpTask(task_id, stream_id, op_desc, args);
   }
 
+  void DumperShrink() {
+    data_dumper_.DumpShrink();
+  }
+
   void SetEndGraphId(uint32_t task_id, uint32_t stream_id);
 
   DavinciModel &operator=(const DavinciModel &model) = delete;
@@ -644,6 +648,8 @@ class DavinciModel {
 
   void ReleaseTask();
 
+  void ClearTaskAddrs();
+
   void UnbindTaskSinkStream();
 
   bool IsAicpuKernelConnectSpecifiedLayer();
@@ -875,12 +881,12 @@ class DavinciModel {
 
   string om_name_;
   uint32_t version_;
-  GeModelPtr ge_model_;
+  GeModelPtr ge_model_;  // release after DavinciModel::Init
 
   bool need_destroy_aicpu_kernel_{false};
   vector<string> out_node_name_;
 
-  map<uint32_t, OpDescPtr> op_list_;
+  map<uint32_t, OpDescPtr> op_list_;  // release after DavinciModel::Init
 
   // data op_desc
   vector<OpDescPtr> data_op_list_;
@@ -975,7 +981,7 @@ class DavinciModel {
   DataDumper data_dumper_;
   uint64_t iterator_count_;
   bool is_l1_fusion_enable_;
-  map<OpDescPtr, void *> saved_task_addrs_;
+  map<OpDescPtr, void *> saved_task_addrs_;  // release after DavinciModel::Init
   void *l1_fusion_addr_ = nullptr;
 
   bool known_node_ = false;
diff --git a/tests/ut/ge/graph/load/new_model_manager_davinci_model_unittest.cc b/tests/ut/ge/graph/load/new_model_manager_davinci_model_unittest.cc
index 44642f93..00069930 100644
--- a/tests/ut/ge/graph/load/new_model_manager_davinci_model_unittest.cc
+++ b/tests/ut/ge/graph/load/new_model_manager_davinci_model_unittest.cc
@@ -254,6 +254,17 @@ TEST_F(UtestModelManagerDavinciModel, eventlist_success) {
   delete model;
 }
 
+// test Shrink
+TEST_F(UtestModelManagerDavinciModel, shrink_success) {
+  DavinciModel model(0, g_label_call_back);
+  OpDescPtr op_desc_ptr = make_shared<OpDesc>("Cast", "Cast");
+  void *addr = nullptr;
+  rtMalloc(&addr, 128, RT_MEMORY_HBM);
+  model.saved_task_addrs_.emplace(op_desc_ptr, addr);
+  model.Shrink();
+  EXPECT_EQ(model.saved_task_addrs_.empty(), true);
+}
+
 // test rtMalloc
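//
// Editor's sketch (not part of the original patch): shrink_success above calls
// rtMalloc without checking its status and relies on Shrink() to release the
// buffer. When a test needs the allocation freed on every exit path, a small
// RAII wrapper around the rtMalloc/rtFree pair already used in this file is
// enough; the DeviceBuffer name below is hypothetical:
struct DeviceBuffer {
  void *ptr = nullptr;
  explicit DeviceBuffer(uint64_t size) {
    // Keep the handle empty if the runtime allocation fails.
    if (rtMalloc(&ptr, size, RT_MEMORY_HBM) != RT_ERROR_NONE) {
      ptr = nullptr;
    }
  }
  ~DeviceBuffer() {
    // Always hand device memory back on scope exit.
    if (ptr != nullptr) {
      (void)rtFree(ptr);
    }
  }
};
// Usage in a test body: DeviceBuffer buf(128); EXPECT_NE(buf.ptr, nullptr);
//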
TEST_F(UtestModelManagerDavinciModel, failed_reset_device) { DavinciModel model(0, g_label_call_back); From 7d336c66a6088af8681f1c52da7a1ca12a139a3f Mon Sep 17 00:00:00 2001 From: taoxiangdong Date: Tue, 29 Dec 2020 21:43:50 +0800 Subject: [PATCH 02/54] Free memory before return --- ge/common/profiling/profiling_manager.cc | 6 +- ge/graph/build/model_builder.cc | 12 ++-- .../load/new_model_manager/model_manager.cc | 36 ++++++---- tests/ut/ge/CMakeLists.txt | 1 + ...el_manager_model_manager_aicpu_unittest.cc | 66 +++++++++++++++++++ 5 files changed, 101 insertions(+), 20 deletions(-) create mode 100644 tests/ut/ge/graph/load/new_model_manager_model_manager_aicpu_unittest.cc diff --git a/ge/common/profiling/profiling_manager.cc b/ge/common/profiling/profiling_manager.cc index abc4a6df..92417286 100644 --- a/ge/common/profiling/profiling_manager.cc +++ b/ge/common/profiling/profiling_manager.cc @@ -94,7 +94,7 @@ ge::Status ProfilingManager::InitFromOptions(const Options &options, MsprofGeOpt if (options.profiling_mode == "1" && !options.profiling_options.empty()) { // enable profiling by ge option if (strncpy_s(prof_conf.options, MSPROF_OPTIONS_DEF_LEN_MAX, options.profiling_options.c_str(), - MSPROF_OPTIONS_DEF_LEN_MAX - 1) != EOK) { + MSPROF_OPTIONS_DEF_LEN_MAX - 1) != EOK) { GELOGE(INTERNAL_ERROR, "copy profiling_options failed."); return INTERNAL_ERROR; } @@ -124,8 +124,8 @@ ge::Status ProfilingManager::InitFromOptions(const Options &options, MsprofGeOpt return ge::PARAM_INVALID; } - if (strncpy_s(prof_conf.jobId, MSPROF_OPTIONS_DEF_LEN_MAX, options.job_id.c_str(), - MSPROF_OPTIONS_DEF_LEN_MAX - 1) != EOK) { + if (strncpy_s(prof_conf.jobId, MSPROF_OPTIONS_DEF_LEN_MAX, options.job_id.c_str(), MSPROF_OPTIONS_DEF_LEN_MAX - 1) != + EOK) { GELOGE(INTERNAL_ERROR, "copy job_id failed."); return INTERNAL_ERROR; } diff --git a/ge/graph/build/model_builder.cc b/ge/graph/build/model_builder.cc index 77f8f237..de586275 100755 --- a/ge/graph/build/model_builder.cc +++ b/ge/graph/build/model_builder.cc @@ -805,7 +805,7 @@ Status ModelBuilder::CompileSingleOp() { } void ModelBuilder::CollectCheckAicpuAttr(const OpDescPtr &op_desc, std::set &aicpu_op_types, - std::set &aicpu_tf_op_types) { + std::set &aicpu_tf_op_types) { std::string aicpu_optype; bool has_attr_check_cpu = ge::AttrUtils::GetStr(op_desc, "needCheckCpu", aicpu_optype); std::vector tf_optypes; @@ -822,7 +822,7 @@ void ModelBuilder::CollectCheckAicpuAttr(const OpDescPtr &op_desc, std::set &aicpu_op_types, - std::set &aicpu_tf_op_types) { + std::set &aicpu_tf_op_types) { std::vector aicpu_optype_list; std::vector aicpu_tf_optype_list; if (ge::AttrUtils::GetListStr(&model, "needCheckCpu", aicpu_optype_list)) { @@ -839,10 +839,10 @@ void ModelBuilder::SetModelCheckAicpuAttr(ge::Model &model, std::setGetName().c_str(), aicpu_op_types.size(), aicpu_optype_list.size(), aicpu_tf_op_types.size(), - aicpu_tf_optype_list.size()); + "Check Aicpu op types ComputeGraph: %s aicpu_op_types: %zu, aicpu_optype_list: %zu, aicpu_tf_op_types: %zu, " + "aicpu_tf_optype_list:%zu.", + compute_graph_->GetName().c_str(), aicpu_op_types.size(), aicpu_optype_list.size(), aicpu_tf_op_types.size(), + aicpu_tf_optype_list.size()); GE_CHK_BOOL_EXEC(ge::AttrUtils::SetListStr(&model, "needCheckCpu", aicpu_optype_list), return, "Set attr needCheckCpu fail."); diff --git a/ge/graph/load/new_model_manager/model_manager.cc b/ge/graph/load/new_model_manager/model_manager.cc index 4b0dbee0..01075255 100755 --- a/ge/graph/load/new_model_manager/model_manager.cc +++ 
b/ge/graph/load/new_model_manager/model_manager.cc @@ -1563,6 +1563,11 @@ Status ModelManager::LaunchKernelCheckAicpuOp(std::vector &aicpu_op size_t aicpu_op_nums = aicpu_optype_list.size(); size_t tf_op_nums = aicpu_tf_optype_list.size(); size_t op_nums = aicpu_op_nums + tf_op_nums; + std::function callback = [&]() { + for (auto mem : allocated_mem) { + GE_CHK_RT(rtFree(mem)); + } + }; // malloc sysOpInfoList in SysOpCheckInfo status = rtMalloc(&d_req_op_list, op_nums * sizeof(SysOpInfo), RT_MEMORY_HBM); if (status != RT_ERROR_NONE) { @@ -1575,6 +1580,7 @@ Status ModelManager::LaunchKernelCheckAicpuOp(std::vector &aicpu_op status = rtMalloc(&d_res_op_list, op_nums * sizeof(SysOpInfo), RT_MEMORY_HBM); if (status != RT_ERROR_NONE) { GELOGE(RT_FAILED, "Call rt failed, status: 0x%x", status); + GE_MAKE_GUARD(release, callback); return RT_ERROR_TO_GE_STATUS(status); } allocated_mem.push_back(d_res_op_list); @@ -1583,6 +1589,7 @@ Status ModelManager::LaunchKernelCheckAicpuOp(std::vector &aicpu_op status = rtMalloc(&d_ret_code_list, op_nums * sizeof(ReturnCode), RT_MEMORY_HBM); if (status != RT_ERROR_NONE) { GELOGE(RT_FAILED, "Call rt failed, status: 0x%x", status); + GE_MAKE_GUARD(release, callback); return RT_ERROR_TO_GE_STATUS(status); } allocated_mem.push_back(d_ret_code_list); @@ -1594,6 +1601,7 @@ Status ModelManager::LaunchKernelCheckAicpuOp(std::vector &aicpu_op status = rtMalloc(&d_op_type_name, op_type.length(), RT_MEMORY_HBM); if (status != RT_ERROR_NONE) { GELOGE(RT_FAILED, "Call rt failed, status: 0x%x", status); + GE_MAKE_GUARD(release, callback); return RT_ERROR_TO_GE_STATUS(status); } allocated_mem.push_back(d_op_type_name); @@ -1611,6 +1619,7 @@ Status ModelManager::LaunchKernelCheckAicpuOp(std::vector &aicpu_op status = rtMalloc(&d_op_type_name, op_type.size(), RT_MEMORY_HBM); if (status != RT_ERROR_NONE) { GELOGE(RT_FAILED, "Call rt failed, status: 0x%x", status); + GE_MAKE_GUARD(release, callback); return RT_ERROR_TO_GE_STATUS(status); } allocated_mem.push_back(d_op_type_name); @@ -1639,37 +1648,39 @@ Status ModelManager::LaunchKernelCheckAicpuOp(std::vector &aicpu_op status = rtMalloc(&args, args_size, RT_MEMORY_HBM); if (status != RT_ERROR_NONE) { GELOGE(RT_FAILED, "Call rt failed, status: 0x%x", status); + GE_MAKE_GUARD(release, callback); return RT_ERROR_TO_GE_STATUS(status); } allocated_mem.push_back(args); - GE_CHK_RT( - rtMemcpy(args, sizeof(SysOpCheckInfo), reinterpret_cast(&op_check_info_req), sizeof(SysOpCheckInfo), RT_MEMCPY_HOST_TO_DEVICE)); - GE_CHK_RT(rtMemcpy(reinterpret_cast(static_cast(static_cast(reinterpret_cast(args)) + op_check_info_req.offSetLen)), - sizeof(SysOpCheckResp), reinterpret_cast(&op_check_info_res), sizeof(SysOpCheckResp), RT_MEMCPY_HOST_TO_DEVICE)); + GE_CHK_RT(rtMemcpy(args, sizeof(SysOpCheckInfo), reinterpret_cast(&op_check_info_req), sizeof(SysOpCheckInfo), + RT_MEMCPY_HOST_TO_DEVICE)); + GE_CHK_RT(rtMemcpy( + reinterpret_cast(static_cast(static_cast(reinterpret_cast(args)) + + op_check_info_req.offSetLen)), sizeof(SysOpCheckResp), reinterpret_cast(&op_check_info_res), + sizeof(SysOpCheckResp), RT_MEMCPY_HOST_TO_DEVICE)); GE_CHK_RT(rtStreamCreate(&stream, 0)); GE_CHK_RT(rtCpuKernelLaunch(nullptr, kernel_name.c_str(), 1, args, args_size, nullptr, stream)); status = rtStreamSynchronize(stream); if (status != RT_ERROR_NONE) { GELOGE(RT_FAILED, "Call rt stream sync failed, status: 0x%x", status); + GE_MAKE_GUARD(release, callback); + GE_CHK_RT(rtStreamDestroy(stream)); return RT_ERROR_TO_GE_STATUS(status); } // Check the response - 
SysOpCheckResp *d_op_check_info_res = reinterpret_cast(reinterpret_cast(static_cast(static_cast(reinterpret_cast(args)) + op_check_info_req.offSetLen))); + SysOpCheckResp *d_op_check_info_res = + reinterpret_cast(reinterpret_cast(static_cast(static_cast( + reinterpret_cast(args)) + op_check_info_req.offSetLen))); (void)memset_s(&op_check_info_res, sizeof(SysOpCheckResp), 0, sizeof(SysOpCheckResp)); GE_CHK_RT(rtMemcpy(&op_check_info_res, sizeof(SysOpCheckResp), d_op_check_info_res, sizeof(SysOpCheckResp), RT_MEMCPY_DEVICE_TO_HOST)); - std::function callback = [&]() { - for (auto mem : allocated_mem) { - GE_CHK_RT(rtFree(mem)); - } - GE_CHK_RT(rtStreamDestroy(stream)); - }; if (op_check_info_res.isWithoutJson) { GELOGI("No need to check aicpu in this scenoria."); GE_MAKE_GUARD(release, callback); + GE_CHK_RT(rtStreamDestroy(stream)); return SUCCESS; } uint64_t res_op_nums = op_check_info_res.opListNum; @@ -1688,6 +1699,7 @@ Status ModelManager::LaunchKernelCheckAicpuOp(std::vector &aicpu_op if (res_ret_code_list.size() != res_aicpu_op_info_list.size() || res_ret_code_list.size() != res_op_nums) { GELOGE(FAILED, "Number of retcode is not equal to number of op type."); GE_MAKE_GUARD(release, callback); + GE_CHK_RT(rtStreamDestroy(stream)); return FAILED; } std::string fail_reason; @@ -1711,10 +1723,12 @@ Status ModelManager::LaunchKernelCheckAicpuOp(std::vector &aicpu_op fail_reason += "not support."; GELOGE(FAILED, "Check aicpu op_type failed. details: %s", fail_reason.c_str()); GE_MAKE_GUARD(release, callback); + GE_CHK_RT(rtStreamDestroy(stream)); return FAILED; } GE_MAKE_GUARD(release, callback); + GE_CHK_RT(rtStreamDestroy(stream)); GELOGI("Cpu kernel launch check optype task success."); return SUCCESS; } diff --git a/tests/ut/ge/CMakeLists.txt b/tests/ut/ge/CMakeLists.txt index 175774bb..fbeb9867 100755 --- a/tests/ut/ge/CMakeLists.txt +++ b/tests/ut/ge/CMakeLists.txt @@ -562,6 +562,7 @@ set(DISTINCT_GRAPH_LOAD_TEST_FILES #"graph/load/new_model_manager_davinci_model_unittest.cc" #"graph/load/new_model_manager_model_manager_unittest.cc" #"graph/load/new_model_manager_task_build_unittest.cc" + "graph/load/new_model_manager_model_manager_aicpu_unittest.cc" "graph/load/end_graph_task_unittest.cc" "graph/load/new_model_manager_event_manager_unittest.cc" #"graph/load/output_net_output_unittest.cc" diff --git a/tests/ut/ge/graph/load/new_model_manager_model_manager_aicpu_unittest.cc b/tests/ut/ge/graph/load/new_model_manager_model_manager_aicpu_unittest.cc new file mode 100644 index 00000000..0539bcdb --- /dev/null +++ b/tests/ut/ge/graph/load/new_model_manager_model_manager_aicpu_unittest.cc @@ -0,0 +1,66 @@ +/** + * Copyright 2019-2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include + +#include +#include "common/debug/log.h" +#include "common/model_parser/base.h" +#include "common/properties_manager.h" +#include "common/types.h" +#include "common/l2_cache_optimize.h" + +#define private public +#define protected public +#include "graph/load/new_model_manager/model_manager.h" + +#include "common/helper/om_file_helper.h" +#include "common/op/ge_op_utils.h" +#include "graph/load/graph_loader.h" +#include "graph/load/new_model_manager/davinci_model.h" +#include "graph/load/new_model_manager/davinci_model_parser.h" +//#include "new_op_test_utils.h" +#undef private +#undef protected + +using namespace std; +using namespace testing; + +namespace ge { + +const static std::string ENC_KEY = "0123456789abcdef0123456789abcdef0123456789abcdef0123456789abcdef"; + +class UtestModelManagerModelManagerAicpu : public testing::Test { + protected: + void SetUp() {} + + void TearDown() {} +}; + +TEST_F(UtestModelManagerModelManagerAicpu, checkAicpuOptype) { + ModelManager model_manager; + uint32_t model_id = 0; + std::vector aicpu_op_list; + std::vector aicpu_tf_list; + aicpu_tf_list.emplace_back("FrameworkOp"); + aicpu_tf_list.emplace_back("Unique"); + + model_manager.LaunchKernelCheckAicpuOp(aicpu_op_list, aicpu_tf_list); + // Load allow listener is null + //EXPECT_EQ(ge::FAILED, mm.LoadModelOffline(model_id, data, nullptr, nullptr)); +} + +} // namespace ge From 0074b0b48f295ede76586cc187e2b12c26121a5d Mon Sep 17 00:00:00 2001 From: wangxiaotian22 Date: Wed, 30 Dec 2020 16:29:29 +0800 Subject: [PATCH 03/54] revert broadcast in train graph related --- ge/graph/build/memory/block_mem_assigner.cc | 95 +---- ge/graph/build/memory/block_mem_assigner.h | 6 +- .../load/new_model_manager/davinci_model.cc | 12 + ge/graph/manager/graph_manager.cc | 3 - ge/graph/passes/hccl_memcpy_pass.cc | 333 ++---------------- ge/graph/passes/hccl_memcpy_pass.h | 17 - ge/graph/preprocess/graph_preprocess.cc | 3 + 7 files changed, 61 insertions(+), 408 deletions(-) diff --git a/ge/graph/build/memory/block_mem_assigner.cc b/ge/graph/build/memory/block_mem_assigner.cc index 3acd4a7f..9dc0cf73 100755 --- a/ge/graph/build/memory/block_mem_assigner.cc +++ b/ge/graph/build/memory/block_mem_assigner.cc @@ -551,31 +551,11 @@ void GetMaxBatchAllMemorySize(std::map> &batch_all_ } } -void BlockMemAssigner::MarkContinuousAllocedForOneInputFromVariable(const NodePtr &node) { - auto node_op_desc = node->GetOpDesc(); - GE_IF_BOOL_EXEC(node_op_desc == nullptr, return); - // if input size just one and from variable, no need to reassign continuous memory - bool is_input_continuous = false; - (void)ge::AttrUtils::GetBool(node_op_desc, ATTR_NAME_CONTINUOUS_INPUT, is_input_continuous); - if (is_input_continuous && (node_op_desc->GetInputsSize() == 1)) { - auto peer_out_anchor = node->GetInDataAnchor(0)->GetPeerOutAnchor(); - GE_IF_BOOL_EXEC(peer_out_anchor == nullptr, return); - auto in_node = peer_out_anchor->GetOwnerNode(); - GE_IF_BOOL_EXEC(in_node == nullptr, return); - if (in_node->GetType() == VARIABLE || in_node->GetType() == CONSTANT) { - GELOGI("node only one input and from variable, set continuous alloced. 
node_name:%s", node->GetName().c_str()); - (void)ge::AttrUtils::SetBool(node_op_desc, ATTR_NAME_CONTINUOUS_INPUT_ALLOC, true); - } - } -} - void BlockMemAssigner::GetOutAndWorkSpaceMem(vector &all_memory_size) { vector temp; std::map> batch_all_memory_size; std::map batch_total_size; for (const NodePtr &n : compute_graph_->GetAllNodes()) { - MarkContinuousAllocedForOneInputFromVariable(n); - auto node_op_desc = n->GetOpDesc(); GE_IF_BOOL_EXEC(node_op_desc == nullptr, continue); @@ -1081,53 +1061,18 @@ MemoryBlock *BlockMemAssigner::ApplyMemory(size_t block_size, size_t real_size, return block; } -void BlockMemAssigner::ContinuousOutRefCheck(bool &isAllOutputRef, bool &isOutputHasRef, - const NodePtr &n) { - const auto node_op_desc = n->GetOpDesc(); - for (uint32_t index = 0; index < static_cast(node_op_desc->GetOutputsSize()); index++) { - int32_t reuse_in_index = -1; - if (!GraphUtils::IsRefFromInput(n->GetOutDataAnchor(index), reuse_in_index)) { - isAllOutputRef = false; - break; - } else { - zero_memory_list_.emplace_back(n, kOutput, index); - isOutputHasRef = true; - } - } -} - - -Status BlockMemAssigner::ApplyContinuousMemory(const NodePtr &n, const vector &ranges, +MemoryBlock *BlockMemAssigner::ApplyContinuousMemory(const NodePtr &n, const vector &ranges, const bool is_op_reuse_mem) { - GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(n == nullptr, return INTERNAL_ERROR, "input node is null."); + GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(n == nullptr, return nullptr, "input node is null."); auto node_op_desc = n->GetOpDesc(); - GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(node_op_desc == nullptr, return INTERNAL_ERROR, "node_op_desc is null."); - - // continuous output support ref only when all output ref input - bool isAllOutputRef = true; - bool isOutputHasRef = false; - - ContinuousOutRefCheck(isAllOutputRef, isOutputHasRef, n); - - if (isAllOutputRef) { - GELOGI("continuous output node ref all input, skip continuous alloc, node_name:%s", n->GetName().c_str()); - return SUCCESS; - } - - if (!isAllOutputRef && isOutputHasRef) { - GELOGE(INTERNAL_ERROR, "continuous output node ref part input, not support this situation, node_name:%s", - n->GetName().c_str()); - return INTERNAL_ERROR; - } - + GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(node_op_desc == nullptr, return nullptr, "node_op_desc is null."); MemoryBlock *block = nullptr; int64_t total_size = 0; int64_t memory_type = RT_MEMORY_HBM; for (uint32_t index = 0; index < static_cast(node_op_desc->GetOutputsSize()); index++) { auto output_op_desc = node_op_desc->GetOutputDescPtr(index); if (output_op_desc == nullptr) { - GELOGE(INTERNAL_ERROR, "Get output desc failed, node_name:%s, output_index:%u", n->GetName().c_str(), index); - return INTERNAL_ERROR; + return nullptr; } if (CheckIsZeroMemNodeType(n->GetType())) { @@ -1137,8 +1082,8 @@ Status BlockMemAssigner::ApplyContinuousMemory(const NodePtr &n, const vectorGetName().c_str(), index); - return INTERNAL_ERROR; + GELOGI("Get size failed"); + return nullptr; } size_t align_size = static_cast(size); AlignMemOffset(align_size); @@ -1161,7 +1106,7 @@ Status BlockMemAssigner::ApplyContinuousMemory(const NodePtr &n, const vectorfirst_continuous_block_ = true; block->last_continuous_block_ = true; - } else { - GELOGE(INTERNAL_ERROR, "node apply continuous output memory failed. 
node_name:%s", n->GetName().c_str()); - return INTERNAL_ERROR; } - return SUCCESS; + return block; } MemoryBlock *BlockMemAssigner::ApplyOutMemory(const NodePtr &n, uint32_t index, const vector &ranges, @@ -1191,8 +1133,9 @@ MemoryBlock *BlockMemAssigner::ApplyOutMemory(const NodePtr &n, uint32_t index, NodeIndexIO node_index_io(n, index, kOut); int64_t size = 0; auto output_op_desc = node_op_desc->GetOutputDescPtr(index); - GE_IF_BOOL_EXEC(output_op_desc == nullptr, return nullptr); - GE_IF_BOOL_EXEC(ge::TensorUtils::GetSize(*output_op_desc, size) != SUCCESS, GELOGI("Get size failed")); + if (output_op_desc != nullptr) { + GE_IF_BOOL_EXEC(ge::TensorUtils::GetSize(*output_op_desc, size) != SUCCESS, GELOGI("Get size failed")); + } size_t no_align_size = 0; GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(GetNoAlignSize(*node_op_desc, index, no_align_size) != SUCCESS, return nullptr, "Get no align size failed"); @@ -1203,14 +1146,6 @@ MemoryBlock *BlockMemAssigner::ApplyOutMemory(const NodePtr &n, uint32_t index, block->AddNodeTypeIndex({n, kOutput, index, true}, size, no_align_size); block->ref_count_++; } else { - // if ref input is variable, can not find symbol, must judge alone - int32_t reuse_in_index = -1; - if (GraphUtils::IsRefFromInput(n->GetOutDataAnchor(index), reuse_in_index)) { - zero_memory_list_.emplace_back(n, kOutput, index, false); - GELOGI("ref mode skip out block assign. node_name: %s, index:%d", n->GetName().c_str(), index); - return nullptr; - } - int64_t max_size = size; int64_t memory_type = RT_MEMORY_HBM; auto iter1 = anchor_to_symbol_.find(node_index_io.ToString()); @@ -1458,7 +1393,8 @@ Status BlockMemAssigner::AssignOutputMemoryWithReuse(const NodePtr &node, vector for (auto iter = stream_workspace_blocks_.begin(); iter != stream_workspace_blocks_.end(); ++iter) { ReleaseMemorys(iter->second[stream_id], reusable_blocks_[iter->first][stream_id]); }); if (IsContinuousOutput(node)) { - return ApplyContinuousMemory(node, ranges, is_op_reuse_mem_); + (void)ApplyContinuousMemory(node, ranges, is_op_reuse_mem_); + return SUCCESS; } for (uint32_t i = 0; i < static_cast(op_desc->GetOutputsSize()); i++) { int64_t size = 0; @@ -1952,8 +1888,9 @@ Status BlockMemAssigner::Assign() { bool BlockMemAssigner::CheckIsZeroMemNodeType(const string &node_type) const { return (node_type == VARIABLE) || (node_type == CONSTANT) || (node_type == MULTISHAPE) || - (node_type == CONSTANTOP) || (node_type == ASSIGNADD) || (node_type == ASSIGNSUB) || - (node_type == ASSIGN) || (node_type == HVDWAIT); + (node_type == HCOMBROADCAST) || (node_type == CONSTANTOP) || + (node_type == ASSIGNADD) || (node_type == ASSIGNSUB) || (node_type == ASSIGN) || (node_type == HVDWAIT) || + (node_type == HVDCALLBACKBROADCAST); } bool BlockMemAssigner::GetWorkSpaceMemoryType(const NodePtr &node, size_t index, int64_t &memory_type) { diff --git a/ge/graph/build/memory/block_mem_assigner.h b/ge/graph/build/memory/block_mem_assigner.h index 7e76081d..d514ca34 100755 --- a/ge/graph/build/memory/block_mem_assigner.h +++ b/ge/graph/build/memory/block_mem_assigner.h @@ -420,11 +420,7 @@ class BlockMemAssigner : public MemAssigner { bool GetWorkSpaceMemoryType(const NodePtr &node, size_t index, int64_t &memory_type); - void ContinuousOutRefCheck(bool &isAllOutputRef, bool &isOutputHasRef, const NodePtr &n); - - Status ApplyContinuousMemory(const NodePtr &n, const vector &ranges, const bool is_op_reuse_mem); - - void MarkContinuousAllocedForOneInputFromVariable(const NodePtr &node); + MemoryBlock *ApplyContinuousMemory(const NodePtr &n, 
const vector &ranges, const bool is_op_reuse_mem); std::unordered_map>> reusable_blocks_; diff --git a/ge/graph/load/new_model_manager/davinci_model.cc b/ge/graph/load/new_model_manager/davinci_model.cc index 7721739b..b7714c4a 100755 --- a/ge/graph/load/new_model_manager/davinci_model.cc +++ b/ge/graph/load/new_model_manager/davinci_model.cc @@ -2099,6 +2099,12 @@ Status DavinciModel::SyncVarData() { RT_MEMCPY_HOST_TO_DEVICE)); } + for (auto op_desc : variable_op_list_) { + ret = + VarManager::Instance(session_id_)->SyncVarData(runtime_param_.graph_id, op_desc->GetName(), op_desc, mem_base_); + GE_CHK_BOOL_EXEC(ret == SUCCESS, break, "sync var data ret failed, model id:%u, op name:%s.", model_id_, + op_desc->GetName().c_str()); + } return ret; } @@ -2571,6 +2577,12 @@ Status DavinciModel::ReturnResult(uint32_t data_id, const bool rslt_flg, const b /// Status DavinciModel::ReturnNoOutput(uint32_t data_id) { GELOGI("ReturnNoOutput model id:%u", model_id_); + for (auto op_desc : variable_op_list_) { + Status ret = VarManager::Instance(session_id_) + ->SyncBroadCastData2Var(runtime_param_.graph_id, op_desc->GetName(), op_desc, mem_base_); + GE_CHK_BOOL_EXEC(ret == SUCCESS, break, "sync var data ret failed, model id:%u, op name:%s.", model_id_, + op_desc->GetName().c_str()); + } GE_CHK_BOOL_EXEC(listener_ != nullptr, return PARAM_INVALID, "listener_ is null!"); std::vector outputs; diff --git a/ge/graph/manager/graph_manager.cc b/ge/graph/manager/graph_manager.cc index 030b864e..beb7cd42 100755 --- a/ge/graph/manager/graph_manager.cc +++ b/ge/graph/manager/graph_manager.cc @@ -93,7 +93,6 @@ #include "graph/passes/unused_args_clean_pass.h" #include "graph/passes/global_step_insert_pass.h" #include "graph/passes/memcpy_addr_async_pass.h" -#include "graph/passes/hccl_memcpy_pass.h" #include "graph/build/label_allocator.h" #include "graph/utils/tensor_adapter.h" #include "inc/pass_manager.h" @@ -2122,8 +2121,6 @@ Status GraphManager::OptimizeStage1(ge::ComputeGraphPtr &compute_graph) { new (std::nothrow) TransOpWithoutReshapeFusionPass)) GE_CHK_STATUS_RET(after_merge_passes.AddPass("OptimizeStage1_1::TransOpBreadthFusionPass", new (std::nothrow) TransOpBreadthFusionPass)) - GE_CHK_STATUS_RET( - after_merge_passes.AddPass("OptimizeStage1_1::HcclMemcpyPass", new (std::nothrow) HcclMemcpyPass)); GE_TIMESTAMP_START(after_merge_passes); auto ret = after_merge_passes.Run(compute_graph); diff --git a/ge/graph/passes/hccl_memcpy_pass.cc b/ge/graph/passes/hccl_memcpy_pass.cc index a67b917f..21747f42 100755 --- a/ge/graph/passes/hccl_memcpy_pass.cc +++ b/ge/graph/passes/hccl_memcpy_pass.cc @@ -28,157 +28,50 @@ namespace { const int32_t kAnchorSize = 1; const int kAnchorNum = 0; -const int32_t kAnchorAssignRefIndex = 0; -const int32_t kAnchorAssignValueIndex = 1; const char *const kInputMutable = "_input_mutable"; } // namespace namespace ge { Status HcclMemcpyPass::Run(ge::ComputeGraphPtr graph) { - Status ret = SUCCESS; GE_IF_BOOL_EXEC(graph == nullptr, GELOGE(PARAM_INVALID, "param [graph] must not be null."); return PARAM_INVALID); for (const auto &node : graph->GetDirectNode()) { auto op_desc = node->GetOpDesc(); - if (op_desc == nullptr) { - GELOGE(INTERNAL_ERROR, "node has no op_desc, node_name : %s.", node->GetName().c_str()); - return INTERNAL_ERROR; - } - - ret = ContinuousInputProcess(graph, node); - if (ret != SUCCESS) { - GELOGE(INTERNAL_ERROR, "failed ProcessBroadcastMemcpy, node_name:%s.", node->GetName().c_str()); - return ret; - } - - ret = MutableInputProcess(graph, node); - if (ret != 
SUCCESS) { - GELOGE(INTERNAL_ERROR, "failed MutableInputProcess, node_name:%s.", node->GetName().c_str()); - return ret; - } - - ret = P2pmemInputProcess(graph, node); - if (ret != SUCCESS) { - GELOGE(INTERNAL_ERROR, "failed P2pmemInputProcess, node_name:%s.", node->GetName().c_str()); - return ret; - } - - } - return ret; -} - -// If node has _input_mutable attr, means input mem may be modified when op execute. -// In order to avoid to affect another op execute with same input when data modified, -// need to inset memcpy node between. -// also works on situation that input is variable or const. -Status HcclMemcpyPass::MutableInputProcess(const ComputeGraphPtr &graph, const NodePtr node) { - auto op_desc = node->GetOpDesc(); + GE_IF_BOOL_EXEC(op_desc == nullptr, continue); - bool node_input_mutable = false; - if (!AttrUtils::HasAttr(op_desc, kInputMutable)) { - return SUCCESS; - } - - if (!AttrUtils::GetBool(op_desc, kInputMutable, node_input_mutable)) { - GELOGE(INTERNAL_ERROR, "node:%s get attr:_input_mutable failed.", node->GetName().c_str()); - return FAILED; - } - if (!node_input_mutable) { - return SUCCESS; - } - - GELOGI("input mutable hcom op is:%s.", op_desc->GetName().c_str()); - for (auto &hccl_in_anchor : node->GetAllInDataAnchors()) { - if (hccl_in_anchor == nullptr) { + bool node_input_mutable = false; + if (!AttrUtils::HasAttr(op_desc, kInputMutable)) { continue; } - auto src_out_anchor = hccl_in_anchor->GetPeerOutAnchor(); - GE_CHECK_NOTNULL(src_out_anchor); - int32_t src_out_anchor_size = src_out_anchor->GetPeerInDataAnchors().size(); - if (src_out_anchor_size == kAnchorSize) { - // Identity needs to be inserted between constant (/data) and hcomallreduce to avoid constant being cleared. - if (IsDataNode(src_out_anchor->GetOwnerNode()->GetType())) { - Status ret = ModifyEdgeConnection(graph, src_out_anchor, hccl_in_anchor); - if (ret != SUCCESS) { - GELOGE(INTERNAL_ERROR, "Failed to modify the connection."); - return ret; - } - } + GE_IF_BOOL_EXEC(!AttrUtils::GetBool(op_desc, kInputMutable, node_input_mutable), + GELOGE(INTERNAL_ERROR, "node:%s get attr:_input_mutable failed.", node->GetName().c_str()); return FAILED); + if (!node_input_mutable) { continue; } - Status ret = ModifyEdgeConnection(graph, src_out_anchor, hccl_in_anchor); - if (ret != SUCCESS) { - GELOGE(INTERNAL_ERROR, "Failed to modify the connection."); - return ret; - } - } - return SUCCESS; -} - -// If broadcast input size is bigger than 1, and input from variable, -// cause by broadcast input memory should be continuous, -// another featuremap mem will be allocated for broadcast input. -// In this condition, move data from variable mem to broadcast input featuremap mem will be executed each step. -// In order to avoid move action out of model, use memcpy node instead of move action code. 
-Status HcclMemcpyPass::ContinuousInputProcess(const ComputeGraphPtr &graph, const NodePtr node) { - auto op_desc = node->GetOpDesc(); - - bool is_input_continuous = false; - (void)ge::AttrUtils::GetBool(op_desc, ATTR_NAME_CONTINUOUS_INPUT, is_input_continuous); - - if (is_input_continuous && op_desc->GetInputsSize() > 1) { - GELOGI("continuous input op is:%s.", op_desc->GetName().c_str()); - // if input size bigger than one, insert memcpy between var data for support continous mem alloc + GELOGI("hcom op is:%s.", op_desc->GetName().c_str()); for (auto &hccl_in_anchor : node->GetAllInDataAnchors()) { if (hccl_in_anchor == nullptr) { continue; } auto src_out_anchor = hccl_in_anchor->GetPeerOutAnchor(); - if (src_out_anchor == nullptr) { - GELOGE(INTERNAL_ERROR, "hcom op input has no peer anchor, node_name:%s", node->GetName().c_str()); - return INTERNAL_ERROR; - } - - if (IsDataNode(src_out_anchor->GetOwnerNode()->GetType())) { - Status ret = ModifyEdgeConnection(graph, src_out_anchor, hccl_in_anchor); - if (ret != SUCCESS) { - GELOGE(INTERNAL_ERROR, "Failed to modify the connection."); - return ret; + GE_CHECK_NOTNULL(src_out_anchor); + + int32_t src_out_anchor_size = src_out_anchor->GetPeerInDataAnchors().size(); + if (src_out_anchor_size == kAnchorSize) { + // Memcpyasync needs to be inserted between constant (/data) and hcomallreduce to avoid constant being cleared. + NodePtr src_node = src_out_anchor->GetOwnerNode(); + std::string src_type = src_node->GetType(); + bool check_src_type = (src_type == CONSTANTOP) || (src_type == DATA) || (src_type == CONSTANT); + if (check_src_type) { + Status ret = ModifyEdgeConnection(graph, src_out_anchor, hccl_in_anchor); + if (ret != SUCCESS) { + GELOGE(INTERNAL_ERROR, "Failed to modify the connection."); + return ret; + } } + continue; } - } - } - return SUCCESS; -} - -// if input is var type, and node input need p2p mem, then memcpy should be insert between the two -Status HcclMemcpyPass::P2pmemInputProcess(const ComputeGraphPtr &graph, const NodePtr node) { - auto op_desc = node->GetOpDesc(); - - vector input_memory_types; - (void) ge::AttrUtils::GetListInt(op_desc, ATTR_NAME_INPUT_MEM_TYPE_LIST, input_memory_types); - if (input_memory_types.empty()) { - return SUCCESS; - } - - for (uint32_t index = 0; index < input_memory_types.size() && index < op_desc->GetInputsSize(); index++) { - if (input_memory_types[index] != RT_MEMORY_P2P_DDR) { - continue; - } - - GELOGD("p2p input op is:%s.", op_desc->GetName().c_str()); - auto hccl_in_anchor = node->GetInDataAnchor(index); - if (hccl_in_anchor == nullptr) { - continue; - } - auto src_out_anchor = hccl_in_anchor->GetPeerOutAnchor(); - if (src_out_anchor == nullptr) { - GELOGE(INTERNAL_ERROR, "hcom op input has no peer anchor, node_name:%s", node->GetName().c_str()); - return INTERNAL_ERROR; - } - - if (IsDataNode(src_out_anchor->GetOwnerNode()->GetType())) { Status ret = ModifyEdgeConnection(graph, src_out_anchor, hccl_in_anchor); if (ret != SUCCESS) { GELOGE(INTERNAL_ERROR, "Failed to modify the connection."); @@ -189,12 +82,8 @@ Status HcclMemcpyPass::P2pmemInputProcess(const ComputeGraphPtr &graph, const No return SUCCESS; } -bool HcclMemcpyPass::IsDataNode(const std::string& node_type) { - return (node_type == CONSTANTOP) || (node_type == VARIABLE) || (node_type == DATA) || (node_type == CONSTANT); -} - /// -/// @brief Add Identity Node +/// @brief Add MemcpyAsync Node /// @param [in] ge::ComputeGraphPtr graph /// @param [in] ge::OutDataAnchorPtr in_node /// @return ge::NodePtr @@ -212,20 +101,20 @@ 
NodePtr HcclMemcpyPass::CreateIdentityNode(const ComputeGraphPtr &graph, const O node_name = CheckDuplicateName(node_name); OpDescPtr op_desc = MakeShared(node_name.c_str(), IDENTITY); if (op_desc == nullptr) { - GELOGE(INTERNAL_ERROR, "Create Identity op: MakeShared op_desc fail."); + GELOGE(INTERNAL_ERROR, "Create identity op: MakeShared op_desc fail."); return nullptr; } - GELOGI("Create Identity op:%s.", op_desc->GetName().c_str()); + GELOGI("Create identity op:%s.", op_desc->GetName().c_str()); graphStatus ret = op_desc->AddInputDesc("x", pre_op_desc->GetOutputDesc(out_data_anchor->GetIdx())); if (ret != GRAPH_SUCCESS) { - GELOGE(INTERNAL_ERROR, "Create Identity op: add input desc fail."); + GELOGE(INTERNAL_ERROR, "Create identity op: add input desc fail."); return nullptr; } ret = op_desc->AddOutputDesc("y", pre_op_desc->GetOutputDesc(out_data_anchor->GetIdx())); if (ret != GRAPH_SUCCESS) { - GELOGE(INTERNAL_ERROR, "Create Identity op: add output desc fail."); + GELOGE(INTERNAL_ERROR, "Create identity op: add output desc fail."); return nullptr; } // because history reason ,this pass can not do work after constant fold so mark it @@ -233,7 +122,7 @@ NodePtr HcclMemcpyPass::CreateIdentityNode(const ComputeGraphPtr &graph, const O NodePtr memcpy_node = graph->AddNode(op_desc); if (memcpy_node == nullptr) { - GELOGE(INTERNAL_ERROR, "Insert Identity node fail."); + GELOGE(INTERNAL_ERROR, "Insert identity node fail."); return nullptr; } @@ -266,38 +155,7 @@ std::string HcclMemcpyPass::CheckDuplicateName(const std::string &node_name) { /// Status HcclMemcpyPass::ModifyEdgeConnection(const ComputeGraphPtr &graph, const OutDataAnchorPtr &src_out_anchor, const InDataAnchorPtr &hccl_in_anchor) { - GE_CHECK_NOTNULL(src_out_anchor->GetOwnerNode()); - GE_CHECK_NOTNULL(hccl_in_anchor->GetOwnerNode()); - - Status ret = InsertIdentityBeforeHccl(graph, src_out_anchor, hccl_in_anchor); - if (ret != SUCCESS) { - GELOGE(INTERNAL_ERROR, "add identity failed, var_node:%s, hccl_node:%s.", - src_out_anchor->GetOwnerNode()->GetName().c_str(), - hccl_in_anchor->GetOwnerNode()->GetName().c_str()); - return ret; - } - - ret = InsertAssignAfterBroadcastIfNeed(graph, src_out_anchor, hccl_in_anchor); - if (ret != SUCCESS) { - GELOGE(INTERNAL_ERROR, "add assign failed, var_node:%s, hccl_node:%s.", - src_out_anchor->GetOwnerNode()->GetName().c_str(), - hccl_in_anchor->GetOwnerNode()->GetName().c_str()); - return ret; - } - return SUCCESS; -} - -/// -/// @brief Insert Identity node Between Hccl node and variable -/// @param [in] ComputeGraphPtr graph -/// @param [in] OutDataAnchorPtr src_out_anchor -/// @param [in] InDataAnchorPtr hccl_in_anchor -/// @return status -/// -Status HcclMemcpyPass::InsertIdentityBeforeHccl(const ComputeGraphPtr &graph, const OutDataAnchorPtr &src_out_anchor, - const InDataAnchorPtr &hccl_in_anchor) { - GELOGI("Between op %s and op %s need insert memcpy async op.", src_out_anchor->GetOwnerNode()->GetName().c_str(), - hccl_in_anchor->GetOwnerNode()->GetName().c_str()); + GELOGI("The op %s need insert memcpy async op.", src_out_anchor->GetOwnerNode()->GetName().c_str()); NodePtr memcpy_node = CreateIdentityNode(graph, src_out_anchor); GE_CHECK_NOTNULL(memcpy_node); @@ -324,139 +182,6 @@ Status HcclMemcpyPass::InsertIdentityBeforeHccl(const ComputeGraphPtr &graph, co } return SUCCESS; } - -/// -/// @brief Insert assign node after broadcast node and variable to refresh variable data -/// @param [in] ComputeGraphPtr graph -/// @param [in] OutDataAnchorPtr var_out_anchor -/// @param [in] 
InDataAnchorPtr hccl_in_anchor -/// @return status -/// -Status HcclMemcpyPass::InsertAssignAfterBroadcastIfNeed(const ComputeGraphPtr &graph, - const OutDataAnchorPtr &var_out_anchor, - const InDataAnchorPtr &hccl_in_anchor) { - if (hccl_in_anchor->GetOwnerNode()->GetType() != HCOMBROADCAST) { - GELOGD("%s not broadcast, no need to insert assign node", hccl_in_anchor->GetOwnerNode()->GetName().c_str()); - return SUCCESS; - } - - if (var_out_anchor->GetOwnerNode()->GetType() != VARIABLE) { - GELOGD("%s not variable, no need to insert assign node", var_out_anchor->GetOwnerNode()->GetName().c_str()); - return SUCCESS; - } - - GELOGI("after op %s and op %s need insert assign op.", var_out_anchor->GetOwnerNode()->GetName().c_str(), - hccl_in_anchor->GetOwnerNode()->GetName().c_str()); - - for (auto peer_in_anchor : var_out_anchor->GetPeerInDataAnchors()) { - if (peer_in_anchor->GetOwnerNode()->GetType() == ASSIGN) { - GELOGD("variable %s out assign node is exist.", var_out_anchor->GetOwnerNode()->GetName().c_str()); - return SUCCESS; - } - } - - NodePtr assign_node = CreateAssignNode(graph, var_out_anchor); - GE_CHECK_NOTNULL(assign_node); - - OutDataAnchorPtr hccl_out_anchor = hccl_in_anchor->GetOwnerNode()->GetOutDataAnchor(hccl_in_anchor->GetIdx()); - GE_CHECK_NOTNULL(hccl_out_anchor); - - Status ret = hccl_out_anchor->LinkTo(assign_node->GetInDataAnchor(kAnchorAssignValueIndex)); - if (ret != SUCCESS) { - GELOGE(INTERNAL_ERROR, "The op %s link anchor %s fail.", hccl_out_anchor->GetOwnerNode()->GetName().c_str(), - assign_node->GetName().c_str()); - return FAILED; - } - - ret = var_out_anchor->LinkTo(assign_node->GetInDataAnchor(kAnchorAssignRefIndex)); - if (ret != SUCCESS) { - GELOGE(INTERNAL_ERROR, "The op %s link anchor %s fail.", var_out_anchor->GetOwnerNode()->GetName().c_str(), - assign_node->GetName().c_str()); - return FAILED; - } - - // add control edge between assign node and node after broadcast node - OutControlAnchorPtr assign_out_control_anchor = assign_node->GetOutControlAnchor(); - GE_CHECK_NOTNULL(assign_out_control_anchor); - - for (auto in_data_anchor : hccl_out_anchor->GetPeerInDataAnchors()) { - if (in_data_anchor->GetOwnerNode()->GetName() == assign_node->GetName()) { - continue; - } - ret = assign_out_control_anchor->LinkTo(in_data_anchor->GetOwnerNode()->GetInControlAnchor()); - if (ret != SUCCESS) { - GELOGE(INTERNAL_ERROR, "The op %s link control anchor %s fail.", assign_out_control_anchor->GetOwnerNode()->GetName().c_str(), - in_data_anchor->GetOwnerNode()->GetName().c_str()); - return FAILED; - } - } - - for (auto in_control_anchor : hccl_out_anchor->GetOwnerNode()->GetOutControlAnchor()->GetPeerInControlAnchors()) { - if (in_control_anchor->GetOwnerNode()->GetName() == assign_node->GetName()) { - continue; - } - ret = assign_out_control_anchor->LinkTo(in_control_anchor); - if (ret != SUCCESS) { - GELOGE(INTERNAL_ERROR, "The op %s link control anchor %s fail.", assign_out_control_anchor->GetOwnerNode()->GetName().c_str(), - in_control_anchor->GetOwnerNode()->GetName().c_str()); - return FAILED; - } - } - return SUCCESS; -} - -/// -/// @brief create assign Node, add to graph -/// @param [in] ge::ComputeGraphPtr graph -/// @param [in] ge::OutDataAnchorPtr variable node out anchor -/// @return ge::NodePtr -/// -NodePtr HcclMemcpyPass::CreateAssignNode(const ComputeGraphPtr &graph, const OutDataAnchorPtr &out_data_anchor) { - GE_IF_BOOL_EXEC(graph == nullptr, return nullptr); - NodePtr pre_node = out_data_anchor->GetOwnerNode(); - OpDescPtr pre_op_desc = 
pre_node->GetOpDesc();
-  if (pre_op_desc == nullptr) {
-    GELOGE(INTERNAL_ERROR, "OpDesc of pre node is invalid.");
-    return nullptr;
-  }
-
-  std::string node_name = pre_node->GetName() + "_" + ASSIGN;
-  node_name = CheckDuplicateName(node_name);
-  OpDescPtr op_desc = MakeShared<OpDesc>(node_name.c_str(), ASSIGN);
-  if (op_desc == nullptr) {
-    GELOGE(INTERNAL_ERROR, "Create Assign op: MakeShared op_desc fail.");
-    return nullptr;
-  }
-  GELOGI("Create Assign op:%s.", op_desc->GetName().c_str());
-
-  graphStatus ret = op_desc->AddInputDesc("ref", pre_op_desc->GetOutputDesc(out_data_anchor->GetIdx()));
-  if (ret != GRAPH_SUCCESS) {
-    GELOGE(INTERNAL_ERROR, "Create Assign op: add ref input desc fail.");
-    return nullptr;
-  }
-
-  ret = op_desc->AddInputDesc("value", pre_op_desc->GetOutputDesc(out_data_anchor->GetIdx()));
-  if (ret != GRAPH_SUCCESS) {
-    GELOGE(INTERNAL_ERROR, "Create Assign op: add value input desc fail.");
-    return nullptr;
-  }
-
-  ret = op_desc->AddOutputDesc("ref", pre_op_desc->GetOutputDesc(out_data_anchor->GetIdx()));
-  if (ret != GRAPH_SUCCESS) {
-    GELOGE(INTERNAL_ERROR, "Create Assign op: add output desc fail.");
-    return nullptr;
-  }
-
-  NodePtr assign_node = graph->AddNode(op_desc);
-  if (assign_node == nullptr) {
-    GELOGE(INTERNAL_ERROR, "Insert Identity node fail.");
-    return nullptr;
-  }
-
-  return assign_node;
-}
-
-
 ///
 /// @brief Clear Status, used for subgraph pass
 /// @return SUCCESS
diff --git a/ge/graph/passes/hccl_memcpy_pass.h b/ge/graph/passes/hccl_memcpy_pass.h
index 7e52708a..e73a5483 100755
--- a/ge/graph/passes/hccl_memcpy_pass.h
+++ b/ge/graph/passes/hccl_memcpy_pass.h
@@ -32,28 +32,11 @@ class HcclMemcpyPass : public GraphPass {
  private:
   NodePtr CreateIdentityNode(const ComputeGraphPtr &graph, const OutDataAnchorPtr &out_data_anchor);
 
-  NodePtr CreateAssignNode(const ComputeGraphPtr &graph, const OutDataAnchorPtr &out_data_anchor);
-
   std::string CheckDuplicateName(const std::string &node_name);
 
   Status ModifyEdgeConnection(const ComputeGraphPtr &graph, const OutDataAnchorPtr &src_out_anchor,
                               const InDataAnchorPtr &hccl_in_anchor);
 
-  Status InsertIdentityBeforeHccl(const ComputeGraphPtr &graph, const OutDataAnchorPtr &src_out_anchor,
-                                  const InDataAnchorPtr &hccl_in_anchor);
-
-  Status InsertAssignAfterBroadcastIfNeed(const ComputeGraphPtr &graph,
-                                          const OutDataAnchorPtr &src_out_anchor,
-                                          const InDataAnchorPtr &hccl_in_anchor);
-
-  Status ContinuousInputProcess(const ComputeGraphPtr &graph, const NodePtr node);
-
-  Status MutableInputProcess(const ComputeGraphPtr &graph, const NodePtr node);
-
-  Status P2pmemInputProcess(const ComputeGraphPtr &graph, const NodePtr node);
-
-  bool IsDataNode(const std::string& node_type);
-
   std::unordered_map<std::string, uint32_t> node_num_map_;
 };
 }  // namespace ge
diff --git a/ge/graph/preprocess/graph_preprocess.cc b/ge/graph/preprocess/graph_preprocess.cc
index 32f877cf..6bb3105c 100644
--- a/ge/graph/preprocess/graph_preprocess.cc
+++ b/ge/graph/preprocess/graph_preprocess.cc
@@ -51,6 +51,7 @@
 #include "graph/passes/for_pass.h"
 #include "graph/passes/guarantee_const_pass.h"
 #include "graph/passes/hccl_group_pass.h"
+#include "graph/passes/hccl_memcpy_pass.h"
 #include "graph/passes/identity_pass.h"
 #include "graph/passes/infershape_pass.h"
 #include "graph/passes/net_output_pass.h"
@@ -1732,6 +1733,8 @@ Status GraphPrepare::PrepareOptimize() {
   PassManager graph_pass;
   try {
     (void)graph_pass.AddPass("PrepareOptimize::PrunePass", new PrunePass);
+    // todo: temporarily insert the HCCL memcpy pass in graph preparation, to keep it from inserting redundant memcpy nodes
+
(void)graph_pass.AddPass("PrepareOptimize::HcclMemcpyPass", new (std::nothrow) HcclMemcpyPass); } catch (std::bad_alloc &e) { GELOGE(INTERNAL_ERROR, "Add pass failed, bad memory allocation occurs."); return INTERNAL_ERROR; From daefed5c5c8adda0215320fce522e658973e8ff9 Mon Sep 17 00:00:00 2001 From: zhangxiaokun Date: Wed, 30 Dec 2020 17:02:10 +0800 Subject: [PATCH 04/54] fix unknown shape aicpu --- ge/graph/load/new_model_manager/davinci_model.cc | 6 +++--- ge/graph/load/new_model_manager/davinci_model.h | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/ge/graph/load/new_model_manager/davinci_model.cc b/ge/graph/load/new_model_manager/davinci_model.cc index 7721739b..a367d334 100755 --- a/ge/graph/load/new_model_manager/davinci_model.cc +++ b/ge/graph/load/new_model_manager/davinci_model.cc @@ -2859,8 +2859,8 @@ void DavinciModel::SetTotalIOAddrs(const vector &io_addrs) { } } -Status DavinciModel::UpdateKnownZeroCopyAddr(vector &total_io_addrs) { - if (fixed_mem_base_ != reinterpret_cast(mem_base_)) { +Status DavinciModel::UpdateKnownZeroCopyAddr(vector &total_io_addrs, bool update_args) { + if (fixed_mem_base_ != reinterpret_cast(mem_base_) && update_args) { for (size_t i = 0; i < total_io_addrs.size(); ++i) { total_io_addrs[i] = GetRunAddress(total_io_addrs[i]); } @@ -2904,7 +2904,7 @@ Status DavinciModel::UpdateKnownNodeArgs(const vector &inputs, const vec } else { total_io_addrs_ = orig_total_io_addrs_; } - GE_CHK_STATUS_RET(UpdateKnownZeroCopyAddr(total_io_addrs_), "DavinciModel::UpdateKnownZeroCopyAddr failed."); + GE_CHK_STATUS_RET(UpdateKnownZeroCopyAddr(total_io_addrs_, false), "DavinciModel::UpdateKnownZeroCopyAddr failed."); if (total_args_size_ == 0) { GELOGW("DavinciModel::UpdateKnownNodeArgs device args %p, dst size %u, pass rtMemcpy.", args_, total_args_size_); diff --git a/ge/graph/load/new_model_manager/davinci_model.h b/ge/graph/load/new_model_manager/davinci_model.h index 906c0548..76c5c8f0 100755 --- a/ge/graph/load/new_model_manager/davinci_model.h +++ b/ge/graph/load/new_model_manager/davinci_model.h @@ -531,7 +531,7 @@ class DavinciModel { Status MallocKnownArgs(); Status UpdateKnownNodeArgs(const vector &inputs, const vector &outputs); Status CreateKnownZeroCopyMap(const vector &inputs, const vector &outputs); - Status UpdateKnownZeroCopyAddr(vector &total_io_addrs); + Status UpdateKnownZeroCopyAddr(vector &total_io_addrs, bool update_args = true); void SetKnownNodeAddrNotChanged(bool base_addr_not_changed) { base_addr_not_changed_ = base_addr_not_changed; } Status GetOrigInputInfo(uint32_t index, OriginInputInfo &orig_input_info); From 610828561cbd1c0a37b8c2ca505f22884d7b0533 Mon Sep 17 00:00:00 2001 From: wangxiaotian22 Date: Wed, 30 Dec 2020 17:42:38 +0800 Subject: [PATCH 05/54] fill ut --- .../ge/graph/load/davinci_model_unittest.cc | 38 +++++++++++++++++++ 1 file changed, 38 insertions(+) diff --git a/tests/ut/ge/graph/load/davinci_model_unittest.cc b/tests/ut/ge/graph/load/davinci_model_unittest.cc index 3cd0455d..34d45269 100644 --- a/tests/ut/ge/graph/load/davinci_model_unittest.cc +++ b/tests/ut/ge/graph/load/davinci_model_unittest.cc @@ -282,4 +282,42 @@ TEST_F(UtestDavinciModel, init_unknown) { const vector outputs = { &virtual_addr }; EXPECT_EQ(model.UpdateKnownNodeArgs(inputs, outputs), SUCCESS); } + +TEST_F(UtestDavinciModel, ReturnNoOutput_test) { + DavinciModel model(0, nullptr); + + GeTensorDesc tensor(GeShape(), FORMAT_NCHW, DT_FLOAT); + TensorUtils::SetSize(tensor, 512); + + OpDescPtr var1 = CreateOpDesc("var1", VARIABLE); + 
var1->AddInputDesc(tensor); + var1->AddOutputDesc(tensor); + var1->SetInputOffset({1024}); + var1->SetOutputOffset({1024}); + + model.variable_op_list_.push_back(var1); + + + EXPECT_EQ(model.ReturnNoOutput(model), PARAM_INVALID); +} + +TEST_F(UtestDavinciModel, SyncVarData_test) { + DavinciModel model(0, nullptr); + + GeTensorDesc tensor(GeShape(), FORMAT_NCHW, DT_FLOAT); + TensorUtils::SetSize(tensor, 512); + + OpDescPtr var1 = CreateOpDesc("var1", VARIABLE); + var1->AddInputDesc(tensor); + var1->AddOutputDesc(tensor); + var1->SetInputOffset({1024}); + var1->SetOutputOffset({1024}); + + model.variable_op_list_.push_back(var1); + + EXPECT_NE(model.SyncVarData(model), SUCCESS); + +} + + } // namespace ge From 6d94878eaf69bfe43e28385ef373d381ccd06d6e Mon Sep 17 00:00:00 2001 From: wangxiaotian22 Date: Wed, 30 Dec 2020 17:56:28 +0800 Subject: [PATCH 06/54] fix ut --- tests/ut/ge/graph/load/davinci_model_unittest.cc | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/tests/ut/ge/graph/load/davinci_model_unittest.cc b/tests/ut/ge/graph/load/davinci_model_unittest.cc index 34d45269..eda3cb15 100644 --- a/tests/ut/ge/graph/load/davinci_model_unittest.cc +++ b/tests/ut/ge/graph/load/davinci_model_unittest.cc @@ -298,7 +298,7 @@ TEST_F(UtestDavinciModel, ReturnNoOutput_test) { model.variable_op_list_.push_back(var1); - EXPECT_EQ(model.ReturnNoOutput(model), PARAM_INVALID); + EXPECT_EQ(model.ReturnNoOutput(1), PARAM_INVALID); } TEST_F(UtestDavinciModel, SyncVarData_test) { @@ -315,8 +315,7 @@ TEST_F(UtestDavinciModel, SyncVarData_test) { model.variable_op_list_.push_back(var1); - EXPECT_NE(model.SyncVarData(model), SUCCESS); - + EXPECT_NE(model.SyncVarData(), SUCCESS); } From 2f4351652426af191eb6c718a8923f69bee6289f Mon Sep 17 00:00:00 2001 From: unknown Date: Wed, 30 Dec 2020 19:44:50 +0800 Subject: [PATCH 07/54] For dynamic shape compile. 
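[Editor's note, not part of the original commit message: the one-line subject is
vague. Judging from the diff below, this patch routes unknown-shape graphs
through PreBuildModel() and MergeWeights() before task generation, and adds
GraphBuilder::SetConstantInputOffset() so each op records where the weight of a
directly connected Const input will live once weights are merged. A hedged
sketch of that bookkeeping, using only calls visible in the diff
(valid_input_index counts the connected inputs seen so far):

    std::vector<int64_t> input_offsets(op_desc->GetInputsSize(), 0);
    std::vector<GeTensorPtr> weights = OpDescUtils::MutableWeights(peer_node);
    if (!weights.empty() && weights[0] != nullptr) {
      int64_t input_offset = 0;
      (void)TensorUtils::GetDataOffset(weights[0]->MutableTensorDesc(), input_offset);
      // Consumed after MergeWeights() relocates the constant's data.
      input_offsets[valid_input_index] = input_offset;
    }
    op_desc->SetInputOffset(input_offsets);]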
--- ge/graph/build/graph_builder.cc | 84 +++++++++++++++++++++++++++++---- ge/graph/build/graph_builder.h | 1 + ge/graph/build/model_builder.h | 4 +- ge/single_op/single_op_model.cc | 1 + 4 files changed, 79 insertions(+), 11 deletions(-) diff --git a/ge/graph/build/graph_builder.cc b/ge/graph/build/graph_builder.cc index 87d2a206..ee9be124 100644 --- a/ge/graph/build/graph_builder.cc +++ b/ge/graph/build/graph_builder.cc @@ -15,6 +15,7 @@ */ #include "graph/build/graph_builder.h" +#include "graph/build/memory/graph_mem_assigner.h" #include "common/ge/ge_util.h" #include "common/helper/model_helper.h" #include "graph/build/logical_stream_allocator.h" @@ -197,10 +198,8 @@ Status GraphBuilder::Build(ComputeGraphPtr &comp_graph, std::vectorGetGraphUnknownFlag()) { GE_CHK_STATUS_RET( BuildForDynamicShapeGraph(comp_graph, subgraph_ptr_list, ge_root_model_ptr, ge_model_ptr, session_id), "Build for dynamic shape graph failed."); @@ -270,16 +269,78 @@ Status GraphBuilder::BuildForKnownShapeGraph(ComputeGraphPtr &comp_graph, std::v return SUCCESS; } +Status GraphBuilder::SetConstantInputOffset(ComputeGraphPtr &comp_graph) { + for (auto &node : comp_graph->GetDirectNode()) { + GE_CHECK_NOTNULL(node); + auto op_desc = node->GetOpDesc(); + GE_CHECK_NOTNULL(op_desc); + auto num_inputs = op_desc->GetInputsSize(); + std::vector input_offsets(num_inputs, 0); + int valid_input_index = -1; + for (uint32_t i = 0; i < node->GetAllInDataAnchorsSize(); ++i) { + auto in_anchor = node->GetInDataAnchor(i); + auto peer_out_anchor = in_anchor->GetPeerOutAnchor(); + if (peer_out_anchor == nullptr) { + continue; + } + + ++valid_input_index; + auto peer_node = peer_out_anchor->GetOwnerNode(); + if (peer_node == nullptr) { + continue; + } + + if (peer_node->GetType() != CONSTANT) { + continue; + } + + std::vector weights = OpDescUtils::MutableWeights(peer_node); + if (weights.empty()) { + GELOGE(FAILED, "weights size of node %s is empty", node->GetName().c_str()); + return FAILED; + } + GeTensorPtr weight = weights[0]; + GE_CHECK_NOTNULL(weight); + int64_t input_offset = 0; + (void) TensorUtils::GetDataOffset(weight->MutableTensorDesc(), input_offset); + // valid_input_index must smaller than num_inputs + input_offsets[valid_input_index] = input_offset; + GELOGD("[%s] input[%u] is const, offset = %ld", node->GetName().c_str(), valid_input_index, input_offset); + } + + op_desc->SetInputOffset(input_offsets); + std::vector output_offsets(op_desc->GetOutputsSize(), 0); + op_desc->SetOutputOffset(output_offsets); + } + return SUCCESS; +} + Status GraphBuilder::BuildForUnknownShapeGraph(ComputeGraphPtr &comp_graph, GeModelPtr &ge_model_ptr, uint64_t session_id) { GELOGI("Begin to build unknown shape graph[%s].", comp_graph->GetName().c_str()); + Graph2SubGraphInfoList subgraph_map; + ge::ModelBuilder builder(session_id, comp_graph, subgraph_map, stream_max_parallel_num_, hcom_parallel_, build_mode_); + GE_DUMP(comp_graph, "BeforePreBuildModel"); + GE_TIMESTAMP_START(PreBuildModel); + GE_CHK_STATUS_RET(builder.PreBuildModel(), "Graph[%s] builder PreBuildModel() return fail.", + comp_graph->GetName().c_str()); + GE_TIMESTAMP_END(PreBuildModel, "GraphBuilder::PreBuildModel"); + GE_DUMP(comp_graph, "AfterPreBuildModel"); + GE_TIMESTAMP_START(CalcOpParam); GE_CHK_STATUS_RET(CalcOpParam(comp_graph), "Graph[%s] builder CalcOpParam() return fail.", comp_graph->GetName().c_str()); GE_TIMESTAMP_END(CalcOpParam, "GraphBuilder::CalcOpParam"); GE_DUMP(comp_graph, "AfterCalcOpParam"); - Graph2SubGraphInfoList subgraph_map; - 
ge::ModelBuilder builder(session_id, comp_graph, subgraph_map, stream_max_parallel_num_, hcom_parallel_, build_mode_); + + GE_TIMESTAMP_START(SetConstantInputOffset); + GE_CHK_STATUS_RET(SetConstantInputOffset(comp_graph), + "Graph[%s] failed to set constant input offset.", comp_graph->GetName().c_str()); + GE_TIMESTAMP_END(SetConstantInputOffset); + GE_TIMESTAMP_START(MergeWeights); + GE_CHK_STATUS_RET(MergeWeights(), "Graph[%s] failed to merge weights.", comp_graph->GetName().c_str()); + GE_TIMESTAMP_END(MergeWeights, "GraphBuilder::MergeWeights"); + ModelPtr model_ptr = MakeShared(); if (model_ptr == nullptr) { return MEMALLOC_FAILED; @@ -375,10 +436,15 @@ Status GraphBuilder::BuildForDynamicShapeGraph(ComputeGraphPtr &comp_graph, op_desc->GetName().c_str()); } } - // - for (auto &sub_graph : comp_graph->GetAllSubgraphs()) { + + auto all_graphs = comp_graph->GetAllSubgraphs(); + if (all_graphs.empty()) { + all_graphs.push_back(comp_graph); + } + for (auto &sub_graph : all_graphs) { // exclude functional subgraph in known subgraph - if (sub_graph->GetParentGraph() != comp_graph && !sub_graph->GetParentGraph()->GetGraphUnknownFlag()) { + if (sub_graph->GetParentGraph() != nullptr && sub_graph->GetParentGraph() != comp_graph && + !sub_graph->GetParentGraph()->GetGraphUnknownFlag()) { continue; } diff --git a/ge/graph/build/graph_builder.h b/ge/graph/build/graph_builder.h index 329f3ebc..b828a80d 100644 --- a/ge/graph/build/graph_builder.h +++ b/ge/graph/build/graph_builder.h @@ -67,6 +67,7 @@ class GraphBuilder { GeModelPtr &ge_model_ptr, uint64_t session_id = INVALID_SESSION_ID); Status BuildForUnknownShapeGraph(ComputeGraphPtr &comp_graph, GeModelPtr &ge_model_ptr, uint64_t session_id = INVALID_SESSION_ID); + Status SetConstantInputOffset(ComputeGraphPtr &comp_graph); Status AddOutputMemTypeForNode(const NodePtr &node); Status BuildForHostCpuGraph(ComputeGraphPtr &comp_graph, GeModelPtr &ge_model_ptr, uint64_t session_id = INVALID_SESSION_ID); diff --git a/ge/graph/build/model_builder.h b/ge/graph/build/model_builder.h index de079768..12420614 100644 --- a/ge/graph/build/model_builder.h +++ b/ge/graph/build/model_builder.h @@ -55,13 +55,13 @@ class ModelBuilder { ge::Buffer GetWeightBuffer() const; + Status MergeWeights(); + protected: void AddNodeInputProperty(); void ClearOriginalFormat(); - Status MergeWeights(); - private: bool SetInputConst(const OpDescPtr &op_desc, const NodePtr &src_node, size_t index, vector &is_input_const); diff --git a/ge/single_op/single_op_model.cc b/ge/single_op/single_op_model.cc index 25bf6855..2a1a14e6 100755 --- a/ge/single_op/single_op_model.cc +++ b/ge/single_op/single_op_model.cc @@ -477,6 +477,7 @@ Status SingleOpModel::BuildDynamicOp(StreamResource &resource, DynamicSingleOp & single_op.num_inputs_ = data_ops_.size(); single_op.num_outputs_ = netoutput_op_->GetAllInputsSize(); GE_CHK_STATUS_RET_NOLOG(InitModelMem(resource)); + model_params_.memory_size = UINT_MAX; return BuildTaskListForDynamicOp(single_op); } } // namespace ge From 77b2d66ec7e980fb044772bee20d59133b46a74e Mon Sep 17 00:00:00 2001 From: unknown Date: Wed, 30 Dec 2020 19:49:30 +0800 Subject: [PATCH 08/54] For dynamic shape compile. 
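Fixup for the previous commit: GE_TIMESTAMP_END takes the tag plus a message string, and MergeWeights is a ModelBuilder member, so it must be called on the local builder instance rather than as a free function; also trims trailing whitespace from the PreBuildModel log line.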
--- ge/graph/build/graph_builder.cc | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/ge/graph/build/graph_builder.cc b/ge/graph/build/graph_builder.cc index ee9be124..dce40c3e 100644 --- a/ge/graph/build/graph_builder.cc +++ b/ge/graph/build/graph_builder.cc @@ -322,7 +322,7 @@ Status GraphBuilder::BuildForUnknownShapeGraph(ComputeGraphPtr &comp_graph, GeMo ge::ModelBuilder builder(session_id, comp_graph, subgraph_map, stream_max_parallel_num_, hcom_parallel_, build_mode_); GE_DUMP(comp_graph, "BeforePreBuildModel"); GE_TIMESTAMP_START(PreBuildModel); - GE_CHK_STATUS_RET(builder.PreBuildModel(), "Graph[%s] builder PreBuildModel() return fail.", + GE_CHK_STATUS_RET(builder.PreBuildModel(), "Graph[%s] builder PreBuildModel() return fail.", comp_graph->GetName().c_str()); GE_TIMESTAMP_END(PreBuildModel, "GraphBuilder::PreBuildModel"); GE_DUMP(comp_graph, "AfterPreBuildModel"); @@ -336,9 +336,9 @@ Status GraphBuilder::BuildForUnknownShapeGraph(ComputeGraphPtr &comp_graph, GeMo GE_TIMESTAMP_START(SetConstantInputOffset); GE_CHK_STATUS_RET(SetConstantInputOffset(comp_graph), "Graph[%s] failed to set constant input offset.", comp_graph->GetName().c_str()); - GE_TIMESTAMP_END(SetConstantInputOffset); + GE_TIMESTAMP_END(SetConstantInputOffset, "GraphBuilder::SetConstantInputOffset"); GE_TIMESTAMP_START(MergeWeights); - GE_CHK_STATUS_RET(MergeWeights(), "Graph[%s] failed to merge weights.", comp_graph->GetName().c_str()); + GE_CHK_STATUS_RET(builder.MergeWeights(), "Graph[%s] failed to merge weights.", comp_graph->GetName().c_str()); GE_TIMESTAMP_END(MergeWeights, "GraphBuilder::MergeWeights"); ModelPtr model_ptr = MakeShared(); From 7b87558f2fc8d6eff511d9745516293bf8188539 Mon Sep 17 00:00:00 2001 From: zhaoxinxin Date: Wed, 30 Dec 2020 21:36:26 +0800 Subject: [PATCH 09/54] modified: ge/graph/manager/graph_manager.cc modified: ge/graph/preprocess/graph_preprocess.cc modified: ge/graph/preprocess/graph_preprocess.h modified: inc/external/ge/ge_api_types.h --- ge/graph/manager/graph_manager.cc | 4 +- ge/graph/preprocess/graph_preprocess.cc | 135 ++++++++++++++++++++++-- ge/graph/preprocess/graph_preprocess.h | 6 +- inc/external/ge/ge_api_types.h | 5 + 4 files changed, 138 insertions(+), 12 deletions(-) diff --git a/ge/graph/manager/graph_manager.cc b/ge/graph/manager/graph_manager.cc index 030b864e..1aee79a4 100755 --- a/ge/graph/manager/graph_manager.cc +++ b/ge/graph/manager/graph_manager.cc @@ -687,7 +687,7 @@ Status GraphManager::PreRunOptimizeOriginalGraph(const GraphNodePtr &graph_node, CompilerStages &stages = GetCompilerStages(graph_node->GetGraphId()); GM_RUN_AND_DUMP_PERF("OptimizeGraphPrepare", stages.optimizer.OptimizeOriginalGraphForQuantize, compute_graph); GM_RUN_AND_DUMP_PERF("HandleSummaryOp", stages.optimizer.HandleSummaryOp, compute_graph); - GM_RUN_AND_DUMP_PERF("Prepare", stages.preparer.PrepareDynShape, graph_node->GetGraph(), inputs, compute_graph, + GM_RUN_AND_DUMP_PERF("Prepare", stages.preparer.PrepareDynShape, graph_node, inputs, compute_graph, session_id); GM_RUN_AND_DUMP_PERF("OptimizeOriginalGraph", stages.optimizer.OptimizeOriginalGraph, compute_graph); @@ -1173,7 +1173,7 @@ Status GraphManager::BuildGraphForUnregisteredOp(const GraphId &graph_id, const auto compute_graph = GraphUtils::GetComputeGraph(*graph_node->GetGraph()); GE_CHECK_NOTNULL(compute_graph); - GM_RUN_AND_DUMP_PERF("Prepare", GetCompilerStages(graph_id).preparer.PrepareDynShape, graph_node->GetGraph(), inputs, + GM_RUN_AND_DUMP_PERF("Prepare", 
GetCompilerStages(graph_id).preparer.PrepareDynShape, graph_node, inputs, compute_graph, session_id); for (auto &node : compute_graph->GetAllNodes()) { diff --git a/ge/graph/preprocess/graph_preprocess.cc b/ge/graph/preprocess/graph_preprocess.cc index 32f877cf..3f4c7f16 100644 --- a/ge/graph/preprocess/graph_preprocess.cc +++ b/ge/graph/preprocess/graph_preprocess.cc @@ -898,6 +898,117 @@ Status ProcessNetoutputNodeDynShape(NodePtr &node) { } return SUCCESS; } +/** + * Parser shape_range from string to vector + * shape_range from option normally is "[1~20],[3],[3~6],[-1]" + * @param shape_range + */ +void ParseDynamicInputShapeRange(const std::string &shape_range, + std::vector>> &range) { + if (shape_range.empty() || shape_range.size() < 2) { + GELOGW("Shape range %s is invalid.", shape_range); + return; + } + // different parameter sets are split by ';' + vector shape_set = ge::StringUtils::Split(shape_range, ']'); + if (shape_set.empty()) { + return; + } + for (auto shape_str : shape_set) { + if (shape_str.empty()) { + continue; + } + if (ge::StringUtils::StartWith(shape_str, "[")) { + shape_str = shape_str.substr(1, shape_str.size()); + } + if (ge::StringUtils::StartWith(shape_str, ",")) { + shape_str = shape_str.substr(2, shape_str.size()); + } + std::vector> range_of_single; + vector range_set = ge::StringUtils::Split(shape_str, ','); + for (auto range_str : range_set) { + vector pair_set = ge::StringUtils::Split(range_str, '~'); + pair range_pair; + if (pair_set.size() == 1) { + auto range_value = atoi(pair_set.at(0).c_str()); + if (range_value < 0) { + range_pair = std::make_pair(1, range_value); + } else { + range_pair = std::make_pair(range_value, range_value); + } + } else if (pair_set.size() == 2) { + auto range_left = atoi(pair_set.at(0).c_str()); + auto range_right = atoi(pair_set.at(1).c_str()); + range_pair = std::make_pair(range_left, range_right); + } + range_of_single.emplace_back(range_pair); + } + range.emplace_back(range_of_single); + } +} + +Status GetDynamicInputShapeRange(const std::vector &user_input, const std::map &graph_option, + vector>> &range_vec) { + auto mode_iter = graph_option.find(OPTION_EXEC_DYNAMIC_EXECUTE_MODE); + if (mode_iter == graph_option.end()) { + GELOGD("Graph Option: Can not find %s option in graph options.", OPTION_EXEC_DYNAMIC_EXECUTE_MODE); + return SUCCESS; + } + GELOGD("Graph Option: dynamic_input_mode value is %s.", mode_iter->second.c_str()); + if (mode_iter->second != "dynamic_execute") { + return SUCCESS; + } + auto iter = graph_option.find(OPTION_EXEC_DATA_INPUTS_SHAPE_RANGE); + if (iter == graph_option.end()) { + GELOGE(PARAM_INVALID, "Graph option %s is required when %s is dynamic_execute", OPTION_EXEC_DATA_INPUTS_SHAPE_RANGE, + OPTION_EXEC_DYNAMIC_EXECUTE_MODE); + return PARAM_INVALID; + } + GELOGD("GraphOption: dynamic_inputs_shape_range value is %s.", iter->second.c_str()); + ParseDynamicInputShapeRange(iter->second, range_vec); + if (range_vec.size() != user_input.size()) { + GELOGE(PARAM_INVALID, "Dynamic input shape range size is %zu, inputs size is %zu. 
Not match.", range_vec.size(), + user_input.size()); + return PARAM_INVALID; + } + return SUCCESS; +} + +Status UpdateDynamicInputShapeRange(const ge::GeAttrValue::INT index, + const ector>> &range_vec, OpDescPtr &op, + GeTensorDesc &desc) { + auto unkown_shape = desc.GetShape(); + auto shape_range = range_vec.at(index); + for (size_t i = 0; i < unkown_shape.GetDimNum(); ++i) { + if (shape_range.at(i).first == shape_range.at(i).second) { + unkown_shape.SetDim(i, shape_range.at(i).first); + } else { + unkown_shape.SetDim(i, -1); + } + } + desc.SetShape(unkown_shape); + desc.SetShapeRange(shape_range); + int64_t dynamic_shape_size = 1; + for (const auto range_pair : range_vec.at(index)) { + FMK_INT64_MULCHECK(dynamic_shape_size, range_pair.second); + dynamic_shape_size *= range_pair.second; + } + auto data_type_size = GetSizeByDataType(desc.GetDataType()); + if (data_type_size < 0) { + GELOGE(PARAM_INVALID, "Input data type is %s, is not supported.", + TypeUtils::DataTypeToSerialString(desc.GetDataType()).c_str()); + return PARAM_INVALID; + } + FMK_INT64_MULCHECK(dynamic_shape_size, data_type_size); + dynamic_shape_size *= data_type_size; + GELOGI("In dynamic_execute mode ,set input %s shape range size %ld", op->GetName().c_str(), dynamic_shape_size); + ge::TensorUtils::SetSize(desc, dynamic_shape_size); + graphStatus graph_ret = op->UpdateInputDesc(0, desc); + GE_CHK_STATUS_RET(graph_ret, "UpdateInputDesc fail, graph ret: %u", graph_ret); + graph_ret = op->UpdateOutputDesc(0, desc); + GE_CHK_STATUS_RET(graph_ret, "UpdateInputDesc fail, graph ret: %u", graph_ret); + return SUCCESS; +} } // namespace GraphPrepare::GraphPrepare() : compute_graph_(nullptr) {} @@ -1102,7 +1213,11 @@ Status GraphPrepare::AdjustDataOpOutput(const NodePtr &node) { return SUCCESS; } -Status GraphPrepare::UpdateInput(const std::vector &user_input) { +Status GraphPrepare::UpdateInput(const std::vector &user_input, const std::map &graph_option) { + // Get shape range of input in dynamic_execute mode + vector>> dynamic_shape_range_vec; + auto ret = GetDynamicInputShapeRange(user_input, graph_option, dynamic_shape_range_vec); + GE_CHK_STATUS_RET(ret, "Graph option is not right on Dynamic execute mode."); compute_graph_->SaveDataFormat(ge::TypeUtils::DomiFormatToFormat(GetLocalOmgContext().format)); for (NodePtr &input_node : compute_graph_->GetDirectNode()) { GE_CHECK_NOTNULL(input_node); @@ -1185,6 +1300,12 @@ Status GraphPrepare::UpdateInput(const std::vector &user_input) { return graph_ret; } + if (!dynamic_shape_range_vec.empty()) { + ret = UpdateDynamicInputShapeRange(index, dynamic_shape_range_vec, op, desc); + GE_CHK_STATUS_RET(ret, "Fail to update dynamic input shape range on %s.", op->GetName().c_str()); + continue; + } + if (!options_.train_graph_flag) { Status ret = AdjustDataOpOutput(input_node); GE_IF_BOOL_EXEC(ret != SUCCESS, GELOGE(ret, "AdjustDataOpOutput fail, ret:%u", ret); return ret); @@ -1358,17 +1479,17 @@ Status GraphPrepare::SaveOriginalGraphToOmModel() { GELOGI("Prepare %s on graph %s success.", name, compute_graph->GetName().c_str()); \ } while (0) -Status GraphPrepare::PrepareDynShape(ConstGraphPtr graph, const std::vector &user_input, +Status GraphPrepare::PrepareDynShape(const GraphNodePtr &graph_node, const std::vector &user_input, ge::ComputeGraphPtr &compute_graph, uint64_t session_id) { - GE_CHECK_NOTNULL(graph); + GE_CHECK_NOTNULL(graph_node->GetGraph()); GE_CHECK_NOTNULL(compute_graph); GetLocalOmgContext().type = static_cast(options_.framework_type); - const Graph &const_graph = 
*graph; + const Graph &const_graph = *graph_node->GetGraph(); PP_RUN("Init", Init, const_graph, session_id); PP_RUN("SetRtContext", SetRtContext, rtContext_t(), RT_CTX_GEN_MODE); - PP_RUN_AND_DUMP("CheckAndUpdateInput", CheckAndUpdateInput, user_input); + PP_RUN_AND_DUMP("CheckAndUpdateInput", CheckAndUpdateInput, user_input, graph_node->GetGraph()); PP_RUN_AND_DUMP("GraphEquivalentTransformation", GraphEquivalentTransformation); PP_RUN_AND_DUMP("ProcessOutput", ProcessNetOutput); PP_RUN_AND_DUMP("ProcessMultiBatch", multibatch::ProcessMultiBatch, compute_graph_); @@ -1831,7 +1952,7 @@ Status GraphPrepare::ProcessNetOutput() { return SUCCESS; } -Status GraphPrepare::CheckAndUpdateInput(const std::vector &user_input) { +Status GraphPrepare::CheckAndUpdateInput(const std::vector &user_input,const std::map &graph_option) { compute_graph_->SetInputSize(user_input.size()); if (user_input.empty()) { return SUCCESS; @@ -1843,7 +1964,7 @@ Status GraphPrepare::CheckAndUpdateInput(const std::vector &user_input return ret; } - ret = UpdateInput(user_input); + ret = UpdateInput(user_input, graph_option); if (ret != SUCCESS) { GELOGE(ret, "UpdateInput fail, ret:%u", ret); return ret; diff --git a/ge/graph/preprocess/graph_preprocess.h b/ge/graph/preprocess/graph_preprocess.h index a3bbf433..de755418 100755 --- a/ge/graph/preprocess/graph_preprocess.h +++ b/ge/graph/preprocess/graph_preprocess.h @@ -45,7 +45,7 @@ class GraphPrepare { virtual ~GraphPrepare(); GraphPrepare(const GraphPrepare &in) = delete; GraphPrepare &operator=(const GraphPrepare &in) = delete; - Status PrepareDynShape(ConstGraphPtr graph, + Status PrepareDynShape(const GraphNodePtr &graph_node, const std::vector &user_input, ge::ComputeGraphPtr &compute_graph, uint64_t session_id = 0); @@ -63,8 +63,8 @@ class GraphPrepare { Status CheckRefOp(); Status SetRtContext(rtContext_t rt_context, rtCtxMode_t mode); Status AdjustDataOpOutput(const NodePtr &node); - Status UpdateInput(const std::vector &user_input); - Status CheckAndUpdateInput(const std::vector &user_input); + Status UpdateInput(const std::vector &user_input, const std::map &graph_option); + Status CheckAndUpdateInput(const std::vector &user_input, const std::map &graph_option); Status CheckConstOp(); Status VerifyConstOp(const NodePtr &node); Status CheckUserInput(const std::vector &user_input); diff --git a/inc/external/ge/ge_api_types.h b/inc/external/ge/ge_api_types.h index d0f2105f..250252f9 100644 --- a/inc/external/ge/ge_api_types.h +++ b/inc/external/ge/ge_api_types.h @@ -61,6 +61,11 @@ const char *const OPTION_EXEC_HCCL_FLAG = "ge.exec.hcclFlag"; const char *const OPTION_EXEC_ATOMIC_FLAG = "ge.exec.enable_atomic"; const char *const OPTION_EXEC_DISABLE_REUSED_MEMORY = "ge.exec.disableReuseMemory"; const char *const OPTION_EXEC_ENABLE_TAILING_OPTIMIZATION = "ge.exec.isTailingOptimization"; +// Dynamic input flag. 
ge.exec.dynamicInput=1, means enable dynamic input, +// ge.exec.dynamicGraphExecuteMode, dynamic_execute[default] +const char *const OPTION_EXEC_DYNAMIC_INPUT = "ge.exec.dynamicInput"; +const char *const OPTION_EXEC_DYNAMIC_EXECUTE_MODE = "ge.exec.dynamicGraphExecuteMode"; +const char *const OPTION_EXEC_DATA_INPUTS_SHAPE_RANGE = "ge.exec.dataInputsShapeRange"; // Option key: memory init const char *const GRAPH_MEMORY_MAX_SIZE = "ge.graphMemoryMaxSize"; From 7bf75b0f6722199da29ac915e99212a238986af9 Mon Sep 17 00:00:00 2001 From: zhaoxinxin Date: Wed, 30 Dec 2020 21:48:45 +0800 Subject: [PATCH 10/54] modified: ge/graph/preprocess/graph_preprocess.cc --- ge/graph/preprocess/graph_preprocess.cc | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/ge/graph/preprocess/graph_preprocess.cc b/ge/graph/preprocess/graph_preprocess.cc index 3f4c7f16..d12be957 100644 --- a/ge/graph/preprocess/graph_preprocess.cc +++ b/ge/graph/preprocess/graph_preprocess.cc @@ -906,7 +906,7 @@ Status ProcessNetoutputNodeDynShape(NodePtr &node) { void ParseDynamicInputShapeRange(const std::string &shape_range, std::vector>> &range) { if (shape_range.empty() || shape_range.size() < 2) { - GELOGW("Shape range %s is invalid.", shape_range); + GELOGW("Shape range %s is invalid.", shape_range.c_str()); return; } // different parameter sets are split by ';' vector shape_set = ge::StringUtils::Split(shape_range, ']'); if (shape_set.empty()) { @@ -975,7 +975,7 @@ Status GetDynamicInputShapeRange(const std::vector &user_input, const } Status UpdateDynamicInputShapeRange(const ge::GeAttrValue::INT index, - const ector>> &range_vec, OpDescPtr &op, + const vector>> &range_vec, OpDescPtr &op, GeTensorDesc &desc) { From e88abcf961e33ee9bc5d3da234b0355586224868 Mon Sep 17 00:00:00 2001 From: zhaoxinxin Date: Wed, 30 Dec 2020 22:18:01 +0800 Subject: [PATCH 11/54] modified: ge/graph/preprocess/graph_preprocess.cc --- ge/graph/preprocess/graph_preprocess.cc | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/ge/graph/preprocess/graph_preprocess.cc b/ge/graph/preprocess/graph_preprocess.cc index d12be957..c45f4db6 100644 --- a/ge/graph/preprocess/graph_preprocess.cc +++ b/ge/graph/preprocess/graph_preprocess.cc @@ -37,6 +37,7 @@ #include "graph/passes/addn_pass.h" #include "graph/passes/aicpu_constant_folding_pass.h" #include "graph/passes/assert_pass.h" +#include "ge/ge_api_types.h" #ifdef ONLY_COMPILE_OPEN_SRC #include "graph/passes/assign_remove_pass.h" #endif @@ -1489,7 +1490,7 @@ Status GraphPrepare::PrepareDynShape(const GraphNodePtr &graph_node, const std:: PP_RUN("Init", Init, const_graph, session_id); PP_RUN("SetRtContext", SetRtContext, rtContext_t(), RT_CTX_GEN_MODE); - PP_RUN_AND_DUMP("CheckAndUpdateInput", CheckAndUpdateInput, user_input, graph_node->GetGraph()); + PP_RUN_AND_DUMP("CheckAndUpdateInput", CheckAndUpdateInput, user_input, graph_node->GetOptions()); PP_RUN_AND_DUMP("GraphEquivalentTransformation", GraphEquivalentTransformation); PP_RUN_AND_DUMP("ProcessOutput", ProcessNetOutput); PP_RUN_AND_DUMP("ProcessMultiBatch", multibatch::ProcessMultiBatch, compute_graph_); From fc748110910c7bda20386da578d418b2e59774e4 Mon Sep 17 00:00:00 2001 From: unknown Date: Thu, 31 Dec 2020 10:36:41 +0800 Subject: [PATCH 12/54] Custom pass register.
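Hook externally registered passes into graph compilation: PreRun now calls RunCustomPass on the original Graph before any optimization, dispatching to the passes collected by CustomPassHelper from metadef's external/register/register_pass.h (hence register_pass.cpp joining the UT sources and the metadef/parser submodule bumps).

For reference, a client would register a pass roughly as below; the exact macro name and callback signature are assumed from register_pass.h and should be checked against that header:

    #include "external/register/register_pass.h"

    // the callback receives the whole original graph and may rewrite it
    ge::Status MyCustomPass(ge::GraphPtr &graph) {
      // inspect or transform *graph here
      return ge::SUCCESS;
    }

    REGISTER_CUSTOM_PASS("MyCustomPass").CustomPassFn(MyCustomPass);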
--- ge/graph/manager/graph_manager.cc | 15 +++++++++++++++ ge/graph/manager/graph_manager.h | 1 + metadef | 2 +- parser | 2 +- 4 files changed, 18 insertions(+), 2 deletions(-) diff --git a/ge/graph/manager/graph_manager.cc b/ge/graph/manager/graph_manager.cc index beb7cd42..46aeaebc 100755 --- a/ge/graph/manager/graph_manager.cc +++ b/ge/graph/manager/graph_manager.cc @@ -101,6 +101,7 @@ #include "graph/common/local_context.h" #include "graph/common/omg_util.h" #include "common/formats/utils/formats_trans_utils.h" +#include "external/register/register_pass.h" namespace { const char *const kSummary = "Summary"; @@ -765,10 +766,24 @@ Status GraphManager::SetRtContext(rtContext_t rt_context, rtCtxMode_t mode, uint return SUCCESS; } +Status GraphManager::RunCustomPass(const GraphNodePtr &graph_node) { + ConstGraphPtr const_graph = graph->node->GetGraph(); + auto comp_graph = GraphUtils::GetComputeGraph(*const_graph); + GE_DUMP(compute_graph, "RunCustomPassBegin"); + + GE_TIMESTAMP_START(RunCustomPass); + GraphPtr graph = std::const_pointer_cast(const_graph); + GE_CHK_STATUS_RET(CustomPassHelper::Instance()->Run(graph), "Graph[%s] run custom pass fail.", + comp_graph->GetName().c_str()); + GE_TIMESTAMP_END(RunCustomPass, "GraphBuilder::RunCustomPass"); + return SUCCESS; +} + Status GraphManager::PreRun(const GraphNodePtr &graph_node, const std::vector &inputs, GeRootModelPtr &ge_root_model, uint64_t session_id) { GE_CHECK_NOTNULL(graph_node); GE_CHECK_NOTNULL(graph_node->GetGraph()); + GE_CHK_STATUS_RET_NOLOG(RunCustomPass(graph_node)); auto compute_graph = GraphUtils::GetComputeGraph(*graph_node->GetGraph()); GE_CHECK_NOTNULL(compute_graph); compute_graph->SetSessionID(session_id); diff --git a/ge/graph/manager/graph_manager.h b/ge/graph/manager/graph_manager.h index d2887c4c..32de7eac 100644 --- a/ge/graph/manager/graph_manager.h +++ b/ge/graph/manager/graph_manager.h @@ -226,6 +226,7 @@ class GraphManager { void ParseInputsDimsForData(const std::vector &input_tensor); Status ParseInputsDimsForGetNexNosinkAndData(const vector &dynamic_nodes, const std::vector &input_tensor); + Status RunCustomPass(const GraphNodePtr &graph_node); Status PreRun(const GraphNodePtr &graph_node, const std::vector &inputs, GeRootModelPtr &ge_root_model, uint64_t session_id = INVALID_SESSION_ID); diff --git a/metadef b/metadef index 11c6cf29..37a90f0d 160000 --- a/metadef +++ b/metadef @@ -1 +1 @@ -Subproject commit 11c6cf2921b6a385616a3ebc601b4431b55b07db +Subproject commit 37a90f0dfd797306e99ec32a688be32a9ad835a4 diff --git a/parser b/parser index 99437c39..5b93b050 160000 --- a/parser +++ b/parser @@ -1 +1 @@ -Subproject commit 99437c39d26624a14060307366a96b79b1d439c3 +Subproject commit 5b93b050dd7ca5b77c3001a790031d877fa10956 From 0566c6dc3f91e72425d405ae45e55cfd1bc5fb46 Mon Sep 17 00:00:00 2001 From: unknown Date: Thu, 31 Dec 2020 10:52:24 +0800 Subject: [PATCH 13/54] Custom pass register. 
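Fix a compile error in the pass hook just introduced: GE_DUMP referenced compute_graph, but the local variable in RunCustomPass is comp_graph.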
--- ge/graph/manager/graph_manager.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ge/graph/manager/graph_manager.cc b/ge/graph/manager/graph_manager.cc index 46aeaebc..c102ec2e 100755 --- a/ge/graph/manager/graph_manager.cc +++ b/ge/graph/manager/graph_manager.cc @@ -769,7 +769,7 @@ Status GraphManager::SetRtContext(rtContext_t rt_context, rtCtxMode_t mode, uint Status GraphManager::RunCustomPass(const GraphNodePtr &graph_node) { ConstGraphPtr const_graph = graph->node->GetGraph(); auto comp_graph = GraphUtils::GetComputeGraph(*const_graph); - GE_DUMP(compute_graph, "RunCustomPassBegin"); + GE_DUMP(comp_graph, "RunCustomPassBegin"); GE_TIMESTAMP_START(RunCustomPass); GraphPtr graph = std::const_pointer_cast(const_graph); From b706aa1da3044d6fb2c02951b190e70f1683433d Mon Sep 17 00:00:00 2001 From: unknown Date: Thu, 31 Dec 2020 11:00:04 +0800 Subject: [PATCH 14/54] Custom pass register. --- ge/graph/manager/graph_manager.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ge/graph/manager/graph_manager.cc b/ge/graph/manager/graph_manager.cc index c102ec2e..706908af 100755 --- a/ge/graph/manager/graph_manager.cc +++ b/ge/graph/manager/graph_manager.cc @@ -767,7 +767,7 @@ Status GraphManager::SetRtContext(rtContext_t rt_context, rtCtxMode_t mode, uint } Status GraphManager::RunCustomPass(const GraphNodePtr &graph_node) { - ConstGraphPtr const_graph = graph->node->GetGraph(); + ConstGraphPtr const_graph = graph_node->GetGraph(); auto comp_graph = GraphUtils::GetComputeGraph(*const_graph); GE_DUMP(comp_graph, "RunCustomPassBegin"); From af230762e14c1cdfa31e7c691115e6e51bc9ec83 Mon Sep 17 00:00:00 2001 From: unknown Date: Thu, 31 Dec 2020 11:17:24 +0800 Subject: [PATCH 15/54] Custom pass register. --- tests/ut/ge/CMakeLists.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/ut/ge/CMakeLists.txt b/tests/ut/ge/CMakeLists.txt index 1f6c6837..dcf389c0 100755 --- a/tests/ut/ge/CMakeLists.txt +++ b/tests/ut/ge/CMakeLists.txt @@ -121,6 +121,7 @@ set(COMMON_SRC_FILES "${GE_CODE_DIR}/metadef/graph/opsproto/opsproto_manager.cc" "${GE_CODE_DIR}/metadef/ops/op_imp.cpp" "${GE_CODE_DIR}/metadef/register/register.cpp" + "${GE_CODE_DIR}/metadef/register/register_pass.cpp" "${GE_CODE_DIR}/metadef/register/op_kernel_registry.cpp" "${GE_CODE_DIR}/metadef/register/auto_mapping_util.cpp" "${GE_CODE_DIR}/metadef/register/tensor_assign.cpp" From 7b6461170d3ce03b400ea975f75eac3a22479cba Mon Sep 17 00:00:00 2001 From: "gengchao4@huawei.com" Date: Thu, 31 Dec 2020 14:31:59 +0800 Subject: [PATCH 16/54] fix for json dump --- ge/analyzer/analyzer.cc | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/ge/analyzer/analyzer.cc b/ge/analyzer/analyzer.cc index 29181384..c63a6008 100755 --- a/ge/analyzer/analyzer.cc +++ b/ge/analyzer/analyzer.cc @@ -217,10 +217,15 @@ ge::Status Analyzer::SaveAnalyzerDataToFile(uint64_t session_id, uint64_t graph_ json jsn; GraphInfoToJson(jsn, *graph_info); - json_file_ << jsn.dump(kJsonDumpLevel) << std::endl; + bool ret_failed = false; + try { + json_file_ << jsn.dump(kJsonDumpLevel) << std::endl; + } catch (nlohmann::detail::type_error &e) { + GELOGE(FAILED, "analyzer file [%s] failed because [%s]", json_file_name_.c_str(), e.what()); + ret_failed = true; + } json_file_.close(); - - return SUCCESS; + return ret_failed ? 
FAILED : SUCCESS; } ge::Status Analyzer::DoAnalyze(DataInfo &data_info) { From 3e6b21f6c17b54a40cd0e59c7b321f71775a402f Mon Sep 17 00:00:00 2001 From: zhaoxinxin Date: Thu, 31 Dec 2020 14:47:44 +0800 Subject: [PATCH 17/54] modified: ge/graph/preprocess/graph_preprocess.cc --- ge/graph/preprocess/graph_preprocess.cc | 114 ++++++++++++++++-------- 1 file changed, 75 insertions(+), 39 deletions(-) diff --git a/ge/graph/preprocess/graph_preprocess.cc b/ge/graph/preprocess/graph_preprocess.cc index c45f4db6..57c2542a 100644 --- a/ge/graph/preprocess/graph_preprocess.cc +++ b/ge/graph/preprocess/graph_preprocess.cc @@ -901,51 +901,74 @@ Status ProcessNetoutputNodeDynShape(NodePtr &node) { } /** * Parser shape_range from string to vector - * shape_range from option normally is "[1~20],[3],[3~6],[-1]" + * shape_range from option normally is "[1~20,3,3~6,-1],[1~20,3,3~6,-1]" * @param shape_range */ -void ParseDynamicInputShapeRange(const std::string &shape_range, - std::vector>> &range) { - if (shape_range.empty() || shape_range.size() < 2) { +Status ParseDynamicInputShapeRange(const std::string &shape_range, + std::vector>> &range) { + if (shape_range.size() < 2) { GELOGW("Shape range %s is invalid.", shape_range.c_str()); return; } - // different parameter sets are split by ';' - vector shape_set = ge::StringUtils::Split(shape_range, ']'); - if (shape_set.empty()) { - return; + // different shape_ragne of single input are split by ']' + vector shape_range_set = ge::StringUtils::Split(shape_range, ']'); + if (shape_range_set.empty()) { + GELOGE("Shape range %s is not valid. Correct example: \"[1~20,3,3~6,-1],[1~20,3,3~6,-1]\"", shape_range.c_str()); + return PARAM_INVALID; } - for (auto shape_str : shape_set) { - if (shape_str.empty()) { - continue; - } - if (ge::StringUtils::StartWith(shape_str, "[")) { - shape_str = shape_str.substr(1, shape_str.size()); + for (const auto &shape_range_str : shape_range_set) { + if (shape_range_str.empty()) { + GELOGE("Shape range of input is empty. Given %s, while correct example: \"[1~20,3,3~6,-1],[1~20,3,3~6,-1]\"", + shape_range.c_str()); + return PARAM_INVALID; } - if (ge::StringUtils::StartWith(shape_str, ",")) { - shape_str = shape_str.substr(2, shape_str.size()); + // trim start bytes, after that, single input should be "1~20,3,3~6,-1" + if (ge::StringUtils::StartWith(shape_range_str, "[")) { + shape_range_str = shape_range_str.substr(1, shape_range_str.size()); + } else if (ge::StringUtils::StartWith(shape_range_str, ",")) { + shape_range_str = shape_range_str.substr(2, shape_range_str.size()); + } else { + GELOGE("Shape range of input is invalid. Given %s, while correct example: \"[1~20,3,3~6,-1],[1~20,3,3~6,-1]\"", + shape_range.c_str()); + return PARAM_INVALID; } - std::vector> range_of_single; - vector range_set = ge::StringUtils::Split(shape_str, ','); - for (auto range_str : range_set) { - vector pair_set = ge::StringUtils::Split(range_str, '~'); + // parse shape_range of single input. eg. 
"1~20,3,3~6,-1" + std::vector> range_of_single_input; + vector dim_range_set = ge::StringUtils::Split(shape_range_str, ','); + for (const auto &range_pair_str : dim_range_set) { + vector range_pair_set = ge::StringUtils::Split(range_pair_str, '~'); pair range_pair; - if (pair_set.size() == 1) { - auto range_value = atoi(pair_set.at(0).c_str()); + if (range_pair_set.size() == 1) { + // fix dim + auto range_value = stol(range_pair_set.at(0).c_str()); if (range_value < 0) { range_pair = std::make_pair(1, range_value); } else { range_pair = std::make_pair(range_value, range_value); } - } else if (pair_set.size() == 2) { - auto range_left = atoi(pair_set.at(0).c_str()); - auto range_right = atoi(pair_set.at(1).c_str()); - range_pair = std::make_pair(range_left, range_right); + } else if (range_pair_set.size() == 2) { + // unknown dim, should get range. + try { + auto range_left = stol(range_pair_set.at(0).c_str()); + auto range_right = stol(range_pair_set.at(1).c_str()); + range_pair = std::make_pair(range_left, range_right); + } catch (const std::invalid_argument) { + GELOGE( + "Parse shape range of input failed when transfer from string to int64. Given %s, while correct example: " + "\"[1~20,3,3~6,-1],[1~20,3,3~6,-1]\"", + shape_range.c_str()); + return PARAM_INVALID; + } + } else { + GELOGE("Shape range of input is invalid. Given %s, while correct example: \"[1~20,3,3~6,-1],[1~20,3,3~6,-1]\"", + shape_range.c_str()); + return PARAM_INVALID; } - range_of_single.emplace_back(range_pair); + range_of_single_input.emplace_back(range_pair); } - range.emplace_back(range_of_single); + range.emplace_back(range_of_single_input); } + return SUCCESS; } Status GetDynamicInputShapeRange(const std::vector &user_input, const std::map &graph_option, @@ -966,7 +989,8 @@ Status GetDynamicInputShapeRange(const std::vector &user_input, const return PARAM_INVALID; } GELOGD("GraphOption: dynamic_inputs_shape_range value is %s.", iter->second.c_str()); - ParseDynamicInputShapeRange(iter->second, range_vec); + auto ret = ParseDynamicInputShapeRange(iter->second, range_vec); + GE_CHK_STATUS_RET(ret, "Parse dynamic input shape range failed."); if (range_vec.size() != user_input.size()) { GELOGE(PARAM_INVALID, "Dynamic input shape range size is %zu, inputs size is %zu. 
Not match.", range_vec.size(), user_input.size()); @@ -978,18 +1002,30 @@ Status GetDynamicInputShapeRange(const std::vector &user_input, const Status UpdateDynamicInputShapeRange(const ge::GeAttrValue::INT index, const vector>> &range_vec, OpDescPtr &op, GeTensorDesc &desc) { - auto unkown_shape = desc.GetShape(); - auto shape_range = range_vec.at(index); - for (size_t i = 0; i < unkown_shape.GetDimNum(); ++i) { - if (shape_range.at(i).first == shape_range.at(i).second) { - unkown_shape.SetDim(i, shape_range.at(i).first); + auto origin_shape = desc.GetShape(); + auto current_shape_range_vec = range_vec.at(index); + if (current_shape_range_vec.size() != origin_shape.GetDimNum()) { + GELOGE(PARAM_INVALID, "Given shape_range dim num is %zu, current dim num is %zu, not match.Pleace Check.", + current_shape_range_vec.size(), origin_shape.GetDimNum()); + return PARAM_INVALID; + } + for (size_t i = 0; i < origin_shape.GetDimNum(); ++i) { + if (current_shape_range_vec.at(i).first == current_shape_range_vec.at(i).second) { + // given shape_range is known dim, check is same as origin or not + if (origin_shape.GetDim(i) != current_shape_range_vec.at(i).first) { + GELOGE(PARAM_INVALID, "Given shape range is %ld, current dim shape is %ld, not match.Pleace Check.", + current_shape_range_vec.at(i).first, origin_shape.GetDim(i)); + return PARAM_INVALID; + } + origin_shape.SetDim(i, current_shape_range_vec.at(i).first); } else { - unkown_shape.SetDim(i, -1); + origin_shape.SetDim(i, -1); } } - desc.SetShape(unkown_shape); - desc.SetShapeRange(shape_range); - int64_t dynamic_shape_size = 1; + desc.SetShape(origin_shape); + desc.SetShapeRange(current_shape_range_vec); + + /*int64_t dynamic_shape_size = 1; for (const auto range_pair : range_vec.at(index)) { FMK_INT64_MULCHECK(dynamic_shape_size, range_pair.second); dynamic_shape_size *= range_pair.second; @@ -1003,7 +1039,7 @@ Status UpdateDynamicInputShapeRange(const ge::GeAttrValue::INT index, FMK_INT64_MULCHECK(dynamic_shape_size, data_type_size); dynamic_shape_size *= data_type_size; GELOGI("In dynamic_execute mode ,set input %s shape range size %ld", op->GetName().c_str(), dynamic_shape_size); - ge::TensorUtils::SetSize(desc, dynamic_shape_size); + ge::TensorUtils::SetSize(desc, dynamic_shape_size);*/ graphStatus graph_ret = op->UpdateInputDesc(0, desc); GE_CHK_STATUS_RET(graph_ret, "UpdateInputDesc fail, graph ret: %u", graph_ret); graph_ret = op->UpdateOutputDesc(0, desc); From c07359baedff338e46fc7f54dd259c1aaa556deb Mon Sep 17 00:00:00 2001 From: zhaoxinxin Date: Thu, 31 Dec 2020 14:57:14 +0800 Subject: [PATCH 18/54] modified: ge/graph/preprocess/graph_preprocess.cc --- ge/graph/preprocess/graph_preprocess.cc | 25 +++++++++++++++---------- 1 file changed, 15 insertions(+), 10 deletions(-) diff --git a/ge/graph/preprocess/graph_preprocess.cc b/ge/graph/preprocess/graph_preprocess.cc index 57c2542a..f6a9ea80 100644 --- a/ge/graph/preprocess/graph_preprocess.cc +++ b/ge/graph/preprocess/graph_preprocess.cc @@ -907,18 +907,20 @@ Status ProcessNetoutputNodeDynShape(NodePtr &node) { Status ParseDynamicInputShapeRange(const std::string &shape_range, std::vector>> &range) { if (shape_range.size() < 2) { - GELOGW("Shape range %s is invalid.", shape_range.c_str()); - return; + GELOGE(PARAM_INVALID, "Shape range %s is invalid.", shape_range.c_str()); + return PARAM_INVALID; } // different shape_ragne of single input are split by ']' vector shape_range_set = ge::StringUtils::Split(shape_range, ']'); if (shape_range_set.empty()) { - GELOGE("Shape range %s is not 
valid. Correct example: \"[1~20,3,3~6,-1],[1~20,3,3~6,-1]\"", shape_range.c_str()); + GELOGE(PARAM_INVALID, "Shape range %s is not valid. Correct example: \"[1~20,3,3~6,-1],[1~20,3,3~6,-1]\"", + shape_range.c_str()); return PARAM_INVALID; } - for (const auto &shape_range_str : shape_range_set) { + for (auto &shape_range_str : shape_range_set) { if (shape_range_str.empty()) { - GELOGE("Shape range of input is empty. Given %s, while correct example: \"[1~20,3,3~6,-1],[1~20,3,3~6,-1]\"", + GELOGE(PARAM_INVALID, + "Shape range of input is empty. Given %s, while correct example: \"[1~20,3,3~6,-1],[1~20,3,3~6,-1]\"", shape_range.c_str()); return PARAM_INVALID; } @@ -928,7 +930,8 @@ Status ParseDynamicInputShapeRange(const std::string &shape_range, } else if (ge::StringUtils::StartWith(shape_range_str, ",")) { shape_range_str = shape_range_str.substr(2, shape_range_str.size()); } else { - GELOGE("Shape range of input is invalid. Given %s, while correct example: \"[1~20,3,3~6,-1],[1~20,3,3~6,-1]\"", + GELOGE(PARAM_INVALID, + "Shape range of input is invalid. Given %s, while correct example: \"[1~20,3,3~6,-1],[1~20,3,3~6,-1]\"", shape_range.c_str()); return PARAM_INVALID; } @@ -940,7 +943,7 @@ Status ParseDynamicInputShapeRange(const std::string &shape_range, pair range_pair; if (range_pair_set.size() == 1) { // fix dim - auto range_value = stol(range_pair_set.at(0).c_str()); + auto range_value = std::stol(range_pair_set.at(0).c_str()); if (range_value < 0) { range_pair = std::make_pair(1, range_value); } else { @@ -949,18 +952,20 @@ Status ParseDynamicInputShapeRange(const std::string &shape_range, } else if (range_pair_set.size() == 2) { // unknown dim, should get range. try { - auto range_left = stol(range_pair_set.at(0).c_str()); - auto range_right = stol(range_pair_set.at(1).c_str()); + auto range_left = std::stol(range_pair_set.at(0).c_str()); + auto range_right = std::stol(range_pair_set.at(1).c_str()); range_pair = std::make_pair(range_left, range_right); } catch (const std::invalid_argument) { GELOGE( + PARAM_INVALID, "Parse shape range of input failed when transfer from string to int64. Given %s, while correct example: " "\"[1~20,3,3~6,-1],[1~20,3,3~6,-1]\"", shape_range.c_str()); return PARAM_INVALID; } } else { - GELOGE("Shape range of input is invalid. Given %s, while correct example: \"[1~20,3,3~6,-1],[1~20,3,3~6,-1]\"", + GELOGE(PARAM_INVALID, + "Shape range of input is invalid. 
Given %s, while correct example: \"[1~20,3,3~6,-1],[1~20,3,3~6,-1]\"", shape_range.c_str()); return PARAM_INVALID; } From 0c14832647575c92adc1a53b21d42669955bbf53 Mon Sep 17 00:00:00 2001 From: zhangxiaokun Date: Thu, 31 Dec 2020 15:02:03 +0800 Subject: [PATCH 19/54] Fix dynamic getnext --- .../load/new_model_manager/davinci_model.cc | 49 ++++++++++++------- .../load/new_model_manager/davinci_model.h | 2 +- 2 files changed, 31 insertions(+), 20 deletions(-) diff --git a/ge/graph/load/new_model_manager/davinci_model.cc b/ge/graph/load/new_model_manager/davinci_model.cc index 1d465441..09c27918 100755 --- a/ge/graph/load/new_model_manager/davinci_model.cc +++ b/ge/graph/load/new_model_manager/davinci_model.cc @@ -2456,19 +2456,10 @@ Status DavinciModel::InitOutputTensorInfo(const OpDescPtr &op_desc) { GE_IF_BOOL_EXEC(ret != GRAPH_SUCCESS, GELOGE(ret, "Get size from TensorDesc failed, op:%s, input id:%zu", op_desc->GetName().c_str(), i); return ret); - std::vector output_shape = input_desc->GetShape().GetDims(); - if (is_online_infer_dynamic_) { - if (merge_nodes_gear_and_real_out_size_info_.find(i) != merge_nodes_gear_and_real_out_size_info_.end()) { - auto gear_and_real_out_size_info = merge_nodes_gear_and_real_out_size_info_[i]; - size = gear_and_real_out_size_info[cur_dynamic_dims_]; - auto gear_and_real_out_shape_info = merge_nodes_gear_and_real_out_shape_info_[i]; - output_shape = gear_and_real_out_shape_info[cur_dynamic_dims_]; - is_dynamic_ = true; - } - } - GELOGI("Output size is %ld, output shape is %s.", size, formats::JoinToString(output_shape).c_str()); - output_buffer_size_.push_back(size); - output_shape_info_.push_back(output_shape); + const GeShape &shape = input_desc->GetShape(); + GELOGI("Output size is %ld, output shape is %s.", size, formats::JoinToString(shape.GetDims()).c_str()); + output_buffer_size_.emplace_back(size); + output_shape_info_.emplace_back(shape); } return SUCCESS; @@ -2481,18 +2472,38 @@ Status DavinciModel::GenOutputTensorInfo(OutputData *output_data, vector output_buffer_size; + vector> output_shape_info; + size_t output_num = output_buffer_size_.size(); + for (size_t i = 0; i < output_num; ++i) { + int64_t output_size = output_buffer_size_[i]; + vector output_shape = output_shape_info_[i].GetDims(); + if (is_online_infer_dynamic_) { + if (merge_nodes_gear_and_real_out_size_info_.find(i) != merge_nodes_gear_and_real_out_size_info_.end()) { + auto gear_and_real_out_size_info = merge_nodes_gear_and_real_out_size_info_[i]; + output_size = gear_and_real_out_size_info[cur_dynamic_dims_]; + auto gear_and_real_out_shape_info = merge_nodes_gear_and_real_out_shape_info_[i]; + output_shape = gear_and_real_out_shape_info[cur_dynamic_dims_]; + is_dynamic_ = true; + } + } + GELOGI("Output size is %ld, output shape is %s.", output_size, formats::JoinToString(output_shape).c_str()); + output_buffer_size.push_back(output_size); + output_shape_info.push_back(output_shape); + } + GELOGI("Output blobs size:%zu, model id:%u", output_buffer_size_.size(), model_id_); - for (size_t i = 0; i < output_buffer_size_.size(); ++i) { - std::unique_ptr data_buf(new (std::nothrow) uint8_t[output_buffer_size_[i]]); + for (size_t i = 0; i < output_buffer_size.size(); ++i) { + std::unique_ptr data_buf(new (std::nothrow) uint8_t[output_buffer_size[i]]); if (data_buf == nullptr) { GELOGE(GE_GRAPH_MALLOC_FAILED, "Malloc buffer failed."); return GE_GRAPH_MALLOC_FAILED; } - output_data->blobs.push_back({data_buf.get(), static_cast(output_buffer_size_[i]), false}); - ge::OutputTensorInfo 
output; - output.dims = output_shape_info_[i]; + output_data->blobs.push_back({data_buf.get(), static_cast(output_buffer_size[i]), false}); + OutputTensorInfo output; + output.dims = output_shape_info[i]; output.data = std::move(data_buf); - output.length = output_buffer_size_[i]; + output.length = output_buffer_size[i]; outputs.emplace_back(std::move(output)); GELOGD("Output index:%zu, output dims is %s, data length:%lu.", i, formats::JoinToString(output.dims).c_str(), output.length); diff --git a/ge/graph/load/new_model_manager/davinci_model.h b/ge/graph/load/new_model_manager/davinci_model.h index 76c5c8f0..fba1b94b 100755 --- a/ge/graph/load/new_model_manager/davinci_model.h +++ b/ge/graph/load/new_model_manager/davinci_model.h @@ -1038,7 +1038,7 @@ class DavinciModel { vector> output_addrs_list_; vector output_buffer_size_; - vector> output_shape_info_; + vector output_shape_info_; vector output_descs_; vector output_formats_; From 5bedbf96964f25e47c29eaf2e7d24495dd05ea95 Mon Sep 17 00:00:00 2001 From: zhangxiaokun Date: Thu, 31 Dec 2020 15:30:25 +0800 Subject: [PATCH 20/54] Add UT --- tests/ut/ge/graph/load/davinci_model_unittest.cc | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/tests/ut/ge/graph/load/davinci_model_unittest.cc b/tests/ut/ge/graph/load/davinci_model_unittest.cc index eda3cb15..a9efab3d 100644 --- a/tests/ut/ge/graph/load/davinci_model_unittest.cc +++ b/tests/ut/ge/graph/load/davinci_model_unittest.cc @@ -111,6 +111,12 @@ TEST_F(UtestDavinciModel, init_success) { EXPECT_EQ(model.output_addrs_list_.size(), 1); EXPECT_EQ(model.task_list_.size(), 2); + OutputData output_data; + vector outputs; + EXPECT_EQ(model.GenOutputTensorInfo(&output_data, outputs), SUCCESS); + EXPECT_EQ(output_data.blobs.size(), 1); + EXPECT_EQ(outputs.size(), 1); + ProfilingManager::Instance().is_load_profiling_ = false; } From 974433b14d480863557ea98f65bb03a6492690f2 Mon Sep 17 00:00:00 2001 From: taoxiangdong Date: Thu, 31 Dec 2020 16:17:48 +0800 Subject: [PATCH 21/54] Free mem before return --- ge/graph/load/new_model_manager/model_manager.cc | 11 +---------- 1 file changed, 1 insertion(+), 10 deletions(-) diff --git a/ge/graph/load/new_model_manager/model_manager.cc b/ge/graph/load/new_model_manager/model_manager.cc index 01075255..6f923236 100755 --- a/ge/graph/load/new_model_manager/model_manager.cc +++ b/ge/graph/load/new_model_manager/model_manager.cc @@ -1568,6 +1568,7 @@ Status ModelManager::LaunchKernelCheckAicpuOp(std::vector &aicpu_op GE_CHK_RT(rtFree(mem)); } }; + GE_MAKE_GUARD(release, callback); // malloc sysOpInfoList in SysOpCheckInfo status = rtMalloc(&d_req_op_list, op_nums * sizeof(SysOpInfo), RT_MEMORY_HBM); if (status != RT_ERROR_NONE) { @@ -1580,7 +1581,6 @@ Status ModelManager::LaunchKernelCheckAicpuOp(std::vector &aicpu_op status = rtMalloc(&d_res_op_list, op_nums * sizeof(SysOpInfo), RT_MEMORY_HBM); if (status != RT_ERROR_NONE) { GELOGE(RT_FAILED, "Call rt failed, status: 0x%x", status); - GE_MAKE_GUARD(release, callback); return RT_ERROR_TO_GE_STATUS(status); } allocated_mem.push_back(d_res_op_list); @@ -1589,7 +1589,6 @@ Status ModelManager::LaunchKernelCheckAicpuOp(std::vector &aicpu_op status = rtMalloc(&d_ret_code_list, op_nums * sizeof(ReturnCode), RT_MEMORY_HBM); if (status != RT_ERROR_NONE) { GELOGE(RT_FAILED, "Call rt failed, status: 0x%x", status); - GE_MAKE_GUARD(release, callback); return RT_ERROR_TO_GE_STATUS(status); } allocated_mem.push_back(d_ret_code_list); @@ -1601,7 +1600,6 @@ Status ModelManager::LaunchKernelCheckAicpuOp(std::vector &aicpu_op 
status = rtMalloc(&d_op_type_name, op_type.length(), RT_MEMORY_HBM); if (status != RT_ERROR_NONE) { GELOGE(RT_FAILED, "Call rt failed, status: 0x%x", status); - GE_MAKE_GUARD(release, callback); return RT_ERROR_TO_GE_STATUS(status); } allocated_mem.push_back(d_op_type_name); @@ -1619,7 +1617,6 @@ Status ModelManager::LaunchKernelCheckAicpuOp(std::vector &aicpu_op status = rtMalloc(&d_op_type_name, op_type.size(), RT_MEMORY_HBM); if (status != RT_ERROR_NONE) { GELOGE(RT_FAILED, "Call rt failed, status: 0x%x", status); - GE_MAKE_GUARD(release, callback); return RT_ERROR_TO_GE_STATUS(status); } allocated_mem.push_back(d_op_type_name); @@ -1648,7 +1645,6 @@ Status ModelManager::LaunchKernelCheckAicpuOp(std::vector &aicpu_op status = rtMalloc(&args, args_size, RT_MEMORY_HBM); if (status != RT_ERROR_NONE) { GELOGE(RT_FAILED, "Call rt failed, status: 0x%x", status); - GE_MAKE_GUARD(release, callback); return RT_ERROR_TO_GE_STATUS(status); } allocated_mem.push_back(args); @@ -1664,7 +1660,6 @@ Status ModelManager::LaunchKernelCheckAicpuOp(std::vector &aicpu_op status = rtStreamSynchronize(stream); if (status != RT_ERROR_NONE) { GELOGE(RT_FAILED, "Call rt stream sync failed, status: 0x%x", status); - GE_MAKE_GUARD(release, callback); GE_CHK_RT(rtStreamDestroy(stream)); return RT_ERROR_TO_GE_STATUS(status); } @@ -1679,7 +1674,6 @@ Status ModelManager::LaunchKernelCheckAicpuOp(std::vector &aicpu_op if (op_check_info_res.isWithoutJson) { GELOGI("No need to check aicpu in this scenoria."); - GE_MAKE_GUARD(release, callback); GE_CHK_RT(rtStreamDestroy(stream)); return SUCCESS; } @@ -1698,7 +1692,6 @@ Status ModelManager::LaunchKernelCheckAicpuOp(std::vector &aicpu_op sizeof(SysOpInfo) * res_op_nums, RT_MEMCPY_DEVICE_TO_HOST)); if (res_ret_code_list.size() != res_aicpu_op_info_list.size() || res_ret_code_list.size() != res_op_nums) { GELOGE(FAILED, "Number of retcode is not equal to number of op type."); - GE_MAKE_GUARD(release, callback); GE_CHK_RT(rtStreamDestroy(stream)); return FAILED; } @@ -1722,12 +1715,10 @@ Status ModelManager::LaunchKernelCheckAicpuOp(std::vector &aicpu_op } fail_reason += "not support."; GELOGE(FAILED, "Check aicpu op_type failed. 
details: %s", fail_reason.c_str()); - GE_MAKE_GUARD(release, callback); GE_CHK_RT(rtStreamDestroy(stream)); return FAILED; } - GE_MAKE_GUARD(release, callback); GE_CHK_RT(rtStreamDestroy(stream)); GELOGI("Cpu kernel launch check optype task success."); return SUCCESS; From 2dfaed0e849a3bd22702ae9e8c60aab26b35ef12 Mon Sep 17 00:00:00 2001 From: "gengchao4@huawei.com" Date: Thu, 31 Dec 2020 16:44:35 +0800 Subject: [PATCH 22/54] add OptimizeWholeGraph --- ge/graph/manager/graph_manager.cc | 3 +++ ge/graph/optimize/graph_optimize.cc | 33 +++++++++++++++++++++++++++++ ge/graph/optimize/graph_optimize.h | 3 +++ 3 files changed, 39 insertions(+) diff --git a/ge/graph/manager/graph_manager.cc b/ge/graph/manager/graph_manager.cc index beb7cd42..a0d598f3 100755 --- a/ge/graph/manager/graph_manager.cc +++ b/ge/graph/manager/graph_manager.cc @@ -731,6 +731,9 @@ Status GraphManager::PreRunAfterOptimizeSubGraph(const GraphNodePtr &graph_node, GeRootModelPtr &ge_root_model, uint64_t session_id) { GE_CHECK_NOTNULL(graph_node); GE_CHECK_NOTNULL(compute_graph); + + CompilerStages &stages = GetCompilerStages(graph_node->GetGraphId()); + GM_RUN_AND_DUMP_PERF("OptimizeWholeGraph", stages.optimizer.OptimizeWholeGraph, compute_graph); GM_RUN_AND_DUMP_PERF("Optimize2", OptimizeStage2, compute_graph); GM_RUN_AND_DUMP_PERF("OptimizeGraphBeforeBuildForRts", GetCompilerStages(graph_node->GetGraphId()).optimizer.OptimizeGraphBeforeBuildForRts, diff --git a/ge/graph/optimize/graph_optimize.cc b/ge/graph/optimize/graph_optimize.cc index c94408de..d2e45195 100644 --- a/ge/graph/optimize/graph_optimize.cc +++ b/ge/graph/optimize/graph_optimize.cc @@ -336,4 +336,37 @@ Status GraphOptimize::IdentifyReference(ComputeGraphPtr &compute_graph) { } return SUCCESS; } +Status GraphOptimize::OptimizeWholeGraph(ComputeGraphPtr &compute_graph) { + if (compute_graph == nullptr) { + GELOGE(GE_GRAPH_OPTIMIZE_COMPUTE_GRAPH_NULL, "[OptimizeWholeGraph]: compute_graph is nullptr."); + return GE_GRAPH_OPTIMIZE_COMPUTE_GRAPH_NULL; + } + + std::shared_ptr instance_ptr = ge::GELib::GetInstance(); + if (instance_ptr == nullptr || !instance_ptr->InitFlag()) { + GELOGE(GE_CLI_GE_NOT_INITIALIZED, "OptimizeWholeGraph failed."); + return GE_CLI_GE_NOT_INITIALIZED; + } + + auto graph_optimizer = instance_ptr->OpsKernelManagerObj().GetAllGraphOptimizerObjsByPriority(); + GELOGI("optimize by opskernel in OptimizeWholeGraph. num of graph_optimizer is %zu.", graph_optimizer.size()); + Status ret = SUCCESS; + string exclude_core_type = (core_type_ == kVectorCore) ? 
kAicoreEngine : kVectorEngine; + GELOGD("[OptimizeWholeGraph]: engine type will exclude: %s", exclude_core_type.c_str()); + if (!graph_optimizer.empty()) { + for (auto &iter : graph_optimizer) { + if (iter.first == exclude_core_type || iter.second == nullptr) { + continue; + } + GELOGI("Begin to refine running format by engine %s", iter->first.c_str()); + ret = iter.second->OptimizeWholeGraph(*compute_graph); + GE_DUMP(compute_graph, "OptimizeWholeGraph" + iter.first); + if (ret != SUCCESS) { + GELOGE(ret, "[OptimizeWholeGraph]: graph optimize failed, ret:%u", ret); + return ret; + } + } + } + return ret; +} } // namespace ge diff --git a/ge/graph/optimize/graph_optimize.h b/ge/graph/optimize/graph_optimize.h index 78d580b7..3a1960f7 100755 --- a/ge/graph/optimize/graph_optimize.h +++ b/ge/graph/optimize/graph_optimize.h @@ -52,6 +52,9 @@ class GraphOptimize { // for fe prepare optimize in quantize scene Status OptimizeOriginalGraphForQuantize(ComputeGraphPtr &compute_graph); + // for engine to optimize merged whole graph before ge Optimize2 + Status OptimizeWholeGraph(ComputeGraphPtr &compute_graph); + // for rts optimize before build to add attr and insert memcpy op Status OptimizeGraphBeforeBuildForRts(ComputeGraphPtr &compute_graph); From 2f9b6f64e6707056d98a094238579c6d63b72a4a Mon Sep 17 00:00:00 2001 From: chuxing Date: Thu, 31 Dec 2020 16:55:32 +0800 Subject: [PATCH 23/54] Dynamic Inputs --- .../executor/hybrid_model_async_executor.cc | 43 ++++++++++--------- .../executor/hybrid_model_async_executor.h | 6 ++- inc/framework/common/ge_types.h | 1 + 3 files changed, 28 insertions(+), 22 deletions(-) diff --git a/ge/hybrid/executor/hybrid_model_async_executor.cc b/ge/hybrid/executor/hybrid_model_async_executor.cc index ba717a2d..4d23cd55 100644 --- a/ge/hybrid/executor/hybrid_model_async_executor.cc +++ b/ge/hybrid/executor/hybrid_model_async_executor.cc @@ -98,10 +98,10 @@ Status HybridModelAsyncExecutor::Init() { return SUCCESS; } -Status HybridModelAsyncExecutor::PreRun(InputData ¤t_data) { +Status HybridModelAsyncExecutor::PreRun(InputData ¤t_data, HybridModelExecutor::ExecuteArgs &args) { GE_CHK_STATUS_RET(SyncVarData(), "Failed to sync var data"); RECORD_MODEL_EXECUTION_EVENT(executor_->GetContext(), "[SyncVarData] End"); - GE_CHK_STATUS_RET(CopyInputData(current_data), "Failed to copy input data to model"); + GE_CHK_STATUS_RET(PrepareInputs(current_data, args), "Failed to copy input data to model"); RECORD_MODEL_EXECUTION_EVENT(executor_->GetContext(), "[CopyInputData] End"); return SUCCESS; } @@ -126,14 +126,9 @@ Status HybridModelAsyncExecutor::RunInternal() { InputData current_data = data_wrapper->GetInput(); GELOGI("Model thread Run begin, model id:%u, data index:%u.", model_id_, current_data.index); - HybridModelExecutor::ExecuteArgs args; - args.inputs.resize(input_tensors_.size()); - for (auto &it : input_tensors_) { - args.inputs[it.first] = it.second; - } - RECORD_MODEL_EXECUTION_EVENT(executor_->GetContext(), "[RunInternal] [iteration = %d] Start", iterator_count_); - ret = PreRun(current_data); + HybridModelExecutor::ExecuteArgs args; + ret = PreRun(current_data, args); GE_CHK_BOOL_TRUE_EXEC_WITH_LOG( ret != SUCCESS, (void) HandleResult(ret, current_data.index, args, data_wrapper->GetOutput()); CsaInteract::GetInstance().StoreInternalErrorCode(ret, ERROR_MODULE_FMK, JOBSUBSTATE_GRAPH_EXEC); @@ -202,7 +197,9 @@ Status HybridModelAsyncExecutor::SyncVarData() { return SUCCESS; } -Status HybridModelAsyncExecutor::CopyInputData(const InputData ¤t_data) { +Status 
HybridModelAsyncExecutor::PrepareInputs(const InputData ¤t_data, HybridModelExecutor::ExecuteArgs &args) { + args.inputs.resize(input_tensors_.size()); + args.input_desc.resize(input_tensor_desc_.size()); const std::vector &blobs = current_data.blobs; for (const auto &it : input_tensors_) { auto input_index = it.first; @@ -230,6 +227,13 @@ Status HybridModelAsyncExecutor::CopyInputData(const InputData ¤t_data) { data_buf.data, data_buf.length, RT_MEMCPY_HOST_TO_DEVICE)); + args.inputs[input_index] = input_tensor; + if (is_input_dynamic_[input_index]) { + auto &tensor_desc = input_tensor_desc_[input_index]; + tensor_desc->SetShape(GeShape(current_data.shapes[input_index])); + args.input_desc[input_index] = tensor_desc; + GELOGD("Update shape of input[%u] to [%s]", input_index, tensor_desc->MutableShape().ToString().c_str()); + } } return SUCCESS; @@ -240,7 +244,10 @@ Status HybridModelAsyncExecutor::InitInputTensors() { GE_CHECK_NOTNULL(allocator); int input_index = 0; for (const auto &input_node : model_->GetRootGraphItem()->GetInputNodes()) { - GELOGD("Init input[%u], node = %s", input_index, input_node->NodeName().c_str()); + GELOGD("Init input[%u], node = %s, is_dynamic = %d", + input_index, + input_node->NodeName().c_str(), + input_node->is_dynamic); auto output_desc = input_node->MutableOutputDesc(kDataOutputIndex); GE_CHECK_NOTNULL(output_desc); int64_t tensor_size = 0; @@ -258,6 +265,8 @@ Status HybridModelAsyncExecutor::InitInputTensors() { TensorValue tensor(shared_ptr(buffer.release())); tensor.SetName("Input_" + input_node->NodeName()); input_tensors_.emplace(input_index, tensor); + input_tensor_desc_.emplace(input_index, output_desc); + is_input_dynamic_.push_back(input_node->is_dynamic); input_index += 1; } @@ -402,18 +411,12 @@ Status HybridModelAsyncExecutor::Execute(const vector &inputs, vector< buffer.data = const_cast(tensor.GetData().GetData()); buffer.length = tensor.GetData().size(); input_data.blobs.emplace_back(buffer); + input_data.shapes.emplace_back(tensor.GetTensorDesc().GetShape().GetDims()); } - GE_CHK_STATUS_RET(CopyInputData(input_data), "Failed to copy input data to model"); - GELOGD("Done copying input data successfully."); HybridModelExecutor::ExecuteArgs args; - args.inputs.resize(input_tensors_.size()); - args.input_desc.resize(input_tensors_.size()); - for (auto &it : input_tensors_) { - args.inputs[it.first] = it.second; - args.input_desc[it.first] = MakeShared(inputs[it.first].GetTensorDesc()); - } - + GE_CHK_STATUS_RET(PrepareInputs(input_data, args), "Failed to copy input data to model"); + GELOGD("Done copying input data successfully."); GE_CHK_STATUS_RET(executor_->Execute(args), "Failed to execute model."); std::vector output_tensor_info_list; diff --git a/ge/hybrid/executor/hybrid_model_async_executor.h b/ge/hybrid/executor/hybrid_model_async_executor.h index 21833b0b..ad39cac5 100644 --- a/ge/hybrid/executor/hybrid_model_async_executor.h +++ b/ge/hybrid/executor/hybrid_model_async_executor.h @@ -70,9 +70,9 @@ class HybridModelAsyncExecutor { Status OnComputeDone(uint32_t data_index, uint32_t result_code, std::vector &outputs); - Status PreRun(InputData ¤t_data); + Status PreRun(InputData ¤t_data, HybridModelExecutor::ExecuteArgs &args); - Status CopyInputData(const InputData ¤t_data); + Status PrepareInputs(const InputData ¤t_data, HybridModelExecutor::ExecuteArgs &args); std::mutex mu_; HybridModel *model_; @@ -86,6 +86,8 @@ class HybridModelAsyncExecutor { rtStream_t stream_ = nullptr; std::map input_tensors_; + std::map input_tensor_desc_; + 
std::vector is_input_dynamic_; std::shared_ptr listener_; }; } // namespace hybrid diff --git a/inc/framework/common/ge_types.h b/inc/framework/common/ge_types.h index 4267aec4..0bf8bb83 100644 --- a/inc/framework/common/ge_types.h +++ b/inc/framework/common/ge_types.h @@ -81,6 +81,7 @@ struct InputData { std::vector blobs; // Actual input data, currently only supports one input bool is_dynamic_batch = false; // Whether is dynamic batch size scene, default:false std::string batch_label; // Gear used for current inference in dynamic batch scene + std::vector> shapes; // Input shapes }; /// Output result structure definition From 3bbe8c7d04824a0b206e115c8a1e3b46575ad8de Mon Sep 17 00:00:00 2001 From: "gengchao4@huawei.com" Date: Thu, 31 Dec 2020 16:58:17 +0800 Subject: [PATCH 24/54] add OptimizeWholeGraph --- ge/graph/optimize/graph_optimize.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ge/graph/optimize/graph_optimize.cc b/ge/graph/optimize/graph_optimize.cc index d2e45195..cd80a956 100644 --- a/ge/graph/optimize/graph_optimize.cc +++ b/ge/graph/optimize/graph_optimize.cc @@ -358,7 +358,7 @@ Status GraphOptimize::OptimizeWholeGraph(ComputeGraphPtr &compute_graph) { if (iter.first == exclude_core_type || iter.second == nullptr) { continue; } - GELOGI("Begin to refine running format by engine %s", iter->first.c_str()); + GELOGI("Begin to optimize whole graph by engine %s", iter.first.c_str()); ret = iter.second->OptimizeWholeGraph(*compute_graph); GE_DUMP(compute_graph, "OptimizeWholeGraph" + iter.first); if (ret != SUCCESS) { From d14900380e895606e3fdbc84f8dcf056feca89ed Mon Sep 17 00:00:00 2001 From: lwx897429 Date: Thu, 31 Dec 2020 16:50:14 +0800 Subject: [PATCH 25/54] fixed memory leak occurs when keep_dtype parse failed --- ge/offline/keep_dtype_option.cc | 29 +++++++++++++++++++---------- ge/offline/main.cc | 2 ++ 2 files changed, 21 insertions(+), 10 deletions(-) diff --git a/ge/offline/keep_dtype_option.cc b/ge/offline/keep_dtype_option.cc index 348a6068..5624f21c 100644 --- a/ge/offline/keep_dtype_option.cc +++ b/ge/offline/keep_dtype_option.cc @@ -42,21 +42,29 @@ bool IsOriginalOpFind(OpDescPtr &op_desc, const std::string &op_name) { } void KeepDtypeReportError(const std::vector &invalid_list) { - std::stringstream error_ops; - for (size_t i = 0; i < invalid_list.size(); i++) { + std::stringstream err_msg; + size_t list_size = invalid_list.size(); + err_msg << "config file contains " << list_size; + if (list_size == 1) { + err_msg << " operator not in the graph, op name:"; + } else { + err_msg << " operators not in the graph, op names:"; + } + + for (size_t i = 0; i < list_size; i++) { if (i == kMaxOpsNum) { - error_ops << "..."; + err_msg << ".."; break; } - error_ops << invalid_list[i] << " "; + err_msg << invalid_list[i]; + if (i != list_size - 1) { + err_msg << " "; + } } - std::string err_msg = "config file contains "; - err_msg = err_msg.append(std::to_string(invalid_list.size())) - .append(" operators not in the graph, op names:") - .append(error_ops.str()); + ErrorManager::GetInstance().ATCReportErrMessage( - "E10042", {"parameter", "reason"}, {"keep_dtype", err_msg.c_str()}); - GELOGE(FAILED, "%s", err_msg.c_str()); + "E10042", {"parameter", "reason"}, {"keep_dtype", err_msg.str().c_str()}); + GELOGE(FAILED, "%s", err_msg.str().c_str()); } Status DealKeepDtypeOption(const ComputeGraphPtr &graph, const std::string &keep_dtype) { @@ -96,6 +104,7 @@ Status DealKeepDtypeOption(const ComputeGraphPtr &graph, const std::string &keep 
       invalid_list.push_back(op_name);
     }
   }
+  ifs.close();

   if (!invalid_list.empty()) {
     KeepDtypeReportError(invalid_list);
diff --git a/ge/offline/main.cc b/ge/offline/main.cc
index ed67b913..14f7ae89 100755
--- a/ge/offline/main.cc
+++ b/ge/offline/main.cc
@@ -994,6 +994,8 @@ domi::Status GenerateModel(std::map<string, string> &options, std::string output
   Status ret = ge::DealKeepDtypeOption(ge::GraphUtils::GetComputeGraph(graph), FLAGS_keep_dtype);
   if (ret != SUCCESS) {
+    (void)ge_generator.Finalize();
+    (void)ge::GELib::GetInstance()->Finalize();
     return ret;
   }
From f175fed5884e55fbdd321dd59ab6e1cf834d05eb Mon Sep 17 00:00:00 2001
From: unknown
Date: Thu, 31 Dec 2020 18:01:47 +0800
Subject: [PATCH 26/54] Custom pass register.

---
 ge/graph/manager/graph_manager.cc | 4 ++--
 metadef                          | 2 +-
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/ge/graph/manager/graph_manager.cc b/ge/graph/manager/graph_manager.cc
index 706908af..84572d45 100755
--- a/ge/graph/manager/graph_manager.cc
+++ b/ge/graph/manager/graph_manager.cc
@@ -101,7 +101,7 @@
 #include "graph/common/local_context.h"
 #include "graph/common/omg_util.h"
 #include "common/formats/utils/formats_trans_utils.h"
-#include "external/register/register_pass.h"
+#include "register/custom_pass_helper.h"

 namespace {
 const char *const kSummary = "Summary";
@@ -773,7 +773,7 @@ Status GraphManager::RunCustomPass(const GraphNodePtr &graph_node) {
   GE_TIMESTAMP_START(RunCustomPass);

   GraphPtr graph = std::const_pointer_cast<Graph>(const_graph);
-  GE_CHK_STATUS_RET(CustomPassHelper::Instance()->Run(graph), "Graph[%s] run custom pass fail.",
+  GE_CHK_STATUS_RET(CustomPassHelper::Instance().Run(graph), "Graph[%s] run custom pass fail.",
                     comp_graph->GetName().c_str());
   GE_TIMESTAMP_END(RunCustomPass, "GraphBuilder::RunCustomPass");
   return SUCCESS;
diff --git a/metadef b/metadef
index 37a90f0d..44bcbb5e 160000
--- a/metadef
+++ b/metadef
@@ -1 +1 @@
-Subproject commit 37a90f0dfd797306e99ec32a688be32a9ad835a4
+Subproject commit 44bcbb5ea25ada1a5393aa4c7f554d40b6859b18
From df0a3647628886f7369790b1ef1b07ed63975fd8 Mon Sep 17 00:00:00 2001
From: wxl
Date: Thu, 31 Dec 2020 18:24:32 +0800
Subject: [PATCH 27/54] bugfix

---
 ge/ir_build/ge_ir_build.cc | 27 ++++++++++++++++++++++++++-
 1 file changed, 26 insertions(+), 1 deletion(-)

diff --git a/ge/ir_build/ge_ir_build.cc b/ge/ir_build/ge_ir_build.cc
index c7ef6c1a..95fb6749 100644
--- a/ge/ir_build/ge_ir_build.cc
+++ b/ge/ir_build/ge_ir_build.cc
@@ -36,6 +36,9 @@
 #include "model/ge_model.h"
 #include "graph/shape_refiner.h"
 #include "graph/opsproto_manager.h"
+#include "inc/pass_manager.h"
+#include "graph/passes/net_output_pass.h"
+#include "graph/passes/data_pass.h"

 using std::string;
 using namespace std;
@@ -233,6 +236,7 @@ class Impl {
                         ModelBufferData &ge_models);
   graphStatus InitDomiOmgContext(const string &input_shape, const string &input_format, const string &net_format,
                                  bool is_dynamic_input);
+  static graphStatus InferShapePrepare(const ComputeGraphPtr &compute_graph);
   void SetRtSocVersion();
   void UpdateThreadContext();
   void LoadOpsProto();
@@ -243,6 +247,22 @@ class Impl {
   OmgContext omg_context_;
 };

+static graphStatus InferShapePrepare(const ComputeGraphPtr &compute_graph) {
+  GE_CHECK_NOTNULL(compute_graph);
+
+  PassManager prepare_infershape;
+  prepare_infershape.AddPass("PrepareNetoutput", new(std::nothrow) NetOutputPass);
+  prepare_infershape.AddPass("PrepareSubGraphReflection", new (std::nothrow) DataPass);
+
+  auto ret = prepare_infershape.Run(compute_graph);
+  if ((ret != SUCCESS) && (ret != NOT_CHANGED)) {
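// NOTE: a sketch of the PassManager contract assumed here (pass name and type
// are illustrative): passes run in registration order, and NOT_CHANGED is a
// benign result, so only other non-SUCCESS codes are treated as failure.
//   PassManager pm;
//   pm.AddPass("SomePass", new (std::nothrow) SomePass);
//   Status rc = pm.Run(graph);
//   bool failed = (rc != SUCCESS) && (rc != NOT_CHANGED);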
GELOGE(ret, "Prepair for infershape failed, ret:%d", ret); + return ret; + } + GELOGD("Prepair for infershape success!"); + return GRAPH_SUCCESS; +} + graphStatus Impl::UpdateDataOpAttr(const Graph &graph) { GELOGD("Enter Update Data Attr Process!"); if (options_.find(kInputShape) == options_.end()) { @@ -591,7 +611,12 @@ graphStatus aclgrphInferShapeAndType(ge::Graph &graph) { return GRAPH_PARAM_INVALID; } - auto ret = compute_graph->TopologicalSorting(); + auto ret = Impl::InferShapePrepare(root_graph); + if (ret != GRAPH_SUCCESS) { + return ret; + } + + ret = compute_graph->TopologicalSorting(); if (ret != GRAPH_SUCCESS) { GELOGE(ret, "Acl topo logical sort failed."); return ret; From 3aa7852a23f125e55a9725d2673ac02ab4689b5c Mon Sep 17 00:00:00 2001 From: chuxing Date: Thu, 31 Dec 2020 18:29:38 +0800 Subject: [PATCH 28/54] fix sc --- inc/framework/common/ge_types.h | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/inc/framework/common/ge_types.h b/inc/framework/common/ge_types.h index 0bf8bb83..7293de7e 100644 --- a/inc/framework/common/ge_types.h +++ b/inc/framework/common/ge_types.h @@ -73,14 +73,14 @@ struct DataBuffer { /// @brief External input data /// struct InputData { - uint32_t index; // Index of input data - uint32_t timestamp; // Data creation time - uint32_t timeout; // Processing timeout - uint32_t model_id; // Model ID required for data processing - uint64_t request_id = 0; // Request ID - std::vector blobs; // Actual input data, currently only supports one input - bool is_dynamic_batch = false; // Whether is dynamic batch size scene, default:false - std::string batch_label; // Gear used for current inference in dynamic batch scene + uint32_t index; // Index of input data + uint32_t timestamp; // Data creation time + uint32_t timeout; // Processing timeout + uint32_t model_id; // Model ID required for data processing + uint64_t request_id = 0; // Request ID + std::vector blobs; // Actual input data, currently only supports one input + bool is_dynamic_batch = false; // Whether is dynamic batch size scene, default:false + std::string batch_label; // Gear used for current inference in dynamic batch scene std::vector> shapes; // Input shapes }; From d2dfa7779888b672c208ddd17fc37524435b312e Mon Sep 17 00:00:00 2001 From: chuxing Date: Thu, 31 Dec 2020 18:55:06 +0800 Subject: [PATCH 29/54] fix sc --- inc/framework/common/ge_types.h | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/inc/framework/common/ge_types.h b/inc/framework/common/ge_types.h index 7293de7e..7854396c 100644 --- a/inc/framework/common/ge_types.h +++ b/inc/framework/common/ge_types.h @@ -73,15 +73,15 @@ struct DataBuffer { /// @brief External input data /// struct InputData { - uint32_t index; // Index of input data - uint32_t timestamp; // Data creation time - uint32_t timeout; // Processing timeout - uint32_t model_id; // Model ID required for data processing - uint64_t request_id = 0; // Request ID - std::vector blobs; // Actual input data, currently only supports one input - bool is_dynamic_batch = false; // Whether is dynamic batch size scene, default:false - std::string batch_label; // Gear used for current inference in dynamic batch scene - std::vector> shapes; // Input shapes + uint32_t index; // Index of input data + uint32_t timestamp; // Data creation time + uint32_t timeout; // Processing timeout + uint32_t model_id; // Model ID required for data processing + uint64_t request_id = 0; // Request ID + std::vector blobs; // Actual 
+  bool is_dynamic_batch = false;  // Whether is dynamic batch size scene, default:false
+  std::string batch_label;  // Gear used for current inference in dynamic batch scene
+  std::vector<std::vector<int64_t>> shapes;  // Input shapes
 };

 /// Output result structure definition
From 29be15b8e21cf5e598769d1bc00107af02f6e0f6 Mon Sep 17 00:00:00 2001
From: wxl
Date: Mon, 4 Jan 2021 11:01:01 +0800
Subject: [PATCH 30/54] bugfix

---
 ge/ir_build/ge_ir_build.cc | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/ge/ir_build/ge_ir_build.cc b/ge/ir_build/ge_ir_build.cc
index 95fb6749..8423c8bb 100644
--- a/ge/ir_build/ge_ir_build.cc
+++ b/ge/ir_build/ge_ir_build.cc
@@ -611,7 +611,7 @@ graphStatus aclgrphInferShapeAndType(ge::Graph &graph) {
     return GRAPH_PARAM_INVALID;
   }

-  auto ret = Impl::InferShapePrepare(root_graph);
+  auto ret = Impl::InferShapePrepare(compute_graph);
   if (ret != GRAPH_SUCCESS) {
     return ret;
   }
From 85f51c068757ed3d570aa1774e320884121ac9b0 Mon Sep 17 00:00:00 2001
From: wxl
Date: Mon, 4 Jan 2021 11:16:34 +0800
Subject: [PATCH 31/54] bugfix: aclgrphInferShapeAndType support subgraph

---
 ge/ir_build/ge_ir_build.cc | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/ge/ir_build/ge_ir_build.cc b/ge/ir_build/ge_ir_build.cc
index 8423c8bb..78a69392 100644
--- a/ge/ir_build/ge_ir_build.cc
+++ b/ge/ir_build/ge_ir_build.cc
@@ -247,7 +247,7 @@ class Impl {
   OmgContext omg_context_;
 };

-static graphStatus InferShapePrepare(const ComputeGraphPtr &compute_graph) {
+graphStatus Impl::InferShapePrepare(const ComputeGraphPtr &compute_graph) {
   GE_CHECK_NOTNULL(compute_graph);

   PassManager prepare_infershape;
From 6ee84a5afc9fe043e53c8a1b21f8587578d66c8d Mon Sep 17 00:00:00 2001
From: taoxudonghaha
Date: Mon, 4 Jan 2021 14:28:33 +0800
Subject: [PATCH 32/54] solve msprofiler depend

---
 CMakeLists.txt    | 12 ++++++------
 ge/CMakeLists.txt | 47 +++++++++++++++++++++++++++++++++--------------
 2 files changed, 39 insertions(+), 20 deletions(-)

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 776a3232..9194f119 100755
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -74,7 +74,7 @@ if (ENABLE_OPEN_SRC)
     set(STATIC_ACL_LIB ${GE_LIB_PATH})
     find_module(slog libslog.so ${GE_LIB_PATH})
     find_module(static_mmpa libmmpa.a ${GE_LIB_PATH})
-    find_module(msprofiler libmsprofiler.a ${GE_LIB_PATH})
+    find_module(msprofiler_ext libmsprofiler.a ${GE_LIB_PATH})
     find_module(hccl libhccl.so ${GE_LIB_PATH})
     find_module(adump_server libadump_server.a ${GE_LIB_PATH})
     find_module(runtime libruntime.so ${GE_LIB_PATH})
@@ -83,7 +83,7 @@ if (ENABLE_OPEN_SRC)
     find_module(error_manager liberror_manager.so ${GE_LIB_PATH})
     find_module(ascend_hal_stub libascend_hal.so ${GE_LIB_PATH})
     find_module(error_manager_static liberror_manager.a ${GE_LIB_PATH})
-    find_module(msprofiler_fwk libmsprofiler_fwk.a ${GE_LIB_PATH})
+    find_module(msprofiler_fwk_ext libmsprofiler_fwk.a ${GE_LIB_PATH})
     #find_module(ascendcl_static libascendcl.a ${GE_LIB_PATH})
   elseif(ENABLE_GE_COV OR ENABLE_GE_UT)
     add_subdirectory(tests)
@@ -97,7 +97,7 @@ if (ENABLE_OPEN_SRC)
       find_module(runtime libruntime.so ${ASCEND_RUNTIME_DIR})
      find_module(resource libresource.so ${ASCEND_RUNTIME_DIR})
       find_module(error_manager liberror_manager.so ${ASCEND_RUNTIME_DIR})
-      find_module(msprofiler_fwk libmsprofiler_fwk.a ${ASCEND_RUNTIME_DIR})
+      find_module(msprofiler_fwk_ext libmsprofiler_fwk.a ${ASCEND_RUNTIME_DIR})
       find_module(ascend_hal_stub libascend_hal.so ${ASCEND_DRIVER_DIR}/driver)
       if(PRODUCT STREQUAL "flr3")
         message(FATAL_ERROR "This
platform is not supported in train mode, build terminated") @@ -109,7 +109,7 @@ if (ENABLE_OPEN_SRC) find_module(resource libresource.so ${ASCEND_ATC_DIR}) find_module(error_manager liberror_manager.so ${ASCEND_ATC_DIR}) find_module(error_manager_static liberror_manager.a ${ASCEND_ACL_DIR}) - find_module(msprofiler libmsprofiler.a ${ASCEND_ACL_DIR}) + find_module(msprofiler_ext libmsprofiler.a ${ASCEND_ACL_DIR}) #find_module(ascendcl_static libascendcl.a ${ASCEND_ACL_DIR}) if(PRODUCT STREQUAL "flr3") elseif(PRODUCT STREQUAL "flr1") @@ -120,7 +120,7 @@ if (ENABLE_OPEN_SRC) find_module(ascend_hal_stub libascend_hal.so ${ASCEND_DRIVER_DIR}) endif() elseif(PLATFORM STREQUAL "all") - find_module(msprofiler libmsprofiler.a ${ASCEND_ACL_DIR}) + find_module(msprofiler_ext libmsprofiler.a ${ASCEND_ACL_DIR}) find_module(hccl libhccl.so ${ASCEND_RUNTIME_DIR}) find_module(adump_server libadump_server.a ${ASCEND_ACL_DIR}) find_module(runtime libruntime.so ${ASCEND_ACL_DIR}) @@ -128,7 +128,7 @@ if (ENABLE_OPEN_SRC) find_module(resource libresource.so ${ASCEND_ATC_DIR}) find_module(error_manager liberror_manager.so ${ASCEND_ATC_DIR}) find_module(error_manager_static liberror_manager.a ${ASCEND_ACL_DIR}) - find_module(msprofiler_fwk libmsprofiler_fwk.a ${ASCEND_RUNTIME_DIR}) + find_module(msprofiler_fwk_ext libmsprofiler_fwk.a ${ASCEND_RUNTIME_DIR}) find_module(ascend_hal_stub libascend_hal.so ${ASCEND_DRIVER_DIR}/driver) #find_module(ascendcl_static libascendcl.a ${ASCEND_ACL_DIR}) else() diff --git a/ge/CMakeLists.txt b/ge/CMakeLists.txt index 0325a7de..88a323f3 100755 --- a/ge/CMakeLists.txt +++ b/ge/CMakeLists.txt @@ -615,7 +615,24 @@ set(INFER_SRC_LIST if (NOT ENABLE_D AND NOT ENABLE_ACL AND NOT ENABLE_MS_TESTCASES) ############ libge_runner.so ############ -add_library(ge_runner SHARED ${TRAIN_SRC_LIST} ${PROTO_SRCS} ${PROTO_CLIENT_SRCS}) +add_library(ge_runner SHARED + ${TRAIN_SRC_LIST} + ${PROTO_SRCS} + ${PROTO_CLIENT_SRCS} + $,msprofiler_fwk,msprofiler_fwk_object>> +) + +add_library(msprofiler_fwk_object OBJECT IMPORTED GLOBAL) + +if (msprofiler_fwk_ext_LIBRARY_DIR) + file(MAKE_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/msprofiler_fwk_object) + execute_process( + COMMAND ar x ${msprofiler_fwk_ext_LIBRARY_DIR} + WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/msprofiler_fwk_object + ) + file(GOLB MSPROFILER_FWK_OBJECT_LIST ${CMAKE_CURRENT_BINARY_DIR}/msprofiler_fwk_object/*.o) + set_property(TARGET msprofiler_fwk_object PROPERTY IMPORTED_OBJECTS ${MSPROFILER_FWK_OBJECT_LIST}) +endif() target_compile_definitions(ge_runner PRIVATE PROTOBUF_INLINE_NOT_IN_HEADERS=0 @@ -663,9 +680,6 @@ target_link_libraries(ge_runner PRIVATE ge_memory adump_server static_mmpa - -Wl,--whole-archive - msprofiler_fwk - -Wl,--no-whole-archive -Wl,--no-as-needed graph ge_common @@ -755,7 +769,7 @@ file(MAKE_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/ascendcl_object) if(EXISTS ${STATIC_ACL_LIB}/libascendcl.a) execute_process( COMMAND ar x ${STATIC_ACL_LIB}/libascendcl.a - WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/ascendcl_object + WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/ascendcl_object ) file(GLOB OBJECT_LIST ${CMAKE_CURRENT_BINARY_DIR}/ascendcl_object/*.o) else() @@ -764,8 +778,21 @@ endif() add_library(opensrc_ascendcl SHARED ${OBJECT_LIST} + $,msprofiler,msprofiler_object>> ) +add_library(msprofiler_object OBJECT IMPORTED GLOBAL) + +if (msprofiler_ext_LIBRARY_DIR) + file(MAKE_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/msprofiler_object) + execute_process( + COMMAND ar x ${msprofiler_ext_LIBRARY_DIR} + WORKING_DIRECTORY 
${CMAKE_CURRENT_BINARY_DIR}/msprofiler_object + ) + file(GOLB MSPROFILER_FWK_OBJECT_LIST ${CMAKE_CURRENT_BINARY_DIR}/msprofiler_object/*.o) + set_property(TARGET msprofiler_object PROPERTY IMPORTED_OBJECTS ${MSPROFILER_FWK_OBJECT_LIST}) +endif() + target_compile_definitions(opensrc_ascendcl PRIVATE google=ascend_private $<$:ONLY_COMPILE_OPEN_SRC> @@ -780,14 +807,7 @@ target_link_options(opensrc_ascendcl PRIVATE -Wl,--allow-multiple-definition -Wl,-z,muldefs -Wl,-Bsymbolic - -Wl,--exclude-libs,libascend_protobuf.a - -Wl,--exclude-libs,libge_executor.a - -Wl,--exclude-libs,libge_common.a - -Wl,--exclude-libs,libgraph.a - -Wl,--exclude-libs,libmmpa.a - -Wl,--exclude-libs,libregister.a - -Wl,--exclude-libs,liberror_manager.a - -Wl,--exclude-libs,libadump_server.a + -Wl,--exclude-libs,ALL ) target_link_libraries(opensrc_ascendcl PRIVATE -Wl,--whole-archive @@ -799,7 +819,6 @@ target_link_libraries(opensrc_ascendcl PRIVATE register_static error_manager_static adump_server - msprofiler -Wl,--no-whole-archive -Wl,--no-as-needed c_sec From 57386ebe8faf217e8e2d82e887a4c140dcca908d Mon Sep 17 00:00:00 2001 From: taoxudonghaha Date: Mon, 4 Jan 2021 14:45:17 +0800 Subject: [PATCH 33/54] solve msprofiler and delete ge_memory --- ge/CMakeLists.txt | 23 ++++++++++---- ge/graph/build/memory/CMakeLists.txt | 45 ---------------------------- 2 files changed, 17 insertions(+), 51 deletions(-) delete mode 100644 ge/graph/build/memory/CMakeLists.txt diff --git a/ge/CMakeLists.txt b/ge/CMakeLists.txt index 88a323f3..8d9edb65 100755 --- a/ge/CMakeLists.txt +++ b/ge/CMakeLists.txt @@ -1,7 +1,6 @@ if (NOT ENABLE_D AND NOT ENABLE_ACL AND NOT ENABLE_MS_TESTCASES) add_subdirectory(common) add_subdirectory(plugin/engine) - add_subdirectory(graph/build/memory) add_subdirectory(ge_local_engine) add_subdirectory(host_cpu_engine) add_subdirectory(executor) @@ -342,6 +341,13 @@ set(TRAIN_SRC_LIST "analyzer/analyzer.cc" "ir_build/ge_ir_build.cc" "ir_build/atc_ir_common.cc" + "graph/build/memory/memory_assigner.cc" + "graph/build/memory/graph_mem_assigner.cc" + "graph/build/memory/binary_block_mem_assigner.cc" + "graph/build/memory/block_mem_assigner.cc" + "graph/build/memory/hybrid_mem_assigner.cc" + "graph/build/memory/max_block_mem_assigner.cc" + "graph/build/memory/var_mem_assign_util.cc" ) set(INFER_SRC_LIST @@ -611,6 +617,13 @@ set(INFER_SRC_LIST "graph/label/while_label_maker.cc" "graph/label/partitioned_call_label_maker.cc" "analyzer/analyzer.cc" + "graph/build/memory/memory_assigner.cc" + "graph/build/memory/graph_mem_assigner.cc" + "graph/build/memory/binary_block_mem_assigner.cc" + "graph/build/memory/block_mem_assigner.cc" + "graph/build/memory/hybrid_mem_assigner.cc" + "graph/build/memory/max_block_mem_assigner.cc" + "graph/build/memory/var_mem_assign_util.cc" ) if (NOT ENABLE_D AND NOT ENABLE_ACL AND NOT ENABLE_MS_TESTCASES) @@ -630,7 +643,7 @@ if (msprofiler_fwk_ext_LIBRARY_DIR) COMMAND ar x ${msprofiler_fwk_ext_LIBRARY_DIR} WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/msprofiler_fwk_object ) - file(GOLB MSPROFILER_FWK_OBJECT_LIST ${CMAKE_CURRENT_BINARY_DIR}/msprofiler_fwk_object/*.o) + file(GLOB MSPROFILER_FWK_OBJECT_LIST ${CMAKE_CURRENT_BINARY_DIR}/msprofiler_fwk_object/*.o) set_property(TARGET msprofiler_fwk_object PROPERTY IMPORTED_OBJECTS ${MSPROFILER_FWK_OBJECT_LIST}) endif() @@ -677,7 +690,6 @@ target_include_directories(ge_runner PRIVATE target_link_libraries(ge_runner PRIVATE $ - ge_memory adump_server static_mmpa -Wl,--no-as-needed @@ -742,7 +754,6 @@ target_include_directories(ge_compiler PRIVATE 
target_link_libraries(ge_compiler PRIVATE $ - ge_memory static_mmpa -Wl,--no-as-needed graph @@ -789,8 +800,8 @@ if (msprofiler_ext_LIBRARY_DIR) COMMAND ar x ${msprofiler_ext_LIBRARY_DIR} WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/msprofiler_object ) - file(GOLB MSPROFILER_FWK_OBJECT_LIST ${CMAKE_CURRENT_BINARY_DIR}/msprofiler_object/*.o) - set_property(TARGET msprofiler_object PROPERTY IMPORTED_OBJECTS ${MSPROFILER_FWK_OBJECT_LIST}) + file(GLOB MSPROFILER_OBJECT_LIST ${CMAKE_CURRENT_BINARY_DIR}/msprofiler_object/*.o) + set_property(TARGET msprofiler_object PROPERTY IMPORTED_OBJECTS ${MSPROFILER_OBJECT_LIST}) endif() target_compile_definitions(opensrc_ascendcl PRIVATE diff --git a/ge/graph/build/memory/CMakeLists.txt b/ge/graph/build/memory/CMakeLists.txt deleted file mode 100644 index f6f56a54..00000000 --- a/ge/graph/build/memory/CMakeLists.txt +++ /dev/null @@ -1,45 +0,0 @@ -set(SRC_LIST - "memory_assigner.cc" - "graph_mem_assigner.cc" - "binary_block_mem_assigner.cc" - "block_mem_assigner.cc" - "hybrid_mem_assigner.cc" - "max_block_mem_assigner.cc" - "var_mem_assign_util.cc" -) - -############ libge_memory.a ############ -add_library(ge_memory STATIC ${SRC_LIST}) - -target_compile_options(ge_memory PRIVATE - -Werror - -O2 - -fno-common -) - -target_compile_definitions(ge_memory PRIVATE - google=ascend_private - LOG_CPP - $<$:ONLY_COMPILE_OPEN_SRC> -) - -target_link_libraries(ge_memory PRIVATE - $ - ascend_protobuf - c_sec -) - -target_include_directories(ge_memory PRIVATE - ${CMAKE_CURRENT_LIST_DIR} - ${GE_CODE_DIR}/ge - ${GE_CODE_DIR}/inc - ${GE_CODE_DIR}/inc/external - ${METADEF_DIR}/inc - ${METADEF_DIR}/inc/external - ${METADEF_DIR}/inc/external/graph - ${GE_CODE_DIR}/inc/framework - #### yellow zone #### - ${GE_CODE_DIR}/../inc - #### blue zone #### - ${GE_CODE_DIR}/third_party/fwkacllib/inc -) From dd6996e2e952c05a80f2ca79ab37e1645f1a18a7 Mon Sep 17 00:00:00 2001 From: zhou_lili Date: Mon, 4 Jan 2021 18:58:51 +0800 Subject: [PATCH 34/54] change switchn to case and add ut --- .../load/new_model_manager/davinci_model.cc | 181 +++--- .../load/new_model_manager/davinci_model.h | 16 +- .../load/new_model_manager/model_manager.cc | 12 +- .../load/new_model_manager/model_manager.h | 6 +- .../task_info/hccl_task_info.cc | 4 +- ge/graph/manager/graph_manager.cc | 6 +- .../common_subexpression_elimination_pass.cc | 6 +- ge/graph/passes/multi_batch_clone_pass.cc | 553 +++++++++++++++--- ge/graph/passes/multi_batch_clone_pass.h | 58 +- ge/graph/passes/unused_args_clean_pass.cc | 4 + ge/graph/preprocess/multi_batch_copy_graph.cc | 12 +- ge/graph/preprocess/multi_batch_options.cc | 5 +- inc/framework/omg/omg_inner_types.h | 3 + metadef | 2 +- parser | 2 +- tests/ut/ge/CMakeLists.txt | 1 + .../ge/graph/load/davinci_model_unittest.cc | 101 ++++ .../passes/multi_batch_clone_pass_unittest.cc | 247 ++++++++ 18 files changed, 1016 insertions(+), 203 deletions(-) create mode 100644 tests/ut/ge/graph/passes/multi_batch_clone_pass_unittest.cc diff --git a/ge/graph/load/new_model_manager/davinci_model.cc b/ge/graph/load/new_model_manager/davinci_model.cc index f3d6f82b..706d4b3b 100755 --- a/ge/graph/load/new_model_manager/davinci_model.cc +++ b/ge/graph/load/new_model_manager/davinci_model.cc @@ -87,6 +87,7 @@ const uint32_t kDumpL1FusionOpMByteSize = 2097152; // 2 * 1024 * 1024 const uint32_t kDumpFlagOfL1Fusion = 0; const char *const kDefaultBatchLable = "Batch_default"; const char *const kGetDynamicDimsName = "ascend_mbatch_get_dynamic_dims_node"; +const char *const kMultiBatchNodePostfix = 
"_ascend_mbatch_batch_"; const int32_t kInvalidStream = -1; const uint32_t kEndOfSequence = 0x0704000a; const uint32_t kEndOfSequenceNew = 507005; @@ -867,6 +868,10 @@ Status DavinciModel::InitNodes(const ComputeGraphPtr &compute_graph) { GELOGE(PARAM_INVALID, "NetOutput init failed, Name: %s", op_desc->GetName().c_str()); return PARAM_INVALID; } + if (InitRealSizeAndShapeInfo(compute_graph, node) != SUCCESS) { + GELOGE(PARAM_INVALID, "Init real size and shape failed, Name: %s", op_desc->GetName().c_str()); + return PARAM_INVALID; + } continue; } @@ -1143,16 +1148,24 @@ Status DavinciModel::InitNetOutput(const ComputeGraphPtr &graph, const NodePtr & real_virtual_addrs_.insert(real_addr); } } + return SUCCESS; +} +Status DavinciModel::InitRealSizeAndShapeInfo(const ComputeGraphPtr &compute_graph, const NodePtr &node) { + if (node->GetName().find(kMultiBatchNodePostfix) != string::npos) { + GELOGD("No need to get size and shape of netoutput in subgraph."); + return SUCCESS; + } + GELOGD("Start init real size and shape info of %s.", node->GetName().c_str()); GetAllGearsInfo(node); if (is_getnext_sink_dynamic_) { GE_IF_BOOL_EXEC(GetGetDynamicDimsNodeInfo(node) != SUCCESS, GELOGE(PARAM_INVALID, "Failed to get info of getdynamicdims node."); return PARAM_INVALID;); } if (is_online_infer_dynamic_) { - GE_IF_BOOL_EXEC(GetGearAndRealOutSizeInfo(input_count, node) != SUCCESS, + GE_IF_BOOL_EXEC(GetGearAndRealOutSizeInfo(compute_graph, node) != SUCCESS, GELOGE(PARAM_INVALID, "Failed to get gear and real out size info."); return PARAM_INVALID;); - GE_IF_BOOL_EXEC(GetGearAndRealOutShapeInfo(input_count, op_desc) != SUCCESS, + GE_IF_BOOL_EXEC(GetGearAndRealOutShapeInfo(compute_graph, node) != SUCCESS, GELOGE(PARAM_INVALID, "Failed to get gear and real out shape info."); return PARAM_INVALID;); } @@ -1171,7 +1184,7 @@ void DavinciModel::GetAllGearsInfo(const NodePtr &node) { if (shape_str.empty()) { continue; } - std::vector gear_info; + std::vector gear_info; std::vector dims = ge::StringUtils::Split(shape_str, ','); for (const auto &dim : dims) { if (dim.empty()) { @@ -1187,6 +1200,7 @@ void DavinciModel::GetAllGearsInfo(const NodePtr &node) { } } } + Status DavinciModel::GetGetDynamicDimsNodeInfo(const NodePtr &node) { GE_CHECK_NOTNULL(node->GetOpDesc()); size_t input_count = node->GetAllInDataAnchors().size(); @@ -1224,11 +1238,11 @@ Status DavinciModel::GetGetDynamicDimsNodeInfo(const NodePtr &node) { return SUCCESS; } -Status DavinciModel::GetGearAndRealOutSizeInfo(size_t input_count, const NodePtr &node) { - GELOGD("Start get gear and real output size info of %s, input count is %zu.", node->GetName().c_str(), input_count); +Status DavinciModel::GetGearAndRealOutSizeInfo(const ComputeGraphPtr &graph, const NodePtr &node) { + GELOGD("Start get gear and real output size info of %s.", node->GetName().c_str()); merge_nodes_gear_and_real_out_size_info_.clear(); - for (size_t idx = 0; idx < input_count; ++idx) { - auto in_anchor = node->GetAllInDataAnchors().at(idx); + size_t idx = 0; + for (const auto &in_anchor : node->GetAllInDataAnchors()) { auto peer_out_anchor = in_anchor->GetPeerOutAnchor(); if (peer_out_anchor == nullptr) { continue; @@ -1236,89 +1250,106 @@ Status DavinciModel::GetGearAndRealOutSizeInfo(size_t input_count, const NodePtr auto peer_node = peer_out_anchor->GetOwnerNode(); auto op_desc = peer_node->GetOpDesc(); GE_CHECK_NOTNULL(op_desc); - if ((peer_node->GetType() == MERGE) && (op_desc->HasAttr(ATTR_INSERT_BY_MBATCH))) { - if (GetRealOutputSizeOfMerge(idx, peer_node) != SUCCESS) { + 
if ((peer_node->GetType() == CASE) && (op_desc->HasAttr(ATTR_INSERT_BY_MBATCH))) { + if (GetRealOutputSizeOfCase(graph, idx, peer_node) != SUCCESS) { GELOGE(PARAM_INVALID, "Get real output size of %s failed.", peer_node->GetName().c_str()); return PARAM_INVALID; } } + idx++; } return SUCCESS; } -Status DavinciModel::GetRealOutputSizeOfMerge(size_t input_index, const NodePtr &merge_node) { - GELOGD("Start get output size of %s, which is %zu input to netoutput.", merge_node->GetName().c_str(), input_index); - std::map, int64_t> gear_and_real_out_size_info; - for (auto &in_anchor : merge_node->GetAllInDataAnchors()) { - auto peer_out_anchor = in_anchor->GetPeerOutAnchor(); - if (peer_out_anchor == nullptr) { - continue; - } - auto in_node = peer_out_anchor->GetOwnerNode(); - GELOGD("Input node of merge is %s.", in_node->GetName().c_str()); - auto op_desc = in_node->GetOpDesc(); - GE_CHECK_NOTNULL(op_desc); - string batch_label; - if (AttrUtils::GetStr(op_desc, ATTR_NAME_BATCH_LABEL, batch_label)) { - size_t batch_index = static_cast(stoi(batch_label.substr(batch_label.rfind('_') + 1))); - GELOGD("Batch index of %s is %zu.", op_desc->GetName().c_str(), batch_index); - if (batch_index > all_gears_info_.size()) { - GELOGE(PARAM_INVALID, "The value of ATTR_NAME_BATCH_LABEL is invalid."); - return PARAM_INVALID; - } - - const vector output_size_list = ModelUtils::GetOutputSize(op_desc); - int output_index = ge::AnchorUtils::GetIdx(peer_out_anchor); - auto tensor_desc = op_desc->GetOutputDescPtr(output_index); - GE_CHECK_NOTNULL(tensor_desc); - int64_t data_size = 0; - if (TensorUtils::GetTensorSizeInBytes(*tensor_desc, data_size) != GRAPH_SUCCESS) { - GELOGE(FAILED, "Get tensor size in bytes failed."); - return FAILED; +Status DavinciModel::GetRealOutputSizeOfCase(const ComputeGraphPtr &graph, size_t input_index, + const NodePtr &case_node) { + GELOGD("Start get output size of %s, which is %zu input to netoutput.", case_node->GetName().c_str(), input_index); + const auto &func_desc = case_node->GetOpDesc(); + GE_CHECK_NOTNULL(func_desc); + std::map, int64_t> gear_and_real_out_size_info; + for (const auto &name : func_desc->GetSubgraphInstanceNames()) { + const auto &subgraph = graph->GetSubgraph(name); + if (subgraph == nullptr) { + GELOGE(GE_GRAPH_EMPTY_SUBGRAPH, "Subgraph not found, name: %s.", name.c_str()); + return GE_GRAPH_EMPTY_SUBGRAPH; + } + for (auto &node : subgraph->GetDirectNode()) { + if (node->GetType() == NETOUTPUT) { + auto op_desc = node->GetOpDesc(); + GE_CHECK_NOTNULL(op_desc); + string batch_label; + if (AttrUtils::GetStr(op_desc, ATTR_NAME_BATCH_LABEL, batch_label)) { + size_t batch_index = static_cast(stoi(batch_label.substr(batch_label.rfind('_') + 1))); + GELOGD("Batch index of %s is %zu.", op_desc->GetName().c_str(), batch_index); + if (batch_index > all_gears_info_.size()) { + GELOGE(PARAM_INVALID, "The value of ATTR_NAME_BATCH_LABEL is invalid."); + return PARAM_INVALID; + } + + const vector input_size_list = ModelUtils::GetInputSize(op_desc); + auto tensor_desc = op_desc->GetInputDescPtr(input_index); + GE_CHECK_NOTNULL(tensor_desc); + int64_t data_size = 0; + if (TensorUtils::GetTensorSizeInBytes(*tensor_desc, data_size) != GRAPH_SUCCESS) { + GELOGE(FAILED, "Get tensor size in bytes failed."); + return FAILED; + } + gear_and_real_out_size_info[all_gears_info_[batch_index]] = data_size; + GELOGD("Get real gear index is: %zu, gear info is %s, size is %ld, tensor size is %ld", + batch_index, formats::JoinToString(all_gears_info_[batch_index]).c_str(), + 
input_size_list[input_index], data_size); + } + break; } - gear_and_real_out_size_info[all_gears_info_[batch_index]] = data_size; - GELOGD("Get real gear index is: %zu, gear info is %s, size is %ld, tensor size is %ld", - batch_index, formats::JoinToString(all_gears_info_[batch_index]).c_str(), - output_size_list[output_index], data_size); } } merge_nodes_gear_and_real_out_size_info_[input_index] = gear_and_real_out_size_info; return SUCCESS; } -Status DavinciModel::GetGearAndRealOutShapeInfo(size_t input_count, const OpDescPtr &op_desc) { - GELOGD("Start to get dynamic output dims of %s.", op_desc->GetName().c_str()); +Status DavinciModel::GetGearAndRealOutShapeInfo(const ComputeGraphPtr &graph, const NodePtr &node) { + GELOGD("Start to get dynamic output dims of %s.", node->GetName().c_str()); merge_nodes_gear_and_real_out_shape_info_.clear(); - std::vector dynamic_output_shape_info; - if (!AttrUtils::GetListStr(op_desc, ATTR_NAME_DYNAMIC_OUTPUT_DIMS, dynamic_output_shape_info)) { - GELOGD("Can not get dynamic output dims attr"); - return SUCCESS; - } - GELOGI("Dynamic output shape info is %s", formats::JoinToString(dynamic_output_shape_info).c_str()); - std::vector> dynamic_output_shape; - ParseDynamicOutShape(dynamic_output_shape_info, dynamic_output_shape); - // idx: input_index to netoutput - for (size_t idx = 0; idx < input_count; ++idx) { - std::map, vector> gear_and_real_out_shape_info; - for (auto &it : dynamic_output_shape) { - auto gear_index = static_cast(it[0]); - if (gear_index > all_gears_info_.size()) { - GELOGE(PARAM_INVALID, "The value of cur index: %zu is invalid.", static_cast(it[0])); - return PARAM_INVALID; + size_t idx = 0; + for (const auto &in_anchor : node->GetAllInDataAnchors()) { + auto peer_out_anchor = in_anchor->GetPeerOutAnchor(); + if (peer_out_anchor == nullptr) { + continue; + } + auto peer_node = peer_out_anchor->GetOwnerNode(); + auto op_desc = peer_node->GetOpDesc(); + GE_CHECK_NOTNULL(op_desc); + if ((peer_node->GetType() == CASE) && (op_desc->HasAttr(ATTR_INSERT_BY_MBATCH))) { + std::vector dynamic_output_shape_info; + if (!AttrUtils::GetListStr(node->GetOpDesc(), ATTR_NAME_DYNAMIC_OUTPUT_DIMS, dynamic_output_shape_info)) { + GELOGD("Can not get dynamic output dims attr from %s.", node->GetName().c_str()); + return SUCCESS; } + GELOGI("Dynamic output shape info is %s", formats::JoinToString(dynamic_output_shape_info).c_str()); + std::vector> dynamic_output_shape; + ParseDynamicOutShape(dynamic_output_shape_info, dynamic_output_shape); + std::map, vector> gear_and_real_out_shape_info; + for (auto &it : dynamic_output_shape) { + auto gear_index = static_cast(it[0]); + if (gear_index > all_gears_info_.size()) { + GELOGE(PARAM_INVALID, "The value of cur index: %zu is invalid.", static_cast(it[0])); + return PARAM_INVALID; + } - if (static_cast(it[1]) == idx) { - vector output_shape; - for (size_t i = 2; i < it.size(); ++i) { - output_shape.emplace_back(it[i]); + if (static_cast(it[1]) == idx) { + vector output_shape; + for (size_t i = 2; i < it.size(); ++i) { + output_shape.emplace_back(it[i]); + } + gear_and_real_out_shape_info[all_gears_info_[gear_index]] = output_shape; + GELOGD("Get real gear index is: %zu, gear info is %s, output shape is %s.", + gear_index, formats::JoinToString(all_gears_info_[gear_index]).c_str(), + formats::JoinToString(output_shape).c_str()); } - gear_and_real_out_shape_info[all_gears_info_[gear_index]] = output_shape; - GELOGD("Get real gear index is: %zu, gear info is %s, output shape is %s.", - gear_index, 
formats::JoinToString(all_gears_info_[gear_index]).c_str(), - formats::JoinToString(output_shape).c_str()); } + merge_nodes_gear_and_real_out_shape_info_[idx] = gear_and_real_out_shape_info; } - merge_nodes_gear_and_real_out_shape_info_[idx] = gear_and_real_out_shape_info; + idx++; } return SUCCESS; } @@ -1962,7 +1993,7 @@ void DavinciModel::CreateOutput(uint32_t index, const OpDescPtr &op_desc, InputO uint32_t &format_result) { /// netoutput input tensor desc GE_IF_BOOL_EXEC(op_desc->GetInputDescPtr(index) == nullptr, GELOGE(FAILED, "OpDesc GetInputDescPtr is nullptr"); - return ); + return); Format format = op_desc->GetInputDescPtr(index)->GetFormat(); GeShape shape = op_desc->GetInputDescPtr(index)->GetShape(); DataType data_type = op_desc->GetInputDescPtr(index)->GetDataType(); @@ -2567,7 +2598,7 @@ Status DavinciModel::ReturnResult(uint32_t data_id, const bool rslt_flg, const b GELOGD("Reinit cur dynamic dims when getnext sink dynamic."); cur_dynamic_dims_.clear(); cur_dynamic_dims_.resize(shape_of_cur_dynamic_dims_); - auto ret = rtMemcpy(cur_dynamic_dims_.data(), shape_of_cur_dynamic_dims_ * sizeof(int64_t), + auto ret = rtMemcpy(cur_dynamic_dims_.data(), shape_of_cur_dynamic_dims_ * sizeof(int32_t), netoutput_last_input_addr_, netoutput_last_input_size_, RT_MEMCPY_DEVICE_TO_HOST); GE_CHK_RT_RET(ret); } @@ -2668,11 +2699,11 @@ void *DavinciModel::Run(DavinciModel *model) { GE_IF_BOOL_EXEC(current_data.blobs.empty(), break); auto shape_data_buffer_data = current_data.blobs.back().data; auto shape_data_buffer_length = current_data.blobs.back().length; - model->cur_dynamic_dims_.assign(reinterpret_cast(shape_data_buffer_data), - reinterpret_cast(shape_data_buffer_data) + - shape_data_buffer_length / sizeof(int64_t)); + model->cur_dynamic_dims_.assign(reinterpret_cast(shape_data_buffer_data), + reinterpret_cast(shape_data_buffer_data) + + shape_data_buffer_length / sizeof(int32_t)); GELOGD("Data: cur dynamic dims is %s", formats::JoinToString(model->cur_dynamic_dims_).c_str()); - delete[] reinterpret_cast(current_data.blobs.back().data); + delete[] reinterpret_cast(current_data.blobs.back().data); current_data.blobs.pop_back(); } GE_IF_BOOL_EXEC(ProfilingManager::Instance().ProfilingModelExecuteOn(), model->SetProfileTime(MODEL_PRE_PROC_END)); diff --git a/ge/graph/load/new_model_manager/davinci_model.h b/ge/graph/load/new_model_manager/davinci_model.h index 6b930b05..9ff59d4e 100755 --- a/ge/graph/load/new_model_manager/davinci_model.h +++ b/ge/graph/load/new_model_manager/davinci_model.h @@ -864,11 +864,13 @@ class DavinciModel { void ParseDynamicOutShape(const vector &str_info, vector> &vec_info); bool IsGetNextSinkDynamic(const OpDescPtr &op_desc); + + Status InitRealSizeAndShapeInfo(const ComputeGraphPtr &compute_graph, const NodePtr &node); void GetAllGearsInfo(const NodePtr &node); Status GetGetDynamicDimsNodeInfo(const NodePtr &node); - Status GetGearAndRealOutSizeInfo(size_t input_count, const NodePtr &node); - Status GetRealOutputSizeOfMerge(size_t input_index, const NodePtr &merge_node); - Status GetGearAndRealOutShapeInfo(size_t input_count, const OpDescPtr &op_desc); + Status GetGearAndRealOutSizeInfo(const ComputeGraphPtr &graph, const NodePtr &node); + Status GetRealOutputSizeOfCase(const ComputeGraphPtr &graph, size_t input_index, const NodePtr &case_node); + Status GetGearAndRealOutShapeInfo(const ComputeGraphPtr &graph, const NodePtr &node); bool is_weight_mem_has_inited_; bool is_feature_map_mem_has_inited_; @@ -1021,15 +1023,15 @@ class DavinciModel { bool 
is_new_model_desc_{false}; bool is_online_infer_dynamic_ = false; bool is_getnext_sink_dynamic_ = false; - vector cur_dynamic_dims_; + vector cur_dynamic_dims_; void *netoutput_last_input_addr_ = nullptr; int64_t netoutput_last_input_size_ = 0; size_t shape_of_cur_dynamic_dims_ = 0; // key: input_index: input is merge node; value: each gear info and each output size - map, int64_t>> merge_nodes_gear_and_real_out_size_info_; + map, int64_t>> merge_nodes_gear_and_real_out_size_info_; // key: input_index: input is merge node; value: each gear info and each output shape - map, vector>> merge_nodes_gear_and_real_out_shape_info_; - vector> all_gears_info_; + map, vector>> merge_nodes_gear_and_real_out_shape_info_; + vector> all_gears_info_; multimap op_id_map_; vector profile_list_; diff --git a/ge/graph/load/new_model_manager/model_manager.cc b/ge/graph/load/new_model_manager/model_manager.cc index 6f923236..b2cce73a 100755 --- a/ge/graph/load/new_model_manager/model_manager.cc +++ b/ge/graph/load/new_model_manager/model_manager.cc @@ -460,8 +460,8 @@ Status ModelManager::DataInput(const InputData &input_data, OutputData &output_d Status ModelManager::GetCurDynamicDims(const vector> &user_real_input_dims, const vector>> &user_input_dims, - vector &cur_dynamic_dims) { - GELOGD(" Start get cur dynamic dims."); + vector &cur_dynamic_dims) { + GELOGD("Start get cur dynamic dims."); if (user_real_input_dims.size() != user_input_dims.size()) { GELOGE(INTERNAL_ERROR, "The input count of user: %zu should be equal to the data count of graph: %zu", @@ -478,7 +478,7 @@ Status ModelManager::GetCurDynamicDims(const vector> &user_real_ } for (size_t j = 0; j < user_input_dims.at(i).second.size(); ++j) { if (user_input_dims.at(i).second.at(j) < 0) { - cur_dynamic_dims.emplace_back(user_real_input_dims[i][j]); + cur_dynamic_dims.emplace_back(static_cast(user_real_input_dims[i][j])); } } } @@ -523,7 +523,7 @@ Status ModelManager::DataInputTensor(uint32_t model_id, const std::vector cur_dynamic_dims; + std::vector cur_dynamic_dims; if (!GetLocalOmgContext().user_real_input_dims.empty()) { if (GetCurDynamicDims(GetLocalOmgContext().user_real_input_dims, GetLocalOmgContext().user_input_dims, cur_dynamic_dims) != SUCCESS) { @@ -531,9 +531,9 @@ Status ModelManager::DataInputTensor(uint32_t model_id, const std::vector(cur_dynamic_dims.size() * sizeof(int64_t)); + uint32_t length = static_cast(cur_dynamic_dims.size() * sizeof(int32_t)); GE_CHK_BOOL_EXEC(memcpy_s(data.data, length, cur_dynamic_dims.data(), length) == EOK, return INTERNAL_ERROR, "Failed to memcpy data."); data.length = length; diff --git a/ge/graph/load/new_model_manager/model_manager.h b/ge/graph/load/new_model_manager/model_manager.h index 088ea5fd..500cad31 100755 --- a/ge/graph/load/new_model_manager/model_manager.h +++ b/ge/graph/load/new_model_manager/model_manager.h @@ -126,14 +126,14 @@ class FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY ModelManager { /// /// @ingroup domi_ome /// @brief Get cur_dynamic_dims for all input. - /// @param [in] vector> &user_real_input_dims: dims info of all user_inputs. + /// @param [in] vector> &user_real_input_dims: dims info of all user_inputs. /// @param [in] vector>> &user_input_dims: key:name. value:dynamic dims from option. - /// @param [out] vector &cur_dynamic_dims: real dims gather, where the index of -1. + /// @param [out] vector &cur_dynamic_dims: real dims gather, where the index of -1. 
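/// NOTE: worked example (values illustrative): with user_input_dims configured
/// as {"data", {-1, 3, -1, 224}} and user_real_input_dims {{8, 3, 224, 224}},
/// cur_dynamic_dims becomes {8, 224}: only positions configured as -1
/// contribute their runtime value, cast to int32_t.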
/// @return 0: SUCCESS / others: INTERNAL_ERROR /// Status GetCurDynamicDims(const vector> &user_real_input_dims, const vector>> &user_input_dims, - vector &cur_dynamic_dims); + vector &cur_dynamic_dims); /// /// @ingroup domi_ome diff --git a/ge/graph/load/new_model_manager/task_info/hccl_task_info.cc b/ge/graph/load/new_model_manager/task_info/hccl_task_info.cc index df43fd5b..8033c93e 100644 --- a/ge/graph/load/new_model_manager/task_info/hccl_task_info.cc +++ b/ge/graph/load/new_model_manager/task_info/hccl_task_info.cc @@ -145,7 +145,9 @@ Status HcclTaskInfo::SetFollowStream(const ge::ConstOpDescPtr &op_desc, DavinciM } else { GELOGI("need to reuse follow stream and create new follow stream."); size_t created_stream_num = follow_stream_usage.size(); - hccl_stream_list_ = follow_stream_usage; + for (const auto &stream : follow_stream_usage) { + hccl_stream_list_.emplace_back(stream); + } ret = CreateStream(hccl_stream_num - created_stream_num, davinci_model, main_stream_id); if (ret != SUCCESS) { GELOGE(RT_FAILED, "Create hccl stream failed."); diff --git a/ge/graph/manager/graph_manager.cc b/ge/graph/manager/graph_manager.cc index 6372a018..38de6ff7 100755 --- a/ge/graph/manager/graph_manager.cc +++ b/ge/graph/manager/graph_manager.cc @@ -2780,8 +2780,10 @@ Status GraphManager::ParseInputsDims(const std::vector &input_t if (!GetLocalOmgContext().dynamic_node_type.empty()) { vector data_nodes; vector getnext_nosink_nodes; - data_nodes = compute_graph_->TryGetExtAttr(kExtAttrDataNodes, data_nodes); - getnext_nosink_nodes = compute_graph_->TryGetExtAttr(kExtAttrGetNextNoSink, getnext_nosink_nodes); + data_nodes = GetLocalOmgContext().data_nodes; + getnext_nosink_nodes = GetLocalOmgContext().getnext_nosink_nodes; + GELOGD("Data nodes count is %zu, getnext nosink nodes count is %zu.", data_nodes.size(), + getnext_nosink_nodes.size()); if (GetLocalOmgContext().dynamic_node_type == DATA) { if (getnext_nosink_nodes.empty()) { // just data or data+getnext_sink diff --git a/ge/graph/passes/common_subexpression_elimination_pass.cc b/ge/graph/passes/common_subexpression_elimination_pass.cc index a4662d5d..7d9724fc 100644 --- a/ge/graph/passes/common_subexpression_elimination_pass.cc +++ b/ge/graph/passes/common_subexpression_elimination_pass.cc @@ -26,6 +26,10 @@ namespace ge { namespace { +std::set un_compute_attrs = { + {ATTR_NAME_DATA_DUMP_ORIGIN_OP_NAMES}, +}; + std::string GetCseKey(const NodePtr &node) { std::stringstream ss; ss << node->GetType() << "-data-inputs-"; @@ -49,7 +53,7 @@ std::string GetCseKey(const NodePtr &node) { ss << name << "-"; } - ss << "attrs-" << AttrUtils::GetAllAttrsStr(node->GetOpDesc()); + ss << "attrs-" << AttrUtils::GetAttrsStrAfterRid(node->GetOpDesc(), un_compute_attrs); return ss.str(); } diff --git a/ge/graph/passes/multi_batch_clone_pass.cc b/ge/graph/passes/multi_batch_clone_pass.cc index f8451ace..b7efa070 100755 --- a/ge/graph/passes/multi_batch_clone_pass.cc +++ b/ge/graph/passes/multi_batch_clone_pass.cc @@ -25,31 +25,65 @@ #include "graph/utils/tensor_utils.h" #include "graph/utils/type_utils.h" #include "register/op_registry.h" +#include "graph/common/omg_util.h" namespace ge { namespace { constexpr uint8_t kDataInIndex = 0; constexpr uint8_t kDataOutIndex = 0; constexpr uint8_t kCaseArgIndex = 1; +const int kDivisionConst = 2; +const size_t kNumOfGetnextNode = 1; const std::string kMultiBatchCaseNode = "ascend_mbatch_shape_case"; const std::string kMultiBatchDataNode = "ascend_mbatch_shape_data"; +const std::string kMultiBatchGetDynamicDimsNode = 
"ascend_mbatch_get_dynamic_dims_node"; const std::string kMultiBatchConstNode = "ascend_mbatch_shape_const"; const std::string kMultiBatchMapIndexNode = "ascend_mbatch_shape_mapindex"; const std::string kMultiBatchNodePostfix = "_ascend_mbatch_batch_"; +const char *const kGetNextName = "IteratorV2"; } // namespace +inline bool IsGetNextType(const NodePtr &node) { + std::string original_type; + GE_IF_BOOL_EXEC(GetOriginalType(node, original_type) != SUCCESS, + GELOGW("Get original type failed."); return false); + return (original_type == kGetNextName); +} + Status MultiBatchClonePass::Run(ComputeGraphPtr graph) { + GE_IF_BOOL_EXEC(graph == nullptr, GELOGE(FAILED, "Original graph is nullptr"); return FAILED); if (graph->GetParentGraph() != nullptr) { GELOGD("Subgraph %s skip the MultiBatchClonePass", graph->GetName().c_str()); return SUCCESS; } - + if (!GetLocalOmgContext().need_multi_batch) { + GELOGI("No need to process_multi for no_train graph."); + return SUCCESS; + } + std::vector data_nodes; + std::vector getnext_nosink_nodes; + std::vector getnext_sink_nodes; + if (multibatch::CheckSequenceOfOptions(graph, data_nodes, getnext_nosink_nodes, getnext_sink_nodes) != SUCCESS) { + GELOGE(PARAM_INVALID, "[Train_Dynamic] CheckSequenceOfOptions failed."); + return PARAM_INVALID; + } + if (multibatch::UpdateNameOfInputShape(graph, data_nodes, getnext_nosink_nodes, getnext_sink_nodes) != SUCCESS) { + GELOGE(PARAM_INVALID, "[Train_Dynamic] UpdateNameForInputShapeOfOption failed."); + return PARAM_INVALID; + } + if (multibatch::DeleteIdentityInsertByAdapter(graph) != SUCCESS) { + GELOGE(PARAM_INVALID, "[Train_Dynamic] DeleteIdentityInsertByAdapter failed."); + return PARAM_INVALID; + } if (!multibatch::InitDynamicParams(batch_shapes_)) { GELOGD("There is no multi-batch options, no need clone multi-batch graph"); return SUCCESS; } - + if (multibatch::CheckNegativeCountOfOptions(batch_shapes_) != SUCCESS) { + GELOGE(PARAM_INVALID, "[Train_Dynamic] Input_shape and dynamic_dims should set correct params."); + return PARAM_INVALID; + } GELOGD("Begin to run Multi-batch clone on graph: %s", graph->GetName().c_str()); GE_CHK_STATUS_RET(multibatch::CheckDynamicParams(batch_shapes_), "Invalid multi-batch param"); if (CollectIoNodes(graph) != SUCCESS) { @@ -66,21 +100,14 @@ Status MultiBatchClonePass::Run(ComputeGraphPtr graph) { (void)AttrUtils::GetStr(graph, ATTR_NAME_SESSION_GRAPH_ID, session_graph_id_); ComputeGraphPtr branch = MakeShared(graph->GetName()); - if (branch == nullptr) { - GELOGE(OUT_OF_MEMORY, "Create multi-batch graph failed"); - return OUT_OF_MEMORY; - } + GE_IF_BOOL_EXEC(branch == nullptr, GELOGE(OUT_OF_MEMORY, "Create multi batch graph failed"); return OUT_OF_MEMORY); (void)AttrUtils::SetStr(branch, ATTR_NAME_SESSION_GRAPH_ID, session_graph_id_); graph->InValid(); // Will modify, need topological again. 
graph->Swap(*branch); - if (CreateRootGraph(graph) != SUCCESS) { - return FAILED; - } - - if (CreateSubgraphs(graph, branch) != SUCCESS) { - return FAILED; - } + GE_CHK_STATUS_RET(CreateRootGraph(graph), "Construct root graph failed."); + GE_CHK_STATUS_RET(CreateOriGraph(branch), "Construct original graph failed.") + GE_CHK_STATUS_RET(CreateSubgraphs(graph, branch), "Construct subgraph failed."); GE_CHK_STATUS_RET(PruneDirectOutput(graph), "Prune direct output failed"); GELOGD("MultiBatchClonePass Leave"); @@ -95,9 +122,13 @@ Status MultiBatchClonePass::Run(ComputeGraphPtr graph) { /// Status MultiBatchClonePass::CollectIoNodes(const ComputeGraphPtr &graph) { for (const auto &node : graph->GetDirectNode()) { + if (!GetLocalOmgContext().dynamic_node_type.empty() && IsGetNextType(node)) { + all_data_nodes_.emplace_back(node); + GE_CHK_STATUS_RET(InitParamsOfGetNext(node), "Init params of %s failed.", node->GetName().c_str()); + } if (node->GetType() == DATA) { all_data_nodes_.emplace_back(node); - } else if (node->GetType() == CONSTANT) { + } else if (node->GetType() == CONSTANT || node->GetType() == CONSTANTOP) { all_const_nodes_.emplace_back(node); } else if (node->GetType() == NETOUTPUT) { all_output_nodes_.emplace_back(node); @@ -114,10 +145,16 @@ Status MultiBatchClonePass::CollectIoNodes(const ComputeGraphPtr &graph) { } int64_t data_index = 0; + size_t getnext_node_count = 0; for (size_t i = 0; i < all_data_nodes_.size(); ++i) { + if (IsGetNextType(all_data_nodes_[i])) { + // just one getnext node in graph + getnext_node_count++; + continue; + } const auto &op_desc = all_data_nodes_[i]->GetOpDesc(); if (!AttrUtils::GetInt(op_desc, ATTR_NAME_INDEX, data_index)) { - (void)AttrUtils::SetInt(op_desc, ATTR_NAME_INDEX, i); + (void)AttrUtils::SetInt(op_desc, ATTR_NAME_INDEX, i - getnext_node_count); } } @@ -133,7 +170,43 @@ Status MultiBatchClonePass::CollectIoNodes(const ComputeGraphPtr &graph) { "Remove edge failed"); } } + GELOGD("Data count is %zu, const count is %zu, getnext count is %zu, output count is %zu, direct out count is %zu.", + all_data_nodes_.size(), all_const_nodes_.size(), getnext_node_count, all_output_nodes_.size(), + direct_output_.size()); + + return SUCCESS; +} +Status MultiBatchClonePass::InitParamsOfGetNext(const NodePtr &node) { + data_count_from_getnext_ = 0; + getnext_sink_dynamic_dims_ = false; + GE_CHECK_NOTNULL(node->GetOpDesc()); + data_count_from_getnext_ = node->GetOpDesc()->GetOutputsSize(); + if (GetLocalOmgContext().dynamic_node_type == GETNEXT) { + data_count_from_getnext_ = data_count_from_getnext_ / kDivisionConst; + for (size_t i = 0; i < data_count_from_getnext_; ++i) { + GeTensorDesc output_desc = node->GetOpDesc()->GetOutputDesc(i); + GELOGD("The %zu data shape from getnext sink is %s.", i, + formats::JoinToString(output_desc.GetShape().GetDims()).c_str()); + const auto &dims = output_desc.GetShape().GetDims(); + if (std::all_of(dims.begin(), dims.end(), [](int64_t val) {return val >= 0; })) { + GELOGD("The %zu data from %s is static.", i, node->GetName().c_str()); + } else { + getnext_sink_dynamic_dims_ = true; + GELOGD("Dynamic dims in the pattern of getnext sink."); + } + } + } + if (node->GetOutControlAnchor() != nullptr) { + for (const auto &peer_in_control_anchor : node->GetOutControlAnchor()->GetPeerInControlAnchors()) { + NodePtr next_node = peer_in_control_anchor->GetOwnerNode(); + GE_CHECK_NOTNULL(next_node); + if (next_node->GetType() == CONSTANTOP) { + out_control_nodes_.insert(next_node); + GELOGD("Control edge: %s connect with %s.", 
node->GetName().c_str(), next_node->GetName().c_str()); + } + } + } return SUCCESS; } @@ -144,7 +217,11 @@ Status MultiBatchClonePass::CollectIoNodes(const ComputeGraphPtr &graph) { /// @return 0: SUCCESS / others: FAILED /// Status MultiBatchClonePass::CreateRootGraph(const ComputeGraphPtr &graph) { + GELOGD("Start create root graph of %s.", graph->GetName().c_str()); uint32_t input_num = all_data_nodes_.size() + all_const_nodes_.size(); + if (data_count_from_getnext_ != 0) { + input_num = input_num + data_count_from_getnext_ - kNumOfGetnextNode; + } uint32_t output_num = all_output_nodes_[0]->GetAllInDataAnchorsSize(); OpDescBuilder op_builder(kMultiBatchCaseNode, CASE); @@ -185,6 +262,10 @@ Status MultiBatchClonePass::CreateRootGraph(const ComputeGraphPtr &graph) { op_desc->GetName().c_str()); return FAILED; } + if (!AttrUtils::SetBool(op_desc, ATTR_INSERT_BY_MBATCH, true)) { + GELOGE(INTERNAL_ERROR, "Failed to add insert attr on case node %s", op_desc->GetName().c_str()); + return INTERNAL_ERROR; + } GE_CHK_STATUS_RET(multibatch::StampDynamicType(op_desc), "Set dynamic type failed"); GE_CHK_STATUS_RET(CreateIndexNode(graph), "Create index node failed"); @@ -202,7 +283,7 @@ Status MultiBatchClonePass::CreateRootGraph(const ComputeGraphPtr &graph) { /// @param [in] NodePtr node: index data node. /// @return 0: SUCCESS / others: FAILED /// -Status MultiBatchClonePass::CreateIndexDataNode(const ComputeGraphPtr &graph, NodePtr &node) { +Status MultiBatchClonePass::CreateIndexDataNode(const ComputeGraphPtr &graph, NodePtr &shape_node) { const OpDescPtr data_desc = MakeShared(kMultiBatchDataNode, DATA); if (data_desc == nullptr) { GELOGE(OUT_OF_MEMORY, "Create multi-batch data node failed"); @@ -220,11 +301,12 @@ Status MultiBatchClonePass::CreateIndexDataNode(const ComputeGraphPtr &graph, No } size_t data_index = all_data_nodes_.size(); + data_index = data_count_from_getnext_ != 0 ? 
data_index - kNumOfGetnextNode : data_index; (void)AttrUtils::SetInt(data_desc, ATTR_NAME_INDEX, data_index); (void)AttrUtils::SetBool(data_desc, ATTR_INSERT_BY_MBATCH, true); - node = graph->AddNode(data_desc); - if (node == nullptr) { + shape_node = graph->AddNode(data_desc); + if (shape_node == nullptr) { GELOGE(OUT_OF_MEMORY, "Create multi-batch data node failed"); return OUT_OF_MEMORY; } @@ -286,15 +368,19 @@ Status MultiBatchClonePass::CreateIndexConstNode(const ComputeGraphPtr &graph, N /// @return 0: SUCCESS / others: FAILED /// Status MultiBatchClonePass::CreateIndexNode(const ComputeGraphPtr &graph) { - // Data --> MapIndex --> Case - NodePtr data_node; - GE_CHK_STATUS_RET(CreateIndexDataNode(graph, data_node), "Create data node failed"); + // Data/GetDynamicDims --> MapIndex --> Case + if (!getnext_sink_dynamic_dims_) { + GE_CHK_STATUS_RET(CreateIndexDataNode(graph, shape_node_), "Create data node failed"); + } else { + GE_CHK_STATUS_RET(CreateGetDynamicDimsNode(graph, shape_node_), "Create get dynamic dims node failed"); + } NodePtr const_node; GE_CHK_STATUS_RET(CreateIndexConstNode(graph, const_node), "Create const node failed"); - + GELOGD("Shape node name is %s, type is %s, const node name is %s.", shape_node_->GetName().c_str(), + shape_node_->GetType().c_str(), const_node->GetName().c_str()); OpDescBuilder op_builder(kMultiBatchMapIndexNode, "MapIndex"); - op_builder.AddInput("x", data_node->GetOpDesc()->GetOutputDesc(0)) + op_builder.AddInput("x", shape_node_->GetOpDesc()->GetOutputDesc(0)) .AddInput("data_seq", const_node->GetOpDesc()->GetOutputDesc(0)) .AddOutput("y", GeTensorDesc(GeShape(), FORMAT_ND, DT_INT32)); @@ -309,8 +395,10 @@ Status MultiBatchClonePass::CreateIndexNode(const ComputeGraphPtr &graph) { return OUT_OF_MEMORY; } - if (GraphUtils::AddEdge(data_node->GetOutDataAnchor(0), index_node->GetInDataAnchor(0)) != GRAPH_SUCCESS) { - GELOGE(FAILED, "Failed to add edge between node:%s to MapIndex:%s", data_node->GetName().c_str(), + GE_CHK_STATUS_RET(AddAttrForGetDynamicDims(shape_node_), "Failed to add attr for %s.", + shape_node_->GetName().c_str()); + if (GraphUtils::AddEdge(shape_node_->GetOutDataAnchor(0), index_node->GetInDataAnchor(0)) != GRAPH_SUCCESS) { + GELOGE(FAILED, "Failed to add edge between node:%s to MapIndex:%s", shape_node_->GetName().c_str(), index_node->GetName().c_str()); return FAILED; } @@ -328,6 +416,120 @@ Status MultiBatchClonePass::CreateIndexNode(const ComputeGraphPtr &graph) { return SUCCESS; } +Status MultiBatchClonePass::CreateGetDynamicDimsNode(const ComputeGraphPtr &graph, NodePtr &shape_node) { + const OpDescPtr data_desc = MakeShared(kMultiBatchGetDynamicDimsNode, GETDYNAMICDIMS); + if (data_desc == nullptr) { + GELOGE(OUT_OF_MEMORY, "Create multi-batch get dynamic dims node failed"); + return OUT_OF_MEMORY; + } + + // input of GetDynamicDims is shape_of_each_data, output is gear_info + for (size_t i = 0; i < GetLocalOmgContext().user_input_dims.size(); ++i) { + size_t input_shape_dims = GetLocalOmgContext().user_input_dims.at(i).second.size(); + // add input desc without GeShape for const input, value of input_shape is 1 transferred by adapter + if (input_shape_dims == 1 && GetLocalOmgContext().user_input_dims.at(i).second.at(0) == 0) { + GeTensorDesc tensor_desc; + tensor_desc.SetFormat(FORMAT_ND); + tensor_desc.SetDataType(DT_INT32); + auto ret = data_desc->AddInputDesc(tensor_desc); + GE_IF_BOOL_EXEC(ret != GRAPH_SUCCESS, GELOGE(INTERNAL_ERROR, "Failed to add input desc for created data"); + return FAILED); + continue; + } 
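// NOTE: a user shape recorded as {0} marks a Const input feeding GetDynamicDims
// (its shape size arrives as 1 from the adapter), so it received a rank-less
// INT32 desc above; real data inputs fall through to the ranked desc below,
// one INT32 element per dim of the configured shape.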
+ GeTensorDesc tensor_desc(GeShape({static_cast(input_shape_dims)}), FORMAT_ND, DT_INT32); + auto ret = data_desc->AddInputDesc(tensor_desc); + GE_IF_BOOL_EXEC(ret != GRAPH_SUCCESS, GELOGE(INTERNAL_ERROR, "Failed to add input desc for created data"); + return FAILED); + } + GeTensorDesc tensor_desc(GeShape({static_cast(batch_shapes_.at(0).size())}), FORMAT_ND, DT_INT32); + auto ret = data_desc->AddOutputDesc(tensor_desc); + GE_IF_BOOL_EXEC(ret != GRAPH_SUCCESS, GELOGE(INTERNAL_ERROR, "Failed to add output desc for created data"); + return FAILED); + + (void)AttrUtils::SetBool(data_desc, ATTR_INSERT_BY_MBATCH, true); + + shape_node = graph->AddNode(data_desc); + if (shape_node == nullptr) { + GELOGE(OUT_OF_MEMORY, "Create multi-batch dynamic dims node failed"); + return OUT_OF_MEMORY; + } + return SUCCESS; +} + +Status MultiBatchClonePass::AddAttrForGetDynamicDims(const NodePtr &shape_node) { + if (!getnext_sink_dynamic_dims_) { + GELOGD("No need to add attr when not insert get dynamic dims node."); + return SUCCESS; + } + GELOGD("Add attr for :%s, type is %s:", shape_node->GetName().c_str(), shape_node->GetType().c_str()); + if (!AttrUtils::SetInt(shape_node->GetOpDesc(), ATTR_GETNEXT_SINK_DATA_COUNT, data_count_from_getnext_)) { + GELOGE(INTERNAL_ERROR, "set ATTR_GETNEXT_SINK_DATA_COUNT failed"); + return INTERNAL_ERROR; + } + vector shape_info; + for (size_t i = 0; i < GetLocalOmgContext().user_input_dims.size(); ++i) { + if (GetLocalOmgContext().user_input_dims.at(i).second.size() == 1 && + GetLocalOmgContext().user_input_dims.at(i).second.at(0) == 0) { + shape_info.emplace_back(0); + continue; + } + shape_info.emplace_back(GetLocalOmgContext().user_input_dims.at(i).second.size()); + for (size_t j = 0; j < GetLocalOmgContext().user_input_dims.at(i).second.size(); ++j) { + shape_info.emplace_back(GetLocalOmgContext().user_input_dims.at(i).second.at(j)); + } + } + if (!AttrUtils::SetListInt(shape_node->GetOpDesc(), ATTR_GETNEXT_SINK_SHAPE_INFO, shape_info)) { + GELOGE(INTERNAL_ERROR, "set ATTR_GETNEXT_SINK_SHAPE_INFO failed"); + return INTERNAL_ERROR; + } + return SUCCESS; +} + +Status MultiBatchClonePass::LinkGetNextToGetDynamicDims(const NodePtr &getnext_node, const NodePtr &shape_node) { + GELOGD("Start relink shape anchor of %s to %s.", getnext_node->GetName().c_str(), shape_node->GetName().c_str()); + size_t input_index = 0; + size_t data_count = getnext_node->GetAllOutDataAnchors().size() / kDivisionConst; + for (size_t out_index = data_count; out_index < getnext_node->GetAllOutDataAnchors().size(); ++out_index, + ++input_index) { + GELOGD("Start add %s of %zu out_anchor to %s of %zu in_anchor.", getnext_node->GetName().c_str(), out_index, + shape_node->GetName().c_str(), input_index); + auto out_data_anchor = getnext_node->GetOutDataAnchor(out_index); + auto ret = GraphUtils::AddEdge(out_data_anchor, shape_node->GetInDataAnchor(input_index)); + GE_IF_BOOL_EXEC(ret != GRAPH_SUCCESS, GELOGE(INTERNAL_ERROR, "Failed to link getnext %s to getdynamicdims %s", + getnext_node->GetName().c_str(), shape_node->GetName().c_str()); + return INTERNAL_ERROR); + } + return SUCCESS; +} + +Status MultiBatchClonePass::LinkGetDynamicDimsToNetOutput(const NodePtr &output_node) { + if (!GetLocalOmgContext().dynamic_node_type.empty()) { + if (!AttrUtils::SetStr(output_node->GetOpDesc(), ATTR_ALL_GEARS_INFO, GetLocalOmgContext().dynamic_dims)) { + GELOGE(INTERNAL_ERROR, "Failed to set all gears info attr on netoutput %s.", output_node->GetName().c_str()); + return INTERNAL_ERROR; + } + } + if 
(getnext_sink_dynamic_dims_) { + GELOGD("Start link %s to %s.", shape_node_->GetName().c_str(), output_node->GetName().c_str()); + size_t input_index = output_node->GetAllInDataAnchors().size(); + if (NodeUtils::AppendInputAnchor(output_node, input_index + 1) != GRAPH_SUCCESS) { + GELOGE(INTERNAL_ERROR, "Append input anchor of %s of %zu failed.", output_node->GetName().c_str(), input_index); + return INTERNAL_ERROR; + } + auto ret = GraphUtils::AddEdge(shape_node_->GetOutDataAnchor(kDataOutIndex), + output_node->GetInDataAnchor(input_index)); + GE_IF_BOOL_EXEC(ret != GRAPH_SUCCESS, GELOGE(INTERNAL_ERROR, "Failed to link netoutput %s to getdynamicdims %s", + output_node->GetName().c_str(), shape_node_->GetName().c_str()); + return INTERNAL_ERROR); + if (!AttrUtils::SetBool(output_node->GetOpDesc(), ATTR_GETNEXT_SINK_DYNMAIC, true)) { + GELOGE(INTERNAL_ERROR, "Failed to set getnext sink dynamic attr on netoutput %s.", + output_node->GetName().c_str()); + return INTERNAL_ERROR; + } + } + return SUCCESS; +} + /// /// @ingroup ge /// @brief Create input node for root graph. @@ -337,8 +539,10 @@ Status MultiBatchClonePass::CreateIndexNode(const ComputeGraphPtr &graph) { Status MultiBatchClonePass::CreateInputNode(const ComputeGraphPtr &graph) { // Data --> Case std::vector all_data_nodes; - const size_t arg_index = kCaseArgIndex; - for (size_t i = 0; i < all_data_nodes_.size(); ++i) { + size_t case_input_index = kCaseArgIndex; + NodePtr getnext_node = nullptr; + size_t input_index_of_getnext = 0; + for (size_t i = 0; i < all_data_nodes_.size(); ++i, ++case_input_index) { const auto &node = all_data_nodes_[i]; const OpDescPtr op_desc = AttrUtils::CopyOpDesc(node->GetOpDesc()); if (op_desc == nullptr) { @@ -353,22 +557,60 @@ Status MultiBatchClonePass::CreateInputNode(const ComputeGraphPtr &graph) { op_desc->SetName(node->GetName()); const NodePtr &data = graph->AddNode(op_desc); GE_CHK_BOOL_EXEC(data != nullptr, return FAILED, "Add node[%s] to graph failed", op_desc->GetName().c_str()); - if (GraphUtils::AddEdge(data->GetOutDataAnchor(0), case_node_->GetInDataAnchor(arg_index + i)) != GRAPH_SUCCESS) { - GELOGE(FAILED, "Failed to add edge between Data:%s to Case:%s", - data->GetName().c_str(), case_node_->GetName().c_str()); - return FAILED; + if (IsGetNextType(node)) { + getnext_node = data; + input_index_of_getnext = case_input_index; + case_input_index = case_input_index + data_count_from_getnext_; + continue; + } else { + if (GraphUtils::AddEdge(data->GetOutDataAnchor(0), case_node_->GetInDataAnchor(case_input_index)) != + GRAPH_SUCCESS) { + GELOGE(FAILED, "Failed to add edge between Data:%s to Case:%s", data->GetName().c_str(), + case_node_->GetName().c_str()); + return FAILED; + } } - if (SetMaxShapeToData(data) != SUCCESS) { + if (SetMaxShape(data) != SUCCESS) { + GELOGE(FAILED, "Set max shape of %s failed.", data->GetName().c_str()); return FAILED; } all_data_nodes.emplace_back(data); } + if (getnext_node != nullptr) { + if (LinkEdgeForGetNext(getnext_node, input_index_of_getnext) != SUCCESS) { + GELOGE(FAILED, "Failed to link edge for %s.", getnext_node->GetName().c_str()); + return FAILED; + } + if (SetMaxShape(getnext_node) != SUCCESS) { + GELOGE(FAILED, "Set max shape of %s failed.", getnext_node->GetName().c_str()); + return FAILED; + } + all_data_nodes.emplace_back(getnext_node); + } all_data_nodes_.swap(all_data_nodes); return SUCCESS; } +Status MultiBatchClonePass::LinkEdgeForGetNext(const NodePtr &getnext_node, size_t &case_input_index) { + GELOGD("Start link edge for %s, which is 
the %zu input of %s.", getnext_node->GetName().c_str(),
+         case_input_index, case_node_->GetName().c_str());
+  for (size_t out_index = 0; out_index < data_count_from_getnext_; ++out_index, ++case_input_index) {
+    if (GraphUtils::AddEdge(getnext_node->GetOutDataAnchor(out_index),
+                            case_node_->GetInDataAnchor(case_input_index)) != GRAPH_SUCCESS) {
+      GELOGE(FAILED, "Failed to add edge from output %zu of Data:%s to input %zu of Case:%s", out_index,
+             getnext_node->GetName().c_str(), case_input_index, case_node_->GetName().c_str());
+      return FAILED;
+    }
+  }
+  if (getnext_sink_dynamic_dims_) {
+    GE_CHK_STATUS_RET(LinkGetNextToGetDynamicDims(getnext_node, shape_node_), "Failed to add link for %s.",
+                      shape_node_->GetName().c_str());
+  }
+  return SUCCESS;
+}
+
 ///
 /// @ingroup ge
 /// @brief Create Const node for root graph.
@@ -378,7 +620,11 @@ Status MultiBatchClonePass::CreateInputNode(const ComputeGraphPtr &graph) {
 Status MultiBatchClonePass::CreateConstNode(const ComputeGraphPtr &graph) {
   // Const --> Case
   std::vector<NodePtr> all_const_nodes;
-  const size_t arg_index = kCaseArgIndex + all_data_nodes_.size();
+  size_t arg_index = kCaseArgIndex + all_data_nodes_.size();
+  if (data_count_from_getnext_ != 0) {
+    arg_index = arg_index + data_count_from_getnext_ - kNumOfGetnextNode;
+  }
+
   for (size_t i = 0; i < all_const_nodes_.size(); ++i) {
     const auto &node = all_const_nodes_[i];
     const OpDescPtr op_desc = AttrUtils::CopyOpDesc(node->GetOpDesc());
@@ -395,15 +641,33 @@ Status MultiBatchClonePass::CreateConstNode(const ComputeGraphPtr &graph) {
     const NodePtr &data = graph->AddNode(op_desc);
     GE_CHK_BOOL_EXEC(data != nullptr, return FAILED, "Add node[%s] to graph failed", op_desc->GetName().c_str());
     if (GraphUtils::AddEdge(data->GetOutDataAnchor(0), case_node_->GetInDataAnchor(arg_index + i)) != GRAPH_SUCCESS) {
-      GELOGE(FAILED, "Failed to add edge between Const:%s to Case:%s",
-             data->GetName().c_str(), case_node_->GetName().c_str());
+      GELOGE(FAILED, "Failed to add edge from Const:%s to Case:%s", data->GetName().c_str(),
+             case_node_->GetName().c_str());
       return FAILED;
     }
     all_const_nodes.emplace_back(data);
   }
+  ChangeConstToData();
+  all_const_nodes_.swap(all_const_nodes);
+  return SUCCESS;
+}

+void MultiBatchClonePass::ChangeConstToData() {
   size_t data_index = all_data_nodes_.size();
+  if (data_count_from_getnext_ != 0) {
+    data_index = data_index + data_count_from_getnext_ - kNumOfGetnextNode;
+  }
   for (size_t i = 0; i < all_const_nodes_.size(); ++i, ++data_index) {  // Trans subgraph Const to Data.
+    auto &const_node = all_const_nodes_[i];
+    if (out_control_nodes_.find(const_node) != out_control_nodes_.end()) {
+      GELOGD("No need to change %s to data type.", const_node->GetName().c_str());
+      continue;
+    }
     const OpDescPtr &op_desc = all_const_nodes_[i]->GetOpDesc();
     op_desc->SetType(DATA);
     (void)op_desc->DelAttr(ATTR_NAME_WEIGHTS);  // Delete weight.
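+    // Worked example (hypothetical counts): with one GetNext node producing two data outputs
+    // (data_count_from_getnext_ == 2, all_data_nodes_.size() == 1) and three Const inputs,
+    // data_index starts at 1 + 2 - 1 == 2, so the converted Const nodes become Data inputs
+    // 2, 3 and 4 of the Case node; a Const kept as Const because of its control edges still
+    // consumes an index, which keeps the numbering stable.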
@@ -413,9 +677,6 @@ Status MultiBatchClonePass::CreateConstNode(const ComputeGraphPtr &graph) { (void)AttrUtils::SetInt(op_desc, ATTR_NAME_INDEX, data_index); (void)NodeUtils::AppendInputAnchor(all_const_nodes_[i], 1); } - - all_const_nodes_.swap(all_const_nodes); - return SUCCESS; } /// @@ -461,7 +722,8 @@ Status MultiBatchClonePass::CreateOutputNode(const ComputeGraphPtr &graph) { } } } - + GE_CHK_STATUS_RET(LinkGetDynamicDimsToNetOutput(node), "Failed to add edge between %s to netoutput: %s.", + shape_node_->GetName().c_str(), output->GetName().c_str()); all_output_nodes_.clear(); all_output_nodes_.emplace_back(node); return SUCCESS; @@ -473,34 +735,69 @@ Status MultiBatchClonePass::CreateOutputNode(const ComputeGraphPtr &graph) { /// @param [in] const NodePtr &data: data in Root/Case graph. /// @return 0: SUCCESS / others: FAILED /// -Status MultiBatchClonePass::SetMaxShapeToData(const NodePtr &data) { - auto data_shape = NodeUtils::GetOutputDesc(*data, kDataOutIndex).GetShape(); - auto data_name = data->GetName(); +Status MultiBatchClonePass::SetMaxShape(const NodePtr &data) { + GELOGD("Start set max shape for %s.", data->GetName().c_str()); + if (!IsGetNextType(data)) { + if (SetMaxShapeToData(data, kDataOutIndex) != SUCCESS) { + GELOGE(PARAM_INVALID, "Failed to update max shape of %s.", data->GetName().c_str()); + return PARAM_INVALID; + } + } else { + for (size_t out_anchor_index = 0; out_anchor_index < data_count_from_getnext_; ++out_anchor_index) { + if (SetMaxShapeToData(data, out_anchor_index) != SUCCESS) { + GELOGE(PARAM_INVALID, "Failed to update max shape of %s.", data->GetName().c_str()); + return PARAM_INVALID; + } + } + } + return SUCCESS; +} + +Status MultiBatchClonePass::SetMaxShapeToData(const NodePtr &node, size_t out_anchor_index) { + GELOGD("Start update max shape of %s, %zu output.", node->GetName().c_str(), out_anchor_index); + auto data_shape = NodeUtils::GetOutputDesc(*node, out_anchor_index).GetShape(); + string data_name = node->GetName(); + if (IsGetNextType(node)) { + data_name.append("_").append(std::to_string(out_anchor_index)); + } + GELOGD("Update max shape of %s, shape dims is %s.", data_name.c_str(), + formats::JoinToString(data_shape.GetDims()).c_str()); const auto &dims = data_shape.GetDims(); - if (std::all_of(dims.begin(), dims.end(), [](int64_t val) { return val >= 0; })) { - return SUCCESS; + if (!IsGetNextType(node)) { + if (std::all_of(dims.begin(), dims.end(), [](int64_t val) { return val >= 0; })) { + GELOGD("No need to do anything for static data."); + return SUCCESS; + } + } else { + if (std::all_of(dims.begin(), dims.end(), [](int64_t val) { return val >= 0; })) { + if (getnext_sink_dynamic_dims_) { + // need to update shape of Shape_node when getnext node has dynamic data + GE_CHK_STATUS_RET(UpdateShapeOfShapeNode(node, out_anchor_index), "Failed to update shape of shape node"); + } + return SUCCESS; + } } - (void)AttrUtils::SetListInt(data->GetOpDesc(), ATTR_MBATCH_ORIGIN_INPUT_DIMS, data_shape.GetDims()); + (void)AttrUtils::SetListInt(node->GetOpDesc(), ATTR_MBATCH_ORIGIN_INPUT_DIMS, data_shape.GetDims()); - GeTensorDesc tensor(NodeUtils::GetOutputDesc(*data, kDataOutIndex)); + GeTensorDesc tensor(NodeUtils::GetOutputDesc(*node, kDataOutIndex)); std::vector input_dims_str; for (size_t i = 0; i < batch_shapes_.size(); ++i) { auto shape = data_shape; auto ret = multibatch::CalcShape(data_to_dynamic_info_.at(data_name).at(i), shape); if (ret != SUCCESS) { - GELOGE(ret, "Failed to calculate the shape for data node %s, the shape may not 
match", data->GetName().c_str()); + GELOGE(ret, "Failed to calculate the shape for data node %s, the shape may not match", node->GetName().c_str()); return ret; } tensor.SetShape(shape); int64_t tensor_size = 0; (void)TensorUtils::GetTensorSizeInBytes(tensor, tensor_size); string input_str = TypeUtils::FormatToSerialString(tensor.GetFormat()) + ":" + - TypeUtils::DataTypeToSerialString(tensor.GetDataType()) + ":" + data->GetName() + ":" + + TypeUtils::DataTypeToSerialString(tensor.GetDataType()) + ":" + node->GetName() + ":" + std::to_string(tensor_size) + ":" + std::to_string(tensor.GetShape().GetDimNum()) + ":" + formats::JoinToString(tensor.GetShape().GetDims()); input_dims_str.emplace_back(input_str); } - (void)AttrUtils::SetListStr(data->GetOpDesc(), "_all_origin_gears_inputs", input_dims_str); + (void)AttrUtils::SetListStr(node->GetOpDesc(), "_all_origin_gears_inputs", input_dims_str); size_t max_shape_index = 0; int64_t max_size = 0; @@ -519,18 +816,72 @@ Status MultiBatchClonePass::SetMaxShapeToData(const NodePtr &data) { max_shape_index = i; } } + return SetShapeToData(data_to_dynamic_info_.at(data_name).at(max_shape_index), node, data_shape, out_anchor_index); +} - return SetShapeToData(data_to_dynamic_info_.at(data_name).at(max_shape_index), data, data_shape); +/// +/// @ingroup ge +/// @brief Set max shape to Data/GetNext node in root graph. +/// @param [in] const std::vector &shapes: dims of shape. +/// @param [in] const NodePtr &data: data in Root/Case graph. +/// @param [in] GeShape &data_shape: dims of data node. +/// @param [in] size_t out_anchor_index: out anchor index of data node. +/// @return 0: SUCCESS / others: FAILED +/// +Status MultiBatchClonePass::SetShapeToData(const std::vector &shapes, const NodePtr &data, GeShape &data_shape, + size_t out_anchor_index) { + GELOGD("Start set shape to %zu out of %s.", out_anchor_index, data->GetName().c_str()); + if (multibatch::CalcShape(shapes, data_shape) != SUCCESS) { + GELOGE(INTERNAL_ERROR, "Failed to calculate the batched shape for data node %s, the shapes may not match", + data->GetName().c_str()); + return INTERNAL_ERROR; + } + + if (NodeUtils::UpdateOutputShape(*data, out_anchor_index, data_shape) != GRAPH_SUCCESS) { + GELOGE(INTERNAL_ERROR, "Failed to update output shape for data %s", data->GetName().c_str()); + return INTERNAL_ERROR; + } + if (!IsGetNextType(data)) { + if (NodeUtils::UpdateInputShape(*data, kDataInIndex, data_shape) != GRAPH_SUCCESS) { + GELOGE(INTERNAL_ERROR, "Failed to update input shape for data %s", data->GetName().c_str()); + return INTERNAL_ERROR; + } + } else { + if (getnext_sink_dynamic_dims_) { + // need to update shape of Shape_node when getnext_sink_dynamic + GE_CHK_STATUS_RET(UpdateShapeOfShapeNode(data, out_anchor_index), "Failed to update shape of shape node"); + } + } + + GELOGI("Update the data %s input/output shape to the max %s", data->GetName().c_str(), + formats::ShapeToString(data_shape).c_str()); + return SUCCESS; +} + +Status MultiBatchClonePass::UpdateShapeOfShapeNode(const NodePtr &node, size_t out_anchor_index) { + GELOGD("Start update output shape of shape node insert by adapter, which is the %zu out of %s.", out_anchor_index, + node->GetName().c_str()); + auto data_shape = NodeUtils::GetOutputDesc(*node, out_anchor_index).GetShape(); + size_t shape_index = out_anchor_index + (node->GetAllOutDataAnchors().size() / kDivisionConst); + GeTensorDesc output_desc = node->GetOpDesc()->GetOutputDesc(shape_index); + std::vector output_dims = 
{static_cast(data_shape.GetDims().size())}; + GeShape output_shape(output_dims); + output_desc.SetShape(output_shape); + if (node->GetOpDesc()->UpdateOutputDesc(shape_index, output_desc) != SUCCESS) { + GELOGE(FAILED, "Update output desc fail."); + return FAILED; + } + return SUCCESS; } /// /// @ingroup ge /// @brief Update Data node in Subgraph. /// @param [in] const NodePtr &data: data in Subgraph. -/// @param [in] size_t index: The batch index. +/// @param [in] size_t batch_index: The batch index. /// @return 0: SUCCESS / others: FAILED /// -Status MultiBatchClonePass::UpdateSubgraphData(const NodePtr &data, size_t index) { +Status MultiBatchClonePass::UpdateSubgraphData(const NodePtr &data, size_t batch_index) { int node_index = -1; if (!AttrUtils::GetInt(data->GetOpDesc(), ATTR_NAME_INDEX, node_index)) { GELOGE(FAILED, "Failed to get index from data[%s]", data->GetName().c_str()); @@ -545,6 +896,8 @@ Status MultiBatchClonePass::UpdateSubgraphData(const NodePtr &data, size_t index auto data_shape = NodeUtils::GetOutputDesc(*data, kDataOutIndex).GetShape(); const auto &dims = data_shape.GetDims(); + GELOGD("Start update shape of %s , batch index is %zu, dims is %s.", data->GetName().c_str(), batch_index, + formats::JoinToString(dims).c_str()); if (std::all_of(dims.begin(), dims.end(), [](int64_t val) { return val >= 0; })) { return SUCCESS; } @@ -559,35 +912,77 @@ Status MultiBatchClonePass::UpdateSubgraphData(const NodePtr &data, size_t index } auto parent_name = data_name.substr(0, pos); - return SetShapeToData(data_to_dynamic_info_.at(parent_name).at(index), data, data_shape); + return SetShapeToData(data_to_dynamic_info_.at(parent_name).at(batch_index), data, data_shape, kDataOutIndex); } -/// -/// @ingroup ge -/// @brief Set max shape to Data node in root graph. -/// @param [in] const std::vector &shapes: dims of shape. -/// @param [in] const NodePtr &data: data in Root/Case graph. -/// @param [in] GeShape &data_shape: dims of data node. 
-/// @return 0: SUCCESS / others: FAILED
-///
-Status MultiBatchClonePass::SetShapeToData(const vector<int64_t> &shapes, const NodePtr &data, GeShape &data_shape) {
-  // must not be error, the calc result has been checked in function InsertSwitchNForData
-  if (multibatch::CalcShape(shapes, data_shape) != SUCCESS) {
-    return INTERNAL_ERROR;
+Status MultiBatchClonePass::CreateOriGraph(const ComputeGraphPtr &graph) {
+  if (data_count_from_getnext_ == 0) {
+    GELOGD("No need to change original graph without getnext node.");
+    return SUCCESS;
   }
-
-  if (NodeUtils::UpdateInputShape(*data, kDataInIndex, data_shape) != GRAPH_SUCCESS) {
-    GELOGE(INTERNAL_ERROR, "Failed to update input shape for data %s", data->GetName().c_str());
-    return INTERNAL_ERROR;
+  GELOGD("Start to change original graph: %s when getnext node exists.", graph->GetName().c_str());
+  size_t data_index = all_data_nodes_.size() - kNumOfGetnextNode;
+  for (const auto &node : graph->GetDirectNode()) {
+    if (IsGetNextType(node)) {
+      for (size_t out_index = 0; out_index < data_count_from_getnext_; ++out_index, ++data_index) {
+        auto out_data_anchor = node->GetOutDataAnchor(out_index);
+        GE_IF_BOOL_EXEC(out_data_anchor == nullptr, continue);
+        NodePtr data_node = CreateDataNode(graph, out_data_anchor, data_index);
+        GE_IF_BOOL_EXEC(data_node == nullptr, GELOGE(INTERNAL_ERROR, "Create data node for output %d failed.",
+                                                     out_data_anchor->GetIdx()); return INTERNAL_ERROR);
+        for (auto &in_anchor : out_data_anchor->GetPeerInDataAnchors()) {
+          GE_IF_BOOL_EXEC(in_anchor == nullptr, continue);
+          NodePtr dst_node = in_anchor->GetOwnerNode();
+          if (GraphUtils::RemoveEdge(out_data_anchor, in_anchor) != GRAPH_SUCCESS) {
+            GELOGE(INTERNAL_ERROR, "Failed to remove edge from %s to %s", node->GetName().c_str(),
+                   dst_node->GetName().c_str());
+            return INTERNAL_ERROR;
+          }
+          if (GraphUtils::AddEdge(data_node->GetOutDataAnchor(0), dst_node->GetInDataAnchor(in_anchor->GetIdx())) !=
+              GRAPH_SUCCESS) {
+            GELOGE(INTERNAL_ERROR, "Failed to add edge from %s to %s", data_node->GetName().c_str(),
+                   dst_node->GetName().c_str());
+            return INTERNAL_ERROR;
+          }
+        }
+      }
+      if (graph->RemoveNode(node) != GRAPH_SUCCESS) {
+        GELOGE(GRAPH_FAILED, "Remove node %s failed!", node->GetName().c_str());
+        return GRAPH_FAILED;
+      }
+      break;
+    }
   }
+  return SUCCESS;
+}

-  if (NodeUtils::UpdateOutputShape(*data, kDataOutIndex, data_shape) != GRAPH_SUCCESS) {
-    GELOGE(INTERNAL_ERROR, "Failed to update output shape for data %s", data->GetName().c_str());
-    return INTERNAL_ERROR;
+NodePtr MultiBatchClonePass::CreateDataNode(const ComputeGraphPtr &graph, const OutDataAnchorPtr &out_data_anchor,
+                                            size_t data_index) {
+  size_t out_anchor_index = out_data_anchor->GetIdx();
+  std::string node_name = out_data_anchor->GetOwnerNode()->GetName() + "_" + std::to_string(out_anchor_index);
+  OpDescPtr op_desc = MakeShared<OpDesc>(node_name, DATA);
+  if (op_desc == nullptr) {
+    GELOGE(OUT_OF_MEMORY, "Create data node failed.");
+    return nullptr;
   }
+  (void)AttrUtils::SetInt(op_desc, ATTR_NAME_INDEX, data_index);

-  GELOGI("Update %s input/output shape to %s", data->GetName().c_str(), formats::ShapeToString(data_shape).c_str());
-  return SUCCESS;
+  OpDescPtr getnext_op_desc = out_data_anchor->GetOwnerNode()->GetOpDesc();
+  if (getnext_op_desc == nullptr) {
+    GELOGE(OUT_OF_MEMORY, "Op desc of %s is nullptr.", out_data_anchor->GetOwnerNode()->GetName().c_str());
+    return nullptr;
+  }
+  if (op_desc->AddInputDesc(getnext_op_desc->GetOutputDesc(out_anchor_index)) != GRAPH_SUCCESS) {
+    GELOGE(INTERNAL_ERROR, "Add %s input desc
failed.", op_desc->GetName().c_str()); + return nullptr; + } + if (op_desc->AddOutputDesc(getnext_op_desc->GetOutputDesc(out_anchor_index)) != GRAPH_SUCCESS) { + GELOGE(INTERNAL_ERROR, "Add %s output desc failed.", op_desc->GetName().c_str()); + return nullptr; + } + NodePtr data_node = graph->AddNode(op_desc); + GELOGD("Success create %s node.", data_node->GetName().c_str()); + return data_node; } /// @@ -598,17 +993,14 @@ Status MultiBatchClonePass::SetShapeToData(const vector &shapes, const /// @return 0: SUCCESS / others: FAILED /// Status MultiBatchClonePass::CreateSubgraphs(const ComputeGraphPtr &graph, const ComputeGraphPtr &branch) { + GELOGD("Start create subgraphs for %s.", graph->GetName().c_str()); const auto &op_desc = case_node_->GetOpDesc(); for (size_t i = 0; i < batch_shapes_.size(); ++i) { std::vector input_nodes; std::vector output_nodes; const std::string postfix = kMultiBatchNodePostfix + std::to_string(i); ComputeGraphPtr subgraph = (i == 0) ? branch : GraphUtils::CloneGraph(branch, postfix, input_nodes, output_nodes); - if (subgraph == nullptr) { - GELOGE(FAILED, "Create multi-batch case node failed"); - return FAILED; - } - + GE_IF_BOOL_EXEC(subgraph == nullptr, GELOGE(FAILED, "Create multi-batch case node failed"); return FAILED); subgraph->SetName("Batch_" + std::to_string(i)); subgraph->SetParentNode(case_node_); subgraph->SetParentGraph(graph); @@ -621,6 +1013,7 @@ Status MultiBatchClonePass::CreateSubgraphs(const ComputeGraphPtr &graph, const op_desc->AddSubgraphName(key_name); op_desc->SetSubgraphInstanceName(i, subgraph->GetName()); + GELOGD("The %s has %zu input, %zu output.", subgraph->GetName().c_str(), input_nodes.size(), output_nodes.size()); for (const auto &data : input_nodes) { GE_CHK_STATUS_RET(UpdateSubgraphData(data, i), "Update %s failed", subgraph->GetName().c_str()); } @@ -666,6 +1059,7 @@ Status MultiBatchClonePass::UpdateSubgraphOutput(const NodePtr &output_node) { /// @return 0: SUCCESS / others: FAILED /// Status MultiBatchClonePass::PruneDirectOutput(const ComputeGraphPtr &graph) { + GELOGD("Start prune direct output."); const auto &func_desc = case_node_->GetOpDesc(); uint32_t unused_num = 0; uint32_t output_num = func_desc->GetOutputsSize(); @@ -710,6 +1104,7 @@ Status MultiBatchClonePass::PruneDirectOutput(const ComputeGraphPtr &graph) { /// Status MultiBatchClonePass::UpdateOutputTensor(uint32_t parent_index, uint32_t unused_num) { if (unused_num == 0) { + GELOGD("No need to update output tensor."); return SUCCESS; } diff --git a/ge/graph/passes/multi_batch_clone_pass.h b/ge/graph/passes/multi_batch_clone_pass.h index ee137b5a..66e92892 100755 --- a/ge/graph/passes/multi_batch_clone_pass.h +++ b/ge/graph/passes/multi_batch_clone_pass.h @@ -36,6 +36,7 @@ class MultiBatchClonePass : public GraphPass { /// @return 0: SUCCESS / others: FAILED /// Status CollectIoNodes(const ComputeGraphPtr &graph); + Status InitParamsOfGetNext(const NodePtr &node); /// /// @ingroup ge @@ -49,10 +50,12 @@ class MultiBatchClonePass : public GraphPass { /// @ingroup ge /// @brief Create index data node for root graph. /// @param [in] const ComputeGraphPtr &graph: Root/Case graph. - /// @param [in] NodePtr node: index data node. + /// @param [in] NodePtr shape_node: index data node, DATA or GETDYNAMICDIMS type. 
/// @return 0: SUCCESS / others: FAILED /// - Status CreateIndexDataNode(const ComputeGraphPtr &graph, NodePtr &node); + Status CreateIndexDataNode(const ComputeGraphPtr &graph, NodePtr &shape_node); + + Status CreateGetDynamicDimsNode(const ComputeGraphPtr &graph, NodePtr &shape_node); /// /// @ingroup ge @@ -70,6 +73,9 @@ class MultiBatchClonePass : public GraphPass { /// @return 0: SUCCESS / others: FAILED /// Status CreateIndexNode(const ComputeGraphPtr &graph); + Status AddAttrForGetDynamicDims(const NodePtr &shape_node); + Status LinkGetNextToGetDynamicDims(const NodePtr &getnext_node, const NodePtr &shape_node); + Status LinkGetDynamicDimsToNetOutput(const NodePtr &output_node); /// /// @ingroup ge @@ -78,39 +84,54 @@ class MultiBatchClonePass : public GraphPass { /// @return 0: SUCCESS / others: FAILED /// Status CreateInputNode(const ComputeGraphPtr &graph); + Status LinkEdgeForGetNext(const NodePtr &getnext_node, size_t &case_input_index); /// /// @ingroup ge - /// @brief Create Const node for root graph. - /// @param [in] const ComputeGraphPtr &graph: Root/Case graph. + /// @brief Set max shape to Data node in root graph. + /// @param [in] const NodePtr &data: data in Root/Case graph. /// @return 0: SUCCESS / others: FAILED /// - Status CreateConstNode(const ComputeGraphPtr &graph); + Status SetMaxShape(const NodePtr &data); + Status SetMaxShapeToData(const NodePtr &node, size_t out_anchor_index); + /// + /// @ingroup ge + /// @brief Set max shape to Data/GetNext node in root graph. + /// @param [in] const std::vector &shapes: dims of shape. + /// @param [in] const NodePtr &data: data in Root/Case graph. + /// @param [in] GeShape &data_shape: dims of data node. + /// @param [in] size_t out_anchor_index: out anchor index of data node. + /// @return 0: SUCCESS / others: FAILED + /// + Status SetShapeToData(const std::vector &shapes, const NodePtr &data, GeShape &data_shape, + size_t out_anchor_index); + Status UpdateShapeOfShapeNode(const NodePtr &node, size_t out_anchor_index); /// /// @ingroup ge - /// @brief Create output node for root graph. + /// @brief Create Const node for root graph. /// @param [in] const ComputeGraphPtr &graph: Root/Case graph. /// @return 0: SUCCESS / others: FAILED /// - Status CreateOutputNode(const ComputeGraphPtr &graph); + Status CreateConstNode(const ComputeGraphPtr &graph); + void ChangeConstToData(); /// /// @ingroup ge - /// @brief Set max shape to Data node in root graph. - /// @param [in] const NodePtr &data: data in Root/Case graph. + /// @brief Create output node for root graph. + /// @param [in] const ComputeGraphPtr &graph: Root/Case graph. /// @return 0: SUCCESS / others: FAILED /// - Status SetMaxShapeToData(const NodePtr &data); + Status CreateOutputNode(const ComputeGraphPtr &graph); /// /// @ingroup ge /// @brief Update Data node in Subgraph. /// @param [in] const NodePtr &data: data in Subgraph. - /// @param [in] size_t index: The batch index. + /// @param [in] size_t batch_index: The batch index. /// @return 0: SUCCESS / others: FAILED /// - Status UpdateSubgraphData(const NodePtr &data, size_t index); + Status UpdateSubgraphData(const NodePtr &data, size_t batch_index); /// /// @ingroup ge @@ -122,13 +143,12 @@ class MultiBatchClonePass : public GraphPass { /// /// @ingroup ge - /// @brief Set max shape to Data node in root graph. - /// @param [in] const std::vector &shapes: dims of shape. - /// @param [in] const NodePtr &data: data in Root/Case graph. - /// @param [in] GeShape &data_shape: dims of data node. 
+ /// @brief Create nodes for root graph. + /// @param [in] const ComputeGraphPtr &graph: Original graph. /// @return 0: SUCCESS / others: FAILED /// - Status SetShapeToData(const std::vector &shapes, const NodePtr &data, GeShape &data_shape); + Status CreateOriGraph(const ComputeGraphPtr &graph); + NodePtr CreateDataNode(const ComputeGraphPtr &graph, const OutDataAnchorPtr &out_data_anchor, size_t data_index); /// /// @ingroup ge @@ -168,6 +188,10 @@ class MultiBatchClonePass : public GraphPass { std::map>> data_to_dynamic_info_; NodePtr case_node_; + size_t data_count_from_getnext_ = 0; + bool getnext_sink_dynamic_dims_ = false; + NodePtr shape_node_; + std::set out_control_nodes_; }; } // namespace ge #endif // GE_GRAPH_PASSES_MULTI_BATCH_CLONE_PASS_H_ diff --git a/ge/graph/passes/unused_args_clean_pass.cc b/ge/graph/passes/unused_args_clean_pass.cc index 83fd0438..ec66b129 100755 --- a/ge/graph/passes/unused_args_clean_pass.cc +++ b/ge/graph/passes/unused_args_clean_pass.cc @@ -204,6 +204,10 @@ Status UnusedArgsCleanPass::RemoveInputTensor(const mapGetName().c_str(), func_node->GetName().c_str()); + if (out_node->GetInDataNodes().size() == 0 && out_node->GetOutAllNodes().size() == 0) { + GE_CHK_GRAPH_STATUS_RET(out_node->GetOwnerComputeGraph()->RemoveNode(out_node), "Remove node failed: %s", + out_node->GetName().c_str()); + } return SUCCESS; } } // namespace ge \ No newline at end of file diff --git a/ge/graph/preprocess/multi_batch_copy_graph.cc b/ge/graph/preprocess/multi_batch_copy_graph.cc index c8880b2e..5506435e 100644 --- a/ge/graph/preprocess/multi_batch_copy_graph.cc +++ b/ge/graph/preprocess/multi_batch_copy_graph.cc @@ -1692,13 +1692,11 @@ Status MultiBatchGraphCopyer::LinkToNodeOutBranch(const NodePtr &node) { } Status ProcessMultiBatch(ComputeGraphPtr &graph) { - if (GetLocalOmgContext().dynamic_node_type.empty()) { - const char *multi_batch_with_switchn = std::getenv("MULTI_BATCH_WITH_SWITCHN"); - if (multi_batch_with_switchn == nullptr) { - PassManager pass_manager; - GE_CHK_STATUS_RET(pass_manager.AddPass("MultiBatchClonePass", new (std::nothrow) MultiBatchClonePass)); - return pass_manager.Run(graph); - } + const char *multi_batch_with_switchn = std::getenv("MULTI_BATCH_WITH_SWITCHN"); + if (multi_batch_with_switchn == nullptr) { + PassManager pass_manager; + GE_CHK_STATUS_RET(pass_manager.AddPass("MultiBatchClonePass", new (std::nothrow) MultiBatchClonePass)); + return pass_manager.Run(graph); } if (!GetLocalOmgContext().need_multi_batch) { GELOGI("No need to process_multi for no_train graph."); diff --git a/ge/graph/preprocess/multi_batch_options.cc b/ge/graph/preprocess/multi_batch_options.cc index c26b08bc..aba2b88d 100644 --- a/ge/graph/preprocess/multi_batch_options.cc +++ b/ge/graph/preprocess/multi_batch_options.cc @@ -99,9 +99,8 @@ Status DistinguishGetNextAndData(ComputeGraphPtr &graph, vector &data_n } GELOGI("Data count is %zu, getnext nosink count is %zu, getnext sink count is %zu.", data_nodes.size(), getnext_nosink_nodes.size(), getnext_sink_nodes.size()); - GE_IF_BOOL_EXEC(!graph->SetExtAttr(kExtAttrDataNodes, data_nodes), GELOGW("Set data nodes attr failed.");) - GE_IF_BOOL_EXEC(!graph->SetExtAttr(kExtAttrGetNextNoSink, getnext_nosink_nodes), - GELOGW("Set getnext nosink nodes attr failed.");) + GetLocalOmgContext().data_nodes = data_nodes; + GetLocalOmgContext().getnext_nosink_nodes = getnext_nosink_nodes; return SUCCESS; } diff --git a/inc/framework/omg/omg_inner_types.h b/inc/framework/omg/omg_inner_types.h index dab79053..1049b6b5 100644 --- 
a/inc/framework/omg/omg_inner_types.h +++ b/inc/framework/omg/omg_inner_types.h @@ -26,6 +26,7 @@ #include #include "framework/common/fmk_error_codes.h" #include "register/register_fmk_types.h" +#include "graph/node.h" using domi::DOMI_TENSOR_ND; using domi::DOMI_TENSOR_RESERVED; @@ -120,6 +121,8 @@ struct OmgContext { std::vector> user_real_input_dims; std::vector cur_dynamic_dims; bool need_multi_batch = false; + std::vector data_nodes; + std::vector getnext_nosink_nodes; }; } // namespace ge diff --git a/metadef b/metadef index 44bcbb5e..fe37bc34 160000 --- a/metadef +++ b/metadef @@ -1 +1 @@ -Subproject commit 44bcbb5ea25ada1a5393aa4c7f554d40b6859b18 +Subproject commit fe37bc343ea52c76d35e9e9ec83cea0151bfa900 diff --git a/parser b/parser index 5b93b050..336cd310 160000 --- a/parser +++ b/parser @@ -1 +1 @@ -Subproject commit 5b93b050dd7ca5b77c3001a790031d877fa10956 +Subproject commit 336cd3107253d3fe41cfb9fec2db62b5f3d8a33b diff --git a/tests/ut/ge/CMakeLists.txt b/tests/ut/ge/CMakeLists.txt index dcf389c0..db725dfb 100755 --- a/tests/ut/ge/CMakeLists.txt +++ b/tests/ut/ge/CMakeLists.txt @@ -627,6 +627,7 @@ set(PASS_TEST_FILES "graph/passes/net_output_pass_unittest.cc" "graph/passes/no_use_reshape_remove_pass_unittest.cc" "graph/passes/infershape_pass_unittest.cc" + "graph/passes/multi_batch_clone_pass_unittest.cc" ) set(KERNEL_TEST_FILES diff --git a/tests/ut/ge/graph/load/davinci_model_unittest.cc b/tests/ut/ge/graph/load/davinci_model_unittest.cc index a9efab3d..9e51585b 100644 --- a/tests/ut/ge/graph/load/davinci_model_unittest.cc +++ b/tests/ut/ge/graph/load/davinci_model_unittest.cc @@ -32,6 +32,18 @@ class UtestDavinciModel : public testing::Test { void SetUp() {} void TearDown() {} + public: + NodePtr MakeNode(const ComputeGraphPtr &graph, uint32_t in_num, uint32_t out_num, string name, string type) { + GeTensorDesc test_desc(GeShape(), FORMAT_NCHW, DT_FLOAT); + auto op_desc = std::make_shared(name, type); + for (auto i = 0; i < in_num; ++i) { + op_desc->AddInputDesc(test_desc); + } + for (auto i = 0; i < out_num; ++i) { + op_desc->AddOutputDesc(test_desc); + } + return graph->AddNode(op_desc); + } }; TEST_F(UtestDavinciModel, init_success) { @@ -324,5 +336,94 @@ TEST_F(UtestDavinciModel, SyncVarData_test) { EXPECT_NE(model.SyncVarData(), SUCCESS); } +TEST_F(UtestDavinciModel, InitRealSizeAndShapeInfo_succ1) { + DavinciModel model(0, nullptr); + model.ge_model_ = make_shared(); + ComputeGraphPtr graph = make_shared("default"); + + GeTensorDesc tensor(GeShape(), FORMAT_NCHW, DT_FLOAT); + OpDescPtr op_output = CreateOpDesc("output_ascend_mbatch_batch_1", NETOUTPUT); + op_output->AddInputDesc(tensor); + op_output->SetInputOffset({1024}); + NodePtr node_output = graph->AddNode(op_output); + EXPECT_EQ(model.InitRealSizeAndShapeInfo(graph, node_output), SUCCESS); +} + +TEST_F(UtestDavinciModel, InitRealSizeAndShapeInfo_succ2) { + DavinciModel model(0, nullptr); + ComputeGraphPtr graph = std::make_shared("test_graph"); + + OpDescPtr data1 = CreateOpDesc("data1", DATA); + GeTensorDesc shape_desc(GeShape({4,3,224,224}), FORMAT_NCHW, DT_FLOAT); + data1->AddInputDesc(shape_desc); + data1->AddOutputDesc(shape_desc); + NodePtr data1_node = graph->AddNode(data1); + + OpDescPtr case_node = CreateOpDesc("case1", CASE); + GeTensorDesc tensor(GeShape(), FORMAT_NCHW, DT_FLOAT); + case_node->AddInputDesc(tensor); + case_node->AddOutputDesc(tensor); + NodePtr case1_node = graph->AddNode(case_node); + + OpDescPtr output = CreateOpDesc("output1", NETOUTPUT); + output->AddInputDesc(tensor); + 
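+  // SetSrcName/SetSrcIndex mimic what graph construction normally records: NetOutput input 0
+  // is fed by output 0 of node "case1" (hypothetical hand-wiring for this test), which is the
+  // association InitRealSizeAndShapeInfo walks to find each model output's producer.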
output->SetSrcName( { "case1" } ); + output->SetSrcIndex( { 0 } ); + NodePtr output_node = graph->AddNode(output); + + GraphUtils::AddEdge(data1_node->GetOutDataAnchor(0), case1_node->GetInDataAnchor(0)); + GraphUtils::AddEdge(case1_node->GetOutDataAnchor(0), output_node->GetInDataAnchor(0)); + + (void)AttrUtils::SetStr(output_node->GetOpDesc(), ATTR_ALL_GEARS_INFO, "1;2;4;8"); + (void)AttrUtils::SetBool(case_node, ATTR_INSERT_BY_MBATCH, true); + + model.is_getnext_sink_dynamic_ = false; + model.is_online_infer_dynamic_ = true; + auto ret = model.InitRealSizeAndShapeInfo(graph, output_node); + // GetGearAndRealOutShapeInfo without ATTR_NAME_DYNAMIC_OUTPUT_DIMS + EXPECT_EQ(ret, SUCCESS); + vector dynamic_output_dims = {"0,0,1,1,0,2,2,0,4,3,0,8"}; + (void)AttrUtils::SetListStr(output_node->GetOpDesc(), ATTR_NAME_DYNAMIC_OUTPUT_DIMS, dynamic_output_dims); + ret = model.InitRealSizeAndShapeInfo(graph, output_node); + EXPECT_EQ(ret, SUCCESS); +} + +TEST_F(UtestDavinciModel, InitRealSizeAndShapeInfo_succ3) { + DavinciModel model(0, nullptr); + ComputeGraphPtr graph = std::make_shared("test_graph"); + + OpDescPtr data1 = CreateOpDesc("data1", DATA); + GeTensorDesc shape_desc(GeShape({4,3,224,224}), FORMAT_NCHW, DT_FLOAT); + data1->AddInputDesc(shape_desc); + data1->AddOutputDesc(shape_desc); + NodePtr data1_node = graph->AddNode(data1); + + OpDescPtr shape_node = CreateOpDesc("ascend_mbatch_get_dynamic_dims_node", GETDYNAMICDIMS); + GeTensorDesc in_tensor(GeShape(), FORMAT_NCHW, DT_FLOAT); + GeTensorDesc out_tensor(GeShape({4,3}), FORMAT_NCHW, DT_FLOAT); + shape_node->AddInputDesc(in_tensor); + shape_node->AddOutputDesc(out_tensor); + NodePtr get_dynamic_dims_node = graph->AddNode(shape_node); + + OpDescPtr output = CreateOpDesc("output1", NETOUTPUT); + GeTensorDesc tensor(GeShape(), FORMAT_NCHW, DT_FLOAT); + output->AddInputDesc(tensor); + output->SetSrcName( { "data1", "ascend_mbatch_get_dynamic_dims_node" } ); + output->SetSrcIndex( { 0, 1 } ); + NodePtr output_node = graph->AddNode(output); + GraphUtils::AddEdge(data1_node->GetOutDataAnchor(0), output_node->GetInDataAnchor(0)); + GraphUtils::AddEdge(get_dynamic_dims_node->GetOutDataAnchor(0), output_node->GetInDataAnchor(1)); + + (void)AttrUtils::SetStr(output_node->GetOpDesc(), ATTR_ALL_GEARS_INFO, "1,3;;4,3;,3"); + + model.is_getnext_sink_dynamic_ = true; + model.is_online_infer_dynamic_ = false; + auto ret = model.InitRealSizeAndShapeInfo(graph, output_node); + EXPECT_EQ(ret, SUCCESS); + model.runtime_param_.mem_base = (uint8_t *)0x08000000; + model.runtime_param_.mem_size = 4; + ret = model.InitRealSizeAndShapeInfo(graph, output_node); + EXPECT_EQ(ret, SUCCESS); +} } // namespace ge diff --git a/tests/ut/ge/graph/passes/multi_batch_clone_pass_unittest.cc b/tests/ut/ge/graph/passes/multi_batch_clone_pass_unittest.cc new file mode 100644 index 00000000..b1cd6d4d --- /dev/null +++ b/tests/ut/ge/graph/passes/multi_batch_clone_pass_unittest.cc @@ -0,0 +1,247 @@ +/** + * Copyright 2021 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "graph/passes/multi_batch_clone_pass.h" + +#include +#include +#include + +#include "inc/pass_manager.h" +#include "graph/utils/tensor_utils.h" +#include "graph/common/local_context.h" +#include "graph/passes/multi_batch_pass.h" +#include "graph/preprocess/multi_batch_copy_graph.h" +#include "graph/preprocess/insert_op/util_insert_aipp_op.h" +#include "framework/omg/omg_inner_types.h" +#include "register/op_registry.h" + + +namespace ge{ +class UtestMultiBatchClonePass : public testing::Test { +protected: + void SetUp() { + SetLocalOmgContext(domi::GetContext()); + GetLocalOmgContext().dynamic_image_size.clear(); + GetLocalOmgContext().dynamic_batch_size.clear(); + } + void TearDown() { + GetLocalOmgContext().dynamic_image_size.clear(); + GetLocalOmgContext().dynamic_batch_size.clear(); + GetLocalOmgContext().dynamic_node_type.clear(); + } + +public: + NodePtr MakeNode(const ComputeGraphPtr &graph, uint32_t in_num, uint32_t out_num, string name, string type) { + GeTensorDesc test_desc(GeShape(), FORMAT_NCHW, DT_FLOAT); + auto op_desc = std::make_shared(name, type); + for (auto i = 0; i < in_num; ++i) { + op_desc->AddInputDesc(test_desc); + } + for (auto i = 0; i < out_num; ++i) { + op_desc->AddOutputDesc(test_desc); + } + return graph->AddNode(op_desc); + } + + NodePtr MakeConstNode(const ComputeGraphPtr &graph) { + static uint32_t index = 0; + GeTensorDesc test_desc(GeShape(), FORMAT_NCHW, DT_FLOAT); + auto op_desc = std::make_shared("dynamic_const_" + std::to_string(index++), "Const"); + op_desc->AddOutputDesc(test_desc); + return graph->AddNode(op_desc); + } + + void make_original_graph(const ComputeGraphPtr &graph) { + auto conv2d_node = MakeNode(graph, 3, 1, "conv1", "Conv2D"); + { + auto data1 = MakeNode(graph, 1, 1, "data", "Data"); + GeTensorDesc tensor_desc(GeShape({-1,3,224,224}), FORMAT_NCHW, DT_FLOAT); + data1->GetOpDesc()->UpdateInputDesc(0, tensor_desc); + data1->GetOpDesc()->UpdateOutputDesc(0, tensor_desc); + AttrUtils::SetInt(data1->GetOpDesc(), ATTR_NAME_INDEX, 0); + GetLocalOmgContext().user_input_dims = {std::make_pair(data1->GetOpDesc()->GetName(), vector{-1,3,224,224})}; + + GraphUtils::AddEdge(data1->GetOutDataAnchor(0), conv2d_node->GetInDataAnchor(0)); + auto const1 = MakeConstNode(graph); + GraphUtils::AddEdge(const1->GetOutDataAnchor(0), conv2d_node->GetInDataAnchor(1)); + auto const2 = MakeConstNode(graph); + GraphUtils::AddEdge(const2->GetOutDataAnchor(0), conv2d_node->GetInDataAnchor(2)); + } + + auto bn_conv1 = MakeNode(graph, 4, 1, "bn_conv1", "BNInference"); + { + GraphUtils::AddEdge(conv2d_node->GetOutDataAnchor(0), bn_conv1->GetInDataAnchor(0)); + auto const1 = MakeConstNode(graph); + GraphUtils::AddEdge(const1->GetOutDataAnchor(0), bn_conv1->GetInDataAnchor(1)); + auto const2 = MakeConstNode(graph); + GraphUtils::AddEdge(const2->GetOutDataAnchor(0), bn_conv1->GetInDataAnchor(2)); + auto const3= MakeConstNode(graph); + GraphUtils::AddEdge(const3->GetOutDataAnchor(0), bn_conv1->GetInDataAnchor(3)); + } + + auto scale_conv1 = MakeNode(graph, 4, 1, "scale1", "Scale"); + { + GraphUtils::AddEdge(bn_conv1->GetOutDataAnchor(0), scale_conv1->GetInDataAnchor(0)); + auto const1 = MakeConstNode(graph); + GraphUtils::AddEdge(const1->GetOutDataAnchor(0), scale_conv1->GetInDataAnchor(1)); + auto const2 = MakeConstNode(graph); + GraphUtils::AddEdge(const2->GetOutDataAnchor(0), scale_conv1->GetInDataAnchor(2)); + } + + auto output_node = 
MakeNode(graph, 1, 0, "output1", "NetOutput"); + GraphUtils::AddEdge(scale_conv1->GetOutDataAnchor(0), output_node->GetInDataAnchor(0)); + } + + void GraphWithJustData(const ComputeGraphPtr &graph) { + auto conv2d_node = MakeNode(graph, 3, 1, "conv1", "Conv2D"); + { + auto data1 = MakeNode(graph, 1, 1, "data", "Data"); + GeTensorDesc tensor_desc(GeShape({-1,3,224,224}), FORMAT_NCHW, DT_FLOAT); + data1->GetOpDesc()->UpdateInputDesc(0, tensor_desc); + data1->GetOpDesc()->UpdateOutputDesc(0, tensor_desc); + AttrUtils::SetInt(data1->GetOpDesc(), ATTR_NAME_INDEX, 0); + GetLocalOmgContext().user_input_dims = {std::make_pair(data1->GetOpDesc()->GetName(), vector{-1,3,224,224})}; + + GraphUtils::AddEdge(data1->GetOutDataAnchor(0), conv2d_node->GetInDataAnchor(0)); + auto const1 = MakeConstNode(graph); + GraphUtils::AddEdge(const1->GetOutDataAnchor(0), conv2d_node->GetInDataAnchor(1)); + auto const2 = MakeConstNode(graph); + GraphUtils::AddEdge(const2->GetOutDataAnchor(0), conv2d_node->GetInDataAnchor(2)); + } + + auto output_node = MakeNode(graph, 1, 0, "output1", "NetOutput"); + GraphUtils::AddEdge(conv2d_node->GetOutDataAnchor(0), output_node->GetInDataAnchor(0)); + } + + void GraphWithGetNextNosink(const ComputeGraphPtr &graph) { + auto conv2d_node = MakeNode(graph, 3, 1, "conv1", "Conv2D"); + { + auto data1 = MakeNode(graph, 1, 1, "IteratorGetNext_data", "Data"); + GeTensorDesc tensor_desc(GeShape({-1,3,224,224}), FORMAT_NCHW, DT_FLOAT); + data1->GetOpDesc()->UpdateInputDesc(0, tensor_desc); + data1->GetOpDesc()->UpdateOutputDesc(0, tensor_desc); + AttrUtils::SetInt(data1->GetOpDesc(), ATTR_NAME_INDEX, 0); + GetLocalOmgContext().user_input_dims = {std::make_pair(data1->GetOpDesc()->GetName(), vector{-1,3,224,224})}; + + GraphUtils::AddEdge(data1->GetOutDataAnchor(0), conv2d_node->GetInDataAnchor(0)); + auto const1 = MakeConstNode(graph); + GraphUtils::AddEdge(const1->GetOutDataAnchor(0), conv2d_node->GetInDataAnchor(1)); + auto const2 = MakeConstNode(graph); + GraphUtils::AddEdge(const2->GetOutDataAnchor(0), conv2d_node->GetInDataAnchor(2)); + } + + auto output_node = MakeNode(graph, 1, 0, "output1", "NetOutput"); + GraphUtils::AddEdge(conv2d_node->GetOutDataAnchor(0), output_node->GetInDataAnchor(0)); + } + + // getnext has one data and has one out of shape + void GraphWithGetNextSink(const ComputeGraphPtr &graph) { + auto conv2d_node = MakeNode(graph, 3, 1, "conv1", "Conv2D"); + { + auto data1 = MakeNode(graph, 1, 2, "data", "IteratorV2"); + GeTensorDesc tensor_desc(GeShape({-1,3,224,224}), FORMAT_NCHW, DT_FLOAT); + GeTensorDesc shape_desc(GeShape({4,3,224,224}), FORMAT_NCHW, DT_FLOAT); + data1->GetOpDesc()->UpdateOutputDesc(0, tensor_desc); + data1->GetOpDesc()->UpdateOutputDesc(1, shape_desc); + AttrUtils::SetInt(data1->GetOpDesc(), ATTR_NAME_INDEX, 0); + GetLocalOmgContext().user_input_dims = {std::make_pair(data1->GetOpDesc()->GetName(), vector{-1,3,224,224})}; + + GraphUtils::AddEdge(data1->GetOutDataAnchor(0), conv2d_node->GetInDataAnchor(0)); + auto identity = MakeNode(graph, 1, 0, "identity", "Identity"); + GraphUtils::AddEdge(data1->GetOutDataAnchor(1), identity->GetInDataAnchor(0)); + auto const1 = MakeConstNode(graph); + GraphUtils::AddEdge(const1->GetOutDataAnchor(0), conv2d_node->GetInDataAnchor(1)); + auto const2 = MakeConstNode(graph); + GraphUtils::AddEdge(const2->GetOutDataAnchor(0), conv2d_node->GetInDataAnchor(2)); + } + + auto output_node = MakeNode(graph, 1, 0, "output1", "NetOutput"); + GraphUtils::AddEdge(conv2d_node->GetOutDataAnchor(0), 
output_node->GetInDataAnchor(0));
+  }
+};
+
+// graph is nullptr
+TEST_F(UtestMultiBatchClonePass, graph_nullptr) {
+  PassManager pass_manager;
+  pass_manager.AddPass("MultiBatchClonePass", new (std::nothrow) MultiBatchClonePass);
+  ComputeGraphPtr graph;
+  EXPECT_EQ(pass_manager.Run(graph), PARAM_INVALID);
+}
+
+// graph with subgraph
+TEST_F(UtestMultiBatchClonePass, graph_with_subgraph) {
+  PassManager pass_manager;
+  pass_manager.AddPass("MultiBatchClonePass", new (std::nothrow) MultiBatchClonePass);
+  ComputeGraphPtr graph = std::make_shared<ComputeGraph>("test_graph");
+  make_original_graph(graph);
+  EXPECT_EQ(pass_manager.Run(graph), SUCCESS);
+
+  ComputeGraphPtr owner = std::make_shared<ComputeGraph>("test_owner");
+  auto func_node = MakeNode(owner, 3, 1, "test_if", "If");
+  graph->SetParentNode(func_node);
+  graph->SetParentGraph(owner);
+  EXPECT_EQ(pass_manager.Run(graph), SUCCESS);
+}
+
+// graph that does not need multi-batch processing
+TEST_F(UtestMultiBatchClonePass, uncompute_graph) {
+  MultiBatchClonePass multi_batch_clone;
+  ComputeGraphPtr graph = std::make_shared<ComputeGraph>("test_graph");
+  make_original_graph(graph);
+  GetLocalOmgContext().need_multi_batch = false;
+  EXPECT_EQ(multi_batch_clone.Run(graph), SUCCESS);
+}
+
+// compute_graph with data from DATA
+TEST_F(UtestMultiBatchClonePass, compute_graph_with_data) {
+  MultiBatchClonePass multi_batch_clone;
+  ComputeGraphPtr graph = std::make_shared<ComputeGraph>("test_graph");
+  GraphWithJustData(graph);
+  GetLocalOmgContext().need_multi_batch = true;
+  EXPECT_EQ(multi_batch_clone.Run(graph), SUCCESS);
+  GetLocalOmgContext().dynamic_node_type = DATA;
+  GetLocalOmgContext().dynamic_dims = "1;2;4;8";
+  EXPECT_EQ(multi_batch_clone.Run(graph), SUCCESS);
+  EXPECT_EQ(GetLocalOmgContext().data_nodes.size(), 1);
+}
+
+// compute_graph with data from GetNext_nosink
+TEST_F(UtestMultiBatchClonePass, compute_graph_with_getnext_nosink) {
+  MultiBatchClonePass multi_batch_clone;
+  ComputeGraphPtr graph = std::make_shared<ComputeGraph>("test_graph");
+  GraphWithGetNextNosink(graph);
+  GetLocalOmgContext().need_multi_batch = true;
+  GetLocalOmgContext().dynamic_node_type = GETNEXT;
+  GetLocalOmgContext().dynamic_dims = "1;2;4;8";
+  EXPECT_EQ(multi_batch_clone.Run(graph), SUCCESS);
+  EXPECT_EQ(GetLocalOmgContext().getnext_nosink_nodes.size(), 1);
+}
+
+// compute_graph with data from GetNext_sink
+TEST_F(UtestMultiBatchClonePass, compute_graph_with_getnext_sink) {
+  MultiBatchClonePass multi_batch_clone;
+  ComputeGraphPtr graph = std::make_shared<ComputeGraph>("test_graph");
+  GraphWithGetNextSink(graph);
+  GetLocalOmgContext().need_multi_batch = true;
+  GetLocalOmgContext().dynamic_node_type = GETNEXT;
+  GetLocalOmgContext().dynamic_dims = "1;2;4;8";
+  EXPECT_EQ(multi_batch_clone.Run(graph), SUCCESS);
+  EXPECT_EQ(GetLocalOmgContext().getnext_nosink_nodes.size(), 0);
+}
+
+}

From 6ce14620cc1d1a4649aeb23a9d169a9789ccbcff Mon Sep 17 00:00:00 2001
From: zhangxiaokun
Date: Mon, 4 Jan 2021 21:15:19 +0800
Subject: [PATCH 35/54] Eliminate data_op_list_

---
 ge/executor/ge_executor.cc                    |  39 +-
 ge/graph/execute/graph_execute.cc             |  28 +-
 ge/graph/execute/graph_execute.h              |   6 +-
 .../load/new_model_manager/davinci_model.cc   | 321 ++++++++--------
 .../load/new_model_manager/davinci_model.h    |  55 ++-
 .../new_model_manager/davinci_model_parser.cc |  75 ----
 .../load/new_model_manager/model_manager.cc   |  14 +-
 .../load/new_model_manager/model_manager.h    |  16 +-
 inc/framework/executor/ge_executor.h          |   3 -
 .../ge/graph/load/davinci_model_unittest.cc   | 343 +++++++++++++++++-
 .../graph/load/kernel_task_info_unittest.cc   |   1 -
 11 files
changed, 539 insertions(+), 362 deletions(-) diff --git a/ge/executor/ge_executor.cc b/ge/executor/ge_executor.cc index abdc0c3f..0ea0e66d 100755 --- a/ge/executor/ge_executor.cc +++ b/ge/executor/ge_executor.cc @@ -676,7 +676,7 @@ Status GeExecutor::GetAIPPInfo(uint32_t model_id, uint32_t index, AippConfigInfo GELOGE(ACL_ERROR_GE_EXEC_NOT_INIT, "not inited yet!"); return ACL_ERROR_GE_EXEC_NOT_INIT; } - Status ret = GraphExecutor::GetAIPPInfo(model_id, index, aipp_info); + Status ret = GraphExecutor::GetAippInfo(model_id, index, aipp_info); if (ret != SUCCESS) { GELOGW("GetAIPPInfo is not success."); return ret; @@ -713,43 +713,6 @@ Status GeExecutor::GetModelAttr(uint32_t model_id, std::vector &dyn return SUCCESS; } -Status GeExecutor::GetModelDescInfoForZeroCopy(uint32_t model_id, std::vector &input_desc, - std::vector &output_desc) { - GELOGI("get model desc info for zero copy begin."); - if (!isInit_) { - GELOGE(ACL_ERROR_GE_EXEC_NOT_INIT, "GeExecutor has not been initialized!"); - return ACL_ERROR_GE_EXEC_NOT_INIT; - } - - std::vector input_desc_infos; - std::vector output_desc_infos; - std::vector input_formats; - std::vector output_formats; - - Status ret = GraphExecutor::GetInputOutputDescInfoForZeroCopy(model_id, input_desc_infos, output_desc_infos, - input_formats, output_formats); - if (ret != domi::SUCCESS) { - GELOGE(ret, "Get DescInfo from zero copy failed. ret = %u", ret); - return ACL_ERROR_GE_GET_TENSOR_INFO; - } - - if (input_formats.size() != input_desc_infos.size()) { - GELOGE(ACL_ERROR_GE_PARAM_INVALID, "input_formats.size() != input_desc_infos.size()."); - return ACL_ERROR_GE_PARAM_INVALID; - } - - if (output_formats.size() != output_desc_infos.size()) { - GELOGE(ACL_ERROR_GE_PARAM_INVALID, "output_formats.size() != output_desc_infos.size()."); - return ACL_ERROR_GE_PARAM_INVALID; - } - - GetGeTensorDescFromDomiInfo(input_desc, input_desc_infos, input_formats); - GetGeTensorDescFromDomiInfo(output_desc, output_desc_infos, output_formats); - - GELOGI("get model desc info from zero copy end."); - return ge::SUCCESS; -} - Status GeExecutor::CommandHandle(const Command &command) { Status ret = GraphLoader::CommandHandle(command); if (ret != SUCCESS) { diff --git a/ge/graph/execute/graph_execute.cc b/ge/graph/execute/graph_execute.cc index 97e2fd1b..3c5618e8 100755 --- a/ge/graph/execute/graph_execute.cc +++ b/ge/graph/execute/graph_execute.cc @@ -560,34 +560,10 @@ Status GraphExecutor::GetModelAttr(uint32_t model_id, std::vector &dynam return SUCCESS; } -Status GraphExecutor::GetInputOutputDescInfoForZeroCopy(uint32_t model_id, vector &input_desc, - vector &output_desc, - std::vector &input_formats, - std::vector &out_formats) { - try { - auto model_manager = ge::ModelManager::GetInstance(); - GE_CHECK_NOTNULL(model_manager); - Status ret = - model_manager->GetInputOutputDescInfoForZeroCopy(model_id, input_desc, output_desc, input_formats, out_formats); - if (ret != SUCCESS) { - GELOGE(ret, "GetInputOutputDescInfoForZeroCopy failed."); - return ret; - } - } catch (std::bad_alloc &) { - GELOGE(MEMALLOC_FAILED, "GetInputOutputDescInfoForZeroCopy failed, bad memory allocation occur !"); - return MEMALLOC_FAILED; - } catch (...) 
{ - GELOGE(FAILED, "GetInputOutputDescInfoForZeroCopy failed, some exceptions occur !"); - return FAILED; - } - - return SUCCESS; -} - -Status GraphExecutor::GetAIPPInfo(uint32_t model_id, uint32_t index, AippConfigInfo &aipp_info) { +Status GraphExecutor::GetAippInfo(uint32_t model_id, uint32_t index, AippConfigInfo &aipp_info) { auto model_manager = ge::ModelManager::GetInstance(); GE_CHECK_NOTNULL(model_manager); - Status ret = model_manager->GetAIPPInfo(model_id, index, aipp_info); + Status ret = model_manager->GetAippInfo(model_id, index, aipp_info); if (ret != SUCCESS) { GELOGW("GetAIPPInfo is not success."); return ret; diff --git a/ge/graph/execute/graph_execute.h b/ge/graph/execute/graph_execute.h index efc30743..d2a92e47 100755 --- a/ge/graph/execute/graph_execute.h +++ b/ge/graph/execute/graph_execute.h @@ -73,7 +73,7 @@ class GraphExecutor { vector &output_desc, std::vector &input_formats, std::vector &output_formats, bool new_model_desc = false); - static Status GetAIPPInfo(uint32_t model_id, uint32_t index, AippConfigInfo &aipp_info); + static Status GetAippInfo(uint32_t model_id, uint32_t index, AippConfigInfo &aipp_info); static Status GetAippType(uint32_t model_id, uint32_t index, InputAippType &type, size_t &aipp_index); @@ -110,10 +110,6 @@ class GraphExecutor { static Status GetModelAttr(uint32_t model_id, std::vector &dynamic_output_shape_info); - static Status GetInputOutputDescInfoForZeroCopy(uint32_t model_id, vector &input_desc, - vector &output_desc, - std::vector &input_formats, - std::vector &output_formats); static Status GetOrigInputInfo(uint32_t model_id, uint32_t index, OriginInputInfo &orig_input_info); static Status GetAllAippInputOutputDims(uint32_t model_id, uint32_t index, std::vector &input_dims, std::vector &output_dims); diff --git a/ge/graph/load/new_model_manager/davinci_model.cc b/ge/graph/load/new_model_manager/davinci_model.cc index 706d4b3b..1e8192a5 100755 --- a/ge/graph/load/new_model_manager/davinci_model.cc +++ b/ge/graph/load/new_model_manager/davinci_model.cc @@ -156,7 +156,6 @@ DavinciModel::~DavinciModel() { GE_CHK_STATUS(ModelRunStop()); op_list_.clear(); - data_op_list_.clear(); tensor_name_to_fixed_addr_size_.clear(); tensor_name_to_peer_output_index_.clear(); GE_DELETE_NEW_SINGLE(data_inputer_); @@ -878,7 +877,7 @@ Status DavinciModel::InitNodes(const ComputeGraphPtr &compute_graph) { auto it = op_desc_handle.find(op_desc->GetType()); if (it != op_desc_handle.end()) { if ((this->*it->second)(op_desc) != SUCCESS) { - GELOGE(PARAM_INVALID, "NetOutput init failed, Name: %s", op_desc->GetName().c_str()); + GELOGE(PARAM_INVALID, "Node init failed, Name: %s", op_desc->GetName().c_str()); return PARAM_INVALID; } continue; @@ -931,7 +930,7 @@ Status DavinciModel::InitNodes(const ComputeGraphPtr &compute_graph) { GE_TIMESTAMP_CALLNUM_END(LoadTBEKernelBinToOpDesc, "GraphLoader::LoadTBEKernelBinToOpDesc."); GE_TIMESTAMP_CALLNUM_END(InitTbeHandle, "GraphLoader::InitTbeHandle."); - return OptInputOutputInfo(data_by_index, output_op_list); + return GenInputOutputInfo(data_by_index, output_op_list); } void DavinciModel::SetLabelForDynamic(const NodePtr &node) { @@ -974,9 +973,6 @@ Status DavinciModel::InitDataOp(const ComputeGraphPtr &graph, const NodePtr &nod } data_by_index[data_index] = op_desc; - auto data_op = AttrUtils::CopyOpDesc(op_desc); - GE_CHECK_NOTNULL(data_op); - data_op_list_.push_back(data_op); if (known_node_) { return SUCCESS; } @@ -1022,23 +1018,18 @@ Status DavinciModel::InitDataOp(const ComputeGraphPtr &graph, const NodePtr 
&nod
 /// @param [in] output_op_list: list of NetOutput op.
 /// @return Status
 ///
-Status DavinciModel::OptInputOutputInfo(const map &data_by_index,
+Status DavinciModel::GenInputOutputInfo(const map &data_by_index,
                                         const vector &output_op_list) {
-  GELOGD("Data node size: %zu, NetOutput node size: %zu", data_op_list_.size(), output_op_list.size());
-  if (data_by_index.size() != data_op_list_.size()) {
-    GELOGE(INTERNAL_ERROR, "Data map size: %zu, Data list size: %zu.", data_by_index.size(), data_op_list_.size());
-    return INTERNAL_ERROR;
-  }
-
-  data_op_list_.clear();
+  GELOGD("Data node size: %zu, NetOutput node size: %zu", data_by_index.size(), output_op_list.size());
   for (auto &item : data_by_index) {
-    auto data_op = AttrUtils::CopyOpDesc(item.second);
-    GE_CHECK_NOTNULL(data_op);
-    data_op_list_.emplace_back(data_op);
     auto output_addrs = ModelUtils::GetOutputDataAddrs(runtime_param_, item.second);
     GELOGD("Data node: %s, output addr size: %zu", item.second->GetName().c_str(), output_addrs.size());
     input_addrs_list_.emplace_back(output_addrs);
+    GE_CHK_STATUS_RET(InitAippInfo(item.first, item.second), "Init AIPP info failed");
+    GE_CHK_STATUS_RET(InitAippType(item.first, item.second, data_by_index), "Init AIPP type failed");
+    GE_CHK_STATUS_RET(InitOrigInputInfo(item.first, item.second), "Init origin input info failed");
+    GE_CHK_STATUS_RET(InitAippInputOutputDims(item.first, item.second), "Init AIPP dims failed");
     if (item.second->GetType() == AIPP_DATA_TYPE) {
       GELOGI("This is dynamic aipp model, Node: %s", item.second->GetName().c_str());
       is_dynamic_aipp_ = true;
@@ -1066,7 +1057,8 @@ Status DavinciModel::OptInputOutputInfo(const map &data_by_
     }
   }

-  return InitOutputDescInfo(output_op_list, output_descs_, output_formats_);
+  GE_CHK_STATUS_RET(InitInputDescInfo(data_by_index), "Init input desc info failed");
+  return InitOutputDescInfo(output_op_list);
 }

 bool DavinciModel::IsGetNextSinkDynamic(const OpDescPtr &op_desc) {
@@ -1791,73 +1783,101 @@ void DavinciModel::GetUserDesignateShapeOrder(std::vector &user_inp
 /// @ingroup ge
 /// @brief Get AIPP input info
 /// @param [in] index
-/// @param [out] aipp_info
+/// @param [in] op_desc
 /// @return execute result
 ///
-Status DavinciModel::GetAIPPInfo(uint32_t index, AippConfigInfo &aipp_info) {
-  GE_CHK_BOOL_RET_STATUS(index < data_op_list_.size(), PARAM_INVALID, "Index %u is invalid.", index);
-  OpDescPtr data_op = data_op_list_[index];
-  if (!data_op->HasAttr(ATTR_NAME_AIPP)) {
-    GELOGW("GetAIPPInfo: there is not AIPP related with index %u.", index);
-    return ACL_ERROR_GE_AIPP_NOT_EXIST;
+Status DavinciModel::InitAippInfo(uint32_t index, const OpDescPtr &op_desc) {
+  if (!op_desc->HasAttr(ATTR_NAME_AIPP)) {
+    GELOGW("There is no AIPP info related to index %u.", index);
+    return SUCCESS;
   }

-  std::unique_ptr aipp_params(new (std::nothrow) domi::AippOpParams());
-  GE_CHECK_NOTNULL(aipp_params);
-
-  ge::GeAttrValue::NAMED_ATTRS aipp_attr;
-  GE_CHK_BOOL_RET_STATUS(AttrUtils::GetNamedAttrs(data_op, ATTR_NAME_AIPP, aipp_attr), GE_AIPP_NOT_EXIST,
+  domi::AippOpParams aipp_params;
+  GeAttrValue::NAMED_ATTRS aipp_attr;
+  GE_CHK_BOOL_RET_STATUS(AttrUtils::GetNamedAttrs(op_desc, ATTR_NAME_AIPP, aipp_attr), GE_AIPP_NOT_EXIST,
                          "Data node do not contain param aipp!");
-  GE_CHK_STATUS_RET(OpUtils::ConvertAippParams(aipp_attr, aipp_params.get()), "get aipp params failed");
-  GELOGI("GetAIPPInfo: node data: %s, type: %s, current index: %u, current node related input rank: %u",
-         data_op->GetName().c_str(), data_op->GetType().c_str(), index, aipp_params->related_input_rank());
+  GE_CHK_STATUS_RET(OpUtils::ConvertAippParams(aipp_attr, &aipp_params), "get aipp params failed");
+  GELOGI("node data: %s, type: %s, current index: %u, current node related input rank: %u",
+         op_desc->GetName().c_str(), op_desc->GetType().c_str(), index, aipp_params.related_input_rank());

-  GE_CHK_STATUS_RET(AippUtils::ConvertAippParams2AippInfo(aipp_params.get(), aipp_info),
+  AippConfigInfo aipp_info;
+  GE_CHK_STATUS_RET(AippUtils::ConvertAippParams2AippInfo(&aipp_params, aipp_info),
                     "convert aipp params to aipp config info failed");
+  aipp_info_list_[index] = aipp_info;
   return SUCCESS;
 }

-Status DavinciModel::GetAippType(uint32_t index, InputAippType &type, size_t &aipp_index) {
-  GE_CHK_BOOL_RET_STATUS(index < data_op_list_.size(), PARAM_INVALID, "Index %u is invalid.", index);
-  // Set default value
-  type = DATA_WITHOUT_AIPP;
-  aipp_index = 0xFFFFFFFF;  // default invalid value
-  OpDescPtr data_op = data_op_list_[index];
-  GE_CHECK_NOTNULL(data_op);
-  if (!data_op->HasAttr(ATTR_DATA_RELATED_AIPP_MODE)) {
+///
+/// @ingroup ge
+/// @brief Get AIPP input info
+/// @param [in] index
+/// @param [out] aipp_info
+/// @return execute result
+///
+Status DavinciModel::GetAippInfo(uint32_t index, AippConfigInfo &aipp_info) const {
+  const auto it = aipp_info_list_.find(index);
+  if (it == aipp_info_list_.end()) {
+    GELOGW("There is no AIPP info related to index %u.", index);
+    return ACL_ERROR_GE_AIPP_NOT_EXIST;
+  }
+
+  aipp_info = it->second;
+  return SUCCESS;
+}
+
+Status DavinciModel::InitAippType(uint32_t index, const OpDescPtr &op_desc, const map &data_list) {
+  if (!op_desc->HasAttr(ATTR_DATA_RELATED_AIPP_MODE)) {
     GELOGW("There is no aipp releated info with index %u.", index);
     return SUCCESS;
   }
-  std::string data_mode;
-  (void)AttrUtils::GetStr(data_op, ATTR_DATA_RELATED_AIPP_MODE, data_mode);
+
+  // Set default value
+  InputAippType aipp_type = DATA_WITHOUT_AIPP;
+  string data_mode;
+  (void)AttrUtils::GetStr(op_desc, ATTR_DATA_RELATED_AIPP_MODE, data_mode);
   if (data_mode == "static_aipp") {
-    type = DATA_WITH_STATIC_AIPP;
+    aipp_type = DATA_WITH_STATIC_AIPP;
   } else if (data_mode == "dynamic_aipp") {
-    type = DATA_WITH_DYNAMIC_AIPP;
+    aipp_type = DATA_WITH_DYNAMIC_AIPP;
   } else if (data_mode == "dynamic_aipp_conf") {
-    type = DYNAMIC_AIPP_NODE;
+    aipp_type = DYNAMIC_AIPP_NODE;
   } else {
     GELOGE(ACL_ERROR_GE_AIPP_MODE_INVALID, "The info of aipp releated info %s is invalid with index %u.",
            data_mode.c_str(), index);
     return ACL_ERROR_GE_AIPP_MODE_INVALID;
   }

-  if (type == DATA_WITH_DYNAMIC_AIPP) {
+  size_t aipp_index = 0xFFFFFFFF;  // default invalid value
+  if (aipp_type == DATA_WITH_DYNAMIC_AIPP) {
     string releated_name;
-    (void)AttrUtils::GetStr(data_op, ATTR_DATA_AIPP_DATA_NAME_MAP, releated_name);
-    for (size_t i = 0; i < data_op_list_.size(); ++i) {
-      GE_CHECK_NOTNULL(data_op_list_[i]);
-      if (data_op_list_[i]->GetName() == releated_name) {
-        GELOGI("Find aipp_data [%s] index %zu from index %u", releated_name.c_str(), i, index);
-        aipp_index = i;
+    (void)AttrUtils::GetStr(op_desc, ATTR_DATA_AIPP_DATA_NAME_MAP, releated_name);
+    for (const auto &item : data_list) {
+      if (item.second->GetName() == releated_name) {
+        GELOGI("Find aipp_data [%s] index %u from index %u", releated_name.c_str(), item.first, index);
+        aipp_index = item.first;
       }
    }
+
    if (aipp_index == 0xFFFFFFFF) {
-      GELOGE(ACL_ERROR_GE_AIPP_NOT_EXIST, "Can not find aipp data node from index %u", index);
-      return ACL_ERROR_GE_AIPP_NOT_EXIST;
+      GELOGW("Can not find aipp data node from index %u",
index); + return SUCCESS; } } + + aipp_type_list_[index] = { aipp_type, aipp_index }; + return SUCCESS; +} + +Status DavinciModel::GetAippType(uint32_t index, InputAippType &aipp_type, size_t &aipp_index) const { + const auto it = aipp_type_list_.find(index); + if (it == aipp_type_list_.end()) { + GELOGW("There is no aipp releated info with index %u.", index); + return SUCCESS; + } + + aipp_type = it->second.first; + aipp_index = it->second.second; return SUCCESS; } @@ -1873,7 +1893,7 @@ void DavinciModel::SetDynamicSize(const std::vector &batch_num, int32_ dynamic_type_ = dynamic_type; } -void DavinciModel::GetCurShape(std::vector &batch_info, int32_t &dynamic_type) { +void DavinciModel::GetCurShape(std::vector &batch_info, int32_t &dynamic_type) const { if (batch_size_.empty()) { GELOGD("User does not set dynamic size"); } @@ -1885,38 +1905,10 @@ void DavinciModel::GetCurShape(std::vector &batch_info, int32_t &dynami dynamic_type = dynamic_type_; } -void DavinciModel::GetModelAttr(vector &out_shape_info) { +void DavinciModel::GetModelAttr(vector &out_shape_info) const { out_shape_info.insert(out_shape_info.end(), dynamic_output_shape_info_.begin(), dynamic_output_shape_info_.end()); } -Status DavinciModel::GetInputOutputDescInfoForZeroCopy(vector &input_desc, - vector &output_desc, - std::vector &input_formats, - std::vector &output_formats) { - if (input_addrs_list_.empty() || input_addrs_list_[0].size() != kOutputNum) { - GELOGE(FAILED, "OP List Pointer is null or input_desc size is not 1!"); - return FAILED; - } - - GE_CHK_STATUS_RET(GetInputDescInfo(input_desc, input_formats), "get input desc info failed"); - - GE_CHK_STATUS_RET(GetOutputDescInfo(output_desc, output_formats), "get ouput desc info failed"); - - GE_CHK_BOOL_RET_STATUS(output_desc.size() == output_memory_size_list_.size(), INTERNAL_ERROR, - "output_desc size[%zu] not equal output_size_list_[%zu] size!", output_desc.size(), - output_memory_size_list_.size()); - - /// For function zero copy,the momery should be aligned by 512 bytes. - /// And, because of the cce op limit, size should be lager than the real shape size. The memory should be padded by 32 - /// bytes. 
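
The Init*/Get* split just above is the pattern this patch applies to every per-input AIPP query: each Init routine runs once per Data node during model load, while the OpDescs are still alive, and stores its result in a map keyed by input index; the matching Get accessor then degrades to a const map lookup. That is what lets data_op_list_ (and eventually the graph itself) be released once Init finishes. A minimal self-contained sketch of the pattern follows; AippConfigInfo, Status, and the error value here are simplified stand-ins for the real GE types, not the actual declarations:

#include <cstdint>
#include <map>

struct AippConfigInfo {
  int32_t aipp_mode = 0;
};

using Status = int32_t;
constexpr Status SUCCESS = 0;
constexpr Status ACL_ERROR_GE_AIPP_NOT_EXIST = -1;  // placeholder value

class ModelSketch {
 public:
  // Runs once per Data node during load, while the OpDescs still exist.
  Status InitAippInfo(uint32_t index, const AippConfigInfo &parsed) {
    aipp_info_list_[index] = parsed;  // cache the converted attribute
    return SUCCESS;
  }

  // Query path after load: a const lookup with no OpDesc access, so the
  // cached map is all that must outlive DavinciModel::Init.
  Status GetAippInfo(uint32_t index, AippConfigInfo &aipp_info) const {
    const auto it = aipp_info_list_.find(index);
    if (it == aipp_info_list_.end()) {
      return ACL_ERROR_GE_AIPP_NOT_EXIST;
    }
    aipp_info = it->second;
    return SUCCESS;
  }

 private:
  std::map<uint32_t, AippConfigInfo> aipp_info_list_;
};

int main() {
  ModelSketch model;
  model.InitAippInfo(0, AippConfigInfo{1});
  AippConfigInfo info;
  return model.GetAippInfo(0, info) == SUCCESS ? 0 : 1;
}

The trade-off is a small amount of extra memory for the cached entries in exchange for not retaining every Data OpDesc for the lifetime of the model.
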
- /// *size equals to ((tensorDesc->dataSize + 2 * 32 - 1) / 32) * 32; - for (size_t i = 0; i < output_memory_size_list_.size(); i++) { - output_desc[i].size = output_memory_size_list_[i]; - } - - return SUCCESS; -} - void DavinciModel::SetInputDimsInfo(const vector &model_input_dims, Format &format, InputOutputDescInfo &input) { uint32_t n, c, h, w; @@ -1966,24 +1958,30 @@ void DavinciModel::CreateInputDimsInfo(const OpDescPtr &op_desc, Format format, } } -Status DavinciModel::GetInputDescInfo(vector &input_desc, std::vector &formats) { - for (size_t index = 0; index < data_op_list_.size(); ++index) { - InputOutputDescInfo input; - GE_CHECK_NOTNULL(data_op_list_[index]); - GE_CHECK_NOTNULL(data_op_list_[index]->GetInputDescPtr(0)); +Status DavinciModel::InitInputDescInfo(const map &data_by_index) { + for (const auto &item : data_by_index) { + const auto op_desc = item.second; + GE_CHECK_NOTNULL(op_desc->GetInputDescPtr(0)); - Format format = data_op_list_[index]->GetInputDescPtr(0)->GetFormat(); - CreateInputDimsInfo(data_op_list_[index], format, input); + InputOutputDescInfo input; + Format format = op_desc->GetInputDescPtr(0)->GetFormat(); + CreateInputDimsInfo(op_desc, format, input); - input.data_type = data_op_list_[index]->GetInputDescPtr(0)->GetDataType(); - input.name = data_op_list_[index]->GetName(); + input.data_type = op_desc->GetInputDescPtr(0)->GetDataType(); + input.name = op_desc->GetName(); int64_t input_size = 0; - GE_CHK_STATUS_RET(TensorUtils::GetSize(*data_op_list_[index]->GetInputDescPtr(0), input_size), - "get input size failed."); + GE_CHK_STATUS_RET(TensorUtils::GetSize(*op_desc->GetInputDescPtr(0), input_size), "get input size failed."); input.size = input_size; - formats.push_back(format); - input_desc.push_back(input); + input_formats_.push_back(format); + input_descs_.push_back(input); } + return SUCCESS; +} + +Status DavinciModel::GetInputDescInfo(vector &input_descs, vector &input_formats) { + input_descs.insert(input_descs.end(), input_descs_.begin(), input_descs_.end()); + input_formats.insert(input_formats.end(), input_formats_.begin(), input_formats_.end()); + // cause GetInputDescInfo called not only once, set is_new_model_desc_ to false after calc the model input dims is_new_model_desc_ = false; return SUCCESS; @@ -2042,8 +2040,7 @@ void DavinciModel::CreateOutput(uint32_t index, const OpDescPtr &op_desc, InputO output.data_type = op_desc->GetInputDescPtr(index)->GetDataType(); } -Status DavinciModel::InitOutputDescInfo(const vector &output_op_list, - vector &output_descs, vector &output_formats) { +Status DavinciModel::InitOutputDescInfo(const vector &output_op_list) { GELOGD("Output node size: %zu", output_op_list.size()); for (const auto &op_desc : output_op_list) { uint32_t out_size = static_cast(op_desc->GetInputsSize()); @@ -2068,28 +2065,20 @@ Status DavinciModel::InitOutputDescInfo(const vector &output_op_list, std::to_string(src_index[index]); } output.name = output_name; - output_descs.push_back(output); - output_formats.push_back(format_result); + output_descs_.push_back(output); + output_formats_.push_back(format_result); } } return SUCCESS; } -Status DavinciModel::GetOutputDescInfo(vector &output_descs, vector &output_formats) { +Status DavinciModel::GetOutputDescInfo(vector &output_descs, + vector &output_formats) const { output_descs.insert(output_descs.end(), output_descs_.begin(), output_descs_.end()); output_formats.insert(output_formats.end(), output_formats_.begin(), output_formats_.end()); return SUCCESS; } -ge::Format 
DavinciModel::GetFormat() { - if ((data_op_list_.empty()) || data_op_list_[0] == nullptr || data_op_list_[0]->GetInputDescPtr(0) == nullptr) { - GELOGW("OP List Pointer is null or input_desc size is not 1!"); - return FORMAT_NCHW; - } - - return data_op_list_[0]->GetInputDescPtr(0)->GetFormat(); -} - Status DavinciModel::CopyInputData(const InputData &input_data, bool device_data) { rtMemcpyKind_t kind = device_data ? RT_MEMCPY_DEVICE_TO_DEVICE : RT_MEMCPY_HOST_TO_DEVICE; const std::vector &blobs = input_data.blobs; @@ -4004,25 +3993,45 @@ void DavinciModel::SetTotalFixedAddrsSize(string tensor_name, int64_t fix_addr_s } } -Status DavinciModel::GetOrigInputInfo(uint32_t index, OriginInputInfo &orig_input_info) { - GE_CHK_BOOL_RET_STATUS(index < data_op_list_.size(), PARAM_INVALID, "Index %u is invalid.", index); - OpDescPtr data_op = data_op_list_[index]; - if (!data_op->HasAttr(ATTR_NAME_AIPP_INPUTS) || !data_op->HasAttr(ATTR_NAME_AIPP_OUTPUTS)) { - GELOGE(ACL_ERROR_GE_AIPP_NOT_EXIST, "GetOrigInputInfo: there is not AIPP related with index %u.", index); - return ACL_ERROR_GE_AIPP_NOT_EXIST; +Status DavinciModel::InitOrigInputInfo(uint32_t index, const OpDescPtr &op_desc) { + if (!op_desc->HasAttr(ATTR_NAME_AIPP_INPUTS) || !op_desc->HasAttr(ATTR_NAME_AIPP_OUTPUTS)) { + GELOGI("there is not AIPP related with index %u, node: %s.", index, op_desc->GetName().c_str()); + return SUCCESS; } - vector inputs; - if (AttrUtils::GetListStr(data_op, ATTR_NAME_AIPP_INPUTS, inputs) && !inputs.empty()) { + vector inputs; + if (AttrUtils::GetListStr(op_desc, ATTR_NAME_AIPP_INPUTS, inputs) && !inputs.empty()) { std::string input = inputs[kAippOriginInputIndex]; - GELOGI("GetOrigInputInfo: origin input str: %s", input.c_str()); + GELOGI("origin input str: %s", input.c_str()); std::vector infos = ge::StringUtils::Split(input, ':'); if (infos.size() != kAippInfoNum) { - GELOGW("origin input str is invalid."); + GELOGE(ACL_ERROR_GE_AIPP_MODE_INVALID, "origin input str is invalid[%zu, %u].", infos.size(), kAippInfoNum); + return ACL_ERROR_GE_AIPP_MODE_INVALID; } - orig_input_info.format = TypeUtils::SerialStringToFormat(infos[kAippInfoFormat]); - orig_input_info.data_type = TypeUtils::SerialStringToDataType(infos[kAippInfoDataType]); - orig_input_info.dim_num = std::strtol(infos[kAippInfoDimNum].c_str(), nullptr, kDecimal); + + OriginInputInfo input_info; + input_info.format = TypeUtils::SerialStringToFormat(infos[kAippInfoFormat]); + input_info.data_type = TypeUtils::SerialStringToDataType(infos[kAippInfoDataType]); + input_info.dim_num = std::strtol(infos[kAippInfoDimNum].c_str(), nullptr, kDecimal); + orig_input_info_[index] = input_info; + } else { + OriginInputInfo input_info = { FORMAT_RESERVED, DT_UNDEFINED, 0 }; + orig_input_info_[index] = input_info; + } + + return SUCCESS; +} + +Status DavinciModel::GetOrigInputInfo(uint32_t index, OriginInputInfo &orig_input_info) const { + const auto it = orig_input_info_.find(index); + if (it == orig_input_info_.end()) { + GELOGE(ACL_ERROR_GE_AIPP_NOT_EXIST, "there is not AIPP related with index %u.", index); + return ACL_ERROR_GE_AIPP_NOT_EXIST; + } + + const OriginInputInfo &input_info = it->second; + if (input_info.format != FORMAT_RESERVED || input_info.data_type != DT_UNDEFINED) { + orig_input_info = input_info; } return SUCCESS; @@ -4032,7 +4041,8 @@ void DavinciModel::ParseAIPPInfo(std::string in_out_info, InputOutputDims &dims_ GELOGI("ParseAIPPInfo: origin str: %s", in_out_info.c_str()); std::vector infos = ge::StringUtils::Split(in_out_info, ':'); if 
(infos.size() != kAippInfoNum) { - GELOGW("origin input str is invalid."); + GELOGE(ACL_ERROR_GE_AIPP_MODE_INVALID, "origin input str is invalid[%zu, %u].", infos.size(), kAippInfoNum); + return; } dims_info.name = infos[kAippInfoTensorName]; dims_info.size = std::strtol(infos[kAippInfoTensorSize].c_str(), nullptr, kDecimal); @@ -4047,47 +4057,58 @@ void DavinciModel::ParseAIPPInfo(std::string in_out_info, InputOutputDims &dims_ } } -Status DavinciModel::GetAllAippInputOutputDims(uint32_t index, std::vector &input_dims, - std::vector &output_dims) { - GE_CHK_BOOL_RET_STATUS(index < data_op_list_.size(), PARAM_INVALID, "Index %u is invalid.", index); - OpDescPtr data_op = data_op_list_[index]; - if (!data_op->HasAttr(ATTR_NAME_AIPP_INPUTS) || !data_op->HasAttr(ATTR_NAME_AIPP_OUTPUTS)) { - GELOGE(ACL_ERROR_GE_AIPP_NOT_EXIST, "GetAllAippInputOutputDims: there is not AIPP related with index %u.", index); - return ACL_ERROR_GE_AIPP_NOT_EXIST; +Status DavinciModel::InitAippInputOutputDims(uint32_t index, const OpDescPtr &op_desc) { + if (!op_desc->HasAttr(ATTR_NAME_AIPP_INPUTS) || !op_desc->HasAttr(ATTR_NAME_AIPP_OUTPUTS)) { + GELOGI("there is not AIPP related with index %u.", index); + return SUCCESS; } - vector inputs; - if (AttrUtils::GetListStr(data_op, ATTR_NAME_AIPP_INPUTS, inputs) && !inputs.empty()) { - GELOGI("GetAllAippInputOutputDims: Data: %s has %zu related aippInfo.", data_op->GetName().c_str(), inputs.size()); + vector inputs; + vector input_dims; + if (AttrUtils::GetListStr(op_desc, ATTR_NAME_AIPP_INPUTS, inputs) && !inputs.empty()) { + GELOGI("Data: %s has %zu related aippInfo.", op_desc->GetName().c_str(), inputs.size()); for (auto it : inputs) { InputOutputDims input_info; ParseAIPPInfo(it, input_info); input_dims.emplace_back(input_info); - GELOGD("GetAllAippInputOutputDims Aipp origin input dims info: %s", it.c_str()); + GELOGD("Aipp origin input dims info: %s", it.c_str()); - ConstGeTensorDescPtr data_input_desc = data_op->GetInputDescPtr(kDataIndex); + ConstGeTensorDescPtr data_input_desc = op_desc->GetInputDescPtr(kDataIndex); int64_t data_input_size; - (void)TensorUtils::GetSize(*(data_op->GetInputDescPtr(kDataIndex)), data_input_size); - GELOGD( - "GetAllAippInputOutputDims related Data[%d]: tensor_name is %s, dim_num is %zu, tensor_size: %zu, format: " - "%s, data_type: %s, shape: %s .", - index, data_op->GetName().c_str(), data_input_desc->GetShape().GetDimNum(), data_input_size, - TypeUtils::FormatToSerialString(data_input_desc->GetFormat()).c_str(), - TypeUtils::DataTypeToSerialString(data_input_desc->GetDataType()).c_str(), - formats::JoinToString(data_input_desc->GetShape().GetDims()).c_str()); + (void)TensorUtils::GetSize(*(op_desc->GetInputDescPtr(kDataIndex)), data_input_size); + GELOGD("related Data[%d]: tensor_name: %s, dim_num: %zu, tensor_size: %zu, format: %s, data_type: %s, shape: %s", + index, op_desc->GetName().c_str(), data_input_desc->GetShape().GetDimNum(), data_input_size, + TypeUtils::FormatToSerialString(data_input_desc->GetFormat()).c_str(), + TypeUtils::DataTypeToSerialString(data_input_desc->GetDataType()).c_str(), + formats::JoinToString(data_input_desc->GetShape().GetDims()).c_str()); } } - vector outputs; - if (AttrUtils::GetListStr(data_op, ATTR_NAME_AIPP_OUTPUTS, outputs) && !outputs.empty()) { + vector outputs; + vector output_dims; + if (AttrUtils::GetListStr(op_desc, ATTR_NAME_AIPP_OUTPUTS, outputs) && !outputs.empty()) { for (auto it : outputs) { InputOutputDims output_info; ParseAIPPInfo(it, output_info); 
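
ParseAIPPInfo and InitOrigInputInfo both decode the colon-separated records stored under ATTR_NAME_AIPP_INPUTS and ATTR_NAME_AIPP_OUTPUTS. The unit tests below use "NCHW:DT_FLOAT:TensorName:TensorSize:3:1,2,8", which suggests the field order format : data type : tensor name : tensor size : dim count : comma-separated dims, and explains why a record that does not split into exactly kAippInfoNum fields is now rejected with ACL_ERROR_GE_AIPP_MODE_INVALID instead of only logging a warning. A standalone sketch of that decoding, with the field order and count (six) inferred from the kAippInfo* indices rather than taken from a header:

#include <cstdlib>
#include <sstream>
#include <string>
#include <vector>

struct DimsRecord {
  std::string format, data_type, name;
  long size = 0;          // the tests use a non-numeric placeholder here, so strtol yields 0
  unsigned long dim_num = 0;
  std::vector<long> dims;
};

// Splits "NCHW:DT_FLOAT:TensorName:TensorSize:3:1,2,8" into its six fields.
static bool ParseAippRecord(const std::string &text, DimsRecord &out) {
  std::vector<std::string> fields;
  std::stringstream ss(text);
  std::string part;
  while (std::getline(ss, part, ':')) {
    fields.push_back(part);
  }
  const size_t kExpectedFields = 6;  // mirrors kAippInfoNum in the patch
  if (fields.size() != kExpectedFields) {
    return false;  // the patch now reports ACL_ERROR_GE_AIPP_MODE_INVALID here
  }
  out.format = fields[0];
  out.data_type = fields[1];
  out.name = fields[2];
  out.size = std::strtol(fields[3].c_str(), nullptr, 10);
  out.dim_num = std::strtoul(fields[4].c_str(), nullptr, 10);
  std::stringstream ds(fields[5]);
  while (std::getline(ds, part, ',')) {
    out.dims.push_back(std::strtol(part.c_str(), nullptr, 10));
  }
  return true;
}

int main() {
  DimsRecord rec;
  return ParseAippRecord("NCHW:DT_FLOAT:TensorName:TensorSize:3:1,2,8", rec) && rec.dims.size() == 3 ? 0 : 1;
}

This also shows why the test string "NCHW:DT_FLOAT:TensorName" is the invalid case: it splits into only three fields.
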
output_dims.emplace_back(output_info);
-      GELOGD("GetAllAippInputOutputDims Aipp output dims info: %s", it.c_str());
+      GELOGD("Aipp output dims info: %s", it.c_str());
     }
   }

+  aipp_dims_info_[index] = { input_dims, output_dims };
+  return SUCCESS;
+}
+
+Status DavinciModel::GetAllAippInputOutputDims(uint32_t index, vector &input_dims,
+                                               vector &output_dims) const {
+  const auto it = aipp_dims_info_.find(index);
+  if (it == aipp_dims_info_.end()) {
+    GELOGE(ACL_ERROR_GE_AIPP_NOT_EXIST, "There is no AIPP info related to index %u.", index);
+    return ACL_ERROR_GE_AIPP_NOT_EXIST;
+  }
+
+  input_dims = it->second.first;
+  output_dims = it->second.second;
   return SUCCESS;
 }

diff --git a/ge/graph/load/new_model_manager/davinci_model.h b/ge/graph/load/new_model_manager/davinci_model.h
index 9ff59d4e..b5f546f1 100755
--- a/ge/graph/load/new_model_manager/davinci_model.h
+++ b/ge/graph/load/new_model_manager/davinci_model.h
@@ -286,13 +286,6 @@ class DavinciModel {
   // Modified from KernelTaskInfo.
   SuperKernelTaskInfo &GetSuperKernelTaskInfo() { return skt_info_; }

-  ///
-  /// @ingroup ge
-  /// @brief get model input and output format
-  /// @return ccTensorFormat_t current model input and output format
-  ///
-  Format GetFormat();
-
   rtModel_t GetRtModelHandle() const { return rt_model_handle_; }

   rtStream_t GetRtModelStream() const { return rt_model_stream_; }

@@ -326,7 +319,7 @@ class DavinciModel {
   Status GetInputOutputDescInfo(vector &input_desc, vector &output_desc);

   Status GetInputOutputDescInfo(vector &input_desc, vector &output_desc,
-                                vector &inputFormats, vector &output_formats);
+                                vector &input_formats, vector &output_formats);

   ///
   /// @ingroup ge
@@ -347,9 +340,9 @@ class DavinciModel {
   void GetUserDesignateShapeOrder(vector &user_input_shape_order) const;

-  void GetCurShape(vector &batch_info, int32_t &dynamic_type);
+  void GetCurShape(vector &batch_info, int32_t &dynamic_type) const;

-  void GetModelAttr(vector &dynamic_output_shape_info);
+  void GetModelAttr(vector &dynamic_output_shape_info) const;

   ///
   /// @ingroup ge
@@ -358,9 +351,9 @@ class DavinciModel {
   /// @param [out] aipp_info
   /// @return execute result
   ///
-  Status GetAIPPInfo(uint32_t index, AippConfigInfo &aipp_info);
+  Status GetAippInfo(uint32_t index, AippConfigInfo &aipp_info) const;

-  Status GetAippType(uint32_t index, InputAippType &type, size_t &aipp_index);
+  Status GetAippType(uint32_t index, InputAippType &type, size_t &aipp_index) const;

   ///
   /// @ingroup ge
@@ -378,17 +371,6 @@ class DavinciModel {
   ///
   void GetUniqueId(const OpDescPtr &op_desc, string &unique_identification);

-  ///
-  /// @ingroup ge
-  /// @brief get model input and output desc for zero copy
-  /// @param [out] input_shape model input size
-  /// @param [out] output_shape model output size
-  /// @return execute result
-  ///
-  Status GetInputOutputDescInfoForZeroCopy(vector &input_desc,
-                                           vector &output_desc,
-                                           vector &inputFormats, vector &output_formats);
-
   Status ReturnResult(uint32_t data_id, const bool rslt_flg, const bool seq_end_flg, OutputData *output_data);

   Status ReturnNoOutput(uint32_t data_id);

@@ -538,9 +520,9 @@ class DavinciModel {
   Status UpdateKnownZeroCopyAddr(vector &total_io_addrs, bool update_args = true);
   void SetKnownNodeAddrNotChanged(bool base_addr_not_changed) { base_addr_not_changed_ = base_addr_not_changed; }

-  Status GetOrigInputInfo(uint32_t index, OriginInputInfo &orig_input_info);
+  Status GetOrigInputInfo(uint32_t index, OriginInputInfo &orig_input_info) const;
   Status GetAllAippInputOutputDims(uint32_t index, vector &input_dims,
-
vector &output_dims); + vector &output_dims) const; void SetModelDescVersion(bool is_new_model_desc) { is_new_model_desc_ = is_new_model_desc; } // om file name void SetOmName(string om_name) { om_name_ = om_name; } @@ -626,7 +608,7 @@ class DavinciModel { void SetInputDimsInfo(const vector &model_input_dims, Format &format, InputOutputDescInfo &input); Status GetInputDescInfo(vector &input_desc, vector &input_formats); - Status GetOutputDescInfo(vector &output_desc, vector &output_formats); + Status GetOutputDescInfo(vector &output_desc, vector &output_formats) const; Status InitTaskInfo(domi::ModelTaskDef &modelTaskInfo); @@ -688,7 +670,7 @@ class DavinciModel { /// @param [in] output_op_list: list of NetOutput op. /// @return Status /// - Status OptInputOutputInfo(const map &data_by_index, const vector &output_op_list); + Status GenInputOutputInfo(const map &data_by_index, const vector &output_op_list); /// /// @ingroup ge @@ -856,8 +838,13 @@ class DavinciModel { Status InitOutputTensorInfo(const OpDescPtr &op_desc); Status GenOutputTensorInfo(OutputData *output_data, vector &outputs); - Status InitOutputDescInfo(const vector &output_op_list, - vector &output_desc, vector &formats); + Status InitInputDescInfo(const map &data_by_index); + Status InitOutputDescInfo(const vector &output_op_list); + + Status InitOrigInputInfo(uint32_t index, const OpDescPtr &op_desc); + Status InitAippInfo(uint32_t index, const OpDescPtr &op_desc); + Status InitAippType(uint32_t index, const OpDescPtr &op_desc, const map &data_list); + Status InitAippInputOutputDims(uint32_t index, const OpDescPtr &op_desc); void ParseAIPPInfo(string in_out_info, InputOutputDims &dims_info); void SetLabelForDynamic(const NodePtr &node); @@ -890,9 +877,6 @@ class DavinciModel { map op_list_; // release after DavinciModel::Init - // data op_desc - vector data_op_list_; - vector variable_op_list_; map new_input_data_info_; @@ -1048,6 +1032,13 @@ class DavinciModel { vector output_buffer_size_; vector output_shape_info_; + map orig_input_info_; + map aipp_info_list_; + map> aipp_type_list_; + map, vector>> aipp_dims_info_; + + vector input_descs_; + vector input_formats_; vector output_descs_; vector output_formats_; }; diff --git a/ge/graph/load/new_model_manager/davinci_model_parser.cc b/ge/graph/load/new_model_manager/davinci_model_parser.cc index 34180d08..76526de2 100644 --- a/ge/graph/load/new_model_manager/davinci_model_parser.cc +++ b/ge/graph/load/new_model_manager/davinci_model_parser.cc @@ -16,82 +16,7 @@ #include "graph/load/new_model_manager/davinci_model_parser.h" -#include -#include -#include -#include "securec.h" - -#include "common/debug/log.h" -#include "graph/load/new_model_manager/davinci_model.h" - namespace ge { -FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status ModelInfoParser(const ModelData &model, ModelInfo &model_info) { - GE_CHK_RT_RET(rtSetDevice(0)); - try { - uint32_t model_len = 0; - uint8_t *model_data = nullptr; - - Status ret = DavinciModelParser::ParseModelContent(model, model_data, model_len); - - GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(ret != SUCCESS, GE_CHK_RT(rtDeviceReset(0)); return ret, "Parse model failed"); - - auto *file_header = reinterpret_cast(model.model_data); - - GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(file_header == nullptr, GE_CHK_RT(rtDeviceReset(0)); - return PARAM_INVALID, "file_header is null."); - - model_info.version = file_header->version; - model_info.is_encrypt = false; - GE_IF_BOOL_EXEC(ENCRYPTED == file_header->is_encrypt, model_info.is_encrypt = true); - - std::shared_ptr 
davinci_model = - std::shared_ptr(new (std::nothrow) DavinciModel(model.priority, nullptr)); - - GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(davinci_model == nullptr, GE_CHK_RT(rtDeviceReset(0)); - return PARAM_INVALID, "davinci_model is null."); - - GE_MAKE_GUARD(davinci_model, [&] { davinci_model = nullptr; }); - - ModelHelper model_helper; - ret = model_helper.LoadModel(model); - GE_CHK_BOOL_TRUE_EXEC_WITH_LOG((ret != SUCCESS), GE_CHK_RT(rtDeviceReset(0)); return FAILED, "load model failed"); - - ret = davinci_model->Assign(model_helper.GetGeModel()); - GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(ret != SUCCESS, GE_CHK_RT(rtDeviceReset(0)); - return ret, "Parse davinci model data failed"); - - ret = davinci_model->Init(); - - GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(ret != SUCCESS, GE_CHK_RT(rtDeviceReset(0)); - return ret, "Davinci model init failed"); - - vector input_list; - vector output_list; - - ret = davinci_model->GetInputOutputDescInfo(input_list, output_list); - - GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(ret != SUCCESS, GE_CHK_RT(rtDeviceReset(0)); - return ret, "Davinci model GetInputOutputDescInfo failed"); - - for (const auto &desc : input_list) { - model_info.input_desc.push_back(desc.shape_info); - } - for (const auto &desc : output_list) { - model_info.output_desc.push_back(desc.shape_info); - } - - model_info.name = davinci_model->Name(); - } catch (...) { - DOMI_LOGE("OM model parser failed, some exceptions occur !"); - GE_CHK_RT(rtDeviceReset(0)); - return FAILED; - } - - GE_CHK_RT(rtDeviceReset(0)); - - return SUCCESS; -} - DavinciModelParser::DavinciModelParser() {} DavinciModelParser::~DavinciModelParser() {} diff --git a/ge/graph/load/new_model_manager/model_manager.cc b/ge/graph/load/new_model_manager/model_manager.cc index b2cce73a..22fddf86 100755 --- a/ge/graph/load/new_model_manager/model_manager.cc +++ b/ge/graph/load/new_model_manager/model_manager.cc @@ -995,16 +995,6 @@ Status ModelManager::GetModelAttr(uint32_t model_id, std::vector &dynami return SUCCESS; } -Status ModelManager::GetInputOutputDescInfoForZeroCopy(const uint32_t model_id, vector &input_desc, - vector &output_desc, - std::vector &inputFormats, - std::vector &outputFormats) { - std::shared_ptr davinci_model = GetModel(model_id); - GE_CHK_BOOL_RET_STATUS(davinci_model != nullptr, ACL_ERROR_GE_EXEC_MODEL_ID_INVALID, - "GetInputOutputDescInfo Failed, Invalid model id %u!", model_id); - return davinci_model->GetInputOutputDescInfoForZeroCopy(input_desc, output_desc, inputFormats, outputFormats); -} - /// /// @ingroup ge /// @brief Get AIPP info @@ -1013,11 +1003,11 @@ Status ModelManager::GetInputOutputDescInfoForZeroCopy(const uint32_t model_id, /// @param [out] aipp_info /// @return execute result /// -Status ModelManager::GetAIPPInfo(const uint32_t model_id, uint32_t index, AippConfigInfo &aipp_info) { +Status ModelManager::GetAippInfo(const uint32_t model_id, uint32_t index, AippConfigInfo &aipp_info) { std::shared_ptr davinci_model = GetModel(model_id); GE_CHK_BOOL_RET_STATUS(davinci_model != nullptr, ACL_ERROR_GE_EXEC_MODEL_ID_INVALID, "GetAIPPInfo failed, invalid model_id is %u.", model_id); - return davinci_model->GetAIPPInfo(index, aipp_info); + return davinci_model->GetAippInfo(index, aipp_info); } Status ModelManager::GetAippType(uint32_t model_id, uint32_t index, InputAippType &type, size_t &aipp_index) { diff --git a/ge/graph/load/new_model_manager/model_manager.h b/ge/graph/load/new_model_manager/model_manager.h index 500cad31..418bae62 100755 --- a/ge/graph/load/new_model_manager/model_manager.h +++ 
b/ge/graph/load/new_model_manager/model_manager.h @@ -239,24 +239,10 @@ class FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY ModelManager { /// @param [out] aipp_info /// @return execute result /// - ge::Status GetAIPPInfo(const uint32_t model_id, uint32_t index, AippConfigInfo &aipp_info); + ge::Status GetAippInfo(const uint32_t model_id, uint32_t index, AippConfigInfo &aipp_info); ge::Status GetAippType(uint32_t model_id, uint32_t index, InputAippType &type, size_t &aipp_index); - /// - /// @ingroup domi_ome - /// @brief set model input and output size zero copy - /// @param [in] model_id model id - /// @param [out] input_shape input tensor - /// @param [out] output_shape output tensor - /// @return SUCCESS success - /// @return PARAM_INVALID parameter invalid - /// - ge::Status GetInputOutputDescInfoForZeroCopy(const uint32_t model_id, std::vector &input_desc, - std::vector &output_desc, - std::vector &inputFormats, - std::vector &outputFormats); - ge::Status GetCurShape(const uint32_t model_id, std::vector &batch_info, int32_t &dynamic_type); ge::Status GetModelAttr(uint32_t model_id, std::vector &dynamic_output_shape_info); diff --git a/inc/framework/executor/ge_executor.h b/inc/framework/executor/ge_executor.h index 1b78860d..3136e172 100644 --- a/inc/framework/executor/ge_executor.h +++ b/inc/framework/executor/ge_executor.h @@ -157,9 +157,6 @@ class GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY GeExecutor { ge::Status GetAippType(uint32_t model_id, uint32_t index, InputAippType &type, size_t &aipp_index); - ge::Status GetModelDescInfoForZeroCopy(uint32_t model_id, std::vector &input_desc, - std::vector &output_desc); - ge::Status CommandHandle(const ge::Command &command); ge::Status SetDump(const DumpConfig &dump_config); diff --git a/tests/ut/ge/graph/load/davinci_model_unittest.cc b/tests/ut/ge/graph/load/davinci_model_unittest.cc index 9e51585b..fe7c70c9 100644 --- a/tests/ut/ge/graph/load/davinci_model_unittest.cc +++ b/tests/ut/ge/graph/load/davinci_model_unittest.cc @@ -139,13 +139,14 @@ TEST_F(UtestDavinciModel, init_data_op) { model.runtime_param_.mem_size = 5120000; ComputeGraphPtr graph = make_shared("default"); - OpDescPtr op_input = CreateOpDesc("data", DATA); GeTensorDesc tensor(GeShape(), FORMAT_NCHW, DT_FLOAT); TensorUtils::SetSize(tensor, 512); + + OpDescPtr op_input = CreateOpDesc("data", DATA); op_input->AddInputDesc(tensor); op_input->AddOutputDesc(tensor); op_input->SetInputOffset({1024}); - op_input->SetOutputOffset({5120}); + op_input->SetOutputOffset({1024}); NodePtr node_input = graph->AddNode(op_input); OpDescPtr op_output = CreateOpDesc("output", NETOUTPUT); @@ -168,12 +169,14 @@ TEST_F(UtestDavinciModel, init_data_op_subgraph) { model.runtime_param_.mem_size = 5120000; ComputeGraphPtr graph = make_shared("default"); - OpDescPtr op_input = CreateOpDesc("data", DATA); GeTensorDesc tensor(GeShape(), FORMAT_NCHW, DT_FLOAT); + TensorUtils::SetSize(tensor, 512); + + OpDescPtr op_input = CreateOpDesc("data", DATA); op_input->AddInputDesc(tensor); op_input->AddOutputDesc(tensor); op_input->SetInputOffset({1024}); - op_input->SetOutputOffset({5120}); + op_input->SetOutputOffset({1024}); NodePtr node = graph->AddNode(op_input); uint32_t data_op_index = 0; @@ -192,8 +195,10 @@ TEST_F(UtestDavinciModel, init_netoutput_op_subgraph) { model.runtime_param_.mem_size = 5120000; ComputeGraphPtr graph = make_shared("default"); - OpDescPtr op_output = CreateOpDesc("output", NETOUTPUT); GeTensorDesc tensor(GeShape(), FORMAT_NCHW, DT_FLOAT); + TensorUtils::SetSize(tensor, 
512); + + OpDescPtr op_output = CreateOpDesc("output", NETOUTPUT); op_output->AddInputDesc(tensor); op_output->SetInputOffset({1024}); op_output->SetSrcName( { "data" } ); @@ -426,4 +431,332 @@ TEST_F(UtestDavinciModel, InitRealSizeAndShapeInfo_succ3) { EXPECT_EQ(ret, SUCCESS); } +TEST_F(UtestDavinciModel, init_data_aipp_info) { + DavinciModel model(0, nullptr); + model.ge_model_ = make_shared(); // for CustAICPUKernelStore::GetCustAICPUKernelStore() + model.runtime_param_.mem_base = (uint8_t *)0x08000000; + model.runtime_param_.mem_size = 5120000; + ComputeGraphPtr graph = make_shared("default"); + + GeTensorDesc tensor(GeShape(), FORMAT_NCHW, DT_FLOAT); + TensorUtils::SetSize(tensor, 512); + + OpDescPtr op_desc = CreateOpDesc("data", DATA); + op_desc->AddInputDesc(tensor); + op_desc->AddOutputDesc(tensor); + op_desc->SetInputOffset({1024}); + op_desc->SetOutputOffset({1024}); + NodePtr node = graph->AddNode(op_desc); + + GeAttrValue::NAMED_ATTRS aipp_attr; + aipp_attr.SetAttr("aipp_mode", GeAttrValue::CreateFrom(domi::AippOpParams::dynamic)); + aipp_attr.SetAttr("related_input_rank", GeAttrValue::CreateFrom(0)); + aipp_attr.SetAttr("max_src_image_size", GeAttrValue::CreateFrom(2048)); + aipp_attr.SetAttr("support_rotation", GeAttrValue::CreateFrom(1)); + EXPECT_TRUE(AttrUtils::SetNamedAttrs(op_desc, ATTR_NAME_AIPP, aipp_attr)); + + AippConfigInfo aipp_info; + EXPECT_EQ(model.GetAippInfo(0, aipp_info), ACL_ERROR_GE_AIPP_NOT_EXIST); + EXPECT_EQ(model.InitNodes(graph), SUCCESS); + EXPECT_EQ(model.GetAippInfo(0, aipp_info), SUCCESS); + EXPECT_EQ(aipp_info.aipp_mode, domi::AippOpParams::dynamic); + + EXPECT_EQ(model.input_addrs_list_.size(), 1); + EXPECT_EQ(model.output_addrs_list_.size(), 0); + EXPECT_EQ(model.op_list_.size(), 1); +} + +TEST_F(UtestDavinciModel, init_data_aipp_static) { + DavinciModel model(0, nullptr); + model.ge_model_ = make_shared(); // for CustAICPUKernelStore::GetCustAICPUKernelStore() + model.runtime_param_.mem_base = (uint8_t *)0x08000000; + model.runtime_param_.mem_size = 5120000; + ComputeGraphPtr graph = make_shared("default"); + + GeTensorDesc tensor(GeShape(), FORMAT_NCHW, DT_FLOAT); + TensorUtils::SetSize(tensor, 512); + + OpDescPtr op_desc = CreateOpDesc("data", DATA); + op_desc->AddInputDesc(tensor); + op_desc->AddOutputDesc(tensor); + op_desc->SetInputOffset({1024}); + op_desc->SetOutputOffset({1024}); + NodePtr node = graph->AddNode(op_desc); + + AttrUtils::SetStr(op_desc, ATTR_DATA_RELATED_AIPP_MODE, "static_aipp"); + + InputAippType aipp_type; + size_t aipp_index = 0; + EXPECT_EQ(model.GetAippType(0, aipp_type, aipp_index), SUCCESS); + EXPECT_EQ(model.InitNodes(graph), SUCCESS); + EXPECT_EQ(model.GetAippType(0, aipp_type, aipp_index), SUCCESS); + EXPECT_EQ(aipp_type, DATA_WITH_STATIC_AIPP); + EXPECT_EQ(aipp_index, 0xFFFFFFFFu); + + EXPECT_EQ(model.input_addrs_list_.size(), 1); + EXPECT_EQ(model.output_addrs_list_.size(), 0); + EXPECT_EQ(model.op_list_.size(), 1); +} + +TEST_F(UtestDavinciModel, init_data_aipp_dynamic) { + DavinciModel model(0, nullptr); + model.ge_model_ = make_shared(); // for CustAICPUKernelStore::GetCustAICPUKernelStore() + model.runtime_param_.mem_base = (uint8_t *)0x08000000; + model.runtime_param_.mem_size = 5120000; + ComputeGraphPtr graph = make_shared("default"); + + GeTensorDesc tensor(GeShape(), FORMAT_NCHW, DT_FLOAT); + TensorUtils::SetSize(tensor, 512); + + OpDescPtr op_desc = CreateOpDesc("data", DATA); + op_desc->AddInputDesc(tensor); + op_desc->AddOutputDesc(tensor); + op_desc->SetInputOffset({1024}); + 
op_desc->SetOutputOffset({1024}); + NodePtr node = graph->AddNode(op_desc); // op_index 0 + AttrUtils::SetStr(op_desc, ATTR_DATA_RELATED_AIPP_MODE, "dynamic_aipp"); + AttrUtils::SetStr(op_desc, ATTR_DATA_AIPP_DATA_NAME_MAP, "releated_aipp"); + + InputAippType aipp_type; + size_t aipp_index = 0; + EXPECT_EQ(model.GetAippType(0, aipp_type, aipp_index), SUCCESS); + EXPECT_EQ(model.InitNodes(graph), SUCCESS); + EXPECT_EQ(model.GetAippType(0, aipp_type, aipp_index), SUCCESS); + + EXPECT_EQ(model.input_addrs_list_.size(), 1); + EXPECT_EQ(model.output_addrs_list_.size(), 0); + EXPECT_EQ(model.op_list_.size(), 1); +} + +TEST_F(UtestDavinciModel, init_data_aipp_releated) { + DavinciModel model(0, nullptr); + model.ge_model_ = make_shared(); // for CustAICPUKernelStore::GetCustAICPUKernelStore() + model.runtime_param_.mem_base = (uint8_t *)0x08000000; + model.runtime_param_.mem_size = 5120000; + ComputeGraphPtr graph = make_shared("default"); + + GeTensorDesc tensor(GeShape(), FORMAT_NCHW, DT_FLOAT); + TensorUtils::SetSize(tensor, 512); + + { + OpDescPtr op_desc = CreateOpDesc("data", DATA); + op_desc->AddInputDesc(tensor); + op_desc->AddOutputDesc(tensor); + op_desc->SetInputOffset({1024}); + op_desc->SetOutputOffset({1024}); + NodePtr node = graph->AddNode(op_desc); // op_index 0 + AttrUtils::SetStr(op_desc, ATTR_DATA_RELATED_AIPP_MODE, "dynamic_aipp"); + AttrUtils::SetStr(op_desc, ATTR_DATA_AIPP_DATA_NAME_MAP, "releated_aipp"); + } + { + OpDescPtr op_desc = CreateOpDesc("releated_aipp", DATA); + op_desc->AddInputDesc(tensor); + op_desc->AddOutputDesc(tensor); + op_desc->SetInputOffset({1024}); + op_desc->SetOutputOffset({1024}); + NodePtr node = graph->AddNode(op_desc); // op_index 1 + } + + InputAippType aipp_type; + size_t aipp_index = 0; + EXPECT_EQ(model.GetAippType(0, aipp_type, aipp_index), SUCCESS); + EXPECT_EQ(model.InitNodes(graph), SUCCESS); + EXPECT_EQ(model.GetAippType(0, aipp_type, aipp_index), SUCCESS); + EXPECT_EQ(aipp_type, DATA_WITH_DYNAMIC_AIPP); + EXPECT_EQ(aipp_index, 1); + + EXPECT_EQ(model.input_addrs_list_.size(), 2); + EXPECT_EQ(model.output_addrs_list_.size(), 0); + EXPECT_EQ(model.op_list_.size(), 2); +} + +TEST_F(UtestDavinciModel, init_data_aipp_dynamic_conf) { + DavinciModel model(0, nullptr); + model.ge_model_ = make_shared(); // for CustAICPUKernelStore::GetCustAICPUKernelStore() + model.runtime_param_.mem_base = (uint8_t *)0x08000000; + model.runtime_param_.mem_size = 5120000; + ComputeGraphPtr graph = make_shared("default"); + + GeTensorDesc tensor(GeShape(), FORMAT_NCHW, DT_FLOAT); + TensorUtils::SetSize(tensor, 512); + + OpDescPtr op_desc = CreateOpDesc("data", DATA); + op_desc->AddInputDesc(tensor); + op_desc->AddOutputDesc(tensor); + op_desc->SetInputOffset({1024}); + op_desc->SetOutputOffset({1024}); + NodePtr node = graph->AddNode(op_desc); // op_index 0 + AttrUtils::SetStr(op_desc, ATTR_DATA_RELATED_AIPP_MODE, "dynamic_aipp_conf"); + + InputAippType aipp_type; + size_t aipp_index = 0; + EXPECT_EQ(model.GetAippType(0, aipp_type, aipp_index), SUCCESS); + EXPECT_EQ(model.InitNodes(graph), SUCCESS); + EXPECT_EQ(model.GetAippType(0, aipp_type, aipp_index), SUCCESS); + EXPECT_EQ(aipp_type, DYNAMIC_AIPP_NODE); + EXPECT_EQ(aipp_index, 0xFFFFFFFFU); + + EXPECT_EQ(model.input_addrs_list_.size(), 1); + EXPECT_EQ(model.output_addrs_list_.size(), 0); + EXPECT_EQ(model.op_list_.size(), 1); +} + +TEST_F(UtestDavinciModel, init_data_aipp_dynamic_invalid) { + DavinciModel model(0, nullptr); + model.ge_model_ = make_shared(); // for 
CustAICPUKernelStore::GetCustAICPUKernelStore() + model.runtime_param_.mem_base = (uint8_t *)0x08000000; + model.runtime_param_.mem_size = 5120000; + ComputeGraphPtr graph = make_shared("default"); + + GeTensorDesc tensor(GeShape(), FORMAT_NCHW, DT_FLOAT); + TensorUtils::SetSize(tensor, 512); + + OpDescPtr op_desc = CreateOpDesc("data", DATA); + op_desc->AddInputDesc(tensor); + op_desc->AddOutputDesc(tensor); + op_desc->SetInputOffset({1024}); + op_desc->SetOutputOffset({1024}); + NodePtr node = graph->AddNode(op_desc); // op_index 0 + AttrUtils::SetStr(op_desc, ATTR_DATA_RELATED_AIPP_MODE, "dynamic_aipp_invalid"); + + InputAippType aipp_type; + size_t aipp_index = 0; + EXPECT_EQ(model.GetAippType(0, aipp_type, aipp_index), SUCCESS); + EXPECT_EQ(model.InitNodes(graph), ACL_ERROR_GE_AIPP_MODE_INVALID); + + EXPECT_EQ(model.input_addrs_list_.size(), 1); + EXPECT_EQ(model.output_addrs_list_.size(), 0); + EXPECT_EQ(model.op_list_.size(), 1); +} + +TEST_F(UtestDavinciModel, init_data_aipp_input_info_empty) { + DavinciModel model(0, nullptr); + model.ge_model_ = make_shared(); // for CustAICPUKernelStore::GetCustAICPUKernelStore() + model.runtime_param_.mem_base = (uint8_t *)0x08000000; + model.runtime_param_.mem_size = 5120000; + ComputeGraphPtr graph = make_shared("default"); + + GeTensorDesc tensor(GeShape(), FORMAT_NCHW, DT_FLOAT); + TensorUtils::SetSize(tensor, 512); + + OpDescPtr op_desc = CreateOpDesc("data", DATA); + op_desc->AddInputDesc(tensor); + op_desc->AddOutputDesc(tensor); + op_desc->SetInputOffset({1024}); + op_desc->SetOutputOffset({1024}); + NodePtr node = graph->AddNode(op_desc); // op_index 0 + + vector inputs = {}; + AttrUtils::SetListStr(op_desc, ATTR_NAME_AIPP_INPUTS, inputs); + vector outputs = {}; + AttrUtils::SetListStr(op_desc, ATTR_NAME_AIPP_OUTPUTS, outputs); + + OriginInputInfo orig_input_info; + EXPECT_EQ(model.GetOrigInputInfo(0, orig_input_info), ACL_ERROR_GE_AIPP_NOT_EXIST); + EXPECT_EQ(model.InitNodes(graph), SUCCESS); + EXPECT_EQ(model.GetOrigInputInfo(0, orig_input_info), SUCCESS); + + EXPECT_EQ(model.input_addrs_list_.size(), 1); + EXPECT_EQ(model.output_addrs_list_.size(), 0); + EXPECT_EQ(model.op_list_.size(), 1); +} + +TEST_F(UtestDavinciModel, init_data_aipp_input_info_normal) { + DavinciModel model(0, nullptr); + model.ge_model_ = make_shared(); // for CustAICPUKernelStore::GetCustAICPUKernelStore() + model.runtime_param_.mem_base = (uint8_t *)0x08000000; + model.runtime_param_.mem_size = 5120000; + ComputeGraphPtr graph = make_shared("default"); + + GeTensorDesc tensor(GeShape(), FORMAT_NCHW, DT_FLOAT); + TensorUtils::SetSize(tensor, 512); + + OpDescPtr op_desc = CreateOpDesc("data", DATA); + op_desc->AddInputDesc(tensor); + op_desc->AddOutputDesc(tensor); + op_desc->SetInputOffset({1024}); + op_desc->SetOutputOffset({1024}); + NodePtr node = graph->AddNode(op_desc); // op_index 0 + + vector inputs = { "NCHW:DT_FLOAT:TensorName:TensorSize:3:1,2,8" }; + AttrUtils::SetListStr(op_desc, ATTR_NAME_AIPP_INPUTS, inputs); + vector outputs = { "NCHW:DT_FLOAT:TensorName:TensorSize:3:1,2,8" }; + AttrUtils::SetListStr(op_desc, ATTR_NAME_AIPP_OUTPUTS, outputs); + + OriginInputInfo orig_input_info; + EXPECT_EQ(model.GetOrigInputInfo(0, orig_input_info), ACL_ERROR_GE_AIPP_NOT_EXIST); + EXPECT_EQ(model.InitNodes(graph), SUCCESS); + EXPECT_EQ(model.GetOrigInputInfo(0, orig_input_info), SUCCESS); + + EXPECT_EQ(model.input_addrs_list_.size(), 1); + EXPECT_EQ(model.output_addrs_list_.size(), 0); + EXPECT_EQ(model.op_list_.size(), 1); +} + +TEST_F(UtestDavinciModel, 
init_data_aipp_input_info_invalid) { + DavinciModel model(0, nullptr); + model.ge_model_ = make_shared(); // for CustAICPUKernelStore::GetCustAICPUKernelStore() + model.runtime_param_.mem_base = (uint8_t *)0x08000000; + model.runtime_param_.mem_size = 5120000; + ComputeGraphPtr graph = make_shared("default"); + + GeTensorDesc tensor(GeShape(), FORMAT_NCHW, DT_FLOAT); + TensorUtils::SetSize(tensor, 512); + + OpDescPtr op_desc = CreateOpDesc("data", DATA); + op_desc->AddInputDesc(tensor); + op_desc->AddOutputDesc(tensor); + op_desc->SetInputOffset({1024}); + op_desc->SetOutputOffset({1024}); + NodePtr node = graph->AddNode(op_desc); // op_index 0 + + vector inputs = { "NCHW:DT_FLOAT:TensorName" }; // Invalid + AttrUtils::SetListStr(op_desc, ATTR_NAME_AIPP_INPUTS, inputs); + vector outputs = { "NCHW:DT_FLOAT:TensorName:TensorSize:3:1,2,8" }; + AttrUtils::SetListStr(op_desc, ATTR_NAME_AIPP_OUTPUTS, outputs); + + OriginInputInfo orig_input_info; + EXPECT_EQ(model.GetOrigInputInfo(0, orig_input_info), ACL_ERROR_GE_AIPP_NOT_EXIST); + EXPECT_EQ(model.InitNodes(graph), ACL_ERROR_GE_AIPP_MODE_INVALID); + EXPECT_EQ(model.GetOrigInputInfo(0, orig_input_info), ACL_ERROR_GE_AIPP_NOT_EXIST); + + EXPECT_EQ(model.input_addrs_list_.size(), 1); + EXPECT_EQ(model.output_addrs_list_.size(), 0); + EXPECT_EQ(model.op_list_.size(), 1); +} + +TEST_F(UtestDavinciModel, init_data_aipp_input_dims_normal) { + DavinciModel model(0, nullptr); + model.ge_model_ = make_shared(); // for CustAICPUKernelStore::GetCustAICPUKernelStore() + model.runtime_param_.mem_base = (uint8_t *)0x08000000; + model.runtime_param_.mem_size = 5120000; + ComputeGraphPtr graph = make_shared("default"); + + GeTensorDesc tensor(GeShape(), FORMAT_NCHW, DT_FLOAT); + TensorUtils::SetSize(tensor, 512); + + OpDescPtr op_desc = CreateOpDesc("data", DATA); + op_desc->AddInputDesc(tensor); + op_desc->AddOutputDesc(tensor); + op_desc->SetInputOffset({1024}); + op_desc->SetOutputOffset({1024}); + NodePtr node = graph->AddNode(op_desc); // op_index 0 + + vector inputs = { "NCHW:DT_FLOAT:TensorName:TensorSize:3:1,2,8" }; + AttrUtils::SetListStr(op_desc, ATTR_NAME_AIPP_INPUTS, inputs); + vector outputs = { "NCHW:DT_FLOAT:TensorName:TensorSize:3:1,2,8" }; + AttrUtils::SetListStr(op_desc, ATTR_NAME_AIPP_OUTPUTS, outputs); + + vector input_dims; + vector output_dims; + EXPECT_EQ(model.GetAllAippInputOutputDims(0, input_dims, output_dims), ACL_ERROR_GE_AIPP_NOT_EXIST); + EXPECT_EQ(model.InitNodes(graph), SUCCESS); + EXPECT_EQ(model.GetAllAippInputOutputDims(0, input_dims, output_dims), SUCCESS); + EXPECT_EQ(input_dims.size(), 1); + EXPECT_EQ(output_dims.size(), 1); + + EXPECT_EQ(model.input_addrs_list_.size(), 1); + EXPECT_EQ(model.output_addrs_list_.size(), 0); + EXPECT_EQ(model.op_list_.size(), 1); +} } // namespace ge diff --git a/tests/ut/ge/graph/load/kernel_task_info_unittest.cc b/tests/ut/ge/graph/load/kernel_task_info_unittest.cc index 43abc54b..fe886b49 100644 --- a/tests/ut/ge/graph/load/kernel_task_info_unittest.cc +++ b/tests/ut/ge/graph/load/kernel_task_info_unittest.cc @@ -1120,7 +1120,6 @@ TEST_F(UtestKernelTaskInfo, kernel_task_info_init_success) { op_desc->AddOutputDesc(descout); op_desc->SetId(0); - model.data_op_list_.push_back(op_desc); model.op_list_[0] = op_desc; domi::TaskDef task_def; From 2697519926d43c97bca67bd31f434d2411ff12c8 Mon Sep 17 00:00:00 2001 From: zhengyuanhua Date: Mon, 4 Jan 2021 21:28:14 +0800 Subject: [PATCH 36/54] profiling training trace --- ge/common/profiling/profiling_manager.cc | 2 + ge/common/types.cc | 3 + 
ge/graph/build/graph_builder.cc | 52 +++++ ge/graph/build/graph_builder.h | 1 + ge/graph/build/task_generator.cc | 113 +++++++--- ge/graph/build/task_generator.h | 7 +- .../load/new_model_manager/davinci_model.cc | 14 +- .../load/new_model_manager/davinci_model.h | 2 + ge/hybrid/executor/worker/execution_engine.cc | 2 + ge/hybrid/model/hybrid_model_builder.cc | 196 +++++++++++++++++- ge/hybrid/model/hybrid_model_builder.h | 6 + .../node_executor/rts/rts_node_executor.cc | 33 +++ .../node_executor/rts/rts_node_executor.h | 13 ++ ge/hybrid/node_executor/task_context.h | 2 +- inc/framework/common/ge_types.h | 2 + inc/framework/common/types.h | 3 + 16 files changed, 420 insertions(+), 31 deletions(-) diff --git a/ge/common/profiling/profiling_manager.cc b/ge/common/profiling/profiling_manager.cc index 92417286..aad2bbe3 100644 --- a/ge/common/profiling/profiling_manager.cc +++ b/ge/common/profiling/profiling_manager.cc @@ -302,6 +302,8 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY void ProfilingManager::Profilin } data.append(" model_id:").append(std::to_string(model_id)); + data.append(" task_id:").append(std::to_string(graph.task_id)); + data.append(" stream_id:").append(std::to_string(graph.stream_id)); data.append("\n"); GraphDescReport(device_id, data); diff --git a/ge/common/types.cc b/ge/common/types.cc index 1cc70347..268e7caa 100644 --- a/ge/common/types.cc +++ b/ge/common/types.cc @@ -480,6 +480,9 @@ REGISTER_OPTYPE_DEFINE(HVDWAIT, "HorovodWait"); // aicpu op for online_infer dynamic_dims REGISTER_OPTYPE_DEFINE(GETDYNAMICDIMS, "GetDynamicDims"); +// profiling training trace node +REGISTER_OPTYPE_DEFINE(PROFILINGTRAININGTRACE, "ProfilingTrainingTrace"); + const std::string MODEL_ATTR_TASKS = "tasks"; const std::string MODEL_ATTR_TASK_GEN_BASE_ADDR = "task_gen_base_addr"; const std::string MODEL_ATTR_TASK_GEN_WEIGHT_ADDR = "task_gen_weight_addr"; diff --git a/ge/graph/build/graph_builder.cc b/ge/graph/build/graph_builder.cc index dce40c3e..143d5550 100644 --- a/ge/graph/build/graph_builder.cc +++ b/ge/graph/build/graph_builder.cc @@ -421,6 +421,52 @@ static Status GenerateTaskForConstant(const std::shared_ptr &graph return SUCCESS; } +Status GraphBuilder::MarkFpBpProfilingTaskAttr(ComputeGraphPtr &com_graph) { + bool original_unknown_shape_flag = com_graph->GetGraphUnknownFlag(); + com_graph->SetGraphUnknownFlag(false); + + GELOGD("Start to mark profiling task attr for fp and bp."); + TaskGenerator task_generator; + ProfilingPoint profiling_point; + std::vector all_reduce_node_index; + Status ret = task_generator.FindProfilingNodeIndex(com_graph, profiling_point, all_reduce_node_index); + com_graph->SetGraphUnknownFlag(original_unknown_shape_flag); + if (ret != SUCCESS) { + GELOGW("Find profiling node index failed."); + } + if (profiling_point.fp_index == 0 || profiling_point.bp_index == 0 || profiling_point.end_index.empty()) { + GELOGD("No need to mark fp bp profiling task attr."); + return SUCCESS; + } + // mark profiling task attr for node + uint32_t node_index = 0; + for (const auto &node : com_graph->GetAllNodes()) { + OpDescPtr op_desc = node->GetOpDesc(); + GE_CHECK_NOTNULL(node->GetOpDesc()); + node_index++; + if (profiling_point.fp_index == node_index) { + GELOGI("The first fp node of dynamic graph is %s, idx %u", op_desc->GetName().c_str(), node_index); + (void)ge::AttrUtils::SetBool(op_desc, ATTR_NAME_INSERT_FP_PROFILILNG_TASK, true); + } + if (profiling_point.bp_index == node_index) { + GELOGI("The bp node of dynamic graph is %s, idx %u", op_desc->GetName().c_str(), 
node_index); + (void)ge::AttrUtils::SetBool(op_desc, ATTR_NAME_INSERT_BP_PROFILILNG_TASK, true); + } + for (size_t i = 0; i < all_reduce_node_index.size(); i++) { + if (all_reduce_node_index[i] == node_index) { + GELOGI("The all reduce node of dynamic graph is %s, idx %u", op_desc->GetName().c_str(), node_index); + (void)ge::AttrUtils::SetBool(op_desc, ATTR_NAME_INSERT_BP_PROFILILNG_TASK, true); + continue; + } + } + if (profiling_point.end_index.find(node_index) != profiling_point.end_index.end()) { + GELOGI("The end node of dynamic graph is %s, idx %u", op_desc->GetName().c_str(), node_index); + (void)ge::AttrUtils::SetBool(op_desc, ATTR_NAME_INSERT_END_PROFILILNG_TASK, true); + } + } + return SUCCESS; +} + Status GraphBuilder::BuildForDynamicShapeGraph(ComputeGraphPtr &comp_graph, std::vector &subgraph_ptr_list, GeRootModelPtr &ge_root_model_ptr, GeModelPtr &ge_model_ptr, @@ -437,6 +483,12 @@ Status GraphBuilder::BuildForDynamicShapeGraph(ComputeGraphPtr &comp_graph, } } + // Set fp bp profiling task attr for graph + if (MarkFpBpProfilingTaskAttr(comp_graph) != SUCCESS) { + GELOGE(FAILED, "Set fp bp profiling task attr for graph."); + return FAILED; + } + auto all_graphs = comp_graph->GetAllSubgraphs(); if (all_graphs.empty()) { all_graphs.push_back(comp_graph); diff --git a/ge/graph/build/graph_builder.h b/ge/graph/build/graph_builder.h index b828a80d..524b60e0 100644 --- a/ge/graph/build/graph_builder.h +++ b/ge/graph/build/graph_builder.h @@ -60,6 +60,7 @@ class GraphBuilder { Status UpdateParentNodeOutputSize(const ge::ComputeGraphPtr &graph, ge::NodePtr &parent_node_ptr); Status CalcDynShapeRootGraphDataSize(const ge::OpDescPtr &op_desc); Status SecondPartition(ge::ComputeGraphPtr &comp_graph, vector &subgraph_ptr_list); + Status MarkFpBpProfilingTaskAttr(ComputeGraphPtr &com_graph); Status BuildForDynamicShapeGraph(ComputeGraphPtr &comp_graph, std::vector &subgraph_ptr_list, GeRootModelPtr &ge_root_model_ptr, GeModelPtr &ge_model_ptr, uint64_t session_id = INVALID_SESSION_ID); diff --git a/ge/graph/build/task_generator.cc b/ge/graph/build/task_generator.cc index 7e45ad61..21e82d11 100755 --- a/ge/graph/build/task_generator.cc +++ b/ge/graph/build/task_generator.cc @@ -274,6 +274,7 @@ Status TaskGenerator::GenerateTask(RunContext &run_context, ComputeGraphPtr &gra }; GE_MAKE_GUARD(release, callback); + uint64_t all_reduce_node_idx = 0; for (auto &node : graph->GetNodes(graph->GetGraphUnknownFlag())) { OpDescPtr op_desc = node->GetOpDesc(); GE_CHECK_NOTNULL(op_desc); @@ -292,7 +293,7 @@ Status TaskGenerator::GenerateTask(RunContext &run_context, ComputeGraphPtr &gra // Part2: Call auto fusion_task_info = FusionTaskInfo{run_context, graph, node, op_desc, node_index, ge_lib, - ops_kernel_manager, task_def_list, op_name_map, profiling_point, all_reduce_nodes}; + ops_kernel_manager, task_def_list, op_name_map, profiling_point, all_reduce_nodes, all_reduce_node_idx}; GE_CHK_STATUS_RET(GenerateTaskForFusionNode(fusion_task_info, fusion_nodes, fusion_nodes_seen), "Call GenerateTaskForFusionNode node:%s(%s) failed", name.c_str(), type.c_str()); // continue directly @@ -316,7 +317,8 @@ Status TaskGenerator::GenerateTask(RunContext &run_context, ComputeGraphPtr &gra type.c_str()); // Profiling task size_t task_list_size_before = task_def_list.size(); - GE_CHK_STATUS_RET(InsertProfilingTaskBefore(op_desc, profiling_point, all_reduce_nodes, node_index, task_def_list)); + GE_CHK_STATUS_RET(InsertProfilingTaskBefore(op_desc, profiling_point, all_reduce_nodes, + node_index, task_def_list, 
all_reduce_node_idx)); int64_t op_id = op_desc->GetId(); // Compatible with dynamic shape scenes, the default is 0 int64_t stream_id = 0; @@ -336,8 +338,8 @@ Status TaskGenerator::GenerateTask(RunContext &run_context, ComputeGraphPtr &gra return ret; } // Profiling task - GE_CHK_STATUS_RET(InsertProfilingTaskAfter(op_desc, profiling_point, all_reduce_nodes, node_index, task_def_list)); - + GE_CHK_STATUS_RET(InsertProfilingTaskAfter(op_desc, profiling_point, all_reduce_nodes, + node_index, task_def_list, all_reduce_node_idx)); size_t task_list_size_after = task_def_list.size(); // If tasks is reduced if (task_list_size_after < task_list_size_before) { @@ -380,6 +382,7 @@ Status TaskGenerator::GenerateTaskForFusionNode(FusionTaskInfo &fusion_task_info auto &op_name_map = fusion_task_info.op_name_map; auto &profiling_point = fusion_task_info.profiling_point; auto &all_reduce_nodes = fusion_task_info.all_reduce_nodes; + auto &all_reduce_idx = fusion_task_info.all_reduce_node_idx; // If op_desc have this attr, call nodes with same group key in a stream together if (ge::AttrUtils::GetInt(fusion_op_desc, ATTR_NAME_FUSION_GROUP_KEY, group_key) && (fusion_nodes_seen.count(node.get()) == 0)) { @@ -426,7 +429,8 @@ Status TaskGenerator::GenerateTaskForFusionNode(FusionTaskInfo &fusion_task_info return INTERNAL_ERROR; } // profiling task - (void)InsertProfilingTaskBefore(op_desc, profiling_point, all_reduce_nodes, node_index, task_def_list); + (void)InsertProfilingTaskBefore(op_desc, profiling_point, all_reduce_nodes, + node_index, task_def_list, all_reduce_idx); run_context.stream = run_context.graphStreamList[stream_id]; GELOGI("Fusion: Call %s to generate fusion_node:[fusion_node_name:%s(%s), id:%ld, stream_id:%ld] task.", op_kernel_lib_name.c_str(), fusion_node_name.c_str(), fusion_node_type.c_str(), op_id, stream_id); @@ -439,7 +443,8 @@ Status TaskGenerator::GenerateTaskForFusionNode(FusionTaskInfo &fusion_task_info return ret; } // profiling task - (void)InsertProfilingTaskAfter(op_desc, profiling_point, all_reduce_nodes, node_index, task_def_list); + (void)InsertProfilingTaskAfter(op_desc, profiling_point, all_reduce_nodes, + node_index, task_def_list, all_reduce_idx); size_t task_list_size_after = task_def_list.size(); // if tasks is reduced if (task_list_size_after < task_list_size_before) { @@ -830,6 +835,11 @@ Status TaskGenerator::GetFpBpIndex(const ComputeGraphPtr &graph, ProfilingPoint return SUCCESS; } +Status TaskGenerator::FindProfilingNodeIndex(const ComputeGraphPtr &graph, ProfilingPoint &profiling_point, + std::vector &all_reduce_nodes) { + return FindProfilingTaskIndex(graph, profiling_point, all_reduce_nodes); +} + Status TaskGenerator::FindProfilingTaskIndex(const ComputeGraphPtr &graph, ProfilingPoint &profiling_point, vector &all_reduce_nodes) const { GE_CHECK_NOTNULL(graph); @@ -840,7 +850,6 @@ Status TaskGenerator::FindProfilingTaskIndex(const ComputeGraphPtr &graph, Profi GELOGD("Profiling is not open."); return SUCCESS; } - GELOGI("Start get FP/BP index."); std::string fp_point_str; std::string bp_point_str; @@ -878,18 +887,27 @@ Status TaskGenerator::FindProfilingTaskIndex(const ComputeGraphPtr &graph, Profi return SUCCESS; } - Status TaskGenerator::InsertProfilingTaskBefore(const OpDescPtr &op_desc, const ProfilingPoint &profiling_point, vector &all_reduce_nodes, uint32_t node_index, - vector &task_def_list) { + vector &task_def_list, uint64_t &all_reduce_node_idx) { const char *profiling_mode = std::getenv(kProfilingMode); bool is_profiling = (profiling_mode != nullptr) 
|| ProfilingManager::Instance().ProfilingOn() || ProfilingManager::Instance().ProfilingTrainingTraceOn(); - if (!is_profiling || (profiling_point.fp_index == 0) || (profiling_point.bp_index == 0) || - (profiling_point.end_index.empty())) { + bool is_insert_fp_profiling_task = false; + (void)ge::AttrUtils::GetBool(op_desc, ATTR_NAME_INSERT_FP_PROFILILNG_TASK, is_insert_fp_profiling_task); + bool is_insert_bp_profiling_task = false; + (void)ge::AttrUtils::GetBool(op_desc, ATTR_NAME_INSERT_BP_PROFILILNG_TASK, is_insert_bp_profiling_task); + bool no_insert_profiling_task = ((profiling_point.fp_index == 0) || (profiling_point.bp_index == 0) || + (profiling_point.end_index.empty())) && + (!(is_insert_fp_profiling_task || is_insert_bp_profiling_task)); + if (!is_profiling || no_insert_profiling_task) { return SUCCESS; } - if (profiling_point.fp_index == node_index) { + GELOGD("Insert fp profiling task: %d, insert bp profiling task: %d, fp index: %u, bp index: %u, end index size: %zu", + is_insert_fp_profiling_task, is_insert_bp_profiling_task, profiling_point.fp_index, profiling_point.bp_index, + profiling_point.end_index.size()); + + if ((profiling_point.fp_index == node_index) || is_insert_fp_profiling_task) { uint64_t jobid_log_id = ge::GetContext().TraceId(); GELOGI("The first FP operator is %s, idx %u, job_id %lu", op_desc->GetName().c_str(), node_index, jobid_log_id); @@ -913,22 +931,40 @@ Status TaskGenerator::InsertProfilingTaskBefore(const OpDescPtr &op_desc, const task_def_list.emplace_back(fp_task_def); } - for (size_t i = 0; i < all_reduce_nodes.size(); i++) { - if (all_reduce_nodes[i] != node_index) { - continue; + bool is_all_reduce = (op_desc->GetType() == HCOMALLREDUCE || op_desc->GetType() == HVDCALLBACKALLREDUCE); + uint64_t all_reduce_task_idx = 0; + bool is_insert_all_reduce_task = false; + if (is_all_reduce && is_insert_bp_profiling_task) { + all_reduce_task_idx = all_reduce_node_idx; + is_insert_all_reduce_task = true; + } + if (is_all_reduce) { + all_reduce_node_idx++; + } + if (!is_insert_all_reduce_task) { + for (size_t i = 0; i < all_reduce_nodes.size(); i++) { + if (all_reduce_nodes[i] == node_index) { + all_reduce_task_idx = i; + is_insert_all_reduce_task = true; + break; + } } + } + + if (is_insert_all_reduce_task) { GELOGI("The start allreduce operator is %s, idx %u", op_desc->GetName().c_str(), node_index); TaskDef ar_task_def; ar_task_def.set_type(RT_MODEL_TASK_PROFILER_TRACE); ar_task_def.set_stream_id(op_desc->GetStreamId()); LogTimeStampDef *ar_log_def = ar_task_def.mutable_log_timestamp(); if (ar_log_def != nullptr) { - GE_IF_BOOL_EXEC(TypeUtils::CheckUint64MulOverflow(i, kProfilingArStep), + GE_IF_BOOL_EXEC(TypeUtils::CheckUint64MulOverflow(all_reduce_task_idx, kProfilingArStep), GELOGE(FAILED, "Multiply result is out of range."); return FAILED); - auto log_id = i * kProfilingArStep + kProfilingArStartLogid; + auto log_id = all_reduce_task_idx * kProfilingArStep + kProfilingArStartLogid; ar_log_def->set_logid(log_id); ar_log_def->set_notify(false); + (void)ge::AttrUtils::SetInt(op_desc, ATTR_NAME_INSERT_PROFILILNG_TASK_LOG_ID, log_id); } task_def_list.push_back(ar_task_def); } @@ -937,16 +973,27 @@ Status TaskGenerator::InsertProfilingTaskBefore(const OpDescPtr &op_desc, const Status TaskGenerator::InsertProfilingTaskAfter(const OpDescPtr &op_desc, const ProfilingPoint &profiling_point, vector &all_reduce_nodes, uint32_t node_index, - vector &task_def_list) { + vector &task_def_list, uint64_t all_reduce_node_idx) { GE_CHECK_NOTNULL(op_desc); const char 
@@ -937,16 +973,27 @@ Status TaskGenerator::InsertProfilingTaskBefore(const OpDescPtr &op_desc, const
 Status TaskGenerator::InsertProfilingTaskAfter(const OpDescPtr &op_desc, const ProfilingPoint &profiling_point,
                                                vector<uint32_t> &all_reduce_nodes, uint32_t node_index,
-                                               vector<domi::TaskDef> &task_def_list) {
+                                               vector<domi::TaskDef> &task_def_list, uint64_t all_reduce_node_idx) {
   GE_CHECK_NOTNULL(op_desc);
   const char *profiling_mode = std::getenv(kProfilingMode);
   bool is_profiling = (profiling_mode != nullptr) || ProfilingManager::Instance().ProfilingOn() ||
                       ProfilingManager::Instance().ProfilingTrainingTraceOn();
-  if (!is_profiling || (profiling_point.fp_index == 0) || (profiling_point.bp_index == 0) ||
-      (profiling_point.end_index.empty())) {
+  bool is_insert_bp_profiling_task = false;
+  (void)ge::AttrUtils::GetBool(op_desc, ATTR_NAME_INSERT_BP_PROFILILNG_TASK, is_insert_bp_profiling_task);
+  bool is_insert_end_profiling_task = false;
+  (void)ge::AttrUtils::GetBool(op_desc, ATTR_NAME_INSERT_END_PROFILILNG_TASK, is_insert_end_profiling_task);
+  bool no_insert_profiling_task = ((profiling_point.fp_index == 0) || (profiling_point.bp_index == 0) ||
+                                   (profiling_point.end_index.empty())) &&
+                                  (!(is_insert_bp_profiling_task || is_insert_end_profiling_task));
+  if (!is_profiling || no_insert_profiling_task) {
     return SUCCESS;
   }
-  if (profiling_point.bp_index == node_index) {
+  GELOGD("Insert bp profiling task: %d, insert end profiling task: %d, fp index: %u, bp index: %u, end index size: %zu",
+         is_insert_bp_profiling_task, is_insert_end_profiling_task, profiling_point.fp_index, profiling_point.bp_index,
+         profiling_point.end_index.size());
+
+  bool is_all_reduce = (op_desc->GetType() == HCOMALLREDUCE || op_desc->GetType() == HVDCALLBACKALLREDUCE);
+  if ((profiling_point.bp_index == node_index) || (!is_all_reduce && is_insert_bp_profiling_task)) {
     GELOGI("The last BP operator is %s, idx %u", op_desc->GetName().c_str(), node_index);
     TaskDef bp_task_def;
     bp_task_def.set_type(RT_MODEL_TASK_PROFILER_TRACE);
@@ -957,7 +1004,9 @@ Status TaskGenerator::InsertProfilingTaskAfter(const OpDescPtr &op_desc, const P
     bp_log_def->set_notify(false);
     task_def_list.emplace_back(bp_task_def);
   }
-  if (profiling_point.end_index.find(node_index) != profiling_point.end_index.end()) {
+
+  if (profiling_point.end_index.find(node_index) != profiling_point.end_index.end() ||
+      is_insert_end_profiling_task) {
     GELOGI("The iteration end operator is %s, idx %u", op_desc->GetName().c_str(), node_index);
     TaskDef end_task_def;
     end_task_def.set_type(RT_MODEL_TASK_PROFILER_TRACE);
@@ -969,20 +1018,32 @@ Status TaskGenerator::InsertProfilingTaskAfter(const OpDescPtr &op_desc, const P
     task_def_list.emplace_back(end_task_def);
   }
 
+  uint32_t all_reduce_task_idx = 0;
+  bool is_insert_all_reduce_task = false;
+  if (is_all_reduce && is_insert_bp_profiling_task) {
+    all_reduce_task_idx = all_reduce_node_idx;
+    is_insert_all_reduce_task = true;
+  }
+
   for (size_t i = 0; i < all_reduce_nodes.size(); i++) {
-    if (all_reduce_nodes[i] != node_index) {
-      continue;
+    if (all_reduce_nodes[i] == node_index) {
+      all_reduce_task_idx = i;
+      is_insert_all_reduce_task = true;
+      break;
     }
+  }
+
+  if (is_insert_all_reduce_task) {
     GELOGI("The end allreduce operator is %s, idx %u", op_desc->GetName().c_str(), node_index);
     TaskDef ar_task_def;
     ar_task_def.set_type(RT_MODEL_TASK_PROFILER_TRACE);
     ar_task_def.set_stream_id(op_desc->GetStreamId());
     LogTimeStampDef *ar_log_def = ar_task_def.mutable_log_timestamp();
     GE_CHECK_NOTNULL(ar_log_def);
-    GE_IF_BOOL_EXEC(TypeUtils::CheckUint64MulOverflow(i, kProfilingArStep),
+    GE_IF_BOOL_EXEC(TypeUtils::CheckUint64MulOverflow(all_reduce_task_idx, kProfilingArStep),
                     GELOGE(FAILED, "Multiply result is out of range.");
                     return FAILED);
-    auto log_id = i * kProfilingArStep + kProfilingArEndLogid;
+    auto log_id = all_reduce_task_idx * kProfilingArStep + kProfilingArEndLogid;
     ar_log_def->set_logid(log_id);
     ar_log_def->set_notify(false);
     task_def_list.emplace_back(ar_task_def);
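
Both hooks now fire either on a matching profiling-point index (the static-shape path) or on per-op boolean attributes, so a pass that knows the FP/BP/end positions can mark nodes ahead of time; dynamic-shape graphs have no stable node_index to compare against. The marking itself is not part of this patch; it would look roughly like:

// Hypothetical upstream marking; the attribute names are the ones read above.
(void)ge::AttrUtils::SetBool(fp_op_desc, ATTR_NAME_INSERT_FP_PROFILILNG_TASK, true);
(void)ge::AttrUtils::SetBool(bp_op_desc, ATTR_NAME_INSERT_BP_PROFILILNG_TASK, true);
(void)ge::AttrUtils::SetBool(end_op_desc, ATTR_NAME_INSERT_END_PROFILILNG_TASK, true);
// With any flag set, trace tasks are emitted even when the profiling_point
// indexes were never resolved (fp_index / bp_index left at 0).
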
diff --git a/ge/graph/build/task_generator.h b/ge/graph/build/task_generator.h
index c93b2007..5970954c 100755
--- a/ge/graph/build/task_generator.h
+++ b/ge/graph/build/task_generator.h
@@ -51,6 +51,7 @@ struct FusionTaskInfo {
   std::map<uint32_t, string> &op_name_map;
   ProfilingPoint &profiling_point;
   vector<uint32_t> all_reduce_nodes;
+  uint64_t all_reduce_node_idx;
 };
 
 class TaskGenerator {
@@ -76,6 +77,8 @@ class TaskGenerator {
   ///
   Status GetTaskInfo(Model &model, ComputeGraphPtr &graph, uint64_t session_id, RunContext &run_context);
 
+  Status FindProfilingNodeIndex(const ComputeGraphPtr &graph, ProfilingPoint &profiling_point,
+                                std::vector<uint32_t> &all_reduce_nodes);
  private:
   Status UpdateAnchorStatus(const NodePtr &node);
@@ -126,10 +129,10 @@ class TaskGenerator {
                                std::vector<uint32_t> &all_reduce_nodes) const;
   Status InsertProfilingTaskBefore(const OpDescPtr &op_desc, const ProfilingPoint &profiling_point,
                                    std::vector<uint32_t> &all_reduce_nodes, uint32_t node_index,
-                                   std::vector<domi::TaskDef> &task_def_list);
+                                   std::vector<domi::TaskDef> &task_def_list, uint64_t &all_reduce_node_idx);
   Status InsertProfilingTaskAfter(const OpDescPtr &op_desc, const ProfilingPoint &profiling_point,
                                   std::vector<uint32_t> &all_reduce_nodes, uint32_t node_index,
-                                  std::vector<domi::TaskDef> &task_def_list);
+                                  std::vector<domi::TaskDef> &task_def_list, uint64_t all_reduce_node_idx);
 
   static bool IsProfPoint(const OpDescPtr &op, const std::string &name);
diff --git a/ge/graph/load/new_model_manager/davinci_model.cc b/ge/graph/load/new_model_manager/davinci_model.cc
index 706d4b3b..ad5ee49b 100755
--- a/ge/graph/load/new_model_manager/davinci_model.cc
+++ b/ge/graph/load/new_model_manager/davinci_model.cc
@@ -3113,6 +3113,8 @@ Status DavinciModel::DistributeTask() {
       task_desc_info.stream_id = task->GetStreamId();
       task_desc_info.shape_type = "static";
       task_desc_info.cur_iter_num = 0;
+      profiler_report_op_info_[task_desc_info.op_name] =
+          std::pair<uint32_t, uint32_t>(task_desc_info.task_id, task_desc_info.stream_id);
       task_desc_info_.emplace_back(task_desc_info);
       if (flag) {
         if (task->GetSktTaskID() != 0xFFFFFFFF) {
@@ -3120,6 +3122,8 @@
           string op_name = "super_kernel_" + to_string(task_index);
           task_desc_info.op_name = op_name;
           task_desc_info.task_id = task->GetSktTaskID();
+          profiler_report_op_info_[task_desc_info.op_name] =
+              std::pair<uint32_t, uint32_t>(task_desc_info.task_id, task_desc_info.stream_id);
           task_desc_info_.emplace_back(task_desc_info);
         }
       }
@@ -3991,7 +3995,15 @@ Status DavinciModel::GetComputeGraphInfo(vector<ComputeGraphDescInfo> &graph_des
     compute_graph_info.output_format = op_desc.output_format;
     compute_graph_info.output_shape = op_desc.output_shape;
     compute_graph_info.output_data_type = op_desc.output_data_type;
-
+    uint32_t task_id = 0;
+    uint32_t stream_id = 0;
+    auto iter = profiler_report_op_info_.find(op_desc.op_name);
+    if (iter != profiler_report_op_info_.end()) {
+      task_id = iter->second.first;
+      stream_id = iter->second.second;
+    }
+    compute_graph_info.task_id = task_id;
+    compute_graph_info.stream_id = stream_id;
     graph_desc_info.emplace_back(compute_graph_info);
   }
   return SUCCESS;
diff --git a/ge/graph/load/new_model_manager/davinci_model.h b/ge/graph/load/new_model_manager/davinci_model.h
index 9ff59d4e..893dfc2a 100755
--- a/ge/graph/load/new_model_manager/davinci_model.h
+++ b/ge/graph/load/new_model_manager/davinci_model.h
@@ -978,6 +978,8 @@ class DavinciModel {
   // for profiling task and graph info
   vector<TaskDescInfo> task_desc_info_;
 
+  std::map<std::string, std::pair<uint32_t, uint32_t>> profiler_report_op_info_;
+
   int64_t maxDumpOpNum_;
   // for data dump
   DataDumper data_dumper_;
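
With the template arguments restored, profiler_report_op_info_ is a plain name-to-(task_id, stream_id) map filled in DistributeTask and read back in GetComputeGraphInfo. A minimal round trip (the op name and id values below are made up for illustration):

std::map<std::string, std::pair<uint32_t, uint32_t>> profiler_report_op_info_;

// DistributeTask(): remember where each op's task landed.
profiler_report_op_info_["conv1"] = std::pair<uint32_t, uint32_t>(12U, 3U);

// GetComputeGraphInfo(): fall back to 0/0 for ops that never produced a task.
uint32_t task_id = 0;
uint32_t stream_id = 0;
auto iter = profiler_report_op_info_.find("conv1");
if (iter != profiler_report_op_info_.end()) {
  task_id = iter->second.first;     // 12
  stream_id = iter->second.second;  // 3
}
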
diff --git a/ge/hybrid/executor/worker/execution_engine.cc b/ge/hybrid/executor/worker/execution_engine.cc
index 21dd8e4b..e9c6ef29 100755
--- a/ge/hybrid/executor/worker/execution_engine.cc
+++ b/ge/hybrid/executor/worker/execution_engine.cc
@@ -221,6 +221,8 @@ Status NodeDoneCallback::GetGraphDescInfo(const NodePtr node, const HybridModel
       tmp_compute_graph_info.output_shape.emplace_back(output_desc.GetShape().GetDims());
       tmp_compute_graph_info.output_data_type.emplace_back(output_desc.GetDataType());
     }
+    tmp_compute_graph_info.task_id = context_->GetTaskId();
+    tmp_compute_graph_info.stream_id = context_->GetStreamId();
    compute_graph_info.emplace_back(tmp_compute_graph_info);
     GELOGD("GetComputeGraphInfo of node [%s] end.", node->GetName().c_str());
   }
diff --git a/ge/hybrid/model/hybrid_model_builder.cc b/ge/hybrid/model/hybrid_model_builder.cc
index 46c9c39b..32fc495a 100755
--- a/ge/hybrid/model/hybrid_model_builder.cc
+++ b/ge/hybrid/model/hybrid_model_builder.cc
@@ -35,11 +35,22 @@
 namespace ge {
 namespace hybrid {
+using domi::LogTimeStampDef;
+using domi::TaskDef;
 namespace {
 const uint32_t kSubgraphIndex = 0U;
 const uint32_t kVarOutputIndex = 0U;
+const uint64_t kProfilingFpStartLogid = 1U;
+const uint64_t kProfilingBpEndLogid = 2U;
+const uint64_t kProfilingIterEndLogid = 65535U;
 const int kBytes = 8;
 const char *const kOwnerGraphIsUnknown = "OwnerGraphIsUnknown";
+const char *const kProfilingGraph = "ProfilingGraph";
+const char *const kProfilingFpNode = "ProfilingFpNode";
+const char *const kProfilingBpNode = "ProfilingBpNode";
+const char *const kProfilingEndNode = "ProfilingEndNode";
+const char *const kProfilingArNode = "ProfilingAllReduceNode";
+const char *const kEngineNameRts = "DNN_VM_RTS_OP_STORE";
 
 Status SetOutputNameAttr(ComputeGraph &graph) {
   vector<string> output_names;
@@ -1531,6 +1542,188 @@ Status HybridModelBuilder::RecoverGraphUnknownFlag() {
   return SUCCESS;
 }
 
+Status HybridModelBuilder::GenerateFpProfilingTask(const OpDescPtr &op_desc, vector<TaskDef> &task_def_list) {
+  uint64_t jobid_log_id = ge::GetContext().TraceId();
+  GELOGD("The first FP operator is %s, job_id %lu", op_desc->GetName().c_str(), jobid_log_id);
+
+  TaskDef job_task_def;
+  job_task_def.set_type(RT_MODEL_TASK_PROFILER_TRACE);
+  job_task_def.set_stream_id(op_desc->GetStreamId());
+  LogTimeStampDef *job_log_def = job_task_def.mutable_log_timestamp();
+  if (job_log_def != nullptr) {
+    job_log_def->set_logid(jobid_log_id);
+    job_log_def->set_notify(false);
+  }
+  task_def_list.emplace_back(job_task_def);
+  TaskDef fp_task_def;
+  fp_task_def.set_type(RT_MODEL_TASK_PROFILER_TRACE);
+  fp_task_def.set_stream_id(op_desc->GetStreamId());
+  LogTimeStampDef *fp_log_def = fp_task_def.mutable_log_timestamp();
+  if (fp_log_def != nullptr) {
+    fp_log_def->set_logid(kProfilingFpStartLogid);
+    fp_log_def->set_notify(false);
+  }
+  task_def_list.emplace_back(fp_task_def);
+
+  return SUCCESS;
+}
+
+Status HybridModelBuilder::GenerateArProfilingTask(const OpDescPtr &op_desc, int64_t log_id,
+                                                   vector<TaskDef> &task_def_list) {
+  TaskDef ar_task_def;
+  ar_task_def.set_type(RT_MODEL_TASK_PROFILER_TRACE);
+  ar_task_def.set_stream_id(op_desc->GetStreamId());
+  LogTimeStampDef *ar_log_def = ar_task_def.mutable_log_timestamp();
+  if (ar_log_def != nullptr) {
+    ar_log_def->set_logid(log_id);
+    ar_log_def->set_notify(false);
+  }
+  task_def_list.emplace_back(ar_task_def);
+
+  return SUCCESS;
+}
+
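// The Generate*ProfilingTask helpers above and below all emit the same shape of
// task: an RT_MODEL_TASK_PROFILER_TRACE TaskDef whose LogTimeStampDef carries the
// log id. Condensed to a sketch (the helper shown here is illustrative only):
//
//   TaskDef def;
//   def.set_type(RT_MODEL_TASK_PROFILER_TRACE);
//   def.set_stream_id(op_desc->GetStreamId());
//   def.mutable_log_timestamp()->set_logid(log_id);   // 1 = FP, 2 = BP, 65535 = iteration end
//   def.mutable_log_timestamp()->set_notify(notify);  // true only for the iteration-end task
//   task_def_list.emplace_back(def);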
+Status HybridModelBuilder::GenerateBpProfilingTask(const OpDescPtr &op_desc, vector<TaskDef> &task_def_list) {
+  TaskDef bp_task_def;
+  bp_task_def.set_type(RT_MODEL_TASK_PROFILER_TRACE);
+  bp_task_def.set_stream_id(op_desc->GetStreamId());
+  LogTimeStampDef *bp_log_def = bp_task_def.mutable_log_timestamp();
+  GE_CHECK_NOTNULL(bp_log_def);
+  bp_log_def->set_logid(kProfilingBpEndLogid);
+  bp_log_def->set_notify(false);
+  task_def_list.emplace_back(bp_task_def);
+
+  return SUCCESS;
+}
+
+Status HybridModelBuilder::GenerateEndProfilingTask(const OpDescPtr &op_desc, vector<TaskDef> &task_def_list) {
+  TaskDef end_task_def;
+  end_task_def.set_type(RT_MODEL_TASK_PROFILER_TRACE);
+  end_task_def.set_stream_id(op_desc->GetStreamId());
+  LogTimeStampDef *end_log_def = end_task_def.mutable_log_timestamp();
+  GE_CHECK_NOTNULL(end_log_def);
+  end_log_def->set_logid(kProfilingIterEndLogid);
+  end_log_def->set_notify(true);
+  task_def_list.emplace_back(end_task_def);
+
+  return SUCCESS;
+}
+
+Status HybridModelBuilder::CreateProfilingNodeBefore(GraphItem &graph_item, const NodePtr &node) {
+  GE_CHECK_NOTNULL(node);
+  const OpDescPtr &op_desc = node->GetOpDesc();
+  GE_CHECK_NOTNULL(op_desc);
+  const auto &compute_graph = MakeShared<ComputeGraph>(kProfilingGraph);
+  GE_CHECK_NOTNULL(compute_graph);
+
+  NodePtr node_ptr = nullptr;
+  vector<TaskDef> task_def_list;
+  // create fp node
+  bool is_insert_fp_profiling_task = false;
+  (void)ge::AttrUtils::GetBool(op_desc, ATTR_NAME_INSERT_FP_PROFILILNG_TASK, is_insert_fp_profiling_task);
+  if (is_insert_fp_profiling_task) {
+    (void)GenerateFpProfilingTask(op_desc, task_def_list);
+    auto fp_desc = MakeShared<OpDesc>(kProfilingFpNode, PROFILINGTRAININGTRACE);
+    GE_CHECK_NOTNULL(fp_desc);
+    fp_desc->SetOpKernelLibName(kEngineNameRts);
+    node_ptr = compute_graph->AddNode(fp_desc);
+    GELOGD("Create fp profiling node success before.");
+  }
+  // create all reduce start node
+  bool is_insert_bp_profiling_task = false;
+  (void)ge::AttrUtils::GetBool(op_desc, ATTR_NAME_INSERT_BP_PROFILILNG_TASK, is_insert_bp_profiling_task);
+  bool is_all_reduce = (op_desc->GetType() == HCOMALLREDUCE || op_desc->GetType() == HVDCALLBACKALLREDUCE);
+  if (is_all_reduce && is_insert_bp_profiling_task) {
+    int64_t log_id = 0;
+    (void)ge::AttrUtils::GetInt(op_desc, ATTR_NAME_INSERT_PROFILILNG_TASK_LOG_ID, log_id);
+    GELOGD("All reduce node profiling task log id: %ld before", log_id);
+    (void)GenerateArProfilingTask(op_desc, log_id, task_def_list);
+    string op_name = string(kProfilingArNode) + std::to_string(log_id);
+    auto ar_desc_start = MakeShared<OpDesc>(op_name, PROFILINGTRAININGTRACE);
+    GE_CHECK_NOTNULL(ar_desc_start);
+    ar_desc_start->SetOpKernelLibName(kEngineNameRts);
+    node_ptr = compute_graph->AddNode(ar_desc_start);
+    GELOGD("Create all reduce start profiling node success before.");
+  }
+
+  if (node_ptr != nullptr) {
+    for (const auto &task_def : task_def_list) {
+      hybrid_model_.task_defs_[node_ptr].emplace_back(task_def);
+    }
+    NodeItem *node_item = nullptr;
+    GE_CHK_STATUS_RET_NOLOG(GetOrCreateNodeItem(node_ptr, &node_item));
+    node_item->input_start = 0;
+    node_item->output_start = 0;
+    graph_item.node_items_.emplace_back(node_item);
+  } else {
+    GELOGD("No need to create profiling node before.");
+  }
+
+  return SUCCESS;
+}
+
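// Resulting node_items_ layout around one marked node N (sketch):
//   all-reduce N with BP attr: [ProfilingAllReduceNode<log_id>] [N] [ProfilingAllReduceNode<log_id + 1>]
//   FP-marked N:               [ProfilingFpNode] [N]        (trace-id task, then log id 1)
//   ordinary N with BP attr:   [N] [ProfilingBpNode]        (log id 2)
//   N with END attr:           [N] [ProfilingEndNode]       (log id 65535, notify == true)
// CreateProfilingNodeBefore produces the entries in front of N;
// CreateProfilingNodeAfter (below) produces the ones behind it.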
+Status HybridModelBuilder::CreateProfilingNodeAfter(GraphItem &graph_item, const NodePtr &node) {
+  GE_CHECK_NOTNULL(node);
+  const OpDescPtr &op_desc = node->GetOpDesc();
+  GE_CHECK_NOTNULL(op_desc);
+  const auto &compute_graph = MakeShared<ComputeGraph>(kProfilingGraph);
+  GE_CHECK_NOTNULL(compute_graph);
+
+  NodePtr node_ptr = nullptr;
+  vector<TaskDef> task_def_list;
+  // Create all reduce end node
+  bool is_insert_bp_profiling_task = false;
+  (void)ge::AttrUtils::GetBool(op_desc, ATTR_NAME_INSERT_BP_PROFILILNG_TASK, is_insert_bp_profiling_task);
+  bool is_all_reduce = (op_desc->GetType() == HCOMALLREDUCE || op_desc->GetType() == HVDCALLBACKALLREDUCE);
+  if (is_all_reduce && is_insert_bp_profiling_task) {
+    int64_t log_id = 0;
+    (void)ge::AttrUtils::GetInt(op_desc, ATTR_NAME_INSERT_PROFILILNG_TASK_LOG_ID, log_id);
+    GELOGD("All reduce node profiling task log id: %ld after", log_id);
+    (void)GenerateArProfilingTask(op_desc, log_id + 1, task_def_list);
+    string op_name = string(kProfilingArNode) + std::to_string(log_id + 1);
+    auto ar_desc_end = MakeShared<OpDesc>(op_name, PROFILINGTRAININGTRACE);
+    GE_CHECK_NOTNULL(ar_desc_end);
+    ar_desc_end->SetOpKernelLibName(kEngineNameRts);
+    node_ptr = compute_graph->AddNode(ar_desc_end);
+    GELOGD("Create all reduce end profiling node success after.");
+  }
+  // create bp node
+  if (!is_all_reduce && is_insert_bp_profiling_task) {
+    (void)GenerateBpProfilingTask(op_desc, task_def_list);
+    auto bp_op_desc = MakeShared<OpDesc>(kProfilingBpNode, PROFILINGTRAININGTRACE);
+    GE_CHECK_NOTNULL(bp_op_desc);
+    bp_op_desc->SetOpKernelLibName(kEngineNameRts);
+    node_ptr = compute_graph->AddNode(bp_op_desc);
+    GELOGD("Create bp profiling node success after.");
+  }
+  // create end node
+  bool is_insert_end_profiling_task = false;
+  (void)ge::AttrUtils::GetBool(op_desc, ATTR_NAME_INSERT_END_PROFILILNG_TASK, is_insert_end_profiling_task);
+  if (is_insert_end_profiling_task) {
+    (void)GenerateEndProfilingTask(op_desc, task_def_list);
+    auto end_desc = MakeShared<OpDesc>(kProfilingEndNode, PROFILINGTRAININGTRACE);
+    GE_CHECK_NOTNULL(end_desc);
+    end_desc->SetOpKernelLibName(kEngineNameRts);
+    node_ptr = compute_graph->AddNode(end_desc);
+    GELOGD("Create end profiling node success after.");
+  }
+
+  if (node_ptr != nullptr) {
+    for (const auto &task_def : task_def_list) {
+      hybrid_model_.task_defs_[node_ptr].emplace_back(task_def);
+    }
+    NodeItem *node_item = nullptr;
+    GE_CHK_STATUS_RET_NOLOG(GetOrCreateNodeItem(node_ptr, &node_item));
+    node_item->input_start = 0;
+    node_item->output_start = 0;
+    graph_item.node_items_.emplace_back(node_item);
+  } else {
+    GELOGD("No need to create profiling node after.");
+  }
+
+  return SUCCESS;
+}
+
 Status HybridModelBuilder::LoadDynamicSubgraph(ComputeGraph &graph, bool is_root_graph) {
   GELOGD("Start to load subgraph [%s]", graph.GetName().c_str());
   // for known partitioned call, load all nodes
@@ -1567,8 +1760,9 @@ Status HybridModelBuilder::LoadDynamicSubgraph(ComputeGraph &graph, bool is_root
       graph_item->output_node_ = node_item;
       GE_CHK_STATUS_RET_NOLOG(BuildOutputMapping(*graph_item, *node_item, is_root_graph));
     }
-
+    GE_CHK_STATUS_RET_NOLOG(CreateProfilingNodeBefore(*graph_item, node));
     graph_item->node_items_.emplace_back(node_item);
+    GE_CHK_STATUS_RET_NOLOG(CreateProfilingNodeAfter(*graph_item, node));
     // parse var outputs
     GE_CHK_STATUS_RET_NOLOG(ParseVarOutputs(*node_item));
     GELOGD("NodeItem created: %s", node_item->DebugString().c_str());
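
The synthetic profiling nodes carry no kernel of their own; their TaskDefs are parked in hybrid_model_.task_defs_ and fetched again when the RTS executor loads the node. Sketch of the hand-off (the consumer side appears further down in this patch):

// Producer (above): hybrid_model_.task_defs_[node_ptr].emplace_back(task_def);
// Consumer (RtsNodeExecutor::LoadTask):
const std::vector<domi::TaskDef> *task_defs = model.GetTaskDefs(node);
if (task_defs == nullptr || task_defs->empty()) {
  return INTERNAL_ERROR;  // a profiling node without stored tasks is a build bug
}
task = MakeShared<ProfilingTraceNodeTask>(*task_defs);
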
diff --git a/ge/hybrid/model/hybrid_model_builder.h b/ge/hybrid/model/hybrid_model_builder.h
index a11faae2..55a19b6c 100644
--- a/ge/hybrid/model/hybrid_model_builder.h
+++ b/ge/hybrid/model/hybrid_model_builder.h
@@ -79,6 +79,12 @@ class HybridModelBuilder {
   Status LoadKnownShapedSubgraph(ComputeGraph &graph, NodeItem *parent_node_item);
   Status RecoverGraphUnknownFlag();
   Status CheckAicpuOpList();
+  Status CreateProfilingNodeBefore(GraphItem &graph_item, const NodePtr &node);
+  Status CreateProfilingNodeAfter(GraphItem &graph_item, const NodePtr &node);
+  Status GenerateFpProfilingTask(const OpDescPtr &op_desc, vector<domi::TaskDef> &task_def_list);
+  Status GenerateBpProfilingTask(const OpDescPtr &op_desc, vector<domi::TaskDef> &task_def_list);
+  Status GenerateEndProfilingTask(const OpDescPtr &op_desc, vector<domi::TaskDef> &task_def_list);
+  Status GenerateArProfilingTask(const OpDescPtr &op_desc, int64_t log_id, vector<domi::TaskDef> &task_def_list);
 
   const char* GetGraphName() const {
     return hybrid_model_.model_name_.c_str();
diff --git a/ge/hybrid/node_executor/rts/rts_node_executor.cc b/ge/hybrid/node_executor/rts/rts_node_executor.cc
index 18b875fd..90b623e0 100644
--- a/ge/hybrid/node_executor/rts/rts_node_executor.cc
+++ b/ge/hybrid/node_executor/rts/rts_node_executor.cc
@@ -18,6 +18,7 @@
 #include "common/debug/log.h"
 #include "common/ge/ge_util.h"
 #include "graph/utils/tensor_utils.h"
+#include "hybrid/model/hybrid_model.h"
 #include "runtime/rt.h"
 
 namespace ge {
@@ -79,12 +80,44 @@ Status IdentityNNodeTask::ExecuteAsync(TaskContext &context, std::function
+Status ProfilingTraceNodeTask::UpdateArgs(TaskContext &context) {
+  // profiling trace task has no args to update
+  return SUCCESS;
+}
+
+Status ProfilingTraceNodeTask::ExecuteAsync(TaskContext &context, std::function<void()> done_callback) {
+  for (const auto &task_def : task_defs_) {
+    auto log_time_stamp_def = task_def.log_timestamp();
+    uint64_t log_id = log_time_stamp_def.logid();
+    bool notify = log_time_stamp_def.notify();
+    uint32_t flat = log_time_stamp_def.flat();
+
+    GELOGD("ProfilingTraceTask execute async start. logid = %lu, notify = %d.", log_id, notify);
+    rtError_t rt_ret = rtProfilerTrace(log_id, notify, flat, context.GetStream());
+    if (rt_ret != RT_ERROR_NONE) {
+      GELOGE(RT_FAILED, "Call rt api failed, ret: 0x%X", rt_ret);
+      return RT_ERROR_TO_GE_STATUS(rt_ret);
+    }
+    GELOGD("[%s] ProfilingTraceTask[%lu] execute success.", context.GetNodeName(), log_id);
+  }
+
+  return SUCCESS;
+}
+
 Status RtsNodeExecutor::LoadTask(const HybridModel &model, const NodePtr &node, shared_ptr<NodeTask> &task) const {
+  GE_CHECK_NOTNULL(node);
+  auto op_type = node->GetType();
   if (op_type == IDENTITY) {
     task = MakeShared<IdentityNodeTask>();
   } else if (op_type == IDENTITYN) {
     task = MakeShared<IdentityNNodeTask>();
+  } else if (op_type == PROFILINGTRAININGTRACE) {
+    auto *task_defs = model.GetTaskDefs(node);
+    if (task_defs == nullptr || task_defs->empty()) {
+      GELOGE(INTERNAL_ERROR, "Profiling node has no task to execute.");
+      return INTERNAL_ERROR;
+    }
+    task = MakeShared<ProfilingTraceNodeTask>(*task_defs);
   } else {
     GELOGE(INTERNAL_ERROR, "[%s] Unsupported RTS op type: %s", node->GetName().c_str(), op_type.c_str());
     return INTERNAL_ERROR;
diff --git a/ge/hybrid/node_executor/rts/rts_node_executor.h b/ge/hybrid/node_executor/rts/rts_node_executor.h
index 2576b73b..df487d6c 100644
--- a/ge/hybrid/node_executor/rts/rts_node_executor.h
+++ b/ge/hybrid/node_executor/rts/rts_node_executor.h
@@ -18,6 +18,7 @@
 #define GE_HYBRID_NODE_EXECUTOR_RTS_RTS_NODE_EXECUTOR_H_
 
 #include "hybrid/node_executor/node_executor.h"
+#include "proto/task.pb.h"
 
 namespace ge {
 namespace hybrid {
@@ -35,6 +36,18 @@ class IdentityNNodeTask : public IdentityNodeTask {
   Status ExecuteAsync(TaskContext &context, std::function<void()> done_callback) override;
 };
 
+class ProfilingTraceNodeTask : public NodeTask {
+ public:
+  explicit ProfilingTraceNodeTask(const std::vector<domi::TaskDef> &task_defs) : task_defs_(task_defs) {}
+  ~ProfilingTraceNodeTask() override = default;
+
+  Status UpdateArgs(TaskContext &context) override;
+  Status ExecuteAsync(TaskContext &context, std::function<void()> done_callback) override;
+
+ private:
+  std::vector<domi::TaskDef> task_defs_;
+};
+
 class RtsNodeExecutor : public NodeExecutor {
  public:
  Status LoadTask(const HybridModel &model, const NodePtr &node, shared_ptr<NodeTask> &task) const override;
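
At run time each stored TaskDef becomes one rtProfilerTrace call on the node's stream, so an FP node loaded with two TaskDefs (trace id, then log id 1) yields two trace records per iteration. A minimal replay loop mirroring ProfilingTraceNodeTask::ExecuteAsync above (rt_stream stands in for context.GetStream()):

for (const auto &task_def : task_defs) {
  const auto &ts = task_def.log_timestamp();
  rtError_t rt_ret = rtProfilerTrace(ts.logid(), ts.notify(), ts.flat(), rt_stream);
  if (rt_ret != RT_ERROR_NONE) {
    return RT_ERROR_TO_GE_STATUS(rt_ret);
  }
}
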
diff --git a/ge/hybrid/node_executor/task_context.h b/ge/hybrid/node_executor/task_context.h
index 0e85a8e3..8ba4fb90 100644
--- a/ge/hybrid/node_executor/task_context.h
+++ b/ge/hybrid/node_executor/task_context.h
@@ -123,7 +123,7 @@ class TaskContext {
   Status status_ = SUCCESS;
   std::vector<void *> workspaces_;
   uint64_t iteration_ = 0;
-  uint32_t task_id_= 0;
+  uint32_t task_id_ = 0;
   uint32_t stream_id_ = 0;
 };
 }  // namespace hybrid
diff --git a/inc/framework/common/ge_types.h b/inc/framework/common/ge_types.h
index 4267aec4..685e03fd 100644
--- a/inc/framework/common/ge_types.h
+++ b/inc/framework/common/ge_types.h
@@ -263,6 +263,8 @@ struct ComputeGraphDescInfo {
   std::vector<Format> output_format;
   std::vector<std::vector<int64_t>> output_shape;
   std::vector<DataType> output_data_type;
+  uint32_t task_id;
+  uint32_t stream_id;
 };
 
 struct OpDescInfo {
diff --git a/inc/framework/common/types.h b/inc/framework/common/types.h
index 99c2ea03..e3baa816 100644
--- a/inc/framework/common/types.h
+++ b/inc/framework/common/types.h
@@ -529,6 +529,9 @@ REGISTER_OPTYPE_DECLARE(HVDWAIT, "HorovodWait");
 // aicpu op for online_infer dynamic_dims
 REGISTER_OPTYPE_DECLARE(GETDYNAMICDIMS, "GetDynamicDims");
 
+// profiling training trace node
+REGISTER_OPTYPE_DECLARE(PROFILINGTRAININGTRACE, "ProfilingTrainingTrace");
+
 enum InputMode { INPUT = 0, CONST_INPUT };
 
 // Definition of the processing status enum of the process module

From 0ed8136d003d3455ed0fba51ae6334a685d19fdf Mon Sep 17 00:00:00 2001
From: zhaoxinxin
Date: Mon, 4 Jan 2021 22:49:19 +0800
Subject: [PATCH 37/54] modified: ge/graph/preprocess/graph_preprocess.cc

---
 ge/graph/preprocess/graph_preprocess.cc | 52 +++++++++++++------------
 1 file changed, 27 insertions(+), 25 deletions(-)

diff --git a/ge/graph/preprocess/graph_preprocess.cc b/ge/graph/preprocess/graph_preprocess.cc
index f6a9ea80..2ae39b3c 100644
--- a/ge/graph/preprocess/graph_preprocess.cc
+++ b/ge/graph/preprocess/graph_preprocess.cc
@@ -899,6 +899,23 @@ Status ProcessNetoutputNodeDynShape(NodePtr &node) {
   }
   return SUCCESS;
 }
+long StringToLongNoThrow(const string &str) {
+  try {
+    return std::stol(str);
+  } catch (const std::invalid_argument &) {
+    GELOGE(PARAM_INVALID,
+           "Parse shape range of input failed when transfer from string to int64. Given %s, while correct example: "
+           "\"[1~20,3,3~6,-1],[1~20,3,3~6,-1]\"",
+           str.c_str());
+    return PARAM_INVALID;
+  } catch (const std::out_of_range &) {
+    GELOGE(PARAM_INVALID,
+           "Parse shape range of input failed when transfer from string to int64. Given %s, while correct example: "
+           "\"[1~20,3,3~6,-1],[1~20,3,3~6,-1]\"",
+           str.c_str());
+    return PARAM_INVALID;
+  }
+}
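// Worked example for the parser below, following the new code path:
//   input: "[1~20,3,3~6,-1],[1~20,3,3~6,-1]"
//   split on ']' and trimmed -> "1~20,3,3~6,-1" per input
//   "1~20" -> (1, 20)   bounded range dim
//   "3"    -> (3, 3)    fixed dim
//   "3~6"  -> (3, 6)    bounded range dim
//   "-1"   -> (0, -1)   unbounded dim (a negative fixed value now maps to (0, v)
//                       instead of the old (1, v))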
 /**
  * Parse shape_range from string to vector
  * shape_range from option normally is "[1~20,3,3~6,-1],[1~20,3,3~6,-1]"
  */
 Status ParseDynamicInputShapeRange(const std::string &shape_range,
                                    std::vector<std::vector<std::pair<int64_t, int64_t>>> &range) {
   if (shape_range.size() < 2) {
     GELOGE(PARAM_INVALID, "Shape range %s is invalid.", shape_range.c_str());
     return PARAM_INVALID;
   }
-  // different shape_ragne of single input are split by ']'
+  // different shape_range of single input are split by ']'
   vector<string> shape_range_set = ge::StringUtils::Split(shape_range, ']');
   if (shape_range_set.empty()) {
     GELOGE(PARAM_INVALID, "Shape range %s is not valid. Correct example: \"[1~20,3,3~6,-1],[1~20,3,3~6,-1]\"",
            shape_range.c_str());
     return PARAM_INVALID;
   }
   for (auto &shape_range_str : shape_range_set) {
     if (shape_range_str.empty()) {
-      GELOGE(PARAM_INVALID,
-             "Shape range of input is empty. Given %s, while correct example: \"[1~20,3,3~6,-1],[1~20,3,3~6,-1]\"",
-             shape_range.c_str());
-      return PARAM_INVALID;
+      continue;
     }
     // trim start bytes, after that, single input should be "1~20,3,3~6,-1"
     if (ge::StringUtils::StartWith(shape_range_str, "[")) {
       shape_range_str = shape_range_str.substr(1, shape_range_str.size());
-    } else if (ge::StringUtils::StartWith(shape_range_str, ",")) {
+    }
+    if (ge::StringUtils::StartWith(shape_range_str, ",")) {
       shape_range_str = shape_range_str.substr(2, shape_range_str.size());
-    } else {
-      GELOGE(PARAM_INVALID,
-             "Shape range of input is invalid. Given %s, while correct example: \"[1~20,3,3~6,-1],[1~20,3,3~6,-1]\"",
-             shape_range.c_str());
-      return PARAM_INVALID;
     }
+
     // parse shape_range of single input. eg. "1~20,3,3~6,-1"
     std::vector<std::pair<int64_t, int64_t>> range_of_single_input;
     vector<string> dim_range_set = ge::StringUtils::Split(shape_range_str, ',');
     for (const auto &range_pair_str : dim_range_set) {
       vector<string> range_pair_set = ge::StringUtils::Split(range_pair_str, '~');
       pair<int64_t, int64_t> range_pair;
       if (range_pair_set.size() == 1) {
         // fix dim
-        auto range_value = std::stol(range_pair_set.at(0).c_str());
+        auto range_value = StringToLongNoThrow(range_pair_set.at(0).c_str());
         if (range_value < 0) {
-          range_pair = std::make_pair(1, range_value);
+          range_pair = std::make_pair(0, range_value);
         } else {
           range_pair = std::make_pair(range_value, range_value);
         }
       } else if (range_pair_set.size() == 2) {
         // unknown dim, should get range.
-        try {
-          auto range_left = std::stol(range_pair_set.at(0).c_str());
-          auto range_right = std::stol(range_pair_set.at(1).c_str());
-          range_pair = std::make_pair(range_left, range_right);
-        } catch (const std::invalid_argument) {
-          GELOGE(
-              PARAM_INVALID,
-              "Parse shape range of input failed when transfer from string to int64. Given %s, while correct example: "
-              "\"[1~20,3,3~6,-1],[1~20,3,3~6,-1]\"",
-              shape_range.c_str());
-          return PARAM_INVALID;
-        }
+        auto range_left = StringToLongNoThrow(range_pair_set.at(0).c_str());
+        auto range_right = StringToLongNoThrow(range_pair_set.at(1).c_str());
+        range_pair = std::make_pair(range_left, range_right);
       } else {
         GELOGE(PARAM_INVALID,
                "Shape range of input is invalid. 
Given %s, while correct example: \"[1~20,3,3~6,-1],[1~20,3,3~6,-1]\"", From 50b8b31008c7619c7521eb1e318523c52dd0c917 Mon Sep 17 00:00:00 2001 From: zhaoxinxin Date: Mon, 4 Jan 2021 23:20:38 +0800 Subject: [PATCH 38/54] modified: ge/graph/preprocess/graph_preprocess.cc --- ge/graph/preprocess/graph_preprocess.cc | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/ge/graph/preprocess/graph_preprocess.cc b/ge/graph/preprocess/graph_preprocess.cc index 2ae39b3c..9672c497 100644 --- a/ge/graph/preprocess/graph_preprocess.cc +++ b/ge/graph/preprocess/graph_preprocess.cc @@ -1032,7 +1032,7 @@ Status UpdateDynamicInputShapeRange(const ge::GeAttrValue::INT index, desc.SetShape(origin_shape); desc.SetShapeRange(current_shape_range_vec); - /*int64_t dynamic_shape_size = 1; + int64_t dynamic_shape_size = 1; for (const auto range_pair : range_vec.at(index)) { FMK_INT64_MULCHECK(dynamic_shape_size, range_pair.second); dynamic_shape_size *= range_pair.second; @@ -1046,7 +1046,7 @@ Status UpdateDynamicInputShapeRange(const ge::GeAttrValue::INT index, FMK_INT64_MULCHECK(dynamic_shape_size, data_type_size); dynamic_shape_size *= data_type_size; GELOGI("In dynamic_execute mode ,set input %s shape range size %ld", op->GetName().c_str(), dynamic_shape_size); - ge::TensorUtils::SetSize(desc, dynamic_shape_size);*/ + ge::TensorUtils::SetSize(desc, dynamic_shape_size); graphStatus graph_ret = op->UpdateInputDesc(0, desc); GE_CHK_STATUS_RET(graph_ret, "UpdateInputDesc fail, graph ret: %u", graph_ret); graph_ret = op->UpdateOutputDesc(0, desc); From 7f7b662750642446f060c2580259fca56fb1c883 Mon Sep 17 00:00:00 2001 From: zhangxiaokun Date: Tue, 5 Jan 2021 10:17:40 +0800 Subject: [PATCH 39/54] Delete useless kOutputNum --- ge/graph/load/new_model_manager/davinci_model.cc | 1 - 1 file changed, 1 deletion(-) diff --git a/ge/graph/load/new_model_manager/davinci_model.cc b/ge/graph/load/new_model_manager/davinci_model.cc index 78f6f8bf..37a39308 100755 --- a/ge/graph/load/new_model_manager/davinci_model.cc +++ b/ge/graph/load/new_model_manager/davinci_model.cc @@ -75,7 +75,6 @@ namespace ge { namespace { const uint32_t kDataIndex = 0; -const uint32_t kOutputNum = 1; const uint32_t kTrueBranchStreamNum = 1; const uint32_t kGetDynamicDimsCount = 1; const uint32_t kThreadNum = 16; From 5f68aaa0c7f7c702ef2eaa0e5e84bc711b0ce2c3 Mon Sep 17 00:00:00 2001 From: chenyemeng Date: Wed, 30 Dec 2020 10:11:34 +0800 Subject: [PATCH 40/54] rm compile macro --- ge/CMakeLists.txt | 8 +-- ge/executor/CMakeLists.txt | 2 +- ge/ge_local_engine/engine/host_cpu_engine.cc | 38 ----------- ge/graph/manager/graph_manager.cc | 6 -- ge/graph/manager/graph_mem_allocator.cc | 8 --- ge/graph/manager/graph_mem_allocator.h | 6 -- ge/graph/manager/host_mem_allocator.h | 2 +- ge/graph/manager/host_mem_manager.cc | 9 --- ge/graph/manager/host_mem_manager.h | 4 -- ge/graph/optimize/graph_optimize.cc | 6 +- ge/graph/passes/assign_remove_pass.cc | 67 +------------------ ge/graph/passes/assign_remove_pass.h | 3 +- ge/graph/passes/constant_fuse_same_pass.cc | 6 -- ge/graph/passes/constant_fuse_same_pass.h | 13 ---- ge/graph/preprocess/graph_preprocess.cc | 11 --- ge/hybrid/common/npu_memory_allocator.cc | 10 --- ge/hybrid/model/hybrid_model_builder.cc | 14 ---- .../host_cpu/host_cpu_node_executor.cc | 15 ----- 18 files changed, 13 insertions(+), 215 deletions(-) diff --git a/ge/CMakeLists.txt b/ge/CMakeLists.txt index 8d9edb65..e94258ac 100755 --- a/ge/CMakeLists.txt +++ b/ge/CMakeLists.txt @@ -124,7 +124,7 @@ set(TRAIN_SRC_LIST 
"graph/manager/graph_var_manager.cc" "graph/manager/host_mem_manager.cc" "graph/manager/rdma_pool_allocator.cc" - $<$>:graph/manager/host_mem_allocator.cc> + "graph/manager/host_mem_allocator.cc" "graph/manager/memory_api.cc" "graph/manager/model_manager/event_manager.cc" "graph/manager/trans_var_data_utils.cc" @@ -166,7 +166,7 @@ set(TRAIN_SRC_LIST "graph/passes/hccl_group_pass.cc" "graph/passes/enter_pass.cc" "graph/passes/assign_remove_pass.cc" - $<$>:graph/passes/inplace_support_check_pass.cc> + "graph/passes/inplace_support_check_pass.cc" "graph/passes/flow_ctrl_pass.cc" "graph/passes/global_step_insert_pass.cc" "host_kernels/transpose_kernel.cc" @@ -409,7 +409,7 @@ set(INFER_SRC_LIST "graph/manager/graph_var_manager.cc" "graph/manager/host_mem_manager.cc" "graph/manager/rdma_pool_allocator.cc" - $<$>:graph/manager/host_mem_allocator.cc> + "graph/manager/host_mem_allocator.cc" "graph/manager/graph_mem_allocator.cc" "graph/manager/graph_caching_allocator.cc" "model/ge_model.cc" @@ -531,7 +531,7 @@ set(INFER_SRC_LIST "graph/passes/for_pass.cc" "graph/passes/enter_pass.cc" "graph/passes/assign_remove_pass.cc" - $<$>:graph/passes/inplace_support_check_pass.cc> + "graph/passes/inplace_support_check_pass.cc" "graph/passes/addn_pass.cc" "graph/passes/common_subexpression_elimination_pass.cc" "graph/passes/remove_same_const_pass.cc" diff --git a/ge/executor/CMakeLists.txt b/ge/executor/CMakeLists.txt index 4ca18864..ac4d4ebd 100644 --- a/ge/executor/CMakeLists.txt +++ b/ge/executor/CMakeLists.txt @@ -28,7 +28,7 @@ set(SRC_LIST "../graph/manager/trans_var_data_utils.cc" "../graph/manager/util/debug.cc" "../graph/manager/rdma_pool_allocator.cc" - $<$>:../graph/manager/host_mem_allocator.cc> + "../graph/manager/host_mem_allocator.cc" "../hybrid/node_executor/aicpu/aicpu_ext_info.cc" "../model/ge_model.cc" "../model/ge_root_model.cc" diff --git a/ge/ge_local_engine/engine/host_cpu_engine.cc b/ge/ge_local_engine/engine/host_cpu_engine.cc index 0f46b4cb..35ecfb2d 100755 --- a/ge/ge_local_engine/engine/host_cpu_engine.cc +++ b/ge/ge_local_engine/engine/host_cpu_engine.cc @@ -26,7 +26,6 @@ #include "common/math/math_util.h" namespace { -#ifndef ONLY_COMPILE_OPEN_SRC #define CREATE_OUTPUT_CASE(DTYPE, TYPE) \ case (DTYPE): { \ GeTensorPtr ge_tensor = nullptr; \ @@ -50,43 +49,6 @@ namespace { named_outputs.emplace(tensor_name, tensor); \ break; \ } -#else -#define CREATE_OUTPUT_CASE(DTYPE, TYPE) \ - case (DTYPE): { \ - GeTensorPtr ge_tensor = nullptr; \ - if (need_create_flag) { \ - GELOGI("node:%s allocate output %zu start, size=%lld", op_desc->GetName().c_str(), i, data_num * sizeof(TYPE)); \ - std::unique_ptr buf(new (std::nothrow) TYPE[data_num]()); \ - if (buf == nullptr) { \ - GELOGE(MEMALLOC_FAILED, "New sizeof(T) * data_num(%zu) memory failed", \ - static_cast(sizeof(TYPE) * data_num)); \ - return MEMALLOC_FAILED; \ - } \ - ge_tensor = MakeShared(out_desc); \ - GE_CHECK_NOTNULL(ge_tensor); \ - GELOGD("node:%s allocate output %zu success, size=%lld", op_desc->GetName().c_str(), i, data_num * sizeof(TYPE));\ - if (ge_tensor->SetData(reinterpret_cast(buf.get()), data_num * sizeof(TYPE)) != GRAPH_SUCCESS) { \ - GELOGE(MEMALLOC_FAILED, "Set data for output %zu of node %s failed.", i, op_desc->GetName().c_str()); \ - return MEMALLOC_FAILED; \ - } \ - ge_tensor->MutableTensorDesc().SetDataType(out_desc.GetDataType()); \ - ge_tensor->MutableTensorDesc().SetShape(out_desc.GetShape()); \ - outputs.emplace_back(ge_tensor); \ - } else { \ - ge_tensor = outputs[i]; \ - GE_CHECK_NOTNULL(ge_tensor); \ - 
GELOGD("node:%s existed output %zu", op_desc->GetName().c_str(), i); \ - } \ - auto tensor = TensorAdapter::AsTensor(*ge_tensor); \ - auto tensor_name = op_desc->GetOutputNameByIndex(i); \ - GE_RETURN_WITH_LOG_IF_TRUE(tensor_name.empty(), "Failed to get output name. node = %s, index = %zu", \ - op_desc->GetName().c_str(), i); \ - GELOGD("Successfully inserted output tensor. node = %s, index = %zu, output name = %s, addr = %p, size = %zu", \ - op_desc->GetName().c_str(), i, tensor_name.c_str(), tensor.GetData(), tensor.GetSize()); \ - named_outputs.emplace(tensor_name, tensor); \ - break; \ - } -#endif } namespace ge { diff --git a/ge/graph/manager/graph_manager.cc b/ge/graph/manager/graph_manager.cc index c4f91036..c0f084d8 100755 --- a/ge/graph/manager/graph_manager.cc +++ b/ge/graph/manager/graph_manager.cc @@ -38,10 +38,8 @@ #include "graph/partition/stage_partition.h" #include "graph/passes/addn_pass.h" #include "graph/passes/bitcast_pass.h" -#ifndef ONLY_COMPILE_OPEN_SRC #include "graph/passes/assign_remove_pass.h" #include "graph/passes/inplace_support_check_pass.h" -#endif #include "graph/passes/atomic_addr_clean_pass.h" #include "graph/passes/attach_stream_label_pass.h" #include "graph/passes/cast_remove_pass.h" @@ -2269,20 +2267,16 @@ Status GraphManager::OptimizeStage2(ge::ComputeGraphPtr &compute_graph) { ReshapeRemovePass reshape_remove_pass; CondRemovePass condition_remove_pass; BitcastPass bitcast_pass; -#ifndef ONLY_COMPILE_OPEN_SRC AssignRemovePass assign_remove_pass; InplaceSupportCheckPass inplace_support_check_pass; -#endif names_to_passes.emplace_back("ConstantFoldingPass", &constant_folding_pass); names_to_passes.emplace_back("ReshapeRemovePass", &reshape_remove_pass); names_to_passes.emplace_back("CondRemovePass", &condition_remove_pass); names_to_passes.emplace_back("BitcastPass", &bitcast_pass); -#ifndef ONLY_COMPILE_OPEN_SRC if (GetContext().GetHostExecFlag()) { names_to_passes.emplace_back("AssignRemovePass", &assign_remove_pass); names_to_passes.emplace_back("InplaceSupportCheckPass", &inplace_support_check_pass); } -#endif GE_TIMESTAMP_START(names_to_passes); ret = GEPass(compute_graph).Run(names_to_passes); GE_TIMESTAMP_END(names_to_passes, "OptimizeStage2::MergedGraphNameToPasses"); diff --git a/ge/graph/manager/graph_mem_allocator.cc b/ge/graph/manager/graph_mem_allocator.cc index 4e31d835..f3037299 100755 --- a/ge/graph/manager/graph_mem_allocator.cc +++ b/ge/graph/manager/graph_mem_allocator.cc @@ -19,9 +19,7 @@ #include #include "graph/manager/graph_caching_allocator.h" #include "graph/manager/rdma_pool_allocator.h" -#ifndef ONLY_COMPILE_OPEN_SRC #include "graph/manager/host_mem_allocator.h" -#endif namespace ge { void MemoryAllocator::Initialize(uint32_t device_id) { GELOGI("MemoryAllocator::Initialize"); @@ -192,12 +190,10 @@ Status MemManager::Initialize(const std::vector &memory_type) { GELOGE(ge::INTERNAL_ERROR, "Create RdmaAllocator failed."); return ge::INTERNAL_ERROR; } -#ifndef ONLY_COMPILE_OPEN_SRC if (InitAllocator(memory_type, host_allocator_map_) != SUCCESS) { GELOGE(ge::INTERNAL_ERROR, "Create HostMemAllocator failed."); return ge::INTERNAL_ERROR; } -#endif return SUCCESS; } @@ -219,9 +215,7 @@ void MemManager::Finalize() noexcept { // caching and rdma allocator use memory allocator, so finalize them first FinalizeAllocatorMap(caching_allocator_map_); FinalizeAllocatorMap(rdma_allocator_map_); -#ifndef ONLY_COMPILE_OPEN_SRC FinalizeAllocatorMap(host_allocator_map_); -#endif FinalizeAllocatorMap(memory_allocator_map_); } @@ -250,9 +244,7 @@ 
CachingAllocator &MemManager::CachingInstance(rtMemType_t memory_type) { RdmaPoolAllocator &MemManager::RdmaPoolInstance(rtMemType_t memory_type) { return Instance().GetAllocator(memory_type, rdma_allocator_map_); } -#ifndef ONLY_COMPILE_OPEN_SRC HostMemAllocator &MemManager::HostMemInstance(rtMemType_t memory_type) { return Instance().GetAllocator(memory_type, host_allocator_map_); } -#endif } // namespace ge diff --git a/ge/graph/manager/graph_mem_allocator.h b/ge/graph/manager/graph_mem_allocator.h index 6cdbd9b4..bd75dbb9 100644 --- a/ge/graph/manager/graph_mem_allocator.h +++ b/ge/graph/manager/graph_mem_allocator.h @@ -139,9 +139,7 @@ class MemoryAllocator { using MemoryAllocatorPtr = std::shared_ptr; class CachingAllocator; class RdmaPoolAllocator; -#ifndef ONLY_COMPILE_OPEN_SRC class HostMemAllocator; -#endif class MemManager { public: MemManager(); @@ -150,9 +148,7 @@ class MemManager { static MemoryAllocator *Instance(rtMemType_t memory_type); CachingAllocator &CachingInstance(rtMemType_t memory_type); RdmaPoolAllocator &RdmaPoolInstance(rtMemType_t memory_type); -#ifndef ONLY_COMPILE_OPEN_SRC HostMemAllocator &HostMemInstance(rtMemType_t memory_type); -#endif MemManager(const MemManager &) = delete; MemManager &operator=(const MemManager &) = delete; /// @@ -240,9 +236,7 @@ class MemManager { std::map memory_allocator_map_; std::map caching_allocator_map_; std::map rdma_allocator_map_; -#ifndef ONLY_COMPILE_OPEN_SRC std::map host_allocator_map_; -#endif std::recursive_mutex allocator_mutex_; }; } // namespace ge diff --git a/ge/graph/manager/host_mem_allocator.h b/ge/graph/manager/host_mem_allocator.h index b9dbdc4c..d10b2475 100644 --- a/ge/graph/manager/host_mem_allocator.h +++ b/ge/graph/manager/host_mem_allocator.h @@ -27,7 +27,7 @@ namespace ge { class HostMemAllocator { public: - explicit HostMemAllocator(rtMemType_t) {} + explicit HostMemAllocator(rtMemType_t) {} ~HostMemAllocator() = default; HostMemAllocator(const HostMemAllocator &) = delete; diff --git a/ge/graph/manager/host_mem_manager.cc b/ge/graph/manager/host_mem_manager.cc index c9a33f5c..60a7586d 100644 --- a/ge/graph/manager/host_mem_manager.cc +++ b/ge/graph/manager/host_mem_manager.cc @@ -43,29 +43,20 @@ Status SharedMemAllocator::Allocate(SharedMemInfo &mem_info) { return GE_GRAPH_MEMORY_ALLOC_FAILED; } mem_info.fd = output_para.fd; -#ifndef ONLY_COMPILE_OPEN_SRC mem_info.host_aligned_ptr = AlignedPtr::BuildFromAllocFunc([&output_para](std::unique_ptr &ptr) { ptr.reset(reinterpret_cast(output_para.ptr)); }, [](uint8_t *ptr) { ptr = nullptr; }); -#else - mem_info.host_address = reinterpret_cast(output_para.ptr); -#endif mem_info.device_address = reinterpret_cast(output_para.devPtr); return SUCCESS; } Status SharedMemAllocator::DeAllocate(SharedMemInfo &mem_info) { GELOGD("SharedMemAllocator::DeAllocate"); -#ifndef ONLY_COMPILE_OPEN_SRC rtFreeHostSharedMemoryIn free_para = {mem_info.shm_name.c_str(), mem_info.mem_size, mem_info.fd, mem_info.host_aligned_ptr->MutableGet(), mem_info.device_address}; -#else - rtFreeHostSharedMemoryIn free_para = {mem_info.shm_name.c_str(), mem_info.mem_size, mem_info.fd, - mem_info.host_address, mem_info.device_address}; -#endif rtError_t rt_ret = rtFreeHostSharedMemory(&free_para); if (rt_ret != RT_ERROR_NONE) { GELOGE(RT_FAILED, "Call rt api(rtFreeHostSharedMemory) failed, ret: 0x%X.", rt_ret); diff --git a/ge/graph/manager/host_mem_manager.h b/ge/graph/manager/host_mem_manager.h index f204c9e4..be3237c3 100644 --- a/ge/graph/manager/host_mem_manager.h +++ 
b/ge/graph/manager/host_mem_manager.h @@ -42,11 +42,7 @@ struct SharedMemInfo { uint64_t mem_size = 0; int fd = 0; uint8_t *device_address = nullptr; -#ifndef ONLY_COMPILE_OPEN_SRC std::shared_ptr host_aligned_ptr = nullptr; -#else - uint8_t *host_address = nullptr; -#endif SharedMemInfo() = default; SharedMemInfo(string name, uint64_t size) : op_name(std::move(name)), mem_size(size) {} }; diff --git a/ge/graph/optimize/graph_optimize.cc b/ge/graph/optimize/graph_optimize.cc index cd80a956..8cca5b5d 100644 --- a/ge/graph/optimize/graph_optimize.cc +++ b/ge/graph/optimize/graph_optimize.cc @@ -127,6 +127,10 @@ Status GraphOptimize::OptimizeSubGraph(ComputeGraphPtr &compute_graph, const std } Status GraphOptimize::OptimizeOriginalGraph(ComputeGraphPtr &compute_graph) { + if (GetContext().GetHostExecFlag()) { + // graph exec on host, no need OptimizeOriginalGraph + return SUCCESS; + } if (compute_graph == nullptr) { GELOGE(GE_GRAPH_OPTIMIZE_COMPUTE_GRAPH_NULL, "[OptimizeOriginalGraph]: compute_graph is nullptr."); return GE_GRAPH_OPTIMIZE_COMPUTE_GRAPH_NULL; @@ -162,7 +166,7 @@ Status GraphOptimize::OptimizeOriginalGraph(ComputeGraphPtr &compute_graph) { Status GraphOptimize::OptimizeOriginalGraphJudgeInsert(ComputeGraphPtr &compute_graph) { GELOGD("OptimizeOriginalGraphJudgeInsert in"); if (GetContext().GetHostExecFlag()) { - // graph exec on host, no need OptimizeOriginalGraph + // graph exec on host, no need OptimizeOriginalGraphJudgeInsert return SUCCESS; } diff --git a/ge/graph/passes/assign_remove_pass.cc b/ge/graph/passes/assign_remove_pass.cc index 5029b9c3..51e6e006 100644 --- a/ge/graph/passes/assign_remove_pass.cc +++ b/ge/graph/passes/assign_remove_pass.cc @@ -29,7 +29,6 @@ static const std::set kNoTaskNodeTypes = { ge::DATA, ge::ANN_DATA, } namespace ge { -#ifndef ONLY_COMPILE_OPEN_SRC Status AssignRemovePass::Run(NodePtr &node) { GELOGD("AssignRemovePass running"); @@ -145,71 +144,7 @@ Status AssignRemovePass::TransformAttr(NodePtr &node) { } return SUCCESS; } -#else -Status AssignRemovePass::Run(NodePtr &node) { - GELOGD("AssignRemovePass running"); - if (node->GetType() != ASSIGN) { - GELOGD("No need run AssignRemovePass on [%s, %s].", node->GetName().c_str(), node->GetType().c_str()); - return SUCCESS; - } - - const auto &ref_in_anchor = node->GetInDataAnchor(kAssignRefInputIndex); - const auto &value_in_anchor = node->GetInDataAnchor(kAssignValueInputIndex); - if ((ref_in_anchor == nullptr) || (value_in_anchor == nullptr)) { - GELOGE(FAILED, "In data anchor is null, node:%s", node->GetName().c_str()); - return FAILED; - } - const auto &ref_peer_anchor = ref_in_anchor->GetPeerOutAnchor(); - const auto &value_peer_anchor = value_in_anchor->GetPeerOutAnchor(); - if ((ref_peer_anchor == nullptr) || (value_peer_anchor == nullptr)) { - GELOGE(FAILED, "Peer data anchor is null, node:%s", node->GetName().c_str()); - return FAILED; - } - - if (IsCondMatch(node, ref_peer_anchor, value_peer_anchor)) { - /// - /// variable not-const not-const - /// \ / | - /// \ / | - /// Assign ----> variable - /// | | - /// | | - /// node node - /// - GELOGI("Optimization for assign_node %s start", node->GetName().c_str()); - if (IsolateAndDeleteNode(node, {kAssignRefInputIndex}) != SUCCESS) { - GELOGE(FAILED, "Isolate and delete assign_node %s failed.", node->GetName().c_str()); - return FAILED; - } - AddNodeDeleted(node); - - const auto &ref_input = ref_peer_anchor->GetOwnerNode()->GetOpDesc(); - const auto &value_input = value_peer_anchor->GetOwnerNode()->GetOpDesc(); - if ((ref_input == nullptr) 
|| (value_input == nullptr)) { - GELOGE(FAILED, "value input is null"); - return FAILED; - } - if (!AttrUtils::SetStr(value_input->MutableOutputDesc(value_peer_anchor->GetIdx()), ASSIGN_VAR_NAME, - ref_input->GetName())) { - GELOGE(FAILED, "Set attr ASSIGN_VAR_NAME failed."); - return FAILED; - } - // variable has and only has one input - if (ref_input->UpdateInputDesc(0, value_input->GetOutputDesc(value_peer_anchor->GetIdx())) != GRAPH_SUCCESS) { - GELOGE(FAILED, "Update input_desc for variable %s failed.", ref_input->GetName().c_str()); - return FAILED; - } - if (GraphUtils::AddEdge(value_peer_anchor, ref_peer_anchor->GetOwnerNode()->GetInDataAnchor(0)) != GRAPH_SUCCESS) { - GELOGE(FAILED, "Add data edge %s->%s failed", value_input->GetName().c_str(), ref_input->GetName().c_str()); - return FAILED; - } - } - - GELOGD("AssignRemovePass success"); - return SUCCESS; -} -#endif /// /// @brief Check if need optimize for assign_node /// @param [in] assign_node @@ -218,7 +153,7 @@ Status AssignRemovePass::Run(NodePtr &node) { /// @return Status /// bool AssignRemovePass::IsCondMatch(const NodePtr &node, const OutDataAnchorPtr &ref_peer_anchor, - const OutDataAnchorPtr &value_peer_anchor) { + const OutDataAnchorPtr &value_peer_anchor) { GELOGD("Check if assign_node %s match optimization condition, ref_input: %s, value_input: %s", node->GetName().c_str(), ref_peer_anchor->GetOwnerNode()->GetName().c_str(), value_peer_anchor->GetOwnerNode()->GetName().c_str()); diff --git a/ge/graph/passes/assign_remove_pass.h b/ge/graph/passes/assign_remove_pass.h index f8ef2e13..6588df7b 100644 --- a/ge/graph/passes/assign_remove_pass.h +++ b/ge/graph/passes/assign_remove_pass.h @@ -25,7 +25,6 @@ class AssignRemovePass : public BaseNodePass { Status Run(NodePtr &node) override; private: -#ifndef ONLY_COMPILE_OPEN_SRC /// /// @brief Optimize for assign_node /// @param [in] assign_node @@ -39,7 +38,7 @@ class AssignRemovePass : public BaseNodePass { /// @return Status /// Status TransformAttr(NodePtr &node); -#endif + /// /// @brief Check if need optimize for assign_node /// @param [in] assign_node diff --git a/ge/graph/passes/constant_fuse_same_pass.cc b/ge/graph/passes/constant_fuse_same_pass.cc index 8ee89648..eb8b3470 100644 --- a/ge/graph/passes/constant_fuse_same_pass.cc +++ b/ge/graph/passes/constant_fuse_same_pass.cc @@ -115,21 +115,15 @@ void ConstantFuseSamePass::GetFuseConstNodes(ComputeGraphPtr &graph, TypeUtils::DataTypeToSerialString(data_type).c_str()); continue; } -#ifndef ONLY_COMPILE_OPEN_SRC if ((type_size != 0) && (weight->MutableData().GetAlignedPtr() == nullptr)) { GELOGW("aligned_ptr is null while size is not 0"); continue; } -#endif ++insert_const_nums; SameConstKey map_key; map_key.data_size = type_size; -#ifndef ONLY_COMPILE_OPEN_SRC map_key.aligned_ptr = weight->MutableData().GetAlignedPtr(); -#else - map_key.data = weight->GetData().GetData(); -#endif map_key.data_type = data_type; map_key.format = output_tensor->GetFormat(); map_key.shape = output_tensor->GetShape().GetDims(); diff --git a/ge/graph/passes/constant_fuse_same_pass.h b/ge/graph/passes/constant_fuse_same_pass.h index ae39c707..3ff2d6b7 100755 --- a/ge/graph/passes/constant_fuse_same_pass.h +++ b/ge/graph/passes/constant_fuse_same_pass.h @@ -21,20 +21,14 @@ #include #include #include -#ifndef ONLY_COMPILE_OPEN_SRC #include "graph/aligned_ptr.h" -#endif #include "graph/types.h" #include "inc/graph_pass.h" namespace ge { struct SameConstKey { int data_size; -#ifndef ONLY_COMPILE_OPEN_SRC std::shared_ptr aligned_ptr; -#else - 
const uint8_t *data; -#endif DataType data_type; Format format; std::vector shape; @@ -44,19 +38,12 @@ struct SameConstKey { if (data_size != key.data_size) { return data_size < key.data_size; } -#ifndef ONLY_COMPILE_OPEN_SRC if (data_size != 0) { int ret = memcmp(aligned_ptr->Get(), key.aligned_ptr->Get(), data_size); if (ret != 0) { return ret < 0; } } -#else - int ret = memcmp(data, key.data, data_size); - if (ret != 0) { - return ret < 0; - } -#endif if (data_type != key.data_type) { return data_type < key.data_type; } diff --git a/ge/graph/preprocess/graph_preprocess.cc b/ge/graph/preprocess/graph_preprocess.cc index f94633a1..0bfec241 100644 --- a/ge/graph/preprocess/graph_preprocess.cc +++ b/ge/graph/preprocess/graph_preprocess.cc @@ -38,9 +38,6 @@ #include "graph/passes/aicpu_constant_folding_pass.h" #include "graph/passes/assert_pass.h" #include "ge/ge_api_types.h" -#ifdef ONLY_COMPILE_OPEN_SRC -#include "graph/passes/assign_remove_pass.h" -#endif #include "graph/passes/common_subexpression_elimination_pass.h" #include "graph/passes/cond_pass.h" #include "graph/passes/cond_remove_pass.h" @@ -1865,9 +1862,6 @@ Status GraphPrepare::PrepareOptimize() { VarIsInitializedOpPass var_is_initialized_pass; ParallelConcatStartOpPass parallel_concat_start_op_pass; IdentityPass identity_pass(false); -#ifdef ONLY_COMPILE_OPEN_SRC - AssignRemovePass assign_remove_pass; -#endif SnapshotPass snapshot_pass; if (!options_.train_graph_flag) { names_to_passes.emplace_back("DropOutPass", &dropout_pass); @@ -1882,11 +1876,6 @@ Status GraphPrepare::PrepareOptimize() { names_to_passes.emplace_back("VarIsInitializedOpPass", &var_is_initialized_pass); names_to_passes.emplace_back("ParallelConcatStartOpPass", ¶llel_concat_start_op_pass); names_to_passes.emplace_back("IdentityPass", &identity_pass); -#ifdef ONLY_COMPILE_OPEN_SRC - if (GetContext().GetHostExecFlag()) { - names_to_passes.emplace_back("AssignRemovePass", &assign_remove_pass); - } -#endif GE_TIMESTAMP_START(names_to_passes); ret = ge_passes.Run(names_to_passes); GE_TIMESTAMP_END(names_to_passes, "GraphPrepare::NamesToPasses"); diff --git a/ge/hybrid/common/npu_memory_allocator.cc b/ge/hybrid/common/npu_memory_allocator.cc index c2602f37..ccd6a624 100644 --- a/ge/hybrid/common/npu_memory_allocator.cc +++ b/ge/hybrid/common/npu_memory_allocator.cc @@ -20,9 +20,7 @@ #include "graph/manager/graph_caching_allocator.h" #include "graph/manager/graph_mem_allocator.h" #include "graph/manager/rdma_pool_allocator.h" -#ifndef ONLY_COMPILE_OPEN_SRC #include "graph/manager/host_mem_allocator.h" -#endif namespace ge { namespace hybrid { @@ -67,11 +65,7 @@ void *NpuMemoryAllocator::Allocate(std::size_t size, AllocationAttr *attr) { if (mem_type == RDMA_HBM) { buffer = MemManager::Instance().RdmaPoolInstance(RT_MEMORY_HBM).Malloc(allocate_size, device_id_); } else if (mem_type == HOST_DDR) { -#ifndef ONLY_COMPILE_OPEN_SRC buffer = MemManager::Instance().HostMemInstance(RT_MEMORY_HBM).Malloc(allocate_size); -#else - buffer = malloc(allocate_size); -#endif } else { if (allocate_size > kMaxHbmMemorySize) { GELOGE(PARAM_INVALID, "Invalid HBM memory size: %zu", allocate_size); @@ -108,11 +102,7 @@ void NpuMemoryAllocator::Deallocate(void *data, MemStorageType mem_type) { if (mem_type == RDMA_HBM) { MemManager::Instance().RdmaPoolInstance(RT_MEMORY_HBM).Free(reinterpret_cast(data), device_id_); } else if (mem_type == HOST_DDR) { -#ifndef ONLY_COMPILE_OPEN_SRC MemManager::Instance().HostMemInstance(RT_MEMORY_HBM).Free(data); -#else - free(data); -#endif } else { 
MemManager::Instance().CachingInstance(RT_MEMORY_HBM).Free(reinterpret_cast(data), device_id_); } diff --git a/ge/hybrid/model/hybrid_model_builder.cc b/ge/hybrid/model/hybrid_model_builder.cc index 32fc495a..d1f61985 100755 --- a/ge/hybrid/model/hybrid_model_builder.cc +++ b/ge/hybrid/model/hybrid_model_builder.cc @@ -25,10 +25,8 @@ #include "graph/manager/graph_var_manager.h" #include "graph/manager/host_mem_manager.h" #include "graph/manager/trans_var_data_utils.h" -#ifndef ONLY_COMPILE_OPEN_SRC #include "graph/manager/graph_mem_allocator.h" #include "graph/manager/host_mem_allocator.h" -#endif #include "graph/utils/graph_utils.h" #include "hybrid/common/npu_memory_allocator.h" #include "hybrid/node_executor/node_executor.h" @@ -865,7 +863,6 @@ Status HybridModelBuilder::InitConstantOps() { std::unique_ptr var_tensor; if (GetContext().GetHostExecFlag()) { -#ifndef ONLY_COMPILE_OPEN_SRC GE_CHECK_NOTNULL(ge_tensor); // Address for eigen kernel should be aligned with 16 bytes // Tensors return by api GetWeights share data with proto, whose addr is not confirmed to be aligned @@ -878,11 +875,6 @@ Status HybridModelBuilder::InitConstantOps() { } var_tensor.reset(new(std::nothrow)TensorValue(aligned_tensor.MutableData().data(), aligned_tensor.GetData().size())); -#else - auto buffer = ge_tensor->MutableData(); - GELOGD("Init tensor with host constant. size = %zu", buffer.GetSize()); - var_tensor.reset(new(std::nothrow)TensorValue(buffer.GetData(), buffer.GetSize())); -#endif } else { GE_CHK_STATUS_RET_NOLOG(VarNodeToTensor(var_node, var_tensor)); GELOGD("Init const op tensor. name = %s, size = %ld", var_name.c_str(), var_tensor->GetSize()); @@ -937,7 +929,6 @@ Status HybridModelBuilder::InitVariableTensors() { GELOGE(GE_GRAPH_MALLOC_FAILED, "Host variable [%s] malloc failed.", it.first.c_str()); return GE_GRAPH_MALLOC_FAILED; } -#ifndef ONLY_COMPILE_OPEN_SRC if (MemManager::Instance().HostMemInstance(RT_MEMORY_HBM).Malloc(mem_info.host_aligned_ptr, tensor_size) == nullptr) { GELOGE(MEMALLOC_FAILED, "Malloc host memory for an existed GeTensor failed."); @@ -947,11 +938,6 @@ Status HybridModelBuilder::InitVariableTensors() { std::unique_ptr tensor(new (std::nothrow) TensorValue(mem_info.host_aligned_ptr->MutableGet(), tensor_size)); -#else - GELOGD("Host variable [%s] malloc success.", it.first.c_str()); - - std::unique_ptr tensor(new (std::nothrow) TensorValue(mem_info.host_address, tensor_size)); -#endif GE_CHECK_NOTNULL(tensor); hybrid_model_.variable_tensors_.emplace(it.first, std::move(tensor)); } diff --git a/ge/hybrid/node_executor/host_cpu/host_cpu_node_executor.cc b/ge/hybrid/node_executor/host_cpu/host_cpu_node_executor.cc index 32522fe8..0cc635e4 100755 --- a/ge/hybrid/node_executor/host_cpu/host_cpu_node_executor.cc +++ b/ge/hybrid/node_executor/host_cpu/host_cpu_node_executor.cc @@ -18,10 +18,8 @@ #include "hybrid/node_executor/host_cpu/kernel_factory.h" #include "graph/passes/folding_pass.h" #include "hybrid/model/hybrid_model.h" -#ifndef ONLY_COMPILE_OPEN_SRC #include "graph/manager/graph_mem_allocator.h" #include "graph/manager/host_mem_allocator.h" -#endif #include "ge_local_engine/engine/host_cpu_engine.h" namespace ge { @@ -54,18 +52,11 @@ Status CpuKernelNodeTask::Execute(TaskContext &context) { auto input_desc_ptr = context.GetInputDesc(i); GE_CHECK_NOTNULL(input_desc_ptr); const auto &input_desc = *input_desc_ptr; -#ifndef ONLY_COMPILE_OPEN_SRC auto tensor = context.GetInput(i); GE_CHECK_NOTNULL(tensor); auto item = 
MemManager::Instance().HostMemInstance(RT_MEMORY_HBM).GetAlignedPtr(tensor->GetData()); GE_CHECK_NOTNULL(item.second); auto in_tensor = MakeShared(input_desc, item.second, item.first); -#else - GE_CHECK_NOTNULL(context.GetInput(i)); - auto in_tensor = MakeShared(input_desc, - reinterpret_cast(context.GetInput(i)->GetData()), - context.GetInput(i)->GetSize()); -#endif GE_CHECK_NOTNULL(in_tensor); in_tensor->MutableTensorDesc().SetDataType(input_desc.GetDataType()); in_tensor->MutableTensorDesc().SetShape(input_desc.GetShape()); @@ -84,15 +75,9 @@ Status CpuKernelNodeTask::Execute(TaskContext &context) { } auto tensor = context.GetOutput(i); GE_CHECK_NOTNULL(tensor); -#ifndef ONLY_COMPILE_OPEN_SRC auto item = MemManager::Instance().HostMemInstance(RT_MEMORY_HBM).GetAlignedPtr(tensor->GetData()); GE_CHECK_NOTNULL(item.second); auto out_tensor = MakeShared(output_desc, item.second, item.first); -#else - auto out_tensor = MakeShared(output_desc, - reinterpret_cast(tensor->GetData()), - tensor->GetSize()); -#endif GE_CHECK_NOTNULL(out_tensor); out_tensor->MutableTensorDesc().SetDataType(output_desc.GetDataType()); out_tensor->MutableTensorDesc().SetShape(output_desc.GetShape()); From 6fdd3de6704a35435fd88ec2008f5aadaff7b56b Mon Sep 17 00:00:00 2001 From: chenyemeng Date: Wed, 30 Dec 2020 16:23:13 +0800 Subject: [PATCH 41/54] rm compile macro --- ge/CMakeLists.txt | 2 -- ge/common/CMakeLists.txt | 3 -- ge/executor/CMakeLists.txt | 2 -- ge/ge_local_engine/CMakeLists.txt | 5 ---- ge/ge_runtime/CMakeLists.txt | 1 - ge/graph/build/memory/CMakeLists.txt | 44 ++++++++++++++++++++++++++++ ge/host_cpu_engine/CMakeLists.txt | 5 ---- ge/offline/CMakeLists.txt | 3 -- ge/plugin/engine/CMakeLists.txt | 1 - 9 files changed, 44 insertions(+), 22 deletions(-) create mode 100644 ge/graph/build/memory/CMakeLists.txt diff --git a/ge/CMakeLists.txt b/ge/CMakeLists.txt index e94258ac..5181bb61 100755 --- a/ge/CMakeLists.txt +++ b/ge/CMakeLists.txt @@ -718,7 +718,6 @@ target_compile_definitions(ge_compiler PRIVATE FMK_HOST_INFER COMPILE_OMG_PACKAGE google=ascend_private - $<$:ONLY_COMPILE_OPEN_SRC> ) target_compile_options(ge_compiler PRIVATE @@ -806,7 +805,6 @@ endif() target_compile_definitions(opensrc_ascendcl PRIVATE google=ascend_private - $<$:ONLY_COMPILE_OPEN_SRC> ) target_compile_options(opensrc_ascendcl PRIVATE diff --git a/ge/common/CMakeLists.txt b/ge/common/CMakeLists.txt index d2b8c8e7..bb08570a 100755 --- a/ge/common/CMakeLists.txt +++ b/ge/common/CMakeLists.txt @@ -73,7 +73,6 @@ target_compile_definitions(ge_common PRIVATE FMK_SUPPORT_DUMP OS_CENTOS google=ascend_private - $<$:ONLY_COMPILE_OPEN_SRC> ) target_compile_options(ge_common PRIVATE @@ -133,7 +132,6 @@ target_compile_definitions(ge_common_static PRIVATE $,OS_TYPE=WIN,OS_TYPE=0> $<$:SECUREC_USING_STD_SECURE_LIB=0 NOMINMAX> LOG_CPP - $<$:ONLY_COMPILE_OPEN_SRC> ) target_compile_options(ge_common_static PRIVATE @@ -182,7 +180,6 @@ target_compile_definitions(ge_common PRIVATE FMK_SUPPORT_DUMP OS_CENTOS google=ascend_private - $<$:ONLY_COMPILE_OPEN_SRC> ) target_compile_options(ge_common PRIVATE diff --git a/ge/executor/CMakeLists.txt b/ge/executor/CMakeLists.txt index ac4d4ebd..755bdf97 100644 --- a/ge/executor/CMakeLists.txt +++ b/ge/executor/CMakeLists.txt @@ -175,7 +175,6 @@ target_compile_definitions(ge_executor PRIVATE $,OS_TYPE=WIN,OS_TYPE=0> $<$:SECUREC_USING_STD_SECURE_LIB=0 NOMINMAX> LOG_CPP - $<$:ONLY_COMPILE_OPEN_SRC> ) target_include_directories(ge_executor PRIVATE @@ -218,7 +217,6 @@ target_compile_definitions(ge_executor_shared PRIVATE 
PROTOBUF_INLINE_NOT_IN_HEADERS=0 DAVINCI_SUPPORT_PROFILING google=ascend_private - $<$:ONLY_COMPILE_OPEN_SRC> ) target_include_directories(ge_executor_shared PRIVATE diff --git a/ge/ge_local_engine/CMakeLists.txt b/ge/ge_local_engine/CMakeLists.txt index f963730b..7189e8ff 100755 --- a/ge/ge_local_engine/CMakeLists.txt +++ b/ge/ge_local_engine/CMakeLists.txt @@ -31,7 +31,6 @@ target_compile_options(ge_local_engine PRIVATE target_compile_definitions(ge_local_engine PRIVATE google=ascend_private - $<$:ONLY_COMPILE_OPEN_SRC> ) target_include_directories(ge_local_engine PRIVATE @@ -73,7 +72,6 @@ target_compile_options(atc_ge_local_engine PRIVATE target_compile_definitions(atc_ge_local_engine PRIVATE COMPILE_OMG_PACKAGE google=ascend_private - $<$:ONLY_COMPILE_OPEN_SRC> ) target_include_directories(atc_ge_local_engine PRIVATE @@ -119,7 +117,6 @@ target_compile_options(ge_local_opskernel_builder PRIVATE target_compile_definitions(ge_local_opskernel_builder PRIVATE google=ascend_private - $<$:ONLY_COMPILE_OPEN_SRC> ) target_include_directories(ge_local_opskernel_builder PRIVATE @@ -161,7 +158,6 @@ target_compile_options(atc_ge_local_opskernel_builder PRIVATE target_compile_definitions(atc_ge_local_opskernel_builder PRIVATE google=ascend_private - $<$:ONLY_COMPILE_OPEN_SRC> ) target_include_directories(atc_ge_local_opskernel_builder PRIVATE @@ -209,7 +205,6 @@ target_compile_options(ge_local_opskernel_builder_static PRIVATE target_compile_definitions(ge_local_opskernel_builder_static PRIVATE google=ascend_private LOG_CPP - $<$:ONLY_COMPILE_OPEN_SRC> ) target_include_directories(ge_local_opskernel_builder_static PRIVATE diff --git a/ge/ge_runtime/CMakeLists.txt b/ge/ge_runtime/CMakeLists.txt index ca770b15..ce1b89ea 100644 --- a/ge/ge_runtime/CMakeLists.txt +++ b/ge/ge_runtime/CMakeLists.txt @@ -27,7 +27,6 @@ target_compile_options(ge_runtime PRIVATE target_compile_definitions(ge_runtime PRIVATE PROTOBUF_INLINE_NOT_IN_HEADERS=0 - $<$:ONLY_COMPILE_OPEN_SRC> ) target_include_directories(ge_runtime PRIVATE diff --git a/ge/graph/build/memory/CMakeLists.txt b/ge/graph/build/memory/CMakeLists.txt new file mode 100644 index 00000000..126e0187 --- /dev/null +++ b/ge/graph/build/memory/CMakeLists.txt @@ -0,0 +1,44 @@ +set(SRC_LIST + "memory_assigner.cc" + "graph_mem_assigner.cc" + "binary_block_mem_assigner.cc" + "block_mem_assigner.cc" + "hybrid_mem_assigner.cc" + "max_block_mem_assigner.cc" + "var_mem_assign_util.cc" +) + +############ libge_memory.a ############ +add_library(ge_memory STATIC ${SRC_LIST}) + +target_compile_options(ge_memory PRIVATE + -Werror + -O2 + -fno-common +) + +target_compile_definitions(ge_memory PRIVATE + google=ascend_private + LOG_CPP +) + +target_link_libraries(ge_memory PRIVATE + $ + ascend_protobuf + c_sec +) + +target_include_directories(ge_memory PRIVATE + ${CMAKE_CURRENT_LIST_DIR} + ${GE_CODE_DIR}/ge + ${GE_CODE_DIR}/inc + ${GE_CODE_DIR}/inc/external + ${METADEF_DIR}/inc + ${METADEF_DIR}/inc/external + ${METADEF_DIR}/inc/external/graph + ${GE_CODE_DIR}/inc/framework + #### yellow zone #### + ${GE_CODE_DIR}/../inc + #### blue zone #### + ${GE_CODE_DIR}/third_party/fwkacllib/inc +) diff --git a/ge/host_cpu_engine/CMakeLists.txt b/ge/host_cpu_engine/CMakeLists.txt index f20f810e..cbd0bd8b 100644 --- a/ge/host_cpu_engine/CMakeLists.txt +++ b/ge/host_cpu_engine/CMakeLists.txt @@ -25,7 +25,6 @@ target_compile_options(host_cpu_engine PRIVATE target_compile_definitions(host_cpu_engine PRIVATE google=ascend_private - $<$:ONLY_COMPILE_OPEN_SRC> ) 
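# Context for the removals in this patch: every deleted line here is one and
# the same pattern, a compile definition injected through a CMake generator
# expression so it only took effect when the open-source build switch was on
# (ENABLE_OPEN_SRC is the switch used by the top-level CMakeLists.txt; the
# exact expression shape below is an assumption, a sketch rather than a quote):
option(ENABLE_OPEN_SRC "Build the open-source variant" OFF)
target_compile_definitions(host_cpu_engine PRIVATE
    $<$<BOOL:${ENABLE_OPEN_SRC}>:ONLY_COMPILE_OPEN_SRC>
)
# Once the #ifndef ONLY_COMPILE_OPEN_SRC blocks are removed from the sources
# by the later patches in this series, the guarded definition is dead weight,
# so each target keeps only its unconditional definitions such as
# google=ascend_private.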
target_include_directories(host_cpu_engine PRIVATE @@ -66,7 +65,6 @@ target_compile_options(atc_host_cpu_engine PRIVATE target_compile_definitions(atc_host_cpu_engine PRIVATE COMPILE_OMG_PACKAGE google=ascend_private - $<$:ONLY_COMPILE_OPEN_SRC> ) target_include_directories(atc_host_cpu_engine PRIVATE @@ -111,7 +109,6 @@ target_compile_options(host_cpu_opskernel_builder PRIVATE target_compile_definitions(host_cpu_opskernel_builder PRIVATE google=ascend_private - $<$:ONLY_COMPILE_OPEN_SRC> ) target_include_directories(host_cpu_opskernel_builder PRIVATE @@ -152,7 +149,6 @@ target_compile_options(atc_host_cpu_opskernel_builder PRIVATE target_compile_definitions(atc_host_cpu_opskernel_builder PRIVATE google=ascend_private - $<$:ONLY_COMPILE_OPEN_SRC> ) target_include_directories(atc_host_cpu_opskernel_builder PRIVATE @@ -199,7 +195,6 @@ target_compile_options(host_cpu_opskernel_builder_static PRIVATE target_compile_definitions(host_cpu_opskernel_builder_static PRIVATE google=ascend_private LOG_CPP - $<$:ONLY_COMPILE_OPEN_SRC> ) target_include_directories(host_cpu_opskernel_builder_static PRIVATE diff --git a/ge/offline/CMakeLists.txt b/ge/offline/CMakeLists.txt index cb6a3a50..d195e06f 100644 --- a/ge/offline/CMakeLists.txt +++ b/ge/offline/CMakeLists.txt @@ -30,7 +30,6 @@ target_compile_definitions(atc PRIVATE COMPILE_OMG_PACKAGE google=ascend_private LOG_CPP - $<$:ONLY_COMPILE_OPEN_SRC> ) target_include_directories(atc PRIVATE @@ -93,7 +92,6 @@ target_compile_definitions(atc_atc.bin PRIVATE COMPILE_OMG_PACKAGE google=ascend_private LOG_CPP - $<$:ONLY_COMPILE_OPEN_SRC> ) target_include_directories(atc_atc.bin PRIVATE @@ -154,7 +152,6 @@ target_compile_options(fwk_atc.bin PRIVATE -O2 -Wno-deprecated-declarations -fno-common - $<$:ONLY_COMPILE_OPEN_SRC> ) target_compile_definitions(fwk_atc.bin PRIVATE diff --git a/ge/plugin/engine/CMakeLists.txt b/ge/plugin/engine/CMakeLists.txt index 65d5a8a1..f6353231 100644 --- a/ge/plugin/engine/CMakeLists.txt +++ b/ge/plugin/engine/CMakeLists.txt @@ -14,7 +14,6 @@ target_compile_options(engine PRIVATE target_compile_definitions(engine PRIVATE REUSE_MEMORY=1 PROTOBUF_INLINE_NOT_IN_HEADERS=0 - $<$:ONLY_COMPILE_OPEN_SRC> ) target_include_directories(engine PRIVATE From f95efe48a3e16db8c6973327600eb1072eeece9e Mon Sep 17 00:00:00 2001 From: chenyemeng Date: Wed, 30 Dec 2020 16:25:13 +0800 Subject: [PATCH 42/54] rm compile macro --- tests/ut/common/graph/CMakeLists.txt | 1 - tests/ut/ge/CMakeLists.txt | 9 --------- 2 files changed, 10 deletions(-) diff --git a/tests/ut/common/graph/CMakeLists.txt b/tests/ut/common/graph/CMakeLists.txt index e2490150..99b21182 100644 --- a/tests/ut/common/graph/CMakeLists.txt +++ b/tests/ut/common/graph/CMakeLists.txt @@ -110,7 +110,6 @@ target_compile_options(ut_libgraph PRIVATE target_compile_definitions(ut_libgraph PRIVATE google=ascend_private - $<$:ONLY_COMPILE_OPEN_SRC> ) target_link_libraries(ut_libgraph diff --git a/tests/ut/ge/CMakeLists.txt b/tests/ut/ge/CMakeLists.txt index db725dfb..9af3719b 100755 --- a/tests/ut/ge/CMakeLists.txt +++ b/tests/ut/ge/CMakeLists.txt @@ -898,10 +898,6 @@ target_compile_options(ut_libge_others_utest PRIVATE -g --coverage -fprofile-arcs -ftest-coverage ) -target_compile_definitions(ut_libge_others_utest PRIVATE - $<$:ONLY_COMPILE_OPEN_SRC> -) - target_link_libraries(ut_libge_others_utest $ ge_load_common ge_execute_common ge_ut_common gtest gtest_main ascend_protobuf ${COMMON_SHARED_LIBRARIES} json -lrt -ldl -lgcov @@ -919,10 +915,6 @@ target_compile_options(ut_libge_kernel_utest PRIVATE 
-g --coverage -fprofile-arcs -ftest-coverage ) -target_compile_definitions(ut_libge_kernel_utest PRIVATE - $<$:ONLY_COMPILE_OPEN_SRC> -) - target_link_libraries(ut_libge_kernel_utest $ ge_load_common ge_ut_common gtest gtest_main ascend_protobuf ${COMMON_SHARED_LIBRARIES} json -lrt -ldl -lgcov @@ -943,7 +935,6 @@ target_compile_options(ut_libge_distinct_load_utest PRIVATE target_compile_definitions(ut_libge_distinct_load_utest PRIVATE google=ascend_private - $<$:ONLY_COMPILE_OPEN_SRC> ) target_link_libraries(ut_libge_distinct_load_utest From 94bdb8e280fc6e44a1ad0a5877d86ac6918bda5f Mon Sep 17 00:00:00 2001 From: chenyemeng Date: Wed, 30 Dec 2020 16:34:52 +0800 Subject: [PATCH 43/54] rm compile macro --- ge/graph/passes/assign_remove_pass.cc | 3 +-- ge/graph/passes/inplace_support_check_pass.cc | 3 +-- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/ge/graph/passes/assign_remove_pass.cc b/ge/graph/passes/assign_remove_pass.cc index 51e6e006..72e108c3 100644 --- a/ge/graph/passes/assign_remove_pass.cc +++ b/ge/graph/passes/assign_remove_pass.cc @@ -19,6 +19,7 @@ #include "graph/utils/graph_utils.h" #include "graph/debug/ge_attr_define.h" +namespace ge { namespace { constexpr uint32_t kValidInputNodeOutputNum = 1; constexpr int32_t kAssignRefInputIndex = 0; @@ -27,8 +28,6 @@ static const std::set kNoTaskNodeTypes = { ge::DATA, ge::ANN_DATA, ge::CONSTANT, ge::CONSTANTOP, ge::VARIABLE, ge::VARIABLEV2 }; } - -namespace ge { Status AssignRemovePass::Run(NodePtr &node) { GELOGD("AssignRemovePass running"); diff --git a/ge/graph/passes/inplace_support_check_pass.cc b/ge/graph/passes/inplace_support_check_pass.cc index 73cc7f3b..44ad8361 100644 --- a/ge/graph/passes/inplace_support_check_pass.cc +++ b/ge/graph/passes/inplace_support_check_pass.cc @@ -19,6 +19,7 @@ #include "graph/utils/graph_utils.h" #include "graph/debug/ge_attr_define.h" +namespace ge { namespace { constexpr uint32_t kInplaceSupportOutputIndex = 0; constexpr uint32_t kInplaceSupportOutputNum = 1; @@ -26,8 +27,6 @@ static const std::set kSrcNodeTypes = { ge::DATA, ge::ANN_DATA, ge: ge::CONSTANT, ge::CONSTANTOP, ge::VARIABLE, ge::VARIABLEV2 }; } - -namespace ge { Status InplaceSupportCheckPass::Run(NodePtr &node) { GELOGD("InplaceSupportCheckPass running"); if (node->GetAllOutDataAnchorsSize() != kInplaceSupportOutputNum) { From be2a31e2289faccdb7712ff931ea60db3c037f3e Mon Sep 17 00:00:00 2001 From: chenyemeng Date: Tue, 5 Jan 2021 19:28:51 +0800 Subject: [PATCH 44/54] rm macro --- ge/CMakeLists.txt | 1 - ge/client/ge_api.cc | 8 -------- inc/framework/common/ge_types.h | 3 --- inc/framework/omg/parser/model_parser.h | 2 -- tests/ut/ge/CMakeLists.txt | 5 ----- 5 files changed, 19 deletions(-) diff --git a/ge/CMakeLists.txt b/ge/CMakeLists.txt index 5181bb61..317ff00a 100755 --- a/ge/CMakeLists.txt +++ b/ge/CMakeLists.txt @@ -654,7 +654,6 @@ target_compile_definitions(ge_runner PRIVATE FMK_SUPPORT_DUMP DAVINCI_CLOUD google=ascend_private - $<$:ONLY_COMPILE_OPEN_SRC> ) target_compile_options(ge_runner PRIVATE diff --git a/ge/client/ge_api.cc b/ge/client/ge_api.cc index 75cc92d4..d65d7667 100644 --- a/ge/client/ge_api.cc +++ b/ge/client/ge_api.cc @@ -32,9 +32,7 @@ #include "graph/common/ge_call_wrapper.h" #include "register/op_registry.h" #include "common/ge/tbe_plugin_manager.h" -#ifndef ONLY_COMPILE_OPEN_SRC #include "toolchain/plog.h" -#endif using domi::OpRegistry; using std::map; @@ -132,11 +130,9 @@ Status GEInitializeImpl(const std::map &options) { // Initialize GE, prepare for execution, call GELib::Initialize 
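// The guards deleted in the hunks below made the plog calls open-source-only;
// after this patch both GEInitialize overloads call DlogReportInitialize and
// GEFinalize calls DlogReportFinalize unconditionally, so device log
// reporting is set up once per process before any GE work and torn down at
// shutdown. A minimal sketch of that pairing (the calls are taken verbatim
// from this diff; the helper names are illustrative only, not part of it):
static void InitDeviceLogReportSketch() {
  if (DlogReportInitialize() != SUCCESS) {  // from toolchain/plog.h
    GELOGW("Dlog report device log initialize failed.");
  }
}
static void FinalizeDeviceLogReportSketch() {
  if (DlogReportFinalize() != SUCCESS) {
    GELOGW("Dlog report device log finalize failed.");
  }
}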
Status GEInitialize(const std::map &options) { -#ifndef ONLY_COMPILE_OPEN_SRC if (DlogReportInitialize() != SUCCESS) { GELOGW("Dlog report device log initialize failed."); } -#endif return GEInitializeImpl(options); } @@ -151,11 +147,9 @@ Status GEInitialize(const std::map &options) { std::string val = option.second.GetString(); str_options[key] = val; } -#ifndef ONLY_COMPILE_OPEN_SRC if (DlogReportInitialize() != SUCCESS) { GELOGW("Dlog report device log initialize failed."); } -#endif return GEInitializeImpl(str_options); } @@ -200,11 +194,9 @@ Status GEFinalize() { // to avoid memory fragment, use malloc_trim to back free stack to system malloc_trim(0); -#ifndef ONLY_COMPILE_OPEN_SRC if (DlogReportFinalize() != SUCCESS) { GELOGW("Dlog report device log finalize failed."); } -#endif GELOGT(TRACE_STOP, "GEFinalize finished"); return ret; diff --git a/inc/framework/common/ge_types.h b/inc/framework/common/ge_types.h index d845654e..8327b72c 100644 --- a/inc/framework/common/ge_types.h +++ b/inc/framework/common/ge_types.h @@ -37,10 +37,7 @@ enum FrameworkType { MINDSPORE = 1, TENSORFLOW = 3, ANDROID_NN, -#ifndef ONLY_COMPILE_OPEN_SRC ONNX, -#endif - FRAMEWORK_RESERVED, }; enum OpEngineType { diff --git a/inc/framework/omg/parser/model_parser.h b/inc/framework/omg/parser/model_parser.h index 57cff9a7..9eda685d 100644 --- a/inc/framework/omg/parser/model_parser.h +++ b/inc/framework/omg/parser/model_parser.h @@ -65,7 +65,6 @@ class ModelParser { */ virtual Status ParseFromMemory(const char *data, uint32_t size, ge::ComputeGraphPtr &graph) = 0; -#ifndef ONLY_COMPILE_OPEN_SRC /** * @ingroup domi_omg * @brief Parse relevant data from memory and save it to graph @@ -77,7 +76,6 @@ class ModelParser { * @author */ virtual Status ParseFromMemory(const char *data, uint32_t size, ge::Graph &graph) = 0; -#endif /** * @ingroup domi_omg diff --git a/tests/ut/ge/CMakeLists.txt b/tests/ut/ge/CMakeLists.txt index 9af3719b..3a06507c 100755 --- a/tests/ut/ge/CMakeLists.txt +++ b/tests/ut/ge/CMakeLists.txt @@ -723,7 +723,6 @@ add_library(ge_ut_common STATIC ${COMMON_SRC_FILES} ${PROTO_SRCS} ${PROTO_HDRS}) target_compile_definitions(ge_ut_common PRIVATE google=ascend_private - $<$:ONLY_COMPILE_OPEN_SRC> ) target_link_libraries(ge_ut_common PRIVATE @@ -738,7 +737,6 @@ add_library(ge_ut_common_format STATIC ${COMMON_SRC_FILES} ${COMMON_FORMAT_SRC_F target_compile_definitions(ge_ut_common_format PRIVATE google=ascend_private - $<$:ONLY_COMPILE_OPEN_SRC> ) target_link_libraries(ge_ut_common_format PRIVATE @@ -795,7 +793,6 @@ add_library(ge_load_common STATIC ${GRAPH_LOAD_COMMON_SRC_FILES} ${PROTO_SRCS} $ target_compile_definitions(ge_load_common PRIVATE google=ascend_private - $<$:ONLY_COMPILE_OPEN_SRC> ) target_link_libraries(ge_load_common PRIVATE @@ -810,7 +807,6 @@ add_library(ge_execute_common STATIC ${GRAPH_EXECUTE_COMMON_SRC_FILES} ${PROTO_S target_compile_definitions(ge_execute_common PRIVATE google=ascend_private - $<$:ONLY_COMPILE_OPEN_SRC> ) target_link_libraries(ge_execute_common PRIVATE @@ -825,7 +821,6 @@ add_library(ge_build_common STATIC ${GRAPH_BUILD_COMMON_SRC_FILES} ${PROTO_SRCS} target_compile_definitions(ge_build_common PRIVATE google=ascend_private - $<$:ONLY_COMPILE_OPEN_SRC> ) target_link_libraries(ge_build_common PRIVATE From f262eb8f2ce702b2ab33d0850fbc5f3df2c73009 Mon Sep 17 00:00:00 2001 From: chenyemeng Date: Tue, 5 Jan 2021 19:41:10 +0800 Subject: [PATCH 45/54] rm macro --- ge/graph/build/memory/CMakeLists.txt | 44 ---------------------------- 1 file changed, 44 deletions(-) delete 
mode 100644 ge/graph/build/memory/CMakeLists.txt diff --git a/ge/graph/build/memory/CMakeLists.txt b/ge/graph/build/memory/CMakeLists.txt deleted file mode 100644 index 126e0187..00000000 --- a/ge/graph/build/memory/CMakeLists.txt +++ /dev/null @@ -1,44 +0,0 @@ -set(SRC_LIST - "memory_assigner.cc" - "graph_mem_assigner.cc" - "binary_block_mem_assigner.cc" - "block_mem_assigner.cc" - "hybrid_mem_assigner.cc" - "max_block_mem_assigner.cc" - "var_mem_assign_util.cc" -) - -############ libge_memory.a ############ -add_library(ge_memory STATIC ${SRC_LIST}) - -target_compile_options(ge_memory PRIVATE - -Werror - -O2 - -fno-common -) - -target_compile_definitions(ge_memory PRIVATE - google=ascend_private - LOG_CPP -) - -target_link_libraries(ge_memory PRIVATE - $ - ascend_protobuf - c_sec -) - -target_include_directories(ge_memory PRIVATE - ${CMAKE_CURRENT_LIST_DIR} - ${GE_CODE_DIR}/ge - ${GE_CODE_DIR}/inc - ${GE_CODE_DIR}/inc/external - ${METADEF_DIR}/inc - ${METADEF_DIR}/inc/external - ${METADEF_DIR}/inc/external/graph - ${GE_CODE_DIR}/inc/framework - #### yellow zone #### - ${GE_CODE_DIR}/../inc - #### blue zone #### - ${GE_CODE_DIR}/third_party/fwkacllib/inc -) From 202bb6bdd35d6ea6e31410e3ff788f1d7ca10802 Mon Sep 17 00:00:00 2001 From: chenyemeng Date: Tue, 5 Jan 2021 21:03:14 +0800 Subject: [PATCH 46/54] rm macro --- CMakeLists.txt | 8 ++++---- cmake/intf_pub_linux.cmake | 1 + 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 9194f119..88ce15ff 100755 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -72,7 +72,7 @@ if (ENABLE_OPEN_SRC) endif() set(GE_LIB_PATH ${GE_LIB_PATH}/${GE_SYS_ARCH}) set(STATIC_ACL_LIB ${GE_LIB_PATH}) - find_module(slog libslog.so ${GE_LIB_PATH}) + find_module(slog libalog.so ${GE_LIB_PATH}) find_module(static_mmpa libmmpa.a ${GE_LIB_PATH}) find_module(msprofiler_ext libmsprofiler.a ${GE_LIB_PATH}) find_module(hccl libhccl.so ${GE_LIB_PATH}) @@ -88,7 +88,7 @@ if (ENABLE_OPEN_SRC) elseif(ENABLE_GE_COV OR ENABLE_GE_UT) add_subdirectory(tests) else() - find_module(slog libslog.so ${ASCEND_ATC_DIR}) + find_module(slog libalog.so ${ASCEND_ATC_DIR}) find_module(static_mmpa libmmpa.a ${ASCEND_ATC_DIR}) find_module(error_manager liberror_manager.so ${ASCEND_ATC_DIR}) if(PLATFORM STREQUAL "train") @@ -154,7 +154,7 @@ elseif (ENABLE_D OR ENABLE_ACL) include(cmake/intf_pub_linux.cmake) # common libraries - find_module(slog libslog.so ${ASCEND_MS_DRIVER_PATH}) + find_module(slog libalog.so ${ASCEND_MS_DRIVER_PATH}) find_module(error_manager liberror_manager.so ${ASCEND_MS_RUNTIME_PATH} ${ATLAS_MS_RUNTIME_PATH}) find_module(static_mmpa libmmpa.a ${ASCEND_MS_RUNTIME_PATH} ${ATLAS_MS_RUNTIME_PATH}) @@ -174,7 +174,7 @@ elseif(ENABLE_MS_TESTCASES) include(cmake/intf_pub_linux.cmake) # common libraries - find_module(slog libslog.so ${ASCEND_MS_DRIVER_PATH}) + find_module(slog libalog.so ${ASCEND_MS_DRIVER_PATH}) find_module(error_manager liberror_manager.so ${ASCEND_MS_RUNTIME_PATH} ${ATLAS_MS_RUNTIME_PATH}) find_module(static_mmpa libmmpa.a ${ASCEND_MS_RUNTIME_PATH} ${ATLAS_MS_RUNTIME_PATH}) diff --git a/cmake/intf_pub_linux.cmake b/cmake/intf_pub_linux.cmake index 40c6bca9..61237d11 100755 --- a/cmake/intf_pub_linux.cmake +++ b/cmake/intf_pub_linux.cmake @@ -16,6 +16,7 @@ target_compile_definitions(intf_pub INTERFACE $<$:CFG_BUILD_DEBUG> WIN64=1 LINUX=0 + LOG_CPP ) target_link_options(intf_pub INTERFACE -Wl,-z,relro From 739849bc74e56d845546c117792381b302423d0f Mon Sep 17 00:00:00 2001 From: chenyemeng Date: Wed, 6 Jan 2021 09:58:01 +0800 
Subject: [PATCH 47/54] rm macro --- metadef | 2 +- parser | 2 +- .../aicpu/aicpu_schedule/aicpu_op_type_list.h | 120 +++++++-------- third_party/fwkacllib/inc/hccl/hcom.h | 128 ++++------------ .../fwkacllib/inc/mmpa/sub_inc/mmpa_linux.h | 3 +- .../fwkacllib/inc/mmpa/sub_inc/mmpa_win.h | 1 + .../inc/register/op_kernel_registry.h | 49 +++++++ .../fwkacllib/inc/register/op_registry.h | 96 ++++++++++++ third_party/fwkacllib/inc/runtime/base.h | 25 +--- third_party/fwkacllib/inc/runtime/config.h | 6 +- third_party/fwkacllib/inc/runtime/context.h | 2 +- third_party/fwkacllib/inc/runtime/dev.h | 2 +- third_party/fwkacllib/inc/runtime/kernel.h | 2 +- third_party/fwkacllib/inc/runtime/mem.h | 34 ++--- third_party/fwkacllib/inc/runtime/stream.h | 1 + .../fwkacllib/inc/soft_dp/ExternalSoftDp.h | 52 +++++++ third_party/fwkacllib/inc/toolchain/slog.h | 138 ++++++++++++++++++ 17 files changed, 460 insertions(+), 203 deletions(-) create mode 100644 third_party/fwkacllib/inc/register/op_kernel_registry.h create mode 100644 third_party/fwkacllib/inc/register/op_registry.h create mode 100644 third_party/fwkacllib/inc/soft_dp/ExternalSoftDp.h diff --git a/metadef b/metadef index fe37bc34..f08320a6 160000 --- a/metadef +++ b/metadef @@ -1 +1 @@ -Subproject commit fe37bc343ea52c76d35e9e9ec83cea0151bfa900 +Subproject commit f08320a6d699f5b537bf66da572bf225b9cd330e diff --git a/parser b/parser index 336cd310..b2df31dc 160000 --- a/parser +++ b/parser @@ -1 +1 @@ -Subproject commit 336cd3107253d3fe41cfb9fec2db62b5f3d8a33b +Subproject commit b2df31dc5810283e2e483df5ba9517e2ece132a0 diff --git a/third_party/fwkacllib/inc/aicpu/aicpu_schedule/aicpu_op_type_list.h b/third_party/fwkacllib/inc/aicpu/aicpu_schedule/aicpu_op_type_list.h index 7e0f94a8..8d16467c 100644 --- a/third_party/fwkacllib/inc/aicpu/aicpu_schedule/aicpu_op_type_list.h +++ b/third_party/fwkacllib/inc/aicpu/aicpu_schedule/aicpu_op_type_list.h @@ -1,60 +1,60 @@ -/** - * Copyright 2019-2020 Huawei Technologies Co., Ltd - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef AICPU_OP_TYPE_LIST_H_ -#define AICPU_OP_TYPE_LIST_H_ - -enum OpKernelType { - TF_KERNEL, - CPU_KERNEL -}; - -enum ReturnCode { - OP_TYPE_NOT_SUPPORT, - FORMAT_NOT_SUPPORT, - DTYPE_NOT_SUPPORT -}; - -#pragma pack(push, 1) -//One byte alignment -struct SysOpInfo { - uint64_t opLen; - uint64_t opType; - OpKernelType kernelsType; -}; - -struct OpParamInfo { - uint64_t num; - uint64_t dtypeList; - uint64_t formatList; -}; - -struct SysOpCheckInfo { - uint64_t opListNum; - uint64_t offSetLen; - uint64_t sysOpInfoList; - uint64_t opParamInfoList; -}; - -struct SysOpCheckResp { - uint64_t opListNum; - bool isWithoutJson; - uint64_t returnCodeList; - uint64_t sysOpInfoList; - uint64_t opParamInfoList; -}; -#pragma pack(pop) -#endif // AICPU_OP_TYPE_LIST_H_ +/** + * Copyright 2019-2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef AICPU_OP_TYPE_LIST_H_ +#define AICPU_OP_TYPE_LIST_H_ + +enum OpKernelType { + TF_KERNEL, + CPU_KERNEL +}; + +enum ReturnCode { + OP_TYPE_NOT_SUPPORT, + FORMAT_NOT_SUPPORT, + DTYPE_NOT_SUPPORT +}; + +#pragma pack(push, 1) +//One byte alignment +struct SysOpInfo { + uint64_t opLen; + uint64_t opType; + OpKernelType kernelsType; +}; + +struct OpParamInfo { + uint64_t num; + uint64_t dtypeList; + uint64_t formatList; +}; + +struct SysOpCheckInfo { + uint64_t opListNum; + uint64_t offSetLen; + uint64_t sysOpInfoList; + uint64_t opParamInfoList; +}; + +struct SysOpCheckResp { + uint64_t opListNum; + bool isWithoutJson; + uint64_t returnCodeList; + uint64_t sysOpInfoList; + uint64_t opParamInfoList; +}; +#pragma pack(pop) +#endif // AICPU_OP_TYPE_LIST_H_ diff --git a/third_party/fwkacllib/inc/hccl/hcom.h b/third_party/fwkacllib/inc/hccl/hcom.h index e491d43f..972f470c 100644 --- a/third_party/fwkacllib/inc/hccl/hcom.h +++ b/third_party/fwkacllib/inc/hccl/hcom.h @@ -33,15 +33,6 @@ extern "C" { -/** - * @brief Get the rank number in the group. - * - * @param group A string identifying the group name. - * @param rankSize A pointer identifying the rank number. - * @return HcclResult - */ -HcclResult hcom_get_rank_size(const char *group, u32 *rankSize); - /** * @brief Get the rank number in the group. * @@ -51,15 +42,6 @@ HcclResult hcom_get_rank_size(const char *group, u32 *rankSize); */ HcclResult HcomGetRankSize(const char *group, u32 *rankSize); -/** - * @brief Get the rank number of this rank's server within the group. - * - * @param group A string identifying the group name. - * @param localRankSize A pointer identifying the rank number. - * @return HcclResult - */ -HcclResult hcom_get_local_rank_size(const char *group, u32 *localRankSize); - /** * @brief Get the rank number of this rank's server within the group. * @@ -69,15 +51,6 @@ HcclResult hcom_get_local_rank_size(const char *group, u32 *localRankSize); */ HcclResult HcomGetLocalRankSize(const char *group, u32 *localRankSize); -/** - * @brief Get the rank id of this rank. - * - * @param group A string identifying the group name. - * @param rankId A pointer identifying the rank id. - * @return HcclResult - */ -HcclResult hcom_get_rank_id(const char *group, u32 *rankId); - /** * @brief Get the rank id of this rank. * @@ -87,15 +60,6 @@ HcclResult hcom_get_rank_id(const char *group, u32 *rankId); */ HcclResult HcomGetRankId(const char *group, u32 *rankId); -/** - * @brief Get the local rank id of this rank's server within the group. - * - * @param group A string identifying the group name. - * @param localRankId A pointer identifying the local rank id. - * @return HcclResult - */ -HcclResult hcom_get_local_rank_id(const char *group, u32 *localRankId); - /** * @brief Get the local rank id of this rank's server within the group. * @@ -105,16 +69,6 @@ HcclResult hcom_get_local_rank_id(const char *group, u32 *localRankId); */ HcclResult HcomGetLocalRankId(const char *group, u32 *localRankId); -/** - * @brief Get the world rank id according to the group rank id. 
- * - * @param group A string identifying the group name. - * @param groupRank An integer(u32) identifying the group rank id. - * @param worldRank A pointer identifying the world rank id. - * @return HcclResult - */ -HcclResult hcom_get_world_rank_from_group_rank(const char *group, u32 groupRank, u32 *worldRank); - /** * @brief Get the world rank id according to the group rank id. * @@ -125,16 +79,6 @@ HcclResult hcom_get_world_rank_from_group_rank(const char *group, u32 groupRank, */ HcclResult HcomGetWorldRankFromGroupRank(const char *group, u32 groupRank, u32 *worldRank); -/** - * @brief Get the group rank id according to the world rank id. - * - * @param worldRank An integer(u32) identifying the world rank id. - * @param group A string identifying the group name. - * @param groupRank A pointer identifying the group rank id. - * @return HcclResult - */ -HcclResult hcom_get_group_rank_from_world_rank(u32 worldRank, const char *group, u32 *groupRank); - /** * @brief Get the group rank id according to the world rank id. * @@ -145,16 +89,6 @@ HcclResult hcom_get_group_rank_from_world_rank(u32 worldRank, const char *group, */ HcclResult HcomGetGroupRankFromWorldRank(u32 worldRank, const char *group, u32 *groupRank); -/** - * @brief Create group. - * - * @param group A string identifying the group name. - * @param rankNum An integer(u32) identifying the number of ranks in the group. - * @param rankIds A list identifying the ranks in the group. - * @return HcclResult - */ -HcclResult hcom_create_group(const char *group, u32 rankNum, u32 *rankIds); - /** * @brief Create group. * @@ -165,14 +99,6 @@ HcclResult hcom_create_group(const char *group, u32 rankNum, u32 *rankIds); */ HcclResult HcomCreateGroup(const char *group, u32 rankNum, u32 *rankIds); -/** - * @brief Destroy group - * - * @param group A string identifying the group name. - * @return HcclResult - */ -HcclResult hcom_destroy_group(const char *group); - /** * @brief Destroy group * @@ -189,46 +115,54 @@ HcclResult HcomDestroyGroup(const char *group); * @param IdxList A list identifying the index of end gradient in each segment. * @return HcclResult */ -extern HcclResult hcom_set_split_strategy_by_index(const char *group, u32 segmentNum, const u32 *IdxList); +extern HcclResult HcomSetGradFusionByIndex(const char *group, u32 segmentNum, const u32 *IdxList); /** - * @brief Set the gradient split strategy with in the group, according to gradient index. + * @brief Set the gradient split strategy with in the group, according to gradient data size. * * @param group A string identifying the group name. * @param segmentNum An integer(u32) identifying the segments number of gradients. - * @param IdxList A list identifying the index of end gradient in each segment. + * @param sizeList A list identifying the percent of each segment. * @return HcclResult */ -extern HcclResult HcomSetGradFusionByIndex(const char *group, u32 segmentNum, const u32 *IdxList); +extern HcclResult HcomSetGradFusionBySize(const char *group, u32 segmentNum, const float *sizeList); /** - * @brief Set the gradient split strategy with in the group, according to gradient data size. + * @brief Initialize hcom executor. * - * @param group A string identifying the group name. - * @param segmentNum An integer(u32) identifying the segments number of gradients. - * @param sizeList A list identifying the percent of each segment. 
+ * @param void * @return HcclResult */ -extern HcclResult hcom_set_split_strategy_by_size(const char *group, u32 segmentNum, const float *sizeList); +HcclResult HcomExecInitialize(); /** - * @brief Set the gradient split strategy with in the group, according to gradient data size. + * @brief Finalize hcom executor. * - * @param group A string identifying the group name. - * @param segmentNum An integer(u32) identifying the segments number of gradients. - * @param sizeList A list identifying the percent of each segment. + * @param void * @return HcclResult */ -extern HcclResult HcomSetGradFusionBySize(const char *group, u32 segmentNum, const float *sizeList); +HcclResult HcomExecFinalize(); /** - * @brief Register memories and init resources for remote access. + * @brief Put collective communication operation into hcom executor. * - * @param addrList memory addresses for remote access. - * @param count number of remote memory addresses. + * @param opInfo information about collective communication operation. + * @param callback callback after collective communication operation. * @return HcclResult */ -extern HcclResult hcom_remote_access_mem_register(const MemRegisterAddr* addrList, u32 count); +HcclResult HcomExecEnqueueOperation(HcomOperation opInfo, std::function callback); + +/** + * @brief Put remote access operation into hcom executor. + * + * @param remoteAccessType operation type (read or write). + * @param addrInfos address information about collective communication operation. + * @param callback callback after collective communication operation. + * @return HcclResult + */ +HcclResult HcomExecEnqueueRemoteAccess(const std::string& remoteAccessType, + const std::vector& addrInfos, + std::function callback); /** * @brief Register memories and init resources for remote access. 
@@ -239,16 +173,6 @@ extern HcclResult hcom_remote_access_mem_register(const MemRegisterAddr* addrLis */ extern HcclResult HcomRegRemoteAccessMem(const MemRegisterAddr* addrList, u32 count); -HcclResult HcomExecInitialize(); - -HcclResult HcomExecFinalize(); - -HcclResult HcomExecEnqueueOperation(HcomOperation opInfo, std::function callback); - -HcclResult HcomExecEnqueueRemoteAccess(const std::string& remoteAccessType, - const std::vector& addrInfos, - std::function callback); - #ifdef __cplusplus } #endif // __cplusplus diff --git a/third_party/fwkacllib/inc/mmpa/sub_inc/mmpa_linux.h b/third_party/fwkacllib/inc/mmpa/sub_inc/mmpa_linux.h index ad48f70b..005014ed 100644 --- a/third_party/fwkacllib/inc/mmpa/sub_inc/mmpa_linux.h +++ b/third_party/fwkacllib/inc/mmpa/sub_inc/mmpa_linux.h @@ -279,8 +279,9 @@ typedef struct { #define M_NAME_MAX MAX_FNAME #define M_F_OK F_OK -#define M_R_OK R_OK +#define M_X_OK X_OK #define M_W_OK W_OK +#define M_R_OK R_OK #define MM_DT_DIR DT_DIR #define MM_DT_REG DT_REG diff --git a/third_party/fwkacllib/inc/mmpa/sub_inc/mmpa_win.h b/third_party/fwkacllib/inc/mmpa/sub_inc/mmpa_win.h index cecdd4a7..49e97a5d 100644 --- a/third_party/fwkacllib/inc/mmpa/sub_inc/mmpa_win.h +++ b/third_party/fwkacllib/inc/mmpa/sub_inc/mmpa_win.h @@ -322,6 +322,7 @@ typedef VOID (*mmPf)(VOID); #define M_NAME_MAX _MAX_FNAME #define M_F_OK 0 +#define M_X_OK 1 #define M_W_OK 2 #define M_R_OK 4 diff --git a/third_party/fwkacllib/inc/register/op_kernel_registry.h b/third_party/fwkacllib/inc/register/op_kernel_registry.h new file mode 100644 index 00000000..5fed8960 --- /dev/null +++ b/third_party/fwkacllib/inc/register/op_kernel_registry.h @@ -0,0 +1,49 @@ +/** + * Copyright 2019-2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef INC_REGISTER_OP_KERNEL_REGISTRY_H_ +#define INC_REGISTER_OP_KERNEL_REGISTRY_H_ +#include +#include +#include "register/register_types.h" +#include "register.h" + +namespace ge { +class FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY OpKernelRegistry { + public: + using CreateFn = HostCpuOp* (*)(); + ~OpKernelRegistry(); + + static OpKernelRegistry& GetInstance() { + static OpKernelRegistry instance; + return instance; + } + + bool IsRegistered(const std::string &op_type); + + void RegisterHostCpuOp(const std::string &op_type, CreateFn create_fn); + + std::unique_ptr CreateHostCpuOp(const std::string &op_type); + + private: + OpKernelRegistry(); + class OpKernelRegistryImpl; + /*lint -e148*/ + std::unique_ptr impl_; +}; +} // namespace ge + +#endif // INC_REGISTER_OP_KERNEL_REGISTRY_H_ diff --git a/third_party/fwkacllib/inc/register/op_registry.h b/third_party/fwkacllib/inc/register/op_registry.h new file mode 100644 index 00000000..318eb3ba --- /dev/null +++ b/third_party/fwkacllib/inc/register/op_registry.h @@ -0,0 +1,96 @@ +/** + * Copyright 2019-2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef INC_REGISTER_OP_REGISTRY_H_ +#define INC_REGISTER_OP_REGISTRY_H_ + +#include +#include +#include +#include +#include + +#include "register/register.h" + +namespace domi { +enum RemoveInputType { + OMG_MOVE_TYPE_DTYPE = 0, + OMG_MOVE_TYPE_VALUE, + OMG_MOVE_TYPE_SHAPE, + OMG_MOVE_TYPE_FORMAT, + OMG_MOVE_TYPE_AXIS, + OMG_MOVE_TYPE_SCALAR_VALUE, + OMG_REMOVE_TYPE_WITH_COND = 1000, + OMG_REMOVE_INPUT_WITH_ORIGINAL_TYPE, + OMG_INPUT_REORDER, +}; + +struct RemoveInputConfigure { + int inputIdx = INT_MAX; + std::string attrName; + RemoveInputType moveType; + bool attrValue = false; + std::string originalType; + std::vector input_order; +}; + +class FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY OpRegistry { + public: + static OpRegistry *Instance(); + + std::vector registrationDatas; + + bool Register(const OpRegistrationData ®_data); + + domi::ImplyType GetImplyType(const std::string &op_type); + + void GetOpTypeByImplyType(std::vector &vec_op_type, const domi::ImplyType &imply_type); + + domi::ParseParamFunc GetParseParamFunc(const std::string &op_type, const std::string &ori_type); + + domi::ParseParamByOpFunc GetParseParamByOperatorFunc(const std::string &ori_type); + + domi::FusionParseParamFunc GetFusionParseParamFunc(const std::string &op_type, const std::string &ori_type); + + domi::FusionParseParamByOpFunc GetFusionParseParamByOpFunc(const std::string &op_type, + const std::string &ori_type); + + domi::ParseSubgraphFunc GetParseSubgraphPostFunc(const std::string &op_type); + + Status GetParseSubgraphPostFunc(const std::string &op_type, domi::ParseSubgraphFuncV2 &parse_subgraph_func); + + domi::ImplyType GetImplyTypeByOriOpType(const std::string &ori_optype); + + const std::vector &GetRemoveInputConfigure(const std::string &ori_optype) const; + + bool GetOmTypeByOriOpType(const std::string &ori_optype, std::string 
&om_type); + + ParseOpToGraphFunc GetParseOpToGraphFunc(const std::string &op_type, const std::string &ori_type); + + private: + std::unordered_map op_run_mode_map_; + std::unordered_map op_parse_params_fn_map_; + std::unordered_map parse_params_by_op_func_map_; + std::unordered_map fusion_op_parse_params_fn_map_; + std::unordered_map fusion_parse_params_by_op_fn_map_; + std::unordered_map op_types_to_parse_subgraph_post_func_; + std::unordered_map> remove_input_configure_map_; + std::unordered_map origin_type_to_om_type_; + std::unordered_map parse_op_to_graph_fn_map_; + std::unordered_map op_types_to_parse_subgraph_post_func_v2_; +}; +} // namespace domi +#endif // INC_REGISTER_OP_REGISTRY_H_ diff --git a/third_party/fwkacllib/inc/runtime/base.h b/third_party/fwkacllib/inc/runtime/base.h index b9b2cbe5..ebfc09f3 100644 --- a/third_party/fwkacllib/inc/runtime/base.h +++ b/third_party/fwkacllib/inc/runtime/base.h @@ -81,26 +81,17 @@ typedef enum tagRtLimitType { } rtLimitType_t; typedef struct rtExceptionInfo { - uint32_t taskid; - uint32_t streamid; - uint32_t tid; - uint32_t deviceid; + uint32_t taskid; + uint32_t streamid; + uint32_t tid; + uint32_t deviceid; + uint32_t retcode; } rtExceptionInfo; -typedef struct rtTaskFailInfo { - uint32_t taskid; - uint32_t streamid; - uint32_t tid; - uint32_t deviceid; - uint32_t retcode; -} rtTaskFailInfo; - typedef void (*rtErrorCallback)(rtExceptionType); typedef void (*rtTaskFailCallback)(rtExceptionInfo *exceptionInfo); -typedef void (*rtTaskFailCallbackByModule)(rtTaskFailInfo *exceptionInfo); - typedef void (*rtDeviceStateCallback)(uint32_t devId, bool isOpen); /** @@ -143,13 +134,13 @@ RTS_API rtError_t rtProfilerConfig(uint16_t type); * @ingroup profiling_base * @brief start rts profiler. */ -RTS_API rtError_t rtProfilerStart(uint64_t profConfig, int32_t numsDev, uint32_t* deviceList); +RTS_API rtError_t rtProfilerStart(uint64_t profConfig, int32_t numsDev, uint32_t *deviceList); /** * @ingroup profiling_base * @brief stop rts profiler. 
*/ -RTS_API rtError_t rtProfilerStop(uint64_t profConfig, int32_t numsDev, uint32_t* deviceList); +RTS_API rtError_t rtProfilerStop(uint64_t profConfig, int32_t numsDev, uint32_t *deviceList); /** * @ingroup profiling_base @@ -209,7 +200,7 @@ RTS_API rtError_t rtRegDeviceStateCallback(const char *regName, rtDeviceStateCal * @param [out] NA * @return RT_ERROR_NONE for ok */ -RTS_API rtError_t rtRegTaskFailCallbackByModule(const char *moduleName, rtTaskFailCallbackByModule callback); +RTS_API rtError_t rtRegTaskFailCallbackByModule(const char *moduleName, rtTaskFailCallback callback); /** * @ingroup dvrt_base diff --git a/third_party/fwkacllib/inc/runtime/config.h b/third_party/fwkacllib/inc/runtime/config.h index 12a407d7..8bfc9893 100644 --- a/third_party/fwkacllib/inc/runtime/config.h +++ b/third_party/fwkacllib/inc/runtime/config.h @@ -42,6 +42,7 @@ typedef enum tagRtChipType { CHIP_MDC, CHIP_LHISI, CHIP_DC, + CHIP_CLOUD_V2, CHIP_END, } rtChipType_t; @@ -62,6 +63,7 @@ typedef enum tagRtPlatformType { PLATFORM_LHISI_ES, PLATFORM_LHISI_CS, PLATFORM_DC, + PLATFORM_CLOUD_V2, PLATFORM_END, } rtPlatformType_t; @@ -119,7 +121,9 @@ typedef struct tagRtMemoryConfig { uint32_t compilerSize; } rtMemoryConfig_t; -typedef struct tagRtPlatformConfig { uint32_t platformConfig; } rtPlatformConfig_t; +typedef struct tagRtPlatformConfig { + uint32_t platformConfig; +} rtPlatformConfig_t; /** * @ingroup diff --git a/third_party/fwkacllib/inc/runtime/context.h b/third_party/fwkacllib/inc/runtime/context.h index 4be49a8c..ee0d8f0a 100644 --- a/third_party/fwkacllib/inc/runtime/context.h +++ b/third_party/fwkacllib/inc/runtime/context.h @@ -47,7 +47,7 @@ typedef struct tagRtGroupInfo { uint32_t aivectorNum; uint32_t sdmaNum; uint32_t activeStreamNum; - void* extrPtr; + void *extrPtr; } rtGroupInfo_t; /** diff --git a/third_party/fwkacllib/inc/runtime/dev.h b/third_party/fwkacllib/inc/runtime/dev.h index d1a91a9b..d6ffbc9a 100644 --- a/third_party/fwkacllib/inc/runtime/dev.h +++ b/third_party/fwkacllib/inc/runtime/dev.h @@ -185,7 +185,7 @@ RTS_API rtError_t rtDisableP2P(uint32_t devIdDes, uint32_t phyIdSrc); * @return RT_ERROR_NONE for ok * @return RT_ERROR_INVALID_VALUE for error input */ -RTS_API rtError_t rtDeviceCanAccessPeer(int32_t* canAccessPeer, uint32_t device, uint32_t peerDevice); +RTS_API rtError_t rtDeviceCanAccessPeer(int32_t *canAccessPeer, uint32_t device, uint32_t peerDevice); /** * @ingroup dvrt_dev diff --git a/third_party/fwkacllib/inc/runtime/kernel.h b/third_party/fwkacllib/inc/runtime/kernel.h index 5f519442..f44b181c 100644 --- a/third_party/fwkacllib/inc/runtime/kernel.h +++ b/third_party/fwkacllib/inc/runtime/kernel.h @@ -387,7 +387,7 @@ typedef void *rtModel_t; * @return RT_ERROR_NONE for ok * @return RT_ERROR_INVALID_VALUE for error input */ - RTS_API rtError_t rtDumpAddrSet(rtModel_t model, void *addr, uint32_t dumpSize, uint32_t flag); +RTS_API rtError_t rtDumpAddrSet(rtModel_t model, void *addr, uint32_t dumpSize, uint32_t flag); /** * @ingroup rt_kernel diff --git a/third_party/fwkacllib/inc/runtime/mem.h b/third_party/fwkacllib/inc/runtime/mem.h index e65d8604..32bd9e6b 100644 --- a/third_party/fwkacllib/inc/runtime/mem.h +++ b/third_party/fwkacllib/inc/runtime/mem.h @@ -159,11 +159,11 @@ typedef struct rtAiCoreMemorySize { * @ingroup dvrt_mem * @brief memory type */ -typedef enum tagRtMemoryType { - RT_MEMORY_TYPE_HOST = 1, - RT_MEMORY_TYPE_DEVICE = 2 , - RT_MEMORY_TYPE_SVM = 3, - RT_MEMORY_TYPE_DVPP = 4 +typedef enum tagRtMemoryType { + RT_MEMORY_TYPE_HOST = 1, + 
RT_MEMORY_TYPE_DEVICE = 2, + RT_MEMORY_TYPE_SVM = 3, + RT_MEMORY_TYPE_DVPP = 4 } rtMemoryType_t; /** @@ -179,23 +179,23 @@ typedef struct tagRtPointerAttributes { typedef struct rtMallocHostSharedMemoryIn { - const char* name; - const uint64_t size; - uint32_t flag; + const char *name; + const uint64_t size; + uint32_t flag; } rtMallocHostSharedMemoryIn; typedef struct rtMallocHostSharedMemoryOut { - int fd; - void* ptr; - void* devPtr; + int fd; + void *ptr; + void *devPtr; } rtMallocHostSharedMemoryOut; typedef struct rtFreeHostSharedMemoryIn { - const char* name; - const uint64_t size; - int fd; - void* ptr; - void* devPtr; + const char *name; + const uint64_t size; + int fd; + void *ptr; + void *devPtr; } rtFreeHostSharedMemoryIn; @@ -267,7 +267,7 @@ RTS_API rtError_t rtFreeHost(void *hostPtr); */ RTS_API rtError_t rtMallocHostSharedMemory(rtMallocHostSharedMemoryIn *in, - rtMallocHostSharedMemoryOut *out); + rtMallocHostSharedMemoryOut *out); /** * @ingroup dvrt_mem diff --git a/third_party/fwkacllib/inc/runtime/stream.h b/third_party/fwkacllib/inc/runtime/stream.h index 388fd3c2..6b9f80ae 100644 --- a/third_party/fwkacllib/inc/runtime/stream.h +++ b/third_party/fwkacllib/inc/runtime/stream.h @@ -36,6 +36,7 @@ extern "C" { #define RT_STREAM_FORBIDDEN_DEFAULT (0x10) #define RT_STREAM_HEAD (0x20) #define RT_STREAM_PRIMARY_DEFAULT (0x40) +#define RT_STREAM_PRIMARY_FIRST_DEFAULT (0x80) /** * @ingroup stream_type diff --git a/third_party/fwkacllib/inc/soft_dp/ExternalSoftDp.h b/third_party/fwkacllib/inc/soft_dp/ExternalSoftDp.h new file mode 100644 index 00000000..b642cbc8 --- /dev/null +++ b/third_party/fwkacllib/inc/soft_dp/ExternalSoftDp.h @@ -0,0 +1,52 @@ +/** +* @file ExternalSoftDp.h +* +* Copyright (c) Huawei Technologies Co., Ltd. 2012-2018. All rights reserved. +* +* This program is distributed in the hope that it will be useful, +* but WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
+*/ + +#ifndef EXTERNALSOFTDP_H +#define EXTERNALSOFTDP_H + +#include + +extern "C" { +struct SoftDpProcsessInfo { + uint8_t* inputBuffer; + uint32_t inputBufferSize; + + uint8_t* outputBuffer; + uint32_t outputBufferSize; + + uint32_t outputWidth; + uint32_t outputHeight; + + uint32_t reserved; +}; + +struct DpCropInfo { + uint32_t left; + uint32_t right; + uint32_t up; + uint32_t down; +}; + +/* + * @brief decode and resize interface + * @param [in] SoftDpProcsessInfo& softDpProcsessInfo : soft dp struct + * @return success: return 0, fail: return error number + */ +uint32_t DecodeAndResizeJpeg(SoftDpProcsessInfo& softDpProcsessInfo); + +/* + * @brief decode crop and resize interface + * @param [in] SoftDpProcsessInfo& softDpProcsessInfo : soft dp struct + * @param [in] const DpCropInfo& cropInfo: crop struct + * @return success: return 0, fail: return error number + */ +uint32_t DecodeAndCropAndResizeJpeg(SoftDpProcsessInfo& softDpProcsessInfo, const DpCropInfo& cropInfo); +} +#endif // EXTERNALSOFTDP_H diff --git a/third_party/fwkacllib/inc/toolchain/slog.h b/third_party/fwkacllib/inc/toolchain/slog.h index bce58f32..2ebce7d9 100644 --- a/third_party/fwkacllib/inc/toolchain/slog.h +++ b/third_party/fwkacllib/inc/toolchain/slog.h @@ -18,7 +18,9 @@ #define D_SYSLOG_H_ #ifdef __cplusplus +#ifndef LOG_CPP extern "C" { +#endif #endif // __cplusplus #ifndef LINUX @@ -105,6 +107,7 @@ extern "C" { #define SECURITY_LOG_MASK (0x00100000) #define RUN_LOG_MASK (0x01000000) #define OPERATION_LOG_MASK (0x10000000) +#define RESERVERD_LENGTH 52 typedef struct tagDCODE { const char *cName; @@ -116,6 +119,18 @@ typedef struct tagKV { char *value; } KeyValue; +typedef enum { + APPLICATION = 0, + SYSTEM +} ProcessType; + +typedef struct { + ProcessType type; + unsigned int pid; + unsigned int deviceId; + char reserved[RESERVERD_LENGTH]; +} LogAttr; + /** * @ingroup slog * @@ -228,6 +243,14 @@ DLL_EXPORT int dlog_setlevel(int moduleId, int level, int enableEvent); */ DLL_EXPORT int CheckLogLevel(int moduleId, int logLevel); +/** + * @ingroup slog + * @brief DlogSetAttr: set log attr, default pid is 0, default device id is 0, default process type is APPLICATION + * @param [in]logAttr: attr info, include pid(must be larger than 0), process type and device id(chip ID) + * @return: 0: SUCCEED, others: FAILED + */ +DLL_EXPORT int DlogSetAttr(LogAttr logAttr); + /** * @ingroup slog * @brief dlog_error: print error log @@ -367,6 +390,121 @@ void DlogInner(int moduleId, int level, const char *fmt, ...); void DlogWithKVInner(int moduleId, int level, KeyValue *pstKVArray, int kvNum, const char *fmt, ...); #ifdef __cplusplus +#ifndef LOG_CPP } +#endif // LOG_CPP #endif // __cplusplus + +#ifdef LOG_CPP +#ifdef __cplusplus +extern "C" { +#endif +/** + * @ingroup slog + * @brief DlogGetlevelForC: get module loglevel and enableEvent + * + * @param [in]moduleId: moudule id(see slog.h, eg: CCE), others: invalid + * @param [out]enableEvent: 1: enable; 0: disable + * @return: module level(0: debug, 1: info, 2: warning, 3: error, 4: null output) + */ +DLL_EXPORT int DlogGetlevelForC(int moduleId, int *enableEvent); + +/** + * @ingroup slog + * @brief DlogSetlevelForC: set module loglevel and enableEvent + * + * @param [in]moduleId: moudule id(see slog.h, eg: CCE), -1: all modules, others: invalid + * @param [in]level: log level(0: debug, 1: info, 2: warning, 3: error, 4: null output) + * @param [in]enableEvent: 1: enable; 0: disable, others:invalid + * @return: 0: SUCCEED, others: FAILED + */ +DLL_EXPORT int 
DlogSetlevelForC(int moduleId, int level, int enableEvent); + +/** + * @ingroup slog + * @brief CheckLogLevelForC: check module level enable or not + * users no need to call it because all dlog interface(include inner interface) has already called + * + * @param [in]moduleId: module id, eg: CCE + * @param [in]logLevel: eg: DLOG_EVENT/DLOG_ERROR/DLOG_WARN/DLOG_INFO/DLOG_DEBUG + * @return: 1:enable, 0:disable + */ +DLL_EXPORT int CheckLogLevelForC(int moduleId, int logLevel); + +/** + * @ingroup slog + * @brief DlogSetAttrForC: set log attr, default pid is 0, default device id is 0, default process type is APPLICATION + * @param [in]logAttr: attr info, include pid(must be larger than 0), process type and device id(chip ID) + * @return: 0: SUCCEED, others: FAILED + */ +DLL_EXPORT int DlogSetAttrForC(LogAttr logAttr); + +/** + * @ingroup slog + * @brief DlogForC: print log, need caller to specify level + * call CheckLogLevelForC in advance to optimize performance, call interface with fmt input take time + * + * @param [in]moduleId: module id, eg: CCE + * @param [in]level(0: debug, 1: info, 2: warning, 3: error, 5: trace, 6: oplog, 16: event) + * @param [in]fmt: log content + */ +#define DlogForC(moduleId, level, fmt, ...) \ + do { \ + if(CheckLogLevelForC(moduleId, level) == 1) { \ + DlogInnerForC(moduleId, level, "[%s:%d]" fmt, __FILE__, __LINE__, ##__VA_ARGS__); \ + } \ + } while (0) + +/** + * @ingroup slog + * @brief DlogSubForC: print log, need caller to specify level and submodule + * call CheckLogLevelForC in advance to optimize performance, call interface with fmt input take time + * + * @param [in]moduleId: module id, eg: CCE + * @param [in]submodule: eg: engine + * @param [in]level(0: debug, 1: info, 2: warning, 3: error, 5: trace, 6: oplog, 16: event) + * @param [in]fmt: log content + */ +#define DlogSubForC(moduleId, submodule, level, fmt, ...) \ + do { \ + if(CheckLogLevelForC(moduleId, level) == 1) { \ + DlogInnerForC(moduleId, level, "[%s:%d][%s]" fmt, __FILE__, __LINE__, submodule, ##__VA_ARGS__); \ + } \ + } while (0) + +/** + * @ingroup slog + * @brief DlogWithKVForC: print log, need caller to specify level and other paramters + * call CheckLogLevelForC in advance to optimize performance, call interface with fmt input take time + * + * @param [in]moduleId: module id, eg: CCE + * @param [in]level(0: debug, 1: info, 2: warning, 3: error, 5: trace, 6: oplog, 16: event) + * @param [in]pstKVArray: key-value array + * @param [in]kvNum: key-value element num in array + * @param [in]fmt: log content + */ +#define DlogWithKVForC(moduleId, level, pstKVArray, kvNum, fmt, ...) 
\ + do { \ + if(CheckLogLevelForC(moduleId, level) == 1) { \ + DlogWithKVInnerForC(moduleId, level, pstKVArray, kvNum, "[%s:%d]" fmt, __FILE__, __LINE__, ##__VA_ARGS__); \ + } \ + } while (0) + +/** + * @ingroup slog + * @brief DlogFlushForC: flush log buffer to file + */ +DLL_EXPORT void DlogFlushForC(void); + +/** + * @ingroup slog + * @brief Internal log interface, other modules are not allowed to call this interface + */ +void DlogInnerForC(int moduleId, int level, const char *fmt, ...); +void DlogWithKVInnerForC(int moduleId, int level, KeyValue *pstKVArray, int kvNum, const char *fmt, ...); + +#ifdef __cplusplus +} +#endif +#endif // LOG_CPP #endif // D_SYSLOG_H_ From 9c4dc808d4765b1c46309879935833ed6c43f3b3 Mon Sep 17 00:00:00 2001 From: chenyemeng Date: Wed, 30 Dec 2020 10:11:34 +0800 Subject: [PATCH 48/54] rm compile macro --- ge/graph/passes/assign_remove_pass.cc | 1 + 1 file changed, 1 insertion(+) diff --git a/ge/graph/passes/assign_remove_pass.cc b/ge/graph/passes/assign_remove_pass.cc index 72e108c3..e198c2db 100644 --- a/ge/graph/passes/assign_remove_pass.cc +++ b/ge/graph/passes/assign_remove_pass.cc @@ -28,6 +28,7 @@ static const std::set kNoTaskNodeTypes = { ge::DATA, ge::ANN_DATA, ge::CONSTANT, ge::CONSTANTOP, ge::VARIABLE, ge::VARIABLEV2 }; } + Status AssignRemovePass::Run(NodePtr &node) { GELOGD("AssignRemovePass running"); From 99479830ce5d034d40a08631190a1892f1e626af Mon Sep 17 00:00:00 2001 From: chenyemeng Date: Wed, 30 Dec 2020 16:23:13 +0800 Subject: [PATCH 49/54] rm compile macro --- ge/graph/build/memory/CMakeLists.txt | 44 ++++++++++++++++++++++++++++ 1 file changed, 44 insertions(+) create mode 100644 ge/graph/build/memory/CMakeLists.txt diff --git a/ge/graph/build/memory/CMakeLists.txt b/ge/graph/build/memory/CMakeLists.txt new file mode 100644 index 00000000..126e0187 --- /dev/null +++ b/ge/graph/build/memory/CMakeLists.txt @@ -0,0 +1,44 @@ +set(SRC_LIST + "memory_assigner.cc" + "graph_mem_assigner.cc" + "binary_block_mem_assigner.cc" + "block_mem_assigner.cc" + "hybrid_mem_assigner.cc" + "max_block_mem_assigner.cc" + "var_mem_assign_util.cc" +) + +############ libge_memory.a ############ +add_library(ge_memory STATIC ${SRC_LIST}) + +target_compile_options(ge_memory PRIVATE + -Werror + -O2 + -fno-common +) + +target_compile_definitions(ge_memory PRIVATE + google=ascend_private + LOG_CPP +) + +target_link_libraries(ge_memory PRIVATE + $ + ascend_protobuf + c_sec +) + +target_include_directories(ge_memory PRIVATE + ${CMAKE_CURRENT_LIST_DIR} + ${GE_CODE_DIR}/ge + ${GE_CODE_DIR}/inc + ${GE_CODE_DIR}/inc/external + ${METADEF_DIR}/inc + ${METADEF_DIR}/inc/external + ${METADEF_DIR}/inc/external/graph + ${GE_CODE_DIR}/inc/framework + #### yellow zone #### + ${GE_CODE_DIR}/../inc + #### blue zone #### + ${GE_CODE_DIR}/third_party/fwkacllib/inc +) From 8b460476adffb7d74360e302bd4e3f2247f8a51c Mon Sep 17 00:00:00 2001 From: chenyemeng Date: Tue, 5 Jan 2021 19:41:10 +0800 Subject: [PATCH 50/54] rm macro --- ge/graph/build/memory/CMakeLists.txt | 44 ---------------------------- 1 file changed, 44 deletions(-) delete mode 100644 ge/graph/build/memory/CMakeLists.txt diff --git a/ge/graph/build/memory/CMakeLists.txt b/ge/graph/build/memory/CMakeLists.txt deleted file mode 100644 index 126e0187..00000000 --- a/ge/graph/build/memory/CMakeLists.txt +++ /dev/null @@ -1,44 +0,0 @@ -set(SRC_LIST - "memory_assigner.cc" - "graph_mem_assigner.cc" - "binary_block_mem_assigner.cc" - "block_mem_assigner.cc" - "hybrid_mem_assigner.cc" - "max_block_mem_assigner.cc" - 
"var_mem_assign_util.cc" -) - -############ libge_memory.a ############ -add_library(ge_memory STATIC ${SRC_LIST}) - -target_compile_options(ge_memory PRIVATE - -Werror - -O2 - -fno-common -) - -target_compile_definitions(ge_memory PRIVATE - google=ascend_private - LOG_CPP -) - -target_link_libraries(ge_memory PRIVATE - $ - ascend_protobuf - c_sec -) - -target_include_directories(ge_memory PRIVATE - ${CMAKE_CURRENT_LIST_DIR} - ${GE_CODE_DIR}/ge - ${GE_CODE_DIR}/inc - ${GE_CODE_DIR}/inc/external - ${METADEF_DIR}/inc - ${METADEF_DIR}/inc/external - ${METADEF_DIR}/inc/external/graph - ${GE_CODE_DIR}/inc/framework - #### yellow zone #### - ${GE_CODE_DIR}/../inc - #### blue zone #### - ${GE_CODE_DIR}/third_party/fwkacllib/inc -) From 343c93d67036a8bafdda9b432075bf7c525eabb7 Mon Sep 17 00:00:00 2001 From: chenyemeng Date: Wed, 6 Jan 2021 11:26:34 +0800 Subject: [PATCH 51/54] rm compile macro --- third_party/fwkacllib/inc/toolchain/plog.h | 59 ++++++++++++++++++++++ 1 file changed, 59 insertions(+) create mode 100644 third_party/fwkacllib/inc/toolchain/plog.h diff --git a/third_party/fwkacllib/inc/toolchain/plog.h b/third_party/fwkacllib/inc/toolchain/plog.h new file mode 100644 index 00000000..0d42e31d --- /dev/null +++ b/third_party/fwkacllib/inc/toolchain/plog.h @@ -0,0 +1,59 @@ +/** + * Copyright 2019-2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef _PLOG_H_ +#define _PLOG_H_ + +#ifdef __cplusplus +extern "C" { +#endif // __cplusplus + +#ifndef LINUX +#define LINUX 0 +#endif // LINUX + +#ifndef WIN +#define WIN 1 +#endif + +#ifndef OS_TYPE +#define OS_TYPE 0 +#endif // OS_TYPE + +#if (OS_TYPE == LINUX) +#define DLL_EXPORT __attribute__((visibility("default"))) +#else +#define DLL_EXPORT _declspec(dllexport) +#endif + +/** + * @ingroup plog + * @brief DlogReportInitialize: init log in service process before all device setting. + * @return: 0: SUCCEED, others: FAILED + */ +DLL_EXPORT int DlogReportInitialize(); + +/** + * @ingroup plog + * @brief DlogReportFinalize: release log resource in service process after all device reset. 
+ * @return: 0: SUCCEED, others: FAILED + */ +DLL_EXPORT int DlogReportFinalize(); + +#ifdef __cplusplus +} +#endif // __cplusplus +#endif // D_PLOG_H_ From 04105fb40f512818d471c9a6c53e17adbd25d300 Mon Sep 17 00:00:00 2001 From: chenyemeng Date: Wed, 6 Jan 2021 11:44:12 +0800 Subject: [PATCH 52/54] rm compile macro --- tests/ut/common/graph/CMakeLists.txt | 1 + tests/ut/ge/CMakeLists.txt | 1 + 2 files changed, 2 insertions(+) diff --git a/tests/ut/common/graph/CMakeLists.txt b/tests/ut/common/graph/CMakeLists.txt index 99b21182..1c64dce1 100644 --- a/tests/ut/common/graph/CMakeLists.txt +++ b/tests/ut/common/graph/CMakeLists.txt @@ -67,6 +67,7 @@ set(SRC_FILES "${GE_CODE_DIR}/metadef/graph/ge_attr_value.cc" "${GE_CODE_DIR}/metadef/graph/attr_value.cc" "${GE_CODE_DIR}/metadef/graph/buffer.cc" + "${GE_CODE_DIR}/metadef/graph/aligned_ptr.cc" "${GE_CODE_DIR}/metadef/graph/compute_graph.cc" "${GE_CODE_DIR}/metadef/graph/ge_attr_define.cc" "${GE_CODE_DIR}/metadef/graph/graph.cc" diff --git a/tests/ut/ge/CMakeLists.txt b/tests/ut/ge/CMakeLists.txt index 3a06507c..72cbaf63 100755 --- a/tests/ut/ge/CMakeLists.txt +++ b/tests/ut/ge/CMakeLists.txt @@ -89,6 +89,7 @@ set(COMMON_SRC_FILES "${GE_CODE_DIR}/metadef/graph/ge_attr_value.cc" "${GE_CODE_DIR}/metadef/graph/attr_value.cc" "${GE_CODE_DIR}/metadef/graph/buffer.cc" + "${GE_CODE_DIR}/metadef/graph/aligned_ptr.cc" "${GE_CODE_DIR}/metadef/graph/compute_graph.cc" "${GE_CODE_DIR}/metadef/graph/graph.cc" "${GE_CODE_DIR}/metadef/graph/gnode.cc" From 46ea5518d1970968384e545d43cb072c88444b8e Mon Sep 17 00:00:00 2001 From: chenyemeng Date: Wed, 6 Jan 2021 12:52:01 +0800 Subject: [PATCH 53/54] rm compile macro --- tests/depends/cce/CMakeLists.txt | 1 + tests/ut/ge/CMakeLists.txt | 5 ++++- 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/tests/depends/cce/CMakeLists.txt b/tests/depends/cce/CMakeLists.txt index 85e69e6d..7550c63f 100644 --- a/tests/depends/cce/CMakeLists.txt +++ b/tests/depends/cce/CMakeLists.txt @@ -46,6 +46,7 @@ set(SRCS "${GE_CODE_DIR}/metadef/graph/anchor.cc" "${GE_CODE_DIR}/metadef/graph/ge_attr_value.cc" "${GE_CODE_DIR}/metadef/graph/buffer.cc" + "${GE_CODE_DIR}/metadef/graph/aligned_ptr.cc" "${GE_CODE_DIR}/metadef/graph/compute_graph.cc" "${GE_CODE_DIR}/metadef/graph/graph.cc" "${GE_CODE_DIR}/metadef/graph/model.cc" diff --git a/tests/ut/ge/CMakeLists.txt b/tests/ut/ge/CMakeLists.txt index 72cbaf63..2ebe9fc9 100755 --- a/tests/ut/ge/CMakeLists.txt +++ b/tests/ut/ge/CMakeLists.txt @@ -89,7 +89,7 @@ set(COMMON_SRC_FILES "${GE_CODE_DIR}/metadef/graph/ge_attr_value.cc" "${GE_CODE_DIR}/metadef/graph/attr_value.cc" "${GE_CODE_DIR}/metadef/graph/buffer.cc" - "${GE_CODE_DIR}/metadef/graph/aligned_ptr.cc" + "${GE_CODE_DIR}/metadef/graph/aligned_ptr.cc" "${GE_CODE_DIR}/metadef/graph/compute_graph.cc" "${GE_CODE_DIR}/metadef/graph/graph.cc" "${GE_CODE_DIR}/metadef/graph/gnode.cc" @@ -228,6 +228,7 @@ set(COMMON_SRC_FILES "${GE_CODE_DIR}/ge/graph/passes/for_pass.cc" "${GE_CODE_DIR}/ge/graph/passes/enter_pass.cc" "${GE_CODE_DIR}/ge/graph/passes/assign_remove_pass.cc" + "${GE_CODE_DIR}/ge/graph/passes/inplace_support_check_pass.cc" "${GE_CODE_DIR}/ge/graph/passes/addn_pass.cc" "${GE_CODE_DIR}/ge/graph/passes/common_subexpression_elimination_pass.cc" "${GE_CODE_DIR}/ge/graph/passes/transop_symmetry_elimination_pass.cc" @@ -304,6 +305,7 @@ set(COMMON_SRC_FILES "${GE_CODE_DIR}/ge/graph/common/local_context.cc" "${GE_CODE_DIR}/ge/graph/manager/graph_caching_allocator.cc" "${GE_CODE_DIR}/ge/graph/manager/rdma_pool_allocator.cc" + 
"${GE_CODE_DIR}/ge/graph/manager/host_mem_allocator.cc" "${GE_CODE_DIR}/ge/common/dump/dump_op.cc" "${GE_CODE_DIR}/ge/common/model_saver.cc" "${GE_CODE_DIR}/ge/hybrid/node_executor/aicpu/aicpu_ext_info.cc" @@ -371,6 +373,7 @@ set(GRAPH_LOAD_COMMON_SRC_FILES "${GE_CODE_DIR}/ge/graph/manager/trans_var_data_utils.cc" "${GE_CODE_DIR}/ge/graph/manager/graph_caching_allocator.cc" "${GE_CODE_DIR}/ge/graph/manager/rdma_pool_allocator.cc" + "${GE_CODE_DIR}/ge/graph/manager/host_mem_allocator.cc" "${GE_CODE_DIR}/ge/common/thread_pool.cc" ) From 0ad4302f4e3e7d2cdd8714d1e43e04be1b68345a Mon Sep 17 00:00:00 2001 From: chenyemeng Date: Wed, 6 Jan 2021 13:07:57 +0800 Subject: [PATCH 54/54] rm compile macro --- tests/ut/common/graph/testcase/ge_graph/ge_tensor_unittest.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/ut/common/graph/testcase/ge_graph/ge_tensor_unittest.cc b/tests/ut/common/graph/testcase/ge_graph/ge_tensor_unittest.cc index 6d34ab59..5c75bd01 100644 --- a/tests/ut/common/graph/testcase/ge_graph/ge_tensor_unittest.cc +++ b/tests/ut/common/graph/testcase/ge_graph/ge_tensor_unittest.cc @@ -230,7 +230,7 @@ TEST_F(UtestGeTensor, test_tensor_invalid_null) { GeTensor tensor(msg_owner, nullptr); EXPECT_EQ(tensor.GetData().size(), 0); EXPECT_EQ(tensor.MutableData().size(), 0); - EXPECT_EQ(tensor.SetData(Buffer(100)), ge::GRAPH_PARAM_INVALID); + EXPECT_EQ(tensor.SetData(Buffer(100)), GRAPH_SUCCESS); TensorUtils::SetWeightSize(tensor.MutableTensorDesc(), 100); EXPECT_EQ(TensorUtils::GetWeightSize(tensor), 0);