diff --git a/ge/graph/build/task_generator.cc b/ge/graph/build/task_generator.cc index 2db0b6b7..12da803d 100755 --- a/ge/graph/build/task_generator.cc +++ b/ge/graph/build/task_generator.cc @@ -771,6 +771,7 @@ Status TaskGenerator::AutoFindBpOpIndex(const ComputeGraphPtr &graph, ProfilingP GELOGI("Start AutoFindBpOpIndex"); NodePtr bp_node = nullptr; uint32_t current_idx = 0; + uint32_t netoutput_idx = 0; for (auto &node : graph->GetNodes(graph->GetGraphUnknownFlag())) { OpDescPtr op_desc = node->GetOpDesc(); GE_CHECK_NOTNULL(op_desc); @@ -788,6 +789,7 @@ Status TaskGenerator::AutoFindBpOpIndex(const ComputeGraphPtr &graph, ProfilingP if (op_desc->GetName() == NODE_NAME_NET_OUTPUT) { if (bp_node == nullptr) { bp_node = node; + netoutput_idx = current_idx - 1; } } if (graph->GetNeedIteration()) { @@ -812,9 +814,13 @@ Status TaskGenerator::AutoFindBpOpIndex(const ComputeGraphPtr &graph, ProfilingP if (bp_node == nullptr) { GELOGW("not find bp_node."); return SUCCESS; + } else if (bp_node->GetName() == NODE_NAME_NET_OUTPUT) { + profiling_point.bp_index = netoutput_idx; + GELOGI("First bp name %s, idx %u", bp_node->GetName().c_str(), netoutput_idx); + } else { + profiling_point.bp_index = FindLastBpFromBpNode(graph, bp_node); } - profiling_point.bp_index = FindLastBpFromBpNode(graph, bp_node); return SUCCESS; } diff --git a/ge/graph/load/model_manager/davinci_model.cc b/ge/graph/load/model_manager/davinci_model.cc index f8b61216..ff8ffeb3 100755 --- a/ge/graph/load/model_manager/davinci_model.cc +++ b/ge/graph/load/model_manager/davinci_model.cc @@ -984,11 +984,12 @@ Status DavinciModel::InitDataOp(const ComputeGraphPtr &graph, const NodePtr &nod return SUCCESS; } - GELOGI("Init data node: %s.", op_desc->GetName().c_str()); auto data_index = data_op_index++; - if (AttrUtils::GetInt(op_desc, ATTR_NAME_INDEX, data_index)) { + const auto &index_attr = GraphUtils::FindRootGraph(graph) == graph ? ATTR_NAME_INDEX : ATTR_NAME_PARENT_NODE_INDEX; + if (AttrUtils::GetInt(op_desc, index_attr, data_index)) { GELOGD("Get new index %u, old %u", data_index, data_op_index - 1); } + GELOGI("Init data node: %s, index: %u.", op_desc->GetName().c_str(), data_index); data_by_index[data_index] = op_desc; if (known_node_) { @@ -4007,13 +4008,11 @@ Status DavinciModel::NnExecute(rtStream_t stream, bool async_mode, const InputDa iterator_count_++; } - if (!is_async_mode_) { - GE_IF_BOOL_EXEC(profiling_model_execute_on, SetProfileTime(MODEL_AFTER_PROC_START)); - ret = CopyOutputData(input_data.index, output_data, RT_MEMCPY_DEVICE_TO_DEVICE); - GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(ret != SUCCESS, return ACL_ERROR_GE_INTERNAL_ERROR, - "[Copy][OutputData] to user failed, ret:%d, model_id:%u.", ret, model_id_); - GE_IF_BOOL_EXEC(profiling_model_execute_on, SetProfileTime(MODEL_AFTER_PROC_END)); - } + GE_IF_BOOL_EXEC(profiling_model_execute_on, SetProfileTime(MODEL_AFTER_PROC_START)); + ret = CopyOutputData(input_data.index, output_data, RT_MEMCPY_DEVICE_TO_DEVICE); + GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(ret != SUCCESS, return ACL_ERROR_GE_INTERNAL_ERROR, + "[Copy][OutputData] to user failed, ret:%d, model_id:%u.", ret, model_id_); + GE_IF_BOOL_EXEC(profiling_model_execute_on, SetProfileTime(MODEL_AFTER_PROC_END)); // report model time data GE_IF_BOOL_EXEC(profiling_model_execute_on, (void)SinkTimeProfile(input_data)); diff --git a/ge/graph/manager/graph_manager.cc b/ge/graph/manager/graph_manager.cc index 273d2195..40a7cf60 100755 --- a/ge/graph/manager/graph_manager.cc +++ b/ge/graph/manager/graph_manager.cc @@ -120,6 +120,7 @@ const char *const kCheckPointForGetVar = "CheckPointGraphForGetVar"; const char *const kCheckPointGraph = "checkpoint_graph"; const char *const kVectorEngine = "VectorEngine"; const char *const kAIcoreEngine = "AIcoreEngine"; +const char *const kRunFlagOffline = "0"; const int32_t kDynamicDimsTypeIsGetNext = 0; const int32_t kDynamicDimsTypeIsData = 1; const char *const kGetNextName = "IteratorV2"; @@ -2434,8 +2435,6 @@ Status GraphManager::RemoveIsolatedConstInThisGraph(ge::ComputeGraphPtr &compute continue; } if (n->GetOpDesc()->GetType() == CONSTANT || n->GetOpDesc()->GetType() == CONSTANTOP) { - // reset const type depend on train_flag - options_.train_graph_flag ? n->GetOpDesc()->SetType(CONSTANTOP) : n->GetOpDesc()->SetType(CONSTANT); if (n->GetOutAllNodes().empty() && n->GetInAllNodes().empty()) { // it is an isolated constant, just remove it if (GraphUtils::RemoveJustNode(compute_graph, n) != GRAPH_SUCCESS) { @@ -2762,21 +2761,35 @@ Status GraphManager::OptimizeStage2(ge::ComputeGraphPtr &compute_graph) { "Please pay attention to it."); } - ChangeConstTypeWhenTraining(compute_graph); + GE_CHK_STATUS_RET(ChangeConstType(compute_graph)); GELOGI("End optimize after merge sub graph."); return SUCCESS; } -void GraphManager::ChangeConstTypeWhenTraining(const ComputeGraphPtr &compute_graph) { - // The constant for train is CONSTANTOP, and is CONSTANT for inference. They will be unified in future. - if (options_.train_graph_flag) { - for (NodePtr &n : compute_graph->GetAllNodes()) { - // This can ensure that n is not a null pointer - if (n->GetOpDesc()->GetType() == CONSTANT) { - n->GetOpDesc()->SetType(CONSTANTOP); + +Status GraphManager::ChangeConstType(const ComputeGraphPtr &compute_graph) { + // run_flag off means offline, on means online + string run_flag; + (void)ge::GetContext().GetOption(ge::RUN_FLAG, run_flag); + // The constant for online is CONSTANTOP, and is CONSTANT for offline. They will be unified in future. + if (run_flag == kRunFlagOffline) { + GELOGI("Offline mode, change all Constant to Const."); + } else { + GELOGI("Online mode, change all Const to Constant."); + } + for (NodePtr &n : compute_graph->GetAllNodes()) { + GE_CHECK_NOTNULL(n); + if (n->GetType() == CONSTANT || n->GetType() == CONSTANTOP) { + auto op_desc = n->GetOpDesc(); + GE_CHECK_NOTNULL(op_desc); + if (run_flag == kRunFlagOffline) { + op_desc->SetType(CONSTANT); + } else { + op_desc->SetType(CONSTANTOP); } } } + return SUCCESS; } Status GraphManager::LoadGraphAsync(const GeRootModelPtr &ge_root_model, const GraphNodePtr &graph_node) { diff --git a/ge/graph/manager/graph_manager.h b/ge/graph/manager/graph_manager.h index 945a5e5d..c8459b16 100644 --- a/ge/graph/manager/graph_manager.h +++ b/ge/graph/manager/graph_manager.h @@ -375,7 +375,7 @@ class GraphManager { static void ReturnError(GraphManager *graph_manager, GraphNodePtr &graph_node, RunAsyncCallback callback, Status ret, const string &log); - void ChangeConstTypeWhenTraining(const ComputeGraphPtr &compute_graph); + Status ChangeConstType(const ComputeGraphPtr &compute_graph); Status PreRunOptimizeOriginalGraph(const GraphNodePtr &graph_node, const std::vector &inputs, ge::ComputeGraphPtr &compute_graph, uint64_t session_id); diff --git a/ge/graph/passes/memcpy_addr_async_pass.cc b/ge/graph/passes/memcpy_addr_async_pass.cc index e8e4ebd8..84ef226a 100755 --- a/ge/graph/passes/memcpy_addr_async_pass.cc +++ b/ge/graph/passes/memcpy_addr_async_pass.cc @@ -47,6 +47,11 @@ Status MemcpyAddrAsyncPass::Run(ComputeGraphPtr graph) { return RT_FAILED; } + if (value == RT_CAPABILITY_NOT_SUPPORT) { + GELOGW("Not support zero copy, skip it."); + return SUCCESS; + } + for (auto &node : graph->GetAllNodes()) { auto op_desc = node->GetOpDesc(); GE_IF_BOOL_EXEC(op_desc == nullptr, continue); diff --git a/ge/hybrid/executor/worker/execution_engine.cc b/ge/hybrid/executor/worker/execution_engine.cc index 32758f61..8eecbc80 100755 --- a/ge/hybrid/executor/worker/execution_engine.cc +++ b/ge/hybrid/executor/worker/execution_engine.cc @@ -428,7 +428,7 @@ Status ExecutionEngine::ValidateInputTensors(const NodeState &node_state, const } int64_t expected_size; - GE_CHK_GRAPH_STATUS_RET(TensorUtils::GetTensorMemorySizeInBytes(*tensor_desc, expected_size)); + (void)TensorUtils::GetSize(*tensor_desc, expected_size); GELOGD("[%s] Input[%d] expects [%ld] bytes.", task_context.GetNodeName(), i, expected_size); auto size_diff = expected_size - static_cast(input_tensor->GetSize()); if (size_diff > 0) { diff --git a/tests/ut/ge/graph/build/task_generator_unittest.cc b/tests/ut/ge/graph/build/task_generator_unittest.cc index 84c6e3fc..f869f1e0 100644 --- a/tests/ut/ge/graph/build/task_generator_unittest.cc +++ b/tests/ut/ge/graph/build/task_generator_unittest.cc @@ -60,10 +60,12 @@ class UtestTaskGeneratorTest : public testing::Test { ge::ut::GraphBuilder builder("graph"); auto data = builder.AddNode("data", "phony", 1, 1); auto addn1 = builder.AddNode("addn1", "AddN", 1, 1); - auto netoutput = builder.AddNode("netoutput", "NetOutput", 2, 0); - auto op_desc = data->GetOpDesc(); - (void)AttrUtils::SetStr(op_desc, ATTR_NAME_FRAMEWORK_ORIGINAL_TYPE, "IteratorV2"); - op_desc->SetOpKernelLibName("GE"); + auto netoutput = builder.AddNode("Node_Output", "NetOutput", 2, 0); + auto data_desc = data->GetOpDesc(); + (void)AttrUtils::SetStr(data_desc, ATTR_NAME_FRAMEWORK_ORIGINAL_TYPE, "IteratorV2"); + data_desc->SetOpKernelLibName("GE"); + auto output_desc = netoutput->GetOpDesc(); + output_desc->SetOpKernelLibName("output"); builder.AddDataEdge(data, 0, addn1, 0); builder.AddControlEdge(addn1, netoutput); return builder.GetGraph(); @@ -112,7 +114,7 @@ TEST_F(UtestTaskGeneratorTest, AutoFindFpOpIndex) { TEST_F(UtestTaskGeneratorTest, FindLastBpFromBpNode) { auto graph = BuildGraphBpProfiling(); TaskGenerator task_generator(nullptr, 0); - auto net_output = graph->FindNode("netoutput"); + auto net_output = graph->FindNode("Node_Output"); // netoutput has no data input, return default value 0 EXPECT_EQ(task_generator.FindLastBpFromBpNode(graph, net_output), 0); } @@ -138,3 +140,17 @@ TEST_F(UtestTaskGeneratorTest, UpdateOpIsVarAttr) { MemManager::Instance().Finalize(); } + +TEST_F(UtestTaskGeneratorTest, AutoFindBpOpIndex) { + auto graph = BuildGraphBpProfiling(); + TaskGenerator task_generator(nullptr, 0); + auto net_output = graph->FindNode("Node_Output"); + ProfilingPoint profiling_point; + vector all_reduce_nodes; + EXPECT_EQ(task_generator.AutoFindBpOpIndex(graph, profiling_point, all_reduce_nodes), SUCCESS); + + auto output_desc = net_output->GetOpDesc(); + output_desc->SetType("HcomAllReduce"); + output_desc->SetName("hcom"); + EXPECT_EQ(task_generator.AutoFindBpOpIndex(graph, profiling_point, all_reduce_nodes), SUCCESS); +} diff --git a/tests/ut/ge/graph/manager/graph_manager_unittest.cc b/tests/ut/ge/graph/manager/graph_manager_unittest.cc index f68b5080..5cc2a7f6 100644 --- a/tests/ut/ge/graph/manager/graph_manager_unittest.cc +++ b/tests/ut/ge/graph/manager/graph_manager_unittest.cc @@ -114,6 +114,7 @@ #include "graph/common/local_context.h" #include "graph/common/omg_util.h" #include "common/formats/utils/formats_trans_utils.h" +#include "../passes/graph_builder_utils.h" #include "register/custom_pass_helper.h" #include "graph/ops_stub.h" #include "ge_attr_value.h" @@ -151,6 +152,24 @@ void CreateGraph(Graph &graph) { // Graph graph("test_graph"); graph.SetInputs(inputs).SetOutputs(outputs).SetTargets(targets); } +/* Data + * | + * Relu Const + * | + * Netoutput + */ + +ge::ComputeGraphPtr CreateGraphWithIsolatedConst() { + ge::ut::GraphBuilder builder("graph"); + auto data = builder.AddNode("data", "Data", 1, 1); + auto relu = builder.AddNode("addn1", "Relu", 1, 1); + auto netoutput = builder.AddNode("Node_Output", "NetOutput", 1, 0); + auto const1 = builder.AddNode("const1", "Const", 0, 1); + + builder.AddDataEdge(data, 0, relu, 0); + builder.AddDataEdge(relu, 0, netoutput, 0); + return builder.GetGraph(); +} TEST_F(UtestGraphManagerTest, set_and_get_add_graph_flag) { GraphId graph_id = 1; @@ -558,3 +577,32 @@ TEST_F(UtestGraphManagerTest, test_prerunthread_failed_2) { // auto ret = graph_manager.ParseInputsDimsForGetNexNosinkAndData(nodes, input_tensors); // EXPECT_EQ(ret, ge::SUCCESS); // } + +TEST_F(UtestGraphManagerTest, ChangeAndDeleteConst_success) { + std::map options_map; + options_map.insert({ge::RUN_FLAG, "0"}); + ge::GetThreadLocalContext().SetGraphOption(options_map); + + GraphId graph_id = 1; + GraphManager graph_manager; + graph_manager.options_.train_graph_flag = true; + + auto graph = CreateGraphWithIsolatedConst(); + Status status = graph_manager.ChangeConstType(graph); + EXPECT_EQ(status, ge::SUCCESS); + auto constant1 = graph->FindFirstNodeMatchType("Constant"); + EXPECT_EQ(constant1, nullptr); + + options_map.clear(); + options_map.insert({ge::RUN_FLAG, "1"}); + ge::GetThreadLocalContext().SetGraphOption(options_map); + status = graph_manager.ChangeConstType(graph); + EXPECT_EQ(status, ge::SUCCESS); + constant1 = graph->FindFirstNodeMatchType("Constant"); + EXPECT_NE(constant1, nullptr); + + status = graph_manager.RemoveIsolatedConstInThisGraph(graph); + EXPECT_EQ(status, ge::SUCCESS); + auto all_nodes = graph->GetDirectNode(); + EXPECT_EQ(all_nodes.size(), 3); +}