@@ -771,6 +771,7 @@ Status TaskGenerator::AutoFindBpOpIndex(const ComputeGraphPtr &graph, ProfilingP | |||||
GELOGI("Start AutoFindBpOpIndex"); | GELOGI("Start AutoFindBpOpIndex"); | ||||
NodePtr bp_node = nullptr; | NodePtr bp_node = nullptr; | ||||
uint32_t current_idx = 0; | uint32_t current_idx = 0; | ||||
uint32_t netoutput_idx = 0; | |||||
for (auto &node : graph->GetNodes(graph->GetGraphUnknownFlag())) { | for (auto &node : graph->GetNodes(graph->GetGraphUnknownFlag())) { | ||||
OpDescPtr op_desc = node->GetOpDesc(); | OpDescPtr op_desc = node->GetOpDesc(); | ||||
GE_CHECK_NOTNULL(op_desc); | GE_CHECK_NOTNULL(op_desc); | ||||
@@ -788,6 +789,7 @@ Status TaskGenerator::AutoFindBpOpIndex(const ComputeGraphPtr &graph, ProfilingP | |||||
if (op_desc->GetName() == NODE_NAME_NET_OUTPUT) { | if (op_desc->GetName() == NODE_NAME_NET_OUTPUT) { | ||||
if (bp_node == nullptr) { | if (bp_node == nullptr) { | ||||
bp_node = node; | bp_node = node; | ||||
netoutput_idx = current_idx - 1; | |||||
} | } | ||||
} | } | ||||
if (graph->GetNeedIteration()) { | if (graph->GetNeedIteration()) { | ||||
@@ -812,9 +814,13 @@ Status TaskGenerator::AutoFindBpOpIndex(const ComputeGraphPtr &graph, ProfilingP | |||||
if (bp_node == nullptr) { | if (bp_node == nullptr) { | ||||
GELOGW("not find bp_node."); | GELOGW("not find bp_node."); | ||||
return SUCCESS; | return SUCCESS; | ||||
} else if (bp_node->GetName() == NODE_NAME_NET_OUTPUT) { | |||||
profiling_point.bp_index = netoutput_idx; | |||||
GELOGI("First bp name %s, idx %u", bp_node->GetName().c_str(), netoutput_idx); | |||||
} else { | |||||
profiling_point.bp_index = FindLastBpFromBpNode(graph, bp_node); | |||||
} | } | ||||
profiling_point.bp_index = FindLastBpFromBpNode(graph, bp_node); | |||||
return SUCCESS; | return SUCCESS; | ||||
} | } | ||||
@@ -984,11 +984,12 @@ Status DavinciModel::InitDataOp(const ComputeGraphPtr &graph, const NodePtr &nod | |||||
return SUCCESS; | return SUCCESS; | ||||
} | } | ||||
GELOGI("Init data node: %s.", op_desc->GetName().c_str()); | |||||
auto data_index = data_op_index++; | auto data_index = data_op_index++; | ||||
if (AttrUtils::GetInt(op_desc, ATTR_NAME_INDEX, data_index)) { | |||||
const auto &index_attr = GraphUtils::FindRootGraph(graph) == graph ? ATTR_NAME_INDEX : ATTR_NAME_PARENT_NODE_INDEX; | |||||
if (AttrUtils::GetInt(op_desc, index_attr, data_index)) { | |||||
GELOGD("Get new index %u, old %u", data_index, data_op_index - 1); | GELOGD("Get new index %u, old %u", data_index, data_op_index - 1); | ||||
} | } | ||||
GELOGI("Init data node: %s, index: %u.", op_desc->GetName().c_str(), data_index); | |||||
data_by_index[data_index] = op_desc; | data_by_index[data_index] = op_desc; | ||||
if (known_node_) { | if (known_node_) { | ||||
@@ -4007,13 +4008,11 @@ Status DavinciModel::NnExecute(rtStream_t stream, bool async_mode, const InputDa | |||||
iterator_count_++; | iterator_count_++; | ||||
} | } | ||||
if (!is_async_mode_) { | |||||
GE_IF_BOOL_EXEC(profiling_model_execute_on, SetProfileTime(MODEL_AFTER_PROC_START)); | |||||
ret = CopyOutputData(input_data.index, output_data, RT_MEMCPY_DEVICE_TO_DEVICE); | |||||
GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(ret != SUCCESS, return ACL_ERROR_GE_INTERNAL_ERROR, | |||||
"[Copy][OutputData] to user failed, ret:%d, model_id:%u.", ret, model_id_); | |||||
GE_IF_BOOL_EXEC(profiling_model_execute_on, SetProfileTime(MODEL_AFTER_PROC_END)); | |||||
} | |||||
GE_IF_BOOL_EXEC(profiling_model_execute_on, SetProfileTime(MODEL_AFTER_PROC_START)); | |||||
ret = CopyOutputData(input_data.index, output_data, RT_MEMCPY_DEVICE_TO_DEVICE); | |||||
GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(ret != SUCCESS, return ACL_ERROR_GE_INTERNAL_ERROR, | |||||
"[Copy][OutputData] to user failed, ret:%d, model_id:%u.", ret, model_id_); | |||||
GE_IF_BOOL_EXEC(profiling_model_execute_on, SetProfileTime(MODEL_AFTER_PROC_END)); | |||||
// report model time data | // report model time data | ||||
GE_IF_BOOL_EXEC(profiling_model_execute_on, (void)SinkTimeProfile(input_data)); | GE_IF_BOOL_EXEC(profiling_model_execute_on, (void)SinkTimeProfile(input_data)); | ||||
@@ -120,6 +120,7 @@ const char *const kCheckPointForGetVar = "CheckPointGraphForGetVar"; | |||||
const char *const kCheckPointGraph = "checkpoint_graph"; | const char *const kCheckPointGraph = "checkpoint_graph"; | ||||
const char *const kVectorEngine = "VectorEngine"; | const char *const kVectorEngine = "VectorEngine"; | ||||
const char *const kAIcoreEngine = "AIcoreEngine"; | const char *const kAIcoreEngine = "AIcoreEngine"; | ||||
const char *const kRunFlagOffline = "0"; | |||||
const int32_t kDynamicDimsTypeIsGetNext = 0; | const int32_t kDynamicDimsTypeIsGetNext = 0; | ||||
const int32_t kDynamicDimsTypeIsData = 1; | const int32_t kDynamicDimsTypeIsData = 1; | ||||
const char *const kGetNextName = "IteratorV2"; | const char *const kGetNextName = "IteratorV2"; | ||||
@@ -2434,8 +2435,6 @@ Status GraphManager::RemoveIsolatedConstInThisGraph(ge::ComputeGraphPtr &compute | |||||
continue; | continue; | ||||
} | } | ||||
if (n->GetOpDesc()->GetType() == CONSTANT || n->GetOpDesc()->GetType() == CONSTANTOP) { | if (n->GetOpDesc()->GetType() == CONSTANT || n->GetOpDesc()->GetType() == CONSTANTOP) { | ||||
// reset const type depend on train_flag | |||||
options_.train_graph_flag ? n->GetOpDesc()->SetType(CONSTANTOP) : n->GetOpDesc()->SetType(CONSTANT); | |||||
if (n->GetOutAllNodes().empty() && n->GetInAllNodes().empty()) { | if (n->GetOutAllNodes().empty() && n->GetInAllNodes().empty()) { | ||||
// it is an isolated constant, just remove it | // it is an isolated constant, just remove it | ||||
if (GraphUtils::RemoveJustNode(compute_graph, n) != GRAPH_SUCCESS) { | if (GraphUtils::RemoveJustNode(compute_graph, n) != GRAPH_SUCCESS) { | ||||
@@ -2762,21 +2761,35 @@ Status GraphManager::OptimizeStage2(ge::ComputeGraphPtr &compute_graph) { | |||||
"Please pay attention to it."); | "Please pay attention to it."); | ||||
} | } | ||||
ChangeConstTypeWhenTraining(compute_graph); | |||||
GE_CHK_STATUS_RET(ChangeConstType(compute_graph)); | |||||
GELOGI("End optimize after merge sub graph."); | GELOGI("End optimize after merge sub graph."); | ||||
return SUCCESS; | return SUCCESS; | ||||
} | } | ||||
void GraphManager::ChangeConstTypeWhenTraining(const ComputeGraphPtr &compute_graph) { | |||||
// The constant for train is CONSTANTOP, and is CONSTANT for inference. They will be unified in future. | |||||
if (options_.train_graph_flag) { | |||||
for (NodePtr &n : compute_graph->GetAllNodes()) { | |||||
// This can ensure that n is not a null pointer | |||||
if (n->GetOpDesc()->GetType() == CONSTANT) { | |||||
n->GetOpDesc()->SetType(CONSTANTOP); | |||||
// Rewrites the type of every constant node in compute_graph according to the RUN_FLAG option.
// Every node whose type is CONSTANT or CONSTANTOP is set to CONSTANT when the option value
// equals kRunFlagOffline, and to CONSTANTOP for any other value.
// Returns SUCCESS after all nodes have been visited. The GE_CHECK_NOTNULL macros presumably
// return an error status early on a null node or op desc -- confirm against the macro definition.
Status GraphManager::ChangeConstType(const ComputeGraphPtr &compute_graph) { | |||||
// run_flag off means offline, on means online | |||||
string run_flag; | |||||
// NOTE(review): the result of GetOption is discarded. If RUN_FLAG is not set, run_flag
// presumably stays empty and the "online" branch below is taken -- confirm this is intended.
(void)ge::GetContext().GetOption(ge::RUN_FLAG, run_flag); | |||||
// The constant for online is CONSTANTOP, and is CONSTANT for offline. They will be unified in future. | |||||
if (run_flag == kRunFlagOffline) { | |||||
GELOGI("Offline mode, change all Constant to Const."); | |||||
} else { | |||||
GELOGI("Online mode, change all Const to Constant."); | |||||
} | |||||
// Walk all nodes returned by GetAllNodes() and retag only those already typed as a constant.
for (NodePtr &n : compute_graph->GetAllNodes()) { | |||||
GE_CHECK_NOTNULL(n); | |||||
if (n->GetType() == CONSTANT || n->GetType() == CONSTANTOP) { | |||||
auto op_desc = n->GetOpDesc(); | |||||
GE_CHECK_NOTNULL(op_desc); | |||||
// The target type is chosen by the same run_flag comparison that selected the log message above.
if (run_flag == kRunFlagOffline) { | |||||
op_desc->SetType(CONSTANT); | |||||
} else { | |||||
op_desc->SetType(CONSTANTOP); | |||||
} | } | ||||
} | } | ||||
} | } | ||||
return SUCCESS; | |||||
} | } | ||||
Status GraphManager::LoadGraphAsync(const GeRootModelPtr &ge_root_model, const GraphNodePtr &graph_node) { | Status GraphManager::LoadGraphAsync(const GeRootModelPtr &ge_root_model, const GraphNodePtr &graph_node) { | ||||
@@ -375,7 +375,7 @@ class GraphManager { | |||||
static void ReturnError(GraphManager *graph_manager, GraphNodePtr &graph_node, RunAsyncCallback callback, | static void ReturnError(GraphManager *graph_manager, GraphNodePtr &graph_node, RunAsyncCallback callback, | ||||
Status ret, const string &log); | Status ret, const string &log); | ||||
void ChangeConstTypeWhenTraining(const ComputeGraphPtr &compute_graph); | |||||
Status ChangeConstType(const ComputeGraphPtr &compute_graph); | |||||
Status PreRunOptimizeOriginalGraph(const GraphNodePtr &graph_node, const std::vector<GeTensor> &inputs, | Status PreRunOptimizeOriginalGraph(const GraphNodePtr &graph_node, const std::vector<GeTensor> &inputs, | ||||
ge::ComputeGraphPtr &compute_graph, uint64_t session_id); | ge::ComputeGraphPtr &compute_graph, uint64_t session_id); | ||||
@@ -47,6 +47,11 @@ Status MemcpyAddrAsyncPass::Run(ComputeGraphPtr graph) { | |||||
return RT_FAILED; | return RT_FAILED; | ||||
} | } | ||||
if (value == RT_CAPABILITY_NOT_SUPPORT) { | |||||
GELOGW("Not support zero copy, skip it."); | |||||
return SUCCESS; | |||||
} | |||||
for (auto &node : graph->GetAllNodes()) { | for (auto &node : graph->GetAllNodes()) { | ||||
auto op_desc = node->GetOpDesc(); | auto op_desc = node->GetOpDesc(); | ||||
GE_IF_BOOL_EXEC(op_desc == nullptr, continue); | GE_IF_BOOL_EXEC(op_desc == nullptr, continue); | ||||
@@ -428,7 +428,7 @@ Status ExecutionEngine::ValidateInputTensors(const NodeState &node_state, const | |||||
} | } | ||||
int64_t expected_size; | int64_t expected_size; | ||||
GE_CHK_GRAPH_STATUS_RET(TensorUtils::GetTensorMemorySizeInBytes(*tensor_desc, expected_size)); | |||||
(void)TensorUtils::GetSize(*tensor_desc, expected_size); | |||||
GELOGD("[%s] Input[%d] expects [%ld] bytes.", task_context.GetNodeName(), i, expected_size); | GELOGD("[%s] Input[%d] expects [%ld] bytes.", task_context.GetNodeName(), i, expected_size); | ||||
auto size_diff = expected_size - static_cast<int64_t>(input_tensor->GetSize()); | auto size_diff = expected_size - static_cast<int64_t>(input_tensor->GetSize()); | ||||
if (size_diff > 0) { | if (size_diff > 0) { | ||||
@@ -60,10 +60,12 @@ class UtestTaskGeneratorTest : public testing::Test { | |||||
ge::ut::GraphBuilder builder("graph"); | ge::ut::GraphBuilder builder("graph"); | ||||
auto data = builder.AddNode("data", "phony", 1, 1); | auto data = builder.AddNode("data", "phony", 1, 1); | ||||
auto addn1 = builder.AddNode("addn1", "AddN", 1, 1); | auto addn1 = builder.AddNode("addn1", "AddN", 1, 1); | ||||
auto netoutput = builder.AddNode("netoutput", "NetOutput", 2, 0); | |||||
auto op_desc = data->GetOpDesc(); | |||||
(void)AttrUtils::SetStr(op_desc, ATTR_NAME_FRAMEWORK_ORIGINAL_TYPE, "IteratorV2"); | |||||
op_desc->SetOpKernelLibName("GE"); | |||||
auto netoutput = builder.AddNode("Node_Output", "NetOutput", 2, 0); | |||||
auto data_desc = data->GetOpDesc(); | |||||
(void)AttrUtils::SetStr(data_desc, ATTR_NAME_FRAMEWORK_ORIGINAL_TYPE, "IteratorV2"); | |||||
data_desc->SetOpKernelLibName("GE"); | |||||
auto output_desc = netoutput->GetOpDesc(); | |||||
output_desc->SetOpKernelLibName("output"); | |||||
builder.AddDataEdge(data, 0, addn1, 0); | builder.AddDataEdge(data, 0, addn1, 0); | ||||
builder.AddControlEdge(addn1, netoutput); | builder.AddControlEdge(addn1, netoutput); | ||||
return builder.GetGraph(); | return builder.GetGraph(); | ||||
@@ -112,7 +114,7 @@ TEST_F(UtestTaskGeneratorTest, AutoFindFpOpIndex) { | |||||
TEST_F(UtestTaskGeneratorTest, FindLastBpFromBpNode) { | TEST_F(UtestTaskGeneratorTest, FindLastBpFromBpNode) { | ||||
auto graph = BuildGraphBpProfiling(); | auto graph = BuildGraphBpProfiling(); | ||||
TaskGenerator task_generator(nullptr, 0); | TaskGenerator task_generator(nullptr, 0); | ||||
auto net_output = graph->FindNode("netoutput"); | |||||
auto net_output = graph->FindNode("Node_Output"); | |||||
// netoutput has no data input, return default value 0 | // netoutput has no data input, return default value 0 | ||||
EXPECT_EQ(task_generator.FindLastBpFromBpNode(graph, net_output), 0); | EXPECT_EQ(task_generator.FindLastBpFromBpNode(graph, net_output), 0); | ||||
} | } | ||||
@@ -138,3 +140,17 @@ TEST_F(UtestTaskGeneratorTest, UpdateOpIsVarAttr) { | |||||
MemManager::Instance().Finalize(); | MemManager::Instance().Finalize(); | ||||
} | } | ||||
// Calls TaskGenerator::AutoFindBpOpIndex twice on the graph from BuildGraphBpProfiling():
// first with the node looked up as "Node_Output" unchanged, then after that node has been
// renamed to "hcom" and retyped to "HcomAllReduce".
// Only the returned status is asserted; profiling_point and all_reduce_nodes are not inspected.
TEST_F(UtestTaskGeneratorTest, AutoFindBpOpIndex) { | |||||
auto graph = BuildGraphBpProfiling(); | |||||
TaskGenerator task_generator(nullptr, 0); | |||||
// NOTE(review): net_output is dereferenced below without a null check; this assumes the
// graph contains a node named "Node_Output" -- confirm against BuildGraphBpProfiling().
auto net_output = graph->FindNode("Node_Output"); | |||||
ProfilingPoint profiling_point; | |||||
vector<uint32_t> all_reduce_nodes; | |||||
// First pass: graph as built.
EXPECT_EQ(task_generator.AutoFindBpOpIndex(graph, profiling_point, all_reduce_nodes), SUCCESS); | |||||
// Second pass: the same node now carries the name "hcom" and the type "HcomAllReduce".
auto output_desc = net_output->GetOpDesc(); | |||||
output_desc->SetType("HcomAllReduce"); | |||||
output_desc->SetName("hcom"); | |||||
EXPECT_EQ(task_generator.AutoFindBpOpIndex(graph, profiling_point, all_reduce_nodes), SUCCESS); | |||||
} |
@@ -114,6 +114,7 @@ | |||||
#include "graph/common/local_context.h" | #include "graph/common/local_context.h" | ||||
#include "graph/common/omg_util.h" | #include "graph/common/omg_util.h" | ||||
#include "common/formats/utils/formats_trans_utils.h" | #include "common/formats/utils/formats_trans_utils.h" | ||||
#include "../passes/graph_builder_utils.h" | |||||
#include "register/custom_pass_helper.h" | #include "register/custom_pass_helper.h" | ||||
#include "graph/ops_stub.h" | #include "graph/ops_stub.h" | ||||
#include "ge_attr_value.h" | #include "ge_attr_value.h" | ||||
@@ -151,6 +152,24 @@ void CreateGraph(Graph &graph) { | |||||
// Graph graph("test_graph"); | // Graph graph("test_graph"); | ||||
graph.SetInputs(inputs).SetOutputs(outputs).SetTargets(targets); | graph.SetInputs(inputs).SetOutputs(outputs).SetTargets(targets); | ||||
} | } | ||||
/* Data | |||||
* | | |||||
* Relu Const | |||||
* | | |||||
* Netoutput | |||||
*/ | |||||
// Builds a four-node test graph: data -> relu -> netoutput connected by data edges, plus a
// "Const" node named "const1" that is added to the graph but never connected to anything.
// Returns the graph produced by the GraphBuilder.
ge::ComputeGraphPtr CreateGraphWithIsolatedConst() { | |||||
ge::ut::GraphBuilder builder("graph"); | |||||
// AddNode arguments are (name, type, <int>, <int>); the two integers are presumably the
// input and output counts -- confirm against GraphBuilder::AddNode.
auto data = builder.AddNode("data", "Data", 1, 1); | |||||
// Note: the Relu node is registered under the name "addn1".
auto relu = builder.AddNode("addn1", "Relu", 1, 1); | |||||
auto netoutput = builder.AddNode("Node_Output", "NetOutput", 1, 0); | |||||
// const1 is intentionally left without edges; the handle is not used again in this function.
auto const1 = builder.AddNode("const1", "Const", 0, 1); | |||||
builder.AddDataEdge(data, 0, relu, 0); | |||||
builder.AddDataEdge(relu, 0, netoutput, 0); | |||||
return builder.GetGraph(); | |||||
} | |||||
TEST_F(UtestGraphManagerTest, set_and_get_add_graph_flag) { | TEST_F(UtestGraphManagerTest, set_and_get_add_graph_flag) { | ||||
GraphId graph_id = 1; | GraphId graph_id = 1; | ||||
@@ -558,3 +577,32 @@ TEST_F(UtestGraphManagerTest, test_prerunthread_failed_2) { | |||||
// auto ret = graph_manager.ParseInputsDimsForGetNexNosinkAndData(nodes, input_tensors); | // auto ret = graph_manager.ParseInputsDimsForGetNexNosinkAndData(nodes, input_tensors); | ||||
// EXPECT_EQ(ret, ge::SUCCESS); | // EXPECT_EQ(ret, ge::SUCCESS); | ||||
// } | // } | ||||
// Exercises GraphManager::ChangeConstType under both RUN_FLAG values and then
// GraphManager::RemoveIsolatedConstInThisGraph on the graph from CreateGraphWithIsolatedConst().
// Expectations asserted below:
//   - RUN_FLAG "0": no node of type "Constant" exists afterwards.
//   - RUN_FLAG "1": a node of type "Constant" exists afterwards.
//   - after RemoveIsolatedConstInThisGraph: the graph has 3 direct nodes.
TEST_F(UtestGraphManagerTest, ChangeAndDeleteConst_success) { | |||||
std::map<string, string> options_map; | |||||
options_map.insert({ge::RUN_FLAG, "0"}); | |||||
ge::GetThreadLocalContext().SetGraphOption(options_map); | |||||
// NOTE(review): graph_id is not used anywhere else in this test.
GraphId graph_id = 1; | |||||
GraphManager graph_manager; | |||||
// NOTE(review): train_graph_flag is set here, but the calls below are driven by RUN_FLAG;
// whether this flag still influences either call cannot be seen from this test -- confirm.
graph_manager.options_.train_graph_flag = true; | |||||
auto graph = CreateGraphWithIsolatedConst(); | |||||
// Pass 1: RUN_FLAG == "0".
Status status = graph_manager.ChangeConstType(graph); | |||||
EXPECT_EQ(status, ge::SUCCESS); | |||||
auto constant1 = graph->FindFirstNodeMatchType("Constant"); | |||||
EXPECT_EQ(constant1, nullptr); | |||||
// Pass 2: switch RUN_FLAG to "1" and convert again.
options_map.clear(); | |||||
options_map.insert({ge::RUN_FLAG, "1"}); | |||||
ge::GetThreadLocalContext().SetGraphOption(options_map); | |||||
status = graph_manager.ChangeConstType(graph); | |||||
EXPECT_EQ(status, ge::SUCCESS); | |||||
constant1 = graph->FindFirstNodeMatchType("Constant"); | |||||
EXPECT_NE(constant1, nullptr); | |||||
// Remove the unconnected constant; 3 direct nodes are expected to remain.
status = graph_manager.RemoveIsolatedConstInThisGraph(graph); | |||||
EXPECT_EQ(status, ge::SUCCESS); | |||||
auto all_nodes = graph->GetDirectNode(); | |||||
EXPECT_EQ(all_nodes.size(), 3); | |||||
} |