@@ -771,6 +771,7 @@ Status TaskGenerator::AutoFindBpOpIndex(const ComputeGraphPtr &graph, ProfilingP | |||
GELOGI("Start AutoFindBpOpIndex"); | |||
NodePtr bp_node = nullptr; | |||
uint32_t current_idx = 0; | |||
uint32_t netoutput_idx = 0; | |||
for (auto &node : graph->GetNodes(graph->GetGraphUnknownFlag())) { | |||
OpDescPtr op_desc = node->GetOpDesc(); | |||
GE_CHECK_NOTNULL(op_desc); | |||
@@ -788,6 +789,7 @@ Status TaskGenerator::AutoFindBpOpIndex(const ComputeGraphPtr &graph, ProfilingP | |||
if (op_desc->GetName() == NODE_NAME_NET_OUTPUT) { | |||
if (bp_node == nullptr) { | |||
bp_node = node; | |||
netoutput_idx = current_idx - 1; | |||
} | |||
} | |||
if (graph->GetNeedIteration()) { | |||
@@ -812,9 +814,13 @@ Status TaskGenerator::AutoFindBpOpIndex(const ComputeGraphPtr &graph, ProfilingP | |||
if (bp_node == nullptr) { | |||
GELOGW("not find bp_node."); | |||
return SUCCESS; | |||
} else if (bp_node->GetName() == NODE_NAME_NET_OUTPUT) { | |||
profiling_point.bp_index = netoutput_idx; | |||
GELOGI("First bp name %s, idx %u", bp_node->GetName().c_str(), netoutput_idx); | |||
} else { | |||
profiling_point.bp_index = FindLastBpFromBpNode(graph, bp_node); | |||
} | |||
profiling_point.bp_index = FindLastBpFromBpNode(graph, bp_node); | |||
return SUCCESS; | |||
} | |||
@@ -984,11 +984,12 @@ Status DavinciModel::InitDataOp(const ComputeGraphPtr &graph, const NodePtr &nod | |||
return SUCCESS; | |||
} | |||
GELOGI("Init data node: %s.", op_desc->GetName().c_str()); | |||
auto data_index = data_op_index++; | |||
if (AttrUtils::GetInt(op_desc, ATTR_NAME_INDEX, data_index)) { | |||
const auto &index_attr = GraphUtils::FindRootGraph(graph) == graph ? ATTR_NAME_INDEX : ATTR_NAME_PARENT_NODE_INDEX; | |||
if (AttrUtils::GetInt(op_desc, index_attr, data_index)) { | |||
GELOGD("Get new index %u, old %u", data_index, data_op_index - 1); | |||
} | |||
GELOGI("Init data node: %s, index: %u.", op_desc->GetName().c_str(), data_index); | |||
data_by_index[data_index] = op_desc; | |||
if (known_node_) { | |||
@@ -4007,13 +4008,11 @@ Status DavinciModel::NnExecute(rtStream_t stream, bool async_mode, const InputDa | |||
iterator_count_++; | |||
} | |||
if (!is_async_mode_) { | |||
GE_IF_BOOL_EXEC(profiling_model_execute_on, SetProfileTime(MODEL_AFTER_PROC_START)); | |||
ret = CopyOutputData(input_data.index, output_data, RT_MEMCPY_DEVICE_TO_DEVICE); | |||
GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(ret != SUCCESS, return ACL_ERROR_GE_INTERNAL_ERROR, | |||
"[Copy][OutputData] to user failed, ret:%d, model_id:%u.", ret, model_id_); | |||
GE_IF_BOOL_EXEC(profiling_model_execute_on, SetProfileTime(MODEL_AFTER_PROC_END)); | |||
} | |||
GE_IF_BOOL_EXEC(profiling_model_execute_on, SetProfileTime(MODEL_AFTER_PROC_START)); | |||
ret = CopyOutputData(input_data.index, output_data, RT_MEMCPY_DEVICE_TO_DEVICE); | |||
GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(ret != SUCCESS, return ACL_ERROR_GE_INTERNAL_ERROR, | |||
"[Copy][OutputData] to user failed, ret:%d, model_id:%u.", ret, model_id_); | |||
GE_IF_BOOL_EXEC(profiling_model_execute_on, SetProfileTime(MODEL_AFTER_PROC_END)); | |||
// report model time data | |||
GE_IF_BOOL_EXEC(profiling_model_execute_on, (void)SinkTimeProfile(input_data)); | |||
@@ -120,6 +120,7 @@ const char *const kCheckPointForGetVar = "CheckPointGraphForGetVar"; | |||
const char *const kCheckPointGraph = "checkpoint_graph"; | |||
const char *const kVectorEngine = "VectorEngine"; | |||
const char *const kAIcoreEngine = "AIcoreEngine"; | |||
const char *const kRunFlagOffline = "0"; | |||
const int32_t kDynamicDimsTypeIsGetNext = 0; | |||
const int32_t kDynamicDimsTypeIsData = 1; | |||
const char *const kGetNextName = "IteratorV2"; | |||
@@ -2434,8 +2435,6 @@ Status GraphManager::RemoveIsolatedConstInThisGraph(ge::ComputeGraphPtr &compute | |||
continue; | |||
} | |||
if (n->GetOpDesc()->GetType() == CONSTANT || n->GetOpDesc()->GetType() == CONSTANTOP) { | |||
// reset const type depend on train_flag | |||
options_.train_graph_flag ? n->GetOpDesc()->SetType(CONSTANTOP) : n->GetOpDesc()->SetType(CONSTANT); | |||
if (n->GetOutAllNodes().empty() && n->GetInAllNodes().empty()) { | |||
// it is an isolated constant, just remove it | |||
if (GraphUtils::RemoveJustNode(compute_graph, n) != GRAPH_SUCCESS) { | |||
@@ -2762,21 +2761,35 @@ Status GraphManager::OptimizeStage2(ge::ComputeGraphPtr &compute_graph) { | |||
"Please pay attention to it."); | |||
} | |||
ChangeConstTypeWhenTraining(compute_graph); | |||
GE_CHK_STATUS_RET(ChangeConstType(compute_graph)); | |||
GELOGI("End optimize after merge sub graph."); | |||
return SUCCESS; | |||
} | |||
void GraphManager::ChangeConstTypeWhenTraining(const ComputeGraphPtr &compute_graph) { | |||
// The constant for train is CONSTANTOP, and is CONSTANT for inference. They will be unified in future. | |||
if (options_.train_graph_flag) { | |||
for (NodePtr &n : compute_graph->GetAllNodes()) { | |||
// This can ensure that n is not a null pointer | |||
if (n->GetOpDesc()->GetType() == CONSTANT) { | |||
n->GetOpDesc()->SetType(CONSTANTOP); | |||
/// Rewrites the type of every constant node of the graph according to the RUN_FLAG option.
///
/// If RUN_FLAG equals kRunFlagOffline, each node whose type is CONSTANT or CONSTANTOP is set
/// to CONSTANT; for any other value of the option it is set to CONSTANTOP.
///
/// @param compute_graph graph whose nodes (as returned by GetAllNodes()) are visited
/// @return SUCCESS once every node has been visited
Status GraphManager::ChangeConstType(const ComputeGraphPtr &compute_graph) {
  // run_flag off means offline, on means online
  string run_flag;
  (void)ge::GetContext().GetOption(ge::RUN_FLAG, run_flag);
  const bool offline_mode = (run_flag == kRunFlagOffline);

  // The constant for online is CONSTANTOP, and is CONSTANT for offline. They will be unified in future.
  if (offline_mode) {
    GELOGI("Offline mode, change all Constant to Const.");
  } else {
    GELOGI("Online mode, change all Const to Constant.");
  }

  for (NodePtr &node : compute_graph->GetAllNodes()) {
    GE_CHECK_NOTNULL(node);
    const bool is_const_node = (node->GetType() == CONSTANT) || (node->GetType() == CONSTANTOP);
    if (!is_const_node) {
      continue;
    }
    auto desc = node->GetOpDesc();
    GE_CHECK_NOTNULL(desc);
    // The mode was fixed before the loop, so every constant node gets the same target type.
    desc->SetType(offline_mode ? CONSTANT : CONSTANTOP);
  }
  return SUCCESS;
}
Status GraphManager::LoadGraphAsync(const GeRootModelPtr &ge_root_model, const GraphNodePtr &graph_node) { | |||
@@ -375,7 +375,7 @@ class GraphManager { | |||
static void ReturnError(GraphManager *graph_manager, GraphNodePtr &graph_node, RunAsyncCallback callback, | |||
Status ret, const string &log); | |||
void ChangeConstTypeWhenTraining(const ComputeGraphPtr &compute_graph); | |||
Status ChangeConstType(const ComputeGraphPtr &compute_graph); | |||
Status PreRunOptimizeOriginalGraph(const GraphNodePtr &graph_node, const std::vector<GeTensor> &inputs, | |||
ge::ComputeGraphPtr &compute_graph, uint64_t session_id); | |||
@@ -47,6 +47,11 @@ Status MemcpyAddrAsyncPass::Run(ComputeGraphPtr graph) { | |||
return RT_FAILED; | |||
} | |||
if (value == RT_CAPABILITY_NOT_SUPPORT) { | |||
GELOGW("Not support zero copy, skip it."); | |||
return SUCCESS; | |||
} | |||
for (auto &node : graph->GetAllNodes()) { | |||
auto op_desc = node->GetOpDesc(); | |||
GE_IF_BOOL_EXEC(op_desc == nullptr, continue); | |||
@@ -428,7 +428,7 @@ Status ExecutionEngine::ValidateInputTensors(const NodeState &node_state, const | |||
} | |||
int64_t expected_size; | |||
GE_CHK_GRAPH_STATUS_RET(TensorUtils::GetTensorMemorySizeInBytes(*tensor_desc, expected_size)); | |||
(void)TensorUtils::GetSize(*tensor_desc, expected_size); | |||
GELOGD("[%s] Input[%d] expects [%ld] bytes.", task_context.GetNodeName(), i, expected_size); | |||
auto size_diff = expected_size - static_cast<int64_t>(input_tensor->GetSize()); | |||
if (size_diff > 0) { | |||
@@ -60,10 +60,12 @@ class UtestTaskGeneratorTest : public testing::Test { | |||
ge::ut::GraphBuilder builder("graph"); | |||
auto data = builder.AddNode("data", "phony", 1, 1); | |||
auto addn1 = builder.AddNode("addn1", "AddN", 1, 1); | |||
auto netoutput = builder.AddNode("netoutput", "NetOutput", 2, 0); | |||
auto op_desc = data->GetOpDesc(); | |||
(void)AttrUtils::SetStr(op_desc, ATTR_NAME_FRAMEWORK_ORIGINAL_TYPE, "IteratorV2"); | |||
op_desc->SetOpKernelLibName("GE"); | |||
auto netoutput = builder.AddNode("Node_Output", "NetOutput", 2, 0); | |||
auto data_desc = data->GetOpDesc(); | |||
(void)AttrUtils::SetStr(data_desc, ATTR_NAME_FRAMEWORK_ORIGINAL_TYPE, "IteratorV2"); | |||
data_desc->SetOpKernelLibName("GE"); | |||
auto output_desc = netoutput->GetOpDesc(); | |||
output_desc->SetOpKernelLibName("output"); | |||
builder.AddDataEdge(data, 0, addn1, 0); | |||
builder.AddControlEdge(addn1, netoutput); | |||
return builder.GetGraph(); | |||
@@ -112,7 +114,7 @@ TEST_F(UtestTaskGeneratorTest, AutoFindFpOpIndex) { | |||
TEST_F(UtestTaskGeneratorTest, FindLastBpFromBpNode) { | |||
auto graph = BuildGraphBpProfiling(); | |||
TaskGenerator task_generator(nullptr, 0); | |||
auto net_output = graph->FindNode("netoutput"); | |||
auto net_output = graph->FindNode("Node_Output"); | |||
// netoutput has no data input, return default value 0 | |||
EXPECT_EQ(task_generator.FindLastBpFromBpNode(graph, net_output), 0); | |||
} | |||
@@ -138,3 +140,17 @@ TEST_F(UtestTaskGeneratorTest, UpdateOpIsVarAttr) { | |||
MemManager::Instance().Finalize(); | |||
} | |||
// Runs AutoFindBpOpIndex twice on the bp-profiling graph: once with the output node
// untouched, and once after that node has been re-typed to "HcomAllReduce" and renamed.
// Both runs are expected to return SUCCESS.
TEST_F(UtestTaskGeneratorTest, AutoFindBpOpIndex) {
  auto compute_graph = BuildGraphBpProfiling();
  TaskGenerator generator(nullptr, 0);
  auto output_node = compute_graph->FindNode("Node_Output");
  ProfilingPoint point;
  vector<uint32_t> all_reduce_indexes;

  // First pass: the node named "Node_Output" is still present under that name.
  EXPECT_EQ(generator.AutoFindBpOpIndex(compute_graph, point, all_reduce_indexes), SUCCESS);

  // Second pass: same node, now presented as an all-reduce op called "hcom".
  auto output_op_desc = output_node->GetOpDesc();
  output_op_desc->SetType("HcomAllReduce");
  output_op_desc->SetName("hcom");
  EXPECT_EQ(generator.AutoFindBpOpIndex(compute_graph, point, all_reduce_indexes), SUCCESS);
}
@@ -114,6 +114,7 @@ | |||
#include "graph/common/local_context.h" | |||
#include "graph/common/omg_util.h" | |||
#include "common/formats/utils/formats_trans_utils.h" | |||
#include "../passes/graph_builder_utils.h" | |||
#include "register/custom_pass_helper.h" | |||
#include "graph/ops_stub.h" | |||
#include "ge_attr_value.h" | |||
@@ -151,6 +152,24 @@ void CreateGraph(Graph &graph) { | |||
// Graph graph("test_graph"); | |||
graph.SetInputs(inputs).SetOutputs(outputs).SetTargets(targets); | |||
} | |||
/* Data | |||
* | | |||
* Relu Const | |||
* | | |||
* Netoutput | |||
*/ | |||
ge::ComputeGraphPtr CreateGraphWithIsolatedConst() { | |||
ge::ut::GraphBuilder builder("graph"); | |||
auto data = builder.AddNode("data", "Data", 1, 1); | |||
auto relu = builder.AddNode("addn1", "Relu", 1, 1); | |||
auto netoutput = builder.AddNode("Node_Output", "NetOutput", 1, 0); | |||
auto const1 = builder.AddNode("const1", "Const", 0, 1); | |||
builder.AddDataEdge(data, 0, relu, 0); | |||
builder.AddDataEdge(relu, 0, netoutput, 0); | |||
return builder.GetGraph(); | |||
} | |||
TEST_F(UtestGraphManagerTest, set_and_get_add_graph_flag) { | |||
GraphId graph_id = 1; | |||
@@ -558,3 +577,32 @@ TEST_F(UtestGraphManagerTest, test_prerunthread_failed_2) { | |||
// auto ret = graph_manager.ParseInputsDimsForGetNexNosinkAndData(nodes, input_tensors); | |||
// EXPECT_EQ(ret, ge::SUCCESS); | |||
// } | |||
// Exercises ChangeConstType under both RUN_FLAG values, then checks that
// RemoveIsolatedConstInThisGraph leaves three direct nodes in the graph.
TEST_F(UtestGraphManagerTest, ChangeAndDeleteConst_success) {
  std::map<string, string> options_map;
  options_map.insert({ge::RUN_FLAG, "0"});
  ge::GetThreadLocalContext().SetGraphOption(options_map);

  GraphManager graph_manager;
  graph_manager.options_.train_graph_flag = true;

  auto graph = CreateGraphWithIsolatedConst();

  // RUN_FLAG "0": no node of type "Constant" may remain after the conversion.
  Status status = graph_manager.ChangeConstType(graph);
  EXPECT_EQ(status, ge::SUCCESS);
  auto constant1 = graph->FindFirstNodeMatchType("Constant");
  EXPECT_EQ(constant1, nullptr);

  // RUN_FLAG "1": the const node must now be found under type "Constant".
  options_map.clear();
  options_map.insert({ge::RUN_FLAG, "1"});
  ge::GetThreadLocalContext().SetGraphOption(options_map);
  status = graph_manager.ChangeConstType(graph);
  EXPECT_EQ(status, ge::SUCCESS);
  constant1 = graph->FindFirstNodeMatchType("Constant");
  EXPECT_NE(constant1, nullptr);

  // After removing isolated constants, three direct nodes are expected to remain.
  status = graph_manager.RemoveIsolatedConstInThisGraph(graph);
  EXPECT_EQ(status, ge::SUCCESS);
  auto all_nodes = graph->GetDirectNode();
  // Unsigned literal avoids a signed/unsigned comparison inside EXPECT_EQ.
  EXPECT_EQ(all_nodes.size(), 3U);
}