
!1214 add superkernel off attr for graph

From: @ni100die
Reviewed-by: @xchu42,@ji_chen
tags/v1.2.0
mindspore-ci-bot committed on Gitee 3 years ago
commit e119b3c951
5 changed files with 31 additions and 18 deletions
1. ge/graph/build/memory/graph_mem_assigner.cc (+6 -6)
2. ge/graph/build/model_builder.cc (+6 -3)
3. ge/graph/load/model_manager/davinci_model.cc (+7 -7)
4. ge/graph/manager/graph_manager.cc (+9 -0)
5. ge/host_kernels/dynamic_stitch_kernel.cc (+3 -2)

ge/graph/build/memory/graph_mem_assigner.cc (+6 -6)

@@ -434,7 +434,7 @@ Status GraphMemoryAssigner::ReAssignContinuousMemory(bool is_loop_graph) {
"Assign node %s continuous input memory failed.", node->GetName().c_str())
}
for (auto pair : memory_offset_) {
GELOGD("After reassign continuous memory, memory type = %ld, memoffset = %zu.", pair.first,
GELOGD("After reassign continuous memory, memory type = %ld, mem_offset = %zu.", pair.first,
pair.second.mem_offset_);
}
return ge::SUCCESS;
@@ -512,11 +512,11 @@ Status GraphMemoryAssigner::AssignContinuousInputMemory(const ge::NodePtr &node,
auto peer_output_offset = output_list.at(peer_out_data_anchor->GetIdx());
output_list.at(peer_out_data_anchor->GetIdx()) = output_list_this.at(out2ins.begin()->first);
peer_op_desc->SetOutputOffset(output_list);
GELOGI("Node %s out %d ref in %d input node %s, use output offset %ld update %ld", node->GetName().c_str(),
GELOGI("Node %s out %d ref in %d input node %s, use output offset %ld update %ld.", node->GetName().c_str(),
out2ins.begin()->first, out2ins.begin()->second, peer_op_desc->GetName().c_str(),
output_list_this.at(out2ins.begin()->first), peer_output_offset);
} else {
GELOGD("Node %s out %d ref in %d input node %s with total ref numbers %zu", node->GetName().c_str(),
GELOGD("Node %s out %d ref in %d input node %s with total ref numbers %zu.", node->GetName().c_str(),
out2ins.begin()->first, out2ins.begin()->second, peer_op_desc->GetName().c_str(), out2ins.size());
}
// first input is beginning offset
@@ -542,7 +542,7 @@ Status GraphMemoryAssigner::AssignContinuousInputMemory(const ge::NodePtr &node,
}

GELOGI("[IMAS]Continuous input : Set %s name[%s] optype[%s] output[%d] offset to [%zu] stream_id[%ld] memtype[%ld] "
"size[%zu] realsize[%ld] nopadding[%d].", node->GetOwnerComputeGraph()->GetName().c_str(),
"size[%zu] realsize[%ld] nopadding size[%d].", node->GetOwnerComputeGraph()->GetName().c_str(),
peer_op_desc->GetName().c_str(), node->GetType().c_str(), peer_out_data_anchor->GetIdx(),
output_list.at(peer_out_data_anchor->GetIdx()), peer_op_desc->GetStreamId(), memory_type,
is_continuous_input_allocated ? 0UL : align_size, real_size, is_nopadding);
@@ -1549,7 +1549,7 @@ bool GraphMemoryAssigner::AssignContinuousInputMemoryWithAtomicProcessDirectly(
auto continuous_type = iter->second;
bool continuous_input = ((continuous_type & kTypeInput) != 0) || ((continuous_type & kTypeInputNoPadding) != 0);
if (continuous_input) {
GELOGI("node %s 's precursor node %s need assign continuous input memory, store node firstly.",
GELOGI("Node %s 's precursor node %s need assign continuous input memory, store node firstly.",
input_continuous_node->GetName().c_str(), in_node->GetName().c_str());
return false;
}
@@ -1559,7 +1559,7 @@ bool GraphMemoryAssigner::AssignContinuousInputMemoryWithAtomicProcessDirectly(
node_2_continuous_type.emplace(out_node, continuous_type);
bool continuous_input = ((continuous_type & kTypeInput) != 0) || ((continuous_type & kTypeInputNoPadding) != 0);
if (continuous_input) {
GELOGI("node %s 's succeed node %s need assign continuous input memory, store node firstly.",
GELOGI("Node %s 's succeed node %s need assign continuous input memory, store node firstly.",
input_continuous_node->GetName().c_str(), out_node->GetName().c_str());
return false;
}


ge/graph/build/model_builder.cc (+6 -3)

@@ -366,8 +366,11 @@ void ModelBuilder::InitL1FusionOption() {
string buffer_optimize = "off_optimize";
graphStatus ret = ge::GetContext().GetOption(BUFFER_OPTIMIZE, buffer_optimize);
if (ret == GRAPH_SUCCESS) {
- is_l1_fusion_enable_ = (buffer_optimize == "l1_optimize");
- GELOGD("The value of %s is %s.", BUFFER_OPTIMIZE.c_str(), buffer_optimize.c_str());
+ bool off_superkernel = false;
+ (void)AttrUtils::GetBool(compute_graph_, ATTR_NAME_OFF_SUPERKERNEL_ATTR, off_superkernel);
+ is_l1_fusion_enable_ = ((buffer_optimize == "l1_optimize") && (!off_superkernel));
+ GELOGI("Compute graph %s the value of %s is %s, superkernel flag %d.", compute_graph_->GetName().c_str(),
+        BUFFER_OPTIMIZE.c_str(), buffer_optimize.c_str(), is_l1_fusion_enable_);
} else {
GELOGW("The value of %s is empty.", kEnableL1Fusion.c_str());
}
@@ -709,7 +712,7 @@ Status ModelBuilder::BuildModelForGetTask(ge::Model &model) {
GE_TIMESTAMP_START(SetInputOutputOffset);
SetInputOutputOffsetPass input_output_offset;
GE_CHK_STATUS_RET(input_output_offset.Run(compute_graph_), "Set input output offset failed.");
- GE_TIMESTAMP_END(SetInputOutputOffset, "SetInputOutputOffsetPass::Run.");
+ GE_TIMESTAMP_END(SetInputOutputOffset, "SetInputOutputOffsetPass::Run");

// Compile single op in graph build stage
GE_TIMESTAMP_START(CompileSingleOp);


ge/graph/load/model_manager/davinci_model.cc (+7 -7)

@@ -532,20 +532,20 @@ Status DavinciModel::DoTaskSink() {
GE_CHK_STATUS_RET(BindModelStream(), "Bind model stream failed.");

if (known_node_) {
- GE_CHK_STATUS_RET(MallocKnownArgs(), "Mallloc known node args failed.");
+ GE_CHK_STATUS_RET(MallocKnownArgs(), "Mallloc known node's args failed");
}

- GE_CHK_STATUS_RET(InitTaskInfo(*model_task_def.get()), "InitTaskInfo failed.");
+ GE_CHK_STATUS_RET(InitTaskInfo(*model_task_def.get()), "InitTaskInfo failed");

- GE_CHK_STATUS_RET(ModelManager::GetInstance()->LaunchCustAicpuSo(), "Launch cust aicpu so failed.");
+ GE_CHK_STATUS_RET(ModelManager::GetInstance()->LaunchCustAicpuSo(), "Launch cust aicpu so failed");

- GE_CHK_STATUS_RET(ModelManager::GetInstance()->CheckAicpuOpList(ge_model_), "Check aicpu op type failed.");
+ GE_CHK_STATUS_RET(ModelManager::GetInstance()->CheckAicpuOpList(ge_model_), "Check aicpu op type failed");

- GE_CHK_STATUS_RET(InitEntryTask(), "InitEntryTask failed.");
+ GE_CHK_STATUS_RET(InitEntryTask(), "InitEntryTask failed");

- GE_CHK_STATUS_RET(InitL1DataDumperArgs(), "InitL1DataDumperArgs failed.");
+ GE_CHK_STATUS_RET(InitL1DataDumperArgs(), "InitL1DataDumperArgs failed");

- GE_CHK_STATUS_RET(DistributeTask(), "Distribute failed.");
+ GE_CHK_STATUS_RET(DistributeTask(), "Distribute failed");

GE_CHK_RT_RET(rtModelLoadComplete(rt_model_handle_));



ge/graph/manager/graph_manager.cc (+9 -0)

@@ -3090,6 +3090,15 @@ Status GraphManager::OptimizeSubgraph(const GraphNodePtr &graph_node, ComputeGra
sub_graph->SetSessionID(session_id);
sub_graph->SetGraphID(graph_node->GetGraphId());
}
+ bool off_superkernel = false;
+ if (AttrUtils::GetBool(compute_graph, ATTR_NAME_OFF_SUPERKERNEL_ATTR, off_superkernel)) {
+   GELOGI("Compute graph %s get superkernel flag %d.", compute_graph->GetName().c_str(), off_superkernel);
+   if (!AttrUtils::SetBool(merged_compute_graph, ATTR_NAME_OFF_SUPERKERNEL_ATTR, off_superkernel)) {
+     GELOGE(FAILED, "Compute graph %s set superkernel flag %d failed", merged_compute_graph->GetName().c_str(),
+            off_superkernel);
+     return FAILED;
+   }
+ }
GE_TIMESTAMP_EVENT_END(MergeSubgraph, "OptimizeSubgraph::MergeSubGraph");
GE_DUMP(merged_compute_graph, "mergedComputeGraph");
compute_graph = merged_compute_graph;


ge/host_kernels/dynamic_stitch_kernel.cc (+3 -2)

@@ -111,8 +111,9 @@ void DynamicStitchKernel::ComputeMergedShape(const vector<ConstGeTensorPtr> &inp
int32_t merged_first_dim = 0;
int64_t indices_shape_size = 0;
for (int i = 0; i < n_; i++) {
- indices_shape_size = input[i]->GetTensorDesc().GetShape().GetShapeSize();
- indices_shape_size = indices_shape_size == 0 ? 1 : indices_shape_size;
+ // shape is [] means scalar
+ indices_shape_size =
+     input[i]->GetTensorDesc().GetShape().GetDims().empty() ? 1 : input[i]->GetTensorDesc().GetShape().GetShapeSize();
const int32_t *input_indices = reinterpret_cast<const int32_t *>(input[i]->GetData().data());
for (int64_t j = 0; j < indices_shape_size; j++) {
merged_first_dim = std::max(merged_first_dim, input_indices[j]);

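In the dynamic_stitch fix, the old code treated any GetShapeSize() result of 0 as a scalar, which conflates a true scalar (shape [], one element) with a genuinely empty tensor (e.g. shape [0], zero elements); the new code keys off an empty dims list instead. A self-contained sketch of that distinction (plain C++, not the GE API):

#include <cstdint>
#include <vector>

// Element count under the rule this patch implements: an empty dims vector
// means shape [], i.e. a scalar with exactly one element, while a shape that
// contains a 0 (e.g. [0]) is an empty tensor with zero elements.
int64_t ElementCount(const std::vector<int64_t> &dims) {
  if (dims.empty()) {
    return 1;  // scalar: shape [] holds one element
  }
  int64_t count = 1;
  for (int64_t d : dims) {
    count *= d;  // a zero dimension yields 0 elements, not a scalar
  }
  return count;
}
// ElementCount({}) == 1, ElementCount({0}) == 0, ElementCount({2, 3}) == 6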
