diff --git a/ge/graph/build/memory/graph_mem_assigner.cc b/ge/graph/build/memory/graph_mem_assigner.cc
index f62f6875..e3736ee4 100755
--- a/ge/graph/build/memory/graph_mem_assigner.cc
+++ b/ge/graph/build/memory/graph_mem_assigner.cc
@@ -434,7 +434,7 @@ Status GraphMemoryAssigner::ReAssignContinuousMemory(bool is_loop_graph) {
                      "Assign node %s continuous input memory failed.", node->GetName().c_str())
   }
   for (auto pair : memory_offset_) {
-    GELOGD("After reassign continuous memory, memory type = %ld, memoffset = %zu.", pair.first,
+    GELOGD("After reassign continuous memory, memory type = %ld, mem_offset = %zu.", pair.first,
            pair.second.mem_offset_);
   }
   return ge::SUCCESS;
@@ -512,11 +512,11 @@ Status GraphMemoryAssigner::AssignContinuousInputMemory(const ge::NodePtr &node,
       auto peer_output_offset = output_list.at(peer_out_data_anchor->GetIdx());
       output_list.at(peer_out_data_anchor->GetIdx()) = output_list_this.at(out2ins.begin()->first);
       peer_op_desc->SetOutputOffset(output_list);
-      GELOGI("Node %s out %d ref in %d input node %s, use output offset %ld update %ld", node->GetName().c_str(),
+      GELOGI("Node %s out %d ref in %d input node %s, use output offset %ld update %ld.", node->GetName().c_str(),
              out2ins.begin()->first, out2ins.begin()->second, peer_op_desc->GetName().c_str(),
              output_list_this.at(out2ins.begin()->first), peer_output_offset);
     } else {
-      GELOGD("Node %s out %d ref in %d input node %s with total ref numbers %zu", node->GetName().c_str(),
+      GELOGD("Node %s out %d ref in %d input node %s with total ref numbers %zu.", node->GetName().c_str(),
              out2ins.begin()->first, out2ins.begin()->second, peer_op_desc->GetName().c_str(), out2ins.size());
     }
     // first input is beginning offset
@@ -542,7 +542,7 @@ Status GraphMemoryAssigner::AssignContinuousInputMemory(const ge::NodePtr &node,
     }
 
     GELOGI("[IMAS]Continuous input : Set %s name[%s] optype[%s] output[%d] offset to [%zu] stream_id[%ld] memtype[%ld] "
-           "size[%zu] realsize[%ld] nopadding[%d].", node->GetOwnerComputeGraph()->GetName().c_str(),
+           "size[%zu] realsize[%ld] nopadding size[%d].", node->GetOwnerComputeGraph()->GetName().c_str(),
            peer_op_desc->GetName().c_str(), node->GetType().c_str(), peer_out_data_anchor->GetIdx(),
            output_list.at(peer_out_data_anchor->GetIdx()), peer_op_desc->GetStreamId(), memory_type,
            is_continuous_input_allocated ? 0UL : align_size, real_size, is_nopadding);
@@ -1549,7 +1549,7 @@ bool GraphMemoryAssigner::AssignContinuousInputMemoryWithAtomicProcessDirectly(
    auto continuous_type = iter->second;
    bool continuous_input = ((continuous_type & kTypeInput) != 0) || ((continuous_type & kTypeInputNoPadding) != 0);
    if (continuous_input) {
-      GELOGI("node %s 's precursor node %s need assign continuous input memory, store node firstly.",
+      GELOGI("Node %s's precursor node %s needs continuous input memory assigned, store node first.",
              input_continuous_node->GetName().c_str(), in_node->GetName().c_str());
      return false;
    }
@@ -1559,7 +1559,7 @@ bool GraphMemoryAssigner::AssignContinuousInputMemoryWithAtomicProcessDirectly(
    node_2_continuous_type.emplace(out_node, continuous_type);
    bool continuous_input = ((continuous_type & kTypeInput) != 0) || ((continuous_type & kTypeInputNoPadding) != 0);
    if (continuous_input) {
-      GELOGI("node %s 's succeed node %s need assign continuous input memory, store node firstly.",
+      GELOGI("Node %s's successor node %s needs continuous input memory assigned, store node first.",
              input_continuous_node->GetName().c_str(), out_node->GetName().c_str());
      return false;
    }
diff --git a/ge/graph/build/model_builder.cc b/ge/graph/build/model_builder.cc
index 8d4a17d8..1a14374d 100755
--- a/ge/graph/build/model_builder.cc
+++ b/ge/graph/build/model_builder.cc
@@ -366,8 +366,11 @@ void ModelBuilder::InitL1FusionOption() {
   string buffer_optimize = "off_optimize";
   graphStatus ret = ge::GetContext().GetOption(BUFFER_OPTIMIZE, buffer_optimize);
   if (ret == GRAPH_SUCCESS) {
-    is_l1_fusion_enable_ = (buffer_optimize == "l1_optimize");
-    GELOGD("The value of %s is %s.", BUFFER_OPTIMIZE.c_str(), buffer_optimize.c_str());
+    bool off_superkernel = false;
+    (void)AttrUtils::GetBool(compute_graph_, ATTR_NAME_OFF_SUPERKERNEL_ATTR, off_superkernel);
+    is_l1_fusion_enable_ = ((buffer_optimize == "l1_optimize") && (!off_superkernel));
+    GELOGI("Compute graph %s: the value of %s is %s, l1 fusion enable flag is %d.",
+           compute_graph_->GetName().c_str(), BUFFER_OPTIMIZE.c_str(), buffer_optimize.c_str(), is_l1_fusion_enable_);
   } else {
     GELOGW("The value of %s is empty.", kEnableL1Fusion.c_str());
   }
@@ -709,7 +712,7 @@ Status ModelBuilder::BuildModelForGetTask(ge::Model &model) {
   GE_TIMESTAMP_START(SetInputOutputOffset);
   SetInputOutputOffsetPass input_output_offset;
   GE_CHK_STATUS_RET(input_output_offset.Run(compute_graph_), "Set input output offset failed.");
-  GE_TIMESTAMP_END(SetInputOutputOffset, "SetInputOutputOffsetPass::Run.");
+  GE_TIMESTAMP_END(SetInputOutputOffset, "SetInputOutputOffsetPass::Run");
 
   // Compile single op in graph build stage
   GE_TIMESTAMP_START(CompileSingleOp);
diff --git a/ge/graph/load/model_manager/davinci_model.cc b/ge/graph/load/model_manager/davinci_model.cc
index c2ba4bf4..933aba5a 100755
--- a/ge/graph/load/model_manager/davinci_model.cc
+++ b/ge/graph/load/model_manager/davinci_model.cc
@@ -532,20 +532,20 @@ Status DavinciModel::DoTaskSink() {
   GE_CHK_STATUS_RET(BindModelStream(), "Bind model stream failed.");
 
   if (known_node_) {
-    GE_CHK_STATUS_RET(MallocKnownArgs(), "Mallloc known node args failed.");
+    GE_CHK_STATUS_RET(MallocKnownArgs(), "Malloc known node's args failed");
   }
 
-  GE_CHK_STATUS_RET(InitTaskInfo(*model_task_def.get()), "InitTaskInfo failed.");
+  GE_CHK_STATUS_RET(InitTaskInfo(*model_task_def.get()), "InitTaskInfo failed");
 
-  GE_CHK_STATUS_RET(ModelManager::GetInstance()->LaunchCustAicpuSo(), "Launch cust aicpu so failed.");
+  GE_CHK_STATUS_RET(ModelManager::GetInstance()->LaunchCustAicpuSo(), "Launch cust aicpu so failed");
 
-  GE_CHK_STATUS_RET(ModelManager::GetInstance()->CheckAicpuOpList(ge_model_), "Check aicpu op type failed.");
+  GE_CHK_STATUS_RET(ModelManager::GetInstance()->CheckAicpuOpList(ge_model_), "Check aicpu op type failed");
 
-  GE_CHK_STATUS_RET(InitEntryTask(), "InitEntryTask failed.");
+  GE_CHK_STATUS_RET(InitEntryTask(), "InitEntryTask failed");
 
-  GE_CHK_STATUS_RET(InitL1DataDumperArgs(), "InitL1DataDumperArgs failed.");
+  GE_CHK_STATUS_RET(InitL1DataDumperArgs(), "InitL1DataDumperArgs failed");
 
-  GE_CHK_STATUS_RET(DistributeTask(), "Distribute failed.");
+  GE_CHK_STATUS_RET(DistributeTask(), "Distribute failed");
 
   GE_CHK_RT_RET(rtModelLoadComplete(rt_model_handle_));
 
diff --git a/ge/graph/manager/graph_manager.cc b/ge/graph/manager/graph_manager.cc
index 1cbb3fc8..5c97b12e 100755
--- a/ge/graph/manager/graph_manager.cc
+++ b/ge/graph/manager/graph_manager.cc
@@ -3090,6 +3090,15 @@ Status GraphManager::OptimizeSubgraph(const GraphNodePtr &graph_node, ComputeGra
     sub_graph->SetSessionID(session_id);
     sub_graph->SetGraphID(graph_node->GetGraphId());
   }
+  bool off_superkernel = false;
+  if (AttrUtils::GetBool(compute_graph, ATTR_NAME_OFF_SUPERKERNEL_ATTR, off_superkernel)) {
+    GELOGI("Compute graph %s get superkernel flag %d.", compute_graph->GetName().c_str(), off_superkernel);
+    if (!AttrUtils::SetBool(merged_compute_graph, ATTR_NAME_OFF_SUPERKERNEL_ATTR, off_superkernel)) {
+      GELOGE(FAILED, "Compute graph %s set superkernel flag %d failed", merged_compute_graph->GetName().c_str(),
+             off_superkernel);
+      return FAILED;
+    }
+  }
   GE_TIMESTAMP_EVENT_END(MergeSubgraph, "OptimizeSubgraph::MergeSubGraph");
   GE_DUMP(merged_compute_graph, "mergedComputeGraph");
   compute_graph = merged_compute_graph;
diff --git a/ge/host_kernels/dynamic_stitch_kernel.cc b/ge/host_kernels/dynamic_stitch_kernel.cc
index 3037934e..52f6cdcf 100644
--- a/ge/host_kernels/dynamic_stitch_kernel.cc
+++ b/ge/host_kernels/dynamic_stitch_kernel.cc
@@ -111,8 +111,9 @@ void DynamicStitchKernel::ComputeMergedShape(const vector<ConstGeTensorPtr> &inp
   int32_t merged_first_dim = 0;
   int64_t indices_shape_size = 0;
   for (int i = 0; i < n_; i++) {
-    indices_shape_size = input[i]->GetTensorDesc().GetShape().GetShapeSize();
-    indices_shape_size = indices_shape_size == 0 ? 1 : indices_shape_size;
+    // an empty shape ([]) means a scalar
+    indices_shape_size =
+        input[i]->GetTensorDesc().GetShape().GetDims().empty() ? 1 : input[i]->GetTensorDesc().GetShape().GetShapeSize();
     const int32_t *input_indices = reinterpret_cast<const int32_t *>(input[i]->GetData().data());
     for (int64_t j = 0; j < indices_shape_size; j++) {
       merged_first_dim = std::max(merged_first_dim, input_indices[j]);
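
Note on the `dynamic_stitch_kernel.cc` hunk (illustration only, not part of the patch): the fix replaces a size-based scalar test with a rank-based one, because a scalar (shape `[]`) and a tensor with a zero-sized dimension (shape `[0]`) can both report a shape size of 0, yet only the scalar actually holds an index to read. A minimal sketch of the distinction, using a hypothetical `Shape` stand-in rather than the real `ge::GeShape` API:

```cpp
// Hypothetical stand-in for ge::GeShape (assumption: empty dims report a
// shape size of 0, which is what the old "size == 0 ? 1 : size" code implies).
#include <cassert>
#include <cstdint>
#include <vector>

struct Shape {
  std::vector<int64_t> dims;
  // Mirrors the patch's new check: rank 0 (GetDims().empty()) means scalar.
  bool IsScalar() const { return dims.empty(); }
  // Mirrors GetShapeSize() as the old code observed it: element count,
  // reported as 0 both for a scalar and for any shape containing a 0 dim.
  int64_t NumElements() const {
    if (dims.empty()) {
      return 0;
    }
    int64_t n = 1;
    for (int64_t d : dims) {
      n *= d;
    }
    return n;
  }
};

int main() {
  const Shape scalar{{}};  // shape []  : one index to read
  const Shape empty{{0}};  // shape [0] : zero indices to read
  // Both report 0 elements, so the old size-based test cannot tell them
  // apart and would read one index from the empty tensor's data buffer.
  assert(scalar.NumElements() == 0 && empty.NumElements() == 0);
  // The rank-based test separates the two cases correctly.
  assert(scalar.IsScalar() && !empty.IsScalar());
  return 0;
}
```

Under these assumptions, the old mapping would also have treated the `[0]` tensor as a scalar and read past its zero-length data buffer, which is exactly what the rank check in the patch avoids.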