From e3d2723cb6a16aa5b7ca23172c957499f363452e Mon Sep 17 00:00:00 2001
From: "gengchao4@huawei.com" <gengchao4@huawei.com>
Date: Mon, 8 Mar 2021 15:34:42 +0800
Subject: [PATCH 1/4] add superkernel off attr for graph

---
 ge/graph/build/model_builder.cc   | 7 +++++--
 ge/graph/manager/graph_manager.cc | 9 +++++++++
 2 files changed, 14 insertions(+), 2 deletions(-)
diff --git a/ge/graph/build/model_builder.cc b/ge/graph/build/model_builder.cc
index 8d4a17d8..04126f92 100755
--- a/ge/graph/build/model_builder.cc
+++ b/ge/graph/build/model_builder.cc
@@ -366,8 +366,11 @@ void ModelBuilder::InitL1FusionOption() {
   string buffer_optimize = "off_optimize";
   graphStatus ret = ge::GetContext().GetOption(BUFFER_OPTIMIZE, buffer_optimize);
   if (ret == GRAPH_SUCCESS) {
-    is_l1_fusion_enable_ = (buffer_optimize == "l1_optimize");
-    GELOGD("The value of %s is %s.", BUFFER_OPTIMIZE.c_str(), buffer_optimize.c_str());
+    bool off_superkernel = false;
+    (void)AttrUtils::GetBool(compute_graph_, ATTR_NAME_OFF_SUPERKERNEL_ATTR, off_superkernel);
+    is_l1_fusion_enable_ = ((buffer_optimize == "l1_optimize") && (!off_superkernel));
+    GELOGI("Compute graph %s the value of %s is %s, l1 fusion enable %d.", compute_graph_->GetName().c_str(),
+           BUFFER_OPTIMIZE.c_str(), buffer_optimize.c_str(), is_l1_fusion_enable_);
   } else {
     GELOGW("The value of %s is empty.", kEnableL1Fusion.c_str());
   }
diff --git a/ge/graph/manager/graph_manager.cc b/ge/graph/manager/graph_manager.cc
index 1cbb3fc8..5c97b12e 100755
--- a/ge/graph/manager/graph_manager.cc
+++ b/ge/graph/manager/graph_manager.cc
@@ -3090,6 +3090,15 @@ Status GraphManager::OptimizeSubgraph(const GraphNodePtr &graph_node, ComputeGra
     sub_graph->SetSessionID(session_id);
     sub_graph->SetGraphID(graph_node->GetGraphId());
   }
+  bool off_superkernel = false;
+  if (AttrUtils::GetBool(compute_graph, ATTR_NAME_OFF_SUPERKERNEL_ATTR, off_superkernel)) {
+    GELOGI("Compute graph %s get superkernel flag %d.", compute_graph->GetName().c_str(), off_superkernel);
+    if (!AttrUtils::SetBool(merged_compute_graph, ATTR_NAME_OFF_SUPERKERNEL_ATTR, off_superkernel)) {
+      GELOGE(FAILED, "Compute graph %s set superkernel flag %d failed", merged_compute_graph->GetName().c_str(),
+             off_superkernel);
+      return FAILED;
+    }
+  }
   GE_TIMESTAMP_EVENT_END(MergeSubgraph, "OptimizeSubgraph::MergeSubGraph");
   GE_DUMP(merged_compute_graph, "mergedComputeGraph");
   compute_graph = merged_compute_graph;

From 1ada541c75a14b5a1c8f19644d2622a086dbef5a Mon Sep 17 00:00:00 2001
From: "gengchao4@huawei.com" <gengchao4@huawei.com>
Date: Tue, 9 Mar 2021 19:51:26 +0800
Subject: [PATCH 2/4] 1.add superkernel off attr for graph 2.bugfix for
 dynamic_stitch_kernel.cc

---
 ge/host_kernels/dynamic_stitch_kernel.cc | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/ge/host_kernels/dynamic_stitch_kernel.cc b/ge/host_kernels/dynamic_stitch_kernel.cc
index 3037934e..52f6cdcf 100644
--- a/ge/host_kernels/dynamic_stitch_kernel.cc
+++ b/ge/host_kernels/dynamic_stitch_kernel.cc
@@ -111,8 +111,9 @@ void DynamicStitchKernel::ComputeMergedShape(const vector<ConstGeTensorPtr> &inp
   int32_t merged_first_dim = 0;
   int64_t indices_shape_size = 0;
   for (int i = 0; i < n_; i++) {
-    indices_shape_size = input[i]->GetTensorDesc().GetShape().GetShapeSize();
-    indices_shape_size = indices_shape_size == 0 ? 1 : indices_shape_size;
+    // shape is [] means scalar
+    indices_shape_size =
+      input[i]->GetTensorDesc().GetShape().GetDims().empty() ? 1 : input[i]->GetTensorDesc().GetShape().GetShapeSize();
     const int32_t *input_indices = reinterpret_cast<const int32_t *>(input[i]->GetData().data());
     for (int64_t j = 0; j < indices_shape_size; j++) {
       merged_first_dim = std::max(merged_first_dim, input_indices[j]);

From 279e065b448fa1b08094b381e17be86ba4139132 Mon Sep 17 00:00:00 2001
From: "gengchao4@huawei.com" <gengchao4@huawei.com>
Date: Tue, 9 Mar 2021 22:00:27 +0800
Subject: [PATCH 3/4] 1.add superkernel off attr for graph 2.bugfix for
 dynamic_stitch_kernel.cc

---
 ge/graph/build/memory/graph_mem_assigner.cc  | 12 ++++++------
 ge/graph/build/model_builder.cc              |  2 +-
 ge/graph/load/model_manager/davinci_model.cc | 14 +++++++-------
 3 files changed, 14 insertions(+), 14 deletions(-)

diff --git a/ge/graph/build/memory/graph_mem_assigner.cc b/ge/graph/build/memory/graph_mem_assigner.cc
index f62f6875..e3736ee4 100755
--- a/ge/graph/build/memory/graph_mem_assigner.cc
+++ b/ge/graph/build/memory/graph_mem_assigner.cc
@@ -434,7 +434,7 @@ Status GraphMemoryAssigner::ReAssignContinuousMemory(bool is_loop_graph) {
                       "Assign node %s continuous input memory failed.", node->GetName().c_str())
   }
   for (auto pair : memory_offset_) {
-    GELOGD("After reassign continuous memory, memory type = %ld, memoffset = %zu.", pair.first,
+    GELOGD("After reassign continuous memory, memory type = %ld, mem_offset = %zu.", pair.first,
            pair.second.mem_offset_);
   }
   return ge::SUCCESS;
@@ -512,11 +512,11 @@ Status GraphMemoryAssigner::AssignContinuousInputMemory(const ge::NodePtr &node,
         auto peer_output_offset = output_list.at(peer_out_data_anchor->GetIdx());
         output_list.at(peer_out_data_anchor->GetIdx()) = output_list_this.at(out2ins.begin()->first);
         peer_op_desc->SetOutputOffset(output_list);
-        GELOGI("Node %s out %d ref in %d input node %s, use output offset %ld update %ld", node->GetName().c_str(),
+        GELOGI("Node %s out %d ref in %d input node %s, use output offset %ld update %ld.", node->GetName().c_str(),
                out2ins.begin()->first, out2ins.begin()->second, peer_op_desc->GetName().c_str(),
                output_list_this.at(out2ins.begin()->first), peer_output_offset);
       } else {
-        GELOGD("Node %s out %d ref in %d input node %s with total ref numbers %zu", node->GetName().c_str(),
+        GELOGD("Node %s out %d ref in %d input node %s with total ref numbers %zu.", node->GetName().c_str(),
                out2ins.begin()->first, out2ins.begin()->second, peer_op_desc->GetName().c_str(), out2ins.size());
       }
       // first input is beginning offset
@@ -542,7 +542,7 @@ Status GraphMemoryAssigner::AssignContinuousInputMemory(const ge::NodePtr &node,
     }
 
     GELOGI("[IMAS]Continuous input : Set %s name[%s] optype[%s] output[%d] offset to [%zu] stream_id[%ld] memtype[%ld] "
-        "size[%zu] realsize[%ld] nopadding[%d].", node->GetOwnerComputeGraph()->GetName().c_str(),
+        "size[%zu] realsize[%ld] nopadding size[%d].", node->GetOwnerComputeGraph()->GetName().c_str(),
         peer_op_desc->GetName().c_str(), node->GetType().c_str(), peer_out_data_anchor->GetIdx(),
         output_list.at(peer_out_data_anchor->GetIdx()), peer_op_desc->GetStreamId(), memory_type,
         is_continuous_input_allocated ? 0UL : align_size, real_size, is_nopadding);
@@ -1549,7 +1549,7 @@ bool GraphMemoryAssigner::AssignContinuousInputMemoryWithAtomicProcessDirectly(
     auto continuous_type = iter->second;
     bool continuous_input = ((continuous_type & kTypeInput) != 0) || ((continuous_type & kTypeInputNoPadding) != 0);
     if (continuous_input) {
-      GELOGI("node %s 's precursor node %s need assign continuous input memory, store node firstly.",
+      GELOGI("Node %s 's precursor node %s need assign continuous input memory, store node firstly.",
              input_continuous_node->GetName().c_str(), in_node->GetName().c_str());
       return false;
     }
@@ -1559,7 +1559,7 @@ bool GraphMemoryAssigner::AssignContinuousInputMemoryWithAtomicProcessDirectly(
     node_2_continuous_type.emplace(out_node, continuous_type);
     bool continuous_input = ((continuous_type & kTypeInput) != 0) || ((continuous_type & kTypeInputNoPadding) != 0);
     if (continuous_input) {
-      GELOGI("node %s 's succeed node %s need assign continuous input memory, store node firstly.",
+      GELOGI("Node %s 's succeed node %s need assign continuous input memory, store node firstly.",
              input_continuous_node->GetName().c_str(), out_node->GetName().c_str());
       return false;
     }
diff --git a/ge/graph/build/model_builder.cc b/ge/graph/build/model_builder.cc
index 04126f92..1a14374d 100755
--- a/ge/graph/build/model_builder.cc
+++ b/ge/graph/build/model_builder.cc
@@ -712,7 +712,7 @@ Status ModelBuilder::BuildModelForGetTask(ge::Model &model) {
   GE_TIMESTAMP_START(SetInputOutputOffset);
   SetInputOutputOffsetPass input_output_offset;
   GE_CHK_STATUS_RET(input_output_offset.Run(compute_graph_), "Set input output offset failed.");
-  GE_TIMESTAMP_END(SetInputOutputOffset, "SetInputOutputOffsetPass::Run.");
+  GE_TIMESTAMP_END(SetInputOutputOffset, "SetInputOutputOffsetPass::Run");
 
   // Compile single op in graph build stage
   GE_TIMESTAMP_START(CompileSingleOp);
diff --git a/ge/graph/load/model_manager/davinci_model.cc b/ge/graph/load/model_manager/davinci_model.cc
index b7bb97ce..350ab08d 100755
--- a/ge/graph/load/model_manager/davinci_model.cc
+++ b/ge/graph/load/model_manager/davinci_model.cc
@@ -532,20 +532,20 @@ Status DavinciModel::DoTaskSink() {
   GE_CHK_STATUS_RET(BindModelStream(), "Bind model stream failed.");
 
   if (known_node_) {
-    GE_CHK_STATUS_RET(MallocKnownArgs(), "Mallloc known node args failed.");
+    GE_CHK_STATUS_RET(MallocKnownArgs(), "Mallloc known node args failed");
   }
 
-  GE_CHK_STATUS_RET(InitTaskInfo(*model_task_def.get()), "InitTaskInfo failed.");
+  GE_CHK_STATUS_RET(InitTaskInfo(*model_task_def.get()), "InitTaskInfo failed");
 
-  GE_CHK_STATUS_RET(ModelManager::GetInstance()->LaunchCustAicpuSo(), "Launch cust aicpu so failed.");
+  GE_CHK_STATUS_RET(ModelManager::GetInstance()->LaunchCustAicpuSo(), "Launch cust aicpu so failed");
 
-  GE_CHK_STATUS_RET(ModelManager::GetInstance()->CheckAicpuOpList(ge_model_), "Check aicpu op type failed.");
+  GE_CHK_STATUS_RET(ModelManager::GetInstance()->CheckAicpuOpList(ge_model_), "Check aicpu op type failed");
 
-  GE_CHK_STATUS_RET(InitEntryTask(), "InitEntryTask failed.");
+  GE_CHK_STATUS_RET(InitEntryTask(), "InitEntryTask failed");
 
-  GE_CHK_STATUS_RET(InitL1DataDumperArgs(), "InitL1DataDumperArgs failed.");
+  GE_CHK_STATUS_RET(InitL1DataDumperArgs(), "InitL1DataDumperArgs failed");
 
-  GE_CHK_STATUS_RET(DistributeTask(), "Distribute failed.");
+  GE_CHK_STATUS_RET(DistributeTask(), "Distribute failed");
 
   GE_CHK_RT_RET(rtModelLoadComplete(rt_model_handle_));
 

From 6d419e00ef54a96f4c87355a93e8f378d0a23e37 Mon Sep 17 00:00:00 2001
From: "gengchao4@huawei.com" <gengchao4@huawei.com>
Date: Tue, 9 Mar 2021 22:41:15 +0800
Subject: [PATCH 4/4] 1.add superkernel off attr for graph 2.bugfix for
 dynamic_stitch_kernel.cc

---
 ge/graph/load/model_manager/davinci_model.cc | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/ge/graph/load/model_manager/davinci_model.cc b/ge/graph/load/model_manager/davinci_model.cc
index 350ab08d..b052c9f7 100755
--- a/ge/graph/load/model_manager/davinci_model.cc
+++ b/ge/graph/load/model_manager/davinci_model.cc
@@ -532,7 +532,7 @@ Status DavinciModel::DoTaskSink() {
   GE_CHK_STATUS_RET(BindModelStream(), "Bind model stream failed.");
 
   if (known_node_) {
-    GE_CHK_STATUS_RET(MallocKnownArgs(), "Mallloc known node args failed");
+    GE_CHK_STATUS_RET(MallocKnownArgs(), "Malloc known node's args failed");
   }
 
   GE_CHK_STATUS_RET(InitTaskInfo(*model_task_def.get()), "InitTaskInfo failed");