From ec7bb516652e0d3f631bbf48586f6f0e6168a507 Mon Sep 17 00:00:00 2001
From: zhaozhixuan <zhaozhixuan2@hisilicon.com>
Date: Thu, 6 May 2021 20:19:07 +0800
Subject: [PATCH 1/7] MemcpyAsync in aicore executor.

---
 .../node_executor/aicore/aicore_op_task.cc     | 13 ++++++-------
 tests/ut/ge/hybrid/ge_hybrid_unittest.cc       | 18 +++++++++++++++---
 2 files changed, 21 insertions(+), 10 deletions(-)
diff --git a/ge/hybrid/node_executor/aicore/aicore_op_task.cc b/ge/hybrid/node_executor/aicore/aicore_op_task.cc
index 8bb871fb..36f65bbe 100644
--- a/ge/hybrid/node_executor/aicore/aicore_op_task.cc
+++ b/ge/hybrid/node_executor/aicore/aicore_op_task.cc
@@ -354,8 +354,6 @@ Status AiCoreOpTask::PrepareWithShape(TaskContext &context) {
 Status AiCoreOpTask::UpdateTilingInfo(TaskContext &context) {
   auto node = context.GetNodeItem().node;
   GE_CHECK_NOTNULL(node);
-  auto op_desc = node->GetOpDesc();
-  GE_CHECK_NOTNULL(op_desc);
 
   GELOGD("[%s] Start to update tiling info for task: [%s]", node->GetName().c_str(), stub_name_.c_str());
   OpRunInfo tiling_info;
@@ -370,12 +368,14 @@ Status AiCoreOpTask::UpdateTilingInfo(TaskContext &context) {
 
   // update op args by tiling info
   block_dim_ = static_cast<uint32_t>(tiling_info.block_dim);
-  op_desc->SetWorkspaceBytes(tiling_info.workspaces);
   clear_atomic_ = tiling_info.clear_atomic;
-
   tiling_data_ = tiling_info.tiling_data.str();
   tiling_key_ = tiling_info.tiling_key;
   GELOGD("Successfully getting [tiling_key] : %u", tiling_key_);
+
+  auto op_desc = node->GetOpDesc();
+  GE_CHECK_NOTNULL(op_desc);
+  op_desc->SetWorkspaceBytes(tiling_info.workspaces);
   if (tiling_data_.empty()) {
     GELOGD("[%s] Tiling data is empty.", op_desc->GetName().c_str());
     return SUCCESS;
@@ -401,9 +401,8 @@ Status AiCoreOpTask::UpdateTilingInfo(TaskContext &context) {
   }
 
   RECORD_EXECUTION_EVENT(execution_context, context.GetNodeName(), "[CopyTilingInfo] Start");
-  GE_CHK_RT_RET(rtMemcpy(tiling_buffer_->GetData(), tiling_buffer_->GetSize(),
-                         tiling_data_.c_str(), tiling_data_.size(),
-                         RT_MEMCPY_HOST_TO_DEVICE));
+  GE_CHK_RT_RET(rtMemcpyAsync(tiling_buffer_->GetData(), tiling_buffer_->GetSize(), tiling_data_.c_str(),
+                              tiling_data_.size(), RT_MEMCPY_HOST_TO_DEVICE_EX, context.GetStream()));
   RECORD_EXECUTION_EVENT(execution_context, context.GetNodeName(), "[CopyTilingInfo] End");
 
   GELOGD("[%s] Done updating tiling info for task: [%s]", node->GetName().c_str(), stub_name_.c_str());
diff --git a/tests/ut/ge/hybrid/ge_hybrid_unittest.cc b/tests/ut/ge/hybrid/ge_hybrid_unittest.cc
index b5aac527..4eae475d 100644
--- a/tests/ut/ge/hybrid/ge_hybrid_unittest.cc
+++ b/tests/ut/ge/hybrid/ge_hybrid_unittest.cc
@@ -111,14 +111,26 @@ TEST_F(UtestGeHybrid, aicore_op_task_init_success) {
 
 TEST_F(UtestGeHybrid, task_update_tiling_info) {
   auto aicore_task = std::unique_ptr<hybrid::AiCoreOpTask>(new(std::nothrow)hybrid::AiCoreOpTask());
-  aicore_task->is_single_op_ = true;
   auto graph = make_shared<ComputeGraph>("graph");
   OpDescPtr op_desc = CreateOpDesc("Add", "Add");
   ge::AttrUtils::SetStr(op_desc, "compile_info_key", "key");
   ge::AttrUtils::SetStr(op_desc, "compile_info_json", "json");
+  ge::AttrUtils::SetBool(op_desc, "support_dynamicshape", true);
+  ge::AttrUtils::SetInt(op_desc, "op_para_size", 1);
   auto node = graph->AddNode(op_desc);
-  optiling::OpRunInfo tiling_info;
-  ASSERT_EQ(aicore_task->CalcTilingInfo(node, tiling_info), SUCCESS);
+  // Abort the test if NodeItem creation fails instead of dereferencing node_item blindly.
+  std::unique_ptr<NodeItem> node_item;
+  ASSERT_EQ(NodeItem::Create(node, node_item), SUCCESS);
+  node_item->input_start = 0;
+  node_item->output_start = 0;
+
+  GraphExecutionContext execution_context;
+  SubgraphContext subgraph_context(nullptr, &execution_context);
+  NodeState node_state(*node_item, &subgraph_context);
+  auto task_context = TaskContext::Create(&node_state, &execution_context, &subgraph_context);
+  ASSERT_TRUE(task_context != nullptr);
+  ASSERT_EQ(aicore_task->InitTilingInfo(*op_desc), SUCCESS);
+  ASSERT_EQ(aicore_task->UpdateTilingInfo(*task_context), SUCCESS);
 }
 
 TEST_F(UtestGeHybrid, index_taskdefs_failed) {

From a48a3fa01c13805616085c40200372f05cdff97b Mon Sep 17 00:00:00 2001
From: zhaozhixuan <zhaozhixuan2@hisilicon.com>
Date: Thu, 6 May 2021 20:39:31 +0800
Subject: [PATCH 2/7] MemcpyAsync in aicore executor.

---
 ge/graph/passes/reshape_recovery_pass.cc               |  8 +++++++-
 ge/hybrid/node_executor/aicore/aicore_op_task.cc       | 10 +++++-----
 .../ge/graph/passes/reshape_recovery_pass_unittest.cc  |  8 ++++----
 3 files changed, 16 insertions(+), 10 deletions(-)

diff --git a/ge/graph/passes/reshape_recovery_pass.cc b/ge/graph/passes/reshape_recovery_pass.cc
index 7a9d085b..ba12ba15 100644
--- a/ge/graph/passes/reshape_recovery_pass.cc
+++ b/ge/graph/passes/reshape_recovery_pass.cc
@@ -60,7 +60,7 @@ Status InsertReshapeIfNeed(const NodePtr &node) {
              node->GetName().c_str(), src_anchor->GetIdx(), dst_node->GetName().c_str(), dst_anchor->GetIdx());
       GE_CHECK_NOTNULL(dst_node);
       GE_CHECK_NOTNULL(dst_node->GetOpDesc());
-      auto dst_tensor = dst_node->GetOpDesc()->GetInputDescPtr(dst_anchor->GetIdx());
+      auto dst_tensor = dst_node->GetOpDesc()->MutableInputDesc(dst_anchor->GetIdx());
       GE_CHECK_NOTNULL(dst_tensor);
       bool is_dynamic = false;
       const auto &src_tensor_dims = src_tensor->GetShape().GetDims();
@@ -71,6 +71,12 @@ Status InsertReshapeIfNeed(const NodePtr &node) {
                dst_node->GetName().c_str());
         is_dynamic = true;
       }
+      if (dst_node->GetType() == NETOUTPUT && is_dynamic) {
+        // NetOutput shape must be continuous when dynamic shape.
+        // Otherwise, there may be an error waiting for the shape refresh to time out during execution.
+        dst_tensor->SetShape(src_tensor->GetShape());
+        continue;
+      }
       bool is_need_insert_reshape = src_tensor_dims != dst_tensor_dims &&
                                     !is_dynamic;
       if (is_need_insert_reshape) {
diff --git a/ge/hybrid/node_executor/aicore/aicore_op_task.cc b/ge/hybrid/node_executor/aicore/aicore_op_task.cc
index 36f65bbe..68fbf93b 100644
--- a/ge/hybrid/node_executor/aicore/aicore_op_task.cc
+++ b/ge/hybrid/node_executor/aicore/aicore_op_task.cc
@@ -354,6 +354,8 @@ Status AiCoreOpTask::PrepareWithShape(TaskContext &context) {
 Status AiCoreOpTask::UpdateTilingInfo(TaskContext &context) {
   auto node = context.GetNodeItem().node;
   GE_CHECK_NOTNULL(node);
+  auto op_desc = node->GetOpDesc();
+  GE_CHECK_NOTNULL(op_desc);
 
   GELOGD("[%s] Start to update tiling info for task: [%s]", node->GetName().c_str(), stub_name_.c_str());
   OpRunInfo tiling_info;
@@ -368,16 +370,14 @@ Status AiCoreOpTask::UpdateTilingInfo(TaskContext &context) {
 
   // update op args by tiling info
   block_dim_ = static_cast<uint32_t>(tiling_info.block_dim);
+  op_desc->SetWorkspaceBytes(tiling_info.workspaces);
   clear_atomic_ = tiling_info.clear_atomic;
+
   tiling_data_ = tiling_info.tiling_data.str();
   tiling_key_ = tiling_info.tiling_key;
   GELOGD("Successfully getting [tiling_key] : %u", tiling_key_);
-
-  auto op_desc = node->GetOpDesc();
-  GE_CHECK_NOTNULL(op_desc);
-  op_desc->SetWorkspaceBytes(tiling_info.workspaces);
   if (tiling_data_.empty()) {
-    GELOGD("[%s] Tiling data is empty.", op_desc->GetName().c_str());
+    GELOGD("[%s] Tiling data is empty.", op_desc->GsetName().c_str());
     return SUCCESS;
   }
   if (tiling_buffer_ == nullptr) {
diff --git a/tests/ut/ge/graph/passes/reshape_recovery_pass_unittest.cc b/tests/ut/ge/graph/passes/reshape_recovery_pass_unittest.cc
index af60021c..3be11452 100644
--- a/tests/ut/ge/graph/passes/reshape_recovery_pass_unittest.cc
+++ b/tests/ut/ge/graph/passes/reshape_recovery_pass_unittest.cc
@@ -42,8 +42,8 @@ ut::GraphBuilder Graph1Builder() {
   auto var1 = builder.AddNode("var1", "Variable", 0, 1, FORMAT_ND, DT_FLOAT, {-1});
   auto const1 = builder.AddNode("const1", "Const", 0, 1, FORMAT_ND, DT_FLOAT, {1, 1, 224, 224});
   auto transdata2 = builder.AddNode("transdata2", "Transdata", 1, 1, FORMAT_ND, DT_FLOAT, {224, 224});
-  auto transdata1 = builder.AddNode("transdata1", "Transdata", 1, 1, FORMAT_ND, DT_FLOAT, {224, 224});
-  auto netoutput1 = builder.AddNode("netoutput1", "Netoutput", 2, 0);
+  auto transdata1 = builder.AddNode("transdata1", "Transdata", 1, 1, FORMAT_ND, DT_FLOAT, {-1, 224});
+  auto netoutput1 = builder.AddNode("netoutput1", "NetOutput", 2, 0);
 
   builder.AddDataEdge(var1, 0, transdata1, 0);
   builder.AddDataEdge(const1, 0, transdata2, 0);
@@ -58,10 +58,10 @@ TEST_F(UtestReshapeRecoveryPass, reshape_recovery_with_dynamic_shape) {
   auto builder = Graph1Builder();
   auto graph = builder.GetGraph();
   ReshapeRecoveryPass reshape_recovery_pass;
-  EXPECT_EQ(graph->GetDirectNodesSize(),5);
+  EXPECT_EQ(graph->GetDirectNodesSize(), 5);
   Status ret = reshape_recovery_pass.Run(graph);
   EXPECT_EQ(ret, SUCCESS);
-  EXPECT_EQ(graph->GetDirectNodesSize(),8);
+  EXPECT_EQ(graph->GetDirectNodesSize(), 7);
 
   auto reshape1 = graph->FindNode("Reshape_ReshapeRecoveryPass_0");
   EXPECT_NE(reshape1, nullptr);

From 5f63b5ee912e9cd3146987d9dfce50c55ac763f3 Mon Sep 17 00:00:00 2001
From: zhaozhixuan <zhaozhixuan2@hisilicon.com>
Date: Thu, 6 May 2021 20:41:07 +0800
Subject: [PATCH 3/7] MemcpyAsync in aicore executor.

---
 ge/hybrid/node_executor/aicore/aicore_op_task.cc | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/ge/hybrid/node_executor/aicore/aicore_op_task.cc b/ge/hybrid/node_executor/aicore/aicore_op_task.cc
index 68fbf93b..0fcc6299 100644
--- a/ge/hybrid/node_executor/aicore/aicore_op_task.cc
+++ b/ge/hybrid/node_executor/aicore/aicore_op_task.cc
@@ -377,7 +377,7 @@ Status AiCoreOpTask::UpdateTilingInfo(TaskContext &context) {
   tiling_key_ = tiling_info.tiling_key;
   GELOGD("Successfully getting [tiling_key] : %u", tiling_key_);
   if (tiling_data_.empty()) {
-    GELOGD("[%s] Tiling data is empty.", op_desc->GsetName().c_str());
+    GELOGD("[%s] Tiling data is empty.", op_desc->GetName().c_str());
     return SUCCESS;
   }
   if (tiling_buffer_ == nullptr) {

From e93b37621f9331ab1200144e613efd5cdb7824dc Mon Sep 17 00:00:00 2001
From: zhaozhixuan <zhaozhixuan2@hisilicon.com>
Date: Fri, 7 May 2021 15:39:08 +0800
Subject: [PATCH 4/7] Optimize performance of single_op executor.

---
 ge/single_op/single_op.cc                     | 32 +++++++++++++++++++
 ge/single_op/single_op.h                      |  3 ++
 ge/single_op/single_op_model.cc               | 32 +++++++++++++++++++
 ge/single_op/single_op_model.h                |  2 ++
 .../ge/single_op/single_op_model_unittest.cc  | 17 ++++++++++
 tests/ut/ge/single_op/single_op_unittest.cc   | 21 +++++++++++-
 6 files changed, 106 insertions(+), 1 deletion(-)

diff --git a/ge/single_op/single_op.cc b/ge/single_op/single_op.cc
index 4b3f17cf..e326f3e0 100755
--- a/ge/single_op/single_op.cc
+++ b/ge/single_op/single_op.cc
@@ -361,6 +361,37 @@ Status DynamicSingleOp::SetHostTensorValue(const std::vector<std::pair<size_t, u
   return SUCCESS;
 }
 
+Status DynamicSingleOp::SetHostTensorValue(const vector<GeTensorDesc> &input_desc,
+                                           const vector<DataBuffer> &input_buffers) {
+  for (auto &tensor_map : tensor_with_hostmem_) {
+    auto index = tensor_map.first;
+    if (index >= input_desc.size() || index >= input_buffers.size()) {
+      GELOGE(INTERNAL_ERROR, "[Check][Size]Index %d should smaller then input desc size %zu "
+             "and input buffers size %zu.", index, input_desc.size(), input_buffers.size());
+      return INTERNAL_ERROR;
+    }
+    auto ge_tensor_desc = input_desc[index];
+    // reconstruct GeTensor by DataBuffer
+    GeTensorPtr ge_tensor = MakeShared<GeTensor>(ge_tensor_desc);
+    GE_CHECK_NOTNULL(ge_tensor);
+    GELOGD("The %d tensor input type is host, desc data type is %d, input buffer addr is %p, size is %ld.",
+           index, ge_tensor_desc.GetDataType(), input_buffers[index].data, input_buffers[index].length);
+    if (ge_tensor->SetData(reinterpret_cast<uint8_t *>(input_buffers[index].data),
+                           static_cast<size_t>(input_buffers[index].length)) != SUCCESS) {
+      GELOGE(INTERNAL_ERROR, "[Set][Data]Failed to set data of ge tensor.");
+      return INTERNAL_ERROR;
+    }
+    for (auto &tensor_desc : tensor_map.second) {
+      GE_CHECK_NOTNULL(tensor_desc);
+      if (!AttrUtils::SetTensor(tensor_desc, ATTR_NAME_VALUE, ge_tensor)) {
+        GELOGE(FAILED, "[Set][ATTR_NAME_VALUE]Failed to set ATTR_NAME_VALUE.");
+        return FAILED;
+      }
+    }
+  }
+  return SUCCESS;
+}
+
 Status DynamicSingleOp::ExecuteAsync(const vector<GeTensorDesc> &input_desc,
                                      const vector<DataBuffer> &input_buffers,
                                      vector<GeTensorDesc> &output_desc,
@@ -374,6 +405,7 @@ Status DynamicSingleOp::ExecuteAsync(const vector<GeTensorDesc> &input_desc,
   if (!inputs_size.empty()) {
     StreamResource *stream_resource  = SingleOpManager::GetInstance().GetResource(resource_id_, stream_);
     GE_CHK_STATUS_RET_NOLOG(UpdateInputsBufferAddr(stream_resource, stream_, inputs_size, update_buffers));
+    GE_CHK_STATUS_RET_NOLOG(SetHostTensorValue(input_desc, input_buffers));
   }
 
   if (hybrid_model_executor_ != nullptr) {
diff --git a/ge/single_op/single_op.h b/ge/single_op/single_op.h
index 01d6dfc0..deb4532e 100755
--- a/ge/single_op/single_op.h
+++ b/ge/single_op/single_op.h
@@ -81,9 +81,12 @@ class DynamicSingleOp {
                         std::vector<DataBuffer> &outputs) const;
   Status SetHostTensorValue(const std::vector<std::pair<size_t, uint64_t>> &inputs_size,
                             const vector<GeTensorDesc> &input_desc, const std::vector<DataBuffer> &input_buffers);
+  Status SetHostTensorValue(const vector<GeTensorDesc> &input_desc, const vector<DataBuffer> &input_buffers);
   std::unique_ptr<OpTask> op_task_;
   std::unique_ptr<hybrid::HybridModel> hybrid_model_;
   std::unique_ptr<hybrid::HybridModelExecutor> hybrid_model_executor_;
+  std::map<int32_t, std::vector<GeTensorDescPtr>> tensor_with_hostmem_;
+
   uintptr_t resource_id_ = 0;
   std::mutex *stream_mutex_;
   rtStream_t stream_ = nullptr;
diff --git a/ge/single_op/single_op_model.cc b/ge/single_op/single_op_model.cc
index a4135999..6959c6b3 100755
--- a/ge/single_op/single_op_model.cc
+++ b/ge/single_op/single_op_model.cc
@@ -235,6 +235,13 @@ Status SingleOpModel::LoadAllNodes() {
 
     if (op_type == DATA_TYPE || op_type == AIPP_DATA_TYPE) {
       data_ops_.emplace_back(op_desc);
+      auto tensor = op_desc->MutableInputDesc(0);
+      if (AttrUtils::HasAttr(tensor, ATTR_NAME_VALUE)) {
+        int32_t index = 0;
+        (void) AttrUtils::GetInt(op_desc, ATTR_NAME_INDEX, index);
+        GELOGD("Node %s, index %d, has host mem.", node->GetName().c_str(), index);
+        op_with_hostmem_[index] = node;
+      }
       continue;
     }
 
@@ -616,6 +623,7 @@ Status SingleOpModel::BuildDynamicOp(StreamResource &resource, DynamicSingleOp &
   if (need_hybrid_model) {
     GELOGD("Build single op HybridModel.");
     GE_CHK_STATUS_RET_NOLOG(hybrid::NodeExecutorManager::GetInstance().EnsureInitialized());
+    GE_CHK_STATUS(SetHostMemTensor(single_op), "[Init][HostMem]Failed.");
     auto root_model = model_helper_.GetGeRootModel();
     GE_CHECK_NOTNULL(root_model);
     root_model->SetRootGraph(GraphUtils::GetComputeGraph(ge_model->GetGraph()));
@@ -634,4 +642,28 @@ Status SingleOpModel::BuildDynamicOp(StreamResource &resource, DynamicSingleOp &
   }
   return BuildTaskListForDynamicOp(&resource, single_op);
 }
+
+// Records, for every node kept in op_with_hostmem_, the input tensor descs of the nodes fed by its output 0.
+Status SingleOpModel::SetHostMemTensor(DynamicSingleOp &single_op) {
+  for (auto &hostmem_entry : op_with_hostmem_) {
+    const auto data_index = hostmem_entry.first;
+    const auto data_node = hostmem_entry.second;
+    const auto data_out_anchor = data_node->GetOutDataAnchor(0);
+    GE_CHECK_NOTNULL(data_out_anchor);
+    vector<GeTensorDescPtr> consumer_descs;
+    // Walk every input anchor connected to output 0 and collect the matching mutable input desc.
+    for (auto peer_anchor : data_out_anchor->GetPeerInDataAnchors()) {
+      GE_CHECK_NOTNULL(peer_anchor);
+      auto consumer_node = peer_anchor->GetOwnerNode();
+      GE_CHECK_NOTNULL(consumer_node);
+      auto consumer_op_desc = consumer_node->GetOpDesc();
+      GE_CHECK_NOTNULL(consumer_op_desc);
+      consumer_descs.emplace_back(consumer_op_desc->MutableInputDesc(peer_anchor->GetIdx()));
+      GELOGD("Get %d th input tensor desc of %s by %d data node: %s.", peer_anchor->GetIdx(),
+             consumer_node->GetName().c_str(), data_index, data_node->GetName().c_str());
+    }
+    single_op.tensor_with_hostmem_[data_index] = consumer_descs;
+  }
+  return SUCCESS;
+}
 }  // namespace ge
diff --git a/ge/single_op/single_op_model.h b/ge/single_op/single_op_model.h
index d900f09f..e7d07ee0 100755
--- a/ge/single_op/single_op_model.h
+++ b/ge/single_op/single_op_model.h
@@ -77,6 +77,7 @@ class SingleOpModel {
   static void ParseOpModelParams(ModelHelper &model_helper, SingleOpModelParam &param);
   void ParseArgTable(OpTask *task, SingleOp &op);
   Status InitHybridModelExecutor(const StreamResource &resource, const GeModelPtr &ge_model, SingleOp &single_op);
+  Status SetHostMemTensor(DynamicSingleOp &single_op);
 
   std::string model_name_;
   uint32_t model_id_ = 0;
@@ -86,6 +87,7 @@ class SingleOpModel {
   ModelHelper model_helper_;
 
   map<uint32_t, NodePtr> op_list_;
+  map<int32_t, NodePtr> op_with_hostmem_;
   SingleOpModelParam model_params_;
 
   std::vector<ptrdiff_t> input_offset_list_;
diff --git a/tests/ut/ge/single_op/single_op_model_unittest.cc b/tests/ut/ge/single_op/single_op_model_unittest.cc
index dadabaf6..f5d1a83c 100644
--- a/tests/ut/ge/single_op/single_op_model_unittest.cc
+++ b/tests/ut/ge/single_op/single_op_model_unittest.cc
@@ -27,6 +27,7 @@
 #include "single_op/task/tbe_task_builder.h"
 #undef private
 #undef protected
+#include "graph/passes/graph_builder_utils.h"
 
 using namespace std;
 using namespace testing;
@@ -223,3 +224,19 @@ TEST_F(UtestSingleOpModel, test_build_dynamic_op) {
   model.BuildDynamicOp(res, dynamic_single_op);
 }
 
+TEST_F(UtestSingleOpModel, test_host_mem) {
+  const string raw_model = "123456789";
+  SingleOpModel model("model", raw_model.c_str(), raw_model.size());
+
+  // Build a two-node graph: Data (output 0) -> NetOutput (input 0).
+  ut::GraphBuilder graph_builder("graph");
+  auto data_node = graph_builder.AddNode("Data", "Data", 0, 1);
+  auto net_output = graph_builder.AddNode("Netoutput", "NetOutput", 1, 0);
+  graph_builder.AddDataEdge(data_node, 0, net_output, 0);
+  auto compute_graph = graph_builder.GetGraph();
+  model.op_with_hostmem_[0] = data_node;
+
+  std::mutex stream_mutex;
+  DynamicSingleOp dynamic_op(0, &stream_mutex, nullptr);
+  ASSERT_EQ(model.SetHostMemTensor(dynamic_op), SUCCESS);
+}
diff --git a/tests/ut/ge/single_op/single_op_unittest.cc b/tests/ut/ge/single_op/single_op_unittest.cc
index 8c2f6e51..3519811b 100644
--- a/tests/ut/ge/single_op/single_op_unittest.cc
+++ b/tests/ut/ge/single_op/single_op_unittest.cc
@@ -160,4 +160,23 @@ TEST_F(UtestSingleOp, test_singleop_execute_async2) {
   EXPECT_EQ(single_op.running_param_->mem_base, nullptr);
   EXPECT_EQ(single_op.tasks_.size(), 0);
   EXPECT_EQ(single_op.ExecuteAsync(input_buffers, output_buffers), PARAM_INVALID);
-}
\ No newline at end of file
+}
+
+TEST_F(UtestSingleOp, test_set_host_mem) {
+  std::mutex stream_mu_;
+  DynamicSingleOp single_op(0, &stream_mu_, nullptr);
+  // Minimal inputs: one default-constructed buffer and desc, matching host-mem index 0 below.
+  vector<DataBuffer> input_buffers;
+  DataBuffer data_buffer;
+  input_buffers.emplace_back(data_buffer);
+
+  vector<GeTensorDesc> input_descs;
+  GeTensorDesc tensor_desc1;
+  input_descs.emplace_back(tensor_desc1);
+
+  vector<GeTensorDescPtr> op_input_descs;
+  auto tensor_desc2 = std::make_shared<GeTensorDesc>();
+  op_input_descs.emplace_back(tensor_desc2);
+  single_op.tensor_with_hostmem_[0] = op_input_descs;
+  EXPECT_EQ(single_op.SetHostTensorValue(input_descs, input_buffers), SUCCESS);
+}

From 3ace2a36b6f927c3cd7a496d6d9bece44584be38 Mon Sep 17 00:00:00 2001
From: zhaozhixuan <zhaozhixuan2@hisilicon.com>
Date: Fri, 7 May 2021 16:07:23 +0800
Subject: [PATCH 5/7] Optimize performance of single_op executor.

---
 ge/single_op/single_op.cc | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/ge/single_op/single_op.cc b/ge/single_op/single_op.cc
index e326f3e0..36ca1850 100755
--- a/ge/single_op/single_op.cc
+++ b/ge/single_op/single_op.cc
@@ -364,9 +364,9 @@ Status DynamicSingleOp::SetHostTensorValue(const std::vector<std::pair<size_t, u
 Status DynamicSingleOp::SetHostTensorValue(const vector<GeTensorDesc> &input_desc,
                                            const vector<DataBuffer> &input_buffers) {
   for (auto &tensor_map : tensor_with_hostmem_) {
-    auto index = tensor_map.first;
+    auto index = static_cast<size_t>(tensor_map.first);
     if (index >= input_desc.size() || index >= input_buffers.size()) {
-      GELOGE(INTERNAL_ERROR, "[Check][Size]Index %d should smaller then input desc size %zu "
+      GELOGE(INTERNAL_ERROR, "[Check][Size]Index %zu should smaller then input desc size %zu "
              "and input buffers size %zu.", index, input_desc.size(), input_buffers.size());
       return INTERNAL_ERROR;
     }
@@ -374,7 +374,7 @@ Status DynamicSingleOp::SetHostTensorValue(const vector<GeTensorDesc> &input_des
     // reconstruct GeTensor by DataBuffer
     GeTensorPtr ge_tensor = MakeShared<GeTensor>(ge_tensor_desc);
     GE_CHECK_NOTNULL(ge_tensor);
-    GELOGD("The %d tensor input type is host, desc data type is %d, input buffer addr is %p, size is %ld.",
+    GELOGD("The %zu tensor input type is host, desc data type is %d, input buffer addr is %p, size is %ld.",
            index, ge_tensor_desc.GetDataType(), input_buffers[index].data, input_buffers[index].length);
     if (ge_tensor->SetData(reinterpret_cast<uint8_t *>(input_buffers[index].data),
                            static_cast<size_t>(input_buffers[index].length)) != SUCCESS) {

From ef1ed8d6accab27943dab84ccfd506e87ea3091a Mon Sep 17 00:00:00 2001
From: zhaozhixuan <zhaozhixuan2@hisilicon.com>
Date: Sat, 8 May 2021 09:55:13 +0800
Subject: [PATCH 6/7] Optimize performance of single_op executor.

---
 ge/hybrid/model/hybrid_model_builder.cc | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/ge/hybrid/model/hybrid_model_builder.cc b/ge/hybrid/model/hybrid_model_builder.cc
index a047a05b..7949ae7f 100755
--- a/ge/hybrid/model/hybrid_model_builder.cc
+++ b/ge/hybrid/model/hybrid_model_builder.cc
@@ -364,6 +364,16 @@ Status HybridModelBuilder::ParseDependentInputNodes(NodeItem &node_item, const s
     const auto &src_node = peer_out_anchor->GetOwnerNode();
     GE_CHECK_NOTNULL(src_node);
     auto src_node_item = MutableNodeItem(src_node);
+    GE_CHECK_NOTNULL(src_node_item);
+    if (src_node_item->NodeType() == DATA) {
+      auto op_desc = src_node_item->GetOpDesc();
+      GE_CHECK_NOTNULL(op_desc);
+      auto tensor = op_desc->MutableInputDesc(0);
+      if (AttrUtils::HasAttr(tensor, ATTR_NAME_VALUE)) {
+        GELOGD("Skip d2h memcpy, get hostmem from node %s.", src_node_item->NodeName().c_str());
+        continue;
+      }
+    }
     src_node_item->to_const_output_id_list.emplace(peer_out_anchor->GetIdx());
     dependent_for_shape_inference.emplace(src_node);
     host_input_value_dependencies_[&node_item].emplace_back(peer_out_anchor->GetIdx(), src_node_item);

From a933ea880c8b047755915c5bb8a0c15eb6d510d4 Mon Sep 17 00:00:00 2001
From: zhaozhixuan <zhaozhixuan2@hisilicon.com>
Date: Mon, 10 May 2021 09:40:51 +0800
Subject: [PATCH 7/7] Optimize performance of single_op executor.

---
 ge/hybrid/model/hybrid_model_builder.cc | 10 ----------
 1 file changed, 10 deletions(-)

diff --git a/ge/hybrid/model/hybrid_model_builder.cc b/ge/hybrid/model/hybrid_model_builder.cc
index 7949ae7f..a047a05b 100755
--- a/ge/hybrid/model/hybrid_model_builder.cc
+++ b/ge/hybrid/model/hybrid_model_builder.cc
@@ -364,16 +364,6 @@ Status HybridModelBuilder::ParseDependentInputNodes(NodeItem &node_item, const s
     const auto &src_node = peer_out_anchor->GetOwnerNode();
     GE_CHECK_NOTNULL(src_node);
     auto src_node_item = MutableNodeItem(src_node);
-    GE_CHECK_NOTNULL(src_node_item);
-    if (src_node_item->NodeType() == DATA) {
-      auto op_desc = src_node_item->GetOpDesc();
-      GE_CHECK_NOTNULL(op_desc);
-      auto tensor = op_desc->MutableInputDesc(0);
-      if (AttrUtils::HasAttr(tensor, ATTR_NAME_VALUE)) {
-        GELOGD("Skip d2h memcpy, get hostmem from node %s.", src_node_item->NodeName().c_str());
-        continue;
-      }
-    }
     src_node_item->to_const_output_id_list.emplace(peer_out_anchor->GetIdx());
     dependent_for_shape_inference.emplace(src_node);
     host_input_value_dependencies_[&node_item].emplace_back(peer_out_anchor->GetIdx(), src_node_item);