From bc0bd9b1285cd3831f4011f093a67fa3370e4386 Mon Sep 17 00:00:00 2001
From: wqtshg <wangtao123@huawei.com>
Date: Mon, 31 May 2021 22:00:45 +0800
Subject: [PATCH] Zero copy nodes are not allocated memory in the known
 subgraph

---
 ge/graph/load/model_manager/davinci_model.cc  | 20 +++++++++++++++++++
 ge/graph/load/model_manager/davinci_model.h   |  8 ++++++++
 .../load/model_manager/task_info/task_info.h  |  3 ++-
 .../compiledsubgraph/known_node_executor.cc   | 18 +++++++++--------
 .../ge/graph/load/davinci_model_unittest.cc   | 11 ++++++++++
 5 files changed, 51 insertions(+), 9 deletions(-)

diff --git a/ge/graph/load/model_manager/davinci_model.cc b/ge/graph/load/model_manager/davinci_model.cc
index b52796c8..05c50a58 100755
--- a/ge/graph/load/model_manager/davinci_model.cc
+++ b/ge/graph/load/model_manager/davinci_model.cc
@@ -488,6 +488,8 @@ void DavinciModel::InitRuntimeParams() {
   session_scope_mem_info.memory_size = static_cast<size_t>(ret ? value : 0);
   runtime_param_.memory_infos[kSessionScopeMemory | RT_MEMORY_HBM] = std::move(session_scope_mem_info);
 
+  ret = ge::AttrUtils::GetInt(ge_model_, ATTR_MODEL_ZERO_COPY_MEMORY_SIZE, value);
+  runtime_param_.zero_copy_size = ret ? value : 0;
   GELOGI("InitRuntimeParams(), %s.", runtime_param_.ToString().c_str());
 }
 
@@ -4505,4 +4507,22 @@ void DavinciModel::UpdateOpIOAddrs(uint32_t task_id, uint32_t stream_id, const s
   op_desc_info->output_addrs = output_addrs;
   GELOGD("[Update][OpIOAddrs] Op [%s] update input output addr success.", op_desc_info->op_name.c_str());
 }
+
+///
+/// @ingroup ge
+/// @brief Get total mem size minus zero copy size; known subgraph init does not allocate zero copy memory.
+/// @param [out] total_useful_size: total mem size - zero copy size, written only on SUCCESS.
+/// @return SUCCESS, or FAILED if total mem size is less than zero copy size.
+///
+Status DavinciModel::GetTotalMemSizeExcludeZeroCopy(int64_t &total_useful_size) {
+  if (runtime_param_.mem_size < static_cast<uint64_t>(runtime_param_.zero_copy_size)) {
+    REPORT_CALL_ERROR("E19999", "total mem size[%lu] is less than zero copy size[%ld] ", runtime_param_.mem_size,
+                      runtime_param_.zero_copy_size);
+    GELOGE(FAILED, "[Check][TotalMemSizeExcludeZeroCopy] failed, total mem size[%lu] is less than zero copy size[%ld]",
+           runtime_param_.mem_size, runtime_param_.zero_copy_size);
+    return FAILED;
+  }
+  total_useful_size = runtime_param_.mem_size - runtime_param_.zero_copy_size;
+  return SUCCESS;
+}
 }  // namespace ge
diff --git a/ge/graph/load/model_manager/davinci_model.h b/ge/graph/load/model_manager/davinci_model.h
index e4898dec..8a8fb35e 100755
--- a/ge/graph/load/model_manager/davinci_model.h
+++ b/ge/graph/load/model_manager/davinci_model.h
@@ -248,6 +248,14 @@ class DavinciModel {
   // get total mem size
   size_t TotalMemSize() const { return runtime_param_.mem_size; }
 
+  ///
+  /// @ingroup ge
+  /// @brief Get total mem size minus zero copy size; known subgraph init does not allocate zero copy memory.
+  /// @param [out] total_useful_size: total mem size - zero copy size, written only on SUCCESS.
+  /// @return SUCCESS, or FAILED if total mem size is less than zero copy size.
+  ///
+  Status GetTotalMemSizeExcludeZeroCopy(int64_t &total_useful_size);
+
   // model name
   string Name() const { return name_; }
 
diff --git a/ge/graph/load/model_manager/task_info/task_info.h b/ge/graph/load/model_manager/task_info/task_info.h
index 5657f003..9f849b20 100644
--- a/ge/graph/load/model_manager/task_info/task_info.h
+++ b/ge/graph/load/model_manager/task_info/task_info.h
@@ -49,7 +49,7 @@ struct RuntimeParam {
        << ", label_num:" << label_num << ", logic_mem_base:" << logic_mem_base
        << ", logic_weight_base:" << logic_weight_base << ", logic_var_base:" << logic_var_base
        << ", memory_size:" << mem_size << ", weight_size:" << weight_size << ", var_size:" << var_size
-       << ", ex_memory_info:";
+       << ", zero_copy_size:" << zero_copy_size << ", ex_memory_info:";
     for (auto it : memory_infos) {
       ss << "[memory_type:" << it.first << ", memory_size:" << it.second.memory_size << "]";
     }
@@ -65,6 +65,7 @@ struct RuntimeParam {
   uint64_t var_size = 0;
   uint64_t logic_var_base = 0;
   uint8_t *var_base = nullptr;
+  int64_t zero_copy_size = 0;
   std::map<uint64_t, MemInfo> memory_infos;
   uint32_t batch_num = 0;
   uint32_t stream_num = 0;
diff --git a/ge/hybrid/node_executor/compiledsubgraph/known_node_executor.cc b/ge/hybrid/node_executor/compiledsubgraph/known_node_executor.cc
index c800e93d..8b839849 100755
--- a/ge/hybrid/node_executor/compiledsubgraph/known_node_executor.cc
+++ b/ge/hybrid/node_executor/compiledsubgraph/known_node_executor.cc
@@ -101,18 +101,20 @@ Status KnownNodeTask::Init(TaskContext &context) {
   GE_CHK_STATUS_RET(context.AllocateOutputs(), "[Allocate][Outputs] failed for %s.", context.GetNodeName());
   // allocate mem base
   void *buffer = nullptr;
-  if (davinci_model_->TotalMemSize() != 0) {
+  int64_t total_useful_size = 0;
+  GE_CHK_STATUS_RET(davinci_model_->GetTotalMemSizeExcludeZeroCopy(total_useful_size),
+                    "[Get][TotalMemSizeExcludeZeroCopy] failed.");
+  if (total_useful_size != 0) {
     RECORD_EXECUTION_EVENT(context.GetExecutionContext(), context.GetNodeName(),
                            "[KnownNodeTask_AllocateWorkspace] Start");
-    GE_CHK_STATUS_RET(context.AllocateWorkspace(davinci_model_->TotalMemSize(), &buffer,
-                                                davinci_model_->GetRuntimeParam().mem_base),
+    GE_CHK_STATUS_RET(context.AllocateWorkspace(total_useful_size, &buffer, davinci_model_->GetRuntimeParam().mem_base),
                       "[Allocate][Workspace] failed for %s.", context.GetNodeName());
     RECORD_EXECUTION_EVENT(context.GetExecutionContext(), context.GetNodeName(),
-                           "[KnownNodeTask_AllocateWorkspace] End, size %zu", davinci_model_->TotalMemSize());
+                           "[KnownNodeTask_AllocateWorkspace] End, size %ld", total_useful_size);
     // update mem base
     davinci_model_->UpdateMemBase(static_cast<uint8_t *>(buffer));
-    GELOGI("KnownNodeTask::Init mem base is %p, size %lu.",
-           davinci_model_->GetRuntimeParam().mem_base, davinci_model_->GetRuntimeParam().mem_size);
+    GELOGI("KnownNodeTask::Init mem base is %p, size %ld.",
+           davinci_model_->GetRuntimeParam().mem_base, total_useful_size);
   }
   GE_CHK_STATUS_RET(ModelManager::GetInstance()->DestroyAicpuKernel(davinci_model_->GetSessionId(),
                                                                     davinci_model_->Id(),
@@ -154,8 +156,8 @@ Status KnownNodeTask::InitDavinciModel(const HybridModel &model, TensorBuffer *w
     weight_size = weight_buffer->GetSize();
   }
   GELOGD("Start to init davinci model, weight size = %zu", weight_size);
-  GE_CHK_STATUS_RET(DoInitDavinciModel(weight, weight_size), "[Init][Model] Failed to init davinci model.");
-  GELOGD("[Init][Model] success");
+  GE_CHK_STATUS_RET(DoInitDavinciModel(weight, weight_size), "[Init][DavinciModel] Failed to init davinci model.");
+  GELOGD("[Init][DavinciModel] success");
   return SUCCESS;
 }
 
diff --git a/tests/ut/ge/graph/load/davinci_model_unittest.cc b/tests/ut/ge/graph/load/davinci_model_unittest.cc
index 4771ca8d..3f9cc850 100644
--- a/tests/ut/ge/graph/load/davinci_model_unittest.cc
+++ b/tests/ut/ge/graph/load/davinci_model_unittest.cc
@@ -1048,4 +1048,15 @@ TEST_F(UtestDavinciModel, update_io_addr_success) {
   vector<void *> io_addr = {nullptr, nullptr};
   model.UpdateOpIOAddrs(task_id, stream_id, io_addr);
 }
+TEST_F(UtestDavinciModel, get_total_memsize_exclude_zero_copy) {
+  DavinciModel model(0, nullptr);
+  model.runtime_param_.mem_size = 1024;
+  model.runtime_param_.zero_copy_size = 2048;
+  int64_t total_useful_size = 0;
+  EXPECT_EQ(model.GetTotalMemSizeExcludeZeroCopy(total_useful_size), FAILED);
+  EXPECT_EQ(total_useful_size, 0);
+  model.runtime_param_.zero_copy_size = 512;
+  EXPECT_EQ(model.GetTotalMemSizeExcludeZeroCopy(total_useful_size), SUCCESS);
+  EXPECT_EQ(total_useful_size, 512);
+}
 }  // namespace ge