From 061df1c781a886ae8f78842c85c86a39821373da Mon Sep 17 00:00:00 2001
From: chuxing
Date: Wed, 6 Jan 2021 11:45:12 +0800
Subject: [PATCH] support dynamic inputs

---
 .../executor/hybrid_model_async_executor.cc | 94 +++++++++++--------
 .../executor/hybrid_model_async_executor.h  |  4 +-
 2 files changed, 57 insertions(+), 41 deletions(-)

diff --git a/ge/hybrid/executor/hybrid_model_async_executor.cc b/ge/hybrid/executor/hybrid_model_async_executor.cc
index 4d23cd55..9cef4968 100644
--- a/ge/hybrid/executor/hybrid_model_async_executor.cc
+++ b/ge/hybrid/executor/hybrid_model_async_executor.cc
@@ -94,7 +94,7 @@ Status HybridModelAsyncExecutor::Init() {
   executor_ = std::unique_ptr<HybridModelExecutor>(new(std::nothrow) HybridModelExecutor(model_, device_id_, stream_));
   GE_CHECK_NOTNULL(executor_);
   GE_CHK_STATUS_RET(executor_->Init(), "Failed to init hybrid engine");
-  GE_CHK_STATUS_RET(InitInputTensors(), "Failed to init input tensors");
+  GE_CHK_STATUS_RET(InitInputDesc(), "Failed to init input tensors");
 
   return SUCCESS;
 }
@@ -198,50 +198,66 @@ Status HybridModelAsyncExecutor::SyncVarData() {
 }
 
 Status HybridModelAsyncExecutor::PrepareInputs(const InputData &current_data, HybridModelExecutor::ExecuteArgs &args) {
-  args.inputs.resize(input_tensors_.size());
+  if (current_data.blobs.size() < input_tensor_desc_.size()) {
+    GELOGE(PARAM_INVALID, "Blob size mismatches, expect at least %zu, but got %zu",
+           input_tensor_desc_.size(), current_data.blobs.size());
+    return PARAM_INVALID;
+  }
+
+  auto allocator = NpuMemoryAllocator::GetAllocator(device_id_);
+  GE_CHECK_NOTNULL(allocator);
+
   args.input_desc.resize(input_tensor_desc_.size());
   const std::vector<DataBuffer> &blobs = current_data.blobs;
 
-  for (const auto &it : input_tensors_) {
-    auto input_index = it.first;
-    auto input_tensor = it.second;
-    auto data_size = input_tensor.GetSize();
-    GELOGD("To copy input data for input[%u]", input_index);
-    if (input_index >= blobs.size()) {
-      GELOGE(FAILED, "Blobs not match: blobs=%zu, tensor=%zu, index=%u, size=%ld",
-             blobs.size(), model_->input_nodes_.size(), input_index, data_size);
-      return FAILED;
+  for (size_t input_index = 0; input_index < input_tensor_desc_.size(); ++input_index) {
+    auto tensor_size = input_sizes_[input_index];
+    if (is_input_dynamic_[input_index]) {
+      if (input_index >= current_data.shapes.size()) {
+        GELOGE(PARAM_INVALID, "Shape index out of range, index = %zu, shape size = %zu",
+               input_index, current_data.shapes.size());
+        return PARAM_INVALID;
+      }
+      auto &tensor_desc = input_tensor_desc_[input_index];
+      tensor_desc->SetShape(GeShape(current_data.shapes[input_index]));
+      args.input_desc[input_index] = tensor_desc;
+      GELOGD("Update shape of input[%zu] to [%s]", input_index, tensor_desc->MutableShape().ToString().c_str());
+      GE_CHK_GRAPH_STATUS_RET(TensorUtils::GetTensorMemorySizeInBytes(*tensor_desc, tensor_size),
+                              "Failed to calc tensor size, index = %zu, shape = [%s]",
+                              input_index,
+                              tensor_desc->GetShape().ToString().c_str());
+      GELOGD("Input tensor[%zu] size = %ld", input_index, tensor_size);
     }
 
+    GE_CHECK_GE(tensor_size, 0);
+    auto tensor_buffer = TensorBuffer::Create(allocator, tensor_size);
+    GE_CHECK_NOTNULL(tensor_buffer);
+    args.inputs.emplace_back(std::shared_ptr<TensorBuffer>(tensor_buffer.release()));
+
+    GELOGD("To copy input data for input[%zu]", input_index);
     const DataBuffer &data_buf = blobs[input_index];
-    auto mem_size = static_cast<uint32_t>(data_size);
+    auto mem_size = static_cast<uint64_t>(tensor_size);
     GE_CHK_BOOL_RET_STATUS(mem_size >= data_buf.length, PARAM_INVALID,
-                           "input data size(%lu) does not match model required size(%u), ret failed.",
+                           "input data size(%lu) does not match model required size(%lu), ret failed.",
                            data_buf.length, mem_size);
 
     GELOGI("[IMAS]CopyPlainData memcpy graph_%u type[F] output[%u] memaddr[%p] mem_size[%u] datasize[%lu]",
-           model_->root_runtime_param_.graph_id, input_index, input_tensor.GetData(), mem_size, data_buf.length);
-    GE_CHK_RT_RET(rtMemcpy(input_tensor.MutableData(),
+           model_->root_runtime_param_.graph_id,
+           input_index,
+           args.inputs[input_index].GetData(),
+           mem_size,
+           data_buf.length);
+    GE_CHK_RT_RET(rtMemcpy(args.inputs[input_index].MutableData(),
                            mem_size,
                            data_buf.data,
                            data_buf.length,
                            RT_MEMCPY_HOST_TO_DEVICE));
-
-    args.inputs[input_index] = input_tensor;
-    if (is_input_dynamic_[input_index]) {
-      auto &tensor_desc = input_tensor_desc_[input_index];
-      tensor_desc->SetShape(GeShape(current_data.shapes[input_index]));
-      args.input_desc[input_index] = tensor_desc;
-      GELOGD("Update shape of input[%u] to [%s]", input_index, tensor_desc->MutableShape().ToString().c_str());
-    }
   }
 
   return SUCCESS;
 }
 
-Status HybridModelAsyncExecutor::InitInputTensors() {
-  auto allocator = NpuMemoryAllocator::GetAllocator(device_id_);
-  GE_CHECK_NOTNULL(allocator);
+Status HybridModelAsyncExecutor::InitInputDesc() {
   int input_index = 0;
   for (const auto &input_node : model_->GetRootGraphItem()->GetInputNodes()) {
     GELOGD("Init input[%u], node = %s, is_dynamic = %d",
@@ -250,21 +266,21 @@ Status HybridModelAsyncExecutor::InitInputTensors() {
            input_node->is_dynamic);
     auto output_desc = input_node->MutableOutputDesc(kDataOutputIndex);
     GE_CHECK_NOTNULL(output_desc);
-    int64_t tensor_size = 0;
-    GE_CHK_GRAPH_STATUS_RET(TensorUtils::GetSize(*output_desc, tensor_size),
-                            "Failed to get size from %s",
-                            input_node->NodeName().c_str());
-    if (tensor_size == 0) {
-      GELOGW("[%s] Tensor size == 0", input_node->NodeName().c_str());
-      GE_CHK_GRAPH_STATUS_RET(TensorUtils::GetTensorMemorySizeInBytes(*output_desc, tensor_size),
-                              "Failed to calc tensor size");
-      GELOGD("[%s] Tensor size updated to %ld", input_node->NodeName().c_str(), tensor_size);
+    int64_t tensor_size = -1;
+    if (!input_node->is_dynamic) {
+      GE_CHK_GRAPH_STATUS_RET(TensorUtils::GetSize(*output_desc, tensor_size),
+                              "Failed to get size from %s",
+                              input_node->NodeName().c_str());
+
+      if (tensor_size == 0) {
+        GELOGW("[%s] Tensor size == 0", input_node->NodeName().c_str());
+        GE_CHK_GRAPH_STATUS_RET(TensorUtils::GetTensorMemorySizeInBytes(*output_desc, tensor_size),
+                                "Failed to calc tensor size");
+        GELOGD("[%s] Tensor size updated to %ld", input_node->NodeName().c_str(), tensor_size);
+      }
     }
-    auto buffer = TensorBuffer::Create(allocator, tensor_size);
-    GE_CHECK_NOTNULL(buffer);
-    TensorValue tensor(shared_ptr<TensorBuffer>(buffer.release()));
-    tensor.SetName("Input_" + input_node->NodeName());
-    input_tensors_.emplace(input_index, tensor);
+
+    input_sizes_.emplace(input_index, tensor_size);
     input_tensor_desc_.emplace(input_index, output_desc);
     is_input_dynamic_.push_back(input_node->is_dynamic);
     input_index += 1;
diff --git a/ge/hybrid/executor/hybrid_model_async_executor.h b/ge/hybrid/executor/hybrid_model_async_executor.h
index ad39cac5..21d2d033 100644
--- a/ge/hybrid/executor/hybrid_model_async_executor.h
+++ b/ge/hybrid/executor/hybrid_model_async_executor.h
@@ -53,7 +53,7 @@ class HybridModelAsyncExecutor {
   Status EnqueueData(const std::shared_ptr<InputDataWrapper> &data);
 
  private:
-  Status InitInputTensors();
+  Status InitInputDesc();
 
   Status RunInternal();
 
@@ -85,7 +85,7 @@ class HybridModelAsyncExecutor {
   uint64_t iterator_count_ = 0;
 
   rtStream_t stream_ = nullptr;
-  std::map<uint32_t, TensorValue> input_tensors_;
+  std::map<uint32_t, int64_t> input_sizes_;
   std::map<uint32_t, GeTensorDescPtr> input_tensor_desc_;
   std::vector<bool> is_input_dynamic_;
   std::shared_ptr<ModelListener> listener_;
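
Usage note (not part of the patch): with this change, a caller drives a
dynamic input through the new PrepareInputs path by filling both blobs and
shapes of InputData. The sketch below is a minimal illustration: ge::InputData,
ge::DataBuffer, and the blobs/shapes/data/length members are used as they
appear in the diff; the helper name, the concrete shape, and the assumption
that shapes holds one integer dimension vector per input are illustrative only.

  #include <vector>

  // Assumes the GE declarations of ge::InputData and ge::DataBuffer are in scope.
  ge::InputData MakeDynamicInput(std::vector<float> &host_data) {
    ge::InputData input_data;

    ge::DataBuffer data_buf;
    data_buf.data = host_data.data();                    // host source for rtMemcpy
    data_buf.length = host_data.size() * sizeof(float);  // byte size of the blob
    input_data.blobs.emplace_back(data_buf);

    // For an input flagged is_dynamic, PrepareInputs reads shapes[input_index],
    // calls SetShape on the cached tensor desc, recomputes the byte size via
    // TensorUtils::GetTensorMemorySizeInBytes, and allocates a fresh device
    // buffer of that size before copying the blob host-to-device.
    input_data.shapes.push_back({8, 224, 224, 3});
    return input_data;
  }

The design shift is that device buffers are no longer allocated once in
InitInputTensors but per run in PrepareInputs: static inputs reuse the size
cached by InitInputDesc, while dynamic inputs start at tensor_size = -1 and
must resolve a size from the shape supplied with each execution, guarded by
GE_CHECK_GE(tensor_size, 0).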