@@ -94,7 +94,7 @@ Status HybridModelAsyncExecutor::Init() {
   executor_ = std::unique_ptr<HybridModelExecutor>(new(std::nothrow) HybridModelExecutor(model_, device_id_, stream_));
   GE_CHECK_NOTNULL(executor_);
   GE_CHK_STATUS_RET(executor_->Init(), "Failed to init hybrid engine");
-  GE_CHK_STATUS_RET(InitInputTensors(), "Failed to init input tensors");
+  GE_CHK_STATUS_RET(InitInputDesc(), "Failed to init input tensors");
   return SUCCESS;
 }
@@ -199,50 +199,66 @@ Status HybridModelAsyncExecutor::SyncVarData() {
 }
 
 Status HybridModelAsyncExecutor::PrepareInputs(const InputData &current_data, HybridModelExecutor::ExecuteArgs &args) {
-  args.inputs.resize(input_tensors_.size());
+  if (current_data.blobs.size() < input_tensor_desc_.size()) {
+    GELOGE(PARAM_INVALID, "Blob size mismatches, expect at least %zu, but got %zu",
+           input_tensor_desc_.size(), current_data.blobs.size());
+    return PARAM_INVALID;
+  }
+
+  auto allocator = NpuMemoryAllocator::GetAllocator(device_id_);
+  GE_CHECK_NOTNULL(allocator);
   args.input_desc.resize(input_tensor_desc_.size());
   const std::vector<DataBuffer> &blobs = current_data.blobs;
-  for (const auto &it : input_tensors_) {
-    auto input_index = it.first;
-    auto input_tensor = it.second;
-    auto data_size = input_tensor.GetSize();
-    GELOGD("To copy input data for input[%u]", input_index);
-    if (input_index >= blobs.size()) {
-      GELOGE(FAILED, "Blobs not match: blobs=%zu, tensor=%zu, index=%u, size=%ld",
-             blobs.size(), model_->input_nodes_.size(), input_index, data_size);
-      return FAILED;
-    }
-
+  for (size_t input_index = 0; input_index < input_tensor_desc_.size(); ++input_index) {
+    auto tensor_size = input_sizes_[input_index];
+    if (is_input_dynamic_[input_index]) {
+      if (input_index >= current_data.shapes.size()) {
+        GELOGE(PARAM_INVALID, "Shape index out of range, index = %zu, shape size = %zu",
+               input_index, current_data.shapes.size());
+        return PARAM_INVALID;
+      }
+      auto &tensor_desc = input_tensor_desc_[input_index];
+      tensor_desc->SetShape(GeShape(current_data.shapes[input_index]));
+      args.input_desc[input_index] = tensor_desc;
+      GELOGD("Update shape of input[%u] to [%s]", input_index, tensor_desc->MutableShape().ToString().c_str());
+      GE_CHK_GRAPH_STATUS_RET(TensorUtils::GetTensorMemorySizeInBytes(*tensor_desc, tensor_size),
+                              "Failed to calc tensor size, index = %zu, shape = [%s]",
+                              input_index,
+                              tensor_desc->GetShape().ToString().c_str());
+      GELOGD("Input tensor[%zu] size = %zu", input_index, tensor_size);
+    }
+
+    GE_CHECK_GE(tensor_size, 0);
+    auto tensor_buffer = TensorBuffer::Create(allocator, tensor_size);
+    GE_CHECK_NOTNULL(tensor_buffer);
+    args.inputs.emplace_back(std::shared_ptr<TensorBuffer>(tensor_buffer.release()));
+
+    GELOGD("To copy input data for input[%u]", input_index);
     const DataBuffer &data_buf = blobs[input_index];
-    auto mem_size = static_cast<uint32_t>(data_size);
+    auto mem_size = static_cast<uint64_t>(tensor_size);
     GE_CHK_BOOL_RET_STATUS(mem_size >= data_buf.length,
                            PARAM_INVALID,
-                           "input data size(%lu) does not match model required size(%u), ret failed.",
+                           "input data size(%lu) does not match model required size(%lu), ret failed.",
                            data_buf.length,
                            mem_size);
 
     GELOGI("[IMAS]CopyPlainData memcpy graph_%u type[F] output[%u] memaddr[%p] mem_size[%u] datasize[%lu]",
-           model_->root_runtime_param_.graph_id, input_index, input_tensor.GetData(), mem_size, data_buf.length);
-    GE_CHK_RT_RET(rtMemcpy(input_tensor.MutableData(),
+           model_->root_runtime_param_.graph_id,
+           input_index,
+           args.inputs[input_index].GetData(),
+           mem_size,
+           data_buf.length);
+    GE_CHK_RT_RET(rtMemcpy(args.inputs[input_index].MutableData(),
                            mem_size,
                            data_buf.data,
                            data_buf.length,
                            RT_MEMCPY_HOST_TO_DEVICE));
-    args.inputs[input_index] = input_tensor;
-    if (is_input_dynamic_[input_index]) {
-      auto &tensor_desc = input_tensor_desc_[input_index];
-      tensor_desc->SetShape(GeShape(current_data.shapes[input_index]));
-      args.input_desc[input_index] = tensor_desc;
-      GELOGD("Update shape of input[%u] to [%s]", input_index, tensor_desc->MutableShape().ToString().c_str());
-    }
   }
 
   return SUCCESS;
 }
 
-Status HybridModelAsyncExecutor::InitInputTensors() {
-  auto allocator = NpuMemoryAllocator::GetAllocator(device_id_);
-  GE_CHECK_NOTNULL(allocator);
+Status HybridModelAsyncExecutor::InitInputDesc() {
   int input_index = 0;
   for (const auto &input_node : model_->GetRootGraphItem()->GetInputNodes()) {
     GELOGD("Init input[%u], node = %s, is_dynamic = %d",
@@ -251,21 +267,21 @@ Status HybridModelAsyncExecutor::InitInputTensors() {
            input_node->is_dynamic);
     auto output_desc = input_node->MutableOutputDesc(kDataOutputIndex);
     GE_CHECK_NOTNULL(output_desc);
-    int64_t tensor_size = 0;
-    GE_CHK_GRAPH_STATUS_RET(TensorUtils::GetSize(*output_desc, tensor_size),
-                            "Failed to get size from %s",
-                            input_node->NodeName().c_str());
-    if (tensor_size == 0) {
-      GELOGW("[%s] Tensor size == 0", input_node->NodeName().c_str());
-      GE_CHK_GRAPH_STATUS_RET(TensorUtils::GetTensorMemorySizeInBytes(*output_desc, tensor_size),
-                              "Failed to calc tensor size");
-      GELOGD("[%s] Tensor size updated to %ld", input_node->NodeName().c_str(), tensor_size);
+    int64_t tensor_size = -1;
+    if (!input_node->is_dynamic) {
+      GE_CHK_GRAPH_STATUS_RET(TensorUtils::GetSize(*output_desc, tensor_size),
+                              "Failed to get size from %s",
+                              input_node->NodeName().c_str());
+
+      if (tensor_size == 0) {
+        GELOGW("[%s] Tensor size == 0", input_node->NodeName().c_str());
+        GE_CHK_GRAPH_STATUS_RET(TensorUtils::GetTensorMemorySizeInBytes(*output_desc, tensor_size),
+                                "Failed to calc tensor size");
+        GELOGD("[%s] Tensor size updated to %ld", input_node->NodeName().c_str(), tensor_size);
+      }
     }
-    auto buffer = TensorBuffer::Create(allocator, tensor_size);
-    GE_CHECK_NOTNULL(buffer);
-    TensorValue tensor(shared_ptr<TensorBuffer>(buffer.release()));
-    tensor.SetName("Input_" + input_node->NodeName());
-    input_tensors_.emplace(input_index, tensor);
+
+    input_sizes_.emplace(input_index, tensor_size);
     input_tensor_desc_.emplace(input_index, output_desc);
     is_input_dynamic_.push_back(input_node->is_dynamic);
     input_index += 1;
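For reference, a minimal sketch of the per-input bookkeeping that InitInputDesc() records in place of the buffers InitInputTensors() used to allocate up front. The member types shown are assumptions inferred from how the fields are used in this diff, not taken from the class header.

  std::map<uint32_t, int64_t> input_sizes_;                 // static byte size per input; -1 when the input is dynamic
  std::map<uint32_t, GeTensorDescPtr> input_tensor_desc_;   // Data node output desc, reshaped per run for dynamic inputs
  std::vector<bool> is_input_dynamic_;                      // selects the shape/size recomputation path in PrepareInputs()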