fix dynamic shape with while

5 years ago · 5e8f1579e2
--- a/ge/graph/load/new_model_manager/davinci_model.cc
+++ b/ge/graph/load/new_model_manager/davinci_model.cc
@@ -649,7 +649,6 @@ Status DavinciModel::Init(void *dev_ptr, size_t mem_size, void *weight_ptr, size
  for (const ge::NodePtr &node : compute_graph->GetDirectNode()) {
    auto op_desc = node->GetOpDesc();
    GE_IF_BOOL_EXEC(op_desc == nullptr, continue);
    GetFixedAddrAttr(op_desc);
    GE_IF_BOOL_EXEC(op_desc->GetType() != VARIABLE, continue);
    GE_IF_BOOL_EXEC(IsBroadCastOpData(node),
                    (void)ge::AttrUtils::SetStr(op_desc, VAR_ATTR_VAR_IS_BROADCAST, "var_is_restore"););
@@ -838,7 +837,8 @@ Status DavinciModel::InitNodes(const ComputeGraphPtr &compute_graph) {
      }
      continue;
    }

    // for dynamic shape with control flow
    SetLabelForDynamic(node);
    if (IsNoTaskAndDumpNeeded(op_desc)) {
      GELOGD("node[%s] without task, and save op_desc and addr for dump", op_desc->GetName().c_str());
      const RuntimeParam &rts_param = GetRuntimeParam();
@@ -912,6 +912,21 @@ Status DavinciModel::InitInputOutputForDynamic(const ComputeGraphPtr &compute_gr
  return SUCCESS;
 }

 void DavinciModel::SetLabelForDynamic(const NodePtr &node) {
  if (known_node_ && node->GetOpDesc()->GetType() == LABELSWITCHBYINDEX) {
    for (auto &in_data_anchor : node->GetAllInDataAnchors()) {
      auto peer_out_data_anchor = in_data_anchor->GetPeerOutAnchor();
      if (peer_out_data_anchor != nullptr) {
        string tensor_name = node->GetName();
        auto peer_node = peer_out_data_anchor->GetOwnerNode();
        (void)AttrUtils::SetStr(peer_node->GetOpDesc(), ATTR_DYNAMIC_SHAPE_FIXED_ADDR, tensor_name);
        (void)AttrUtils::SetInt(peer_node->GetOpDesc(), ATTR_DYNAMIC_SHAPE_FIXED_ADDR_INDEX, 0);
        tensor_name_to_peer_output_index_[tensor_name] = 0;
      }
    }
  }
 }

 /// @ingroup ge
 /// @brief Data Op Initialize.
 /// @param [in] NodePtr: Data Op.
@@ -3948,15 +3963,4 @@ int64_t DavinciModel::GetFixedAddrsSize(string tensor_name) {
  }
 }

 void DavinciModel::GetFixedAddrAttr(const OpDescPtr &op_desc) {
  if (op_desc->HasAttr(ATTR_DYNAMIC_SHAPE_FIXED_ADDR) && op_desc->HasAttr(ATTR_DYNAMIC_SHAPE_FIXED_ADDR_INDEX)) {
    string tensor_name;
    (void)AttrUtils::GetStr(op_desc, ATTR_DYNAMIC_SHAPE_FIXED_ADDR, tensor_name);
    int64_t index = -1;
    (void)AttrUtils::GetInt(op_desc, ATTR_DYNAMIC_SHAPE_FIXED_ADDR_INDEX, index);
    if (index >= 0) {
      tensor_name_to_peer_output_index_[tensor_name] = index;
    }
  }
 }
 }  // namespace ge
--- a/ge/graph/load/new_model_manager/davinci_model.h
+++ b/ge/graph/load/new_model_manager/davinci_model.h
@@ -838,7 +838,7 @@ class DavinciModel {
                             std::vector<ge::OutputTensorInfo> &outputs);

  void ParseAIPPInfo(std::string in_out_info, InputOutputDims &dims_info);
  void GetFixedAddrAttr(const OpDescPtr &op_desc);
  void SetLabelForDynamic(const NodePtr &node);

  bool is_model_has_inited_;
  uint32_t model_id_;
--- a/ge/graph/load/new_model_manager/task_info/label_switch_by_index_task_info.cc
+++ b/ge/graph/load/new_model_manager/task_info/label_switch_by_index_task_info.cc
@@ -144,7 +144,7 @@ Status LabelSwitchByIndexTaskInfo::CalculateArgs(const domi::TaskDef &task_def,
    GELOGE(FAILED, "Label switch op only have one data input. Now input size is %zu", op_desc->GetInputsSize());
    return FAILED;
  }
  string input_tensor_name = op_desc->GetInputNameByIndex(0);
  string input_tensor_name = op_desc->GetName();
  fixed_addr_offset_ = davinci_model->GetFixedAddrsSize(input_tensor_name);
  auto tensor_desc = op_desc->GetInputDesc(0);
  int64_t tensor_size = 0;
--- a/ge/graph/load/new_model_manager/task_info/memcpy_async_task_info.cc
+++ b/ge/graph/load/new_model_manager/task_info/memcpy_async_task_info.cc
@@ -35,6 +35,7 @@ Status MemcpyAsyncTaskInfo::Init(const domi::TaskDef &task_def, DavinciModel *da
  kind_ = memcpy_async_.kind();
  dst_max_ = memcpy_async_.dst_max();
  OpDescPtr op_desc = davinci_model_->GetOpByIndex(memcpy_async_.op_index());
  op_desc_ = op_desc;
  if (op_desc == nullptr) {
    GELOGE(INTERNAL_ERROR, "Task op index:%u out of range", memcpy_async_.op_index());
    return INTERNAL_ERROR;
@@ -45,7 +46,8 @@ Status MemcpyAsyncTaskInfo::Init(const domi::TaskDef &task_def, DavinciModel *da
    dst_ = reinterpret_cast<uint8_t *>(reinterpret_cast<uintptr_t>(src_) + sizeof(void *));
    // for zero copy
    kind_ = RT_MEMCPY_ADDR_DEVICE_TO_DEVICE;
    GELOGI("MemcpyAsyncTaskInfo src_ %p, dst_ %p, args_offset %u.", src_, dst_, args_offset_);
    GELOGI("MemcpyAsyncTaskInfo op name %s, src_ %p, dst_ %p, args_offset %u.",
           op_desc->GetName().c_str(), src_, dst_, args_offset_);
    return SUCCESS;
  }

@@ -93,12 +95,22 @@ Status MemcpyAsyncTaskInfo::Distribute() {
 }

 /// @ingroup ge
 /// @brief Reserve this task's slots in the model's args table and, when the op carries
 ///        ATTR_DYNAMIC_SHAPE_FIXED_ADDR, reserve its fixed-address space as well.
 /// @param [in] task_def: task definition; memcpy_async().op_index() selects the op.
 /// @param [in] davinci_model: model whose args / fixed-address totals are updated.
 /// @return SUCCESS on success; INTERNAL_ERROR when davinci_model is null or the op index
 ///         does not resolve to an op.
 Status MemcpyAsyncTaskInfo::CalculateArgs(const domi::TaskDef &task_def, DavinciModel *davinci_model) {
  if (davinci_model == nullptr) {
    GELOGE(INTERNAL_ERROR, "davinci_model is null.");
    return INTERNAL_ERROR;
  }
  // Bind the member first and look the op up through the parameter: the member defaults to
  // nullptr, so dereferencing it before this assignment is not safe.
  davinci_model_ = davinci_model;

  OpDescPtr op_desc = davinci_model->GetOpByIndex(task_def.memcpy_async().op_index());
  if (op_desc == nullptr) {
    GELOGE(INTERNAL_ERROR, "Task op index:%u out of range", task_def.memcpy_async().op_index());
    return INTERNAL_ERROR;
  }

  // the num of src and dst size is 2
  uint32_t args_size = sizeof(void *) * 2;
  // Record the current total as this task's offset before growing the total.
  args_offset_ = davinci_model->GetTotalArgsSize();
  davinci_model->SetTotalArgsSize(args_size);
  GELOGI("MemcpyAsyncTaskInfo kernel args_size %u, args_offset %u", args_size, args_offset_);

  string peer_input_name;
  if (AttrUtils::GetStr(op_desc, ATTR_DYNAMIC_SHAPE_FIXED_ADDR, peer_input_name) && !peer_input_name.empty()) {
    uint32_t output_index = davinci_model->GetFixedAddrOutputIndex(peer_input_name);
    // Remember the offset before this tensor's size is added to the fixed-address total.
    fixed_addr_offset_ = davinci_model->GetFixedAddrsSize(peer_input_name);
    auto tensor_desc = op_desc->GetOutputDesc(output_index);
    int64_t tensor_size = 0;
    // NOTE(review): GE_CHK_STATUS appears not to return on failure, in which case tensor_size
    // stays 0 - confirm whether that is intended.
    GE_CHK_STATUS(TensorUtils::GetSize(tensor_desc, tensor_size));
    davinci_model->SetTotalFixedAddrsSize(peer_input_name, tensor_size);
  }
  return SUCCESS;
 }

@@ -117,8 +129,12 @@ Status MemcpyAsyncTaskInfo::UpdateArgs() {

  vector<void *> io_addrs;
  io_addrs.emplace_back(reinterpret_cast<void *>(src_));
  io_addrs.emplace_back(reinterpret_cast<void *>(dst_));

  if (op_desc_->HasAttr(ATTR_DYNAMIC_SHAPE_FIXED_ADDR)) {
    void *fixed_addr = davinci_model_->GetCurrentFixedAddr(fixed_addr_offset_);
    io_addrs.emplace_back(fixed_addr);
  } else {
    io_addrs.emplace_back(reinterpret_cast<void *>(dst_));
  }
  davinci_model_->SetTotalIOAddrs(io_addrs);

  GELOGI("MemcpyAsyncTaskInfo::UpdateArgs success.");
--- a/ge/graph/load/new_model_manager/task_info/memcpy_async_task_info.h
+++ b/ge/graph/load/new_model_manager/task_info/memcpy_async_task_info.h
@@ -44,6 +44,8 @@ class MemcpyAsyncTaskInfo : public TaskInfo {
  uint8_t *src_;
  uint64_t count_;
  uint32_t kind_;
  OpDescPtr op_desc_;
  int64_t fixed_addr_offset_;
  DavinciModel *davinci_model_ = nullptr;
  uint32_t args_offset_ = 0;
  domi::MemcpyAsyncDef memcpy_async_;
--- a/ge/graph/passes/subgraph_pass.cc
+++ b/ge/graph/passes/subgraph_pass.cc
@@ -149,7 +149,8 @@ Status SubgraphPass::SubgraphOutputNode(const ComputeGraphPtr &graph, const Node
    std::string op_type;
    bool insert_flag = NodeUtils::GetConstOpType(in_node, op_type) ||
                       IsAtomicRequired(in_node, peer_out_anchor->GetIdx()) || IsOutputContinuesRequired(in_node) ||
                       ((in_node->GetType() == DATA) && (kWhileOpTypes.count(graph->GetParentNode()->GetType()) == 0));
                       ((in_node->GetType() == DATA) && (kWhileOpTypes.count(graph->GetParentNode()->GetType()) == 0)) ||
                         (NodeUtils::IsDynamicShape(node) && (kWhileOpTypes.count(in_node->GetType()) != 0));
    if (insert_flag) {
      GELOGD("Insert MemcpyAsync node between %s and %s.", in_node->GetName().c_str(), node->GetName().c_str());
      std::string name = node->GetName() + "_input_" + std::to_string(in_data_anchor->GetIdx()) + "_Memcpy";
@@ -212,6 +213,19 @@ Status SubgraphPass::WhileBodySubgraph(const ComputeGraphPtr &graph, const NodeP
    return SUCCESS;
  }

  // insert identity between data and labelswitch in while cond subgraph
  if (NodeUtils::IsDynamicShape(node)) {
    ComputeGraphPtr while_cond = NodeUtils::GetSubgraph(*node, 0);
    GE_CHECK_NOTNULL(while_cond);
    std::vector<NodePtr> cond_data_nodes;
    for (const auto &n : while_cond->GetDirectNode()) {
      if (n->GetType() == DATA) {
        cond_data_nodes.emplace_back(n);
      }
    }
    GE_CHK_STATUS_RET(InsertInputMemcpy(while_cond, cond_data_nodes), "InsertInputMemcpy failed.");
  }

  std::vector<NodePtr> data_nodes;
  std::set<uint32_t> bypass_index;
  NodePtr output_node = nullptr;
--- a/ge/single_op/single_op_model.cc
+++ b/ge/single_op/single_op_model.cc
@@ -31,7 +31,6 @@
 #include "task/aicpu_task_builder.h"
 #include "task/aicpu_kernel_task_builder.h"
 #include "task/tbe_task_builder.h"
 #include "graph/load/new_model_manager/model_manager.h"

 static std::atomic<std::uint64_t> aicpu_sessionid(0);

@@ -278,7 +277,6 @@ Status SingleOpModel::BuildTaskList(SingleOp &single_op) {
      GELOGD("Skip task type: %d", static_cast<int>(task_type));
    }
  }
  GE_CHK_STATUS_RET(ModelManager::GetInstance()->LaunchCustAicpuSo(), "launch cust aicpu so failed.");
  return SUCCESS;
 }

@@ -450,8 +448,6 @@ Status SingleOpModel::BuildTaskListForDynamicOp(DynamicSingleOp &single_op) {
      GELOGD("Skip task type: %d", static_cast<int>(task_type));
    }
  }
  GE_CHK_STATUS_RET(ModelManager::GetInstance()->LaunchCustAicpuSo(), "launch cust aicpu so failed.");

  return SUCCESS;
 }

--- a/ge/single_op/task/aicpu_kernel_task_builder.cc
+++ b/ge/single_op/task/aicpu_kernel_task_builder.cc
@@ -63,6 +63,7 @@ Status AiCpuCCTaskBuilder::BuildTask(AiCpuCCTask &task) {
    task.is_custom_ = true;
    task.dump_flag_ |= RT_KERNEL_CUSTOM_AICPU;
    GE_CHK_STATUS_RET(ModelManager::GetInstance()->LoadCustAicpuSo(op_desc_, so_name), "launch cust aicpu so failed");
    GE_CHK_STATUS_RET(ModelManager::GetInstance()->LaunchCustAicpuSo(), "launch cust aicpu so failed.");
  }

  task.num_inputs_ = op_desc_->GetInputsSize();