From 5e8f1579e24b3cac117b33177c0cef27767ae7ce Mon Sep 17 00:00:00 2001 From: weiyang Date: Sat, 31 Oct 2020 14:26:34 +0800 Subject: [PATCH] fix dynamic shape with while --- .../load/new_model_manager/davinci_model.cc | 30 +++++++++++-------- .../load/new_model_manager/davinci_model.h | 2 +- .../label_switch_by_index_task_info.cc | 2 +- .../task_info/memcpy_async_task_info.cc | 22 ++++++++++++-- .../task_info/memcpy_async_task_info.h | 2 ++ ge/graph/passes/subgraph_pass.cc | 16 +++++++++- ge/single_op/single_op_model.cc | 4 --- .../task/aicpu_kernel_task_builder.cc | 1 + 8 files changed, 56 insertions(+), 23 deletions(-) diff --git a/ge/graph/load/new_model_manager/davinci_model.cc b/ge/graph/load/new_model_manager/davinci_model.cc index cf6493cc..cb37182c 100755 --- a/ge/graph/load/new_model_manager/davinci_model.cc +++ b/ge/graph/load/new_model_manager/davinci_model.cc @@ -649,7 +649,6 @@ Status DavinciModel::Init(void *dev_ptr, size_t mem_size, void *weight_ptr, size for (const ge::NodePtr &node : compute_graph->GetDirectNode()) { auto op_desc = node->GetOpDesc(); GE_IF_BOOL_EXEC(op_desc == nullptr, continue); - GetFixedAddrAttr(op_desc); GE_IF_BOOL_EXEC(op_desc->GetType() != VARIABLE, continue); GE_IF_BOOL_EXEC(IsBroadCastOpData(node), (void)ge::AttrUtils::SetStr(op_desc, VAR_ATTR_VAR_IS_BROADCAST, "var_is_restore");); @@ -838,7 +837,8 @@ Status DavinciModel::InitNodes(const ComputeGraphPtr &compute_graph) { } continue; } - + // for dynamic shape with control flow + SetLabelForDynamic(node); if (IsNoTaskAndDumpNeeded(op_desc)) { GELOGD("node[%s] without task, and save op_desc and addr for dump", op_desc->GetName().c_str()); const RuntimeParam &rts_param = GetRuntimeParam(); @@ -912,6 +912,21 @@ Status DavinciModel::InitInputOutputForDynamic(const ComputeGraphPtr &compute_gr return SUCCESS; } +void DavinciModel::SetLabelForDynamic(const NodePtr &node) { + if (known_node_ && node->GetOpDesc()->GetType() == LABELSWITCHBYINDEX) { + for (auto &in_data_anchor : 
node->GetAllInDataAnchors()) { + auto peer_out_data_anchor = in_data_anchor->GetPeerOutAnchor(); + if (peer_out_data_anchor != nullptr) { + string tensor_name = node->GetName(); + auto peer_node = peer_out_data_anchor->GetOwnerNode(); + (void)AttrUtils::SetStr(peer_node->GetOpDesc(), ATTR_DYNAMIC_SHAPE_FIXED_ADDR, tensor_name); + (void)AttrUtils::SetInt(peer_node->GetOpDesc(), ATTR_DYNAMIC_SHAPE_FIXED_ADDR_INDEX, 0); + tensor_name_to_peer_output_index_[tensor_name] = 0; + } + } + } +} + /// @ingroup ge /// @brief Data Op Initialize. /// @param [in] NodePtr: Data Op. @@ -3948,15 +3963,4 @@ int64_t DavinciModel::GetFixedAddrsSize(string tensor_name) { } } -void DavinciModel::GetFixedAddrAttr(const OpDescPtr &op_desc) { - if (op_desc->HasAttr(ATTR_DYNAMIC_SHAPE_FIXED_ADDR) && op_desc->HasAttr(ATTR_DYNAMIC_SHAPE_FIXED_ADDR_INDEX)) { - string tensor_name; - (void)AttrUtils::GetStr(op_desc, ATTR_DYNAMIC_SHAPE_FIXED_ADDR, tensor_name); - int64_t index = -1; - (void)AttrUtils::GetInt(op_desc, ATTR_DYNAMIC_SHAPE_FIXED_ADDR_INDEX, index); - if (index >= 0) { - tensor_name_to_peer_output_index_[tensor_name] = index; - } - } -} } // namespace ge diff --git a/ge/graph/load/new_model_manager/davinci_model.h b/ge/graph/load/new_model_manager/davinci_model.h index f41817bb..964057a4 100755 --- a/ge/graph/load/new_model_manager/davinci_model.h +++ b/ge/graph/load/new_model_manager/davinci_model.h @@ -838,7 +838,7 @@ class DavinciModel { std::vector &outputs); void ParseAIPPInfo(std::string in_out_info, InputOutputDims &dims_info); - void GetFixedAddrAttr(const OpDescPtr &op_desc); + void SetLabelForDynamic(const NodePtr &node); bool is_model_has_inited_; uint32_t model_id_; diff --git a/ge/graph/load/new_model_manager/task_info/label_switch_by_index_task_info.cc b/ge/graph/load/new_model_manager/task_info/label_switch_by_index_task_info.cc index f26c19a6..ae7865a4 100644 --- a/ge/graph/load/new_model_manager/task_info/label_switch_by_index_task_info.cc +++ 
b/ge/graph/load/new_model_manager/task_info/label_switch_by_index_task_info.cc @@ -144,7 +144,7 @@ Status LabelSwitchByIndexTaskInfo::CalculateArgs(const domi::TaskDef &task_def, GELOGE(FAILED, "Label switch op only have one data input. Now input size is %zu", op_desc->GetInputsSize()); return FAILED; } - string input_tensor_name = op_desc->GetInputNameByIndex(0); + string input_tensor_name = op_desc->GetName(); fixed_addr_offset_ = davinci_model->GetFixedAddrsSize(input_tensor_name); auto tensor_desc = op_desc->GetInputDesc(0); int64_t tensor_size = 0; diff --git a/ge/graph/load/new_model_manager/task_info/memcpy_async_task_info.cc b/ge/graph/load/new_model_manager/task_info/memcpy_async_task_info.cc index 51e822e2..6eb53c8a 100755 --- a/ge/graph/load/new_model_manager/task_info/memcpy_async_task_info.cc +++ b/ge/graph/load/new_model_manager/task_info/memcpy_async_task_info.cc @@ -35,6 +35,7 @@ Status MemcpyAsyncTaskInfo::Init(const domi::TaskDef &task_def, DavinciModel *da kind_ = memcpy_async_.kind(); dst_max_ = memcpy_async_.dst_max(); OpDescPtr op_desc = davinci_model_->GetOpByIndex(memcpy_async_.op_index()); + op_desc_ = op_desc; if (op_desc == nullptr) { GELOGE(INTERNAL_ERROR, "Task op index:%u out of range", memcpy_async_.op_index()); return INTERNAL_ERROR; @@ -45,7 +46,8 @@ Status MemcpyAsyncTaskInfo::Init(const domi::TaskDef &task_def, DavinciModel *da dst_ = reinterpret_cast(reinterpret_cast(src_) + sizeof(void *)); // for zero copy kind_ = RT_MEMCPY_ADDR_DEVICE_TO_DEVICE; - GELOGI("MemcpyAsyncTaskInfo src_ %p, dst_ %p, args_offset %u.", src_, dst_, args_offset_); + GELOGI("MemcpyAsyncTaskInfo op name %s, src_ %p, dst_ %p, args_offset %u.", + op_desc->GetName().c_str(), src_, dst_, args_offset_); return SUCCESS; } @@ -93,12 +95,22 @@ Status MemcpyAsyncTaskInfo::Distribute() { } Status MemcpyAsyncTaskInfo::CalculateArgs(const domi::TaskDef &task_def, DavinciModel *davinci_model) { + OpDescPtr op_desc = 
davinci_model->GetOpByIndex(task_def.memcpy_async().op_index()); // the num of src and dst size is 2 uint32_t args_size = sizeof(void *) * 2; args_offset_ = davinci_model->GetTotalArgsSize(); davinci_model->SetTotalArgsSize(args_size); davinci_model_ = davinci_model; GELOGI("MemcpyAsyncTaskInfo kernel args_size %u, args_offset %u", args_size, args_offset_); + string peer_input_name; + if (AttrUtils::GetStr(op_desc, ATTR_DYNAMIC_SHAPE_FIXED_ADDR, peer_input_name) && !peer_input_name.empty()) { + uint32_t output_index = davinci_model->GetFixedAddrOutputIndex(peer_input_name); + fixed_addr_offset_ = davinci_model->GetFixedAddrsSize(peer_input_name); + auto tensor_desc = op_desc->GetOutputDesc(output_index); + int64_t tensor_size = 0; + GE_CHK_STATUS(TensorUtils::GetSize(tensor_desc, tensor_size)); + davinci_model->SetTotalFixedAddrsSize(peer_input_name, tensor_size); + } return SUCCESS; } @@ -117,8 +129,12 @@ Status MemcpyAsyncTaskInfo::UpdateArgs() { vector io_addrs; io_addrs.emplace_back(reinterpret_cast(src_)); - io_addrs.emplace_back(reinterpret_cast(dst_)); - + if (op_desc_->HasAttr(ATTR_DYNAMIC_SHAPE_FIXED_ADDR)) { + void *fixed_addr = davinci_model_->GetCurrentFixedAddr(fixed_addr_offset_); + io_addrs.emplace_back(fixed_addr); + } else { + io_addrs.emplace_back(reinterpret_cast(dst_)); + } davinci_model_->SetTotalIOAddrs(io_addrs); GELOGI("MemcpyAsyncTaskInfo::UpdateArgs success."); diff --git a/ge/graph/load/new_model_manager/task_info/memcpy_async_task_info.h b/ge/graph/load/new_model_manager/task_info/memcpy_async_task_info.h index 320e7fbc..7e74ab6f 100755 --- a/ge/graph/load/new_model_manager/task_info/memcpy_async_task_info.h +++ b/ge/graph/load/new_model_manager/task_info/memcpy_async_task_info.h @@ -44,6 +44,8 @@ class MemcpyAsyncTaskInfo : public TaskInfo { uint8_t *src_; uint64_t count_; uint32_t kind_; + OpDescPtr op_desc_; + int64_t fixed_addr_offset_ = 0; DavinciModel *davinci_model_ = nullptr; uint32_t args_offset_ = 0; domi::MemcpyAsyncDef 
memcpy_async_; diff --git a/ge/graph/passes/subgraph_pass.cc b/ge/graph/passes/subgraph_pass.cc index 04e28aaf..d8cc5676 100755 --- a/ge/graph/passes/subgraph_pass.cc +++ b/ge/graph/passes/subgraph_pass.cc @@ -149,7 +149,8 @@ Status SubgraphPass::SubgraphOutputNode(const ComputeGraphPtr &graph, const Node std::string op_type; bool insert_flag = NodeUtils::GetConstOpType(in_node, op_type) || IsAtomicRequired(in_node, peer_out_anchor->GetIdx()) || IsOutputContinuesRequired(in_node) || - ((in_node->GetType() == DATA) && (kWhileOpTypes.count(graph->GetParentNode()->GetType()) == 0)); + ((in_node->GetType() == DATA) && (kWhileOpTypes.count(graph->GetParentNode()->GetType()) == 0)) || + (NodeUtils::IsDynamicShape(node) && (kWhileOpTypes.count(in_node->GetType()) != 0)); if (insert_flag) { GELOGD("Insert MemcpyAsync node between %s and %s.", in_node->GetName().c_str(), node->GetName().c_str()); std::string name = node->GetName() + "_input_" + std::to_string(in_data_anchor->GetIdx()) + "_Memcpy"; @@ -212,6 +213,19 @@ Status SubgraphPass::WhileBodySubgraph(const ComputeGraphPtr &graph, const NodeP return SUCCESS; } + // insert identity between data and labelswitch in while cond subgraph + if (NodeUtils::IsDynamicShape(node)) { + ComputeGraphPtr while_cond = NodeUtils::GetSubgraph(*node, 0); + GE_CHECK_NOTNULL(while_cond); + std::vector cond_data_nodes; + for (const auto &n : while_cond->GetDirectNode()) { + if (n->GetType() == DATA) { + cond_data_nodes.emplace_back(n); + } + } + GE_CHK_STATUS_RET(InsertInputMemcpy(while_cond, cond_data_nodes), "InsertInputMemcpy failed."); + } + std::vector data_nodes; std::set bypass_index; NodePtr output_node = nullptr; diff --git a/ge/single_op/single_op_model.cc b/ge/single_op/single_op_model.cc index 98d56046..ea9df11d 100755 --- a/ge/single_op/single_op_model.cc +++ b/ge/single_op/single_op_model.cc @@ -31,7 +31,6 @@ #include "task/aicpu_task_builder.h" #include "task/aicpu_kernel_task_builder.h" #include "task/tbe_task_builder.h" 
-#include "graph/load/new_model_manager/model_manager.h" static std::atomic aicpu_sessionid(0); @@ -278,7 +277,6 @@ Status SingleOpModel::BuildTaskList(SingleOp &single_op) { GELOGD("Skip task type: %d", static_cast(task_type)); } } - GE_CHK_STATUS_RET(ModelManager::GetInstance()->LaunchCustAicpuSo(), "launch cust aicpu so failed."); return SUCCESS; } @@ -450,8 +448,6 @@ Status SingleOpModel::BuildTaskListForDynamicOp(DynamicSingleOp &single_op) { GELOGD("Skip task type: %d", static_cast(task_type)); } } - GE_CHK_STATUS_RET(ModelManager::GetInstance()->LaunchCustAicpuSo(), "launch cust aicpu so failed."); - return SUCCESS; } diff --git a/ge/single_op/task/aicpu_kernel_task_builder.cc b/ge/single_op/task/aicpu_kernel_task_builder.cc index b9c5b9d0..150c66e7 100755 --- a/ge/single_op/task/aicpu_kernel_task_builder.cc +++ b/ge/single_op/task/aicpu_kernel_task_builder.cc @@ -63,6 +63,7 @@ Status AiCpuCCTaskBuilder::BuildTask(AiCpuCCTask &task) { task.is_custom_ = true; task.dump_flag_ |= RT_KERNEL_CUSTOM_AICPU; GE_CHK_STATUS_RET(ModelManager::GetInstance()->LoadCustAicpuSo(op_desc_, so_name), "launch cust aicpu so failed"); + GE_CHK_STATUS_RET(ModelManager::GetInstance()->LaunchCustAicpuSo(), "launch cust aicpu so failed."); } task.num_inputs_ = op_desc_->GetInputsSize();