| @@ -649,7 +649,6 @@ Status DavinciModel::Init(void *dev_ptr, size_t mem_size, void *weight_ptr, size | |||
| for (const ge::NodePtr &node : compute_graph->GetDirectNode()) { | |||
| auto op_desc = node->GetOpDesc(); | |||
| GE_IF_BOOL_EXEC(op_desc == nullptr, continue); | |||
| GetFixedAddrAttr(op_desc); | |||
| GE_IF_BOOL_EXEC(op_desc->GetType() != VARIABLE, continue); | |||
| GE_IF_BOOL_EXEC(IsBroadCastOpData(node), | |||
| (void)ge::AttrUtils::SetStr(op_desc, VAR_ATTR_VAR_IS_BROADCAST, "var_is_restore");); | |||
| @@ -838,7 +837,8 @@ Status DavinciModel::InitNodes(const ComputeGraphPtr &compute_graph) { | |||
| } | |||
| continue; | |||
| } | |||
| // for dynamic shape with control flow | |||
| SetLabelForDynamic(node); | |||
| if (IsNoTaskAndDumpNeeded(op_desc)) { | |||
| GELOGD("node[%s] without task, and save op_desc and addr for dump", op_desc->GetName().c_str()); | |||
| const RuntimeParam &rts_param = GetRuntimeParam(); | |||
| @@ -912,6 +912,21 @@ Status DavinciModel::InitInputOutputForDynamic(const ComputeGraphPtr &compute_gr | |||
| return SUCCESS; | |||
| } | |||
| /// @ingroup ge | |||
| /// @brief For a LABELSWITCHBYINDEX node (only when known_node_ is set), tag every producer of its | |||
| ///        data inputs with ATTR_DYNAMIC_SHAPE_FIXED_ADDR = <this node's name> and | |||
| ///        ATTR_DYNAMIC_SHAPE_FIXED_ADDR_INDEX = 0, and record the same name -> 0 mapping in | |||
| ///        tensor_name_to_peer_output_index_. | |||
| /// @param [in] node: node to inspect; a null node or a node without an op desc is ignored. | |||
| void DavinciModel::SetLabelForDynamic(const NodePtr &node) { | |||
|   if (!known_node_ || node == nullptr) { | |||
|     return; | |||
|   } | |||
|   const auto op_desc = node->GetOpDesc(); | |||
|   if (op_desc == nullptr || op_desc->GetType() != LABELSWITCHBYINDEX) { | |||
|     return; | |||
|   } | |||
|   // The key is the label-switch node's own name; it does not change per anchor, so build it once. | |||
|   const string tensor_name = node->GetName(); | |||
|   for (const auto &in_data_anchor : node->GetAllInDataAnchors()) { | |||
|     if (in_data_anchor == nullptr) { | |||
|       continue; | |||
|     } | |||
|     const auto peer_out_data_anchor = in_data_anchor->GetPeerOutAnchor(); | |||
|     if (peer_out_data_anchor == nullptr) { | |||
|       continue;  // unconnected input: nothing to tag | |||
|     } | |||
|     const auto peer_node = peer_out_data_anchor->GetOwnerNode(); | |||
|     if (peer_node == nullptr || peer_node->GetOpDesc() == nullptr) { | |||
|       // This function returns void, so the problem can only be reported, not propagated. | |||
|       GELOGE(INTERNAL_ERROR, "Peer node of label switch node[%s] is invalid, skip fixed addr attr.", | |||
|              tensor_name.c_str()); | |||
|       continue; | |||
|     } | |||
|     // Attribute-setting results are deliberately ignored, as before. | |||
|     (void)AttrUtils::SetStr(peer_node->GetOpDesc(), ATTR_DYNAMIC_SHAPE_FIXED_ADDR, tensor_name); | |||
|     (void)AttrUtils::SetInt(peer_node->GetOpDesc(), ATTR_DYNAMIC_SHAPE_FIXED_ADDR_INDEX, 0); | |||
|     tensor_name_to_peer_output_index_[tensor_name] = 0; | |||
|   } | |||
| } | |||
| /// @ingroup ge | |||
| /// @brief Data Op Initialize. | |||
| /// @param [in] NodePtr: Data Op. | |||
| @@ -3948,15 +3963,4 @@ int64_t DavinciModel::GetFixedAddrsSize(string tensor_name) { | |||
| } | |||
| } | |||
| /// @ingroup ge | |||
| /// @brief When an op carries both ATTR_DYNAMIC_SHAPE_FIXED_ADDR and ATTR_DYNAMIC_SHAPE_FIXED_ADDR_INDEX, | |||
| ///        store the (name -> index) pair in tensor_name_to_peer_output_index_; negative indices are dropped. | |||
| /// @param [in] op_desc: op description to read the two attributes from. | |||
| void DavinciModel::GetFixedAddrAttr(const OpDescPtr &op_desc) { | |||
|   const bool has_fixed_addr_attrs = | |||
|       op_desc->HasAttr(ATTR_DYNAMIC_SHAPE_FIXED_ADDR) && op_desc->HasAttr(ATTR_DYNAMIC_SHAPE_FIXED_ADDR_INDEX); | |||
|   if (!has_fixed_addr_attrs) { | |||
|     return; | |||
|   } | |||
|   string fixed_tensor_name; | |||
|   (void)AttrUtils::GetStr(op_desc, ATTR_DYNAMIC_SHAPE_FIXED_ADDR, fixed_tensor_name); | |||
|   // -1 marks "no usable index" if the read below leaves the value untouched. | |||
|   int64_t peer_output_index = -1; | |||
|   (void)AttrUtils::GetInt(op_desc, ATTR_DYNAMIC_SHAPE_FIXED_ADDR_INDEX, peer_output_index); | |||
|   if (peer_output_index < 0) { | |||
|     return; | |||
|   } | |||
|   tensor_name_to_peer_output_index_[fixed_tensor_name] = peer_output_index; | |||
| } | |||
| } // namespace ge | |||
| @@ -838,7 +838,7 @@ class DavinciModel { | |||
| std::vector<ge::OutputTensorInfo> &outputs); | |||
| void ParseAIPPInfo(std::string in_out_info, InputOutputDims &dims_info); | |||
| void GetFixedAddrAttr(const OpDescPtr &op_desc); | |||
| void SetLabelForDynamic(const NodePtr &node); | |||
| bool is_model_has_inited_; | |||
| uint32_t model_id_; | |||
| @@ -144,7 +144,7 @@ Status LabelSwitchByIndexTaskInfo::CalculateArgs(const domi::TaskDef &task_def, | |||
| GELOGE(FAILED, "Label switch op only have one data input. Now input size is %zu", op_desc->GetInputsSize()); | |||
| return FAILED; | |||
| } | |||
| string input_tensor_name = op_desc->GetInputNameByIndex(0); | |||
| string input_tensor_name = op_desc->GetName(); | |||
| fixed_addr_offset_ = davinci_model->GetFixedAddrsSize(input_tensor_name); | |||
| auto tensor_desc = op_desc->GetInputDesc(0); | |||
| int64_t tensor_size = 0; | |||
| @@ -35,6 +35,7 @@ Status MemcpyAsyncTaskInfo::Init(const domi::TaskDef &task_def, DavinciModel *da | |||
| kind_ = memcpy_async_.kind(); | |||
| dst_max_ = memcpy_async_.dst_max(); | |||
| OpDescPtr op_desc = davinci_model_->GetOpByIndex(memcpy_async_.op_index()); | |||
| op_desc_ = op_desc; | |||
| if (op_desc == nullptr) { | |||
| GELOGE(INTERNAL_ERROR, "Task op index:%u out of range", memcpy_async_.op_index()); | |||
| return INTERNAL_ERROR; | |||
| @@ -45,7 +46,8 @@ Status MemcpyAsyncTaskInfo::Init(const domi::TaskDef &task_def, DavinciModel *da | |||
| dst_ = reinterpret_cast<uint8_t *>(reinterpret_cast<uintptr_t>(src_) + sizeof(void *)); | |||
| // for zero copy | |||
| kind_ = RT_MEMCPY_ADDR_DEVICE_TO_DEVICE; | |||
| GELOGI("MemcpyAsyncTaskInfo src_ %p, dst_ %p, args_offset %u.", src_, dst_, args_offset_); | |||
| GELOGI("MemcpyAsyncTaskInfo op name %s, src_ %p, dst_ %p, args_offset %u.", | |||
| op_desc->GetName().c_str(), src_, dst_, args_offset_); | |||
| return SUCCESS; | |||
| } | |||
| @@ -93,12 +95,22 @@ Status MemcpyAsyncTaskInfo::Distribute() { | |||
| } | |||
| /// @brief Record this task's offset in the model's args area and, when the op carries a non-empty | |||
| ///        ATTR_DYNAMIC_SHAPE_FIXED_ADDR, its offset in the model's fixed-address area. | |||
| /// @param [in] task_def: task definition; memcpy_async().op_index() selects the op. | |||
| /// @param [in] davinci_model: model that is queried and updated; also stored in davinci_model_. | |||
| /// @return SUCCESS on completion; an error status if davinci_model is null; | |||
| ///         INTERNAL_ERROR if the op index resolves to no op. | |||
| Status MemcpyAsyncTaskInfo::CalculateArgs(const domi::TaskDef &task_def, DavinciModel *davinci_model) { | |||
|   GE_CHECK_NOTNULL(davinci_model); | |||
|   // Look the op up through the parameter: the member davinci_model_ is only assigned below and is | |||
|   // nullptr until then unless an earlier call set it, so dereferencing it here was unsafe. | |||
|   const uint32_t op_index = task_def.memcpy_async().op_index(); | |||
|   OpDescPtr op_desc = davinci_model->GetOpByIndex(op_index); | |||
|   if (op_desc == nullptr) { | |||
|     GELOGE(INTERNAL_ERROR, "Task op index:%u out of range", op_index); | |||
|     return INTERNAL_ERROR; | |||
|   } | |||
|   // the num of src and dst size is 2 | |||
|   uint32_t args_size = sizeof(void *) * 2; | |||
|   // Read the current total first: it is this task's offset; then report this task's size. | |||
|   args_offset_ = davinci_model->GetTotalArgsSize(); | |||
|   davinci_model->SetTotalArgsSize(args_size); | |||
|   davinci_model_ = davinci_model; | |||
|   GELOGI("MemcpyAsyncTaskInfo kernel args_size %u, args_offset %u", args_size, args_offset_); | |||
|   string peer_input_name; | |||
|   if (AttrUtils::GetStr(op_desc, ATTR_DYNAMIC_SHAPE_FIXED_ADDR, peer_input_name) && !peer_input_name.empty()) { | |||
|     uint32_t output_index = davinci_model->GetFixedAddrOutputIndex(peer_input_name); | |||
|     // Same read-then-report order as above, this time for the fixed-address area. | |||
|     fixed_addr_offset_ = davinci_model->GetFixedAddrsSize(peer_input_name); | |||
|     auto tensor_desc = op_desc->GetOutputDesc(output_index); | |||
|     int64_t tensor_size = 0; | |||
|     GE_CHK_STATUS(TensorUtils::GetSize(tensor_desc, tensor_size)); | |||
|     davinci_model->SetTotalFixedAddrsSize(peer_input_name, tensor_size); | |||
|   } | |||
|   return SUCCESS; | |||
| } | |||
| @@ -117,8 +129,12 @@ Status MemcpyAsyncTaskInfo::UpdateArgs() { | |||
| vector<void *> io_addrs; | |||
| io_addrs.emplace_back(reinterpret_cast<void *>(src_)); | |||
| io_addrs.emplace_back(reinterpret_cast<void *>(dst_)); | |||
| if (op_desc_->HasAttr(ATTR_DYNAMIC_SHAPE_FIXED_ADDR)) { | |||
| void *fixed_addr = davinci_model_->GetCurrentFixedAddr(fixed_addr_offset_); | |||
| io_addrs.emplace_back(fixed_addr); | |||
| } else { | |||
| io_addrs.emplace_back(reinterpret_cast<void *>(dst_)); | |||
| } | |||
| davinci_model_->SetTotalIOAddrs(io_addrs); | |||
| GELOGI("MemcpyAsyncTaskInfo::UpdateArgs success."); | |||
| @@ -44,6 +44,8 @@ class MemcpyAsyncTaskInfo : public TaskInfo { | |||
| uint8_t *src_; | |||
| uint64_t count_; | |||
| uint32_t kind_; | |||
| OpDescPtr op_desc_; | |||
| int64_t fixed_addr_offset_; | |||
| DavinciModel *davinci_model_ = nullptr; | |||
| uint32_t args_offset_ = 0; | |||
| domi::MemcpyAsyncDef memcpy_async_; | |||
| @@ -149,7 +149,8 @@ Status SubgraphPass::SubgraphOutputNode(const ComputeGraphPtr &graph, const Node | |||
| std::string op_type; | |||
| bool insert_flag = NodeUtils::GetConstOpType(in_node, op_type) || | |||
| IsAtomicRequired(in_node, peer_out_anchor->GetIdx()) || IsOutputContinuesRequired(in_node) || | |||
| ((in_node->GetType() == DATA) && (kWhileOpTypes.count(graph->GetParentNode()->GetType()) == 0)); | |||
| ((in_node->GetType() == DATA) && (kWhileOpTypes.count(graph->GetParentNode()->GetType()) == 0)) || | |||
| (NodeUtils::IsDynamicShape(node) && (kWhileOpTypes.count(in_node->GetType()) != 0)); | |||
| if (insert_flag) { | |||
| GELOGD("Insert MemcpyAsync node between %s and %s.", in_node->GetName().c_str(), node->GetName().c_str()); | |||
| std::string name = node->GetName() + "_input_" + std::to_string(in_data_anchor->GetIdx()) + "_Memcpy"; | |||
| @@ -212,6 +213,19 @@ Status SubgraphPass::WhileBodySubgraph(const ComputeGraphPtr &graph, const NodeP | |||
| return SUCCESS; | |||
| } | |||
| // insert identity between data and labelswitch in while cond subgraph | |||
| if (NodeUtils::IsDynamicShape(node)) { | |||
| ComputeGraphPtr while_cond = NodeUtils::GetSubgraph(*node, 0); | |||
| GE_CHECK_NOTNULL(while_cond); | |||
| std::vector<NodePtr> cond_data_nodes; | |||
| for (const auto &n : while_cond->GetDirectNode()) { | |||
| if (n->GetType() == DATA) { | |||
| cond_data_nodes.emplace_back(n); | |||
| } | |||
| } | |||
| GE_CHK_STATUS_RET(InsertInputMemcpy(while_cond, cond_data_nodes), "InsertInputMemcpy failed."); | |||
| } | |||
| std::vector<NodePtr> data_nodes; | |||
| std::set<uint32_t> bypass_index; | |||
| NodePtr output_node = nullptr; | |||
| @@ -31,7 +31,6 @@ | |||
| #include "task/aicpu_task_builder.h" | |||
| #include "task/aicpu_kernel_task_builder.h" | |||
| #include "task/tbe_task_builder.h" | |||
| #include "graph/load/new_model_manager/model_manager.h" | |||
| static std::atomic<std::uint64_t> aicpu_sessionid(0); | |||
| @@ -278,7 +277,6 @@ Status SingleOpModel::BuildTaskList(SingleOp &single_op) { | |||
| GELOGD("Skip task type: %d", static_cast<int>(task_type)); | |||
| } | |||
| } | |||
| GE_CHK_STATUS_RET(ModelManager::GetInstance()->LaunchCustAicpuSo(), "launch cust aicpu so failed."); | |||
| return SUCCESS; | |||
| } | |||
| @@ -450,8 +448,6 @@ Status SingleOpModel::BuildTaskListForDynamicOp(DynamicSingleOp &single_op) { | |||
| GELOGD("Skip task type: %d", static_cast<int>(task_type)); | |||
| } | |||
| } | |||
| GE_CHK_STATUS_RET(ModelManager::GetInstance()->LaunchCustAicpuSo(), "launch cust aicpu so failed."); | |||
| return SUCCESS; | |||
| } | |||
| @@ -63,6 +63,7 @@ Status AiCpuCCTaskBuilder::BuildTask(AiCpuCCTask &task) { | |||
| task.is_custom_ = true; | |||
| task.dump_flag_ |= RT_KERNEL_CUSTOM_AICPU; | |||
| GE_CHK_STATUS_RET(ModelManager::GetInstance()->LoadCustAicpuSo(op_desc_, so_name), "launch cust aicpu so failed"); | |||
| GE_CHK_STATUS_RET(ModelManager::GetInstance()->LaunchCustAicpuSo(), "launch cust aicpu so failed."); | |||
| } | |||
| task.num_inputs_ = op_desc_->GetInputsSize(); | |||