@@ -649,7 +649,6 @@ Status DavinciModel::Init(void *dev_ptr, size_t mem_size, void *weight_ptr, size
   for (const ge::NodePtr &node : compute_graph->GetDirectNode()) {
     auto op_desc = node->GetOpDesc();
     GE_IF_BOOL_EXEC(op_desc == nullptr, continue);
-    GetFixedAddrAttr(op_desc);
    GE_IF_BOOL_EXEC(op_desc->GetType() != VARIABLE, continue);
    GE_IF_BOOL_EXEC(IsBroadCastOpData(node),
                    (void)ge::AttrUtils::SetStr(op_desc, VAR_ATTR_VAR_IS_BROADCAST, "var_is_restore"););
@@ -838,7 +837,8 @@ Status DavinciModel::InitNodes(const ComputeGraphPtr &compute_graph) {
      }
      continue;
    }
+    // For dynamic shape with control flow: record fixed-address info via LabelSwitchByIndex nodes.
+    SetLabelForDynamic(node);
    if (IsNoTaskAndDumpNeeded(op_desc)) {
      GELOGD("node[%s] without task, and save op_desc and addr for dump", op_desc->GetName().c_str());
      const RuntimeParam &rts_param = GetRuntimeParam();
@@ -912,6 +912,21 @@ Status DavinciModel::InitInputOutputForDynamic(const ComputeGraphPtr &compute_gr
   return SUCCESS;
 }

+void DavinciModel::SetLabelForDynamic(const NodePtr &node) {
+  if (known_node_ && node->GetOpDesc()->GetType() == LABELSWITCHBYINDEX) {
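+    // In the known (statically partitioned) model, tag each peer output of the label
+    // switch with this node's name and output index 0 so it is assigned a fixed address.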
+    for (auto &in_data_anchor : node->GetAllInDataAnchors()) {
+      auto peer_out_data_anchor = in_data_anchor->GetPeerOutAnchor();
+      if (peer_out_data_anchor != nullptr) {
+        string tensor_name = node->GetName();
+        auto peer_node = peer_out_data_anchor->GetOwnerNode();
+        (void)AttrUtils::SetStr(peer_node->GetOpDesc(), ATTR_DYNAMIC_SHAPE_FIXED_ADDR, tensor_name);
+        (void)AttrUtils::SetInt(peer_node->GetOpDesc(), ATTR_DYNAMIC_SHAPE_FIXED_ADDR_INDEX, 0);
+        tensor_name_to_peer_output_index_[tensor_name] = 0;
+      }
+    }
+  }
+}
+
 /// @ingroup ge
 /// @brief Data Op Initialize.
 /// @param [in] NodePtr: Data Op.
@@ -3948,15 +3963,4 @@ int64_t DavinciModel::GetFixedAddrsSize(string tensor_name) {
   }
 }

-void DavinciModel::GetFixedAddrAttr(const OpDescPtr &op_desc) {
-  if (op_desc->HasAttr(ATTR_DYNAMIC_SHAPE_FIXED_ADDR) && op_desc->HasAttr(ATTR_DYNAMIC_SHAPE_FIXED_ADDR_INDEX)) {
-    string tensor_name;
-    (void)AttrUtils::GetStr(op_desc, ATTR_DYNAMIC_SHAPE_FIXED_ADDR, tensor_name);
-    int64_t index = -1;
-    (void)AttrUtils::GetInt(op_desc, ATTR_DYNAMIC_SHAPE_FIXED_ADDR_INDEX, index);
-    if (index >= 0) {
-      tensor_name_to_peer_output_index_[tensor_name] = index;
-    }
-  }
-}
 }  // namespace ge
@@ -838,7 +838,7 @@ class DavinciModel {
                          std::vector<ge::OutputTensorInfo> &outputs);
   void ParseAIPPInfo(std::string in_out_info, InputOutputDims &dims_info);
-  void GetFixedAddrAttr(const OpDescPtr &op_desc);
+  void SetLabelForDynamic(const NodePtr &node);

   bool is_model_has_inited_;
   uint32_t model_id_;
@@ -144,7 +144,7 @@ Status LabelSwitchByIndexTaskInfo::CalculateArgs(const domi::TaskDef &task_def,
    GELOGE(FAILED, "Label switch op only have one data input. Now input size is %zu", op_desc->GetInputsSize());
    return FAILED;
  }
-  string input_tensor_name = op_desc->GetInputNameByIndex(0);
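+  // Key must match DavinciModel::SetLabelForDynamic, which records the fixed address
+  // under the label-switch node's own name rather than its input tensor name.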
+  string input_tensor_name = op_desc->GetName();
  fixed_addr_offset_ = davinci_model->GetFixedAddrsSize(input_tensor_name);
  auto tensor_desc = op_desc->GetInputDesc(0);
  int64_t tensor_size = 0;
@@ -35,6 +35,7 @@ Status MemcpyAsyncTaskInfo::Init(const domi::TaskDef &task_def, DavinciModel *da
  kind_ = memcpy_async_.kind();
  dst_max_ = memcpy_async_.dst_max();
  OpDescPtr op_desc = davinci_model_->GetOpByIndex(memcpy_async_.op_index());
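+  // Cache the op desc; UpdateArgs later checks it for ATTR_DYNAMIC_SHAPE_FIXED_ADDR.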
+  op_desc_ = op_desc;
  if (op_desc == nullptr) {
    GELOGE(INTERNAL_ERROR, "Task op index:%u out of range", memcpy_async_.op_index());
    return INTERNAL_ERROR;
@@ -45,7 +46,8 @@ Status MemcpyAsyncTaskInfo::Init(const domi::TaskDef &task_def, DavinciModel *da
  dst_ = reinterpret_cast<uint8_t *>(reinterpret_cast<uintptr_t>(src_) + sizeof(void *));
  // for zero copy
  kind_ = RT_MEMCPY_ADDR_DEVICE_TO_DEVICE;
-  GELOGI("MemcpyAsyncTaskInfo src_ %p, dst_ %p, args_offset %u.", src_, dst_, args_offset_);
+  GELOGI("MemcpyAsyncTaskInfo op name %s, src_ %p, dst_ %p, args_offset %u.",
+         op_desc->GetName().c_str(), src_, dst_, args_offset_);
  return SUCCESS;
 }
@@ -93,12 +95,22 @@ Status MemcpyAsyncTaskInfo::Distribute() {
 }

 Status MemcpyAsyncTaskInfo::CalculateArgs(const domi::TaskDef &task_def, DavinciModel *davinci_model) {
+  OpDescPtr op_desc = davinci_model->GetOpByIndex(task_def.memcpy_async().op_index());
  // the num of src and dst size is 2
  uint32_t args_size = sizeof(void *) * 2;
  args_offset_ = davinci_model->GetTotalArgsSize();
  davinci_model->SetTotalArgsSize(args_size);
  davinci_model_ = davinci_model;
  GELOGI("MemcpyAsyncTaskInfo kernel args_size %u, args_offset %u", args_size, args_offset_);
+  string peer_input_name;
+  if (AttrUtils::GetStr(op_desc, ATTR_DYNAMIC_SHAPE_FIXED_ADDR, peer_input_name) && !peer_input_name.empty()) {
+    uint32_t output_index = davinci_model->GetFixedAddrOutputIndex(peer_input_name);
+    fixed_addr_offset_ = davinci_model->GetFixedAddrsSize(peer_input_name);
+    auto tensor_desc = op_desc->GetOutputDesc(output_index);
+    int64_t tensor_size = 0;
+    GE_CHK_STATUS(TensorUtils::GetSize(tensor_desc, tensor_size));
+    davinci_model->SetTotalFixedAddrsSize(peer_input_name, tensor_size);
+  }
  return SUCCESS;
 }
@@ -117,8 +129,12 @@ Status MemcpyAsyncTaskInfo::UpdateArgs() {
  vector<void *> io_addrs;
  io_addrs.emplace_back(reinterpret_cast<void *>(src_));
-  io_addrs.emplace_back(reinterpret_cast<void *>(dst_));
+  if (op_desc_->HasAttr(ATTR_DYNAMIC_SHAPE_FIXED_ADDR)) {
+    void *fixed_addr = davinci_model_->GetCurrentFixedAddr(fixed_addr_offset_);
+    io_addrs.emplace_back(fixed_addr);
+  } else {
+    io_addrs.emplace_back(reinterpret_cast<void *>(dst_));
+  }

  davinci_model_->SetTotalIOAddrs(io_addrs);
  GELOGI("MemcpyAsyncTaskInfo::UpdateArgs success.");
@@ -44,6 +44,8 @@ class MemcpyAsyncTaskInfo : public TaskInfo {
  uint8_t *src_;
  uint64_t count_;
  uint32_t kind_;
+  OpDescPtr op_desc_;
+  int64_t fixed_addr_offset_ = 0;
  DavinciModel *davinci_model_ = nullptr;
  uint32_t args_offset_ = 0;
  domi::MemcpyAsyncDef memcpy_async_;
@@ -149,7 +149,8 @@ Status SubgraphPass::SubgraphOutputNode(const ComputeGraphPtr &graph, const Node
    std::string op_type;
    bool insert_flag = NodeUtils::GetConstOpType(in_node, op_type) ||
                       IsAtomicRequired(in_node, peer_out_anchor->GetIdx()) || IsOutputContinuesRequired(in_node) ||
-                       ((in_node->GetType() == DATA) && (kWhileOpTypes.count(graph->GetParentNode()->GetType()) == 0));
+                       ((in_node->GetType() == DATA) && (kWhileOpTypes.count(graph->GetParentNode()->GetType()) == 0)) ||
+                       (NodeUtils::IsDynamicShape(node) && (kWhileOpTypes.count(in_node->GetType()) != 0));
    if (insert_flag) {
      GELOGD("Insert MemcpyAsync node between %s and %s.", in_node->GetName().c_str(), node->GetName().c_str());
      std::string name = node->GetName() + "_input_" + std::to_string(in_data_anchor->GetIdx()) + "_Memcpy";
@@ -212,6 +213,19 @@ Status SubgraphPass::WhileBodySubgraph(const ComputeGraphPtr &graph, const NodeP
    return SUCCESS;
  }

+  // Insert memcpy between Data and LabelSwitchByIndex in the while cond subgraph.
+  if (NodeUtils::IsDynamicShape(node)) {
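+    // Subgraph index 0 of a While node is its cond graph.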
+    ComputeGraphPtr while_cond = NodeUtils::GetSubgraph(*node, 0);
+    GE_CHECK_NOTNULL(while_cond);
+    std::vector<NodePtr> cond_data_nodes;
+    for (const auto &n : while_cond->GetDirectNode()) {
+      if (n->GetType() == DATA) {
+        cond_data_nodes.emplace_back(n);
+      }
+    }
+    GE_CHK_STATUS_RET(InsertInputMemcpy(while_cond, cond_data_nodes), "InsertInputMemcpy failed.");
+  }
+
  std::vector<NodePtr> data_nodes;
  std::set<uint32_t> bypass_index;
  NodePtr output_node = nullptr;
@@ -31,7 +31,6 @@
 #include "task/aicpu_task_builder.h"
 #include "task/aicpu_kernel_task_builder.h"
 #include "task/tbe_task_builder.h"
-#include "graph/load/new_model_manager/model_manager.h"

 static std::atomic<std::uint64_t> aicpu_sessionid(0);
@@ -278,7 +277,6 @@ Status SingleOpModel::BuildTaskList(SingleOp &single_op) {
      GELOGD("Skip task type: %d", static_cast<int>(task_type));
    }
  }
-  GE_CHK_STATUS_RET(ModelManager::GetInstance()->LaunchCustAicpuSo(), "launch cust aicpu so failed.");
  return SUCCESS;
 }
@@ -450,8 +448,6 @@ Status SingleOpModel::BuildTaskListForDynamicOp(DynamicSingleOp &single_op) {
      GELOGD("Skip task type: %d", static_cast<int>(task_type));
    }
  }
-  GE_CHK_STATUS_RET(ModelManager::GetInstance()->LaunchCustAicpuSo(), "launch cust aicpu so failed.");
  return SUCCESS;
 }
@@ -63,6 +63,7 @@ Status AiCpuCCTaskBuilder::BuildTask(AiCpuCCTask &task) {
    task.is_custom_ = true;
    task.dump_flag_ |= RT_KERNEL_CUSTOM_AICPU;
    GE_CHK_STATUS_RET(ModelManager::GetInstance()->LoadCustAicpuSo(op_desc_, so_name), "launch cust aicpu so failed");
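+    // Launch the custom AICPU so immediately after loading it; the launch was
+    // previously done in SingleOpModel::BuildTaskList (removed in this change).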
+    GE_CHK_STATUS_RET(ModelManager::GetInstance()->LaunchCustAicpuSo(), "launch cust aicpu so failed.");
  }

  task.num_inputs_ = op_desc_->GetInputsSize();