| @@ -859,6 +859,8 @@ Status DavinciModel::InitNodes(const ComputeGraphPtr &compute_graph) { | |||||
| continue; | continue; | ||||
| } | } | ||||
| // for dynamic shape with control flow | |||||
| SetLabelForDynamic(node); | |||||
| auto it = op_desc_handle.find(op_desc->GetType()); | auto it = op_desc_handle.find(op_desc->GetType()); | ||||
| if (it != op_desc_handle.end()) { | if (it != op_desc_handle.end()) { | ||||
| if ((this->*it->second)(op_desc) != SUCCESS) { | if ((this->*it->second)(op_desc) != SUCCESS) { | ||||
| @@ -867,8 +869,6 @@ Status DavinciModel::InitNodes(const ComputeGraphPtr &compute_graph) { | |||||
| } | } | ||||
| continue; | continue; | ||||
| } | } | ||||
| // for dynamic shape with control flow | |||||
| SetLabelForDynamic(node); | |||||
| if (IsNoTaskAndDumpNeeded(op_desc)) { | if (IsNoTaskAndDumpNeeded(op_desc)) { | ||||
| GELOGD("node[%s] without task, and save op_desc and addr for dump", op_desc->GetName().c_str()); | GELOGD("node[%s] without task, and save op_desc and addr for dump", op_desc->GetName().c_str()); | ||||
| const RuntimeParam &rts_param = GetRuntimeParam(); | const RuntimeParam &rts_param = GetRuntimeParam(); | ||||
| @@ -910,14 +910,14 @@ Status DavinciModel::InitNodes(const ComputeGraphPtr &compute_graph) { | |||||
| } | } | ||||
| void DavinciModel::SetLabelForDynamic(const NodePtr &node) { | void DavinciModel::SetLabelForDynamic(const NodePtr &node) { | ||||
| if (known_node_ && node->GetOpDesc()->GetType() == LABELSWITCHBYINDEX) { | |||||
| if (known_node_ && (node->GetType() == LABELSWITCHBYINDEX || node->GetType() == STREAMSWITCH)) { | |||||
| for (auto &in_data_anchor : node->GetAllInDataAnchors()) { | for (auto &in_data_anchor : node->GetAllInDataAnchors()) { | ||||
| auto peer_out_data_anchor = in_data_anchor->GetPeerOutAnchor(); | auto peer_out_data_anchor = in_data_anchor->GetPeerOutAnchor(); | ||||
| if (peer_out_data_anchor != nullptr) { | if (peer_out_data_anchor != nullptr) { | ||||
| string tensor_name = node->GetName(); | |||||
| // name+index as the label of switch input | |||||
| string tensor_name = node->GetName() + std::to_string(in_data_anchor->GetIdx()); | |||||
| auto peer_node = peer_out_data_anchor->GetOwnerNode(); | auto peer_node = peer_out_data_anchor->GetOwnerNode(); | ||||
| (void)AttrUtils::SetStr(peer_node->GetOpDesc(), ATTR_DYNAMIC_SHAPE_FIXED_ADDR, tensor_name); | (void)AttrUtils::SetStr(peer_node->GetOpDesc(), ATTR_DYNAMIC_SHAPE_FIXED_ADDR, tensor_name); | ||||
| (void)AttrUtils::SetInt(peer_node->GetOpDesc(), ATTR_DYNAMIC_SHAPE_FIXED_ADDR_INDEX, 0); | |||||
| tensor_name_to_peer_output_index_[tensor_name] = 0; | tensor_name_to_peer_output_index_[tensor_name] = 0; | ||||
| } | } | ||||
| } | } | ||||
| @@ -123,7 +123,7 @@ Status StreamSwitchTaskInfo::CalculateArgs(const domi::TaskDef &task_def, Davinc | |||||
| return FAILED; | return FAILED; | ||||
| } | } | ||||
| for (uint32_t i = 0; i < STREAM_SWITCH_INPUT_NUM; ++i) { | for (uint32_t i = 0; i < STREAM_SWITCH_INPUT_NUM; ++i) { | ||||
| string input_tensor_name = op_desc->GetInputNameByIndex(i); | |||||
| string input_tensor_name = op_desc->GetName() + std::to_string(i); | |||||
| int64_t fixed_addr_offset = davinci_model->GetFixedAddrsSize(input_tensor_name); | int64_t fixed_addr_offset = davinci_model->GetFixedAddrsSize(input_tensor_name); | ||||
| fixed_addr_offset_.emplace_back(fixed_addr_offset); | fixed_addr_offset_.emplace_back(fixed_addr_offset); | ||||
| auto tensor_desc = op_desc->GetInputDesc(i); | auto tensor_desc = op_desc->GetInputDesc(i); | ||||
| @@ -25,7 +25,17 @@ | |||||
| namespace ge { | namespace ge { | ||||
| Status MemcpyAddrAsyncPass::Run(ComputeGraphPtr graph) { | Status MemcpyAddrAsyncPass::Run(ComputeGraphPtr graph) { | ||||
| GE_CHECK_NOTNULL(graph); | GE_CHECK_NOTNULL(graph); | ||||
| if (graph->GetGraphUnknownFlag()) { | if (graph->GetGraphUnknownFlag()) { | ||||
| // insert memcpyasync node when parent is unknown graph | |||||
| for (const auto &node : graph->GetAllNodes()) { | |||||
| if (node->GetType() == STREAMSWITCH) { | |||||
| auto sub_graph = node->GetOwnerComputeGraph(); | |||||
| if (sub_graph != nullptr && !sub_graph->GetGraphUnknownFlag()) { | |||||
| GE_CHK_STATUS_RET(AddMemcpyAsyncNode(node), "Add memcpyasync node failed in known subgraph."); | |||||
| } | |||||
| } | |||||
| } | |||||
| GELOGD("Graph[%s] is unknown graph, skip.", graph->GetName().c_str()); | GELOGD("Graph[%s] is unknown graph, skip.", graph->GetName().c_str()); | ||||
| return SUCCESS; | return SUCCESS; | ||||
| } | } | ||||
| @@ -63,6 +73,26 @@ Status MemcpyAddrAsyncPass::Run(ComputeGraphPtr graph) { | |||||
| return SUCCESS; | return SUCCESS; | ||||
| } | } | ||||
| Status MemcpyAddrAsyncPass::AddMemcpyAsyncNode(const NodePtr &node) { | |||||
| GELOGI("Start add memcpyasync node in front of node %s", node->GetName().c_str()); | |||||
| auto sub_graph = node->GetOwnerComputeGraph(); | |||||
| for (InDataAnchorPtr &in_data_anchor : node->GetAllInDataAnchors()) { | |||||
| OutDataAnchorPtr peer_out_anchor = in_data_anchor->GetPeerOutAnchor(); | |||||
| GE_IF_BOOL_EXEC(peer_out_anchor == nullptr, continue); | |||||
| auto memcpy_async_node = CreateMemcpyAsyncNode(sub_graph, peer_out_anchor, node); | |||||
| if (memcpy_async_node == nullptr) { | |||||
| GELOGE(INTERNAL_ERROR, "Create memcpyasync node failed."); | |||||
| return INTERNAL_ERROR; | |||||
| } | |||||
| Status ret = InsertMemcpyAddrAsyncNode(peer_out_anchor, in_data_anchor, memcpy_async_node); | |||||
| if (ret != SUCCESS) { | |||||
| GELOGE(ret, "Insert memcpyasync node failed."); | |||||
| return ret; | |||||
| } | |||||
| } | |||||
| return SUCCESS; | |||||
| } | |||||
| Status MemcpyAddrAsyncPass::AddMemcpyAddrAsyncNode(const ComputeGraphPtr &graph, const NodePtr &node) { | Status MemcpyAddrAsyncPass::AddMemcpyAddrAsyncNode(const ComputeGraphPtr &graph, const NodePtr &node) { | ||||
| GELOGI("Start AddMemcpyAddrAsyncNode for %s.", node->GetName().c_str()); | GELOGI("Start AddMemcpyAddrAsyncNode for %s.", node->GetName().c_str()); | ||||
| for (InDataAnchorPtr &in_data_anchor : node->GetAllInDataAnchors()) { | for (InDataAnchorPtr &in_data_anchor : node->GetAllInDataAnchors()) { | ||||
| @@ -256,6 +286,35 @@ NodePtr MemcpyAddrAsyncPass::CreateMemcpyAddrAsyncNode(const ComputeGraphPtr &gr | |||||
| return memcpy_addr_async_node; | return memcpy_addr_async_node; | ||||
| } | } | ||||
| // create memcpy async node for known sub graph | |||||
| NodePtr MemcpyAddrAsyncPass::CreateMemcpyAsyncNode(const ComputeGraphPtr &graph, | |||||
| const OutDataAnchorPtr &out_data_anchor, | |||||
| const NodePtr &out_of_user_data) { | |||||
| GELOGD("Start CreateMemcpyAsyncNode."); | |||||
| static uint32_t new_node_index = 0; | |||||
| OpDescPtr pre_op_desc = out_data_anchor->GetOwnerNode()->GetOpDesc(); | |||||
| GE_CHK_BOOL_EXEC(pre_op_desc != nullptr, return nullptr, "Op_desc of pre node is invalid."); | |||||
| string node_name = pre_op_desc->GetName() + "_" + MEMCPYASYNC + "_" + std::to_string(new_node_index++); | |||||
| OpDescPtr op_desc = MakeShared<OpDesc>(node_name, MEMCPYASYNC); | |||||
| GE_CHECK_NOTNULL_EXEC(op_desc, return nullptr); | |||||
| if (op_desc->AddInputDesc(pre_op_desc->GetOutputDesc(out_data_anchor->GetIdx())) != GRAPH_SUCCESS) { | |||||
| GELOGE(INTERNAL_ERROR, "Add memcpyasync input desc failed."); | |||||
| return nullptr; | |||||
| } | |||||
| if (op_desc->AddOutputDesc(pre_op_desc->GetOutputDesc(out_data_anchor->GetIdx())) != GRAPH_SUCCESS) { | |||||
| GELOGE(INTERNAL_ERROR, "Add memcpyasync output desc failed."); | |||||
| return nullptr; | |||||
| } | |||||
| NodePtr memcpy_async_node = graph->AddNode(op_desc); | |||||
| GE_CHECK_NOTNULL_EXEC(memcpy_async_node, return nullptr); | |||||
| return memcpy_async_node; | |||||
| } | |||||
| Status MemcpyAddrAsyncPass::InsertMemcpyAddrAsyncNode(const OutDataAnchorPtr &out_anchor, | Status MemcpyAddrAsyncPass::InsertMemcpyAddrAsyncNode(const OutDataAnchorPtr &out_anchor, | ||||
| const InDataAnchorPtr &in_anchor, const NodePtr &node) { | const InDataAnchorPtr &in_anchor, const NodePtr &node) { | ||||
| // insert memcpy_addr of each user_data and out_of_user_data | // insert memcpy_addr of each user_data and out_of_user_data | ||||
| @@ -27,6 +27,7 @@ class MemcpyAddrAsyncPass : public GraphPass { | |||||
| private: | private: | ||||
| Status AddMemcpyAddrAsyncNode(const ComputeGraphPtr &graph, const NodePtr &node); | Status AddMemcpyAddrAsyncNode(const ComputeGraphPtr &graph, const NodePtr &node); | ||||
| Status AddMemcpyAsyncNode(const NodePtr &node); | |||||
| void FindUserData(const NodePtr &node, uint32_t &parent_index); | void FindUserData(const NodePtr &node, uint32_t &parent_index); | ||||
| void FindUserDataForKnown(const NodePtr &parent_node, uint32_t &parent_index); | void FindUserDataForKnown(const NodePtr &parent_node, uint32_t &parent_index); | ||||
| void FindUserDataForNonDynamic(const ge::NodePtr &parent_node, uint32_t &parent_index); | void FindUserDataForNonDynamic(const ge::NodePtr &parent_node, uint32_t &parent_index); | ||||
| @@ -34,6 +35,8 @@ class MemcpyAddrAsyncPass : public GraphPass { | |||||
| NodePtr CreateMemcpyAddrAsyncNode(const ComputeGraphPtr &graph, const OutDataAnchorPtr &out_data_anchor, | NodePtr CreateMemcpyAddrAsyncNode(const ComputeGraphPtr &graph, const OutDataAnchorPtr &out_data_anchor, | ||||
| const NodePtr &out_of_user_data); | const NodePtr &out_of_user_data); | ||||
| NodePtr CreateMemcpyAsyncNode(const ComputeGraphPtr &graph, const OutDataAnchorPtr &out_data_anchor, | |||||
| const NodePtr &out_of_user_data); | |||||
| Status InsertMemcpyAddrAsyncNode(const OutDataAnchorPtr &out_anchor, const InDataAnchorPtr &in_anchor, | Status InsertMemcpyAddrAsyncNode(const OutDataAnchorPtr &out_anchor, const InDataAnchorPtr &in_anchor, | ||||
| const NodePtr &node); | const NodePtr &node); | ||||
| Status InsertMemAddrAsyncNodeBeforeNetoutput(const ComputeGraphPtr &graph, const NodePtr &node); | Status InsertMemAddrAsyncNodeBeforeNetoutput(const ComputeGraphPtr &graph, const NodePtr &node); | ||||