From ec7bb516652e0d3f631bbf48586f6f0e6168a507 Mon Sep 17 00:00:00 2001 From: zhaozhixuan Date: Thu, 6 May 2021 20:19:07 +0800 Subject: [PATCH 1/7] MemcpyAsync in aicore executor. --- .../node_executor/aicore/aicore_op_task.cc | 13 ++++++------- tests/ut/ge/hybrid/ge_hybrid_unittest.cc | 18 +++++++++++++++--- 2 files changed, 21 insertions(+), 10 deletions(-) diff --git a/ge/hybrid/node_executor/aicore/aicore_op_task.cc b/ge/hybrid/node_executor/aicore/aicore_op_task.cc index 8bb871fb..36f65bbe 100644 --- a/ge/hybrid/node_executor/aicore/aicore_op_task.cc +++ b/ge/hybrid/node_executor/aicore/aicore_op_task.cc @@ -354,8 +354,6 @@ Status AiCoreOpTask::PrepareWithShape(TaskContext &context) { Status AiCoreOpTask::UpdateTilingInfo(TaskContext &context) { auto node = context.GetNodeItem().node; GE_CHECK_NOTNULL(node); - auto op_desc = node->GetOpDesc(); - GE_CHECK_NOTNULL(op_desc); GELOGD("[%s] Start to update tiling info for task: [%s]", node->GetName().c_str(), stub_name_.c_str()); OpRunInfo tiling_info; @@ -370,12 +368,14 @@ Status AiCoreOpTask::UpdateTilingInfo(TaskContext &context) { // update op args by tiling info block_dim_ = static_cast(tiling_info.block_dim); - op_desc->SetWorkspaceBytes(tiling_info.workspaces); clear_atomic_ = tiling_info.clear_atomic; - tiling_data_ = tiling_info.tiling_data.str(); tiling_key_ = tiling_info.tiling_key; GELOGD("Successfully getting [tiling_key] : %u", tiling_key_); + + auto op_desc = node->GetOpDesc(); + GE_CHECK_NOTNULL(op_desc); + op_desc->SetWorkspaceBytes(tiling_info.workspaces); if (tiling_data_.empty()) { GELOGD("[%s] Tiling data is empty.", op_desc->GetName().c_str()); return SUCCESS; @@ -401,9 +401,8 @@ Status AiCoreOpTask::UpdateTilingInfo(TaskContext &context) { } RECORD_EXECUTION_EVENT(execution_context, context.GetNodeName(), "[CopyTilingInfo] Start"); - GE_CHK_RT_RET(rtMemcpy(tiling_buffer_->GetData(), tiling_buffer_->GetSize(), - tiling_data_.c_str(), tiling_data_.size(), - RT_MEMCPY_HOST_TO_DEVICE)); + GE_CHK_RT_RET(rtMemcpyAsync(tiling_buffer_->GetData(), tiling_buffer_->GetSize(), tiling_data_.c_str(), + tiling_data_.size(), RT_MEMCPY_HOST_TO_DEVICE_EX, context.GetStream())); RECORD_EXECUTION_EVENT(execution_context, context.GetNodeName(), "[CopyTilingInfo] End"); GELOGD("[%s] Done updating tiling info for task: [%s]", node->GetName().c_str(), stub_name_.c_str()); diff --git a/tests/ut/ge/hybrid/ge_hybrid_unittest.cc b/tests/ut/ge/hybrid/ge_hybrid_unittest.cc index b5aac527..4eae475d 100644 --- a/tests/ut/ge/hybrid/ge_hybrid_unittest.cc +++ b/tests/ut/ge/hybrid/ge_hybrid_unittest.cc @@ -111,14 +111,26 @@ TEST_F(UtestGeHybrid, aicore_op_task_init_success) { TEST_F(UtestGeHybrid, task_update_tiling_info) { auto aicore_task = std::unique_ptr(new(std::nothrow)hybrid::AiCoreOpTask()); - aicore_task->is_single_op_ = true; auto graph = make_shared("graph"); OpDescPtr op_desc = CreateOpDesc("Add", "Add"); ge::AttrUtils::SetStr(op_desc, "compile_info_key", "key"); ge::AttrUtils::SetStr(op_desc, "compile_info_json", "json"); + ge::AttrUtils::SetBool(op_desc, "support_dynamicshape", true); + ge::AttrUtils::SetInt(op_desc, "op_para_size", 1); auto node = graph->AddNode(op_desc); - optiling::OpRunInfo tiling_info; - ASSERT_EQ(aicore_task->CalcTilingInfo(node, tiling_info), SUCCESS); + + std::unique_ptr node_item; + NodeItem::Create(node, node_item); + node_item->input_start = 0; + node_item->output_start = 0; + + GraphExecutionContext execution_context; + SubgraphContext subgraph_context(nullptr, &execution_context); + NodeState node_state(*node_item, &subgraph_context); + auto task_context = TaskContext::Create(&node_state, &execution_context, &subgraph_context); + ASSERT_TRUE(task_context != nullptr); + ASSERT_EQ(aicore_task->InitTilingInfo(*op_desc), SUCCESS); + ASSERT_EQ(aicore_task->UpdateTilingInfo(*task_context), SUCCESS); } TEST_F(UtestGeHybrid, index_taskdefs_failed) { From a48a3fa01c13805616085c40200372f05cdff97b Mon Sep 17 00:00:00 2001 From: zhaozhixuan Date: Thu, 6 May 2021 20:39:31 +0800 Subject: [PATCH 2/7] MemcpyAsync in aicore executor. --- ge/graph/passes/reshape_recovery_pass.cc | 8 +++++++- ge/hybrid/node_executor/aicore/aicore_op_task.cc | 10 +++++----- .../ge/graph/passes/reshape_recovery_pass_unittest.cc | 8 ++++---- 3 files changed, 16 insertions(+), 10 deletions(-) diff --git a/ge/graph/passes/reshape_recovery_pass.cc b/ge/graph/passes/reshape_recovery_pass.cc index 7a9d085b..ba12ba15 100644 --- a/ge/graph/passes/reshape_recovery_pass.cc +++ b/ge/graph/passes/reshape_recovery_pass.cc @@ -60,7 +60,7 @@ Status InsertReshapeIfNeed(const NodePtr &node) { node->GetName().c_str(), src_anchor->GetIdx(), dst_node->GetName().c_str(), dst_anchor->GetIdx()); GE_CHECK_NOTNULL(dst_node); GE_CHECK_NOTNULL(dst_node->GetOpDesc()); - auto dst_tensor = dst_node->GetOpDesc()->GetInputDescPtr(dst_anchor->GetIdx()); + auto dst_tensor = dst_node->GetOpDesc()->MutableInputDesc(dst_anchor->GetIdx()); GE_CHECK_NOTNULL(dst_tensor); bool is_dynamic = false; const auto &src_tensor_dims = src_tensor->GetShape().GetDims(); @@ -71,6 +71,12 @@ Status InsertReshapeIfNeed(const NodePtr &node) { dst_node->GetName().c_str()); is_dynamic = true; } + if (dst_node->GetType() == NETOUTPUT && is_dynamic) { + // NetOutput shape must be continuous when dynamic shape. + // Otherwise, there may be an error waiting for the shape refresh to time out during execution. + dst_tensor->SetShape(src_tensor->GetShape()); + continue; + } bool is_need_insert_reshape = src_tensor_dims != dst_tensor_dims && !is_dynamic; if (is_need_insert_reshape) { diff --git a/ge/hybrid/node_executor/aicore/aicore_op_task.cc b/ge/hybrid/node_executor/aicore/aicore_op_task.cc index 36f65bbe..68fbf93b 100644 --- a/ge/hybrid/node_executor/aicore/aicore_op_task.cc +++ b/ge/hybrid/node_executor/aicore/aicore_op_task.cc @@ -354,6 +354,8 @@ Status AiCoreOpTask::PrepareWithShape(TaskContext &context) { Status AiCoreOpTask::UpdateTilingInfo(TaskContext &context) { auto node = context.GetNodeItem().node; GE_CHECK_NOTNULL(node); + auto op_desc = node->GetOpDesc(); + GE_CHECK_NOTNULL(op_desc); GELOGD("[%s] Start to update tiling info for task: [%s]", node->GetName().c_str(), stub_name_.c_str()); OpRunInfo tiling_info; @@ -368,16 +370,14 @@ Status AiCoreOpTask::UpdateTilingInfo(TaskContext &context) { // update op args by tiling info block_dim_ = static_cast(tiling_info.block_dim); + op_desc->SetWorkspaceBytes(tiling_info.workspaces); clear_atomic_ = tiling_info.clear_atomic; + tiling_data_ = tiling_info.tiling_data.str(); tiling_key_ = tiling_info.tiling_key; GELOGD("Successfully getting [tiling_key] : %u", tiling_key_); - - auto op_desc = node->GetOpDesc(); - GE_CHECK_NOTNULL(op_desc); - op_desc->SetWorkspaceBytes(tiling_info.workspaces); if (tiling_data_.empty()) { - GELOGD("[%s] Tiling data is empty.", op_desc->GetName().c_str()); + GELOGD("[%s] Tiling data is empty.", op_desc->GsetName().c_str()); return SUCCESS; } if (tiling_buffer_ == nullptr) { diff --git a/tests/ut/ge/graph/passes/reshape_recovery_pass_unittest.cc b/tests/ut/ge/graph/passes/reshape_recovery_pass_unittest.cc index af60021c..3be11452 100644 --- a/tests/ut/ge/graph/passes/reshape_recovery_pass_unittest.cc +++ b/tests/ut/ge/graph/passes/reshape_recovery_pass_unittest.cc @@ -42,8 +42,8 @@ ut::GraphBuilder Graph1Builder() { auto var1 = builder.AddNode("var1", "Variable", 0, 1, FORMAT_ND, DT_FLOAT, {-1}); auto const1 = builder.AddNode("const1", "Const", 0, 1, FORMAT_ND, DT_FLOAT, {1, 1, 224, 224}); auto transdata2 = builder.AddNode("transdata2", "Transdata", 1, 1, FORMAT_ND, DT_FLOAT, {224, 224}); - auto transdata1 = builder.AddNode("transdata1", "Transdata", 1, 1, FORMAT_ND, DT_FLOAT, {224, 224}); - auto netoutput1 = builder.AddNode("netoutput1", "Netoutput", 2, 0); + auto transdata1 = builder.AddNode("transdata1", "Transdata", 1, 1, FORMAT_ND, DT_FLOAT, {-1, 224}); + auto netoutput1 = builder.AddNode("netoutput1", "NetOutput", 2, 0); builder.AddDataEdge(var1, 0, transdata1, 0); builder.AddDataEdge(const1, 0, transdata2, 0); @@ -58,10 +58,10 @@ TEST_F(UtestReshapeRecoveryPass, reshape_recovery_with_dynamic_shape) { auto builder = Graph1Builder(); auto graph = builder.GetGraph(); ReshapeRecoveryPass reshape_recovery_pass; - EXPECT_EQ(graph->GetDirectNodesSize(),5); + EXPECT_EQ(graph->GetDirectNodesSize(), 5); Status ret = reshape_recovery_pass.Run(graph); EXPECT_EQ(ret, SUCCESS); - EXPECT_EQ(graph->GetDirectNodesSize(),8); + EXPECT_EQ(graph->GetDirectNodesSize(), 7); auto reshape1 = graph->FindNode("Reshape_ReshapeRecoveryPass_0"); EXPECT_NE(reshape1, nullptr); From 5f63b5ee912e9cd3146987d9dfce50c55ac763f3 Mon Sep 17 00:00:00 2001 From: zhaozhixuan Date: Thu, 6 May 2021 20:41:07 +0800 Subject: [PATCH 3/7] MemcpyAsync in aicore executor. --- ge/hybrid/node_executor/aicore/aicore_op_task.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ge/hybrid/node_executor/aicore/aicore_op_task.cc b/ge/hybrid/node_executor/aicore/aicore_op_task.cc index 68fbf93b..0fcc6299 100644 --- a/ge/hybrid/node_executor/aicore/aicore_op_task.cc +++ b/ge/hybrid/node_executor/aicore/aicore_op_task.cc @@ -377,7 +377,7 @@ Status AiCoreOpTask::UpdateTilingInfo(TaskContext &context) { tiling_key_ = tiling_info.tiling_key; GELOGD("Successfully getting [tiling_key] : %u", tiling_key_); if (tiling_data_.empty()) { - GELOGD("[%s] Tiling data is empty.", op_desc->GsetName().c_str()); + GELOGD("[%s] Tiling data is empty.", op_desc->GetName().c_str()); return SUCCESS; } if (tiling_buffer_ == nullptr) { From e93b37621f9331ab1200144e613efd5cdb7824dc Mon Sep 17 00:00:00 2001 From: zhaozhixuan Date: Fri, 7 May 2021 15:39:08 +0800 Subject: [PATCH 4/7] Optimize performance of single_op executor. --- ge/single_op/single_op.cc | 32 +++++++++++++++++++ ge/single_op/single_op.h | 3 ++ ge/single_op/single_op_model.cc | 32 +++++++++++++++++++ ge/single_op/single_op_model.h | 2 ++ .../ge/single_op/single_op_model_unittest.cc | 17 ++++++++++ tests/ut/ge/single_op/single_op_unittest.cc | 21 +++++++++++- 6 files changed, 106 insertions(+), 1 deletion(-) diff --git a/ge/single_op/single_op.cc b/ge/single_op/single_op.cc index 4b3f17cf..e326f3e0 100755 --- a/ge/single_op/single_op.cc +++ b/ge/single_op/single_op.cc @@ -361,6 +361,37 @@ Status DynamicSingleOp::SetHostTensorValue(const std::vector &input_desc, + const vector &input_buffers) { + for (auto &tensor_map : tensor_with_hostmem_) { + auto index = tensor_map.first; + if (index >= input_desc.size() || index >= input_buffers.size()) { + GELOGE(INTERNAL_ERROR, "[Check][Size]Index %d should smaller then input desc size %zu " + "and input buffers size %zu.", index, input_desc.size(), input_buffers.size()); + return INTERNAL_ERROR; + } + auto ge_tensor_desc = input_desc[index]; + // reconstruct GeTensor by DataBuffer + GeTensorPtr ge_tensor = MakeShared(ge_tensor_desc); + GE_CHECK_NOTNULL(ge_tensor); + GELOGD("The %d tensor input type is host, desc data type is %d, input buffer addr is %p, size is %ld.", + index, ge_tensor_desc.GetDataType(), input_buffers[index].data, input_buffers[index].length); + if (ge_tensor->SetData(reinterpret_cast(input_buffers[index].data), + static_cast(input_buffers[index].length)) != SUCCESS) { + GELOGE(INTERNAL_ERROR, "[Set][Data]Failed to set data of ge tensor."); + return INTERNAL_ERROR; + } + for (auto &tensor_desc : tensor_map.second) { + GE_CHECK_NOTNULL(tensor_desc); + if (!AttrUtils::SetTensor(tensor_desc, ATTR_NAME_VALUE, ge_tensor)) { + GELOGE(FAILED, "[Set][ATTR_NAME_VALUE]Failed to set ATTR_NAME_VALUE."); + return FAILED; + } + } + } + return SUCCESS; +} + Status DynamicSingleOp::ExecuteAsync(const vector &input_desc, const vector &input_buffers, vector &output_desc, @@ -374,6 +405,7 @@ Status DynamicSingleOp::ExecuteAsync(const vector &input_desc, if (!inputs_size.empty()) { StreamResource *stream_resource = SingleOpManager::GetInstance().GetResource(resource_id_, stream_); GE_CHK_STATUS_RET_NOLOG(UpdateInputsBufferAddr(stream_resource, stream_, inputs_size, update_buffers)); + GE_CHK_STATUS_RET_NOLOG(SetHostTensorValue(input_desc, input_buffers)); } if (hybrid_model_executor_ != nullptr) { diff --git a/ge/single_op/single_op.h b/ge/single_op/single_op.h index 01d6dfc0..deb4532e 100755 --- a/ge/single_op/single_op.h +++ b/ge/single_op/single_op.h @@ -81,9 +81,12 @@ class DynamicSingleOp { std::vector &outputs) const; Status SetHostTensorValue(const std::vector> &inputs_size, const vector &input_desc, const std::vector &input_buffers); + Status SetHostTensorValue(const vector &input_desc, const vector &input_buffers); std::unique_ptr op_task_; std::unique_ptr hybrid_model_; std::unique_ptr hybrid_model_executor_; + std::map> tensor_with_hostmem_; + uintptr_t resource_id_ = 0; std::mutex *stream_mutex_; rtStream_t stream_ = nullptr; diff --git a/ge/single_op/single_op_model.cc b/ge/single_op/single_op_model.cc index a4135999..6959c6b3 100755 --- a/ge/single_op/single_op_model.cc +++ b/ge/single_op/single_op_model.cc @@ -235,6 +235,13 @@ Status SingleOpModel::LoadAllNodes() { if (op_type == DATA_TYPE || op_type == AIPP_DATA_TYPE) { data_ops_.emplace_back(op_desc); + auto tensor = op_desc->MutableInputDesc(0); + if (AttrUtils::HasAttr(tensor, ATTR_NAME_VALUE)) { + int32_t index = 0; + (void) AttrUtils::GetInt(op_desc, ATTR_NAME_INDEX, index); + GELOGD("Node %s, index %d, has host mem.", node->GetName().c_str(), index); + op_with_hostmem_[index] = node; + } continue; } @@ -616,6 +623,7 @@ Status SingleOpModel::BuildDynamicOp(StreamResource &resource, DynamicSingleOp & if (need_hybrid_model) { GELOGD("Build single op HybridModel."); GE_CHK_STATUS_RET_NOLOG(hybrid::NodeExecutorManager::GetInstance().EnsureInitialized()); + GE_CHK_STATUS(SetHostMemTensor(single_op), "[Init][HostMem]Failed."); auto root_model = model_helper_.GetGeRootModel(); GE_CHECK_NOTNULL(root_model); root_model->SetRootGraph(GraphUtils::GetComputeGraph(ge_model->GetGraph())); @@ -634,4 +642,28 @@ Status SingleOpModel::BuildDynamicOp(StreamResource &resource, DynamicSingleOp & } return BuildTaskListForDynamicOp(&resource, single_op); } + +Status SingleOpModel::SetHostMemTensor(DynamicSingleOp &single_op) { + for (auto &node_map : op_with_hostmem_) { + auto node = node_map.second; + auto out_anchor = node->GetOutDataAnchor(0); + GE_CHECK_NOTNULL(out_anchor); + auto in_anchors = out_anchor->GetPeerInDataAnchors(); + vector tensor_descs; + auto idx = node_map.first; + for (auto anchor : in_anchors) { + GE_CHECK_NOTNULL(anchor); + auto output_node = anchor->GetOwnerNode(); + GE_CHECK_NOTNULL(output_node); + auto op_desc = output_node->GetOpDesc(); + GE_CHECK_NOTNULL(op_desc); + auto tensor_desc = op_desc->MutableInputDesc(anchor->GetIdx()); + tensor_descs.emplace_back(tensor_desc); + GELOGD("Get %d th input tensor desc of %s by %d data node: %s.", anchor->GetIdx(), + output_node->GetName().c_str(), idx, node->GetName().c_str()); + } + single_op.tensor_with_hostmem_[idx] = tensor_descs; + } + return SUCCESS; +} } // namespace ge diff --git a/ge/single_op/single_op_model.h b/ge/single_op/single_op_model.h index d900f09f..e7d07ee0 100755 --- a/ge/single_op/single_op_model.h +++ b/ge/single_op/single_op_model.h @@ -77,6 +77,7 @@ class SingleOpModel { static void ParseOpModelParams(ModelHelper &model_helper, SingleOpModelParam ¶m); void ParseArgTable(OpTask *task, SingleOp &op); Status InitHybridModelExecutor(const StreamResource &resource, const GeModelPtr &ge_model, SingleOp &single_op); + Status SetHostMemTensor(DynamicSingleOp &single_op); std::string model_name_; uint32_t model_id_ = 0; @@ -86,6 +87,7 @@ class SingleOpModel { ModelHelper model_helper_; map op_list_; + map op_with_hostmem_; SingleOpModelParam model_params_; std::vector input_offset_list_; diff --git a/tests/ut/ge/single_op/single_op_model_unittest.cc b/tests/ut/ge/single_op/single_op_model_unittest.cc index dadabaf6..f5d1a83c 100644 --- a/tests/ut/ge/single_op/single_op_model_unittest.cc +++ b/tests/ut/ge/single_op/single_op_model_unittest.cc @@ -27,6 +27,7 @@ #include "single_op/task/tbe_task_builder.h" #undef private #undef protected +#include "graph/passes/graph_builder_utils.h" using namespace std; using namespace testing; @@ -223,3 +224,19 @@ TEST_F(UtestSingleOpModel, test_build_dynamic_op) { model.BuildDynamicOp(res, dynamic_single_op); } +TEST_F(UtestSingleOpModel, test_host_mem) { + string model_data_str = "123456789"; + SingleOpModel model("model", model_data_str.c_str(), model_data_str.size()); + + // make graph + ut::GraphBuilder builder = ut::GraphBuilder("graph"); + auto data = builder.AddNode("Data", "Data", 0, 1); + auto netoutput = builder.AddNode("Netoutput", "NetOutput", 1, 0); + builder.AddDataEdge(data, 0, netoutput, 0); + auto graph = builder.GetGraph(); + model.op_with_hostmem_[0] = data; + + std::mutex stream_mu_; + DynamicSingleOp single_op(0, &stream_mu_, nullptr); + ASSERT_EQ(model.SetHostMemTensor(single_op), SUCCESS); +} diff --git a/tests/ut/ge/single_op/single_op_unittest.cc b/tests/ut/ge/single_op/single_op_unittest.cc index 8c2f6e51..3519811b 100644 --- a/tests/ut/ge/single_op/single_op_unittest.cc +++ b/tests/ut/ge/single_op/single_op_unittest.cc @@ -160,4 +160,23 @@ TEST_F(UtestSingleOp, test_singleop_execute_async2) { EXPECT_EQ(single_op.running_param_->mem_base, nullptr); EXPECT_EQ(single_op.tasks_.size(), 0); EXPECT_EQ(single_op.ExecuteAsync(input_buffers, output_buffers), PARAM_INVALID); -} \ No newline at end of file +} + +TEST_F(UtestSingleOp, test_set_host_mem) { + std::mutex stream_mu_; + DynamicSingleOp single_op(0, &stream_mu_, nullptr); + + vector input_buffers; + DataBuffer data_buffer; + input_buffers.emplace_back(data_buffer); + + vector input_descs; + GeTensorDesc tensor_desc1; + input_descs.emplace_back(tensor_desc1); + + vector op_input_descs; + auto tensor_desc2 = std::make_shared(); + op_input_descs.emplace_back(tensor_desc2); + single_op.tensor_with_hostmem_[0] = op_input_descs; + EXPECT_EQ(single_op.SetHostTensorValue(input_descs, input_buffers), SUCCESS); +} From 3ace2a36b6f927c3cd7a496d6d9bece44584be38 Mon Sep 17 00:00:00 2001 From: zhaozhixuan Date: Fri, 7 May 2021 16:07:23 +0800 Subject: [PATCH 5/7] Optimize performance of single_op executor. --- ge/single_op/single_op.cc | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/ge/single_op/single_op.cc b/ge/single_op/single_op.cc index e326f3e0..36ca1850 100755 --- a/ge/single_op/single_op.cc +++ b/ge/single_op/single_op.cc @@ -364,9 +364,9 @@ Status DynamicSingleOp::SetHostTensorValue(const std::vector &input_desc, const vector &input_buffers) { for (auto &tensor_map : tensor_with_hostmem_) { - auto index = tensor_map.first; + auto index = static_cast(tensor_map.first); if (index >= input_desc.size() || index >= input_buffers.size()) { - GELOGE(INTERNAL_ERROR, "[Check][Size]Index %d should smaller then input desc size %zu " + GELOGE(INTERNAL_ERROR, "[Check][Size]Index %zu should smaller then input desc size %zu " "and input buffers size %zu.", index, input_desc.size(), input_buffers.size()); return INTERNAL_ERROR; } @@ -374,7 +374,7 @@ Status DynamicSingleOp::SetHostTensorValue(const vector &input_des // reconstruct GeTensor by DataBuffer GeTensorPtr ge_tensor = MakeShared(ge_tensor_desc); GE_CHECK_NOTNULL(ge_tensor); - GELOGD("The %d tensor input type is host, desc data type is %d, input buffer addr is %p, size is %ld.", + GELOGD("The %zu tensor input type is host, desc data type is %d, input buffer addr is %p, size is %ld.", index, ge_tensor_desc.GetDataType(), input_buffers[index].data, input_buffers[index].length); if (ge_tensor->SetData(reinterpret_cast(input_buffers[index].data), static_cast(input_buffers[index].length)) != SUCCESS) { From ef1ed8d6accab27943dab84ccfd506e87ea3091a Mon Sep 17 00:00:00 2001 From: zhaozhixuan Date: Sat, 8 May 2021 09:55:13 +0800 Subject: [PATCH 6/7] Optimize performance of single_op executor. --- ge/hybrid/model/hybrid_model_builder.cc | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/ge/hybrid/model/hybrid_model_builder.cc b/ge/hybrid/model/hybrid_model_builder.cc index a047a05b..7949ae7f 100755 --- a/ge/hybrid/model/hybrid_model_builder.cc +++ b/ge/hybrid/model/hybrid_model_builder.cc @@ -364,6 +364,16 @@ Status HybridModelBuilder::ParseDependentInputNodes(NodeItem &node_item, const s const auto &src_node = peer_out_anchor->GetOwnerNode(); GE_CHECK_NOTNULL(src_node); auto src_node_item = MutableNodeItem(src_node); + GE_CHECK_NOTNULL(src_node_item); + if (src_node_item->NodeType() == DATA) { + auto op_desc = src_node_item->GetOpDesc(); + GE_CHECK_NOTNULL(op_desc); + auto tensor = op_desc->MutableInputDesc(0); + if (AttrUtils::HasAttr(tensor, ATTR_NAME_VALUE)) { + GELOGD("Skip d2h memcpy, get hostmem from node %s.", src_node_item->NodeName().c_str()); + continue; + } + } src_node_item->to_const_output_id_list.emplace(peer_out_anchor->GetIdx()); dependent_for_shape_inference.emplace(src_node); host_input_value_dependencies_[&node_item].emplace_back(peer_out_anchor->GetIdx(), src_node_item); From a933ea880c8b047755915c5bb8a0c15eb6d510d4 Mon Sep 17 00:00:00 2001 From: zhaozhixuan Date: Mon, 10 May 2021 09:40:51 +0800 Subject: [PATCH 7/7] Optimize performance of single_op executor. --- ge/hybrid/model/hybrid_model_builder.cc | 10 ---------- 1 file changed, 10 deletions(-) diff --git a/ge/hybrid/model/hybrid_model_builder.cc b/ge/hybrid/model/hybrid_model_builder.cc index 7949ae7f..a047a05b 100755 --- a/ge/hybrid/model/hybrid_model_builder.cc +++ b/ge/hybrid/model/hybrid_model_builder.cc @@ -364,16 +364,6 @@ Status HybridModelBuilder::ParseDependentInputNodes(NodeItem &node_item, const s const auto &src_node = peer_out_anchor->GetOwnerNode(); GE_CHECK_NOTNULL(src_node); auto src_node_item = MutableNodeItem(src_node); - GE_CHECK_NOTNULL(src_node_item); - if (src_node_item->NodeType() == DATA) { - auto op_desc = src_node_item->GetOpDesc(); - GE_CHECK_NOTNULL(op_desc); - auto tensor = op_desc->MutableInputDesc(0); - if (AttrUtils::HasAttr(tensor, ATTR_NAME_VALUE)) { - GELOGD("Skip d2h memcpy, get hostmem from node %s.", src_node_item->NodeName().c_str()); - continue; - } - } src_node_item->to_const_output_id_list.emplace(peer_out_anchor->GetIdx()); dependent_for_shape_inference.emplace(src_node); host_input_value_dependencies_[&node_item].emplace_back(peer_out_anchor->GetIdx(), src_node_item);