
Optimize performance of single_op executor.
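
The optimization moves discovery of host-memory inputs out of the execute path: SingleOpModel::LoadAllNodes now records every Data node whose input tensor carries ATTR_NAME_VALUE, BuildDynamicOp resolves the downstream tensor descs once through the new SetHostMemTensor, and DynamicSingleOp::SetHostTensorValue then only refreshes ATTR_NAME_VALUE on the cached descs on each ExecuteAsync call instead of re-traversing the graph.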

tags/v1.3.0
zhaozhixuan · 3 years ago · commit e93b37621f
6 changed files with 106 additions and 1 deletion:
  1. ge/single_op/single_op.cc (+32, -0)
  2. ge/single_op/single_op.h (+3, -0)
  3. ge/single_op/single_op_model.cc (+32, -0)
  4. ge/single_op/single_op_model.h (+2, -0)
  5. tests/ut/ge/single_op/single_op_model_unittest.cc (+17, -0)
  6. tests/ut/ge/single_op/single_op_unittest.cc (+20, -1)

ge/single_op/single_op.cc (+32, -0)

@@ -361,6 +361,37 @@ Status DynamicSingleOp::SetHostTensorValue(const std::vector<std::pair<size_t, u
  return SUCCESS;
}


Status DynamicSingleOp::SetHostTensorValue(const vector<GeTensorDesc> &input_desc,
                                           const vector<DataBuffer> &input_buffers) {
  for (auto &tensor_map : tensor_with_hostmem_) {
    auto index = tensor_map.first;
    if (index >= input_desc.size() || index >= input_buffers.size()) {
      GELOGE(INTERNAL_ERROR, "[Check][Size]Index %d should be smaller than input desc size %zu "
             "and input buffers size %zu.", index, input_desc.size(), input_buffers.size());
      return INTERNAL_ERROR;
    }
    auto ge_tensor_desc = input_desc[index];
    // reconstruct GeTensor from the DataBuffer so it can be attached as an attribute
    GeTensorPtr ge_tensor = MakeShared<GeTensor>(ge_tensor_desc);
    GE_CHECK_NOTNULL(ge_tensor);
    GELOGD("Input tensor %d is a host tensor, desc data type is %d, input buffer addr is %p, size is %ld.",
           index, ge_tensor_desc.GetDataType(), input_buffers[index].data, input_buffers[index].length);
    if (ge_tensor->SetData(reinterpret_cast<uint8_t *>(input_buffers[index].data),
                           static_cast<size_t>(input_buffers[index].length)) != SUCCESS) {
      GELOGE(INTERNAL_ERROR, "[Set][Data]Failed to set data of ge tensor.");
      return INTERNAL_ERROR;
    }
    for (auto &tensor_desc : tensor_map.second) {
      GE_CHECK_NOTNULL(tensor_desc);
      if (!AttrUtils::SetTensor(tensor_desc, ATTR_NAME_VALUE, ge_tensor)) {
        GELOGE(FAILED, "[Set][ATTR_NAME_VALUE]Failed to set ATTR_NAME_VALUE.");
        return FAILED;
      }
    }
  }
  return SUCCESS;
}

Status DynamicSingleOp::ExecuteAsync(const vector<GeTensorDesc> &input_desc,
                                     const vector<DataBuffer> &input_buffers,
                                     vector<GeTensorDesc> &output_desc,
@@ -374,6 +405,7 @@ Status DynamicSingleOp::ExecuteAsync(const vector<GeTensorDesc> &input_desc,
  if (!inputs_size.empty()) {
    StreamResource *stream_resource = SingleOpManager::GetInstance().GetResource(resource_id_, stream_);
    GE_CHK_STATUS_RET_NOLOG(UpdateInputsBufferAddr(stream_resource, stream_, inputs_size, update_buffers));
    GE_CHK_STATUS_RET_NOLOG(SetHostTensorValue(input_desc, input_buffers));
  }


  if (hybrid_model_executor_ != nullptr) {
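
For orientation, below is a minimal standalone sketch of the execute-time path added above. It compiles on its own: the GE types (DataBuffer, GeTensorDesc, GeTensor, AttrUtils) are replaced with simplified stand-ins, so every name in the sketch is illustrative rather than part of the GE API.

// host_tensor_cache_sketch.cc -- a simplified model of the new hot path, not GE code.
#include <cstdint>
#include <cstdio>
#include <map>
#include <memory>
#include <vector>

// Stand-ins for ge::DataBuffer and a tensor desc that carries ATTR_NAME_VALUE.
struct DataBuffer { void *data = nullptr; uint64_t length = 0U; };
struct TensorDesc { std::vector<uint8_t> host_value; };  // models ATTR_NAME_VALUE
using TensorDescPtr = std::shared_ptr<TensorDesc>;

// Models DynamicSingleOp::SetHostTensorValue: for every cached input index,
// rebuild the host tensor from the incoming buffer and push it to each
// downstream tensor desc that was resolved once at build time.
bool SetHostTensorValue(const std::map<int32_t, std::vector<TensorDescPtr>> &tensor_with_hostmem,
                        const std::vector<DataBuffer> &input_buffers) {
  for (const auto &entry : tensor_with_hostmem) {
    const size_t index = static_cast<size_t>(entry.first);
    if (index >= input_buffers.size()) {
      return false;  // mirrors the INTERNAL_ERROR range check
    }
    const auto *src = static_cast<const uint8_t *>(input_buffers[index].data);
    const std::vector<uint8_t> value(src, src + input_buffers[index].length);
    for (const auto &desc : entry.second) {
      if (desc == nullptr) {
        return false;
      }
      desc->host_value = value;  // models AttrUtils::SetTensor(desc, ATTR_NAME_VALUE, tensor)
    }
  }
  return true;
}

int main() {
  // Build-time step (models SingleOpModel::SetHostMemTensor): input 0 feeds one consumer.
  std::map<int32_t, std::vector<TensorDescPtr>> cache;
  cache[0] = {std::make_shared<TensorDesc>()};

  // Execute-time step: only the attribute refresh remains on the hot path.
  int64_t shape_dim = 16;
  std::vector<DataBuffer> inputs(1);
  inputs[0].data = &shape_dim;
  inputs[0].length = sizeof(shape_dim);
  std::printf("ok=%d, cached bytes=%zu\n", SetHostTensorValue(cache, inputs) ? 1 : 0,
              cache[0][0]->host_value.size());
  return 0;
}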


ge/single_op/single_op.h (+3, -0)

@@ -81,9 +81,12 @@ class DynamicSingleOp {
                            std::vector<DataBuffer> &outputs) const;
  Status SetHostTensorValue(const std::vector<std::pair<size_t, uint64_t>> &inputs_size,
                            const vector<GeTensorDesc> &input_desc, const std::vector<DataBuffer> &input_buffers);
  Status SetHostTensorValue(const vector<GeTensorDesc> &input_desc, const vector<DataBuffer> &input_buffers);
  std::unique_ptr<OpTask> op_task_;
  std::unique_ptr<hybrid::HybridModel> hybrid_model_;
  std::unique_ptr<hybrid::HybridModelExecutor> hybrid_model_executor_;
  std::map<int32_t, std::vector<GeTensorDescPtr>> tensor_with_hostmem_;

  uintptr_t resource_id_ = 0;
  std::mutex *stream_mutex_;
  rtStream_t stream_ = nullptr;
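
The new tensor_with_hostmem_ member keys a Data input's index (its ATTR_NAME_INDEX) to the GeTensorDescPtr list of that input's consumers; SingleOpModel::SetHostMemTensor fills it once at build time, pairing it with the new single-argument SetHostTensorValue overload above.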


ge/single_op/single_op_model.cc (+32, -0)

@@ -235,6 +235,13 @@ Status SingleOpModel::LoadAllNodes() {


    if (op_type == DATA_TYPE || op_type == AIPP_DATA_TYPE) {
      data_ops_.emplace_back(op_desc);
      auto tensor = op_desc->MutableInputDesc(0);
      if (AttrUtils::HasAttr(tensor, ATTR_NAME_VALUE)) {
        int32_t index = 0;
        (void) AttrUtils::GetInt(op_desc, ATTR_NAME_INDEX, index);
        GELOGD("Node %s, index %d, has host mem.", node->GetName().c_str(), index);
        op_with_hostmem_[index] = node;
      }
      continue;
    }


@@ -616,6 +623,7 @@ Status SingleOpModel::BuildDynamicOp(StreamResource &resource, DynamicSingleOp &
  if (need_hybrid_model) {
    GELOGD("Build single op HybridModel.");
    GE_CHK_STATUS_RET_NOLOG(hybrid::NodeExecutorManager::GetInstance().EnsureInitialized());
    GE_CHK_STATUS(SetHostMemTensor(single_op), "[Init][HostMem]Failed.");
    auto root_model = model_helper_.GetGeRootModel();
    GE_CHECK_NOTNULL(root_model);
    root_model->SetRootGraph(GraphUtils::GetComputeGraph(ge_model->GetGraph()));
@@ -634,4 +642,28 @@ Status SingleOpModel::BuildDynamicOp(StreamResource &resource, DynamicSingleOp &
  }
  return BuildTaskListForDynamicOp(&resource, single_op);
}

Status SingleOpModel::SetHostMemTensor(DynamicSingleOp &single_op) {
  for (auto &node_map : op_with_hostmem_) {
    auto node = node_map.second;
    auto out_anchor = node->GetOutDataAnchor(0);
    GE_CHECK_NOTNULL(out_anchor);
    auto in_anchors = out_anchor->GetPeerInDataAnchors();
    vector<GeTensorDescPtr> tensor_descs;
    auto idx = node_map.first;
    for (auto anchor : in_anchors) {
      GE_CHECK_NOTNULL(anchor);
      auto output_node = anchor->GetOwnerNode();
      GE_CHECK_NOTNULL(output_node);
      auto op_desc = output_node->GetOpDesc();
      GE_CHECK_NOTNULL(op_desc);
      auto tensor_desc = op_desc->MutableInputDesc(anchor->GetIdx());
      tensor_descs.emplace_back(tensor_desc);
      GELOGD("Get the %d-th input tensor desc of %s from the %d-th data node: %s.", anchor->GetIdx(),
             output_node->GetName().c_str(), idx, node->GetName().c_str());
    }
    single_op.tensor_with_hostmem_[idx] = tensor_descs;
  }
  return SUCCESS;
}
}  // namespace ge
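
A note on the design: SetHostMemTensor walks each recorded Data node's output anchor to its peer input anchors exactly once, at model-build time, and stores the resulting GeTensorDescPtr list in DynamicSingleOp::tensor_with_hostmem_ keyed by the Data node's index. Because those descs are shared pointers into the graph, the per-execution SetHostTensorValue only has to write ATTR_NAME_VALUE through them; no anchor traversal remains on the hot path.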

ge/single_op/single_op_model.h (+2, -0)

@@ -77,6 +77,7 @@ class SingleOpModel {
  static void ParseOpModelParams(ModelHelper &model_helper, SingleOpModelParam &param);
  void ParseArgTable(OpTask *task, SingleOp &op);
  Status InitHybridModelExecutor(const StreamResource &resource, const GeModelPtr &ge_model, SingleOp &single_op);
  Status SetHostMemTensor(DynamicSingleOp &single_op);


  std::string model_name_;
  uint32_t model_id_ = 0;
@@ -86,6 +87,7 @@ class SingleOpModel {
  ModelHelper model_helper_;


  map<uint32_t, NodePtr> op_list_;
  map<int32_t, NodePtr> op_with_hostmem_;
  SingleOpModelParam model_params_;


  std::vector<ptrdiff_t> input_offset_list_;


tests/ut/ge/single_op/single_op_model_unittest.cc (+17, -0)

@@ -27,6 +27,7 @@
#include "single_op/task/tbe_task_builder.h"
#undef private
#undef protected
#include "graph/passes/graph_builder_utils.h"


using namespace std;
using namespace testing;
@@ -223,3 +224,19 @@ TEST_F(UtestSingleOpModel, test_build_dynamic_op) {
  model.BuildDynamicOp(res, dynamic_single_op);
}


TEST_F(UtestSingleOpModel, test_host_mem) {
  string model_data_str = "123456789";
  SingleOpModel model("model", model_data_str.c_str(), model_data_str.size());

  // make graph
  ut::GraphBuilder builder = ut::GraphBuilder("graph");
  auto data = builder.AddNode("Data", "Data", 0, 1);
  auto netoutput = builder.AddNode("Netoutput", "NetOutput", 1, 0);
  builder.AddDataEdge(data, 0, netoutput, 0);
  auto graph = builder.GetGraph();
  model.op_with_hostmem_[0] = data;

  std::mutex stream_mu_;
  DynamicSingleOp single_op(0, &stream_mu_, nullptr);
  ASSERT_EQ(model.SetHostMemTensor(single_op), SUCCESS);
}

tests/ut/ge/single_op/single_op_unittest.cc (+20, -1)

@@ -160,4 +160,23 @@ TEST_F(UtestSingleOp, test_singleop_execute_async2) {
  EXPECT_EQ(single_op.running_param_->mem_base, nullptr);
  EXPECT_EQ(single_op.tasks_.size(), 0);
  EXPECT_EQ(single_op.ExecuteAsync(input_buffers, output_buffers), PARAM_INVALID);
}

TEST_F(UtestSingleOp, test_set_host_mem) {
  std::mutex stream_mu_;
  DynamicSingleOp single_op(0, &stream_mu_, nullptr);
  vector<DataBuffer> input_buffers;
  DataBuffer data_buffer;
  input_buffers.emplace_back(data_buffer);

  vector<GeTensorDesc> input_descs;
  GeTensorDesc tensor_desc1;
  input_descs.emplace_back(tensor_desc1);

  vector<GeTensorDescPtr> op_input_descs;
  auto tensor_desc2 = std::make_shared<GeTensorDesc>();
  op_input_descs.emplace_back(tensor_desc2);
  single_op.tensor_with_hostmem_[0] = op_input_descs;
  EXPECT_EQ(single_op.SetHostTensorValue(input_descs, input_buffers), SUCCESS);
}
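
Together the two tests cover both halves of the new path: SetHostMemTensor populating the desc cache at build time, and SetHostTensorValue consuming it at execute time.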
