@@ -361,6 +361,37 @@ Status DynamicSingleOp::SetHostTensorValue(const std::vector<std::pair<size_t, u | |||||
return SUCCESS; | return SUCCESS; | ||||
} | } | ||||
///
/// @brief For each input registered in tensor_with_hostmem_, rebuild a GeTensor
///        from the caller-provided DataBuffer and attach it as ATTR_NAME_VALUE
///        on every consumer input tensor desc, so downstream passes can read
///        the host value.
/// @param [in] input_desc     input tensor descs, indexed by data index
/// @param [in] input_buffers  host data buffers, indexed by data index
/// @return SUCCESS on success; INTERNAL_ERROR / FAILED otherwise
///
Status DynamicSingleOp::SetHostTensorValue(const vector<GeTensorDesc> &input_desc,
                                           const vector<DataBuffer> &input_buffers) {
  for (const auto &tensor_map : tensor_with_hostmem_) {
    const int32_t index = tensor_map.first;
    // Sign-safe bounds check: reject a negative index explicitly instead of
    // relying on the signed->unsigned conversion in the comparison.
    if (index < 0 || static_cast<size_t>(index) >= input_desc.size() ||
        static_cast<size_t>(index) >= input_buffers.size()) {
      GELOGE(INTERNAL_ERROR, "[Check][Size]Index %d should be smaller than input desc size %zu "
             "and input buffers size %zu.", index, input_desc.size(), input_buffers.size());
      return INTERNAL_ERROR;
    }
    // Reference, not copy: GeTensor's constructor copies what it needs.
    const auto &ge_tensor_desc = input_desc[index];
    // reconstruct GeTensor by DataBuffer
    GeTensorPtr ge_tensor = MakeShared<GeTensor>(ge_tensor_desc);
    GE_CHECK_NOTNULL(ge_tensor);
    GELOGD("The %d tensor input type is host, desc data type is %d, input buffer addr is %p, size is %ld.",
           index, ge_tensor_desc.GetDataType(), input_buffers[index].data, input_buffers[index].length);
    if (ge_tensor->SetData(reinterpret_cast<uint8_t *>(input_buffers[index].data),
                           static_cast<size_t>(input_buffers[index].length)) != SUCCESS) {
      GELOGE(INTERNAL_ERROR, "[Set][Data]Failed to set data of ge tensor.");
      return INTERNAL_ERROR;
    }
    // Propagate the host value to every recorded consumer tensor desc.
    for (auto &tensor_desc : tensor_map.second) {
      GE_CHECK_NOTNULL(tensor_desc);
      if (!AttrUtils::SetTensor(tensor_desc, ATTR_NAME_VALUE, ge_tensor)) {
        GELOGE(FAILED, "[Set][ATTR_NAME_VALUE]Failed to set ATTR_NAME_VALUE.");
        return FAILED;
      }
    }
  }
  return SUCCESS;
}
Status DynamicSingleOp::ExecuteAsync(const vector<GeTensorDesc> &input_desc, | Status DynamicSingleOp::ExecuteAsync(const vector<GeTensorDesc> &input_desc, | ||||
const vector<DataBuffer> &input_buffers, | const vector<DataBuffer> &input_buffers, | ||||
vector<GeTensorDesc> &output_desc, | vector<GeTensorDesc> &output_desc, | ||||
@@ -374,6 +405,7 @@ Status DynamicSingleOp::ExecuteAsync(const vector<GeTensorDesc> &input_desc, | |||||
if (!inputs_size.empty()) { | if (!inputs_size.empty()) { | ||||
StreamResource *stream_resource = SingleOpManager::GetInstance().GetResource(resource_id_, stream_); | StreamResource *stream_resource = SingleOpManager::GetInstance().GetResource(resource_id_, stream_); | ||||
GE_CHK_STATUS_RET_NOLOG(UpdateInputsBufferAddr(stream_resource, stream_, inputs_size, update_buffers)); | GE_CHK_STATUS_RET_NOLOG(UpdateInputsBufferAddr(stream_resource, stream_, inputs_size, update_buffers)); | ||||
GE_CHK_STATUS_RET_NOLOG(SetHostTensorValue(input_desc, input_buffers)); | |||||
} | } | ||||
if (hybrid_model_executor_ != nullptr) { | if (hybrid_model_executor_ != nullptr) { | ||||
@@ -81,9 +81,12 @@ class DynamicSingleOp { | |||||
std::vector<DataBuffer> &outputs) const; | std::vector<DataBuffer> &outputs) const; | ||||
Status SetHostTensorValue(const std::vector<std::pair<size_t, uint64_t>> &inputs_size, | Status SetHostTensorValue(const std::vector<std::pair<size_t, uint64_t>> &inputs_size, | ||||
const vector<GeTensorDesc> &input_desc, const std::vector<DataBuffer> &input_buffers); | const vector<GeTensorDesc> &input_desc, const std::vector<DataBuffer> &input_buffers); | ||||
Status SetHostTensorValue(const vector<GeTensorDesc> &input_desc, const vector<DataBuffer> &input_buffers); | |||||
std::unique_ptr<OpTask> op_task_; | std::unique_ptr<OpTask> op_task_; | ||||
std::unique_ptr<hybrid::HybridModel> hybrid_model_; | std::unique_ptr<hybrid::HybridModel> hybrid_model_; | ||||
std::unique_ptr<hybrid::HybridModelExecutor> hybrid_model_executor_; | std::unique_ptr<hybrid::HybridModelExecutor> hybrid_model_executor_; | ||||
std::map<int32_t, std::vector<GeTensorDescPtr>> tensor_with_hostmem_; | |||||
uintptr_t resource_id_ = 0; | uintptr_t resource_id_ = 0; | ||||
std::mutex *stream_mutex_; | std::mutex *stream_mutex_; | ||||
rtStream_t stream_ = nullptr; | rtStream_t stream_ = nullptr; | ||||
@@ -235,6 +235,13 @@ Status SingleOpModel::LoadAllNodes() { | |||||
if (op_type == DATA_TYPE || op_type == AIPP_DATA_TYPE) { | if (op_type == DATA_TYPE || op_type == AIPP_DATA_TYPE) { | ||||
data_ops_.emplace_back(op_desc); | data_ops_.emplace_back(op_desc); | ||||
auto tensor = op_desc->MutableInputDesc(0); | |||||
if (AttrUtils::HasAttr(tensor, ATTR_NAME_VALUE)) { | |||||
int32_t index = 0; | |||||
(void) AttrUtils::GetInt(op_desc, ATTR_NAME_INDEX, index); | |||||
GELOGD("Node %s, index %d, has host mem.", node->GetName().c_str(), index); | |||||
op_with_hostmem_[index] = node; | |||||
} | |||||
continue; | continue; | ||||
} | } | ||||
@@ -616,6 +623,7 @@ Status SingleOpModel::BuildDynamicOp(StreamResource &resource, DynamicSingleOp & | |||||
if (need_hybrid_model) { | if (need_hybrid_model) { | ||||
GELOGD("Build single op HybridModel."); | GELOGD("Build single op HybridModel."); | ||||
GE_CHK_STATUS_RET_NOLOG(hybrid::NodeExecutorManager::GetInstance().EnsureInitialized()); | GE_CHK_STATUS_RET_NOLOG(hybrid::NodeExecutorManager::GetInstance().EnsureInitialized()); | ||||
GE_CHK_STATUS(SetHostMemTensor(single_op), "[Init][HostMem]Failed."); | |||||
auto root_model = model_helper_.GetGeRootModel(); | auto root_model = model_helper_.GetGeRootModel(); | ||||
GE_CHECK_NOTNULL(root_model); | GE_CHECK_NOTNULL(root_model); | ||||
root_model->SetRootGraph(GraphUtils::GetComputeGraph(ge_model->GetGraph())); | root_model->SetRootGraph(GraphUtils::GetComputeGraph(ge_model->GetGraph())); | ||||
@@ -634,4 +642,28 @@ Status SingleOpModel::BuildDynamicOp(StreamResource &resource, DynamicSingleOp & | |||||
} | } | ||||
return BuildTaskListForDynamicOp(&resource, single_op); | return BuildTaskListForDynamicOp(&resource, single_op); | ||||
} | } | ||||
///
/// @brief For every Data node recorded in op_with_hostmem_, collect the input
///        tensor descs of all its consumers and register them on the dynamic
///        single op (keyed by the data index), so host values can be attached
///        at execution time via SetHostTensorValue.
/// @param [out] single_op  op whose tensor_with_hostmem_ map is populated
/// @return SUCCESS on success; an error status if any graph element is null
///
Status SingleOpModel::SetHostMemTensor(DynamicSingleOp &single_op) {
  for (const auto &node_map : op_with_hostmem_) {
    const auto idx = node_map.first;
    const auto &node = node_map.second;  // avoid a shared_ptr refcount bump per iteration
    auto out_anchor = node->GetOutDataAnchor(0);
    GE_CHECK_NOTNULL(out_anchor);
    vector<GeTensorDescPtr> tensor_descs;
    for (const auto &anchor : out_anchor->GetPeerInDataAnchors()) {
      GE_CHECK_NOTNULL(anchor);
      auto output_node = anchor->GetOwnerNode();
      GE_CHECK_NOTNULL(output_node);
      auto op_desc = output_node->GetOpDesc();
      GE_CHECK_NOTNULL(op_desc);
      auto tensor_desc = op_desc->MutableInputDesc(anchor->GetIdx());
      // Fail fast here instead of storing a null desc for a later stage to hit.
      GE_CHECK_NOTNULL(tensor_desc);
      tensor_descs.emplace_back(tensor_desc);
      GELOGD("Get %d th input tensor desc of %s by %d data node: %s.", anchor->GetIdx(),
             output_node->GetName().c_str(), idx, node->GetName().c_str());
    }
    // Move: the local vector is dead after this assignment.
    single_op.tensor_with_hostmem_[idx] = std::move(tensor_descs);
  }
  return SUCCESS;
}
} // namespace ge | } // namespace ge |
@@ -77,6 +77,7 @@ class SingleOpModel { | |||||
static void ParseOpModelParams(ModelHelper &model_helper, SingleOpModelParam ¶m); | static void ParseOpModelParams(ModelHelper &model_helper, SingleOpModelParam ¶m); | ||||
void ParseArgTable(OpTask *task, SingleOp &op); | void ParseArgTable(OpTask *task, SingleOp &op); | ||||
Status InitHybridModelExecutor(const StreamResource &resource, const GeModelPtr &ge_model, SingleOp &single_op); | Status InitHybridModelExecutor(const StreamResource &resource, const GeModelPtr &ge_model, SingleOp &single_op); | ||||
Status SetHostMemTensor(DynamicSingleOp &single_op); | |||||
std::string model_name_; | std::string model_name_; | ||||
uint32_t model_id_ = 0; | uint32_t model_id_ = 0; | ||||
@@ -86,6 +87,7 @@ class SingleOpModel { | |||||
ModelHelper model_helper_; | ModelHelper model_helper_; | ||||
map<uint32_t, NodePtr> op_list_; | map<uint32_t, NodePtr> op_list_; | ||||
map<int32_t, NodePtr> op_with_hostmem_; | |||||
SingleOpModelParam model_params_; | SingleOpModelParam model_params_; | ||||
std::vector<ptrdiff_t> input_offset_list_; | std::vector<ptrdiff_t> input_offset_list_; | ||||
@@ -27,6 +27,7 @@ | |||||
#include "single_op/task/tbe_task_builder.h" | #include "single_op/task/tbe_task_builder.h" | ||||
#undef private | #undef private | ||||
#undef protected | #undef protected | ||||
#include "graph/passes/graph_builder_utils.h" | |||||
using namespace std; | using namespace std; | ||||
using namespace testing; | using namespace testing; | ||||
@@ -223,3 +224,19 @@ TEST_F(UtestSingleOpModel, test_build_dynamic_op) { | |||||
model.BuildDynamicOp(res, dynamic_single_op); | model.BuildDynamicOp(res, dynamic_single_op); | ||||
} | } | ||||
// SetHostMemTensor should succeed for a minimal Data -> NetOutput graph with
// one registered host-mem data node.
TEST_F(UtestSingleOpModel, test_host_mem) {
  const string model_data_str = "123456789";
  SingleOpModel model("model", model_data_str.c_str(), model_data_str.size());

  // Build the minimal graph: Data -> NetOutput.
  ut::GraphBuilder builder("graph");
  auto data_node = builder.AddNode("Data", "Data", 0, 1);
  auto output_node = builder.AddNode("Netoutput", "NetOutput", 1, 0);
  builder.AddDataEdge(data_node, 0, output_node, 0);
  auto graph = builder.GetGraph();

  // Register the data node as carrying host memory at index 0.
  model.op_with_hostmem_[0] = data_node;

  std::mutex stream_mutex;
  DynamicSingleOp single_op(0, &stream_mutex, nullptr);
  ASSERT_EQ(model.SetHostMemTensor(single_op), SUCCESS);
}
@@ -160,4 +160,23 @@ TEST_F(UtestSingleOp, test_singleop_execute_async2) { | |||||
EXPECT_EQ(single_op.running_param_->mem_base, nullptr); | EXPECT_EQ(single_op.running_param_->mem_base, nullptr); | ||||
EXPECT_EQ(single_op.tasks_.size(), 0); | EXPECT_EQ(single_op.tasks_.size(), 0); | ||||
EXPECT_EQ(single_op.ExecuteAsync(input_buffers, output_buffers), PARAM_INVALID); | EXPECT_EQ(single_op.ExecuteAsync(input_buffers, output_buffers), PARAM_INVALID); | ||||
} | |||||
} | |||||
// SetHostTensorValue should succeed when one host-mem entry at index 0 has a
// matching input desc and buffer.
TEST_F(UtestSingleOp, test_set_host_mem) {
  std::mutex stream_mu_;
  DynamicSingleOp single_op(0, &stream_mu_, nullptr);

  // One (empty) input buffer and one matching tensor desc at index 0.
  DataBuffer data_buffer;
  vector<DataBuffer> input_buffers;
  input_buffers.push_back(data_buffer);
  GeTensorDesc tensor_desc1;
  vector<GeTensorDesc> input_descs;
  input_descs.push_back(tensor_desc1);

  // Register a single consumer tensor desc for host-mem input 0.
  auto tensor_desc2 = std::make_shared<GeTensorDesc>();
  vector<GeTensorDescPtr> op_input_descs;
  op_input_descs.push_back(tensor_desc2);
  single_op.tensor_with_hostmem_[0] = op_input_descs;

  EXPECT_EQ(single_op.SetHostTensorValue(input_descs, input_buffers), SUCCESS);
}