From: @zhangxiaokun9 Reviewed-by: @xchu42,@wqtshg,@ji_chen Signed-off-by: @ji_chentags/v1.2.0
@@ -163,7 +163,6 @@ DavinciModel::~DavinciModel() { | |||
op_list_.clear(); | |||
data_op_list_.clear(); | |||
output_op_list_.clear(); | |||
tensor_name_to_fixed_addr_size_.clear(); | |||
tensor_name_to_peer_output_index_.clear(); | |||
GE_DELETE_NEW_SINGLE(data_inputer_); | |||
@@ -830,12 +829,11 @@ Status DavinciModel::InitNodes(const ComputeGraphPtr &compute_graph) { | |||
{CASE, &DavinciModel::InitCase}, | |||
}; | |||
GE_CHK_STATUS_RET(InitInputOutputForDynamic(compute_graph), "InitInputOutputForDynamic failed."); | |||
vector<OpDescPtr> output_op_list; | |||
map<uint32_t, OpDescPtr> data_by_index; | |||
auto nodes = compute_graph->GetAllNodes(); | |||
const CustAICPUKernelStore &aicpu_kernel_store = ge_model_->GetCustAICPUKernelStore(); | |||
for (size_t i = 0; i < nodes.size(); i++) { | |||
for (size_t i = 0; i < nodes.size(); ++i) { | |||
auto node = nodes.at(i); | |||
auto op_desc = node->GetOpDesc(); | |||
if (op_desc == nullptr) { | |||
@@ -850,7 +848,7 @@ Status DavinciModel::InitNodes(const ComputeGraphPtr &compute_graph) { | |||
GE_TIMESTAMP_ADD(LoadTBEKernelBinToOpDesc); | |||
if (IsDataOp(op_desc->GetType())) { | |||
if (InitDataOp(node, data_op_index, data_by_index) != SUCCESS) { | |||
if (InitDataOp(compute_graph, node, data_op_index, data_by_index) != SUCCESS) { | |||
GELOGE(PARAM_INVALID, "Data init failed, Name: %s", op_desc->GetName().c_str()); | |||
return PARAM_INVALID; | |||
} | |||
@@ -859,7 +857,7 @@ Status DavinciModel::InitNodes(const ComputeGraphPtr &compute_graph) { | |||
} | |||
if (op_desc->GetType() == NETOUTPUT) { | |||
if (InitNetOutput(node) != SUCCESS) { | |||
if (InitNetOutput(compute_graph, node, output_op_list) != SUCCESS) { | |||
GELOGE(PARAM_INVALID, "NetOutput init failed, Name: %s", op_desc->GetName().c_str()); | |||
return PARAM_INVALID; | |||
} | |||
@@ -919,33 +917,10 @@ Status DavinciModel::InitNodes(const ComputeGraphPtr &compute_graph) { | |||
} | |||
GE_TIMESTAMP_ADD(InitTbeHandle); | |||
} | |||
AdjustDataOpList(data_by_index); | |||
GE_TIMESTAMP_CALLNUM_END(LoadTBEKernelBinToOpDesc, "GraphLoader::LoadTBEKernelBinToOpDesc."); | |||
GE_TIMESTAMP_CALLNUM_END(InitTbeHandle, "GraphLoader::InitTbeHandle."); | |||
return SUCCESS; | |||
} | |||
Status DavinciModel::InitInputOutputForDynamic(const ComputeGraphPtr &compute_graph) { | |||
if (!known_node_) return SUCCESS; | |||
// for dynamic shape | |||
auto direct_nodes = compute_graph->GetDirectNode(); | |||
for (size_t i = 0; i < direct_nodes.size(); i++) { | |||
auto node = direct_nodes.at(i); | |||
auto op_desc = node->GetOpDesc(); | |||
if (op_desc == nullptr) { | |||
GELOGE(PARAM_INVALID, "op_desc is null."); | |||
return PARAM_INVALID; | |||
} | |||
if (IsDataOp(op_desc->GetType())) { | |||
GELOGD("init data op %s", op_desc->GetName().c_str()); | |||
data_op_list_.push_back(op_desc); | |||
} | |||
if (op_desc->GetType() == NETOUTPUT) { | |||
GELOGD("init netouput op %s", op_desc->GetName().c_str()); | |||
output_op_list_.push_back(op_desc); | |||
} | |||
} | |||
return SUCCESS; | |||
return OptInputOutputInfo(data_by_index, output_op_list); | |||
} | |||
void DavinciModel::SetLabelForDynamic(const NodePtr &node) { | |||
@@ -963,24 +938,35 @@ void DavinciModel::SetLabelForDynamic(const NodePtr &node) { | |||
} | |||
} | |||
/// | |||
/// @ingroup ge | |||
/// @brief Data Op Initialize. | |||
/// @param [in] ComputeGraphPtr: root graph of the model. | |||
/// @param [in] NodePtr: Data Op. | |||
/// @param [in/out] data_op_index: NetOutput addr size info. | |||
/// @param [in/out] data_op_index: index of courrent count. | |||
/// @param [in/out] data_by_index: Data ordered by index. | |||
/// @return Status | |||
Status DavinciModel::InitDataOp(const NodePtr &node, uint32_t &data_op_index, map<uint32_t, OpDescPtr> &data_by_index) { | |||
/// | |||
Status DavinciModel::InitDataOp(const ComputeGraphPtr &graph, const NodePtr &node, uint32_t &data_op_index, | |||
map<uint32_t, OpDescPtr> &data_by_index) { | |||
// op_desc Checked by Init: Data, valid. | |||
auto op_desc = node->GetOpDesc(); | |||
if (known_node_) { | |||
if (node->GetOwnerComputeGraph() != graph) { | |||
GELOGI("Skip subgraph Data node: %s.", op_desc->GetName().c_str()); | |||
return SUCCESS; | |||
} | |||
uint32_t parent_index = 0; // Ignore subgraph Data Node. | |||
if (AttrUtils::GetInt(op_desc, ATTR_NAME_PARENT_NODE_INDEX, parent_index)) { | |||
GELOGI("Init zero copy by subgraph Data node: %s.", op_desc->GetName().c_str()); | |||
return SUCCESS; | |||
GELOGI("Init Data node: %s.", op_desc->GetName().c_str()); | |||
auto data_index = data_op_index++; | |||
if (AttrUtils::GetInt(op_desc, ATTR_NAME_INDEX, data_index)) { | |||
GELOGD("Get new index %u, old %u", data_index, data_op_index - 1); | |||
} | |||
data_by_index[data_index] = op_desc; | |||
data_op_list_.push_back(op_desc); | |||
if (known_node_) { | |||
return SUCCESS; | |||
} | |||
// Make information for copy input data. | |||
const vector<int64_t> output_size_list = ModelUtils::GetOutputSize(op_desc); | |||
@@ -992,10 +978,7 @@ Status DavinciModel::InitDataOp(const NodePtr &node, uint32_t &data_op_index, ma | |||
op_desc->GetName().c_str(), output_size_list.size(), virtual_addr_list.size(), output_offset_list.size()); | |||
return PARAM_INVALID; | |||
} | |||
auto data_index = data_op_index; | |||
if (AttrUtils::GetInt(op_desc, ATTR_NAME_INDEX, data_index)) { | |||
GELOGD("ge_train: get new index %u, old %u", data_index, data_op_index); | |||
} | |||
bool fusion_flag = false; | |||
ZeroCopyOffset zero_copy_offset; | |||
int64_t data_size = output_size_list[kDataIndex]; | |||
@@ -1006,7 +989,6 @@ Status DavinciModel::InitDataOp(const NodePtr &node, uint32_t &data_op_index, ma | |||
return PARAM_INVALID; | |||
} | |||
new_input_data_info_[data_index] = zero_copy_offset; | |||
data_by_index[data_index] = op_desc; | |||
for (size_t index = 0; index < virtual_addr_list.size(); ++index) { | |||
void *addr = virtual_addr_list.at(index); | |||
@@ -1017,7 +999,6 @@ Status DavinciModel::InitDataOp(const NodePtr &node, uint32_t &data_op_index, ma | |||
new_input_outside_addrs_[addr] = zero_copy_offset; | |||
} | |||
data_op_index++; | |||
return SUCCESS; | |||
} | |||
@@ -1025,18 +1006,52 @@ Status DavinciModel::InitDataOp(const NodePtr &node, uint32_t &data_op_index, ma | |||
/// @ingroup ge | |||
/// @brief Sort Data op list by index. | |||
/// @param [in] data_by_index: map of Data Op. | |||
/// @return | |||
/// @param [in] output_op_list: list of NetOutput op. | |||
/// @return Status | |||
/// | |||
void DavinciModel::AdjustDataOpList(const map<uint32_t, OpDescPtr> &data_by_index) { | |||
Status DavinciModel::OptInputOutputInfo(const map<uint32_t, OpDescPtr> &data_by_index, | |||
const vector<OpDescPtr> &output_op_list) { | |||
GELOGD("Data node size: %zu, NetOutput node size: %zu", data_op_list_.size(), output_op_list.size()); | |||
if (data_by_index.size() != data_op_list_.size()) { | |||
GELOGW("Data map size: %zu, Data list size: %zu.", data_by_index.size(), data_op_list_.size()); | |||
return; | |||
GELOGE(INTERNAL_ERROR, "Data map size: %zu, Data list size: %zu.", data_by_index.size(), data_op_list_.size()); | |||
return INTERNAL_ERROR; | |||
} | |||
data_op_list_.clear(); | |||
for (auto &item : data_by_index) { | |||
data_op_list_.emplace_back(item.second); | |||
auto output_addrs = ModelUtils::GetOutputDataAddrs(runtime_param_, item.second); | |||
GELOGD("Data node: %s, output addr size: %zu", item.second->GetName().c_str(), output_addrs.size()); | |||
input_addrs_list_.emplace_back(output_addrs); | |||
if (item.second->GetType() == AIPP_DATA_TYPE) { | |||
GELOGI("This is dynamic aipp model, Node: %s", item.second->GetName().c_str()); | |||
is_dynamic_aipp_ = true; | |||
} | |||
} | |||
for (const auto &op_desc : output_op_list) { | |||
auto input_addrs = ModelUtils::GetInputDataAddrs(runtime_param_, op_desc); | |||
GELOGD("NetOutput node: %s, input addr size: %zu", op_desc->GetName().c_str(), input_addrs.size()); | |||
output_addrs_list_.emplace_back(input_addrs); | |||
bool getnext_sink_dynamic = false; | |||
if (AttrUtils::GetBool(op_desc, ATTR_GETNEXT_SINK_DYNMAIC, getnext_sink_dynamic) && getnext_sink_dynamic) { | |||
GELOGI("ATTR_GETNEXT_SINK_DYNMAIC has been set and is true, node: %s", op_desc->GetName().c_str()); | |||
is_getnext_sink_dynamic_ = true; | |||
} | |||
vector<string> shape_info; | |||
if (AttrUtils::GetListStr(op_desc, ATTR_NAME_DYNAMIC_OUTPUT_DIMS, shape_info)) { | |||
dynamic_output_shape_info_.insert(dynamic_output_shape_info_.end(), shape_info.begin(), shape_info.end()); | |||
} | |||
if (InitOutputTensorInfo(op_desc) != SUCCESS) { | |||
return INTERNAL_ERROR; | |||
} | |||
} | |||
return InitOutputDescInfo(output_op_list, output_descs_, output_formats_); | |||
} | |||
bool DavinciModel::IsGetNextSinkDynamic(const OpDescPtr &op_desc) { | |||
@@ -1050,24 +1065,27 @@ bool DavinciModel::IsGetNextSinkDynamic(const OpDescPtr &op_desc) { | |||
/// @ingroup ge | |||
/// @brief NetOutput Op Initialize. | |||
/// @param [in] ComputeGraphPtr: root graph of the model. | |||
/// @param [in] NodePtr: NetOutput Op. | |||
/// @param [in/out] vector<OpDescPtr>: All NetOutput node in model. | |||
/// @return Status | |||
Status DavinciModel::InitNetOutput(const NodePtr &node) { | |||
Status DavinciModel::InitNetOutput(const ComputeGraphPtr &graph, const NodePtr &node, | |||
vector<OpDescPtr> &output_op_list) { | |||
// node->GetOpDesc Checked by Init: NetOutput, valid. | |||
auto op_desc = node->GetOpDesc(); | |||
// excludes the function op sub graph, e.g. case,if | |||
if (known_node_) { | |||
if (node->GetOwnerComputeGraph() != graph) { | |||
GELOGI("Skip subgraph NetOutput node: %s.", op_desc->GetName().c_str()); | |||
op_list_.erase(op_desc->GetId()); | |||
return SUCCESS; | |||
} | |||
ComputeGraphPtr owner_graph = node->GetOwnerComputeGraph(); | |||
GE_CHECK_NOTNULL(owner_graph); | |||
if (owner_graph->GetParentGraph() != nullptr) { | |||
GELOGI("Init zero copy by subgraph NetOutput node: %s.", op_desc->GetName().c_str()); | |||
op_list_.erase(op_desc->GetId()); | |||
GELOGI("Init NetOutput node: %s.", op_desc->GetName().c_str()); | |||
output_op_list.push_back(op_desc); | |||
if (known_node_) { | |||
return SUCCESS; | |||
} | |||
output_op_list_.push_back(op_desc); | |||
// Make information for copy output data. | |||
const vector<int64_t> input_size_list = ModelUtils::GetInputSize(op_desc); | |||
const vector<void *> virtual_addr_list = ModelUtils::GetInputDataAddrs(runtime_param_, op_desc); | |||
@@ -1665,32 +1683,30 @@ Status DavinciModel::CpuModelRepeat() { | |||
Status DavinciModel::GetInputOutputDescInfo(vector<InputOutputDescInfo> &input_desc, | |||
vector<InputOutputDescInfo> &output_desc) { | |||
if ((data_op_list_.empty()) || (data_op_list_[0]->GetInputsSize()) != 1) { | |||
if (input_addrs_list_.empty() || input_addrs_list_[0].size() != 1) { | |||
GELOGI("data_op_list_ is empty or input_desc size is not 1."); | |||
} else { | |||
std::vector<uint32_t> input_formats; | |||
vector<uint32_t> input_formats; | |||
GE_CHK_STATUS_RET(GetInputDescInfo(input_desc, input_formats), "get input desc info failed."); | |||
} | |||
std::vector<uint32_t> outputFormats; | |||
GE_CHK_STATUS_RET(GetOutputDescInfo(output_desc, outputFormats), "get output desc info failed."); | |||
vector<uint32_t> output_formats; | |||
GE_CHK_STATUS_RET(GetOutputDescInfo(output_desc, output_formats), "get output desc info failed"); | |||
return SUCCESS; | |||
} | |||
Status DavinciModel::GetInputOutputDescInfo(vector<InputOutputDescInfo> &input_desc, | |||
vector<InputOutputDescInfo> &output_desc, | |||
std::vector<uint32_t> &input_formats, | |||
std::vector<uint32_t> &outputFormats) { | |||
if ((data_op_list_.empty()) || (data_op_list_[0]->GetInputsSize()) != 1) { | |||
vector<uint32_t> &input_formats, | |||
vector<uint32_t> &output_formats) { | |||
if (input_addrs_list_.empty() || input_addrs_list_[0].size() != 1) { | |||
GELOGE(FAILED, "OP List Pointer is null or input_desc size is not 1!"); | |||
return FAILED; | |||
} | |||
GE_CHK_STATUS_RET(GetInputDescInfo(input_desc, input_formats), "get input desc info failed"); | |||
GE_CHK_STATUS_RET(GetOutputDescInfo(output_desc, outputFormats), "get ouput desc info failed"); | |||
GE_CHK_STATUS_RET(GetOutputDescInfo(output_desc, output_formats), "get output desc info failed"); | |||
return SUCCESS; | |||
} | |||
@@ -1828,29 +1844,22 @@ void DavinciModel::GetCurShape(std::vector<int64_t> &batch_info, int32_t &dynami | |||
dynamic_type = dynamic_type_; | |||
} | |||
void DavinciModel::GetModelAttr(std::vector<std::string> &dynamic_output_shape_info) { | |||
for (auto &op : output_op_list_) { | |||
if (op->GetType() != NETOUTPUT) { | |||
continue; | |||
} | |||
if (!AttrUtils::GetListStr(op, ATTR_NAME_DYNAMIC_OUTPUT_DIMS, dynamic_output_shape_info)) { | |||
GELOGD("Can not get dynamic output dims attr"); | |||
} | |||
} | |||
void DavinciModel::GetModelAttr(vector<string> &out_shape_info) { | |||
out_shape_info.insert(out_shape_info.end(), dynamic_output_shape_info_.begin(), dynamic_output_shape_info_.end()); | |||
} | |||
Status DavinciModel::GetInputOutputDescInfoForZeroCopy(vector<InputOutputDescInfo> &input_desc, | |||
vector<InputOutputDescInfo> &output_desc, | |||
std::vector<uint32_t> &input_formats, | |||
std::vector<uint32_t> &outputFormats) { | |||
if ((data_op_list_.empty()) || (1 != data_op_list_[0]->GetInputsSize())) { | |||
std::vector<uint32_t> &output_formats) { | |||
if (input_addrs_list_.empty() || input_addrs_list_[0].size() != kOutputNum) { | |||
GELOGE(FAILED, "OP List Pointer is null or input_desc size is not 1!"); | |||
return FAILED; | |||
} | |||
GE_CHK_STATUS_RET(GetInputDescInfo(input_desc, input_formats), "get input desc info failed"); | |||
GE_CHK_STATUS_RET(GetOutputDescInfo(output_desc, outputFormats), "get ouput desc info failed"); | |||
GE_CHK_STATUS_RET(GetOutputDescInfo(output_desc, output_formats), "get ouput desc info failed"); | |||
GE_CHK_BOOL_RET_STATUS(output_desc.size() == output_memory_size_list_.size(), INTERNAL_ERROR, | |||
"output_desc size[%zu] not equal output_size_list_[%zu] size!", output_desc.size(), | |||
@@ -1939,7 +1948,7 @@ Status DavinciModel::GetInputDescInfo(vector<InputOutputDescInfo> &input_desc, s | |||
return SUCCESS; | |||
} | |||
void DavinciModel::CreateOutput(uint32_t index, OpDescPtr &op_desc, InputOutputDescInfo &output, | |||
void DavinciModel::CreateOutput(uint32_t index, const OpDescPtr &op_desc, InputOutputDescInfo &output, | |||
uint32_t &format_result) { | |||
/// netoutput input tensor desc | |||
GE_IF_BOOL_EXEC(op_desc->GetInputDescPtr(index) == nullptr, GELOGE(FAILED, "OpDesc GetInputDescPtr is nullptr"); | |||
@@ -1992,10 +2001,10 @@ void DavinciModel::CreateOutput(uint32_t index, OpDescPtr &op_desc, InputOutputD | |||
output.data_type = op_desc->GetInputDescPtr(index)->GetDataType(); | |||
} | |||
Status DavinciModel::GetOutputDescInfo(vector<InputOutputDescInfo> &output_desc, std::vector<uint32_t> &formats) { | |||
GELOGD("Output node size: %zu", output_op_list_.size()); | |||
for (size_t i = 0; i < output_op_list_.size(); i++) { | |||
auto &op_desc = output_op_list_[i]; | |||
Status DavinciModel::InitOutputDescInfo(const vector<OpDescPtr> &output_op_list, | |||
vector<InputOutputDescInfo> &output_descs, vector<uint32_t> &output_formats) { | |||
GELOGD("Output node size: %zu", output_op_list.size()); | |||
for (const auto &op_desc : output_op_list) { | |||
uint32_t out_size = static_cast<uint32_t>(op_desc->GetInputsSize()); | |||
for (uint32_t index = 0; index < out_size; index++) { | |||
string output_name; | |||
@@ -2018,13 +2027,19 @@ Status DavinciModel::GetOutputDescInfo(vector<InputOutputDescInfo> &output_desc, | |||
std::to_string(src_index[index]); | |||
} | |||
output.name = output_name; | |||
output_desc.push_back(output); | |||
formats.push_back(format_result); | |||
output_descs.push_back(output); | |||
output_formats.push_back(format_result); | |||
} | |||
} | |||
return SUCCESS; | |||
} | |||
Status DavinciModel::GetOutputDescInfo(vector<InputOutputDescInfo> &output_descs, vector<uint32_t> &output_formats) { | |||
output_descs.insert(output_descs.end(), output_descs_.begin(), output_descs_.end()); | |||
output_formats.insert(output_formats.end(), output_formats_.begin(), output_formats_.end()); | |||
return SUCCESS; | |||
} | |||
ge::Format DavinciModel::GetFormat() { | |||
if ((data_op_list_.empty()) || data_op_list_[0] == nullptr || data_op_list_[0]->GetInputDescPtr(0) == nullptr) { | |||
GELOGW("OP List Pointer is null or input_desc size is not 1!"); | |||
@@ -2362,7 +2377,7 @@ void DavinciModel::SetProfileTime(ModelProcStage stage, int64_t endTime) { | |||
/// @author | |||
/// | |||
Status DavinciModel::CopyOutputData(uint32_t data_id, OutputData &output_data, rtMemcpyKind_t kind) { | |||
if (output_op_list_.empty()) { | |||
if (output_addrs_list_.empty()) { | |||
Status ret = SyncVarData(); | |||
return ret; | |||
} | |||
@@ -2421,20 +2436,12 @@ Status DavinciModel::CopyOutputData(uint32_t data_id, OutputData &output_data, r | |||
return SUCCESS; | |||
} | |||
Status DavinciModel::GenOutputTensorInfo(const OpDescPtr &op_desc, uint32_t data_index, OutputData *output_data, | |||
std::vector<ge::OutputTensorInfo> &outputs) { | |||
GE_CHECK_NOTNULL(op_desc); | |||
GE_CHECK_NOTNULL(output_data); | |||
if (output_data->blobs.size() > data_index) { | |||
GELOGI("No need to generate output tensor info, model id:%u", model_id_); | |||
return SUCCESS; | |||
} | |||
std::vector<int64_t> out_buffer_size_vec; | |||
std::vector<std::vector<int64_t>> shape_info_vec; | |||
Status DavinciModel::InitOutputTensorInfo(const OpDescPtr &op_desc) { | |||
size_t input_num = op_desc->GetInputsSize(); | |||
if (is_getnext_sink_dynamic_) { | |||
input_num = input_num - kGetDynamicDimsCount; | |||
} | |||
for (size_t i = 0; i < input_num; ++i) { | |||
int64_t size = 0; | |||
auto input_desc = op_desc->GetInputDescPtr(i); | |||
@@ -2454,25 +2461,37 @@ Status DavinciModel::GenOutputTensorInfo(const OpDescPtr &op_desc, uint32_t data | |||
} | |||
} | |||
GELOGI("Output size is %ld, output shape is %s.", size, formats::JoinToString(output_shape).c_str()); | |||
out_buffer_size_vec.push_back(size); | |||
shape_info_vec.push_back(output_shape); | |||
output_buffer_size_.push_back(size); | |||
output_shape_info_.push_back(output_shape); | |||
} | |||
return SUCCESS; | |||
} | |||
Status DavinciModel::GenOutputTensorInfo(OutputData *output_data, vector<OutputTensorInfo> &outputs) { | |||
GE_CHECK_NOTNULL(output_data); | |||
if (!output_data->blobs.empty()) { | |||
GELOGI("No need to generate output tensor info, model id:%u", model_id_); | |||
return SUCCESS; | |||
} | |||
GELOGI("Output blobs size:%zu, data index:%u, model id:%u", out_buffer_size_vec.size(), data_index, model_id_); | |||
for (size_t i = 0; i < out_buffer_size_vec.size(); ++i) { | |||
std::unique_ptr<uint8_t[]> data_buf(new (std::nothrow) uint8_t[out_buffer_size_vec[i]]); | |||
GELOGI("Output blobs size:%zu, model id:%u", output_buffer_size_.size(), model_id_); | |||
for (size_t i = 0; i < output_buffer_size_.size(); ++i) { | |||
std::unique_ptr<uint8_t[]> data_buf(new (std::nothrow) uint8_t[output_buffer_size_[i]]); | |||
if (data_buf == nullptr) { | |||
GELOGE(GE_GRAPH_MALLOC_FAILED, "Malloc buffer failed."); | |||
return GE_GRAPH_MALLOC_FAILED; | |||
} | |||
output_data->blobs.push_back({data_buf.get(), static_cast<uint64_t>(out_buffer_size_vec[i]), false}); | |||
output_data->blobs.push_back({data_buf.get(), static_cast<uint64_t>(output_buffer_size_[i]), false}); | |||
ge::OutputTensorInfo output; | |||
output.dims = shape_info_vec[i]; | |||
output.dims = output_shape_info_[i]; | |||
output.data = std::move(data_buf); | |||
output.length = out_buffer_size_vec[i]; | |||
output.length = output_buffer_size_[i]; | |||
outputs.emplace_back(std::move(output)); | |||
GELOGD("Output index:%zu, output dims is %s, data length:%lu.", i, | |||
formats::JoinToString(output.dims).c_str(), output.length); | |||
} | |||
return SUCCESS; | |||
} | |||
@@ -2507,36 +2526,28 @@ Status DavinciModel::ReturnResult(uint32_t data_id, const bool rslt_flg, const b | |||
return INTERNAL_ERROR; | |||
} | |||
if (output_op_list_.empty()) { | |||
if (output_addrs_list_.empty()) { | |||
GELOGW("Output tensor list is empty, model id: %u", model_id_); | |||
GE_CHK_STATUS(listener_->OnComputeDone(model_id_, data_id, INTERNAL_ERROR, outputs), "OnComputeDone failed."); | |||
return INTERNAL_ERROR; | |||
} | |||
GE_CHECK_NOTNULL(output_data); | |||
// index of data in output_data | |||
uint32_t data_index = 0; | |||
output_data->index = data_id; | |||
output_data->model_id = model_id_; | |||
is_getnext_sink_dynamic_ = false; | |||
// copy output data from op to designated position | |||
for (auto &op_desc : output_op_list_) { | |||
if (IsGetNextSinkDynamic(op_desc)) { | |||
GELOGD("Reinit cur dynamic dims when getnext sink dynamic."); | |||
is_getnext_sink_dynamic_ = true; | |||
cur_dynamic_dims_.clear(); | |||
cur_dynamic_dims_.resize(shape_of_cur_dynamic_dims_); | |||
auto ret = rtMemcpy(cur_dynamic_dims_.data(), shape_of_cur_dynamic_dims_ * sizeof(int64_t), | |||
netoutput_last_input_addr_, netoutput_last_input_size_, RT_MEMCPY_DEVICE_TO_HOST); | |||
GE_CHK_RT_RET(ret); | |||
} | |||
GELOGD("Cur dynamic dims is %s.", formats::JoinToString(cur_dynamic_dims_).c_str()); | |||
if (GenOutputTensorInfo(op_desc, data_index, output_data, outputs) != SUCCESS) { | |||
return INTERNAL_ERROR; | |||
} | |||
data_index += op_desc->GetInputsSize(); | |||
if (is_getnext_sink_dynamic_) { | |||
GELOGD("Reinit cur dynamic dims when getnext sink dynamic."); | |||
cur_dynamic_dims_.clear(); | |||
cur_dynamic_dims_.resize(shape_of_cur_dynamic_dims_); | |||
auto ret = rtMemcpy(cur_dynamic_dims_.data(), shape_of_cur_dynamic_dims_ * sizeof(int64_t), | |||
netoutput_last_input_addr_, netoutput_last_input_size_, RT_MEMCPY_DEVICE_TO_HOST); | |||
GE_CHK_RT_RET(ret); | |||
} | |||
GELOGD("Cur dynamic dims is %s.", formats::JoinToString(cur_dynamic_dims_).c_str()); | |||
if (GenOutputTensorInfo(output_data, outputs) != SUCCESS) { | |||
return INTERNAL_ERROR; | |||
} | |||
if (CopyOutputData(data_id, *output_data, RT_MEMCPY_DEVICE_TO_HOST) != SUCCESS) { | |||
@@ -2668,10 +2679,10 @@ void *DavinciModel::Run(DavinciModel *model) { | |||
model->SetProfileTime(MODEL_AFTER_PROC_START)); | |||
GE_TIMESTAMP_START(ReturnResult3); | |||
// copy output data from device to host | |||
GE_IF_BOOL_EXEC(!model->output_op_list_.empty(), | |||
GE_IF_BOOL_EXEC(!model->output_addrs_list_.empty(), | |||
(void)model->ReturnResult(current_data.index, rslt_flg, false, data_wrapper->GetOutput())) | |||
// copy output data from device to host for variable graph | |||
GE_IF_BOOL_EXEC(model->output_op_list_.empty(), (void)model->ReturnNoOutput(current_data.index)); | |||
GE_IF_BOOL_EXEC(model->output_addrs_list_.empty(), (void)model->ReturnNoOutput(current_data.index)); | |||
GE_IF_BOOL_EXEC(model->is_first_execute_, | |||
GE_TIMESTAMP_EVENT_END(ReturnResult3, "GraphExcute::CopyDataFromDeviceToHost")); | |||
GE_IF_BOOL_EXEC(ProfilingManager::Instance().ProfilingModelExecuteOn(), | |||
@@ -2791,30 +2802,49 @@ void DavinciModel::UnbindTaskSinkStream() { | |||
} | |||
} | |||
void *DavinciModel::GetRunAddress(void *addr) const { | |||
if (fixed_mem_base_ == reinterpret_cast<uintptr_t>(mem_base_)) { | |||
return addr; | |||
} | |||
uintptr_t ptr = reinterpret_cast<uintptr_t>(addr); | |||
if ((fixed_mem_base_ <= ptr) && (ptr < fixed_mem_base_ + runtime_param_.mem_size)) { | |||
return mem_base_ + (ptr - fixed_mem_base_); | |||
} else { | |||
return addr; | |||
} | |||
} | |||
Status DavinciModel::CreateKnownZeroCopyMap(const vector<void *> &inputs, const vector<void *> &outputs) { | |||
GELOGI("DavinciModel::CreateKnownZeroCopyMap in."); | |||
if (inputs.size() > data_op_list_.size()) { | |||
GELOGE(FAILED, "input data addr %zu should less than input op number %zu.", inputs.size(), data_op_list_.size()); | |||
GELOGI("in, inputs size: %zu, input addr size: %zu, outputs size: %zu, output addr size: %zu", | |||
inputs.size(), input_addrs_list_.size(), outputs.size(), output_addrs_list_.size()); | |||
if (inputs.size() > input_addrs_list_.size()) { | |||
GELOGE(FAILED, "input data addr %zu should less than input op num %zu.", inputs.size(), input_addrs_list_.size()); | |||
return FAILED; | |||
} | |||
// remove zero copy addr in last iteration | |||
knonw_input_data_info_.clear(); | |||
knonw_output_data_info_.clear(); | |||
known_input_data_info_.clear(); | |||
known_output_data_info_.clear(); | |||
for (size_t i = 0; i < inputs.size(); ++i) { | |||
const vector<void *> addr_list = ModelUtils::GetOutputDataAddrs(runtime_param_, data_op_list_[i]); | |||
knonw_input_data_info_[addr_list[kDataIndex]] = inputs[i]; | |||
GELOGI("DavinciModel::CreateKnownZeroCopyMap input %zu,v addr %p,p addr %p .", i, addr_list[kDataIndex], inputs[i]); | |||
const vector<void *> &addr_list = input_addrs_list_[i]; | |||
void *addr = GetRunAddress(addr_list[kDataIndex]); | |||
known_input_data_info_[addr] = inputs[i]; | |||
GELOGI("input %zu, v addr %p, r addr %p, p addr %p", i, addr_list[kDataIndex], addr, inputs[i]); | |||
} | |||
if (output_op_list_.size() < kOutputNum) { | |||
GELOGW("output op num in graph is %zu.", output_op_list_.size()); | |||
if (output_addrs_list_.empty()) { | |||
GELOGW("output op num in graph is %zu", output_addrs_list_.size()); | |||
return SUCCESS; | |||
} | |||
const vector<void *> addr_list = ModelUtils::GetInputDataAddrs(runtime_param_, output_op_list_[kDataIndex]); | |||
const vector<void *> &addr_list = output_addrs_list_.front(); | |||
for (size_t i = 0; i < addr_list.size() && i < outputs.size(); ++i) { | |||
knonw_output_data_info_[addr_list[i]] = outputs[i]; | |||
GELOGI("DavinciModel::CreateKnownZeroCopyMap output %zu,v addr %p,p addr %p .", i, addr_list[i], outputs[i]); | |||
void *addr = GetRunAddress(addr_list[i]); | |||
known_output_data_info_[addr] = outputs[i]; | |||
GELOGI("output %zu, v addr %p, r addr %p, p addr %p", i, addr_list[i], addr, outputs[i]); | |||
} | |||
GELOGI("DavinciModel::CreateKnownZeroCopyMap success."); | |||
GELOGI("success, known input data info size: %zu, known output data info size: %zu", | |||
known_input_data_info_.size(), known_output_data_info_.size()); | |||
return SUCCESS; | |||
} | |||
@@ -2825,40 +2855,30 @@ void DavinciModel::SetTotalIOAddrs(const vector<void *> &io_addrs) { | |||
} | |||
for (size_t i = 0; i < io_addrs.size(); ++i) { | |||
uintptr_t addr = reinterpret_cast<uintptr_t>(io_addrs[i]); | |||
if ((fixed_mem_base_ <= addr) && (addr < fixed_mem_base_ + runtime_param_.mem_size)) { | |||
total_io_addrs_.emplace_back(mem_base_ + (addr - fixed_mem_base_)); | |||
} else { | |||
total_io_addrs_.emplace_back(io_addrs[i]); | |||
} | |||
total_io_addrs_.emplace_back(GetRunAddress(io_addrs[i])); | |||
} | |||
} | |||
Status DavinciModel::UpdateKnownZeroCopyAddr(vector<void *> &total_io_addrs) { | |||
if (fixed_mem_base_ != reinterpret_cast<uintptr_t>(mem_base_)) { | |||
for (size_t i = 0; i < total_io_addrs.size(); ++i) { | |||
uintptr_t addr = reinterpret_cast<uintptr_t>(total_io_addrs[i]); | |||
if ((fixed_mem_base_ <= addr) && (addr < fixed_mem_base_ + runtime_param_.mem_size)) { | |||
total_io_addrs[i] = mem_base_ + (addr - fixed_mem_base_); | |||
} | |||
total_io_addrs[i] = GetRunAddress(total_io_addrs[i]); | |||
} | |||
} | |||
for (size_t i = 0; i < total_io_addrs.size(); ++i) { | |||
auto it_in = knonw_input_data_info_.find(total_io_addrs[i]); | |||
if (it_in != knonw_input_data_info_.end()) { | |||
GELOGI("DavinciModel::UpdateKnownZeroCopyAddr input %zu,v addr %p,p addr %p .", i, total_io_addrs[i], | |||
knonw_input_data_info_.at(total_io_addrs[i])); | |||
total_io_addrs[i] = knonw_input_data_info_.at(total_io_addrs[i]); | |||
auto it_in = known_input_data_info_.find(total_io_addrs[i]); | |||
if (it_in != known_input_data_info_.end()) { | |||
GELOGI("input %zu, v addr %p, p addr %p", i, total_io_addrs[i], known_input_data_info_.at(total_io_addrs[i])); | |||
total_io_addrs[i] = known_input_data_info_.at(total_io_addrs[i]); | |||
} | |||
auto it_out = knonw_output_data_info_.find(total_io_addrs[i]); | |||
if (it_out != knonw_output_data_info_.end()) { | |||
GELOGI("DavinciModel::UpdateKnownZeroCopyAddr output %zu,v addr %p,p addr %p .", i, total_io_addrs[i], | |||
knonw_output_data_info_.at(total_io_addrs[i])); | |||
total_io_addrs[i] = knonw_output_data_info_.at(total_io_addrs[i]); | |||
auto it_out = known_output_data_info_.find(total_io_addrs[i]); | |||
if (it_out != known_output_data_info_.end()) { | |||
GELOGI("output %zu, v addr %p, p addr %p", i, total_io_addrs[i], known_output_data_info_.at(total_io_addrs[i])); | |||
total_io_addrs[i] = known_output_data_info_.at(total_io_addrs[i]); | |||
} | |||
} | |||
GELOGI("DavinciModel::UpdateKnownZeroCopyAddr success."); | |||
GELOGI("success, total io addrs size: %zu", total_io_addrs.size()); | |||
return SUCCESS; | |||
} | |||
@@ -3159,15 +3179,8 @@ bool DavinciModel::CheckInputAndModelSize(const int64_t &input_size, const int64 | |||
"MAY cause inference result ERROR, please check model input", | |||
input_size, op_size); | |||
} | |||
bool is_dynamic_aipp = false; | |||
for (const auto &op_desc : data_op_list_) { | |||
if (op_desc->GetType() == AIPP_DATA_TYPE) { | |||
GELOGI("This is dynamic aipp model."); | |||
is_dynamic_aipp = true; | |||
break; | |||
} | |||
} | |||
if (is_dynamic_aipp) { | |||
if (is_dynamic_aipp_) { | |||
GELOGI("This is dynamic aipp model, no need to judge smaller input size"); | |||
return true; | |||
} | |||
@@ -49,6 +49,10 @@ | |||
#include "task_info/task_info.h" | |||
#include "graph/common/local_context.h" | |||
using std::mutex; | |||
using std::thread; | |||
using std::multimap; | |||
namespace ge { | |||
// op debug need 2048 bits buffer | |||
const size_t kOpDebugMemorySize = 2048UL; | |||
@@ -84,11 +88,11 @@ struct SuperKernelTaskInfo { | |||
uint32_t last_stream_id; | |||
void *last_stream; | |||
void *last_sm_desc; | |||
std::vector<void *> kernel_list; | |||
std::vector<void *> arg_list; | |||
std::vector<uint32_t> dump_flag_list; | |||
std::vector<OpDescPtr> op_desc_list; | |||
std::vector<uintptr_t> dump_args_list; | |||
vector<void *> kernel_list; | |||
vector<void *> arg_list; | |||
vector<uint32_t> dump_flag_list; | |||
vector<OpDescPtr> op_desc_list; | |||
vector<uintptr_t> dump_args_list; | |||
uint32_t last_dump_flag; | |||
int64_t last_group_key; | |||
uintptr_t last_dump_args; | |||
@@ -123,7 +127,7 @@ class DavinciModel { | |||
/// @brief DavinciModel constructor | |||
/// @author | |||
/// | |||
DavinciModel(int32_t priority, const std::shared_ptr<ModelListener> &listener); | |||
DavinciModel(int32_t priority, const shared_ptr<ModelListener> &listener); | |||
/// | |||
/// @ingroup ge | |||
@@ -153,7 +157,7 @@ class DavinciModel { | |||
/// @param [in] output_que_ids: input queue ids from user, nums equal NetOutput Op. | |||
/// @return: 0 for success / others for fail | |||
/// | |||
Status SetQueIds(const std::vector<uint32_t> &input_queue_ids, const std::vector<uint32_t> &output_queue_ids); | |||
Status SetQueIds(const vector<uint32_t> &input_queue_ids, const vector<uint32_t> &output_queue_ids); | |||
/// | |||
/// @ingroup ge | |||
@@ -223,13 +227,14 @@ class DavinciModel { | |||
// get total mem size | |||
size_t TotalMemSize() const { return runtime_param_.mem_size; } | |||
const std::map<uint32_t, MemInfo> &P2PMemInfos() const {return runtime_param_.memory_infos;} | |||
const map<uint32_t, MemInfo> &P2PMemInfos() const { return runtime_param_.memory_infos; } | |||
// model name | |||
string Name() const { return name_; } | |||
// om_name | |||
string OmName() const { return om_name_; } | |||
// version | |||
uint32_t Version() const { return version_; } | |||
@@ -255,9 +260,6 @@ class DavinciModel { | |||
Status DestroyThread(); | |||
// Get Data Op. | |||
const vector<OpDescPtr> &GetDataList() const { return data_op_list_; } | |||
// get Op | |||
OpDescPtr GetOpByIndex(uint32_t index) const { | |||
if (op_list_.find(index) == op_list_.end()) { | |||
@@ -274,11 +276,12 @@ class DavinciModel { | |||
} | |||
return nullptr; | |||
} | |||
// get task info for profiling | |||
const std::vector<TaskDescInfo> &GetTaskDescInfo() const { return task_desc_info_; } | |||
const vector<TaskDescInfo> &GetTaskDescInfo() const { return task_desc_info_; } | |||
// get updated task info list | |||
std::vector<TaskInfoPtr> GetTaskList() { return task_list_; } | |||
vector<TaskInfoPtr> GetTaskList() { return task_list_; } | |||
// Modified from KernelTaskInfo. | |||
SuperKernelTaskInfo &GetSuperKernelTaskInfo() { return skt_info_; } | |||
@@ -323,7 +326,7 @@ class DavinciModel { | |||
Status GetInputOutputDescInfo(vector<InputOutputDescInfo> &input_desc, vector<InputOutputDescInfo> &output_desc); | |||
Status GetInputOutputDescInfo(vector<InputOutputDescInfo> &input_desc, vector<InputOutputDescInfo> &output_desc, | |||
std::vector<uint32_t> &inputFormats, std::vector<uint32_t> &output_formats); | |||
vector<uint32_t> &inputFormats, vector<uint32_t> &output_formats); | |||
/// | |||
/// @ingroup ge | |||
@@ -332,7 +335,7 @@ class DavinciModel { | |||
/// @param [out] dynamic_type | |||
/// @return execute result | |||
/// | |||
Status GetDynamicBatchInfo(std::vector<std::vector<int64_t>> &batch_info, int32_t &dynamic_type) const; | |||
Status GetDynamicBatchInfo(vector<vector<int64_t>> &batch_info, int32_t &dynamic_type) const; | |||
/// | |||
/// @ingroup ge | |||
@@ -340,13 +343,13 @@ class DavinciModel { | |||
/// @param [out] batch_info | |||
/// @return None | |||
/// | |||
void GetCombinedDynamicDims(std::vector<std::vector<int64_t>> &batch_info) const; | |||
void GetCombinedDynamicDims(vector<vector<int64_t>> &batch_info) const; | |||
void GetUserDesignateShapeOrder(std::vector<std::string> &user_input_shape_order) const; | |||
void GetUserDesignateShapeOrder(vector<string> &user_input_shape_order) const; | |||
void GetCurShape(std::vector<int64_t> &batch_info, int32_t &dynamic_type); | |||
void GetCurShape(vector<int64_t> &batch_info, int32_t &dynamic_type); | |||
void GetModelAttr(std::vector<std::string> &dynamic_output_shape_info); | |||
void GetModelAttr(vector<string> &dynamic_output_shape_info); | |||
/// | |||
/// @ingroup ge | |||
@@ -373,7 +376,7 @@ class DavinciModel { | |||
/// @param [in] string identification: unique identification for current op. | |||
/// @return None | |||
/// | |||
void GetUniqueId(const OpDescPtr &op_desc, std::string &unique_identification); | |||
void GetUniqueId(const OpDescPtr &op_desc, string &unique_identification); | |||
/// | |||
/// @ingroup ge | |||
@@ -384,7 +387,7 @@ class DavinciModel { | |||
/// | |||
Status GetInputOutputDescInfoForZeroCopy(vector<InputOutputDescInfo> &input_desc, | |||
vector<InputOutputDescInfo> &output_desc, | |||
std::vector<uint32_t> &inputFormats, std::vector<uint32_t> &output_formats); | |||
vector<uint32_t> &inputFormats, vector<uint32_t> &output_formats); | |||
Status ReturnResult(uint32_t data_id, const bool rslt_flg, const bool seq_end_flg, OutputData *output_data); | |||
@@ -406,8 +409,6 @@ class DavinciModel { | |||
/// | |||
bool RunFlag() const { return run_flg_; } | |||
Status GetOutputDescInfo(vector<InputOutputDescInfo> &output_desc, std::vector<uint32_t> &formats); | |||
/// | |||
/// @ingroup ge | |||
/// @brief Set Session Id | |||
@@ -453,14 +454,14 @@ class DavinciModel { | |||
/// @ingroup ge | |||
/// @brief Save outside address of Data or NetOutput used info for ZeroCopy. | |||
/// @param [in] const OpDescPtr &op_desc: current op desc | |||
/// @param [in] const std::vector<void *> &outside_addrs: address of task | |||
/// @param [in] const vector<void *> &outside_addrs: address of task | |||
/// @param [in] const void *args_offset: arguments address save the address. | |||
/// @return None. | |||
/// | |||
void SetZeroCopyAddr(const OpDescPtr &op_desc, const std::vector<void *> &outside_addrs, const void *info, void *args, | |||
void SetZeroCopyAddr(const OpDescPtr &op_desc, const vector<void *> &outside_addrs, const void *info, void *args, | |||
size_t size, size_t offset); | |||
void SetDynamicSize(const std::vector<uint64_t> &batch_num, int32_t dynamic_type); | |||
void SetDynamicSize(const vector<uint64_t> &batch_num, int32_t dynamic_type); | |||
bool GetL1FusionEnableOption() { return is_l1_fusion_enable_; } | |||
@@ -476,7 +477,7 @@ class DavinciModel { | |||
data_dumper_.SaveDumpOpInfo(model_param, op, task_id, stream_id); | |||
} | |||
void SaveDumpTask(uint32_t task_id, uint32_t stream_id, const std::shared_ptr<OpDesc> &op_desc, uintptr_t args) { | |||
void SaveDumpTask(uint32_t task_id, uint32_t stream_id, const shared_ptr<OpDesc> &op_desc, uintptr_t args) { | |||
data_dumper_.SaveDumpTask(task_id, stream_id, op_desc, args); | |||
} | |||
@@ -485,7 +486,7 @@ class DavinciModel { | |||
DavinciModel(const DavinciModel &model) = delete; | |||
const map<int64_t, std::vector<rtStream_t>> &GetHcclFolowStream() { | |||
const map<int64_t, vector<rtStream_t>> &GetHcclFolowStream() { | |||
return main_follow_stream_mapping_; | |||
} | |||
void SaveHcclFollowStream(int64_t main_stream_id, rtStream_t stream); | |||
@@ -534,8 +535,8 @@ class DavinciModel { | |||
void SetKnownNodeAddrNotChanged(bool base_addr_not_changed) { base_addr_not_changed_ = base_addr_not_changed; } | |||
Status GetOrigInputInfo(uint32_t index, OriginInputInfo &orig_input_info); | |||
Status GetAllAippInputOutputDims(uint32_t index, std::vector<InputOutputDims> &input_dims, | |||
std::vector<InputOutputDims> &output_dims); | |||
Status GetAllAippInputOutputDims(uint32_t index, vector<InputOutputDims> &input_dims, | |||
vector<InputOutputDims> &output_dims); | |||
void SetModelDescVersion(bool is_new_model_desc) { is_new_model_desc_ = is_new_model_desc; } | |||
// om file name | |||
void SetOmName(string om_name) { om_name_ = om_name; } | |||
@@ -546,7 +547,6 @@ class DavinciModel { | |||
bool GetOpDescInfo(uint32_t stream_id, uint32_t task_id, OpDescInfo &op_desc_info) const { | |||
return data_dumper_.GetOpDescInfo(stream_id, task_id, op_desc_info); | |||
} | |||
Status InitInputOutputForDynamic(const ComputeGraphPtr &compute_graph); | |||
private: | |||
// memory address of weights | |||
@@ -566,6 +566,8 @@ class DavinciModel { | |||
struct timeInfo time_info_; | |||
int32_t dataInputTid; | |||
void *GetRunAddress(void *addr) const; | |||
/// | |||
/// @ingroup ge | |||
/// @brief Copy Check input size and model op size. | |||
@@ -603,7 +605,7 @@ class DavinciModel { | |||
/// @param [in] batch_label: batch label for multi-batch scenes | |||
/// @return SUCCESS handle successfully / others handle failed | |||
/// | |||
Status UpdateIoTaskArgs(const std::map<uint32_t, ZeroCopyOffset> &data_info, bool is_input, | |||
Status UpdateIoTaskArgs(const map<uint32_t, ZeroCopyOffset> &data_info, bool is_input, | |||
const vector<DataBuffer> &blobs, bool is_dynamic, const string &batch_label); | |||
Status CopyInputData(const InputData &input_data, bool device_data = false); | |||
@@ -619,7 +621,8 @@ class DavinciModel { | |||
void SetInputDimsInfo(const vector<int64_t> &model_input_dims, Format &format, InputOutputDescInfo &input); | |||
Status GetInputDescInfo(vector<InputOutputDescInfo> &input_desc, std::vector<uint32_t> &formats); | |||
Status GetInputDescInfo(vector<InputOutputDescInfo> &input_desc, vector<uint32_t> &input_formats); | |||
Status GetOutputDescInfo(vector<InputOutputDescInfo> &output_desc, vector<uint32_t> &output_formats); | |||
Status InitTaskInfo(domi::ModelTaskDef &modelTaskInfo); | |||
@@ -631,7 +634,7 @@ class DavinciModel { | |||
uint8_t *MallocWeightsMem(size_t weights_size); | |||
uint8_t* MallocP2PMem(size_t p2p_data_size); | |||
uint8_t *MallocP2PMem(size_t p2p_data_size); | |||
void FreeFeatureMapMem(); | |||
@@ -663,27 +666,33 @@ class DavinciModel { | |||
/// | |||
/// @ingroup ge | |||
/// @brief Data Op Initialize. | |||
/// @param [in] ComputeGraphPtr: root graph of the model. | |||
/// @param [in] NodePtr: Data Op. | |||
/// @param [in/out] data_op_index: NetOutput addr size info. | |||
/// @param [in/out] data_op_index: index of courrent count. | |||
/// @param [in/out] data_by_index: Data ordered by index. | |||
/// @return Status | |||
/// | |||
Status InitDataOp(const NodePtr &node, uint32_t &data_op_index, map<uint32_t, OpDescPtr> &data_by_index); | |||
Status InitDataOp(const ComputeGraphPtr &graph, const NodePtr &node, uint32_t &data_op_index, | |||
map<uint32_t, OpDescPtr> &data_by_index); | |||
/// | |||
/// @ingroup ge | |||
/// @brief Sort Data op list by index. | |||
/// @param [in] data_by_index: map of Data Op. | |||
/// @return | |||
/// @param [in] output_op_list: list of NetOutput op. | |||
/// @return Status | |||
/// | |||
void AdjustDataOpList(const map<uint32_t, OpDescPtr> &data_by_index); | |||
Status OptInputOutputInfo(const map<uint32_t, OpDescPtr> &data_by_index, const vector<OpDescPtr> &output_op_list); | |||
/// | |||
/// @ingroup ge | |||
/// @brief NetOutput Op Initialize. | |||
/// @param [in] ComputeGraphPtr: root graph of the model. | |||
/// @param [in] NodePtr: NetOutput Op. | |||
/// @param [in/out] vector<OpDescPtr>: All NetOutput node in model. | |||
/// @return Status | |||
/// | |||
Status InitNetOutput(const NodePtr &node); | |||
Status InitNetOutput(const ComputeGraphPtr &graph, const NodePtr &node, vector<OpDescPtr> &output_op_list); | |||
/// | |||
/// @ingroup ge | |||
@@ -722,7 +731,7 @@ class DavinciModel { | |||
/// | |||
Status InitTbeHandle(const OpDescPtr &op_desc); | |||
void StoreTbeHandle(const std::string &handle_key); | |||
void StoreTbeHandle(const string &handle_key); | |||
void CleanTbeHandle(); | |||
/// | |||
@@ -753,7 +762,7 @@ class DavinciModel { | |||
/// | |||
Status BindInputQueue(); | |||
Status CpuTaskModelZeroCopy(std::vector<uintptr_t> &mbuf_list, std::map<const void *, ZeroCopyOffset> &outside_addrs); | |||
Status CpuTaskModelZeroCopy(vector<uintptr_t> &mbuf_list, map<const void *, ZeroCopyOffset> &outside_addrs); | |||
/// | |||
/// @ingroup ge | |||
@@ -824,7 +833,7 @@ class DavinciModel { | |||
Status DoTaskSink(); | |||
void CreateOutput(uint32_t index, OpDescPtr &op_desc, InputOutputDescInfo &output, uint32_t &format_result); | |||
void CreateOutput(uint32_t index, const OpDescPtr &op_desc, InputOutputDescInfo &output, uint32_t &format_result); | |||
Status TransAllVarData(ComputeGraphPtr &graph, uint32_t graph_id); | |||
@@ -838,13 +847,16 @@ class DavinciModel { | |||
Status SinkTimeProfile(const InputData ¤t_data); | |||
Status GenOutputTensorInfo(const OpDescPtr &op_desc, uint32_t data_index, OutputData *output_data, | |||
std::vector<ge::OutputTensorInfo> &outputs); | |||
Status InitOutputTensorInfo(const OpDescPtr &op_desc); | |||
Status GenOutputTensorInfo(OutputData *output_data, vector<OutputTensorInfo> &outputs); | |||
void ParseAIPPInfo(std::string in_out_info, InputOutputDims &dims_info); | |||
Status InitOutputDescInfo(const vector<OpDescPtr> &output_op_list, | |||
vector<InputOutputDescInfo> &output_desc, vector<uint32_t> &formats); | |||
void ParseAIPPInfo(string in_out_info, InputOutputDims &dims_info); | |||
void SetLabelForDynamic(const NodePtr &node); | |||
void ParseDynamicOutShape(const std::vector<std::string> &str_info, std::vector<vector<int64_t>> &vec_info); | |||
void ParseDynamicOutShape(const vector<string> &str_info, vector<vector<int64_t>> &vec_info); | |||
bool IsGetNextSinkDynamic(const OpDescPtr &op_desc); | |||
void GetAllGearsInfo(const NodePtr &node); | |||
Status GetGetDynamicDimsNodeInfo(const NodePtr &node); | |||
@@ -866,56 +878,54 @@ class DavinciModel { | |||
GeModelPtr ge_model_; | |||
bool need_destroy_aicpu_kernel_{false}; | |||
vector<std::string> out_node_name_; | |||
vector<string> out_node_name_; | |||
map<uint32_t, OpDescPtr> op_list_; | |||
// data op_desc | |||
vector<OpDescPtr> data_op_list_; | |||
vector<OpDescPtr> output_op_list_; | |||
vector<OpDescPtr> variable_op_list_; | |||
std::map<uint32_t, ZeroCopyOffset> new_input_data_info_; | |||
std::map<uint32_t, ZeroCopyOffset> new_output_data_info_; | |||
std::map<const void *, ZeroCopyOffset> new_input_outside_addrs_; | |||
std::map<const void *, ZeroCopyOffset> new_output_outside_addrs_; | |||
map<uint32_t, ZeroCopyOffset> new_input_data_info_; | |||
map<uint32_t, ZeroCopyOffset> new_output_data_info_; | |||
map<const void *, ZeroCopyOffset> new_input_outside_addrs_; | |||
map<const void *, ZeroCopyOffset> new_output_outside_addrs_; | |||
std::set<const void *> real_virtual_addrs_; | |||
set<const void *> real_virtual_addrs_; | |||
// output op: save cce op actual needed memory size | |||
vector<int64_t> output_memory_size_list_; | |||
std::thread thread_id_; | |||
thread thread_id_; | |||
std::shared_ptr<ModelListener> listener_; | |||
shared_ptr<ModelListener> listener_; | |||
bool run_flg_; | |||
std::mutex mux_run_flg_; | |||
mutex mux_run_flg_; | |||
int32_t priority_; | |||
vector<rtStream_t> stream_list_; | |||
std::mutex all_hccl_stream_list_mutex_; | |||
mutex all_hccl_stream_list_mutex_; | |||
vector<rtStream_t> all_hccl_stream_list_; | |||
// for reuse hccl_follow_stream | |||
std::mutex capacity_of_stream_mutex_; | |||
std::map<int64_t, std::vector<rtStream_t>> main_follow_stream_mapping_; | |||
mutex capacity_of_stream_mutex_; | |||
map<int64_t, vector<rtStream_t>> main_follow_stream_mapping_; | |||
vector<rtEvent_t> event_list_; | |||
vector<rtLabel_t> label_list_; | |||
set<uint32_t> label_id_indication_; | |||
std::mutex outside_addrs_mutex_; | |||
std::vector<ZeroCopyTask> zero_copy_tasks_; // Task used Data or NetOutput addr. | |||
std::set<const void *> copy_only_addrs_; // Address need copy to original place. | |||
mutex outside_addrs_mutex_; | |||
vector<ZeroCopyTask> zero_copy_tasks_; // Task used Data or NetOutput addr. | |||
set<const void *> copy_only_addrs_; // Address need copy to original place. | |||
std::vector<TaskInfoPtr> task_list_; | |||
vector<TaskInfoPtr> task_list_; | |||
// rt_moodel_handle | |||
rtModel_t rt_model_handle_; | |||
@@ -933,39 +943,39 @@ class DavinciModel { | |||
rtAicpuDeployType_t deploy_type_{AICPU_DEPLOY_RESERVED}; | |||
// ACL queue schedule, save queue ids for Init. | |||
std::vector<TaskInfoPtr> cpu_task_list_; | |||
std::vector<uint32_t> input_queue_ids_; // input queue ids created by caller. | |||
std::vector<uint32_t> output_queue_ids_; // output queue ids created by caller. | |||
std::vector<uintptr_t> input_mbuf_list_; // input mbuf created by dequeue task. | |||
std::vector<uintptr_t> output_mbuf_list_; // output mbuf created by dequeue task. | |||
vector<TaskInfoPtr> cpu_task_list_; | |||
vector<uint32_t> input_queue_ids_; // input queue ids created by caller. | |||
vector<uint32_t> output_queue_ids_; // output queue ids created by caller. | |||
vector<uintptr_t> input_mbuf_list_; // input mbuf created by dequeue task. | |||
vector<uintptr_t> output_mbuf_list_; // output mbuf created by dequeue task. | |||
uint64_t session_id_; | |||
uint32_t device_id_; | |||
std::mutex flowctrl_op_index_internal_map_mutex_; | |||
std::map<uint32_t, uint32_t> flowctrl_op_index_internal_map_; | |||
mutex flowctrl_op_index_internal_map_mutex_; | |||
map<uint32_t, uint32_t> flowctrl_op_index_internal_map_; | |||
std::vector<rtStream_t> active_stream_list_; | |||
std::set<uint32_t> active_stream_indication_; | |||
vector<rtStream_t> active_stream_list_; | |||
set<uint32_t> active_stream_indication_; | |||
std::set<uint32_t> hcom_streams_; | |||
set<uint32_t> hcom_streams_; | |||
RuntimeParam runtime_param_; | |||
static std::mutex tvm_bin_mutex_; | |||
std::set<std::string> tvm_bin_kernel_; | |||
static mutex tvm_bin_mutex_; | |||
set<string> tvm_bin_kernel_; | |||
std::map<std::string, uint32_t> used_tbe_handle_map_; | |||
map<string, uint32_t> used_tbe_handle_map_; | |||
// for profiling task and graph info | |||
std::vector<TaskDescInfo> task_desc_info_; | |||
vector<TaskDescInfo> task_desc_info_; | |||
int64_t maxDumpOpNum_; | |||
// for data dump | |||
DataDumper data_dumper_; | |||
uint64_t iterator_count_; | |||
bool is_l1_fusion_enable_; | |||
std::map<OpDescPtr, void *> saved_task_addrs_; | |||
map<OpDescPtr, void *> saved_task_addrs_; | |||
void *l1_fusion_addr_ = nullptr; | |||
bool known_node_ = false; | |||
@@ -976,14 +986,14 @@ class DavinciModel { | |||
void *hybrid_addrs_ = nullptr; | |||
uint32_t total_hybrid_args_size_ = 0; | |||
int64_t total_fixed_addr_size_ = 0; | |||
std::map<const void *, void *> knonw_input_data_info_; | |||
std::map<const void *, void *> knonw_output_data_info_; | |||
map<const void *, void *> known_input_data_info_; | |||
map<const void *, void *> known_output_data_info_; | |||
vector<void *> total_io_addrs_; | |||
vector<void *> orig_total_io_addrs_; | |||
bool base_addr_not_changed_ = false; | |||
vector<vector<int64_t>> batch_info_; | |||
std::vector<std::vector<int64_t>> combined_batch_info_; | |||
vector<vector<int64_t>> combined_batch_info_; | |||
vector<string> user_designate_shape_order_; | |||
int32_t dynamic_type_ = 0; | |||
bool is_dynamic_ = false; | |||
@@ -991,35 +1001,47 @@ class DavinciModel { | |||
vector<uint64_t> batch_size_; | |||
// key: input tensor name, generally rts op; | |||
// value: the fixed addr of input anchor, same as the peer output anchor addr of the peer op | |||
std::map<string, int64_t> tensor_name_to_fixed_addr_size_; | |||
map<string, int64_t> tensor_name_to_fixed_addr_size_; | |||
// key: input tensor name, generally rts op; value: the peer output anchor of the peer op | |||
std::map<string, int64_t> tensor_name_to_peer_output_index_; | |||
map<string, int64_t> tensor_name_to_peer_output_index_; | |||
// if model is first execute | |||
bool is_first_execute_; | |||
// for op debug | |||
std::mutex debug_reg_mutex_; | |||
mutex debug_reg_mutex_; | |||
bool is_op_debug_reg_ = false; | |||
void *op_debug_addr_ = nullptr; | |||
void *p2p_debug_addr_ = nullptr; | |||
bool is_new_model_desc_{false}; | |||
bool is_online_infer_dynamic_ = false; | |||
bool is_getnext_sink_dynamic_ = false; | |||
std::vector<int64_t> cur_dynamic_dims_; | |||
vector<int64_t> cur_dynamic_dims_; | |||
void *netoutput_last_input_addr_ = nullptr; | |||
int64_t netoutput_last_input_size_ = 0; | |||
size_t shape_of_cur_dynamic_dims_ = 0; | |||
// key: input_index: input is merge node; value: each gear info and each output size | |||
std::map<size_t, std::map<vector<int64_t>, int64_t>> merge_nodes_gear_and_real_out_size_info_; | |||
map<size_t, map<vector<int64_t>, int64_t>> merge_nodes_gear_and_real_out_size_info_; | |||
// key: input_index: input is merge node; value: each gear info and each output shape | |||
std::map<size_t, std::map<vector<int64_t>, vector<int64_t>>> merge_nodes_gear_and_real_out_shape_info_; | |||
std::vector<std::vector<int64_t>> all_gears_info_; | |||
map<size_t, map<vector<int64_t>, vector<int64_t>>> merge_nodes_gear_and_real_out_shape_info_; | |||
vector<vector<int64_t>> all_gears_info_; | |||
std::multimap<uint32_t, uint32_t> op_id_map_; | |||
std::vector<ProfileInfo> profile_list_; | |||
multimap<uint32_t, uint32_t> op_id_map_; | |||
vector<ProfileInfo> profile_list_; | |||
// For super kernel. | |||
SuperKernelTaskInfo skt_info_; | |||
bool is_dynamic_aipp_ = false; | |||
vector<string> dynamic_output_shape_info_; | |||
vector<vector<void *>> input_addrs_list_; | |||
vector<vector<void *>> output_addrs_list_; | |||
vector<int64_t> output_buffer_size_; | |||
vector<vector<int64_t>> output_shape_info_; | |||
vector<InputOutputDescInfo> output_descs_; | |||
vector<uint32_t> output_formats_; | |||
}; | |||
} // namespace ge | |||
#endif // GE_GRAPH_LOAD_NEW_MODEL_MANAGER_DAVINCI_MODEL_H_ |
@@ -565,6 +565,7 @@ set(DISTINCT_GRAPH_LOAD_TEST_FILES | |||
"graph/load/end_graph_task_unittest.cc" | |||
"graph/load/new_model_manager_event_manager_unittest.cc" | |||
#"graph/load/output_net_output_unittest.cc" | |||
"graph/load/davinci_model_unittest.cc" | |||
"graph/load/tbe_handle_store_unittest.cc" | |||
"graph/load/hccl_task_info_unittest.cc" | |||
"graph/load/kernel_ex_task_info_unittest.cc" | |||
@@ -0,0 +1,285 @@ | |||
/** | |||
* Copyright 2019-2020 Huawei Technologies Co., Ltd | |||
* | |||
* Licensed under the Apache License, Version 2.0 (the "License"); | |||
* you may not use this file except in compliance with the License. | |||
* You may obtain a copy of the License at | |||
* | |||
* http://www.apache.org/licenses/LICENSE-2.0 | |||
* | |||
* Unless required by applicable law or agreed to in writing, software | |||
* distributed under the License is distributed on an "AS IS" BASIS, | |||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
* See the License for the specific language governing permissions and | |||
* limitations under the License. | |||
*/ | |||
#include <gtest/gtest.h> | |||
#define private public | |||
#define protected public | |||
#include "graph/utils/graph_utils.h" | |||
#include "common/profiling/profiling_manager.h" | |||
#include "graph/load/new_model_manager/davinci_model.h" | |||
using namespace std; | |||
namespace ge { | |||
extern OpDescPtr CreateOpDesc(string name, string type); | |||
class UtestDavinciModel : public testing::Test { | |||
protected: | |||
void SetUp() {} | |||
void TearDown() {} | |||
}; | |||
TEST_F(UtestDavinciModel, init_success) { | |||
DavinciModel model(0, nullptr); | |||
ComputeGraphPtr graph = make_shared<ComputeGraph>("default"); | |||
ProfilingManager::Instance().is_load_profiling_ = true; | |||
GeModelPtr ge_model = make_shared<GeModel>(); | |||
ge_model->SetGraph(GraphUtils::CreateGraphFromComputeGraph(graph)); | |||
AttrUtils::SetInt(ge_model, ATTR_MODEL_MEMORY_SIZE, 5120000); | |||
AttrUtils::SetInt(ge_model, ATTR_MODEL_STREAM_NUM, 1); | |||
shared_ptr<domi::ModelTaskDef> model_task_def = make_shared<domi::ModelTaskDef>(); | |||
ge_model->SetModelTaskDef(model_task_def); | |||
GeTensorDesc tensor(GeShape(), FORMAT_NCHW, DT_FLOAT); | |||
TensorUtils::SetSize(tensor, 512); | |||
OpDescPtr op_input = CreateOpDesc("data", DATA); | |||
op_input->AddInputDesc(tensor); | |||
op_input->AddOutputDesc(tensor); | |||
op_input->SetInputOffset({1024}); | |||
op_input->SetOutputOffset({1024}); | |||
NodePtr node_input = graph->AddNode(op_input); // op_index = 0 | |||
OpDescPtr op_kernel = CreateOpDesc("square", "Square"); | |||
op_kernel->AddInputDesc(tensor); | |||
op_kernel->AddOutputDesc(tensor); | |||
op_kernel->SetInputOffset({1024}); | |||
op_kernel->SetOutputOffset({1024}); | |||
NodePtr node_kernel = graph->AddNode(op_kernel); // op_index = 1 | |||
OpDescPtr op_memcpy = CreateOpDesc("memcpy", MEMCPYASYNC); | |||
op_memcpy->AddInputDesc(tensor); | |||
op_memcpy->AddOutputDesc(tensor); | |||
op_memcpy->SetInputOffset({1024}); | |||
op_memcpy->SetOutputOffset({5120}); | |||
NodePtr node_memcpy = graph->AddNode(op_memcpy); // op_index = 2 | |||
OpDescPtr op_output = CreateOpDesc("output", NETOUTPUT); | |||
op_output->AddInputDesc(tensor); | |||
op_output->SetInputOffset({5120}); | |||
op_output->SetSrcName( { "memcpy" } ); | |||
op_output->SetSrcIndex( { 0 } ); | |||
NodePtr node_output = graph->AddNode(op_output); // op_index = 3 | |||
domi::TaskDef *task_def1 = model_task_def->add_task(); | |||
task_def1->set_stream_id(0); | |||
task_def1->set_type(RT_MODEL_TASK_KERNEL); | |||
domi::KernelDef *kernel_def = task_def1->mutable_kernel(); | |||
kernel_def->set_stub_func("stub_func"); | |||
kernel_def->set_args_size(64); | |||
string args(64, '1'); | |||
kernel_def->set_args(args.data(), 64); | |||
domi::KernelContext *context = kernel_def->mutable_context(); | |||
context->set_op_index(1); | |||
context->set_kernel_type(2); // ccKernelType::TE | |||
uint16_t args_offset[9] = {0}; | |||
context->set_args_offset(args_offset, 9 * sizeof(uint16_t)); | |||
domi::TaskDef *task_def2 = model_task_def->add_task(); | |||
task_def2->set_stream_id(0); | |||
task_def2->set_type(RT_MODEL_TASK_MEMCPY_ASYNC); | |||
domi::MemcpyAsyncDef *memcpy_async = task_def2->mutable_memcpy_async(); | |||
memcpy_async->set_src(1024); | |||
memcpy_async->set_dst(5120); | |||
memcpy_async->set_dst_max(512); | |||
memcpy_async->set_count(1); | |||
memcpy_async->set_kind(RT_MEMCPY_DEVICE_TO_DEVICE); | |||
memcpy_async->set_op_index(2); | |||
EXPECT_EQ(model.Assign(ge_model), SUCCESS); | |||
EXPECT_EQ(model.Init(), SUCCESS); | |||
EXPECT_EQ(model.input_addrs_list_.size(), 1); | |||
EXPECT_EQ(model.output_addrs_list_.size(), 1); | |||
EXPECT_EQ(model.task_list_.size(), 2); | |||
ProfilingManager::Instance().is_load_profiling_ = false; | |||
} | |||
TEST_F(UtestDavinciModel, init_data_op) { | |||
DavinciModel model(0, nullptr); | |||
model.ge_model_ = make_shared<GeModel>(); | |||
model.runtime_param_.mem_base = (uint8_t *)0x08000000; | |||
model.runtime_param_.mem_size = 5120000; | |||
ComputeGraphPtr graph = make_shared<ComputeGraph>("default"); | |||
OpDescPtr op_input = CreateOpDesc("data", DATA); | |||
GeTensorDesc tensor(GeShape(), FORMAT_NCHW, DT_FLOAT); | |||
TensorUtils::SetSize(tensor, 512); | |||
op_input->AddInputDesc(tensor); | |||
op_input->AddOutputDesc(tensor); | |||
op_input->SetInputOffset({1024}); | |||
op_input->SetOutputOffset({5120}); | |||
NodePtr node_input = graph->AddNode(op_input); | |||
OpDescPtr op_output = CreateOpDesc("output", NETOUTPUT); | |||
op_output->AddInputDesc(tensor); | |||
op_output->SetInputOffset({1024}); | |||
op_output->SetSrcName( { "data" } ); | |||
op_output->SetSrcIndex( { 0 } ); | |||
NodePtr node_output = graph->AddNode(op_output); | |||
EXPECT_EQ(model.InitNodes(graph), SUCCESS); | |||
EXPECT_EQ(model.input_addrs_list_.size(), 1); | |||
EXPECT_EQ(model.output_addrs_list_.size(), 1); | |||
EXPECT_EQ(model.op_list_.size(), 2); | |||
} | |||
TEST_F(UtestDavinciModel, init_data_op_subgraph) { | |||
DavinciModel model(0, nullptr); | |||
model.runtime_param_.mem_base = (uint8_t *)0x08000000; | |||
model.runtime_param_.mem_size = 5120000; | |||
ComputeGraphPtr graph = make_shared<ComputeGraph>("default"); | |||
OpDescPtr op_input = CreateOpDesc("data", DATA); | |||
GeTensorDesc tensor(GeShape(), FORMAT_NCHW, DT_FLOAT); | |||
op_input->AddInputDesc(tensor); | |||
op_input->AddOutputDesc(tensor); | |||
op_input->SetInputOffset({1024}); | |||
op_input->SetOutputOffset({5120}); | |||
NodePtr node = graph->AddNode(op_input); | |||
uint32_t data_op_index = 0; | |||
map<uint32_t, OpDescPtr> data_by_index; | |||
EXPECT_EQ(model.InitDataOp(nullptr, node, data_op_index, data_by_index), SUCCESS); | |||
EXPECT_EQ(model.input_addrs_list_.size(), 0); | |||
EXPECT_EQ(model.output_addrs_list_.size(), 0); | |||
EXPECT_EQ(data_op_index, 0); | |||
EXPECT_TRUE(data_by_index.empty()); | |||
} | |||
TEST_F(UtestDavinciModel, init_netoutput_op_subgraph) { | |||
DavinciModel model(0, nullptr); | |||
model.runtime_param_.mem_base = (uint8_t *)0x08000000; | |||
model.runtime_param_.mem_size = 5120000; | |||
ComputeGraphPtr graph = make_shared<ComputeGraph>("default"); | |||
OpDescPtr op_output = CreateOpDesc("output", NETOUTPUT); | |||
GeTensorDesc tensor(GeShape(), FORMAT_NCHW, DT_FLOAT); | |||
op_output->AddInputDesc(tensor); | |||
op_output->SetInputOffset({1024}); | |||
op_output->SetSrcName( { "data" } ); | |||
op_output->SetSrcIndex( { 0 } ); | |||
NodePtr node = graph->AddNode(op_output); | |||
std::vector<OpDescPtr> output_op_list; | |||
EXPECT_EQ(model.InitNetOutput(nullptr, node, output_op_list), SUCCESS); | |||
EXPECT_EQ(model.input_addrs_list_.size(), 0); | |||
EXPECT_EQ(model.output_addrs_list_.size(), 0); | |||
EXPECT_TRUE(output_op_list.empty()); | |||
} | |||
TEST_F(UtestDavinciModel, init_unknown) { | |||
DavinciModel model(0, nullptr); | |||
model.SetKnownNode(true); | |||
ComputeGraphPtr graph = make_shared<ComputeGraph>("default"); | |||
GeModelPtr ge_model = make_shared<GeModel>(); | |||
ge_model->SetGraph(GraphUtils::CreateGraphFromComputeGraph(graph)); | |||
AttrUtils::SetInt(ge_model, ATTR_MODEL_MEMORY_SIZE, 5120000); | |||
AttrUtils::SetInt(ge_model, ATTR_MODEL_STREAM_NUM, 1); | |||
shared_ptr<domi::ModelTaskDef> model_task_def = make_shared<domi::ModelTaskDef>(); | |||
ge_model->SetModelTaskDef(model_task_def); | |||
GeTensorDesc tensor(GeShape(), FORMAT_NCHW, DT_FLOAT); | |||
TensorUtils::SetSize(tensor, 512); | |||
OpDescPtr op_input = CreateOpDesc("data", DATA); | |||
op_input->AddInputDesc(tensor); | |||
op_input->AddOutputDesc(tensor); | |||
op_input->SetInputOffset({1024}); | |||
op_input->SetOutputOffset({1024}); | |||
NodePtr node_input = graph->AddNode(op_input); // op_index = 0 | |||
OpDescPtr op_kernel = CreateOpDesc("square", "Square"); | |||
op_kernel->AddInputDesc(tensor); | |||
op_kernel->AddOutputDesc(tensor); | |||
op_kernel->SetInputOffset({1024}); | |||
op_kernel->SetOutputOffset({1024}); | |||
NodePtr node_kernel = graph->AddNode(op_kernel); // op_index = 1 | |||
OpDescPtr op_memcpy = CreateOpDesc("memcpy", MEMCPYASYNC); | |||
op_memcpy->AddInputDesc(tensor); | |||
op_memcpy->AddOutputDesc(tensor); | |||
op_memcpy->SetInputOffset({1024}); | |||
op_memcpy->SetOutputOffset({5120}); | |||
NodePtr node_memcpy = graph->AddNode(op_memcpy); // op_index = 2 | |||
OpDescPtr op_output = CreateOpDesc("output", NETOUTPUT); | |||
op_output->AddInputDesc(tensor); | |||
op_output->SetInputOffset({5120}); | |||
op_output->SetSrcName( { "memcpy" } ); | |||
op_output->SetSrcIndex( { 0 } ); | |||
NodePtr node_output = graph->AddNode(op_output); // op_index = 3 | |||
domi::TaskDef *task_def1 = model_task_def->add_task(); | |||
task_def1->set_stream_id(0); | |||
task_def1->set_type(RT_MODEL_TASK_KERNEL); | |||
domi::KernelDef *kernel_def = task_def1->mutable_kernel(); | |||
kernel_def->set_stub_func("stub_func"); | |||
kernel_def->set_args_size(64); | |||
string args(64, '1'); | |||
kernel_def->set_args(args.data(), 64); | |||
domi::KernelContext *context = kernel_def->mutable_context(); | |||
context->set_op_index(1); | |||
context->set_kernel_type(2); // ccKernelType::TE | |||
uint16_t args_offset[9] = {0}; | |||
context->set_args_offset(args_offset, 9 * sizeof(uint16_t)); | |||
domi::TaskDef *task_def2 = model_task_def->add_task(); | |||
task_def2->set_stream_id(0); | |||
task_def2->set_type(RT_MODEL_TASK_MEMCPY_ASYNC); | |||
domi::MemcpyAsyncDef *memcpy_async = task_def2->mutable_memcpy_async(); | |||
memcpy_async->set_src(1024); | |||
memcpy_async->set_dst(5120); | |||
memcpy_async->set_dst_max(512); | |||
memcpy_async->set_count(1); | |||
memcpy_async->set_kind(RT_MEMCPY_DEVICE_TO_DEVICE); | |||
memcpy_async->set_op_index(2); | |||
EXPECT_EQ(model.Assign(ge_model), SUCCESS); | |||
EXPECT_EQ(model.Init(), SUCCESS); | |||
EXPECT_EQ(model.input_addrs_list_.size(), 1); | |||
EXPECT_EQ(model.output_addrs_list_.size(), 1); | |||
EXPECT_EQ(model.task_list_.size(), 2); | |||
EXPECT_EQ(model.task_list_[0]->UpdateArgs(), SUCCESS); | |||
EXPECT_EQ(model.task_list_[1]->UpdateArgs(), SUCCESS); | |||
vector<string> out_shape_info; | |||
model.GetModelAttr(out_shape_info); | |||
vector<InputOutputDescInfo> input_descs; | |||
vector<InputOutputDescInfo> output_descs; | |||
EXPECT_EQ(model.GetInputOutputDescInfo(input_descs, output_descs), SUCCESS); | |||
int32_t virtual_addr = 0; | |||
const vector<void *> inputs = { &virtual_addr }; | |||
const vector<void *> outputs = { &virtual_addr }; | |||
EXPECT_EQ(model.UpdateKnownNodeArgs(inputs, outputs), SUCCESS); | |||
} | |||
} // namespace ge |