From c22b0ebe9ff1ad02a964cf01275009ced6e08912 Mon Sep 17 00:00:00 2001 From: zhangxiaokun Date: Wed, 30 Dec 2020 10:17:59 +0800 Subject: [PATCH] Eliminate output_op_list_ --- .../load/new_model_manager/davinci_model.cc | 355 +++++++++--------- .../load/new_model_manager/davinci_model.h | 204 +++++----- tests/ut/ge/CMakeLists.txt | 1 + .../ge/graph/load/davinci_model_unittest.cc | 285 ++++++++++++++ 4 files changed, 583 insertions(+), 262 deletions(-) create mode 100644 tests/ut/ge/graph/load/davinci_model_unittest.cc diff --git a/ge/graph/load/new_model_manager/davinci_model.cc b/ge/graph/load/new_model_manager/davinci_model.cc index d834a737..7721739b 100755 --- a/ge/graph/load/new_model_manager/davinci_model.cc +++ b/ge/graph/load/new_model_manager/davinci_model.cc @@ -163,7 +163,6 @@ DavinciModel::~DavinciModel() { op_list_.clear(); data_op_list_.clear(); - output_op_list_.clear(); tensor_name_to_fixed_addr_size_.clear(); tensor_name_to_peer_output_index_.clear(); GE_DELETE_NEW_SINGLE(data_inputer_); @@ -830,12 +829,11 @@ Status DavinciModel::InitNodes(const ComputeGraphPtr &compute_graph) { {CASE, &DavinciModel::InitCase}, }; - GE_CHK_STATUS_RET(InitInputOutputForDynamic(compute_graph), "InitInputOutputForDynamic failed."); - + vector output_op_list; map data_by_index; auto nodes = compute_graph->GetAllNodes(); const CustAICPUKernelStore &aicpu_kernel_store = ge_model_->GetCustAICPUKernelStore(); - for (size_t i = 0; i < nodes.size(); i++) { + for (size_t i = 0; i < nodes.size(); ++i) { auto node = nodes.at(i); auto op_desc = node->GetOpDesc(); if (op_desc == nullptr) { @@ -850,7 +848,7 @@ Status DavinciModel::InitNodes(const ComputeGraphPtr &compute_graph) { GE_TIMESTAMP_ADD(LoadTBEKernelBinToOpDesc); if (IsDataOp(op_desc->GetType())) { - if (InitDataOp(node, data_op_index, data_by_index) != SUCCESS) { + if (InitDataOp(compute_graph, node, data_op_index, data_by_index) != SUCCESS) { GELOGE(PARAM_INVALID, "Data init failed, Name: %s", op_desc->GetName().c_str()); return PARAM_INVALID; } @@ -859,7 +857,7 @@ Status DavinciModel::InitNodes(const ComputeGraphPtr &compute_graph) { } if (op_desc->GetType() == NETOUTPUT) { - if (InitNetOutput(node) != SUCCESS) { + if (InitNetOutput(compute_graph, node, output_op_list) != SUCCESS) { GELOGE(PARAM_INVALID, "NetOutput init failed, Name: %s", op_desc->GetName().c_str()); return PARAM_INVALID; } @@ -919,33 +917,10 @@ Status DavinciModel::InitNodes(const ComputeGraphPtr &compute_graph) { } GE_TIMESTAMP_ADD(InitTbeHandle); } - AdjustDataOpList(data_by_index); + GE_TIMESTAMP_CALLNUM_END(LoadTBEKernelBinToOpDesc, "GraphLoader::LoadTBEKernelBinToOpDesc."); GE_TIMESTAMP_CALLNUM_END(InitTbeHandle, "GraphLoader::InitTbeHandle."); - return SUCCESS; -} - -Status DavinciModel::InitInputOutputForDynamic(const ComputeGraphPtr &compute_graph) { - if (!known_node_) return SUCCESS; - // for dynamic shape - auto direct_nodes = compute_graph->GetDirectNode(); - for (size_t i = 0; i < direct_nodes.size(); i++) { - auto node = direct_nodes.at(i); - auto op_desc = node->GetOpDesc(); - if (op_desc == nullptr) { - GELOGE(PARAM_INVALID, "op_desc is null."); - return PARAM_INVALID; - } - if (IsDataOp(op_desc->GetType())) { - GELOGD("init data op %s", op_desc->GetName().c_str()); - data_op_list_.push_back(op_desc); - } - if (op_desc->GetType() == NETOUTPUT) { - GELOGD("init netouput op %s", op_desc->GetName().c_str()); - output_op_list_.push_back(op_desc); - } - } - return SUCCESS; + return OptInputOutputInfo(data_by_index, output_op_list); } void 
DavinciModel::SetLabelForDynamic(const NodePtr &node) { @@ -963,24 +938,35 @@ void DavinciModel::SetLabelForDynamic(const NodePtr &node) { } } +/// /// @ingroup ge /// @brief Data Op Initialize. +/// @param [in] ComputeGraphPtr: root graph of the model. /// @param [in] NodePtr: Data Op. -/// @param [in/out] data_op_index: NetOutput addr size info. +/// @param [in/out] data_op_index: running count of Data nodes. +/// @param [in/out] data_by_index: Data ordered by index. /// @return Status -Status DavinciModel::InitDataOp(const NodePtr &node, uint32_t &data_op_index, map &data_by_index) { +/// +Status DavinciModel::InitDataOp(const ComputeGraphPtr &graph, const NodePtr &node, uint32_t &data_op_index, + map &data_by_index) { // op_desc Checked by Init: Data, valid. auto op_desc = node->GetOpDesc(); - if (known_node_) { + if (node->GetOwnerComputeGraph() != graph) { + GELOGI("Skip subgraph Data node: %s.", op_desc->GetName().c_str()); return SUCCESS; } - uint32_t parent_index = 0; // Ignore subgraph Data Node. - if (AttrUtils::GetInt(op_desc, ATTR_NAME_PARENT_NODE_INDEX, parent_index)) { - GELOGI("Init zero copy by subgraph Data node: %s.", op_desc->GetName().c_str()); - return SUCCESS; + + GELOGI("Init Data node: %s.", op_desc->GetName().c_str()); + auto data_index = data_op_index++; + if (AttrUtils::GetInt(op_desc, ATTR_NAME_INDEX, data_index)) { + GELOGD("Get new index %u, old %u", data_index, data_op_index - 1); } + data_by_index[data_index] = op_desc; data_op_list_.push_back(op_desc); + if (known_node_) { + return SUCCESS; + } // Make information for copy input data. const vector output_size_list = ModelUtils::GetOutputSize(op_desc); @@ -992,10 +978,7 @@ Status DavinciModel::InitDataOp(const NodePtr &node, uint32_t &data_op_index, ma op_desc->GetName().c_str(), output_size_list.size(), virtual_addr_list.size(), output_offset_list.size()); return PARAM_INVALID; } - auto data_index = data_op_index; - if (AttrUtils::GetInt(op_desc, ATTR_NAME_INDEX, data_index)) { - GELOGD("ge_train: get new index %u, old %u", data_index, data_op_index); - } + bool fusion_flag = false; ZeroCopyOffset zero_copy_offset; int64_t data_size = output_size_list[kDataIndex]; @@ -1006,7 +989,6 @@ Status DavinciModel::InitDataOp(const NodePtr &node, uint32_t &data_op_index, ma return PARAM_INVALID; } new_input_data_info_[data_index] = zero_copy_offset; - data_by_index[data_index] = op_desc; for (size_t index = 0; index < virtual_addr_list.size(); ++index) { void *addr = virtual_addr_list.at(index); @@ -1017,7 +999,6 @@ Status DavinciModel::InitDataOp(const NodePtr &node, uint32_t &data_op_index, ma new_input_outside_addrs_[addr] = zero_copy_offset; } - data_op_index++; return SUCCESS; } @@ -1025,18 +1006,52 @@ Status DavinciModel::InitDataOp(const NodePtr &node, uint32_t &data_op_index, ma /// @ingroup ge /// @brief Sort Data op list by index. /// @param [in] data_by_index: map of Data Op. -/// @return +/// @param [in] output_op_list: list of NetOutput ops.
+/// @return Status /// -void DavinciModel::AdjustDataOpList(const map &data_by_index) { +Status DavinciModel::OptInputOutputInfo(const map &data_by_index, + const vector &output_op_list) { + GELOGD("Data node size: %zu, NetOutput node size: %zu", data_op_list_.size(), output_op_list.size()); if (data_by_index.size() != data_op_list_.size()) { - GELOGW("Data map size: %zu, Data list size: %zu.", data_by_index.size(), data_op_list_.size()); - return; + GELOGE(INTERNAL_ERROR, "Data map size: %zu, Data list size: %zu.", data_by_index.size(), data_op_list_.size()); + return INTERNAL_ERROR; } data_op_list_.clear(); for (auto &item : data_by_index) { data_op_list_.emplace_back(item.second); + auto output_addrs = ModelUtils::GetOutputDataAddrs(runtime_param_, item.second); + GELOGD("Data node: %s, output addr size: %zu", item.second->GetName().c_str(), output_addrs.size()); + input_addrs_list_.emplace_back(output_addrs); + + if (item.second->GetType() == AIPP_DATA_TYPE) { + GELOGI("This is dynamic aipp model, Node: %s", item.second->GetName().c_str()); + is_dynamic_aipp_ = true; + } } + + for (const auto &op_desc : output_op_list) { + auto input_addrs = ModelUtils::GetInputDataAddrs(runtime_param_, op_desc); + GELOGD("NetOutput node: %s, input addr size: %zu", op_desc->GetName().c_str(), input_addrs.size()); + output_addrs_list_.emplace_back(input_addrs); + + bool getnext_sink_dynamic = false; + if (AttrUtils::GetBool(op_desc, ATTR_GETNEXT_SINK_DYNMAIC, getnext_sink_dynamic) && getnext_sink_dynamic) { + GELOGI("ATTR_GETNEXT_SINK_DYNMAIC has been set and is true, node: %s", op_desc->GetName().c_str()); + is_getnext_sink_dynamic_ = true; + } + + vector shape_info; + if (AttrUtils::GetListStr(op_desc, ATTR_NAME_DYNAMIC_OUTPUT_DIMS, shape_info)) { + dynamic_output_shape_info_.insert(dynamic_output_shape_info_.end(), shape_info.begin(), shape_info.end()); + } + + if (InitOutputTensorInfo(op_desc) != SUCCESS) { + return INTERNAL_ERROR; + } + } + + return InitOutputDescInfo(output_op_list, output_descs_, output_formats_); } bool DavinciModel::IsGetNextSinkDynamic(const OpDescPtr &op_desc) { @@ -1050,24 +1065,27 @@ bool DavinciModel::IsGetNextSinkDynamic(const OpDescPtr &op_desc) { /// @ingroup ge /// @brief NetOutput Op Initialize. +/// @param [in] ComputeGraphPtr: root graph of the model. /// @param [in] NodePtr: NetOutput Op. +/// @param [in/out] vector: All NetOutput nodes in the model. /// @return Status -Status DavinciModel::InitNetOutput(const NodePtr &node) { +Status DavinciModel::InitNetOutput(const ComputeGraphPtr &graph, const NodePtr &node, + vector &output_op_list) { // node->GetOpDesc Checked by Init: NetOutput, valid. auto op_desc = node->GetOpDesc(); // excludes the function op sub graph, e.g. case,if - if (known_node_) { + if (node->GetOwnerComputeGraph() != graph) { + GELOGI("Skip subgraph NetOutput node: %s.", op_desc->GetName().c_str()); + op_list_.erase(op_desc->GetId()); return SUCCESS; } - ComputeGraphPtr owner_graph = node->GetOwnerComputeGraph(); - GE_CHECK_NOTNULL(owner_graph); - if (owner_graph->GetParentGraph() != nullptr) { - GELOGI("Init zero copy by subgraph NetOutput node: %s.", op_desc->GetName().c_str()); - op_list_.erase(op_desc->GetId()); + + GELOGI("Init NetOutput node: %s.", op_desc->GetName().c_str()); + output_op_list.push_back(op_desc); + if (known_node_) { return SUCCESS; } - output_op_list_.push_back(op_desc); // Make information for copy output data.
const vector input_size_list = ModelUtils::GetInputSize(op_desc); const vector virtual_addr_list = ModelUtils::GetInputDataAddrs(runtime_param_, op_desc); @@ -1665,32 +1683,30 @@ Status DavinciModel::CpuModelRepeat() { Status DavinciModel::GetInputOutputDescInfo(vector &input_desc, vector &output_desc) { - if ((data_op_list_.empty()) || (data_op_list_[0]->GetInputsSize()) != 1) { + if (input_addrs_list_.empty() || input_addrs_list_[0].size() != 1) { GELOGI("data_op_list_ is empty or input_desc size is not 1."); } else { - std::vector input_formats; + vector input_formats; GE_CHK_STATUS_RET(GetInputDescInfo(input_desc, input_formats), "get input desc info failed."); } - std::vector outputFormats; - GE_CHK_STATUS_RET(GetOutputDescInfo(output_desc, outputFormats), "get output desc info failed."); - + vector output_formats; + GE_CHK_STATUS_RET(GetOutputDescInfo(output_desc, output_formats), "get output desc info failed"); return SUCCESS; } Status DavinciModel::GetInputOutputDescInfo(vector &input_desc, vector &output_desc, - std::vector &input_formats, - std::vector &outputFormats) { - if ((data_op_list_.empty()) || (data_op_list_[0]->GetInputsSize()) != 1) { + vector &input_formats, + vector &output_formats) { + if (input_addrs_list_.empty() || input_addrs_list_[0].size() != 1) { GELOGE(FAILED, "OP List Pointer is null or input_desc size is not 1!"); return FAILED; } GE_CHK_STATUS_RET(GetInputDescInfo(input_desc, input_formats), "get input desc info failed"); - GE_CHK_STATUS_RET(GetOutputDescInfo(output_desc, outputFormats), "get ouput desc info failed"); - + GE_CHK_STATUS_RET(GetOutputDescInfo(output_desc, output_formats), "get output desc info failed"); return SUCCESS; } @@ -1828,29 +1844,22 @@ void DavinciModel::GetCurShape(std::vector &batch_info, int32_t &dynami dynamic_type = dynamic_type_; } -void DavinciModel::GetModelAttr(std::vector &dynamic_output_shape_info) { - for (auto &op : output_op_list_) { - if (op->GetType() != NETOUTPUT) { - continue; - } - if (!AttrUtils::GetListStr(op, ATTR_NAME_DYNAMIC_OUTPUT_DIMS, dynamic_output_shape_info)) { - GELOGD("Can not get dynamic output dims attr"); - } - } +void DavinciModel::GetModelAttr(vector &out_shape_info) { + out_shape_info.insert(out_shape_info.end(), dynamic_output_shape_info_.begin(), dynamic_output_shape_info_.end()); } Status DavinciModel::GetInputOutputDescInfoForZeroCopy(vector &input_desc, vector &output_desc, std::vector &input_formats, - std::vector &outputFormats) { - if ((data_op_list_.empty()) || (1 != data_op_list_[0]->GetInputsSize())) { + std::vector &output_formats) { + if (input_addrs_list_.empty() || input_addrs_list_[0].size() != kOutputNum) { GELOGE(FAILED, "OP List Pointer is null or input_desc size is not 1!"); return FAILED; } GE_CHK_STATUS_RET(GetInputDescInfo(input_desc, input_formats), "get input desc info failed"); - GE_CHK_STATUS_RET(GetOutputDescInfo(output_desc, outputFormats), "get ouput desc info failed"); + GE_CHK_STATUS_RET(GetOutputDescInfo(output_desc, output_formats), "get ouput desc info failed"); GE_CHK_BOOL_RET_STATUS(output_desc.size() == output_memory_size_list_.size(), INTERNAL_ERROR, "output_desc size[%zu] not equal output_size_list_[%zu] size!", output_desc.size(), @@ -1939,7 +1948,7 @@ Status DavinciModel::GetInputDescInfo(vector &input_desc, s return SUCCESS; } -void DavinciModel::CreateOutput(uint32_t index, OpDescPtr &op_desc, InputOutputDescInfo &output, +void DavinciModel::CreateOutput(uint32_t index, const OpDescPtr &op_desc, InputOutputDescInfo &output, uint32_t &format_result) { 
/// netoutput input tensor desc GE_IF_BOOL_EXEC(op_desc->GetInputDescPtr(index) == nullptr, GELOGE(FAILED, "OpDesc GetInputDescPtr is nullptr"); @@ -1992,10 +2001,10 @@ void DavinciModel::CreateOutput(uint32_t index, OpDescPtr &op_desc, InputOutputD output.data_type = op_desc->GetInputDescPtr(index)->GetDataType(); } -Status DavinciModel::GetOutputDescInfo(vector &output_desc, std::vector &formats) { - GELOGD("Output node size: %zu", output_op_list_.size()); - for (size_t i = 0; i < output_op_list_.size(); i++) { - auto &op_desc = output_op_list_[i]; +Status DavinciModel::InitOutputDescInfo(const vector &output_op_list, + vector &output_descs, vector &output_formats) { + GELOGD("Output node size: %zu", output_op_list.size()); + for (const auto &op_desc : output_op_list) { uint32_t out_size = static_cast(op_desc->GetInputsSize()); for (uint32_t index = 0; index < out_size; index++) { string output_name; @@ -2018,13 +2027,19 @@ Status DavinciModel::GetOutputDescInfo(vector &output_desc, std::to_string(src_index[index]); } output.name = output_name; - output_desc.push_back(output); - formats.push_back(format_result); + output_descs.push_back(output); + output_formats.push_back(format_result); } } return SUCCESS; } +Status DavinciModel::GetOutputDescInfo(vector &output_descs, vector &output_formats) { + output_descs.insert(output_descs.end(), output_descs_.begin(), output_descs_.end()); + output_formats.insert(output_formats.end(), output_formats_.begin(), output_formats_.end()); + return SUCCESS; +} + ge::Format DavinciModel::GetFormat() { if ((data_op_list_.empty()) || data_op_list_[0] == nullptr || data_op_list_[0]->GetInputDescPtr(0) == nullptr) { GELOGW("OP List Pointer is null or input_desc size is not 1!"); @@ -2362,7 +2377,7 @@ void DavinciModel::SetProfileTime(ModelProcStage stage, int64_t endTime) { /// @author /// Status DavinciModel::CopyOutputData(uint32_t data_id, OutputData &output_data, rtMemcpyKind_t kind) { - if (output_op_list_.empty()) { + if (output_addrs_list_.empty()) { Status ret = SyncVarData(); return ret; } @@ -2421,20 +2436,12 @@ Status DavinciModel::CopyOutputData(uint32_t data_id, OutputData &output_data, r return SUCCESS; } -Status DavinciModel::GenOutputTensorInfo(const OpDescPtr &op_desc, uint32_t data_index, OutputData *output_data, - std::vector &outputs) { - GE_CHECK_NOTNULL(op_desc); - GE_CHECK_NOTNULL(output_data); - if (output_data->blobs.size() > data_index) { - GELOGI("No need to generate output tensor info, model id:%u", model_id_); - return SUCCESS; - } - std::vector out_buffer_size_vec; - std::vector> shape_info_vec; +Status DavinciModel::InitOutputTensorInfo(const OpDescPtr &op_desc) { size_t input_num = op_desc->GetInputsSize(); if (is_getnext_sink_dynamic_) { input_num = input_num - kGetDynamicDimsCount; } + for (size_t i = 0; i < input_num; ++i) { int64_t size = 0; auto input_desc = op_desc->GetInputDescPtr(i); @@ -2454,25 +2461,37 @@ Status DavinciModel::GenOutputTensorInfo(const OpDescPtr &op_desc, uint32_t data } } GELOGI("Output size is %ld, output shape is %s.", size, formats::JoinToString(output_shape).c_str()); - out_buffer_size_vec.push_back(size); - shape_info_vec.push_back(output_shape); + output_buffer_size_.push_back(size); + output_shape_info_.push_back(output_shape); + } + + return SUCCESS; +} + +Status DavinciModel::GenOutputTensorInfo(OutputData *output_data, vector &outputs) { + GE_CHECK_NOTNULL(output_data); + if (!output_data->blobs.empty()) { + GELOGI("No need to generate output tensor info, model id:%u", model_id_); + return 
SUCCESS; } - GELOGI("Output blobs size:%zu, data index:%u, model id:%u", out_buffer_size_vec.size(), data_index, model_id_); - for (size_t i = 0; i < out_buffer_size_vec.size(); ++i) { - std::unique_ptr data_buf(new (std::nothrow) uint8_t[out_buffer_size_vec[i]]); + + GELOGI("Output blobs size:%zu, model id:%u", output_buffer_size_.size(), model_id_); + for (size_t i = 0; i < output_buffer_size_.size(); ++i) { + std::unique_ptr data_buf(new (std::nothrow) uint8_t[output_buffer_size_[i]]); if (data_buf == nullptr) { GELOGE(GE_GRAPH_MALLOC_FAILED, "Malloc buffer failed."); return GE_GRAPH_MALLOC_FAILED; } - output_data->blobs.push_back({data_buf.get(), static_cast(out_buffer_size_vec[i]), false}); + output_data->blobs.push_back({data_buf.get(), static_cast(output_buffer_size_[i]), false}); ge::OutputTensorInfo output; - output.dims = shape_info_vec[i]; + output.dims = output_shape_info_[i]; output.data = std::move(data_buf); - output.length = out_buffer_size_vec[i]; + output.length = output_buffer_size_[i]; outputs.emplace_back(std::move(output)); GELOGD("Output index:%zu, output dims is %s, data length:%lu.", i, formats::JoinToString(output.dims).c_str(), output.length); } + return SUCCESS; } @@ -2507,36 +2526,28 @@ Status DavinciModel::ReturnResult(uint32_t data_id, const bool rslt_flg, const b return INTERNAL_ERROR; } - if (output_op_list_.empty()) { + if (output_addrs_list_.empty()) { GELOGW("Output tensor list is empty, model id: %u", model_id_); GE_CHK_STATUS(listener_->OnComputeDone(model_id_, data_id, INTERNAL_ERROR, outputs), "OnComputeDone failed."); return INTERNAL_ERROR; } GE_CHECK_NOTNULL(output_data); - // index of data in output_data - uint32_t data_index = 0; - output_data->index = data_id; output_data->model_id = model_id_; - is_getnext_sink_dynamic_ = false; - // copy output data from op to designated position - for (auto &op_desc : output_op_list_) { - if (IsGetNextSinkDynamic(op_desc)) { - GELOGD("Reinit cur dynamic dims when getnext sink dynamic."); - is_getnext_sink_dynamic_ = true; - cur_dynamic_dims_.clear(); - cur_dynamic_dims_.resize(shape_of_cur_dynamic_dims_); - auto ret = rtMemcpy(cur_dynamic_dims_.data(), shape_of_cur_dynamic_dims_ * sizeof(int64_t), - netoutput_last_input_addr_, netoutput_last_input_size_, RT_MEMCPY_DEVICE_TO_HOST); - GE_CHK_RT_RET(ret); - } - GELOGD("Cur dynamic dims is %s.", formats::JoinToString(cur_dynamic_dims_).c_str()); - if (GenOutputTensorInfo(op_desc, data_index, output_data, outputs) != SUCCESS) { - return INTERNAL_ERROR; - } - data_index += op_desc->GetInputsSize(); + if (is_getnext_sink_dynamic_) { + GELOGD("Reinit cur dynamic dims when getnext sink dynamic."); + cur_dynamic_dims_.clear(); + cur_dynamic_dims_.resize(shape_of_cur_dynamic_dims_); + auto ret = rtMemcpy(cur_dynamic_dims_.data(), shape_of_cur_dynamic_dims_ * sizeof(int64_t), + netoutput_last_input_addr_, netoutput_last_input_size_, RT_MEMCPY_DEVICE_TO_HOST); + GE_CHK_RT_RET(ret); + } + + GELOGD("Cur dynamic dims is %s.", formats::JoinToString(cur_dynamic_dims_).c_str()); + if (GenOutputTensorInfo(output_data, outputs) != SUCCESS) { + return INTERNAL_ERROR; } if (CopyOutputData(data_id, *output_data, RT_MEMCPY_DEVICE_TO_HOST) != SUCCESS) { @@ -2668,10 +2679,10 @@ void *DavinciModel::Run(DavinciModel *model) { model->SetProfileTime(MODEL_AFTER_PROC_START)); GE_TIMESTAMP_START(ReturnResult3); // copy output data from device to host - GE_IF_BOOL_EXEC(!model->output_op_list_.empty(), + GE_IF_BOOL_EXEC(!model->output_addrs_list_.empty(), 
(void)model->ReturnResult(current_data.index, rslt_flg, false, data_wrapper->GetOutput())) // copy output data from device to host for variable graph - GE_IF_BOOL_EXEC(model->output_op_list_.empty(), (void)model->ReturnNoOutput(current_data.index)); + GE_IF_BOOL_EXEC(model->output_addrs_list_.empty(), (void)model->ReturnNoOutput(current_data.index)); GE_IF_BOOL_EXEC(model->is_first_execute_, GE_TIMESTAMP_EVENT_END(ReturnResult3, "GraphExcute::CopyDataFromDeviceToHost")); GE_IF_BOOL_EXEC(ProfilingManager::Instance().ProfilingModelExecuteOn(), @@ -2791,30 +2802,49 @@ void DavinciModel::UnbindTaskSinkStream() { } } +void *DavinciModel::GetRunAddress(void *addr) const { + if (fixed_mem_base_ == reinterpret_cast(mem_base_)) { + return addr; + } + + uintptr_t ptr = reinterpret_cast(addr); + if ((fixed_mem_base_ <= ptr) && (ptr < fixed_mem_base_ + runtime_param_.mem_size)) { + return mem_base_ + (ptr - fixed_mem_base_); + } else { + return addr; + } +} + Status DavinciModel::CreateKnownZeroCopyMap(const vector &inputs, const vector &outputs) { - GELOGI("DavinciModel::CreateKnownZeroCopyMap in."); - if (inputs.size() > data_op_list_.size()) { - GELOGE(FAILED, "input data addr %zu should less than input op number %zu.", inputs.size(), data_op_list_.size()); + GELOGI("in, inputs size: %zu, input addr size: %zu, outputs size: %zu, output addr size: %zu", + inputs.size(), input_addrs_list_.size(), outputs.size(), output_addrs_list_.size()); + if (inputs.size() > input_addrs_list_.size()) { + GELOGE(FAILED, "input data addr %zu should less than input op num %zu.", inputs.size(), input_addrs_list_.size()); return FAILED; } // remove zero copy addr in last iteration - knonw_input_data_info_.clear(); - knonw_output_data_info_.clear(); + known_input_data_info_.clear(); + known_output_data_info_.clear(); for (size_t i = 0; i < inputs.size(); ++i) { - const vector addr_list = ModelUtils::GetOutputDataAddrs(runtime_param_, data_op_list_[i]); - knonw_input_data_info_[addr_list[kDataIndex]] = inputs[i]; - GELOGI("DavinciModel::CreateKnownZeroCopyMap input %zu,v addr %p,p addr %p .", i, addr_list[kDataIndex], inputs[i]); + const vector &addr_list = input_addrs_list_[i]; + void *addr = GetRunAddress(addr_list[kDataIndex]); + known_input_data_info_[addr] = inputs[i]; + GELOGI("input %zu, v addr %p, r addr %p, p addr %p", i, addr_list[kDataIndex], addr, inputs[i]); } - if (output_op_list_.size() < kOutputNum) { - GELOGW("output op num in graph is %zu.", output_op_list_.size()); + + if (output_addrs_list_.empty()) { + GELOGW("output op num in graph is %zu", output_addrs_list_.size()); return SUCCESS; } - const vector addr_list = ModelUtils::GetInputDataAddrs(runtime_param_, output_op_list_[kDataIndex]); + const vector &addr_list = output_addrs_list_.front(); for (size_t i = 0; i < addr_list.size() && i < outputs.size(); ++i) { - knonw_output_data_info_[addr_list[i]] = outputs[i]; - GELOGI("DavinciModel::CreateKnownZeroCopyMap output %zu,v addr %p,p addr %p .", i, addr_list[i], outputs[i]); + void *addr = GetRunAddress(addr_list[i]); + known_output_data_info_[addr] = outputs[i]; + GELOGI("output %zu, v addr %p, r addr %p, p addr %p", i, addr_list[i], addr, outputs[i]); } - GELOGI("DavinciModel::CreateKnownZeroCopyMap success."); + + GELOGI("success, known input data info size: %zu, known output data info size: %zu", + known_input_data_info_.size(), known_output_data_info_.size()); return SUCCESS; } @@ -2825,40 +2855,30 @@ void DavinciModel::SetTotalIOAddrs(const vector &io_addrs) { } for (size_t i = 0; i < 
io_addrs.size(); ++i) { - uintptr_t addr = reinterpret_cast(io_addrs[i]); - if ((fixed_mem_base_ <= addr) && (addr < fixed_mem_base_ + runtime_param_.mem_size)) { - total_io_addrs_.emplace_back(mem_base_ + (addr - fixed_mem_base_)); - } else { - total_io_addrs_.emplace_back(io_addrs[i]); - } + total_io_addrs_.emplace_back(GetRunAddress(io_addrs[i])); } } Status DavinciModel::UpdateKnownZeroCopyAddr(vector &total_io_addrs) { if (fixed_mem_base_ != reinterpret_cast(mem_base_)) { for (size_t i = 0; i < total_io_addrs.size(); ++i) { - uintptr_t addr = reinterpret_cast(total_io_addrs[i]); - if ((fixed_mem_base_ <= addr) && (addr < fixed_mem_base_ + runtime_param_.mem_size)) { - total_io_addrs[i] = mem_base_ + (addr - fixed_mem_base_); - } + total_io_addrs[i] = GetRunAddress(total_io_addrs[i]); } } for (size_t i = 0; i < total_io_addrs.size(); ++i) { - auto it_in = knonw_input_data_info_.find(total_io_addrs[i]); - if (it_in != knonw_input_data_info_.end()) { - GELOGI("DavinciModel::UpdateKnownZeroCopyAddr input %zu,v addr %p,p addr %p .", i, total_io_addrs[i], - knonw_input_data_info_.at(total_io_addrs[i])); - total_io_addrs[i] = knonw_input_data_info_.at(total_io_addrs[i]); + auto it_in = known_input_data_info_.find(total_io_addrs[i]); + if (it_in != known_input_data_info_.end()) { + GELOGI("input %zu, v addr %p, p addr %p", i, total_io_addrs[i], known_input_data_info_.at(total_io_addrs[i])); + total_io_addrs[i] = known_input_data_info_.at(total_io_addrs[i]); } - auto it_out = knonw_output_data_info_.find(total_io_addrs[i]); - if (it_out != knonw_output_data_info_.end()) { - GELOGI("DavinciModel::UpdateKnownZeroCopyAddr output %zu,v addr %p,p addr %p .", i, total_io_addrs[i], - knonw_output_data_info_.at(total_io_addrs[i])); - total_io_addrs[i] = knonw_output_data_info_.at(total_io_addrs[i]); + auto it_out = known_output_data_info_.find(total_io_addrs[i]); + if (it_out != known_output_data_info_.end()) { + GELOGI("output %zu, v addr %p, p addr %p", i, total_io_addrs[i], known_output_data_info_.at(total_io_addrs[i])); + total_io_addrs[i] = known_output_data_info_.at(total_io_addrs[i]); } } - GELOGI("DavinciModel::UpdateKnownZeroCopyAddr success."); + GELOGI("success, total io addrs size: %zu", total_io_addrs.size()); return SUCCESS; } @@ -3159,15 +3179,8 @@ bool DavinciModel::CheckInputAndModelSize(const int64_t &input_size, const int64 "MAY cause inference result ERROR, please check model input", input_size, op_size); } - bool is_dynamic_aipp = false; - for (const auto &op_desc : data_op_list_) { - if (op_desc->GetType() == AIPP_DATA_TYPE) { - GELOGI("This is dynamic aipp model."); - is_dynamic_aipp = true; - break; - } - } - if (is_dynamic_aipp) { + + if (is_dynamic_aipp_) { GELOGI("This is dynamic aipp model, no need to judge smaller input size"); return true; } diff --git a/ge/graph/load/new_model_manager/davinci_model.h b/ge/graph/load/new_model_manager/davinci_model.h index a8013f7d..906c0548 100755 --- a/ge/graph/load/new_model_manager/davinci_model.h +++ b/ge/graph/load/new_model_manager/davinci_model.h @@ -49,6 +49,10 @@ #include "task_info/task_info.h" #include "graph/common/local_context.h" +using std::mutex; +using std::thread; +using std::multimap; + namespace ge { // op debug need 2048 bits buffer const size_t kOpDebugMemorySize = 2048UL; @@ -84,11 +88,11 @@ struct SuperKernelTaskInfo { uint32_t last_stream_id; void *last_stream; void *last_sm_desc; - std::vector kernel_list; - std::vector arg_list; - std::vector dump_flag_list; - std::vector op_desc_list; - std::vector 
dump_args_list; + vector kernel_list; + vector arg_list; + vector dump_flag_list; + vector op_desc_list; + vector dump_args_list; uint32_t last_dump_flag; int64_t last_group_key; uintptr_t last_dump_args; @@ -123,7 +127,7 @@ class DavinciModel { /// @brief DavinciModel constructor /// @author /// - DavinciModel(int32_t priority, const std::shared_ptr &listener); + DavinciModel(int32_t priority, const shared_ptr &listener); /// /// @ingroup ge @@ -153,7 +157,7 @@ class DavinciModel { /// @param [in] output_que_ids: input queue ids from user, nums equal NetOutput Op. /// @return: 0 for success / others for fail /// - Status SetQueIds(const std::vector &input_queue_ids, const std::vector &output_queue_ids); + Status SetQueIds(const vector &input_queue_ids, const vector &output_queue_ids); /// /// @ingroup ge @@ -223,13 +227,14 @@ class DavinciModel { // get total mem size size_t TotalMemSize() const { return runtime_param_.mem_size; } - const std::map &P2PMemInfos() const {return runtime_param_.memory_infos;} + const map &P2PMemInfos() const { return runtime_param_.memory_infos; } // model name string Name() const { return name_; } // om_name string OmName() const { return om_name_; } + // version uint32_t Version() const { return version_; } @@ -255,9 +260,6 @@ class DavinciModel { Status DestroyThread(); - // Get Data Op. - const vector &GetDataList() const { return data_op_list_; } - // get Op OpDescPtr GetOpByIndex(uint32_t index) const { if (op_list_.find(index) == op_list_.end()) { @@ -274,11 +276,12 @@ class DavinciModel { } return nullptr; } + // get task info for profiling - const std::vector &GetTaskDescInfo() const { return task_desc_info_; } + const vector &GetTaskDescInfo() const { return task_desc_info_; } // get updated task info list - std::vector GetTaskList() { return task_list_; } + vector GetTaskList() { return task_list_; } // Modified from KernelTaskInfo. SuperKernelTaskInfo &GetSuperKernelTaskInfo() { return skt_info_; } @@ -323,7 +326,7 @@ class DavinciModel { Status GetInputOutputDescInfo(vector &input_desc, vector &output_desc); Status GetInputOutputDescInfo(vector &input_desc, vector &output_desc, - std::vector &inputFormats, std::vector &output_formats); + vector &inputFormats, vector &output_formats); /// /// @ingroup ge @@ -332,7 +335,7 @@ class DavinciModel { /// @param [out] dynamic_type /// @return execute result /// - Status GetDynamicBatchInfo(std::vector> &batch_info, int32_t &dynamic_type) const; + Status GetDynamicBatchInfo(vector> &batch_info, int32_t &dynamic_type) const; /// /// @ingroup ge @@ -340,13 +343,13 @@ class DavinciModel { /// @param [out] batch_info /// @return None /// - void GetCombinedDynamicDims(std::vector> &batch_info) const; + void GetCombinedDynamicDims(vector> &batch_info) const; - void GetUserDesignateShapeOrder(std::vector &user_input_shape_order) const; + void GetUserDesignateShapeOrder(vector &user_input_shape_order) const; - void GetCurShape(std::vector &batch_info, int32_t &dynamic_type); + void GetCurShape(vector &batch_info, int32_t &dynamic_type); - void GetModelAttr(std::vector &dynamic_output_shape_info); + void GetModelAttr(vector &dynamic_output_shape_info); /// /// @ingroup ge @@ -373,7 +376,7 @@ class DavinciModel { /// @param [in] string identification: unique identification for current op. 
/// @return None /// - void GetUniqueId(const OpDescPtr &op_desc, std::string &unique_identification); + void GetUniqueId(const OpDescPtr &op_desc, string &unique_identification); /// /// @ingroup ge @@ -384,7 +387,7 @@ class DavinciModel { /// Status GetInputOutputDescInfoForZeroCopy(vector &input_desc, vector &output_desc, - std::vector &inputFormats, std::vector &output_formats); + vector &inputFormats, vector &output_formats); Status ReturnResult(uint32_t data_id, const bool rslt_flg, const bool seq_end_flg, OutputData *output_data); @@ -406,8 +409,6 @@ class DavinciModel { /// bool RunFlag() const { return run_flg_; } - Status GetOutputDescInfo(vector &output_desc, std::vector &formats); - /// /// @ingroup ge /// @brief Set Session Id @@ -453,14 +454,14 @@ class DavinciModel { /// @ingroup ge /// @brief Save outside address of Data or NetOutput used info for ZeroCopy. /// @param [in] const OpDescPtr &op_desc: current op desc - /// @param [in] const std::vector &outside_addrs: address of task + /// @param [in] const vector &outside_addrs: address of task /// @param [in] const void *args_offset: arguments address save the address. /// @return None. /// - void SetZeroCopyAddr(const OpDescPtr &op_desc, const std::vector &outside_addrs, const void *info, void *args, + void SetZeroCopyAddr(const OpDescPtr &op_desc, const vector &outside_addrs, const void *info, void *args, size_t size, size_t offset); - void SetDynamicSize(const std::vector &batch_num, int32_t dynamic_type); + void SetDynamicSize(const vector &batch_num, int32_t dynamic_type); bool GetL1FusionEnableOption() { return is_l1_fusion_enable_; } @@ -476,7 +477,7 @@ class DavinciModel { data_dumper_.SaveDumpOpInfo(model_param, op, task_id, stream_id); } - void SaveDumpTask(uint32_t task_id, uint32_t stream_id, const std::shared_ptr &op_desc, uintptr_t args) { + void SaveDumpTask(uint32_t task_id, uint32_t stream_id, const shared_ptr &op_desc, uintptr_t args) { data_dumper_.SaveDumpTask(task_id, stream_id, op_desc, args); } @@ -485,7 +486,7 @@ class DavinciModel { DavinciModel(const DavinciModel &model) = delete; - const map> &GetHcclFolowStream() { + const map> &GetHcclFolowStream() { return main_follow_stream_mapping_; } void SaveHcclFollowStream(int64_t main_stream_id, rtStream_t stream); @@ -534,8 +535,8 @@ class DavinciModel { void SetKnownNodeAddrNotChanged(bool base_addr_not_changed) { base_addr_not_changed_ = base_addr_not_changed; } Status GetOrigInputInfo(uint32_t index, OriginInputInfo &orig_input_info); - Status GetAllAippInputOutputDims(uint32_t index, std::vector &input_dims, - std::vector &output_dims); + Status GetAllAippInputOutputDims(uint32_t index, vector &input_dims, + vector &output_dims); void SetModelDescVersion(bool is_new_model_desc) { is_new_model_desc_ = is_new_model_desc; } // om file name void SetOmName(string om_name) { om_name_ = om_name; } @@ -546,7 +547,6 @@ class DavinciModel { bool GetOpDescInfo(uint32_t stream_id, uint32_t task_id, OpDescInfo &op_desc_info) const { return data_dumper_.GetOpDescInfo(stream_id, task_id, op_desc_info); } - Status InitInputOutputForDynamic(const ComputeGraphPtr &compute_graph); private: // memory address of weights @@ -566,6 +566,8 @@ class DavinciModel { struct timeInfo time_info_; int32_t dataInputTid; + void *GetRunAddress(void *addr) const; + /// /// @ingroup ge /// @brief Copy Check input size and model op size. 
@@ -603,7 +605,7 @@ class DavinciModel { /// @param [in] batch_label: batch label for multi-batch scenes /// @return SUCCESS handle successfully / others handle failed /// - Status UpdateIoTaskArgs(const std::map &data_info, bool is_input, + Status UpdateIoTaskArgs(const map &data_info, bool is_input, const vector &blobs, bool is_dynamic, const string &batch_label); Status CopyInputData(const InputData &input_data, bool device_data = false); @@ -619,7 +621,8 @@ class DavinciModel { void SetInputDimsInfo(const vector &model_input_dims, Format &format, InputOutputDescInfo &input); - Status GetInputDescInfo(vector &input_desc, std::vector &formats); + Status GetInputDescInfo(vector &input_desc, vector &input_formats); + Status GetOutputDescInfo(vector &output_desc, vector &output_formats); Status InitTaskInfo(domi::ModelTaskDef &modelTaskInfo); @@ -631,7 +634,7 @@ class DavinciModel { uint8_t *MallocWeightsMem(size_t weights_size); - uint8_t* MallocP2PMem(size_t p2p_data_size); + uint8_t *MallocP2PMem(size_t p2p_data_size); void FreeFeatureMapMem(); @@ -663,27 +666,33 @@ class DavinciModel { /// /// @ingroup ge /// @brief Data Op Initialize. + /// @param [in] ComputeGraphPtr: root graph of the model. /// @param [in] NodePtr: Data Op. - /// @param [in/out] data_op_index: NetOutput addr size info. + /// @param [in/out] data_op_index: running count of Data nodes. + /// @param [in/out] data_by_index: Data ordered by index. /// @return Status /// - Status InitDataOp(const NodePtr &node, uint32_t &data_op_index, map &data_by_index); + Status InitDataOp(const ComputeGraphPtr &graph, const NodePtr &node, uint32_t &data_op_index, + map &data_by_index); /// /// @ingroup ge /// @brief Sort Data op list by index. /// @param [in] data_by_index: map of Data Op. - /// @return + /// @param [in] output_op_list: list of NetOutput ops. + /// @return Status /// - void AdjustDataOpList(const map &data_by_index); + Status OptInputOutputInfo(const map &data_by_index, const vector &output_op_list); /// /// @ingroup ge /// @brief NetOutput Op Initialize. + /// @param [in] ComputeGraphPtr: root graph of the model. /// @param [in] NodePtr: NetOutput Op. + /// @param [in/out] vector: All NetOutput nodes in the model.
/// @return Status /// - Status InitNetOutput(const NodePtr &node); + Status InitNetOutput(const ComputeGraphPtr &graph, const NodePtr &node, vector &output_op_list); /// /// @ingroup ge @@ -722,7 +731,7 @@ class DavinciModel { /// Status InitTbeHandle(const OpDescPtr &op_desc); - void StoreTbeHandle(const std::string &handle_key); + void StoreTbeHandle(const string &handle_key); void CleanTbeHandle(); /// @@ -753,7 +762,7 @@ class DavinciModel { /// Status BindInputQueue(); - Status CpuTaskModelZeroCopy(std::vector &mbuf_list, std::map &outside_addrs); + Status CpuTaskModelZeroCopy(vector &mbuf_list, map &outside_addrs); /// /// @ingroup ge @@ -824,7 +833,7 @@ class DavinciModel { Status DoTaskSink(); - void CreateOutput(uint32_t index, OpDescPtr &op_desc, InputOutputDescInfo &output, uint32_t &format_result); + void CreateOutput(uint32_t index, const OpDescPtr &op_desc, InputOutputDescInfo &output, uint32_t &format_result); Status TransAllVarData(ComputeGraphPtr &graph, uint32_t graph_id); @@ -838,13 +847,16 @@ class DavinciModel { Status SinkTimeProfile(const InputData ¤t_data); - Status GenOutputTensorInfo(const OpDescPtr &op_desc, uint32_t data_index, OutputData *output_data, - std::vector &outputs); + Status InitOutputTensorInfo(const OpDescPtr &op_desc); + Status GenOutputTensorInfo(OutputData *output_data, vector &outputs); - void ParseAIPPInfo(std::string in_out_info, InputOutputDims &dims_info); + Status InitOutputDescInfo(const vector &output_op_list, + vector &output_desc, vector &formats); + + void ParseAIPPInfo(string in_out_info, InputOutputDims &dims_info); void SetLabelForDynamic(const NodePtr &node); - void ParseDynamicOutShape(const std::vector &str_info, std::vector> &vec_info); + void ParseDynamicOutShape(const vector &str_info, vector> &vec_info); bool IsGetNextSinkDynamic(const OpDescPtr &op_desc); void GetAllGearsInfo(const NodePtr &node); Status GetGetDynamicDimsNodeInfo(const NodePtr &node); @@ -866,56 +878,54 @@ class DavinciModel { GeModelPtr ge_model_; bool need_destroy_aicpu_kernel_{false}; - vector out_node_name_; + vector out_node_name_; map op_list_; // data op_desc vector data_op_list_; - vector output_op_list_; - vector variable_op_list_; - std::map new_input_data_info_; - std::map new_output_data_info_; - std::map new_input_outside_addrs_; - std::map new_output_outside_addrs_; + map new_input_data_info_; + map new_output_data_info_; + map new_input_outside_addrs_; + map new_output_outside_addrs_; - std::set real_virtual_addrs_; + set real_virtual_addrs_; // output op: save cce op actual needed memory size vector output_memory_size_list_; - std::thread thread_id_; + thread thread_id_; - std::shared_ptr listener_; + shared_ptr listener_; bool run_flg_; - std::mutex mux_run_flg_; + mutex mux_run_flg_; int32_t priority_; vector stream_list_; - std::mutex all_hccl_stream_list_mutex_; + mutex all_hccl_stream_list_mutex_; vector all_hccl_stream_list_; // for reuse hccl_follow_stream - std::mutex capacity_of_stream_mutex_; - std::map> main_follow_stream_mapping_; + mutex capacity_of_stream_mutex_; + map> main_follow_stream_mapping_; vector event_list_; vector label_list_; set label_id_indication_; - std::mutex outside_addrs_mutex_; - std::vector zero_copy_tasks_; // Task used Data or NetOutput addr. - std::set copy_only_addrs_; // Address need copy to original place. + mutex outside_addrs_mutex_; + vector zero_copy_tasks_; // Task used Data or NetOutput addr. + set copy_only_addrs_; // Address need copy to original place. 
- std::vector task_list_; + vector task_list_; // rt_moodel_handle rtModel_t rt_model_handle_; @@ -933,39 +943,39 @@ class DavinciModel { rtAicpuDeployType_t deploy_type_{AICPU_DEPLOY_RESERVED}; // ACL queue schedule, save queue ids for Init. - std::vector cpu_task_list_; - std::vector input_queue_ids_; // input queue ids created by caller. - std::vector output_queue_ids_; // output queue ids created by caller. - std::vector input_mbuf_list_; // input mbuf created by dequeue task. - std::vector output_mbuf_list_; // output mbuf created by dequeue task. + vector cpu_task_list_; + vector input_queue_ids_; // input queue ids created by caller. + vector output_queue_ids_; // output queue ids created by caller. + vector input_mbuf_list_; // input mbuf created by dequeue task. + vector output_mbuf_list_; // output mbuf created by dequeue task. uint64_t session_id_; uint32_t device_id_; - std::mutex flowctrl_op_index_internal_map_mutex_; - std::map flowctrl_op_index_internal_map_; + mutex flowctrl_op_index_internal_map_mutex_; + map flowctrl_op_index_internal_map_; - std::vector active_stream_list_; - std::set active_stream_indication_; + vector active_stream_list_; + set active_stream_indication_; - std::set hcom_streams_; + set hcom_streams_; RuntimeParam runtime_param_; - static std::mutex tvm_bin_mutex_; - std::set tvm_bin_kernel_; + static mutex tvm_bin_mutex_; + set tvm_bin_kernel_; - std::map used_tbe_handle_map_; + map used_tbe_handle_map_; // for profiling task and graph info - std::vector task_desc_info_; + vector task_desc_info_; int64_t maxDumpOpNum_; // for data dump DataDumper data_dumper_; uint64_t iterator_count_; bool is_l1_fusion_enable_; - std::map saved_task_addrs_; + map saved_task_addrs_; void *l1_fusion_addr_ = nullptr; bool known_node_ = false; @@ -976,14 +986,14 @@ class DavinciModel { void *hybrid_addrs_ = nullptr; uint32_t total_hybrid_args_size_ = 0; int64_t total_fixed_addr_size_ = 0; - std::map knonw_input_data_info_; - std::map knonw_output_data_info_; + map known_input_data_info_; + map known_output_data_info_; vector total_io_addrs_; vector orig_total_io_addrs_; bool base_addr_not_changed_ = false; vector> batch_info_; - std::vector> combined_batch_info_; + vector> combined_batch_info_; vector user_designate_shape_order_; int32_t dynamic_type_ = 0; bool is_dynamic_ = false; @@ -991,35 +1001,47 @@ class DavinciModel { vector batch_size_; // key: input tensor name, generally rts op; // value: the fixed addr of input anchor, same as the peer output anchor addr of the peer op - std::map tensor_name_to_fixed_addr_size_; + map tensor_name_to_fixed_addr_size_; // key: input tensor name, generally rts op; value: the peer output anchor of the peer op - std::map tensor_name_to_peer_output_index_; + map tensor_name_to_peer_output_index_; // if model is first execute bool is_first_execute_; // for op debug - std::mutex debug_reg_mutex_; + mutex debug_reg_mutex_; bool is_op_debug_reg_ = false; void *op_debug_addr_ = nullptr; void *p2p_debug_addr_ = nullptr; bool is_new_model_desc_{false}; bool is_online_infer_dynamic_ = false; bool is_getnext_sink_dynamic_ = false; - std::vector cur_dynamic_dims_; + vector cur_dynamic_dims_; void *netoutput_last_input_addr_ = nullptr; int64_t netoutput_last_input_size_ = 0; size_t shape_of_cur_dynamic_dims_ = 0; // key: input_index: input is merge node; value: each gear info and each output size - std::map, int64_t>> merge_nodes_gear_and_real_out_size_info_; + map, int64_t>> merge_nodes_gear_and_real_out_size_info_; // key: input_index: input 
is merge node; value: each gear info and each output shape - std::map, vector>> merge_nodes_gear_and_real_out_shape_info_; - std::vector> all_gears_info_; + map, vector>> merge_nodes_gear_and_real_out_shape_info_; + vector> all_gears_info_; - std::multimap op_id_map_; - std::vector profile_list_; + multimap op_id_map_; + vector profile_list_; // For super kernel. SuperKernelTaskInfo skt_info_; + + bool is_dynamic_aipp_ = false; + vector dynamic_output_shape_info_; + + vector> input_addrs_list_; + vector> output_addrs_list_; + + vector output_buffer_size_; + vector> output_shape_info_; + + vector output_descs_; + vector output_formats_; }; } // namespace ge #endif // GE_GRAPH_LOAD_NEW_MODEL_MANAGER_DAVINCI_MODEL_H_ diff --git a/tests/ut/ge/CMakeLists.txt b/tests/ut/ge/CMakeLists.txt index 175774bb..ebaf7708 100755 --- a/tests/ut/ge/CMakeLists.txt +++ b/tests/ut/ge/CMakeLists.txt @@ -565,6 +565,7 @@ set(DISTINCT_GRAPH_LOAD_TEST_FILES "graph/load/end_graph_task_unittest.cc" "graph/load/new_model_manager_event_manager_unittest.cc" #"graph/load/output_net_output_unittest.cc" + "graph/load/davinci_model_unittest.cc" "graph/load/tbe_handle_store_unittest.cc" "graph/load/hccl_task_info_unittest.cc" "graph/load/kernel_ex_task_info_unittest.cc" diff --git a/tests/ut/ge/graph/load/davinci_model_unittest.cc b/tests/ut/ge/graph/load/davinci_model_unittest.cc new file mode 100644 index 00000000..3cd0455d --- /dev/null +++ b/tests/ut/ge/graph/load/davinci_model_unittest.cc @@ -0,0 +1,285 @@ +/** + * Copyright 2019-2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include + +#define private public +#define protected public +#include "graph/utils/graph_utils.h" +#include "common/profiling/profiling_manager.h" +#include "graph/load/new_model_manager/davinci_model.h" + +using namespace std; + +namespace ge { +extern OpDescPtr CreateOpDesc(string name, string type); + +class UtestDavinciModel : public testing::Test { + protected: + void SetUp() {} + + void TearDown() {} +}; + +TEST_F(UtestDavinciModel, init_success) { + DavinciModel model(0, nullptr); + ComputeGraphPtr graph = make_shared("default"); + ProfilingManager::Instance().is_load_profiling_ = true; + + GeModelPtr ge_model = make_shared(); + ge_model->SetGraph(GraphUtils::CreateGraphFromComputeGraph(graph)); + AttrUtils::SetInt(ge_model, ATTR_MODEL_MEMORY_SIZE, 5120000); + AttrUtils::SetInt(ge_model, ATTR_MODEL_STREAM_NUM, 1); + + shared_ptr model_task_def = make_shared(); + ge_model->SetModelTaskDef(model_task_def); + + GeTensorDesc tensor(GeShape(), FORMAT_NCHW, DT_FLOAT); + TensorUtils::SetSize(tensor, 512); + + OpDescPtr op_input = CreateOpDesc("data", DATA); + op_input->AddInputDesc(tensor); + op_input->AddOutputDesc(tensor); + op_input->SetInputOffset({1024}); + op_input->SetOutputOffset({1024}); + NodePtr node_input = graph->AddNode(op_input); // op_index = 0 + + OpDescPtr op_kernel = CreateOpDesc("square", "Square"); + op_kernel->AddInputDesc(tensor); + op_kernel->AddOutputDesc(tensor); + op_kernel->SetInputOffset({1024}); + op_kernel->SetOutputOffset({1024}); + NodePtr node_kernel = graph->AddNode(op_kernel); // op_index = 1 + + OpDescPtr op_memcpy = CreateOpDesc("memcpy", MEMCPYASYNC); + op_memcpy->AddInputDesc(tensor); + op_memcpy->AddOutputDesc(tensor); + op_memcpy->SetInputOffset({1024}); + op_memcpy->SetOutputOffset({5120}); + NodePtr node_memcpy = graph->AddNode(op_memcpy); // op_index = 2 + + OpDescPtr op_output = CreateOpDesc("output", NETOUTPUT); + op_output->AddInputDesc(tensor); + op_output->SetInputOffset({5120}); + op_output->SetSrcName( { "memcpy" } ); + op_output->SetSrcIndex( { 0 } ); + NodePtr node_output = graph->AddNode(op_output); // op_index = 3 + + + domi::TaskDef *task_def1 = model_task_def->add_task(); + task_def1->set_stream_id(0); + task_def1->set_type(RT_MODEL_TASK_KERNEL); + domi::KernelDef *kernel_def = task_def1->mutable_kernel(); + kernel_def->set_stub_func("stub_func"); + kernel_def->set_args_size(64); + string args(64, '1'); + kernel_def->set_args(args.data(), 64); + domi::KernelContext *context = kernel_def->mutable_context(); + context->set_op_index(1); + context->set_kernel_type(2); // ccKernelType::TE + uint16_t args_offset[9] = {0}; + context->set_args_offset(args_offset, 9 * sizeof(uint16_t)); + + domi::TaskDef *task_def2 = model_task_def->add_task(); + task_def2->set_stream_id(0); + task_def2->set_type(RT_MODEL_TASK_MEMCPY_ASYNC); + domi::MemcpyAsyncDef *memcpy_async = task_def2->mutable_memcpy_async(); + memcpy_async->set_src(1024); + memcpy_async->set_dst(5120); + memcpy_async->set_dst_max(512); + memcpy_async->set_count(1); + memcpy_async->set_kind(RT_MEMCPY_DEVICE_TO_DEVICE); + memcpy_async->set_op_index(2); + + EXPECT_EQ(model.Assign(ge_model), SUCCESS); + EXPECT_EQ(model.Init(), SUCCESS); + + EXPECT_EQ(model.input_addrs_list_.size(), 1); + EXPECT_EQ(model.output_addrs_list_.size(), 1); + EXPECT_EQ(model.task_list_.size(), 2); + + ProfilingManager::Instance().is_load_profiling_ = false; +} + +TEST_F(UtestDavinciModel, init_data_op) { + DavinciModel model(0, nullptr); + model.ge_model_ = make_shared(); + model.runtime_param_.mem_base 
= (uint8_t *)0x08000000; + model.runtime_param_.mem_size = 5120000; + ComputeGraphPtr graph = make_shared("default"); + + OpDescPtr op_input = CreateOpDesc("data", DATA); + GeTensorDesc tensor(GeShape(), FORMAT_NCHW, DT_FLOAT); + TensorUtils::SetSize(tensor, 512); + op_input->AddInputDesc(tensor); + op_input->AddOutputDesc(tensor); + op_input->SetInputOffset({1024}); + op_input->SetOutputOffset({5120}); + NodePtr node_input = graph->AddNode(op_input); + + OpDescPtr op_output = CreateOpDesc("output", NETOUTPUT); + op_output->AddInputDesc(tensor); + op_output->SetInputOffset({1024}); + op_output->SetSrcName( { "data" } ); + op_output->SetSrcIndex( { 0 } ); + NodePtr node_output = graph->AddNode(op_output); + + EXPECT_EQ(model.InitNodes(graph), SUCCESS); + + EXPECT_EQ(model.input_addrs_list_.size(), 1); + EXPECT_EQ(model.output_addrs_list_.size(), 1); + EXPECT_EQ(model.op_list_.size(), 2); +} + +TEST_F(UtestDavinciModel, init_data_op_subgraph) { + DavinciModel model(0, nullptr); + model.runtime_param_.mem_base = (uint8_t *)0x08000000; + model.runtime_param_.mem_size = 5120000; + ComputeGraphPtr graph = make_shared("default"); + + OpDescPtr op_input = CreateOpDesc("data", DATA); + GeTensorDesc tensor(GeShape(), FORMAT_NCHW, DT_FLOAT); + op_input->AddInputDesc(tensor); + op_input->AddOutputDesc(tensor); + op_input->SetInputOffset({1024}); + op_input->SetOutputOffset({5120}); + NodePtr node = graph->AddNode(op_input); + + uint32_t data_op_index = 0; + map data_by_index; + EXPECT_EQ(model.InitDataOp(nullptr, node, data_op_index, data_by_index), SUCCESS); + + EXPECT_EQ(model.input_addrs_list_.size(), 0); + EXPECT_EQ(model.output_addrs_list_.size(), 0); + EXPECT_EQ(data_op_index, 0); + EXPECT_TRUE(data_by_index.empty()); +} + +TEST_F(UtestDavinciModel, init_netoutput_op_subgraph) { + DavinciModel model(0, nullptr); + model.runtime_param_.mem_base = (uint8_t *)0x08000000; + model.runtime_param_.mem_size = 5120000; + ComputeGraphPtr graph = make_shared("default"); + + OpDescPtr op_output = CreateOpDesc("output", NETOUTPUT); + GeTensorDesc tensor(GeShape(), FORMAT_NCHW, DT_FLOAT); + op_output->AddInputDesc(tensor); + op_output->SetInputOffset({1024}); + op_output->SetSrcName( { "data" } ); + op_output->SetSrcIndex( { 0 } ); + NodePtr node = graph->AddNode(op_output); + + std::vector output_op_list; + EXPECT_EQ(model.InitNetOutput(nullptr, node, output_op_list), SUCCESS); + + EXPECT_EQ(model.input_addrs_list_.size(), 0); + EXPECT_EQ(model.output_addrs_list_.size(), 0); + EXPECT_TRUE(output_op_list.empty()); +} + +TEST_F(UtestDavinciModel, init_unknown) { + DavinciModel model(0, nullptr); + model.SetKnownNode(true); + ComputeGraphPtr graph = make_shared("default"); + + GeModelPtr ge_model = make_shared(); + ge_model->SetGraph(GraphUtils::CreateGraphFromComputeGraph(graph)); + AttrUtils::SetInt(ge_model, ATTR_MODEL_MEMORY_SIZE, 5120000); + AttrUtils::SetInt(ge_model, ATTR_MODEL_STREAM_NUM, 1); + + shared_ptr model_task_def = make_shared(); + ge_model->SetModelTaskDef(model_task_def); + + GeTensorDesc tensor(GeShape(), FORMAT_NCHW, DT_FLOAT); + TensorUtils::SetSize(tensor, 512); + + OpDescPtr op_input = CreateOpDesc("data", DATA); + op_input->AddInputDesc(tensor); + op_input->AddOutputDesc(tensor); + op_input->SetInputOffset({1024}); + op_input->SetOutputOffset({1024}); + NodePtr node_input = graph->AddNode(op_input); // op_index = 0 + + OpDescPtr op_kernel = CreateOpDesc("square", "Square"); + op_kernel->AddInputDesc(tensor); + op_kernel->AddOutputDesc(tensor); + op_kernel->SetInputOffset({1024}); + 
op_kernel->SetOutputOffset({1024}); + NodePtr node_kernel = graph->AddNode(op_kernel); // op_index = 1 + + OpDescPtr op_memcpy = CreateOpDesc("memcpy", MEMCPYASYNC); + op_memcpy->AddInputDesc(tensor); + op_memcpy->AddOutputDesc(tensor); + op_memcpy->SetInputOffset({1024}); + op_memcpy->SetOutputOffset({5120}); + NodePtr node_memcpy = graph->AddNode(op_memcpy); // op_index = 2 + + OpDescPtr op_output = CreateOpDesc("output", NETOUTPUT); + op_output->AddInputDesc(tensor); + op_output->SetInputOffset({5120}); + op_output->SetSrcName( { "memcpy" } ); + op_output->SetSrcIndex( { 0 } ); + NodePtr node_output = graph->AddNode(op_output); // op_index = 3 + + + domi::TaskDef *task_def1 = model_task_def->add_task(); + task_def1->set_stream_id(0); + task_def1->set_type(RT_MODEL_TASK_KERNEL); + domi::KernelDef *kernel_def = task_def1->mutable_kernel(); + kernel_def->set_stub_func("stub_func"); + kernel_def->set_args_size(64); + string args(64, '1'); + kernel_def->set_args(args.data(), 64); + domi::KernelContext *context = kernel_def->mutable_context(); + context->set_op_index(1); + context->set_kernel_type(2); // ccKernelType::TE + uint16_t args_offset[9] = {0}; + context->set_args_offset(args_offset, 9 * sizeof(uint16_t)); + + domi::TaskDef *task_def2 = model_task_def->add_task(); + task_def2->set_stream_id(0); + task_def2->set_type(RT_MODEL_TASK_MEMCPY_ASYNC); + domi::MemcpyAsyncDef *memcpy_async = task_def2->mutable_memcpy_async(); + memcpy_async->set_src(1024); + memcpy_async->set_dst(5120); + memcpy_async->set_dst_max(512); + memcpy_async->set_count(1); + memcpy_async->set_kind(RT_MEMCPY_DEVICE_TO_DEVICE); + memcpy_async->set_op_index(2); + + EXPECT_EQ(model.Assign(ge_model), SUCCESS); + EXPECT_EQ(model.Init(), SUCCESS); + + EXPECT_EQ(model.input_addrs_list_.size(), 1); + EXPECT_EQ(model.output_addrs_list_.size(), 1); + EXPECT_EQ(model.task_list_.size(), 2); + + EXPECT_EQ(model.task_list_[0]->UpdateArgs(), SUCCESS); + EXPECT_EQ(model.task_list_[1]->UpdateArgs(), SUCCESS); + + vector out_shape_info; + model.GetModelAttr(out_shape_info); + + vector input_descs; + vector output_descs; + EXPECT_EQ(model.GetInputOutputDescInfo(input_descs, output_descs), SUCCESS); + + int32_t virtual_addr = 0; + const vector inputs = { &virtual_addr }; + const vector outputs = { &virtual_addr }; + EXPECT_EQ(model.UpdateKnownNodeArgs(inputs, outputs), SUCCESS); +} +} // namespace ge
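Illustrative sketch (not part of the patch itself): the zero-copy paths touched above (CreateKnownZeroCopyMap, SetTotalIOAddrs, UpdateKnownZeroCopyAddr) now all go through the new DavinciModel::GetRunAddress helper, which rebases an address recorded against the compile-time fixed base onto the feature-map memory actually allocated at load time. A minimal standalone C++ sketch of that translation rule follows; the names RebaseAddress, fixed_base, mem_base and mem_size are hypothetical stand-ins, not symbols from this repository.

#include <cstdint>

// Rebase `addr` from the fixed (compile-time) base onto the runtime feature-map base.
static void *RebaseAddress(void *addr, uintptr_t fixed_base, uint8_t *mem_base, uint64_t mem_size) {
  if (fixed_base == reinterpret_cast<uintptr_t>(mem_base)) {
    return addr;  // feature-map memory landed at the expected base, nothing to translate
  }
  const uintptr_t ptr = reinterpret_cast<uintptr_t>(addr);
  if ((fixed_base <= ptr) && (ptr < fixed_base + mem_size)) {
    return mem_base + (ptr - fixed_base);  // shift addresses inside the fixed range onto the runtime base
  }
  return addr;  // weights, constants and other out-of-range addresses pass through unchanged
}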