diff --git a/ge/graph/load/model_manager/cpu_queue_schedule.cc b/ge/graph/load/model_manager/cpu_queue_schedule.cc index d9b716ea..6807043a 100644 --- a/ge/graph/load/model_manager/cpu_queue_schedule.cc +++ b/ge/graph/load/model_manager/cpu_queue_schedule.cc @@ -99,7 +99,7 @@ Status CpuTaskModelDequeue::Distribute() { /// @param [in] outside_addrs: model input/output memory addr /// @return: 0 for success / others for failed /// -Status CpuTaskZeroCopy::Init(std::vector &mbuf_list, std::map &outside_addrs) { +Status CpuTaskZeroCopy::Init(std::vector &mbuf_list, const map &outside_addrs) { if ((args_ != nullptr) || (args_size_ > 0)) { GELOGE(FAILED, "Task already initialized, size: %u", args_size_); return FAILED; @@ -110,32 +110,22 @@ Status CpuTaskZeroCopy::Init(std::vector &mbuf_list, std::map> virtual_args_addrs = addrs_mapping_list[0]; - for (const auto &virtual_args_addr : virtual_args_addrs) { - addr_map_info.addr_num += virtual_args_addr.second.size(); - } - } - GELOGI("addr_map_info.addr_num is %u", addr_map_info.addr_num); - // init src_addrs/dst_addrs - size_t index = 0; vector src_addrs; vector dst_addrs; - for (auto &addrs : outside_addrs) { - auto &addrs_mapping_list = addrs.second.GetOutsideAddrs(); + for (const auto &addrs : outside_addrs) { + const auto &addrs_mapping_list = addrs.second.GetOutsideAddrs(); GE_CHK_BOOL_EXEC(!addrs_mapping_list.empty(), return PARAM_INVALID, "not set outside_addrs"); std::map> virtual_args_addrs = addrs_mapping_list[0]; for (const auto &virtual_args_addr : virtual_args_addrs) { + addr_map_info.addr_num += virtual_args_addr.second.size(); for (size_t i = 0; i < virtual_args_addr.second.size(); ++i) { - src_addrs.push_back(mbuf_list.at(index)); + src_addrs.emplace_back(mbuf_list.at(addrs.first)); dst_addrs.push_back(static_cast(reinterpret_cast(virtual_args_addr.second.at(i)))); } } - index++; } + GELOGI("addr_map_info.addr_num is %u", addr_map_info.addr_num); // malloc mem for src_addrs/dst_addrs, and copy data of 
src_addrs/dst_addrs GE_CHK_RT_RET(rtMalloc(&src_addr_, src_addrs.size() * sizeof(uint64_t), RT_MEMORY_HBM)); diff --git a/ge/graph/load/model_manager/cpu_queue_schedule.h b/ge/graph/load/model_manager/cpu_queue_schedule.h index de4c5327..8dc44538 100644 --- a/ge/graph/load/model_manager/cpu_queue_schedule.h +++ b/ge/graph/load/model_manager/cpu_queue_schedule.h @@ -93,7 +93,7 @@ class CpuTaskZeroCopy : public CpuTaskInfo { ~CpuTaskZeroCopy() override; Status Init(const domi::TaskDef &task_def, DavinciModel *davinci_model) override { return SUCCESS; } - Status Init(std::vector &mbuf_list, std::map &outside_addrs); + Status Init(std::vector &mbuf_list, const map &outside_addrs); Status Distribute() override; private: diff --git a/ge/graph/load/model_manager/davinci_model.cc b/ge/graph/load/model_manager/davinci_model.cc index 8bae86af..ed2428d9 100755 --- a/ge/graph/load/model_manager/davinci_model.cc +++ b/ge/graph/load/model_manager/davinci_model.cc @@ -842,6 +842,8 @@ Status DavinciModel::InitNodes(const ComputeGraphPtr &compute_graph) { }; vector output_op_list; + set input_outside_addrs; + set output_outside_addrs; map data_by_index; map variable_by_name; auto nodes = compute_graph->GetAllNodes(); @@ -858,7 +860,7 @@ Status DavinciModel::InitNodes(const ComputeGraphPtr &compute_graph) { GE_TIMESTAMP_ADD(LoadTBEKernelBinToOpDesc); if (IsDataOp(op_desc->GetType())) { - if (InitDataOp(compute_graph, node, data_op_index, data_by_index) != SUCCESS) { + if (InitDataOp(compute_graph, node, data_op_index, data_by_index, input_outside_addrs) != SUCCESS) { GELOGE(PARAM_INVALID, "Data init failed, Name: %s", op_desc->GetName().c_str()); return PARAM_INVALID; } @@ -867,7 +869,7 @@ Status DavinciModel::InitNodes(const ComputeGraphPtr &compute_graph) { } if (op_desc->GetType() == NETOUTPUT) { - if (InitNetOutput(compute_graph, node, output_op_list) != SUCCESS) { + if (InitNetOutput(compute_graph, node, output_op_list, output_outside_addrs) != SUCCESS) { GELOGE(PARAM_INVALID, 
"NetOutput init failed, Name: %s", op_desc->GetName().c_str()); return PARAM_INVALID; } @@ -961,7 +963,7 @@ void DavinciModel::SetLabelForDynamic(const NodePtr &node) { /// @return Status /// Status DavinciModel::InitDataOp(const ComputeGraphPtr &graph, const NodePtr &node, uint32_t &data_op_index, - map &data_by_index) { + map &data_by_index, set &input_outside_addrs) { // op_desc Checked by Init: Data, valid. auto op_desc = node->GetOpDesc(); if (node->GetOwnerComputeGraph() != graph) { @@ -1000,16 +1002,12 @@ Status DavinciModel::InitDataOp(const ComputeGraphPtr &graph, const NodePtr &nod GELOGE(PARAM_INVALID, "InitDataInfo of input_info %s failed.", op_desc->GetName().c_str()); return PARAM_INVALID; } - new_input_data_info_[data_index] = zero_copy_offset; - - for (size_t index = 0; index < virtual_addr_list.size(); ++index) { - void *addr = virtual_addr_list.at(index); - if (new_input_outside_addrs_.find(addr) != new_input_outside_addrs_.end()) { - continue; - } - zero_copy_offset.SetInputOutsideAddrs(output_offset_list, addr, index, fusion_flag, real_virtual_addrs_); - new_input_outside_addrs_[addr] = zero_copy_offset; + if (input_outside_addrs.count(virtual_addr) == 0) { + int64_t output_offset = output_offset_list.at(kDataIndex); + zero_copy_offset.SetInputOutsideAddrs(output_offset, virtual_addr, fusion_flag, real_virtual_addrs_); + input_outside_addrs.insert(virtual_addr); } + input_data_info_[data_index] = zero_copy_offset; return SUCCESS; } @@ -1085,7 +1083,7 @@ bool DavinciModel::IsGetNextSinkDynamic(const OpDescPtr &op_desc) { /// @param [in/out] vector: All NetOutput node in model. /// @return Status Status DavinciModel::InitNetOutput(const ComputeGraphPtr &graph, const NodePtr &node, - vector &output_op_list) { + vector &output_op_list, set &output_outside_addrs) { // node->GetOpDesc Checked by Init: NetOutput, valid. auto op_desc = node->GetOpDesc(); // excludes the function op sub graph, e.g. 
case,if @@ -1117,7 +1115,7 @@ Status DavinciModel::InitNetOutput(const ComputeGraphPtr &graph, const NodePtr & return PARAM_INVALID; } - size_t num = new_output_data_info_.size(); + size_t num = output_data_info_.size(); bool fusion_flag = false; size_t input_count = input_size_list.size(); @@ -1131,22 +1129,22 @@ Status DavinciModel::InitNetOutput(const ComputeGraphPtr &graph, const NodePtr & Status ret = zero_copy_offset.InitOutputDataInfo(input_size_list, virtual_addr_list, op_desc, idx, fusion_flag); GE_IF_BOOL_EXEC(ret != SUCCESS, GELOGE(PARAM_INVALID, "InitDataInfo of input_info %s failed.", op_desc->GetName().c_str()); return PARAM_INVALID;); - new_output_data_info_[num + idx] = zero_copy_offset; void *addr = virtual_addr_list.at(idx); int64_t input_offset = input_offset_list.at(idx); - vector tensor_addrs; - zero_copy_offset.SetOutputOutsideAddrs(input_offset, fusion_flag, addr, tensor_addrs); - auto rslt = new_output_outside_addrs_.insert(std::pair(addr, zero_copy_offset)); - if (!rslt.second) { + if (output_outside_addrs.count(addr) == 0) { + vector tensor_addrs; + zero_copy_offset.SetOutputOutsideAddrs(input_offset, fusion_flag, addr, tensor_addrs); + output_outside_addrs.insert(addr); + for (size_t i = 0; i < tensor_addrs.size(); ++i) { + void *real_addr = tensor_addrs.at(i); + DisableZeroCopy(real_addr); + real_virtual_addrs_.insert(real_addr); + } + } else { GELOGI("same output_tensor_addr %p to different input_tensor of %s", addr, op_desc->GetName().c_str()); DisableZeroCopy(addr); } - - for (size_t i = 0; i < tensor_addrs.size(); ++i) { - void *real_addr = tensor_addrs.at(i); - DisableZeroCopy(real_addr); - real_virtual_addrs_.insert(real_addr); - } + output_data_info_[num + idx] = zero_copy_offset; } return SUCCESS; } @@ -1463,27 +1461,27 @@ Status DavinciModel::LoadWithQueue() { return SUCCESS; } - if (input_queue_ids_.size() != new_input_data_info_.size()) { + if (input_queue_ids_.size() != input_data_info_.size()) { 
GELOGE(ACL_ERROR_GE_EXEC_MODEL_QUEUE_ID_INVALID, "Input queue ids not match model: input_queue=%zu input_data=%zu", - input_queue_ids_.size(), new_input_data_info_.size()); + input_queue_ids_.size(), input_data_info_.size()); return ACL_ERROR_GE_EXEC_MODEL_QUEUE_ID_INVALID; } - if (output_queue_ids_.size() != new_output_data_info_.size()) { + if (output_queue_ids_.size() != output_data_info_.size()) { GELOGE(ACL_ERROR_GE_EXEC_MODEL_QUEUE_ID_INVALID, "Output queue ids not match model: output_queue=%zu output_data=%zu", - output_queue_ids_.size(), new_output_data_info_.size()); + output_queue_ids_.size(), output_data_info_.size()); return ACL_ERROR_GE_EXEC_MODEL_QUEUE_ID_INVALID; } GE_CHK_STATUS_RET(AddHeadStream(), "Add head stream failed."); // Binding input_queue and Data Op. GE_CHK_STATUS_RET(BindInputQueue(), "Launch bind input queue failed."); - GE_CHK_STATUS_RET(CpuTaskModelZeroCopy(input_mbuf_list_, new_input_outside_addrs_), "Launch zero copy failed."); + GE_CHK_STATUS_RET(CpuTaskModelZeroCopy(input_mbuf_list_, input_data_info_), "Launch zero copy failed."); // Binding output_queue and NetOutput Op. 
GE_CHK_STATUS_RET(BindOutputQueue(), "Launch bind output queue failed."); - GE_CHK_STATUS_RET(CpuTaskModelZeroCopy(output_mbuf_list_, new_output_outside_addrs_), "Launch zero copy failed."); + GE_CHK_STATUS_RET(CpuTaskModelZeroCopy(output_mbuf_list_, output_data_info_), "Launch zero copy failed."); GE_CHK_STATUS_RET(CpuActiveStream(), "Launch active entry stream failed."); GE_CHK_STATUS_RET(CpuWaitEndGraph(), "Launch wait end graph failed."); @@ -1499,9 +1497,9 @@ Status DavinciModel::LoadWithQueue() { Status DavinciModel::BindInputQueue() { // Caller checked: input_queue_ids_.size() == input_size_list_.size() != input_addr_list_.size() for (size_t i = 0; i < input_queue_ids_.size(); ++i) { - auto it = new_input_data_info_.find(i); - if (it == new_input_data_info_.end()) { - GELOGE(FAILED, "Input not match: tensor num=%zu, Queue id index=%zu", new_input_data_info_.size(), i); + auto it = input_data_info_.find(i); + if (it == input_data_info_.end()) { + GELOGE(FAILED, "Input not match: tensor num=%zu, Queue id index=%zu", input_data_info_.size(), i); return FAILED; } @@ -1555,7 +1553,7 @@ Status DavinciModel::CpuModelDequeue(uint32_t queue_id) { } Status DavinciModel::CpuTaskModelZeroCopy(std::vector &mbuf_list, - std::map &outside_addrs) { + const map &outside_addrs) { GELOGI("Set CpuKernel model zero_copy task enter."); std::shared_ptr zero_copy = MakeShared(rt_entry_stream_); if (zero_copy == nullptr) { @@ -1579,9 +1577,9 @@ Status DavinciModel::CpuTaskModelZeroCopy(std::vector &mbuf_list, Status DavinciModel::BindOutputQueue() { // Caller checked: input_queue_ids_.size() == input_size_list_.size() != input_addr_list_.size() for (size_t i = 0; i < output_queue_ids_.size(); ++i) { - auto it = new_output_data_info_.find(i); - if (it == new_output_data_info_.end()) { - GELOGE(FAILED, "Output not match: tensor num=%zu, Queue id index=%zu", new_output_data_info_.size(), i); + auto it = output_data_info_.find(i); + if (it == output_data_info_.end()) { + GELOGE(FAILED, 
"Output not match: tensor num=%zu, Queue id index=%zu", output_data_info_.size(), i); return FAILED; } @@ -1685,9 +1683,9 @@ Status DavinciModel::CpuWaitEndGraph() { Status DavinciModel::BindEnqueue() { for (size_t i = 0; i < output_queue_ids_.size(); ++i) { - auto it = new_output_data_info_.find(i); - if (it == new_output_data_info_.end()) { - GELOGE(FAILED, "Output not match: tensor num=%zu, Queue id index=%zu", new_output_data_info_.size(), i); + auto it = output_data_info_.find(i); + if (it == output_data_info_.end()) { + GELOGE(FAILED, "Output not match: tensor num=%zu, Queue id index=%zu", output_data_info_.size(), i); return FAILED; } @@ -2103,10 +2101,10 @@ Status DavinciModel::GetOutputDescInfo(vector &output_descs Status DavinciModel::CopyInputData(const InputData &input_data, bool device_data) { rtMemcpyKind_t kind = device_data ? RT_MEMCPY_DEVICE_TO_DEVICE : RT_MEMCPY_HOST_TO_DEVICE; const std::vector &blobs = input_data.blobs; - for (const auto &data : new_input_data_info_) { + for (const auto &data : input_data_info_) { if (data.first >= blobs.size()) { GELOGE(FAILED, "Blobs not match: blobs=%zu, tensor=%zu, index=%u, size=%ld, op_name(%s)", blobs.size(), - new_input_data_info_.size(), data.first, data.second.GetDataInfo().at(0).first, + input_data_info_.size(), data.first, data.second.GetDataInfo().at(0).first, data.second.GetOpName().c_str()); return FAILED; } @@ -2427,18 +2425,18 @@ Status DavinciModel::CopyOutputData(uint32_t data_id, OutputData &output_data, r output_data.index = data_id; output_data.model_id = model_id_; - if (output_data.blobs.size() != new_output_data_info_.size()) { + if (output_data.blobs.size() != output_data_info_.size()) { GELOGE(FAILED, "Output data buffer num=%zu not equal model data num=%zu", output_data.blobs.size(), - new_output_data_info_.size()); + output_data_info_.size()); return FAILED; } std::vector &blobs = output_data.blobs; size_t idx = 0; - for (const auto &output : new_output_data_info_) { + for (const 
auto &output : output_data_info_) { if (output.first >= blobs.size()) { GELOGE(FAILED, "Blobs not match: blobs=%zu, tensor=%zu, index=%u, size=%ld", blobs.size(), - new_input_data_info_.size(), output.first, output.second.GetDataInfo().at(0).first); + output_data_info_.size(), output.first, output.second.GetDataInfo().at(0).first); return FAILED; } @@ -3166,8 +3164,11 @@ void DavinciModel::SetEndGraphId(uint32_t task_id, uint32_t stream_id) { /// @return None. /// void DavinciModel::SetCopyOnlyOutput() { - for (const auto &output_outside_addrs : new_output_outside_addrs_) { + for (const auto &output_outside_addrs : output_data_info_) { ZeroCopyOffset output_outside = output_outside_addrs.second; + if (!output_outside.IsRelativeOffsetValid()) { + continue; + } for (uint32_t out_count = 0; out_count < output_outside.GetAddrCount(); ++out_count) { auto &addrs_mapping_list = output_outside.GetOutsideAddrs(); std::map> virtual_args_addrs = addrs_mapping_list[out_count]; @@ -3219,12 +3220,12 @@ void DavinciModel::SetZeroCopyAddr(const OpDescPtr &op_desc, const std::vector lock(outside_addrs_mutex_); - for (auto &input_outside_addrs : new_input_outside_addrs_) { + for (auto &input_outside_addrs : input_data_info_) { ZeroCopyOffset &input_outside = input_outside_addrs.second; input_outside.SetOutsideAddrsValue(zero_copy_task, outside_addrs[i], args, offset + i * kAddrLen); } - for (auto &output_outside_addrs : new_output_outside_addrs_) { + for (auto &output_outside_addrs : output_data_info_) { ZeroCopyOffset &output_outside = output_outside_addrs.second; output_outside.SetOutsideAddrsValue(zero_copy_task, outside_addrs[i], args, offset + i * kAddrLen); } @@ -3293,12 +3294,12 @@ bool DavinciModel::CheckInputAndModelSize(const int64_t &input_size, const int64 /// @return SUCCESS handle successfully / PARAM_INVALID for failed /// Status DavinciModel::CopyModelData(const InputData &input_data, OutputData &output_data, bool is_dynamic) { - if 
(UpdateIoTaskArgs(new_input_data_info_, true, input_data.blobs, is_dynamic, input_data.batch_label) != SUCCESS) { + if (UpdateIoTaskArgs(input_data_info_, true, input_data.blobs, is_dynamic, input_data.batch_label) != SUCCESS) { GELOGE(ACL_ERROR_GE_PARAM_INVALID, "[ZCPY] Update input data to model failed."); return ACL_ERROR_GE_PARAM_INVALID; } - if (UpdateIoTaskArgs(new_output_data_info_, false, output_data.blobs, is_dynamic, input_data.batch_label) != + if (UpdateIoTaskArgs(output_data_info_, false, output_data.blobs, is_dynamic, input_data.batch_label) != SUCCESS) { GELOGE(ACL_ERROR_GE_PARAM_INVALID, "[ZCPY] Update output data to model failed."); return ACL_ERROR_GE_PARAM_INVALID; diff --git a/ge/graph/load/model_manager/davinci_model.h b/ge/graph/load/model_manager/davinci_model.h index 53e9cd4d..8ed82912 100755 --- a/ge/graph/load/model_manager/davinci_model.h +++ b/ge/graph/load/model_manager/davinci_model.h @@ -675,7 +675,7 @@ class DavinciModel { /// @return Status /// Status InitDataOp(const ComputeGraphPtr &graph, const NodePtr &node, uint32_t &data_op_index, - map &data_by_index); + map &data_by_index, set &input_outside_addrs); /// /// @ingroup ge @@ -694,7 +694,8 @@ class DavinciModel { /// @param [in/out] vector: All NetOutput node in model. 
/// @return Status /// - Status InitNetOutput(const ComputeGraphPtr &graph, const NodePtr &node, vector &output_op_list); + Status InitNetOutput(const ComputeGraphPtr &graph, const NodePtr &node, vector &output_op_list, + set &output_outside_addrs); /// /// @ingroup ge @@ -764,7 +765,7 @@ class DavinciModel { /// Status BindInputQueue(); - Status CpuTaskModelZeroCopy(vector &mbuf_list, map &outside_addrs); + Status CpuTaskModelZeroCopy(vector &mbuf_list, const map &outside_addrs); /// /// @ingroup ge @@ -897,10 +898,8 @@ class DavinciModel { void *global_step_addr_{nullptr}; uint64_t global_step_size_{0}; - map new_input_data_info_; - map new_output_data_info_; - map new_input_outside_addrs_; - map new_output_outside_addrs_; + map input_data_info_; + map output_data_info_; set real_virtual_addrs_; diff --git a/ge/graph/load/model_manager/zero_copy_offset.cc b/ge/graph/load/model_manager/zero_copy_offset.cc index 3f8555bb..4a448869 100644 --- a/ge/graph/load/model_manager/zero_copy_offset.cc +++ b/ge/graph/load/model_manager/zero_copy_offset.cc @@ -127,8 +127,8 @@ void ZeroCopyOffset::IsL2Fusion(const vector &fusion_basic_addrs, const } } -void ZeroCopyOffset::SetInputOutsideAddrs(const vector &output_offset_list, void *addr, const size_t &index, - bool fusion_flag, std::set &real_virtual_addrs) { +void ZeroCopyOffset::SetInputOutsideAddrs(int64_t output_offset, void *addr, bool fusion_flag, + set &real_virtual_addrs) { uint32_t out_count = 0; if (!fusion_flag) { out_count++; @@ -138,7 +138,6 @@ void ZeroCopyOffset::SetInputOutsideAddrs(const vector &output_offset_l real_virtual_addrs.insert(addr); } else { GELOGI("[ZCPY] set l2-fusion for virtual_addr %p.", addr); - int64_t output_offset = output_offset_list.at(index); for (size_t i = 0; i < zero_copy_basic_offset_.size(); ++i) { if (zero_copy_basic_offset_.at(i) == output_offset) { out_count++; @@ -153,6 +152,7 @@ void ZeroCopyOffset::SetInputOutsideAddrs(const vector &output_offset_l } } addr_count_ = out_count; 
+ valid_relative_offset_ = true; } void ZeroCopyOffset::SetOutputOutsideAddrs(const int64_t &input_offset, const bool &fusion_flag, void *addr, @@ -181,9 +181,13 @@ void ZeroCopyOffset::SetOutputOutsideAddrs(const int64_t &input_offset, const bo } } addr_count_ = out_count; + valid_relative_offset_ = true; } void ZeroCopyOffset::SetOutsideAddrsValue(ZeroCopyTask &zero_copy_task, void *outside_addr, void *args, size_t offset) { + if (!valid_relative_offset_) { + return; + } const auto addr_val = reinterpret_cast(outside_addr); for (uint32_t out_count = 0; out_count < GetAddrCount(); ++out_count) { auto args_addrs = outside_addrs_[out_count].find(outside_addr); diff --git a/ge/graph/load/model_manager/zero_copy_offset.h b/ge/graph/load/model_manager/zero_copy_offset.h index fc63fced..82e1bb6d 100644 --- a/ge/graph/load/model_manager/zero_copy_offset.h +++ b/ge/graph/load/model_manager/zero_copy_offset.h @@ -43,8 +43,7 @@ class ZeroCopyOffset { ~ZeroCopyOffset(); Status InitInputDataInfo(int64_t output_size, void *virtual_addr, const OpDescPtr &op_desc, bool &fusion_flag); - void SetInputOutsideAddrs(const vector &output_offset_list, void *addr, const size_t &index, - bool fusion_flag, std::set &real_virtual_addrs); + void SetInputOutsideAddrs(int64_t output_offset, void *addr, bool fusion_flag, set &real_virtual_addrs); void IsL2Fusion(const vector &fusion_basic_addrs, const int64_t &tensor_addr, bool &fusion_flag); Status InitOutputDataInfo(const vector &input_size_list, const vector &virtual_addr_list, @@ -65,9 +64,10 @@ class ZeroCopyOffset { // data_size of Data/Netoutput int64_t GetDataSize() const { return data_size_; } // value of *outside_addrs_ from davinci_model - const std::vector>> &GetOutsideAddrs() { return outside_addrs_; } + const std::vector>> &GetOutsideAddrs() const { return outside_addrs_; } // name of op std::string GetOpName() const { return op_name_; } + bool IsRelativeOffsetValid() const { return valid_relative_offset_; } private: void 
*basic_addr_ = nullptr; @@ -81,6 +81,7 @@ class ZeroCopyOffset { std::vector zero_copy_basic_offset_; std::vector zero_copy_relative_offset_; + bool valid_relative_offset_ = false; }; } // namespace ge #endif // GE_GRAPH_LOAD_NEW_MODEL_MANAGER_ZERO_COPY_OFFSET_H_ diff --git a/tests/ut/ge/CMakeLists.txt b/tests/ut/ge/CMakeLists.txt index 631043ff..384acc30 100755 --- a/tests/ut/ge/CMakeLists.txt +++ b/tests/ut/ge/CMakeLists.txt @@ -629,6 +629,7 @@ set(DISTINCT_GRAPH_LOAD_TEST_FILES "graph/load/kernel_task_info_unittest.cc" "graph/load/memcpy_addr_async_task_info_unittest.cc" "graph/load/memcpy_async_task_info_unittest.cc" + "graph/load/cpu_queue_schedule_unittest.cc" #"graph/graph_load_unittest.cc" "graph/ge_executor_unittest.cc" "graph/load/model_helper_unittest.cc" diff --git a/tests/ut/ge/graph/load/cpu_queue_schedule_unittest.cc b/tests/ut/ge/graph/load/cpu_queue_schedule_unittest.cc new file mode 100644 index 00000000..a36754b8 --- /dev/null +++ b/tests/ut/ge/graph/load/cpu_queue_schedule_unittest.cc @@ -0,0 +1,70 @@ +/** + * Copyright 2021 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include + +#define private public +#define protected public +#include "graph/load/model_manager/cpu_queue_schedule.h" +#undef private +#undef protected + +using namespace std; + +namespace ge { +class UtestCpuQueueSchedule : public testing::Test { + protected: + void SetUp() {} + + void TearDown() {} +}; + +// test Init_CpuTaskZeroCopy_succ +TEST_F(UtestCpuQueueSchedule, CpuTaskZeroCopy_Init_Success) { + CpuTaskZeroCopy cpu_task_zero_copy(nullptr); + std::vector mbuf_list; + map outside_addrs; + ZeroCopyOffset addr_mapping; + addr_mapping.addr_count_ = 1; + std::vector addr_offset; + addr_offset.push_back((void*) 0x11110000); + uintptr_t addr = 0x12340000; + std::map> outside_addr; + outside_addr[(void*)addr] = addr_offset; + addr_mapping.outside_addrs_.emplace_back(outside_addr); + mbuf_list.emplace_back(addr); + uint32_t index = 0; + outside_addrs[index] = addr_mapping; + EXPECT_EQ(cpu_task_zero_copy.Init(mbuf_list, outside_addrs), SUCCESS); +} + +TEST_F(UtestCpuQueueSchedule, CpuTaskInfo_Init_args_valid) { + CpuTaskZeroCopy cpu_task_zero_copy(nullptr); + CpuTaskActiveEntry cpu_task_active_entry(nullptr); + CpuTaskModelDequeue cpu_task_model_dequeue(nullptr); + CpuTaskModelRepeat cpu_task_model_repeat(nullptr); + CpuTaskWaitEndGraph cpu_task_wait_end_graph(nullptr); + CpuTaskModelEnqueue cpu_task_model_enqueue(nullptr); + CpuTaskPrepareOutput cpu_task_prepare_output(nullptr); + EXPECT_EQ(cpu_task_zero_copy.Distribute(), FAILED); + EXPECT_EQ(cpu_task_active_entry.Distribute(), FAILED); + EXPECT_EQ(cpu_task_model_dequeue.Distribute(), FAILED); + EXPECT_EQ(cpu_task_model_repeat.Distribute(), FAILED); + EXPECT_EQ(cpu_task_wait_end_graph.Distribute(), FAILED); + EXPECT_EQ(cpu_task_model_enqueue.Distribute(), FAILED); + EXPECT_EQ(cpu_task_prepare_output.Distribute(), FAILED); +} +} // namespace ge diff --git a/tests/ut/ge/graph/load/davinci_model_unittest.cc b/tests/ut/ge/graph/load/davinci_model_unittest.cc index e6272e45..ff1b546f 100644 --- 
a/tests/ut/ge/graph/load/davinci_model_unittest.cc +++ b/tests/ut/ge/graph/load/davinci_model_unittest.cc @@ -169,7 +169,8 @@ TEST_F(UtestDavinciModel, init_data_op_subgraph) { uint32_t data_op_index = 0; map data_by_index; - EXPECT_EQ(model.InitDataOp(nullptr, node, data_op_index, data_by_index), SUCCESS); + set input_outside_addrs; + EXPECT_EQ(model.InitDataOp(nullptr, node, data_op_index, data_by_index, input_outside_addrs), SUCCESS); EXPECT_EQ(model.input_addrs_list_.size(), 0); EXPECT_EQ(model.output_addrs_list_.size(), 0); @@ -194,7 +195,8 @@ TEST_F(UtestDavinciModel, init_netoutput_op_subgraph) { NodePtr node = graph->AddNode(op_output); std::vector output_op_list; - EXPECT_EQ(model.InitNetOutput(nullptr, node, output_op_list), SUCCESS); + set output_outside_addrs; + EXPECT_EQ(model.InitNetOutput(nullptr, node, output_op_list, output_outside_addrs), SUCCESS); EXPECT_EQ(model.input_addrs_list_.size(), 0); EXPECT_EQ(model.output_addrs_list_.size(), 0); @@ -800,7 +802,6 @@ TEST_F(UtestDavinciModel, label_task_success) { label_task_def->set_op_index(op_index++); } - { OpDescPtr op_desc = CreateOpDesc("label_else", LABELSET); NodePtr node = graph->AddNode(op_desc); // op_index = 3 @@ -813,7 +814,6 @@ TEST_F(UtestDavinciModel, label_task_success) { label_task_def->set_op_index(op_index++); } - { OpDescPtr op_desc = CreateOpDesc("label_leave", LABELSET); NodePtr node = graph->AddNode(op_desc); // op_index = 4 @@ -826,13 +826,27 @@ TEST_F(UtestDavinciModel, label_task_success) { label_task_def->set_op_index(op_index++); } - EXPECT_TRUE(AttrUtils::SetInt(ge_model, ATTR_MODEL_LABEL_NUM, 3)); EXPECT_EQ(model.Assign(ge_model), SUCCESS); EXPECT_EQ(model.Init(), SUCCESS); - EXPECT_EQ(model.input_addrs_list_.size(), 0); EXPECT_EQ(model.output_addrs_list_.size(), 0); EXPECT_EQ(model.task_list_.size(), 5); } + +TEST_F(UtestDavinciModel, LoadWithQueue_fail_with_diff_args) { + DavinciModel model(0, nullptr); + model.ge_model_ = make_shared(); + 
model.input_queue_ids_.emplace_back(0); + EXPECT_EQ(model.LoadWithQueue(), ACL_ERROR_GE_EXEC_MODEL_QUEUE_ID_INVALID); + EXPECT_EQ(model.input_data_info_.size(), 0); + ZeroCopyOffset zero_copy_offset; + model.input_data_info_[0] = zero_copy_offset; + model.output_queue_ids_.emplace_back(0); + EXPECT_EQ(model.LoadWithQueue(), ACL_ERROR_GE_EXEC_MODEL_QUEUE_ID_INVALID); + EXPECT_EQ(model.output_data_info_.size(), 0); + model.output_data_info_[0] = zero_copy_offset; + EXPECT_EQ(model.LoadWithQueue(), INTERNAL_ERROR); + EXPECT_EQ(model.active_stream_list_.size(), 0); +} } // namespace ge