Browse Source

!1050 fix mdc mbuf_list order error

From: @zhou_lili
Reviewed-by: @xchu42,@wangxiaotian22
Signed-off-by: @ji_chen
tags/v1.2.0
mindspore-ci-bot Gitee 3 years ago
parent
commit
d2886c4604
9 changed files with 168 additions and 88 deletions
  1. +6
    -16
      ge/graph/load/model_manager/cpu_queue_schedule.cc
  2. +1
    -1
      ge/graph/load/model_manager/cpu_queue_schedule.h
  3. +53
    -52
      ge/graph/load/model_manager/davinci_model.cc
  4. +6
    -7
      ge/graph/load/model_manager/davinci_model.h
  5. +7
    -3
      ge/graph/load/model_manager/zero_copy_offset.cc
  6. +4
    -3
      ge/graph/load/model_manager/zero_copy_offset.h
  7. +1
    -0
      tests/ut/ge/CMakeLists.txt
  8. +70
    -0
      tests/ut/ge/graph/load/cpu_queue_schedule_unittest.cc
  9. +20
    -6
      tests/ut/ge/graph/load/davinci_model_unittest.cc

+ 6
- 16
ge/graph/load/model_manager/cpu_queue_schedule.cc View File

@@ -99,7 +99,7 @@ Status CpuTaskModelDequeue::Distribute() {
/// @param [in] outside_addrs: model input/output memory addr
/// @return: 0 for success / others for failed
///
Status CpuTaskZeroCopy::Init(std::vector<uintptr_t> &mbuf_list, std::map<const void *, ZeroCopyOffset> &outside_addrs) {
Status CpuTaskZeroCopy::Init(std::vector<uintptr_t> &mbuf_list, const map<uint32_t, ZeroCopyOffset> &outside_addrs) {
if ((args_ != nullptr) || (args_size_ > 0)) {
GELOGE(FAILED, "Task already initialized, size: %u", args_size_);
return FAILED;
@@ -110,32 +110,22 @@ Status CpuTaskZeroCopy::Init(std::vector<uintptr_t> &mbuf_list, std::map<const v
GE_PRINT_DYNAMIC_MEMORY(rtMalloc, "args data.", args_size_)

AddrMapInfo addr_map_info;
for (auto &addrs : outside_addrs) {
auto &addrs_mapping_list = addrs.second.GetOutsideAddrs();
GE_CHK_BOOL_EXEC(!addrs_mapping_list.empty(), return PARAM_INVALID, "not set outside_addrs");
std::map<const void *, std::vector<void *>> virtual_args_addrs = addrs_mapping_list[0];
for (const auto &virtual_args_addr : virtual_args_addrs) {
addr_map_info.addr_num += virtual_args_addr.second.size();
}
}
GELOGI("addr_map_info.addr_num is %u", addr_map_info.addr_num);

// init src_addrs/dst_addrs
size_t index = 0;
vector<uint64_t> src_addrs;
vector<uint64_t> dst_addrs;
for (auto &addrs : outside_addrs) {
auto &addrs_mapping_list = addrs.second.GetOutsideAddrs();
for (const auto &addrs : outside_addrs) {
const auto &addrs_mapping_list = addrs.second.GetOutsideAddrs();
GE_CHK_BOOL_EXEC(!addrs_mapping_list.empty(), return PARAM_INVALID, "not set outside_addrs");
std::map<const void *, std::vector<void *>> virtual_args_addrs = addrs_mapping_list[0];
for (const auto &virtual_args_addr : virtual_args_addrs) {
addr_map_info.addr_num += virtual_args_addr.second.size();
for (size_t i = 0; i < virtual_args_addr.second.size(); ++i) {
src_addrs.push_back(mbuf_list.at(index));
src_addrs.emplace_back(mbuf_list.at(addrs.first));
dst_addrs.push_back(static_cast<uint64_t>(reinterpret_cast<uintptr_t>(virtual_args_addr.second.at(i))));
}
}
index++;
}
GELOGI("addr_map_info.addr_num is %u", addr_map_info.addr_num);

// malloc mem for src_addrs/dst_addrs, and copy data of src_addrs/dst_addrs
GE_CHK_RT_RET(rtMalloc(&src_addr_, src_addrs.size() * sizeof(uint64_t), RT_MEMORY_HBM));


+ 1
- 1
ge/graph/load/model_manager/cpu_queue_schedule.h View File

@@ -93,7 +93,7 @@ class CpuTaskZeroCopy : public CpuTaskInfo {
~CpuTaskZeroCopy() override;

Status Init(const domi::TaskDef &task_def, DavinciModel *davinci_model) override { return SUCCESS; }
Status Init(std::vector<uintptr_t> &mbuf_list, std::map<const void *, ZeroCopyOffset> &outside_addrs);
Status Init(std::vector<uintptr_t> &mbuf_list, const map<uint32_t, ZeroCopyOffset> &outside_addrs);

Status Distribute() override;
private:


+ 53
- 52
ge/graph/load/model_manager/davinci_model.cc View File

@@ -842,6 +842,8 @@ Status DavinciModel::InitNodes(const ComputeGraphPtr &compute_graph) {
};

vector<OpDescPtr> output_op_list;
set<const void *> input_outside_addrs;
set<const void *> output_outside_addrs;
map<uint32_t, OpDescPtr> data_by_index;
map<string, OpDescPtr> variable_by_name;
auto nodes = compute_graph->GetAllNodes();
@@ -858,7 +860,7 @@ Status DavinciModel::InitNodes(const ComputeGraphPtr &compute_graph) {
GE_TIMESTAMP_ADD(LoadTBEKernelBinToOpDesc);

if (IsDataOp(op_desc->GetType())) {
if (InitDataOp(compute_graph, node, data_op_index, data_by_index) != SUCCESS) {
if (InitDataOp(compute_graph, node, data_op_index, data_by_index, input_outside_addrs) != SUCCESS) {
GELOGE(PARAM_INVALID, "Data init failed, Name: %s", op_desc->GetName().c_str());
return PARAM_INVALID;
}
@@ -867,7 +869,7 @@ Status DavinciModel::InitNodes(const ComputeGraphPtr &compute_graph) {
}

if (op_desc->GetType() == NETOUTPUT) {
if (InitNetOutput(compute_graph, node, output_op_list) != SUCCESS) {
if (InitNetOutput(compute_graph, node, output_op_list, output_outside_addrs) != SUCCESS) {
GELOGE(PARAM_INVALID, "NetOutput init failed, Name: %s", op_desc->GetName().c_str());
return PARAM_INVALID;
}
@@ -961,7 +963,7 @@ void DavinciModel::SetLabelForDynamic(const NodePtr &node) {
/// @return Status
///
Status DavinciModel::InitDataOp(const ComputeGraphPtr &graph, const NodePtr &node, uint32_t &data_op_index,
map<uint32_t, OpDescPtr> &data_by_index) {
map<uint32_t, OpDescPtr> &data_by_index, set<const void *> &input_outside_addrs) {
// op_desc Checked by Init: Data, valid.
auto op_desc = node->GetOpDesc();
if (node->GetOwnerComputeGraph() != graph) {
@@ -1000,16 +1002,12 @@ Status DavinciModel::InitDataOp(const ComputeGraphPtr &graph, const NodePtr &nod
GELOGE(PARAM_INVALID, "InitDataInfo of input_info %s failed.", op_desc->GetName().c_str());
return PARAM_INVALID;
}
new_input_data_info_[data_index] = zero_copy_offset;

for (size_t index = 0; index < virtual_addr_list.size(); ++index) {
void *addr = virtual_addr_list.at(index);
if (new_input_outside_addrs_.find(addr) != new_input_outside_addrs_.end()) {
continue;
}
zero_copy_offset.SetInputOutsideAddrs(output_offset_list, addr, index, fusion_flag, real_virtual_addrs_);
new_input_outside_addrs_[addr] = zero_copy_offset;
if (input_outside_addrs.count(virtual_addr) == 0) {
int64_t output_offset = output_offset_list.at(kDataIndex);
zero_copy_offset.SetInputOutsideAddrs(output_offset, virtual_addr, fusion_flag, real_virtual_addrs_);
input_outside_addrs.insert(virtual_addr);
}
input_data_info_[data_index] = zero_copy_offset;

return SUCCESS;
}
@@ -1085,7 +1083,7 @@ bool DavinciModel::IsGetNextSinkDynamic(const OpDescPtr &op_desc) {
/// @param [in/out] vector<OpDescPtr>: All NetOutput node in model.
/// @return Status
Status DavinciModel::InitNetOutput(const ComputeGraphPtr &graph, const NodePtr &node,
vector<OpDescPtr> &output_op_list) {
vector<OpDescPtr> &output_op_list, set<const void *> &output_outside_addrs) {
// node->GetOpDesc Checked by Init: NetOutput, valid.
auto op_desc = node->GetOpDesc();
// excludes the function op sub graph, e.g. case,if
@@ -1117,7 +1115,7 @@ Status DavinciModel::InitNetOutput(const ComputeGraphPtr &graph, const NodePtr &
return PARAM_INVALID;
}

size_t num = new_output_data_info_.size();
size_t num = output_data_info_.size();
bool fusion_flag = false;

size_t input_count = input_size_list.size();
@@ -1131,22 +1129,22 @@ Status DavinciModel::InitNetOutput(const ComputeGraphPtr &graph, const NodePtr &
Status ret = zero_copy_offset.InitOutputDataInfo(input_size_list, virtual_addr_list, op_desc, idx, fusion_flag);
GE_IF_BOOL_EXEC(ret != SUCCESS, GELOGE(PARAM_INVALID, "InitDataInfo of input_info %s failed.",
op_desc->GetName().c_str()); return PARAM_INVALID;);
new_output_data_info_[num + idx] = zero_copy_offset;
void *addr = virtual_addr_list.at(idx);
int64_t input_offset = input_offset_list.at(idx);
vector<void *> tensor_addrs;
zero_copy_offset.SetOutputOutsideAddrs(input_offset, fusion_flag, addr, tensor_addrs);
auto rslt = new_output_outside_addrs_.insert(std::pair<void *, ZeroCopyOffset>(addr, zero_copy_offset));
if (!rslt.second) {
if (output_outside_addrs.count(addr) == 0) {
vector<void *> tensor_addrs;
zero_copy_offset.SetOutputOutsideAddrs(input_offset, fusion_flag, addr, tensor_addrs);
output_outside_addrs.insert(addr);
for (size_t i = 0; i < tensor_addrs.size(); ++i) {
void *real_addr = tensor_addrs.at(i);
DisableZeroCopy(real_addr);
real_virtual_addrs_.insert(real_addr);
}
} else {
GELOGI("same output_tensor_addr %p to different input_tensor of %s", addr, op_desc->GetName().c_str());
DisableZeroCopy(addr);
}

for (size_t i = 0; i < tensor_addrs.size(); ++i) {
void *real_addr = tensor_addrs.at(i);
DisableZeroCopy(real_addr);
real_virtual_addrs_.insert(real_addr);
}
output_data_info_[num + idx] = zero_copy_offset;
}
return SUCCESS;
}
@@ -1463,27 +1461,27 @@ Status DavinciModel::LoadWithQueue() {
return SUCCESS;
}

if (input_queue_ids_.size() != new_input_data_info_.size()) {
if (input_queue_ids_.size() != input_data_info_.size()) {
GELOGE(ACL_ERROR_GE_EXEC_MODEL_QUEUE_ID_INVALID, "Input queue ids not match model: input_queue=%zu input_data=%zu",
input_queue_ids_.size(), new_input_data_info_.size());
input_queue_ids_.size(), input_data_info_.size());
return ACL_ERROR_GE_EXEC_MODEL_QUEUE_ID_INVALID;
}

if (output_queue_ids_.size() != new_output_data_info_.size()) {
if (output_queue_ids_.size() != output_data_info_.size()) {
GELOGE(ACL_ERROR_GE_EXEC_MODEL_QUEUE_ID_INVALID,
"Output queue ids not match model: output_queue=%zu output_data=%zu",
output_queue_ids_.size(), new_output_data_info_.size());
output_queue_ids_.size(), output_data_info_.size());
return ACL_ERROR_GE_EXEC_MODEL_QUEUE_ID_INVALID;
}

GE_CHK_STATUS_RET(AddHeadStream(), "Add head stream failed.");
// Binding input_queue and Data Op.
GE_CHK_STATUS_RET(BindInputQueue(), "Launch bind input queue failed.");
GE_CHK_STATUS_RET(CpuTaskModelZeroCopy(input_mbuf_list_, new_input_outside_addrs_), "Launch zero copy failed.");
GE_CHK_STATUS_RET(CpuTaskModelZeroCopy(input_mbuf_list_, input_data_info_), "Launch zero copy failed.");

// Binding output_queue and NetOutput Op.
GE_CHK_STATUS_RET(BindOutputQueue(), "Launch bind output queue failed.");
GE_CHK_STATUS_RET(CpuTaskModelZeroCopy(output_mbuf_list_, new_output_outside_addrs_), "Launch zero copy failed.");
GE_CHK_STATUS_RET(CpuTaskModelZeroCopy(output_mbuf_list_, output_data_info_), "Launch zero copy failed.");

GE_CHK_STATUS_RET(CpuActiveStream(), "Launch active entry stream failed.");
GE_CHK_STATUS_RET(CpuWaitEndGraph(), "Launch wait end graph failed.");
@@ -1499,9 +1497,9 @@ Status DavinciModel::LoadWithQueue() {
Status DavinciModel::BindInputQueue() {
// Caller checked: input_queue_ids_.size() == input_data_info_.size()
for (size_t i = 0; i < input_queue_ids_.size(); ++i) {
auto it = new_input_data_info_.find(i);
if (it == new_input_data_info_.end()) {
GELOGE(FAILED, "Input not match: tensor num=%zu, Queue id index=%zu", new_input_data_info_.size(), i);
auto it = input_data_info_.find(i);
if (it == input_data_info_.end()) {
GELOGE(FAILED, "Input not match: tensor num=%zu, Queue id index=%zu", input_data_info_.size(), i);
return FAILED;
}

@@ -1555,7 +1553,7 @@ Status DavinciModel::CpuModelDequeue(uint32_t queue_id) {
}

Status DavinciModel::CpuTaskModelZeroCopy(std::vector<uintptr_t> &mbuf_list,
std::map<const void *, ZeroCopyOffset> &outside_addrs) {
const map<uint32_t, ZeroCopyOffset> &outside_addrs) {
GELOGI("Set CpuKernel model zero_copy task enter.");
std::shared_ptr<CpuTaskZeroCopy> zero_copy = MakeShared<CpuTaskZeroCopy>(rt_entry_stream_);
if (zero_copy == nullptr) {
@@ -1579,9 +1577,9 @@ Status DavinciModel::CpuTaskModelZeroCopy(std::vector<uintptr_t> &mbuf_list,
Status DavinciModel::BindOutputQueue() {
// Caller checked: output_queue_ids_.size() == output_data_info_.size()
for (size_t i = 0; i < output_queue_ids_.size(); ++i) {
auto it = new_output_data_info_.find(i);
if (it == new_output_data_info_.end()) {
GELOGE(FAILED, "Output not match: tensor num=%zu, Queue id index=%zu", new_output_data_info_.size(), i);
auto it = output_data_info_.find(i);
if (it == output_data_info_.end()) {
GELOGE(FAILED, "Output not match: tensor num=%zu, Queue id index=%zu", output_data_info_.size(), i);
return FAILED;
}

@@ -1685,9 +1683,9 @@ Status DavinciModel::CpuWaitEndGraph() {

Status DavinciModel::BindEnqueue() {
for (size_t i = 0; i < output_queue_ids_.size(); ++i) {
auto it = new_output_data_info_.find(i);
if (it == new_output_data_info_.end()) {
GELOGE(FAILED, "Output not match: tensor num=%zu, Queue id index=%zu", new_output_data_info_.size(), i);
auto it = output_data_info_.find(i);
if (it == output_data_info_.end()) {
GELOGE(FAILED, "Output not match: tensor num=%zu, Queue id index=%zu", output_data_info_.size(), i);
return FAILED;
}

@@ -2103,10 +2101,10 @@ Status DavinciModel::GetOutputDescInfo(vector<InputOutputDescInfo> &output_descs
Status DavinciModel::CopyInputData(const InputData &input_data, bool device_data) {
rtMemcpyKind_t kind = device_data ? RT_MEMCPY_DEVICE_TO_DEVICE : RT_MEMCPY_HOST_TO_DEVICE;
const std::vector<DataBuffer> &blobs = input_data.blobs;
for (const auto &data : new_input_data_info_) {
for (const auto &data : input_data_info_) {
if (data.first >= blobs.size()) {
GELOGE(FAILED, "Blobs not match: blobs=%zu, tensor=%zu, index=%u, size=%ld, op_name(%s)", blobs.size(),
new_input_data_info_.size(), data.first, data.second.GetDataInfo().at(0).first,
input_data_info_.size(), data.first, data.second.GetDataInfo().at(0).first,
data.second.GetOpName().c_str());
return FAILED;
}
@@ -2427,18 +2425,18 @@ Status DavinciModel::CopyOutputData(uint32_t data_id, OutputData &output_data, r

output_data.index = data_id;
output_data.model_id = model_id_;
if (output_data.blobs.size() != new_output_data_info_.size()) {
if (output_data.blobs.size() != output_data_info_.size()) {
GELOGE(FAILED, "Output data buffer num=%zu not equal model data num=%zu", output_data.blobs.size(),
new_output_data_info_.size());
output_data_info_.size());
return FAILED;
}

std::vector<DataBuffer> &blobs = output_data.blobs;
size_t idx = 0;
for (const auto &output : new_output_data_info_) {
for (const auto &output : output_data_info_) {
if (output.first >= blobs.size()) {
GELOGE(FAILED, "Blobs not match: blobs=%zu, tensor=%zu, index=%u, size=%ld", blobs.size(),
new_input_data_info_.size(), output.first, output.second.GetDataInfo().at(0).first);
input_data_info_.size(), output.first, output.second.GetDataInfo().at(0).first);
return FAILED;
}

@@ -3166,8 +3164,11 @@ void DavinciModel::SetEndGraphId(uint32_t task_id, uint32_t stream_id) {
/// @return None.
///
void DavinciModel::SetCopyOnlyOutput() {
for (const auto &output_outside_addrs : new_output_outside_addrs_) {
for (const auto &output_outside_addrs : output_data_info_) {
ZeroCopyOffset output_outside = output_outside_addrs.second;
if (!output_outside.IsRelativeOffsetValid()) {
return;
}
for (uint32_t out_count = 0; out_count < output_outside.GetAddrCount(); ++out_count) {
auto &addrs_mapping_list = output_outside.GetOutsideAddrs();
std::map<const void *, std::vector<void *>> virtual_args_addrs = addrs_mapping_list[out_count];
@@ -3219,12 +3220,12 @@ void DavinciModel::SetZeroCopyAddr(const OpDescPtr &op_desc, const std::vector<v
for (size_t i = 0; i < nums; ++i) {
std::lock_guard<std::mutex> lock(outside_addrs_mutex_);

for (auto &input_outside_addrs : new_input_outside_addrs_) {
for (auto &input_outside_addrs : input_data_info_) {
ZeroCopyOffset &input_outside = input_outside_addrs.second;
input_outside.SetOutsideAddrsValue(zero_copy_task, outside_addrs[i], args, offset + i * kAddrLen);
}

for (auto &output_outside_addrs : new_output_outside_addrs_) {
for (auto &output_outside_addrs : output_data_info_) {
ZeroCopyOffset &output_outside = output_outside_addrs.second;
output_outside.SetOutsideAddrsValue(zero_copy_task, outside_addrs[i], args, offset + i * kAddrLen);
}
@@ -3293,12 +3294,12 @@ bool DavinciModel::CheckInputAndModelSize(const int64_t &input_size, const int64
/// @return SUCCESS handle successfully / PARAM_INVALID for failed
///
Status DavinciModel::CopyModelData(const InputData &input_data, OutputData &output_data, bool is_dynamic) {
if (UpdateIoTaskArgs(new_input_data_info_, true, input_data.blobs, is_dynamic, input_data.batch_label) != SUCCESS) {
if (UpdateIoTaskArgs(input_data_info_, true, input_data.blobs, is_dynamic, input_data.batch_label) != SUCCESS) {
GELOGE(ACL_ERROR_GE_PARAM_INVALID, "[ZCPY] Update input data to model failed.");
return ACL_ERROR_GE_PARAM_INVALID;
}

if (UpdateIoTaskArgs(new_output_data_info_, false, output_data.blobs, is_dynamic, input_data.batch_label) !=
if (UpdateIoTaskArgs(output_data_info_, false, output_data.blobs, is_dynamic, input_data.batch_label) !=
SUCCESS) {
GELOGE(ACL_ERROR_GE_PARAM_INVALID, "[ZCPY] Update output data to model failed.");
return ACL_ERROR_GE_PARAM_INVALID;


+ 6
- 7
ge/graph/load/model_manager/davinci_model.h View File

@@ -675,7 +675,7 @@ class DavinciModel {
/// @return Status
///
Status InitDataOp(const ComputeGraphPtr &graph, const NodePtr &node, uint32_t &data_op_index,
map<uint32_t, OpDescPtr> &data_by_index);
map<uint32_t, OpDescPtr> &data_by_index, set<const void *> &input_outside_addrs);

///
/// @ingroup ge
@@ -694,7 +694,8 @@ class DavinciModel {
/// @param [in/out] vector<OpDescPtr>: All NetOutput node in model.
/// @return Status
///
Status InitNetOutput(const ComputeGraphPtr &graph, const NodePtr &node, vector<OpDescPtr> &output_op_list);
Status InitNetOutput(const ComputeGraphPtr &graph, const NodePtr &node, vector<OpDescPtr> &output_op_list,
set<const void *> &output_outside_addrs);

///
/// @ingroup ge
@@ -764,7 +765,7 @@ class DavinciModel {
///
Status BindInputQueue();

Status CpuTaskModelZeroCopy(vector<uintptr_t> &mbuf_list, map<const void *, ZeroCopyOffset> &outside_addrs);
Status CpuTaskModelZeroCopy(vector<uintptr_t> &mbuf_list, const map<uint32_t, ZeroCopyOffset> &outside_addrs);

///
/// @ingroup ge
@@ -897,10 +898,8 @@ class DavinciModel {
void *global_step_addr_{nullptr};
uint64_t global_step_size_{0};

map<uint32_t, ZeroCopyOffset> new_input_data_info_;
map<uint32_t, ZeroCopyOffset> new_output_data_info_;
map<const void *, ZeroCopyOffset> new_input_outside_addrs_;
map<const void *, ZeroCopyOffset> new_output_outside_addrs_;
map<uint32_t, ZeroCopyOffset> input_data_info_;
map<uint32_t, ZeroCopyOffset> output_data_info_;

set<const void *> real_virtual_addrs_;



+ 7
- 3
ge/graph/load/model_manager/zero_copy_offset.cc View File

@@ -127,8 +127,8 @@ void ZeroCopyOffset::IsL2Fusion(const vector<int64_t> &fusion_basic_addrs, const
}
}

void ZeroCopyOffset::SetInputOutsideAddrs(const vector<int64_t> &output_offset_list, void *addr, const size_t &index,
bool fusion_flag, std::set<const void *> &real_virtual_addrs) {
void ZeroCopyOffset::SetInputOutsideAddrs(int64_t output_offset, void *addr, bool fusion_flag,
set<const void *> &real_virtual_addrs) {
uint32_t out_count = 0;
if (!fusion_flag) {
out_count++;
@@ -138,7 +138,6 @@ void ZeroCopyOffset::SetInputOutsideAddrs(const vector<int64_t> &output_offset_l
real_virtual_addrs.insert(addr);
} else {
GELOGI("[ZCPY] set l2-fusion for virtual_addr %p.", addr);
int64_t output_offset = output_offset_list.at(index);
for (size_t i = 0; i < zero_copy_basic_offset_.size(); ++i) {
if (zero_copy_basic_offset_.at(i) == output_offset) {
out_count++;
@@ -153,6 +152,7 @@ void ZeroCopyOffset::SetInputOutsideAddrs(const vector<int64_t> &output_offset_l
}
}
addr_count_ = out_count;
valid_relative_offset_ = true;
}

void ZeroCopyOffset::SetOutputOutsideAddrs(const int64_t &input_offset, const bool &fusion_flag, void *addr,
@@ -181,9 +181,13 @@ void ZeroCopyOffset::SetOutputOutsideAddrs(const int64_t &input_offset, const bo
}
}
addr_count_ = out_count;
valid_relative_offset_ = true;
}

void ZeroCopyOffset::SetOutsideAddrsValue(ZeroCopyTask &zero_copy_task, void *outside_addr, void *args, size_t offset) {
if (!valid_relative_offset_) {
return;
}
const auto addr_val = reinterpret_cast<uintptr_t>(outside_addr);
for (uint32_t out_count = 0; out_count < GetAddrCount(); ++out_count) {
auto args_addrs = outside_addrs_[out_count].find(outside_addr);


+ 4
- 3
ge/graph/load/model_manager/zero_copy_offset.h View File

@@ -43,8 +43,7 @@ class ZeroCopyOffset {
~ZeroCopyOffset();

Status InitInputDataInfo(int64_t output_size, void *virtual_addr, const OpDescPtr &op_desc, bool &fusion_flag);
void SetInputOutsideAddrs(const vector<int64_t> &output_offset_list, void *addr, const size_t &index,
bool fusion_flag, std::set<const void *> &real_virtual_addrs);
void SetInputOutsideAddrs(int64_t output_offset, void *addr, bool fusion_flag, set<const void *> &real_virtual_addrs);

void IsL2Fusion(const vector<int64_t> &fusion_basic_addrs, const int64_t &tensor_addr, bool &fusion_flag);
Status InitOutputDataInfo(const vector<int64_t> &input_size_list, const vector<void *> &virtual_addr_list,
@@ -65,9 +64,10 @@ class ZeroCopyOffset {
// data_size of Data/Netoutput
int64_t GetDataSize() const { return data_size_; }
// value of *outside_addrs_ from davinci_model
const std::vector<std::map<const void *, std::vector<void *>>> &GetOutsideAddrs() { return outside_addrs_; }
const std::vector<std::map<const void *, std::vector<void *>>> &GetOutsideAddrs() const { return outside_addrs_; }
// name of op
std::string GetOpName() const { return op_name_; }
const bool IsRelativeOffsetValid() const { return valid_relative_offset_; }

private:
void *basic_addr_ = nullptr;
@@ -81,6 +81,7 @@ class ZeroCopyOffset {

std::vector<int64_t> zero_copy_basic_offset_;
std::vector<int64_t> zero_copy_relative_offset_;
bool valid_relative_offset_ = false;
};
} // namespace ge
#endif // GE_GRAPH_LOAD_NEW_MODEL_MANAGER_ZERO_COPY_OFFSET_H_

+ 1
- 0
tests/ut/ge/CMakeLists.txt View File

@@ -629,6 +629,7 @@ set(DISTINCT_GRAPH_LOAD_TEST_FILES
"graph/load/kernel_task_info_unittest.cc"
"graph/load/memcpy_addr_async_task_info_unittest.cc"
"graph/load/memcpy_async_task_info_unittest.cc"
"graph/load/cpu_queue_schedule_unittest.cc"
#"graph/graph_load_unittest.cc"
"graph/ge_executor_unittest.cc"
"graph/load/model_helper_unittest.cc"


+ 70
- 0
tests/ut/ge/graph/load/cpu_queue_schedule_unittest.cc View File

@@ -0,0 +1,70 @@
/**
* Copyright 2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#include <gtest/gtest.h>

#define private public
#define protected public
#include "graph/load/model_manager/cpu_queue_schedule.h"
#undef private
#undef protected

using namespace std;

namespace ge {
/// Fixture for the CPU queue schedule task tests.
/// It carries no shared state; every test case builds its own task objects.
class UtestCpuQueueSchedule : public testing::Test {
 protected:
  // No per-test preparation is needed.
  void SetUp() {}

  // No per-test cleanup is needed.
  void TearDown() {}
};

// CpuTaskZeroCopy::Init succeeds when the single queue index has both an mbuf entry
// and a populated outside-address mapping.
TEST_F(UtestCpuQueueSchedule, CpuTaskZeroCopy_Init_Success) {
  CpuTaskZeroCopy zero_copy_task(nullptr);
  std::vector<uintptr_t> mbuf_list;
  map<uint32_t, ZeroCopyOffset> outside_addrs;

  // One virtual address that maps to exactly one task-argument address.
  ZeroCopyOffset offset_info;
  offset_info.addr_count_ = 1;
  std::vector<void *> args_addrs;
  args_addrs.push_back(reinterpret_cast<void *>(0x11110000));
  uintptr_t virtual_addr = 0x12340000;
  std::map<const void *, std::vector<void *>> addr_to_args;
  addr_to_args[reinterpret_cast<void *>(virtual_addr)] = args_addrs;
  offset_info.outside_addrs_.emplace_back(addr_to_args);

  // The map key doubles as the position inside mbuf_list, so key 0 pairs with the only mbuf entry.
  mbuf_list.emplace_back(virtual_addr);
  uint32_t queue_index = 0;
  outside_addrs[queue_index] = offset_info;

  EXPECT_EQ(zero_copy_task.Init(mbuf_list, outside_addrs), SUCCESS);
}

// Distribute() on any CPU task that was never initialised must report FAILED.
TEST_F(UtestCpuQueueSchedule, CpuTaskInfo_Init_args_valid) {
  // Build every task kind with a null stream and without calling Init().
  CpuTaskZeroCopy zero_copy(nullptr);
  CpuTaskActiveEntry active_entry(nullptr);
  CpuTaskModelDequeue model_dequeue(nullptr);
  CpuTaskModelRepeat model_repeat(nullptr);
  CpuTaskWaitEndGraph wait_end_graph(nullptr);
  CpuTaskModelEnqueue model_enqueue(nullptr);
  CpuTaskPrepareOutput prepare_output(nullptr);

  // Each uninitialised task is expected to refuse distribution.
  EXPECT_EQ(zero_copy.Distribute(), FAILED);
  EXPECT_EQ(active_entry.Distribute(), FAILED);
  EXPECT_EQ(model_dequeue.Distribute(), FAILED);
  EXPECT_EQ(model_repeat.Distribute(), FAILED);
  EXPECT_EQ(wait_end_graph.Distribute(), FAILED);
  EXPECT_EQ(model_enqueue.Distribute(), FAILED);
  EXPECT_EQ(prepare_output.Distribute(), FAILED);
}
} // namespace ge

+ 20
- 6
tests/ut/ge/graph/load/davinci_model_unittest.cc View File

@@ -169,7 +169,8 @@ TEST_F(UtestDavinciModel, init_data_op_subgraph) {

uint32_t data_op_index = 0;
map<uint32_t, OpDescPtr> data_by_index;
EXPECT_EQ(model.InitDataOp(nullptr, node, data_op_index, data_by_index), SUCCESS);
set<const void *> input_outside_addrs;
EXPECT_EQ(model.InitDataOp(nullptr, node, data_op_index, data_by_index, input_outside_addrs), SUCCESS);

EXPECT_EQ(model.input_addrs_list_.size(), 0);
EXPECT_EQ(model.output_addrs_list_.size(), 0);
@@ -194,7 +195,8 @@ TEST_F(UtestDavinciModel, init_netoutput_op_subgraph) {
NodePtr node = graph->AddNode(op_output);

std::vector<OpDescPtr> output_op_list;
EXPECT_EQ(model.InitNetOutput(nullptr, node, output_op_list), SUCCESS);
set<const void *> output_outside_addrs;
EXPECT_EQ(model.InitNetOutput(nullptr, node, output_op_list, output_outside_addrs), SUCCESS);

EXPECT_EQ(model.input_addrs_list_.size(), 0);
EXPECT_EQ(model.output_addrs_list_.size(), 0);
@@ -800,7 +802,6 @@ TEST_F(UtestDavinciModel, label_task_success) {
label_task_def->set_op_index(op_index++);
}


{
OpDescPtr op_desc = CreateOpDesc("label_else", LABELSET);
NodePtr node = graph->AddNode(op_desc); // op_index = 3
@@ -813,7 +814,6 @@ TEST_F(UtestDavinciModel, label_task_success) {
label_task_def->set_op_index(op_index++);
}


{
OpDescPtr op_desc = CreateOpDesc("label_leave", LABELSET);
NodePtr node = graph->AddNode(op_desc); // op_index = 4
@@ -826,13 +826,27 @@ TEST_F(UtestDavinciModel, label_task_success) {
label_task_def->set_op_index(op_index++);
}


EXPECT_TRUE(AttrUtils::SetInt(ge_model, ATTR_MODEL_LABEL_NUM, 3));
EXPECT_EQ(model.Assign(ge_model), SUCCESS);
EXPECT_EQ(model.Init(), SUCCESS);

EXPECT_EQ(model.input_addrs_list_.size(), 0);
EXPECT_EQ(model.output_addrs_list_.size(), 0);
EXPECT_EQ(model.task_list_.size(), 5);
}
// LoadWithQueue must reject queue id lists whose sizes differ from the recorded
// input/output data info, and must not leave active streams behind when it fails.
TEST_F(UtestDavinciModel, LoadWithQueue_fail_with_diff_args) {
DavinciModel model(0, nullptr);
model.ge_model_ = make_shared<GeModel>();
// Step 1: one input queue id but no input data info -> size mismatch on the input side.
model.input_queue_ids_.emplace_back(0);
EXPECT_EQ(model.LoadWithQueue(), ACL_ERROR_GE_EXEC_MODEL_QUEUE_ID_INVALID);
EXPECT_EQ(model.input_data_info_.size(), 0);
// Step 2: register input 0 so the input sizes agree, then add an output queue id
// that has no matching output data info -> size mismatch on the output side.
ZeroCopyOffset zero_copy_offset;
model.input_data_info_[0] = zero_copy_offset;
model.output_queue_ids_.emplace_back(0);
EXPECT_EQ(model.LoadWithQueue(), ACL_ERROR_GE_EXEC_MODEL_QUEUE_ID_INVALID);
EXPECT_EQ(model.output_data_info_.size(), 0);
// Step 3: both size checks now pass, so the call proceeds further and is expected to
// fail with INTERNAL_ERROR.
// NOTE(review): the step that produces INTERNAL_ERROR is not visible here — presumably
// stream/queue setup on this bare model; confirm against LoadWithQueue.
model.output_data_info_[0] = zero_copy_offset;
EXPECT_EQ(model.LoadWithQueue(), INTERNAL_ERROR);
EXPECT_EQ(model.active_stream_list_.size(), 0);
}
} // namespace ge

Loading…
Cancel
Save