@@ -163,7 +163,6 @@ DavinciModel::~DavinciModel() { | |||||
op_list_.clear(); | op_list_.clear(); | ||||
data_op_list_.clear(); | data_op_list_.clear(); | ||||
output_op_list_.clear(); | |||||
tensor_name_to_fixed_addr_size_.clear(); | tensor_name_to_fixed_addr_size_.clear(); | ||||
tensor_name_to_peer_output_index_.clear(); | tensor_name_to_peer_output_index_.clear(); | ||||
GE_DELETE_NEW_SINGLE(data_inputer_); | GE_DELETE_NEW_SINGLE(data_inputer_); | ||||
@@ -830,12 +829,11 @@ Status DavinciModel::InitNodes(const ComputeGraphPtr &compute_graph) { | |||||
{CASE, &DavinciModel::InitCase}, | {CASE, &DavinciModel::InitCase}, | ||||
}; | }; | ||||
GE_CHK_STATUS_RET(InitInputOutputForDynamic(compute_graph), "InitInputOutputForDynamic failed."); | |||||
vector<OpDescPtr> output_op_list; | |||||
map<uint32_t, OpDescPtr> data_by_index; | map<uint32_t, OpDescPtr> data_by_index; | ||||
auto nodes = compute_graph->GetAllNodes(); | auto nodes = compute_graph->GetAllNodes(); | ||||
const CustAICPUKernelStore &aicpu_kernel_store = ge_model_->GetCustAICPUKernelStore(); | const CustAICPUKernelStore &aicpu_kernel_store = ge_model_->GetCustAICPUKernelStore(); | ||||
for (size_t i = 0; i < nodes.size(); i++) { | |||||
for (size_t i = 0; i < nodes.size(); ++i) { | |||||
auto node = nodes.at(i); | auto node = nodes.at(i); | ||||
auto op_desc = node->GetOpDesc(); | auto op_desc = node->GetOpDesc(); | ||||
if (op_desc == nullptr) { | if (op_desc == nullptr) { | ||||
@@ -850,7 +848,7 @@ Status DavinciModel::InitNodes(const ComputeGraphPtr &compute_graph) { | |||||
GE_TIMESTAMP_ADD(LoadTBEKernelBinToOpDesc); | GE_TIMESTAMP_ADD(LoadTBEKernelBinToOpDesc); | ||||
if (IsDataOp(op_desc->GetType())) { | if (IsDataOp(op_desc->GetType())) { | ||||
if (InitDataOp(node, data_op_index, data_by_index) != SUCCESS) { | |||||
if (InitDataOp(compute_graph, node, data_op_index, data_by_index) != SUCCESS) { | |||||
GELOGE(PARAM_INVALID, "Data init failed, Name: %s", op_desc->GetName().c_str()); | GELOGE(PARAM_INVALID, "Data init failed, Name: %s", op_desc->GetName().c_str()); | ||||
return PARAM_INVALID; | return PARAM_INVALID; | ||||
} | } | ||||
@@ -859,7 +857,7 @@ Status DavinciModel::InitNodes(const ComputeGraphPtr &compute_graph) { | |||||
} | } | ||||
if (op_desc->GetType() == NETOUTPUT) { | if (op_desc->GetType() == NETOUTPUT) { | ||||
if (InitNetOutput(node) != SUCCESS) { | |||||
if (InitNetOutput(compute_graph, node, output_op_list) != SUCCESS) { | |||||
GELOGE(PARAM_INVALID, "NetOutput init failed, Name: %s", op_desc->GetName().c_str()); | GELOGE(PARAM_INVALID, "NetOutput init failed, Name: %s", op_desc->GetName().c_str()); | ||||
return PARAM_INVALID; | return PARAM_INVALID; | ||||
} | } | ||||
@@ -919,33 +917,10 @@ Status DavinciModel::InitNodes(const ComputeGraphPtr &compute_graph) { | |||||
} | } | ||||
GE_TIMESTAMP_ADD(InitTbeHandle); | GE_TIMESTAMP_ADD(InitTbeHandle); | ||||
} | } | ||||
AdjustDataOpList(data_by_index); | |||||
GE_TIMESTAMP_CALLNUM_END(LoadTBEKernelBinToOpDesc, "GraphLoader::LoadTBEKernelBinToOpDesc."); | GE_TIMESTAMP_CALLNUM_END(LoadTBEKernelBinToOpDesc, "GraphLoader::LoadTBEKernelBinToOpDesc."); | ||||
GE_TIMESTAMP_CALLNUM_END(InitTbeHandle, "GraphLoader::InitTbeHandle."); | GE_TIMESTAMP_CALLNUM_END(InitTbeHandle, "GraphLoader::InitTbeHandle."); | ||||
return SUCCESS; | |||||
} | |||||
Status DavinciModel::InitInputOutputForDynamic(const ComputeGraphPtr &compute_graph) { | |||||
if (!known_node_) return SUCCESS; | |||||
// for dynamic shape | |||||
auto direct_nodes = compute_graph->GetDirectNode(); | |||||
for (size_t i = 0; i < direct_nodes.size(); i++) { | |||||
auto node = direct_nodes.at(i); | |||||
auto op_desc = node->GetOpDesc(); | |||||
if (op_desc == nullptr) { | |||||
GELOGE(PARAM_INVALID, "op_desc is null."); | |||||
return PARAM_INVALID; | |||||
} | |||||
if (IsDataOp(op_desc->GetType())) { | |||||
GELOGD("init data op %s", op_desc->GetName().c_str()); | |||||
data_op_list_.push_back(op_desc); | |||||
} | |||||
if (op_desc->GetType() == NETOUTPUT) { | |||||
GELOGD("init netouput op %s", op_desc->GetName().c_str()); | |||||
output_op_list_.push_back(op_desc); | |||||
} | |||||
} | |||||
return SUCCESS; | |||||
return OptInputOutputInfo(data_by_index, output_op_list); | |||||
} | } | ||||
void DavinciModel::SetLabelForDynamic(const NodePtr &node) { | void DavinciModel::SetLabelForDynamic(const NodePtr &node) { | ||||
@@ -963,24 +938,35 @@ void DavinciModel::SetLabelForDynamic(const NodePtr &node) { | |||||
} | } | ||||
} | } | ||||
/// | |||||
/// @ingroup ge | /// @ingroup ge | ||||
/// @brief Data Op Initialize. | /// @brief Data Op Initialize. | ||||
/// @param [in] ComputeGraphPtr: root graph of the model. | |||||
/// @param [in] NodePtr: Data Op. | /// @param [in] NodePtr: Data Op. | ||||
/// @param [in/out] data_op_index: NetOutput addr size info. | |||||
/// @param [in/out] data_op_index: index of courrent count. | |||||
/// @param [in/out] data_by_index: Data ordered by index. | |||||
/// @return Status | /// @return Status | ||||
Status DavinciModel::InitDataOp(const NodePtr &node, uint32_t &data_op_index, map<uint32_t, OpDescPtr> &data_by_index) { | |||||
/// | |||||
Status DavinciModel::InitDataOp(const ComputeGraphPtr &graph, const NodePtr &node, uint32_t &data_op_index, | |||||
map<uint32_t, OpDescPtr> &data_by_index) { | |||||
// op_desc Checked by Init: Data, valid. | // op_desc Checked by Init: Data, valid. | ||||
auto op_desc = node->GetOpDesc(); | auto op_desc = node->GetOpDesc(); | ||||
if (known_node_) { | |||||
if (node->GetOwnerComputeGraph() != graph) { | |||||
GELOGI("Skip subgraph Data node: %s.", op_desc->GetName().c_str()); | |||||
return SUCCESS; | return SUCCESS; | ||||
} | } | ||||
uint32_t parent_index = 0; // Ignore subgraph Data Node. | |||||
if (AttrUtils::GetInt(op_desc, ATTR_NAME_PARENT_NODE_INDEX, parent_index)) { | |||||
GELOGI("Init zero copy by subgraph Data node: %s.", op_desc->GetName().c_str()); | |||||
return SUCCESS; | |||||
GELOGI("Init Data node: %s.", op_desc->GetName().c_str()); | |||||
auto data_index = data_op_index++; | |||||
if (AttrUtils::GetInt(op_desc, ATTR_NAME_INDEX, data_index)) { | |||||
GELOGD("Get new index %u, old %u", data_index, data_op_index - 1); | |||||
} | } | ||||
data_by_index[data_index] = op_desc; | |||||
data_op_list_.push_back(op_desc); | data_op_list_.push_back(op_desc); | ||||
if (known_node_) { | |||||
return SUCCESS; | |||||
} | |||||
// Make information for copy input data. | // Make information for copy input data. | ||||
const vector<int64_t> output_size_list = ModelUtils::GetOutputSize(op_desc); | const vector<int64_t> output_size_list = ModelUtils::GetOutputSize(op_desc); | ||||
@@ -992,10 +978,7 @@ Status DavinciModel::InitDataOp(const NodePtr &node, uint32_t &data_op_index, ma | |||||
op_desc->GetName().c_str(), output_size_list.size(), virtual_addr_list.size(), output_offset_list.size()); | op_desc->GetName().c_str(), output_size_list.size(), virtual_addr_list.size(), output_offset_list.size()); | ||||
return PARAM_INVALID; | return PARAM_INVALID; | ||||
} | } | ||||
auto data_index = data_op_index; | |||||
if (AttrUtils::GetInt(op_desc, ATTR_NAME_INDEX, data_index)) { | |||||
GELOGD("ge_train: get new index %u, old %u", data_index, data_op_index); | |||||
} | |||||
bool fusion_flag = false; | bool fusion_flag = false; | ||||
ZeroCopyOffset zero_copy_offset; | ZeroCopyOffset zero_copy_offset; | ||||
int64_t data_size = output_size_list[kDataIndex]; | int64_t data_size = output_size_list[kDataIndex]; | ||||
@@ -1006,7 +989,6 @@ Status DavinciModel::InitDataOp(const NodePtr &node, uint32_t &data_op_index, ma | |||||
return PARAM_INVALID; | return PARAM_INVALID; | ||||
} | } | ||||
new_input_data_info_[data_index] = zero_copy_offset; | new_input_data_info_[data_index] = zero_copy_offset; | ||||
data_by_index[data_index] = op_desc; | |||||
for (size_t index = 0; index < virtual_addr_list.size(); ++index) { | for (size_t index = 0; index < virtual_addr_list.size(); ++index) { | ||||
void *addr = virtual_addr_list.at(index); | void *addr = virtual_addr_list.at(index); | ||||
@@ -1017,7 +999,6 @@ Status DavinciModel::InitDataOp(const NodePtr &node, uint32_t &data_op_index, ma | |||||
new_input_outside_addrs_[addr] = zero_copy_offset; | new_input_outside_addrs_[addr] = zero_copy_offset; | ||||
} | } | ||||
data_op_index++; | |||||
return SUCCESS; | return SUCCESS; | ||||
} | } | ||||
@@ -1025,18 +1006,52 @@ Status DavinciModel::InitDataOp(const NodePtr &node, uint32_t &data_op_index, ma | |||||
/// @ingroup ge | /// @ingroup ge | ||||
/// @brief Sort Data op list by index. | /// @brief Sort Data op list by index. | ||||
/// @param [in] data_by_index: map of Data Op. | /// @param [in] data_by_index: map of Data Op. | ||||
/// @return | |||||
/// @param [in] output_op_list: list of NetOutput op. | |||||
/// @return Status | |||||
/// | /// | ||||
void DavinciModel::AdjustDataOpList(const map<uint32_t, OpDescPtr> &data_by_index) { | |||||
Status DavinciModel::OptInputOutputInfo(const map<uint32_t, OpDescPtr> &data_by_index, | |||||
const vector<OpDescPtr> &output_op_list) { | |||||
GELOGD("Data node size: %zu, NetOutput node size: %zu", data_op_list_.size(), output_op_list.size()); | |||||
if (data_by_index.size() != data_op_list_.size()) { | if (data_by_index.size() != data_op_list_.size()) { | ||||
GELOGW("Data map size: %zu, Data list size: %zu.", data_by_index.size(), data_op_list_.size()); | |||||
return; | |||||
GELOGE(INTERNAL_ERROR, "Data map size: %zu, Data list size: %zu.", data_by_index.size(), data_op_list_.size()); | |||||
return INTERNAL_ERROR; | |||||
} | } | ||||
data_op_list_.clear(); | data_op_list_.clear(); | ||||
for (auto &item : data_by_index) { | for (auto &item : data_by_index) { | ||||
data_op_list_.emplace_back(item.second); | data_op_list_.emplace_back(item.second); | ||||
auto output_addrs = ModelUtils::GetOutputDataAddrs(runtime_param_, item.second); | |||||
GELOGD("Data node: %s, output addr size: %zu", item.second->GetName().c_str(), output_addrs.size()); | |||||
input_addrs_list_.emplace_back(output_addrs); | |||||
if (item.second->GetType() == AIPP_DATA_TYPE) { | |||||
GELOGI("This is dynamic aipp model, Node: %s", item.second->GetName().c_str()); | |||||
is_dynamic_aipp_ = true; | |||||
} | |||||
} | } | ||||
for (const auto &op_desc : output_op_list) { | |||||
auto input_addrs = ModelUtils::GetInputDataAddrs(runtime_param_, op_desc); | |||||
GELOGD("NetOutput node: %s, input addr size: %zu", op_desc->GetName().c_str(), input_addrs.size()); | |||||
output_addrs_list_.emplace_back(input_addrs); | |||||
bool getnext_sink_dynamic = false; | |||||
if (AttrUtils::GetBool(op_desc, ATTR_GETNEXT_SINK_DYNMAIC, getnext_sink_dynamic) && getnext_sink_dynamic) { | |||||
GELOGI("ATTR_GETNEXT_SINK_DYNMAIC has been set and is true, node: %s", op_desc->GetName().c_str()); | |||||
is_getnext_sink_dynamic_ = true; | |||||
} | |||||
vector<string> shape_info; | |||||
if (AttrUtils::GetListStr(op_desc, ATTR_NAME_DYNAMIC_OUTPUT_DIMS, shape_info)) { | |||||
dynamic_output_shape_info_.insert(dynamic_output_shape_info_.end(), shape_info.begin(), shape_info.end()); | |||||
} | |||||
if (InitOutputTensorInfo(op_desc) != SUCCESS) { | |||||
return INTERNAL_ERROR; | |||||
} | |||||
} | |||||
return InitOutputDescInfo(output_op_list, output_descs_, output_formats_); | |||||
} | } | ||||
bool DavinciModel::IsGetNextSinkDynamic(const OpDescPtr &op_desc) { | bool DavinciModel::IsGetNextSinkDynamic(const OpDescPtr &op_desc) { | ||||
@@ -1050,24 +1065,27 @@ bool DavinciModel::IsGetNextSinkDynamic(const OpDescPtr &op_desc) { | |||||
/// @ingroup ge | /// @ingroup ge | ||||
/// @brief NetOutput Op Initialize. | /// @brief NetOutput Op Initialize. | ||||
/// @param [in] ComputeGraphPtr: root graph of the model. | |||||
/// @param [in] NodePtr: NetOutput Op. | /// @param [in] NodePtr: NetOutput Op. | ||||
/// @param [in/out] vector<OpDescPtr>: All NetOutput node in model. | |||||
/// @return Status | /// @return Status | ||||
Status DavinciModel::InitNetOutput(const NodePtr &node) { | |||||
Status DavinciModel::InitNetOutput(const ComputeGraphPtr &graph, const NodePtr &node, | |||||
vector<OpDescPtr> &output_op_list) { | |||||
// node->GetOpDesc Checked by Init: NetOutput, valid. | // node->GetOpDesc Checked by Init: NetOutput, valid. | ||||
auto op_desc = node->GetOpDesc(); | auto op_desc = node->GetOpDesc(); | ||||
// excludes the function op sub graph, e.g. case,if | // excludes the function op sub graph, e.g. case,if | ||||
if (known_node_) { | |||||
if (node->GetOwnerComputeGraph() != graph) { | |||||
GELOGI("Skip subgraph NetOutput node: %s.", op_desc->GetName().c_str()); | |||||
op_list_.erase(op_desc->GetId()); | |||||
return SUCCESS; | return SUCCESS; | ||||
} | } | ||||
ComputeGraphPtr owner_graph = node->GetOwnerComputeGraph(); | |||||
GE_CHECK_NOTNULL(owner_graph); | |||||
if (owner_graph->GetParentGraph() != nullptr) { | |||||
GELOGI("Init zero copy by subgraph NetOutput node: %s.", op_desc->GetName().c_str()); | |||||
op_list_.erase(op_desc->GetId()); | |||||
GELOGI("Init NetOutput node: %s.", op_desc->GetName().c_str()); | |||||
output_op_list.push_back(op_desc); | |||||
if (known_node_) { | |||||
return SUCCESS; | return SUCCESS; | ||||
} | } | ||||
output_op_list_.push_back(op_desc); | |||||
// Make information for copy output data. | // Make information for copy output data. | ||||
const vector<int64_t> input_size_list = ModelUtils::GetInputSize(op_desc); | const vector<int64_t> input_size_list = ModelUtils::GetInputSize(op_desc); | ||||
const vector<void *> virtual_addr_list = ModelUtils::GetInputDataAddrs(runtime_param_, op_desc); | const vector<void *> virtual_addr_list = ModelUtils::GetInputDataAddrs(runtime_param_, op_desc); | ||||
@@ -1665,32 +1683,30 @@ Status DavinciModel::CpuModelRepeat() { | |||||
Status DavinciModel::GetInputOutputDescInfo(vector<InputOutputDescInfo> &input_desc, | Status DavinciModel::GetInputOutputDescInfo(vector<InputOutputDescInfo> &input_desc, | ||||
vector<InputOutputDescInfo> &output_desc) { | vector<InputOutputDescInfo> &output_desc) { | ||||
if ((data_op_list_.empty()) || (data_op_list_[0]->GetInputsSize()) != 1) { | |||||
if (input_addrs_list_.empty() || input_addrs_list_[0].size() != 1) { | |||||
GELOGI("data_op_list_ is empty or input_desc size is not 1."); | GELOGI("data_op_list_ is empty or input_desc size is not 1."); | ||||
} else { | } else { | ||||
std::vector<uint32_t> input_formats; | |||||
vector<uint32_t> input_formats; | |||||
GE_CHK_STATUS_RET(GetInputDescInfo(input_desc, input_formats), "get input desc info failed."); | GE_CHK_STATUS_RET(GetInputDescInfo(input_desc, input_formats), "get input desc info failed."); | ||||
} | } | ||||
std::vector<uint32_t> outputFormats; | |||||
GE_CHK_STATUS_RET(GetOutputDescInfo(output_desc, outputFormats), "get output desc info failed."); | |||||
vector<uint32_t> output_formats; | |||||
GE_CHK_STATUS_RET(GetOutputDescInfo(output_desc, output_formats), "get output desc info failed"); | |||||
return SUCCESS; | return SUCCESS; | ||||
} | } | ||||
Status DavinciModel::GetInputOutputDescInfo(vector<InputOutputDescInfo> &input_desc, | Status DavinciModel::GetInputOutputDescInfo(vector<InputOutputDescInfo> &input_desc, | ||||
vector<InputOutputDescInfo> &output_desc, | vector<InputOutputDescInfo> &output_desc, | ||||
std::vector<uint32_t> &input_formats, | |||||
std::vector<uint32_t> &outputFormats) { | |||||
if ((data_op_list_.empty()) || (data_op_list_[0]->GetInputsSize()) != 1) { | |||||
vector<uint32_t> &input_formats, | |||||
vector<uint32_t> &output_formats) { | |||||
if (input_addrs_list_.empty() || input_addrs_list_[0].size() != 1) { | |||||
GELOGE(FAILED, "OP List Pointer is null or input_desc size is not 1!"); | GELOGE(FAILED, "OP List Pointer is null or input_desc size is not 1!"); | ||||
return FAILED; | return FAILED; | ||||
} | } | ||||
GE_CHK_STATUS_RET(GetInputDescInfo(input_desc, input_formats), "get input desc info failed"); | GE_CHK_STATUS_RET(GetInputDescInfo(input_desc, input_formats), "get input desc info failed"); | ||||
GE_CHK_STATUS_RET(GetOutputDescInfo(output_desc, outputFormats), "get ouput desc info failed"); | |||||
GE_CHK_STATUS_RET(GetOutputDescInfo(output_desc, output_formats), "get output desc info failed"); | |||||
return SUCCESS; | return SUCCESS; | ||||
} | } | ||||
@@ -1828,29 +1844,22 @@ void DavinciModel::GetCurShape(std::vector<int64_t> &batch_info, int32_t &dynami | |||||
dynamic_type = dynamic_type_; | dynamic_type = dynamic_type_; | ||||
} | } | ||||
void DavinciModel::GetModelAttr(std::vector<std::string> &dynamic_output_shape_info) { | |||||
for (auto &op : output_op_list_) { | |||||
if (op->GetType() != NETOUTPUT) { | |||||
continue; | |||||
} | |||||
if (!AttrUtils::GetListStr(op, ATTR_NAME_DYNAMIC_OUTPUT_DIMS, dynamic_output_shape_info)) { | |||||
GELOGD("Can not get dynamic output dims attr"); | |||||
} | |||||
} | |||||
void DavinciModel::GetModelAttr(vector<string> &out_shape_info) { | |||||
out_shape_info.insert(out_shape_info.end(), dynamic_output_shape_info_.begin(), dynamic_output_shape_info_.end()); | |||||
} | } | ||||
Status DavinciModel::GetInputOutputDescInfoForZeroCopy(vector<InputOutputDescInfo> &input_desc, | Status DavinciModel::GetInputOutputDescInfoForZeroCopy(vector<InputOutputDescInfo> &input_desc, | ||||
vector<InputOutputDescInfo> &output_desc, | vector<InputOutputDescInfo> &output_desc, | ||||
std::vector<uint32_t> &input_formats, | std::vector<uint32_t> &input_formats, | ||||
std::vector<uint32_t> &outputFormats) { | |||||
if ((data_op_list_.empty()) || (1 != data_op_list_[0]->GetInputsSize())) { | |||||
std::vector<uint32_t> &output_formats) { | |||||
if (input_addrs_list_.empty() || input_addrs_list_[0].size() != kOutputNum) { | |||||
GELOGE(FAILED, "OP List Pointer is null or input_desc size is not 1!"); | GELOGE(FAILED, "OP List Pointer is null or input_desc size is not 1!"); | ||||
return FAILED; | return FAILED; | ||||
} | } | ||||
GE_CHK_STATUS_RET(GetInputDescInfo(input_desc, input_formats), "get input desc info failed"); | GE_CHK_STATUS_RET(GetInputDescInfo(input_desc, input_formats), "get input desc info failed"); | ||||
GE_CHK_STATUS_RET(GetOutputDescInfo(output_desc, outputFormats), "get ouput desc info failed"); | |||||
GE_CHK_STATUS_RET(GetOutputDescInfo(output_desc, output_formats), "get ouput desc info failed"); | |||||
GE_CHK_BOOL_RET_STATUS(output_desc.size() == output_memory_size_list_.size(), INTERNAL_ERROR, | GE_CHK_BOOL_RET_STATUS(output_desc.size() == output_memory_size_list_.size(), INTERNAL_ERROR, | ||||
"output_desc size[%zu] not equal output_size_list_[%zu] size!", output_desc.size(), | "output_desc size[%zu] not equal output_size_list_[%zu] size!", output_desc.size(), | ||||
@@ -1939,7 +1948,7 @@ Status DavinciModel::GetInputDescInfo(vector<InputOutputDescInfo> &input_desc, s | |||||
return SUCCESS; | return SUCCESS; | ||||
} | } | ||||
void DavinciModel::CreateOutput(uint32_t index, OpDescPtr &op_desc, InputOutputDescInfo &output, | |||||
void DavinciModel::CreateOutput(uint32_t index, const OpDescPtr &op_desc, InputOutputDescInfo &output, | |||||
uint32_t &format_result) { | uint32_t &format_result) { | ||||
/// netoutput input tensor desc | /// netoutput input tensor desc | ||||
GE_IF_BOOL_EXEC(op_desc->GetInputDescPtr(index) == nullptr, GELOGE(FAILED, "OpDesc GetInputDescPtr is nullptr"); | GE_IF_BOOL_EXEC(op_desc->GetInputDescPtr(index) == nullptr, GELOGE(FAILED, "OpDesc GetInputDescPtr is nullptr"); | ||||
@@ -1992,10 +2001,10 @@ void DavinciModel::CreateOutput(uint32_t index, OpDescPtr &op_desc, InputOutputD | |||||
output.data_type = op_desc->GetInputDescPtr(index)->GetDataType(); | output.data_type = op_desc->GetInputDescPtr(index)->GetDataType(); | ||||
} | } | ||||
Status DavinciModel::GetOutputDescInfo(vector<InputOutputDescInfo> &output_desc, std::vector<uint32_t> &formats) { | |||||
GELOGD("Output node size: %zu", output_op_list_.size()); | |||||
for (size_t i = 0; i < output_op_list_.size(); i++) { | |||||
auto &op_desc = output_op_list_[i]; | |||||
Status DavinciModel::InitOutputDescInfo(const vector<OpDescPtr> &output_op_list, | |||||
vector<InputOutputDescInfo> &output_descs, vector<uint32_t> &output_formats) { | |||||
GELOGD("Output node size: %zu", output_op_list.size()); | |||||
for (const auto &op_desc : output_op_list) { | |||||
uint32_t out_size = static_cast<uint32_t>(op_desc->GetInputsSize()); | uint32_t out_size = static_cast<uint32_t>(op_desc->GetInputsSize()); | ||||
for (uint32_t index = 0; index < out_size; index++) { | for (uint32_t index = 0; index < out_size; index++) { | ||||
string output_name; | string output_name; | ||||
@@ -2018,13 +2027,19 @@ Status DavinciModel::GetOutputDescInfo(vector<InputOutputDescInfo> &output_desc, | |||||
std::to_string(src_index[index]); | std::to_string(src_index[index]); | ||||
} | } | ||||
output.name = output_name; | output.name = output_name; | ||||
output_desc.push_back(output); | |||||
formats.push_back(format_result); | |||||
output_descs.push_back(output); | |||||
output_formats.push_back(format_result); | |||||
} | } | ||||
} | } | ||||
return SUCCESS; | return SUCCESS; | ||||
} | } | ||||
Status DavinciModel::GetOutputDescInfo(vector<InputOutputDescInfo> &output_descs, vector<uint32_t> &output_formats) { | |||||
output_descs.insert(output_descs.end(), output_descs_.begin(), output_descs_.end()); | |||||
output_formats.insert(output_formats.end(), output_formats_.begin(), output_formats_.end()); | |||||
return SUCCESS; | |||||
} | |||||
ge::Format DavinciModel::GetFormat() { | ge::Format DavinciModel::GetFormat() { | ||||
if ((data_op_list_.empty()) || data_op_list_[0] == nullptr || data_op_list_[0]->GetInputDescPtr(0) == nullptr) { | if ((data_op_list_.empty()) || data_op_list_[0] == nullptr || data_op_list_[0]->GetInputDescPtr(0) == nullptr) { | ||||
GELOGW("OP List Pointer is null or input_desc size is not 1!"); | GELOGW("OP List Pointer is null or input_desc size is not 1!"); | ||||
@@ -2362,7 +2377,7 @@ void DavinciModel::SetProfileTime(ModelProcStage stage, int64_t endTime) { | |||||
/// @author | /// @author | ||||
/// | /// | ||||
Status DavinciModel::CopyOutputData(uint32_t data_id, OutputData &output_data, rtMemcpyKind_t kind) { | Status DavinciModel::CopyOutputData(uint32_t data_id, OutputData &output_data, rtMemcpyKind_t kind) { | ||||
if (output_op_list_.empty()) { | |||||
if (output_addrs_list_.empty()) { | |||||
Status ret = SyncVarData(); | Status ret = SyncVarData(); | ||||
return ret; | return ret; | ||||
} | } | ||||
@@ -2421,20 +2436,12 @@ Status DavinciModel::CopyOutputData(uint32_t data_id, OutputData &output_data, r | |||||
return SUCCESS; | return SUCCESS; | ||||
} | } | ||||
Status DavinciModel::GenOutputTensorInfo(const OpDescPtr &op_desc, uint32_t data_index, OutputData *output_data, | |||||
std::vector<ge::OutputTensorInfo> &outputs) { | |||||
GE_CHECK_NOTNULL(op_desc); | |||||
GE_CHECK_NOTNULL(output_data); | |||||
if (output_data->blobs.size() > data_index) { | |||||
GELOGI("No need to generate output tensor info, model id:%u", model_id_); | |||||
return SUCCESS; | |||||
} | |||||
std::vector<int64_t> out_buffer_size_vec; | |||||
std::vector<std::vector<int64_t>> shape_info_vec; | |||||
Status DavinciModel::InitOutputTensorInfo(const OpDescPtr &op_desc) { | |||||
size_t input_num = op_desc->GetInputsSize(); | size_t input_num = op_desc->GetInputsSize(); | ||||
if (is_getnext_sink_dynamic_) { | if (is_getnext_sink_dynamic_) { | ||||
input_num = input_num - kGetDynamicDimsCount; | input_num = input_num - kGetDynamicDimsCount; | ||||
} | } | ||||
for (size_t i = 0; i < input_num; ++i) { | for (size_t i = 0; i < input_num; ++i) { | ||||
int64_t size = 0; | int64_t size = 0; | ||||
auto input_desc = op_desc->GetInputDescPtr(i); | auto input_desc = op_desc->GetInputDescPtr(i); | ||||
@@ -2454,25 +2461,37 @@ Status DavinciModel::GenOutputTensorInfo(const OpDescPtr &op_desc, uint32_t data | |||||
} | } | ||||
} | } | ||||
GELOGI("Output size is %ld, output shape is %s.", size, formats::JoinToString(output_shape).c_str()); | GELOGI("Output size is %ld, output shape is %s.", size, formats::JoinToString(output_shape).c_str()); | ||||
out_buffer_size_vec.push_back(size); | |||||
shape_info_vec.push_back(output_shape); | |||||
output_buffer_size_.push_back(size); | |||||
output_shape_info_.push_back(output_shape); | |||||
} | |||||
return SUCCESS; | |||||
} | |||||
Status DavinciModel::GenOutputTensorInfo(OutputData *output_data, vector<OutputTensorInfo> &outputs) { | |||||
GE_CHECK_NOTNULL(output_data); | |||||
if (!output_data->blobs.empty()) { | |||||
GELOGI("No need to generate output tensor info, model id:%u", model_id_); | |||||
return SUCCESS; | |||||
} | } | ||||
GELOGI("Output blobs size:%zu, data index:%u, model id:%u", out_buffer_size_vec.size(), data_index, model_id_); | |||||
for (size_t i = 0; i < out_buffer_size_vec.size(); ++i) { | |||||
std::unique_ptr<uint8_t[]> data_buf(new (std::nothrow) uint8_t[out_buffer_size_vec[i]]); | |||||
GELOGI("Output blobs size:%zu, model id:%u", output_buffer_size_.size(), model_id_); | |||||
for (size_t i = 0; i < output_buffer_size_.size(); ++i) { | |||||
std::unique_ptr<uint8_t[]> data_buf(new (std::nothrow) uint8_t[output_buffer_size_[i]]); | |||||
if (data_buf == nullptr) { | if (data_buf == nullptr) { | ||||
GELOGE(GE_GRAPH_MALLOC_FAILED, "Malloc buffer failed."); | GELOGE(GE_GRAPH_MALLOC_FAILED, "Malloc buffer failed."); | ||||
return GE_GRAPH_MALLOC_FAILED; | return GE_GRAPH_MALLOC_FAILED; | ||||
} | } | ||||
output_data->blobs.push_back({data_buf.get(), static_cast<uint64_t>(out_buffer_size_vec[i]), false}); | |||||
output_data->blobs.push_back({data_buf.get(), static_cast<uint64_t>(output_buffer_size_[i]), false}); | |||||
ge::OutputTensorInfo output; | ge::OutputTensorInfo output; | ||||
output.dims = shape_info_vec[i]; | |||||
output.dims = output_shape_info_[i]; | |||||
output.data = std::move(data_buf); | output.data = std::move(data_buf); | ||||
output.length = out_buffer_size_vec[i]; | |||||
output.length = output_buffer_size_[i]; | |||||
outputs.emplace_back(std::move(output)); | outputs.emplace_back(std::move(output)); | ||||
GELOGD("Output index:%zu, output dims is %s, data length:%lu.", i, | GELOGD("Output index:%zu, output dims is %s, data length:%lu.", i, | ||||
formats::JoinToString(output.dims).c_str(), output.length); | formats::JoinToString(output.dims).c_str(), output.length); | ||||
} | } | ||||
return SUCCESS; | return SUCCESS; | ||||
} | } | ||||
@@ -2507,36 +2526,28 @@ Status DavinciModel::ReturnResult(uint32_t data_id, const bool rslt_flg, const b | |||||
return INTERNAL_ERROR; | return INTERNAL_ERROR; | ||||
} | } | ||||
if (output_op_list_.empty()) { | |||||
if (output_addrs_list_.empty()) { | |||||
GELOGW("Output tensor list is empty, model id: %u", model_id_); | GELOGW("Output tensor list is empty, model id: %u", model_id_); | ||||
GE_CHK_STATUS(listener_->OnComputeDone(model_id_, data_id, INTERNAL_ERROR, outputs), "OnComputeDone failed."); | GE_CHK_STATUS(listener_->OnComputeDone(model_id_, data_id, INTERNAL_ERROR, outputs), "OnComputeDone failed."); | ||||
return INTERNAL_ERROR; | return INTERNAL_ERROR; | ||||
} | } | ||||
GE_CHECK_NOTNULL(output_data); | GE_CHECK_NOTNULL(output_data); | ||||
// index of data in output_data | |||||
uint32_t data_index = 0; | |||||
output_data->index = data_id; | output_data->index = data_id; | ||||
output_data->model_id = model_id_; | output_data->model_id = model_id_; | ||||
is_getnext_sink_dynamic_ = false; | |||||
// copy output data from op to designated position | |||||
for (auto &op_desc : output_op_list_) { | |||||
if (IsGetNextSinkDynamic(op_desc)) { | |||||
GELOGD("Reinit cur dynamic dims when getnext sink dynamic."); | |||||
is_getnext_sink_dynamic_ = true; | |||||
cur_dynamic_dims_.clear(); | |||||
cur_dynamic_dims_.resize(shape_of_cur_dynamic_dims_); | |||||
auto ret = rtMemcpy(cur_dynamic_dims_.data(), shape_of_cur_dynamic_dims_ * sizeof(int64_t), | |||||
netoutput_last_input_addr_, netoutput_last_input_size_, RT_MEMCPY_DEVICE_TO_HOST); | |||||
GE_CHK_RT_RET(ret); | |||||
} | |||||
GELOGD("Cur dynamic dims is %s.", formats::JoinToString(cur_dynamic_dims_).c_str()); | |||||
if (GenOutputTensorInfo(op_desc, data_index, output_data, outputs) != SUCCESS) { | |||||
return INTERNAL_ERROR; | |||||
} | |||||
data_index += op_desc->GetInputsSize(); | |||||
if (is_getnext_sink_dynamic_) { | |||||
GELOGD("Reinit cur dynamic dims when getnext sink dynamic."); | |||||
cur_dynamic_dims_.clear(); | |||||
cur_dynamic_dims_.resize(shape_of_cur_dynamic_dims_); | |||||
auto ret = rtMemcpy(cur_dynamic_dims_.data(), shape_of_cur_dynamic_dims_ * sizeof(int64_t), | |||||
netoutput_last_input_addr_, netoutput_last_input_size_, RT_MEMCPY_DEVICE_TO_HOST); | |||||
GE_CHK_RT_RET(ret); | |||||
} | |||||
GELOGD("Cur dynamic dims is %s.", formats::JoinToString(cur_dynamic_dims_).c_str()); | |||||
if (GenOutputTensorInfo(output_data, outputs) != SUCCESS) { | |||||
return INTERNAL_ERROR; | |||||
} | } | ||||
if (CopyOutputData(data_id, *output_data, RT_MEMCPY_DEVICE_TO_HOST) != SUCCESS) { | if (CopyOutputData(data_id, *output_data, RT_MEMCPY_DEVICE_TO_HOST) != SUCCESS) { | ||||
@@ -2668,10 +2679,10 @@ void *DavinciModel::Run(DavinciModel *model) { | |||||
model->SetProfileTime(MODEL_AFTER_PROC_START)); | model->SetProfileTime(MODEL_AFTER_PROC_START)); | ||||
GE_TIMESTAMP_START(ReturnResult3); | GE_TIMESTAMP_START(ReturnResult3); | ||||
// copy output data from device to host | // copy output data from device to host | ||||
GE_IF_BOOL_EXEC(!model->output_op_list_.empty(), | |||||
GE_IF_BOOL_EXEC(!model->output_addrs_list_.empty(), | |||||
(void)model->ReturnResult(current_data.index, rslt_flg, false, data_wrapper->GetOutput())) | (void)model->ReturnResult(current_data.index, rslt_flg, false, data_wrapper->GetOutput())) | ||||
// copy output data from device to host for variable graph | // copy output data from device to host for variable graph | ||||
GE_IF_BOOL_EXEC(model->output_op_list_.empty(), (void)model->ReturnNoOutput(current_data.index)); | |||||
GE_IF_BOOL_EXEC(model->output_addrs_list_.empty(), (void)model->ReturnNoOutput(current_data.index)); | |||||
GE_IF_BOOL_EXEC(model->is_first_execute_, | GE_IF_BOOL_EXEC(model->is_first_execute_, | ||||
GE_TIMESTAMP_EVENT_END(ReturnResult3, "GraphExcute::CopyDataFromDeviceToHost")); | GE_TIMESTAMP_EVENT_END(ReturnResult3, "GraphExcute::CopyDataFromDeviceToHost")); | ||||
GE_IF_BOOL_EXEC(ProfilingManager::Instance().ProfilingModelExecuteOn(), | GE_IF_BOOL_EXEC(ProfilingManager::Instance().ProfilingModelExecuteOn(), | ||||
@@ -2791,30 +2802,49 @@ void DavinciModel::UnbindTaskSinkStream() { | |||||
} | } | ||||
} | } | ||||
void *DavinciModel::GetRunAddress(void *addr) const { | |||||
if (fixed_mem_base_ == reinterpret_cast<uintptr_t>(mem_base_)) { | |||||
return addr; | |||||
} | |||||
uintptr_t ptr = reinterpret_cast<uintptr_t>(addr); | |||||
if ((fixed_mem_base_ <= ptr) && (ptr < fixed_mem_base_ + runtime_param_.mem_size)) { | |||||
return mem_base_ + (ptr - fixed_mem_base_); | |||||
} else { | |||||
return addr; | |||||
} | |||||
} | |||||
///
/// @brief Build the zero-copy address maps for a known-shape model.
/// @param [in] inputs: user-provided input buffers, ordered by input index.
/// @param [in] outputs: user-provided output buffers, ordered by output index.
/// @return SUCCESS / FAILED when more inputs are given than the model declares.
///
Status DavinciModel::CreateKnownZeroCopyMap(const vector<void *> &inputs, const vector<void *> &outputs) {
  GELOGI("in, inputs size: %zu, input addr size: %zu, outputs size: %zu, output addr size: %zu",
         inputs.size(), input_addrs_list_.size(), outputs.size(), output_addrs_list_.size());
  if (inputs.size() > input_addrs_list_.size()) {
    GELOGE(FAILED, "input data addr %zu should less than input op num %zu.", inputs.size(), input_addrs_list_.size());
    return FAILED;
  }
  // remove zero copy addr in last iteration
  known_input_data_info_.clear();
  known_output_data_info_.clear();

  // Map each model-internal input address (rebased to its runtime location)
  // to the caller's physical buffer.
  for (size_t i = 0; i < inputs.size(); ++i) {
    const vector<void *> &addr_list = input_addrs_list_[i];
    void *addr = GetRunAddress(addr_list[kDataIndex]);
    known_input_data_info_[addr] = inputs[i];
    GELOGI("input %zu, v addr %p, r addr %p, p addr %p", i, addr_list[kDataIndex], addr, inputs[i]);
  }

  // A model without NetOutput has nothing to map on the output side.
  if (output_addrs_list_.empty()) {
    GELOGW("output op num in graph is %zu", output_addrs_list_.size());
    return SUCCESS;
  }

  // Only the first (single) NetOutput op carries the output address list.
  const vector<void *> &addr_list = output_addrs_list_.front();
  for (size_t i = 0; i < addr_list.size() && i < outputs.size(); ++i) {
    void *addr = GetRunAddress(addr_list[i]);
    known_output_data_info_[addr] = outputs[i];
    GELOGI("output %zu, v addr %p, r addr %p, p addr %p", i, addr_list[i], addr, outputs[i]);
  }

  GELOGI("success, known input data info size: %zu, known output data info size: %zu",
         known_input_data_info_.size(), known_output_data_info_.size());
  return SUCCESS;
}
@@ -2825,40 +2855,30 @@ void DavinciModel::SetTotalIOAddrs(const vector<void *> &io_addrs) { | |||||
} | } | ||||
for (size_t i = 0; i < io_addrs.size(); ++i) { | for (size_t i = 0; i < io_addrs.size(); ++i) { | ||||
uintptr_t addr = reinterpret_cast<uintptr_t>(io_addrs[i]); | |||||
if ((fixed_mem_base_ <= addr) && (addr < fixed_mem_base_ + runtime_param_.mem_size)) { | |||||
total_io_addrs_.emplace_back(mem_base_ + (addr - fixed_mem_base_)); | |||||
} else { | |||||
total_io_addrs_.emplace_back(io_addrs[i]); | |||||
} | |||||
total_io_addrs_.emplace_back(GetRunAddress(io_addrs[i])); | |||||
} | } | ||||
} | } | ||||
///
/// @brief Rewrite task IO addresses to the user's zero-copy buffers.
/// @param [in/out] total_io_addrs: task IO addresses; translated in place.
/// @return SUCCESS.
///
Status DavinciModel::UpdateKnownZeroCopyAddr(vector<void *> &total_io_addrs) {
  // If the model was relocated, rebase fixed addresses to runtime addresses
  // first so they can match the keys recorded in CreateKnownZeroCopyMap.
  if (fixed_mem_base_ != reinterpret_cast<uintptr_t>(mem_base_)) {
    for (size_t i = 0; i < total_io_addrs.size(); ++i) {
      total_io_addrs[i] = GetRunAddress(total_io_addrs[i]);
    }
  }

  for (size_t i = 0; i < total_io_addrs.size(); ++i) {
    // Reuse the iterator instead of two extra .at() lookups on the same key.
    auto it_in = known_input_data_info_.find(total_io_addrs[i]);
    if (it_in != known_input_data_info_.end()) {
      GELOGI("input %zu, v addr %p, p addr %p", i, total_io_addrs[i], it_in->second);
      total_io_addrs[i] = it_in->second;
    }
    // NOTE: looked up after the input substitution above, so an address
    // remapped as input is probed again with its new value (same as before).
    auto it_out = known_output_data_info_.find(total_io_addrs[i]);
    if (it_out != known_output_data_info_.end()) {
      GELOGI("output %zu, v addr %p, p addr %p", i, total_io_addrs[i], it_out->second);
      total_io_addrs[i] = it_out->second;
    }
  }
  GELOGI("success, total io addrs size: %zu", total_io_addrs.size());
  return SUCCESS;
}
@@ -3159,15 +3179,8 @@ bool DavinciModel::CheckInputAndModelSize(const int64_t &input_size, const int64 | |||||
"MAY cause inference result ERROR, please check model input", | "MAY cause inference result ERROR, please check model input", | ||||
input_size, op_size); | input_size, op_size); | ||||
} | } | ||||
bool is_dynamic_aipp = false; | |||||
for (const auto &op_desc : data_op_list_) { | |||||
if (op_desc->GetType() == AIPP_DATA_TYPE) { | |||||
GELOGI("This is dynamic aipp model."); | |||||
is_dynamic_aipp = true; | |||||
break; | |||||
} | |||||
} | |||||
if (is_dynamic_aipp) { | |||||
if (is_dynamic_aipp_) { | |||||
GELOGI("This is dynamic aipp model, no need to judge smaller input size"); | GELOGI("This is dynamic aipp model, no need to judge smaller input size"); | ||||
return true; | return true; | ||||
} | } | ||||
@@ -49,6 +49,10 @@ | |||||
#include "task_info/task_info.h" | #include "task_info/task_info.h" | ||||
#include "graph/common/local_context.h" | #include "graph/common/local_context.h" | ||||
using std::mutex; | |||||
using std::thread; | |||||
using std::multimap; | |||||
namespace ge { | namespace ge { | ||||
// op debug need 2048 bits buffer | // op debug need 2048 bits buffer | ||||
const size_t kOpDebugMemorySize = 2048UL; | const size_t kOpDebugMemorySize = 2048UL; | ||||
@@ -84,11 +88,11 @@ struct SuperKernelTaskInfo { | |||||
uint32_t last_stream_id; | uint32_t last_stream_id; | ||||
void *last_stream; | void *last_stream; | ||||
void *last_sm_desc; | void *last_sm_desc; | ||||
std::vector<void *> kernel_list; | |||||
std::vector<void *> arg_list; | |||||
std::vector<uint32_t> dump_flag_list; | |||||
std::vector<OpDescPtr> op_desc_list; | |||||
std::vector<uintptr_t> dump_args_list; | |||||
vector<void *> kernel_list; | |||||
vector<void *> arg_list; | |||||
vector<uint32_t> dump_flag_list; | |||||
vector<OpDescPtr> op_desc_list; | |||||
vector<uintptr_t> dump_args_list; | |||||
uint32_t last_dump_flag; | uint32_t last_dump_flag; | ||||
int64_t last_group_key; | int64_t last_group_key; | ||||
uintptr_t last_dump_args; | uintptr_t last_dump_args; | ||||
@@ -123,7 +127,7 @@ class DavinciModel { | |||||
/// @brief DavinciModel constructor | /// @brief DavinciModel constructor | ||||
/// @author | /// @author | ||||
/// | /// | ||||
DavinciModel(int32_t priority, const std::shared_ptr<ModelListener> &listener); | |||||
DavinciModel(int32_t priority, const shared_ptr<ModelListener> &listener); | |||||
/// | /// | ||||
/// @ingroup ge | /// @ingroup ge | ||||
@@ -153,7 +157,7 @@ class DavinciModel { | |||||
/// @param [in] output_que_ids: input queue ids from user, nums equal NetOutput Op. | /// @param [in] output_que_ids: input queue ids from user, nums equal NetOutput Op. | ||||
/// @return: 0 for success / others for fail | /// @return: 0 for success / others for fail | ||||
/// | /// | ||||
Status SetQueIds(const std::vector<uint32_t> &input_queue_ids, const std::vector<uint32_t> &output_queue_ids); | |||||
Status SetQueIds(const vector<uint32_t> &input_queue_ids, const vector<uint32_t> &output_queue_ids); | |||||
/// | /// | ||||
/// @ingroup ge | /// @ingroup ge | ||||
@@ -223,13 +227,14 @@ class DavinciModel { | |||||
// get total mem size | // get total mem size | ||||
size_t TotalMemSize() const { return runtime_param_.mem_size; } | size_t TotalMemSize() const { return runtime_param_.mem_size; } | ||||
const std::map<uint32_t, MemInfo> &P2PMemInfos() const {return runtime_param_.memory_infos;} | |||||
const map<uint32_t, MemInfo> &P2PMemInfos() const { return runtime_param_.memory_infos; } | |||||
// model name | // model name | ||||
string Name() const { return name_; } | string Name() const { return name_; } | ||||
// om_name | // om_name | ||||
string OmName() const { return om_name_; } | string OmName() const { return om_name_; } | ||||
// version | // version | ||||
uint32_t Version() const { return version_; } | uint32_t Version() const { return version_; } | ||||
@@ -255,9 +260,6 @@ class DavinciModel { | |||||
Status DestroyThread(); | Status DestroyThread(); | ||||
// Get Data Op. | |||||
const vector<OpDescPtr> &GetDataList() const { return data_op_list_; } | |||||
// get Op | // get Op | ||||
OpDescPtr GetOpByIndex(uint32_t index) const { | OpDescPtr GetOpByIndex(uint32_t index) const { | ||||
if (op_list_.find(index) == op_list_.end()) { | if (op_list_.find(index) == op_list_.end()) { | ||||
@@ -274,11 +276,12 @@ class DavinciModel { | |||||
} | } | ||||
return nullptr; | return nullptr; | ||||
} | } | ||||
// get task info for profiling | // get task info for profiling | ||||
const std::vector<TaskDescInfo> &GetTaskDescInfo() const { return task_desc_info_; } | |||||
const vector<TaskDescInfo> &GetTaskDescInfo() const { return task_desc_info_; } | |||||
// get updated task info list | // get updated task info list | ||||
std::vector<TaskInfoPtr> GetTaskList() { return task_list_; } | |||||
vector<TaskInfoPtr> GetTaskList() { return task_list_; } | |||||
// Modified from KernelTaskInfo. | // Modified from KernelTaskInfo. | ||||
SuperKernelTaskInfo &GetSuperKernelTaskInfo() { return skt_info_; } | SuperKernelTaskInfo &GetSuperKernelTaskInfo() { return skt_info_; } | ||||
@@ -323,7 +326,7 @@ class DavinciModel { | |||||
Status GetInputOutputDescInfo(vector<InputOutputDescInfo> &input_desc, vector<InputOutputDescInfo> &output_desc); | Status GetInputOutputDescInfo(vector<InputOutputDescInfo> &input_desc, vector<InputOutputDescInfo> &output_desc); | ||||
Status GetInputOutputDescInfo(vector<InputOutputDescInfo> &input_desc, vector<InputOutputDescInfo> &output_desc, | Status GetInputOutputDescInfo(vector<InputOutputDescInfo> &input_desc, vector<InputOutputDescInfo> &output_desc, | ||||
std::vector<uint32_t> &inputFormats, std::vector<uint32_t> &output_formats); | |||||
vector<uint32_t> &inputFormats, vector<uint32_t> &output_formats); | |||||
/// | /// | ||||
/// @ingroup ge | /// @ingroup ge | ||||
@@ -332,7 +335,7 @@ class DavinciModel { | |||||
/// @param [out] dynamic_type | /// @param [out] dynamic_type | ||||
/// @return execute result | /// @return execute result | ||||
/// | /// | ||||
Status GetDynamicBatchInfo(std::vector<std::vector<int64_t>> &batch_info, int32_t &dynamic_type) const; | |||||
Status GetDynamicBatchInfo(vector<vector<int64_t>> &batch_info, int32_t &dynamic_type) const; | |||||
/// | /// | ||||
/// @ingroup ge | /// @ingroup ge | ||||
@@ -340,13 +343,13 @@ class DavinciModel { | |||||
/// @param [out] batch_info | /// @param [out] batch_info | ||||
/// @return None | /// @return None | ||||
/// | /// | ||||
void GetCombinedDynamicDims(std::vector<std::vector<int64_t>> &batch_info) const; | |||||
void GetCombinedDynamicDims(vector<vector<int64_t>> &batch_info) const; | |||||
void GetUserDesignateShapeOrder(std::vector<std::string> &user_input_shape_order) const; | |||||
void GetUserDesignateShapeOrder(vector<string> &user_input_shape_order) const; | |||||
void GetCurShape(std::vector<int64_t> &batch_info, int32_t &dynamic_type); | |||||
void GetCurShape(vector<int64_t> &batch_info, int32_t &dynamic_type); | |||||
void GetModelAttr(std::vector<std::string> &dynamic_output_shape_info); | |||||
void GetModelAttr(vector<string> &dynamic_output_shape_info); | |||||
/// | /// | ||||
/// @ingroup ge | /// @ingroup ge | ||||
@@ -373,7 +376,7 @@ class DavinciModel { | |||||
/// @param [in] string identification: unique identification for current op. | /// @param [in] string identification: unique identification for current op. | ||||
/// @return None | /// @return None | ||||
/// | /// | ||||
void GetUniqueId(const OpDescPtr &op_desc, std::string &unique_identification); | |||||
void GetUniqueId(const OpDescPtr &op_desc, string &unique_identification); | |||||
/// | /// | ||||
/// @ingroup ge | /// @ingroup ge | ||||
@@ -384,7 +387,7 @@ class DavinciModel { | |||||
/// | /// | ||||
Status GetInputOutputDescInfoForZeroCopy(vector<InputOutputDescInfo> &input_desc, | Status GetInputOutputDescInfoForZeroCopy(vector<InputOutputDescInfo> &input_desc, | ||||
vector<InputOutputDescInfo> &output_desc, | vector<InputOutputDescInfo> &output_desc, | ||||
std::vector<uint32_t> &inputFormats, std::vector<uint32_t> &output_formats); | |||||
vector<uint32_t> &inputFormats, vector<uint32_t> &output_formats); | |||||
Status ReturnResult(uint32_t data_id, const bool rslt_flg, const bool seq_end_flg, OutputData *output_data); | Status ReturnResult(uint32_t data_id, const bool rslt_flg, const bool seq_end_flg, OutputData *output_data); | ||||
@@ -406,8 +409,6 @@ class DavinciModel { | |||||
/// | /// | ||||
bool RunFlag() const { return run_flg_; } | bool RunFlag() const { return run_flg_; } | ||||
Status GetOutputDescInfo(vector<InputOutputDescInfo> &output_desc, std::vector<uint32_t> &formats); | |||||
/// | /// | ||||
/// @ingroup ge | /// @ingroup ge | ||||
/// @brief Set Session Id | /// @brief Set Session Id | ||||
@@ -453,14 +454,14 @@ class DavinciModel { | |||||
/// @ingroup ge | /// @ingroup ge | ||||
/// @brief Save outside address of Data or NetOutput used info for ZeroCopy. | /// @brief Save outside address of Data or NetOutput used info for ZeroCopy. | ||||
/// @param [in] const OpDescPtr &op_desc: current op desc | /// @param [in] const OpDescPtr &op_desc: current op desc | ||||
/// @param [in] const std::vector<void *> &outside_addrs: address of task | |||||
/// @param [in] const vector<void *> &outside_addrs: address of task | |||||
/// @param [in] const void *args_offset: arguments address save the address. | /// @param [in] const void *args_offset: arguments address save the address. | ||||
/// @return None. | /// @return None. | ||||
/// | /// | ||||
void SetZeroCopyAddr(const OpDescPtr &op_desc, const std::vector<void *> &outside_addrs, const void *info, void *args, | |||||
void SetZeroCopyAddr(const OpDescPtr &op_desc, const vector<void *> &outside_addrs, const void *info, void *args, | |||||
size_t size, size_t offset); | size_t size, size_t offset); | ||||
void SetDynamicSize(const std::vector<uint64_t> &batch_num, int32_t dynamic_type); | |||||
void SetDynamicSize(const vector<uint64_t> &batch_num, int32_t dynamic_type); | |||||
bool GetL1FusionEnableOption() { return is_l1_fusion_enable_; } | bool GetL1FusionEnableOption() { return is_l1_fusion_enable_; } | ||||
@@ -476,7 +477,7 @@ class DavinciModel { | |||||
data_dumper_.SaveDumpOpInfo(model_param, op, task_id, stream_id); | data_dumper_.SaveDumpOpInfo(model_param, op, task_id, stream_id); | ||||
} | } | ||||
void SaveDumpTask(uint32_t task_id, uint32_t stream_id, const std::shared_ptr<OpDesc> &op_desc, uintptr_t args) { | |||||
void SaveDumpTask(uint32_t task_id, uint32_t stream_id, const shared_ptr<OpDesc> &op_desc, uintptr_t args) { | |||||
data_dumper_.SaveDumpTask(task_id, stream_id, op_desc, args); | data_dumper_.SaveDumpTask(task_id, stream_id, op_desc, args); | ||||
} | } | ||||
@@ -485,7 +486,7 @@ class DavinciModel { | |||||
DavinciModel(const DavinciModel &model) = delete; | DavinciModel(const DavinciModel &model) = delete; | ||||
const map<int64_t, std::vector<rtStream_t>> &GetHcclFolowStream() { | |||||
const map<int64_t, vector<rtStream_t>> &GetHcclFolowStream() { | |||||
return main_follow_stream_mapping_; | return main_follow_stream_mapping_; | ||||
} | } | ||||
void SaveHcclFollowStream(int64_t main_stream_id, rtStream_t stream); | void SaveHcclFollowStream(int64_t main_stream_id, rtStream_t stream); | ||||
@@ -534,8 +535,8 @@ class DavinciModel { | |||||
void SetKnownNodeAddrNotChanged(bool base_addr_not_changed) { base_addr_not_changed_ = base_addr_not_changed; } | void SetKnownNodeAddrNotChanged(bool base_addr_not_changed) { base_addr_not_changed_ = base_addr_not_changed; } | ||||
Status GetOrigInputInfo(uint32_t index, OriginInputInfo &orig_input_info); | Status GetOrigInputInfo(uint32_t index, OriginInputInfo &orig_input_info); | ||||
Status GetAllAippInputOutputDims(uint32_t index, std::vector<InputOutputDims> &input_dims, | |||||
std::vector<InputOutputDims> &output_dims); | |||||
Status GetAllAippInputOutputDims(uint32_t index, vector<InputOutputDims> &input_dims, | |||||
vector<InputOutputDims> &output_dims); | |||||
void SetModelDescVersion(bool is_new_model_desc) { is_new_model_desc_ = is_new_model_desc; } | void SetModelDescVersion(bool is_new_model_desc) { is_new_model_desc_ = is_new_model_desc; } | ||||
// om file name | // om file name | ||||
void SetOmName(string om_name) { om_name_ = om_name; } | void SetOmName(string om_name) { om_name_ = om_name; } | ||||
@@ -546,7 +547,6 @@ class DavinciModel { | |||||
bool GetOpDescInfo(uint32_t stream_id, uint32_t task_id, OpDescInfo &op_desc_info) const { | bool GetOpDescInfo(uint32_t stream_id, uint32_t task_id, OpDescInfo &op_desc_info) const { | ||||
return data_dumper_.GetOpDescInfo(stream_id, task_id, op_desc_info); | return data_dumper_.GetOpDescInfo(stream_id, task_id, op_desc_info); | ||||
} | } | ||||
Status InitInputOutputForDynamic(const ComputeGraphPtr &compute_graph); | |||||
private: | private: | ||||
// memory address of weights | // memory address of weights | ||||
@@ -566,6 +566,8 @@ class DavinciModel { | |||||
struct timeInfo time_info_; | struct timeInfo time_info_; | ||||
int32_t dataInputTid; | int32_t dataInputTid; | ||||
void *GetRunAddress(void *addr) const; | |||||
/// | /// | ||||
/// @ingroup ge | /// @ingroup ge | ||||
/// @brief Copy Check input size and model op size. | /// @brief Copy Check input size and model op size. | ||||
@@ -603,7 +605,7 @@ class DavinciModel { | |||||
/// @param [in] batch_label: batch label for multi-batch scenes | /// @param [in] batch_label: batch label for multi-batch scenes | ||||
/// @return SUCCESS handle successfully / others handle failed | /// @return SUCCESS handle successfully / others handle failed | ||||
/// | /// | ||||
Status UpdateIoTaskArgs(const std::map<uint32_t, ZeroCopyOffset> &data_info, bool is_input, | |||||
Status UpdateIoTaskArgs(const map<uint32_t, ZeroCopyOffset> &data_info, bool is_input, | |||||
const vector<DataBuffer> &blobs, bool is_dynamic, const string &batch_label); | const vector<DataBuffer> &blobs, bool is_dynamic, const string &batch_label); | ||||
Status CopyInputData(const InputData &input_data, bool device_data = false); | Status CopyInputData(const InputData &input_data, bool device_data = false); | ||||
@@ -619,7 +621,8 @@ class DavinciModel { | |||||
void SetInputDimsInfo(const vector<int64_t> &model_input_dims, Format &format, InputOutputDescInfo &input); | void SetInputDimsInfo(const vector<int64_t> &model_input_dims, Format &format, InputOutputDescInfo &input); | ||||
Status GetInputDescInfo(vector<InputOutputDescInfo> &input_desc, std::vector<uint32_t> &formats); | |||||
Status GetInputDescInfo(vector<InputOutputDescInfo> &input_desc, vector<uint32_t> &input_formats); | |||||
Status GetOutputDescInfo(vector<InputOutputDescInfo> &output_desc, vector<uint32_t> &output_formats); | |||||
Status InitTaskInfo(domi::ModelTaskDef &modelTaskInfo); | Status InitTaskInfo(domi::ModelTaskDef &modelTaskInfo); | ||||
@@ -631,7 +634,7 @@ class DavinciModel { | |||||
uint8_t *MallocWeightsMem(size_t weights_size); | uint8_t *MallocWeightsMem(size_t weights_size); | ||||
uint8_t* MallocP2PMem(size_t p2p_data_size); | |||||
uint8_t *MallocP2PMem(size_t p2p_data_size); | |||||
void FreeFeatureMapMem(); | void FreeFeatureMapMem(); | ||||
@@ -663,27 +666,33 @@ class DavinciModel { | |||||
/// | /// | ||||
/// @ingroup ge | /// @ingroup ge | ||||
/// @brief Data Op Initialize. | /// @brief Data Op Initialize. | ||||
/// @param [in] ComputeGraphPtr: root graph of the model. | |||||
/// @param [in] NodePtr: Data Op. | /// @param [in] NodePtr: Data Op. | ||||
/// @param [in/out] data_op_index: NetOutput addr size info. | |||||
/// @param [in/out] data_op_index: index of courrent count. | |||||
/// @param [in/out] data_by_index: Data ordered by index. | |||||
/// @return Status | /// @return Status | ||||
/// | /// | ||||
Status InitDataOp(const NodePtr &node, uint32_t &data_op_index, map<uint32_t, OpDescPtr> &data_by_index); | |||||
Status InitDataOp(const ComputeGraphPtr &graph, const NodePtr &node, uint32_t &data_op_index, | |||||
map<uint32_t, OpDescPtr> &data_by_index); | |||||
/// | /// | ||||
/// @ingroup ge | /// @ingroup ge | ||||
/// @brief Sort Data op list by index. | /// @brief Sort Data op list by index. | ||||
/// @param [in] data_by_index: map of Data Op. | /// @param [in] data_by_index: map of Data Op. | ||||
/// @return | |||||
/// @param [in] output_op_list: list of NetOutput op. | |||||
/// @return Status | |||||
/// | /// | ||||
void AdjustDataOpList(const map<uint32_t, OpDescPtr> &data_by_index); | |||||
Status OptInputOutputInfo(const map<uint32_t, OpDescPtr> &data_by_index, const vector<OpDescPtr> &output_op_list); | |||||
/// | /// | ||||
/// @ingroup ge | /// @ingroup ge | ||||
/// @brief NetOutput Op Initialize. | /// @brief NetOutput Op Initialize. | ||||
/// @param [in] ComputeGraphPtr: root graph of the model. | |||||
/// @param [in] NodePtr: NetOutput Op. | /// @param [in] NodePtr: NetOutput Op. | ||||
/// @param [in/out] vector<OpDescPtr>: All NetOutput node in model. | |||||
/// @return Status | /// @return Status | ||||
/// | /// | ||||
Status InitNetOutput(const NodePtr &node); | |||||
Status InitNetOutput(const ComputeGraphPtr &graph, const NodePtr &node, vector<OpDescPtr> &output_op_list); | |||||
/// | /// | ||||
/// @ingroup ge | /// @ingroup ge | ||||
@@ -722,7 +731,7 @@ class DavinciModel { | |||||
/// | /// | ||||
Status InitTbeHandle(const OpDescPtr &op_desc); | Status InitTbeHandle(const OpDescPtr &op_desc); | ||||
void StoreTbeHandle(const std::string &handle_key); | |||||
void StoreTbeHandle(const string &handle_key); | |||||
void CleanTbeHandle(); | void CleanTbeHandle(); | ||||
/// | /// | ||||
@@ -753,7 +762,7 @@ class DavinciModel { | |||||
/// | /// | ||||
Status BindInputQueue(); | Status BindInputQueue(); | ||||
Status CpuTaskModelZeroCopy(std::vector<uintptr_t> &mbuf_list, std::map<const void *, ZeroCopyOffset> &outside_addrs); | |||||
Status CpuTaskModelZeroCopy(vector<uintptr_t> &mbuf_list, map<const void *, ZeroCopyOffset> &outside_addrs); | |||||
/// | /// | ||||
/// @ingroup ge | /// @ingroup ge | ||||
@@ -824,7 +833,7 @@ class DavinciModel { | |||||
Status DoTaskSink(); | Status DoTaskSink(); | ||||
void CreateOutput(uint32_t index, OpDescPtr &op_desc, InputOutputDescInfo &output, uint32_t &format_result); | |||||
void CreateOutput(uint32_t index, const OpDescPtr &op_desc, InputOutputDescInfo &output, uint32_t &format_result); | |||||
Status TransAllVarData(ComputeGraphPtr &graph, uint32_t graph_id); | Status TransAllVarData(ComputeGraphPtr &graph, uint32_t graph_id); | ||||
@@ -838,13 +847,16 @@ class DavinciModel { | |||||
Status SinkTimeProfile(const InputData ¤t_data); | Status SinkTimeProfile(const InputData ¤t_data); | ||||
Status GenOutputTensorInfo(const OpDescPtr &op_desc, uint32_t data_index, OutputData *output_data, | |||||
std::vector<ge::OutputTensorInfo> &outputs); | |||||
Status InitOutputTensorInfo(const OpDescPtr &op_desc); | |||||
Status GenOutputTensorInfo(OutputData *output_data, vector<OutputTensorInfo> &outputs); | |||||
void ParseAIPPInfo(std::string in_out_info, InputOutputDims &dims_info); | |||||
Status InitOutputDescInfo(const vector<OpDescPtr> &output_op_list, | |||||
vector<InputOutputDescInfo> &output_desc, vector<uint32_t> &formats); | |||||
void ParseAIPPInfo(string in_out_info, InputOutputDims &dims_info); | |||||
void SetLabelForDynamic(const NodePtr &node); | void SetLabelForDynamic(const NodePtr &node); | ||||
void ParseDynamicOutShape(const std::vector<std::string> &str_info, std::vector<vector<int64_t>> &vec_info); | |||||
void ParseDynamicOutShape(const vector<string> &str_info, vector<vector<int64_t>> &vec_info); | |||||
bool IsGetNextSinkDynamic(const OpDescPtr &op_desc); | bool IsGetNextSinkDynamic(const OpDescPtr &op_desc); | ||||
void GetAllGearsInfo(const NodePtr &node); | void GetAllGearsInfo(const NodePtr &node); | ||||
Status GetGetDynamicDimsNodeInfo(const NodePtr &node); | Status GetGetDynamicDimsNodeInfo(const NodePtr &node); | ||||
@@ -866,56 +878,54 @@ class DavinciModel { | |||||
GeModelPtr ge_model_; | GeModelPtr ge_model_; | ||||
bool need_destroy_aicpu_kernel_{false}; | bool need_destroy_aicpu_kernel_{false}; | ||||
vector<std::string> out_node_name_; | |||||
vector<string> out_node_name_; | |||||
map<uint32_t, OpDescPtr> op_list_; | map<uint32_t, OpDescPtr> op_list_; | ||||
// data op_desc | // data op_desc | ||||
vector<OpDescPtr> data_op_list_; | vector<OpDescPtr> data_op_list_; | ||||
vector<OpDescPtr> output_op_list_; | |||||
vector<OpDescPtr> variable_op_list_; | vector<OpDescPtr> variable_op_list_; | ||||
std::map<uint32_t, ZeroCopyOffset> new_input_data_info_; | |||||
std::map<uint32_t, ZeroCopyOffset> new_output_data_info_; | |||||
std::map<const void *, ZeroCopyOffset> new_input_outside_addrs_; | |||||
std::map<const void *, ZeroCopyOffset> new_output_outside_addrs_; | |||||
map<uint32_t, ZeroCopyOffset> new_input_data_info_; | |||||
map<uint32_t, ZeroCopyOffset> new_output_data_info_; | |||||
map<const void *, ZeroCopyOffset> new_input_outside_addrs_; | |||||
map<const void *, ZeroCopyOffset> new_output_outside_addrs_; | |||||
std::set<const void *> real_virtual_addrs_; | |||||
set<const void *> real_virtual_addrs_; | |||||
// output op: save cce op actual needed memory size | // output op: save cce op actual needed memory size | ||||
vector<int64_t> output_memory_size_list_; | vector<int64_t> output_memory_size_list_; | ||||
std::thread thread_id_; | |||||
thread thread_id_; | |||||
std::shared_ptr<ModelListener> listener_; | |||||
shared_ptr<ModelListener> listener_; | |||||
bool run_flg_; | bool run_flg_; | ||||
std::mutex mux_run_flg_; | |||||
mutex mux_run_flg_; | |||||
int32_t priority_; | int32_t priority_; | ||||
vector<rtStream_t> stream_list_; | vector<rtStream_t> stream_list_; | ||||
std::mutex all_hccl_stream_list_mutex_; | |||||
mutex all_hccl_stream_list_mutex_; | |||||
vector<rtStream_t> all_hccl_stream_list_; | vector<rtStream_t> all_hccl_stream_list_; | ||||
// for reuse hccl_follow_stream | // for reuse hccl_follow_stream | ||||
std::mutex capacity_of_stream_mutex_; | |||||
std::map<int64_t, std::vector<rtStream_t>> main_follow_stream_mapping_; | |||||
mutex capacity_of_stream_mutex_; | |||||
map<int64_t, vector<rtStream_t>> main_follow_stream_mapping_; | |||||
vector<rtEvent_t> event_list_; | vector<rtEvent_t> event_list_; | ||||
vector<rtLabel_t> label_list_; | vector<rtLabel_t> label_list_; | ||||
set<uint32_t> label_id_indication_; | set<uint32_t> label_id_indication_; | ||||
std::mutex outside_addrs_mutex_; | |||||
std::vector<ZeroCopyTask> zero_copy_tasks_; // Task used Data or NetOutput addr. | |||||
std::set<const void *> copy_only_addrs_; // Address need copy to original place. | |||||
mutex outside_addrs_mutex_; | |||||
vector<ZeroCopyTask> zero_copy_tasks_; // Task used Data or NetOutput addr. | |||||
set<const void *> copy_only_addrs_; // Address need copy to original place. | |||||
std::vector<TaskInfoPtr> task_list_; | |||||
vector<TaskInfoPtr> task_list_; | |||||
// rt_moodel_handle | // rt_moodel_handle | ||||
rtModel_t rt_model_handle_; | rtModel_t rt_model_handle_; | ||||
@@ -933,39 +943,39 @@ class DavinciModel { | |||||
rtAicpuDeployType_t deploy_type_{AICPU_DEPLOY_RESERVED}; | rtAicpuDeployType_t deploy_type_{AICPU_DEPLOY_RESERVED}; | ||||
// ACL queue schedule, save queue ids for Init. | // ACL queue schedule, save queue ids for Init. | ||||
std::vector<TaskInfoPtr> cpu_task_list_; | |||||
std::vector<uint32_t> input_queue_ids_; // input queue ids created by caller. | |||||
std::vector<uint32_t> output_queue_ids_; // output queue ids created by caller. | |||||
std::vector<uintptr_t> input_mbuf_list_; // input mbuf created by dequeue task. | |||||
std::vector<uintptr_t> output_mbuf_list_; // output mbuf created by dequeue task. | |||||
vector<TaskInfoPtr> cpu_task_list_; | |||||
vector<uint32_t> input_queue_ids_; // input queue ids created by caller. | |||||
vector<uint32_t> output_queue_ids_; // output queue ids created by caller. | |||||
vector<uintptr_t> input_mbuf_list_; // input mbuf created by dequeue task. | |||||
vector<uintptr_t> output_mbuf_list_; // output mbuf created by dequeue task. | |||||
uint64_t session_id_; | uint64_t session_id_; | ||||
uint32_t device_id_; | uint32_t device_id_; | ||||
std::mutex flowctrl_op_index_internal_map_mutex_; | |||||
std::map<uint32_t, uint32_t> flowctrl_op_index_internal_map_; | |||||
mutex flowctrl_op_index_internal_map_mutex_; | |||||
map<uint32_t, uint32_t> flowctrl_op_index_internal_map_; | |||||
std::vector<rtStream_t> active_stream_list_; | |||||
std::set<uint32_t> active_stream_indication_; | |||||
vector<rtStream_t> active_stream_list_; | |||||
set<uint32_t> active_stream_indication_; | |||||
std::set<uint32_t> hcom_streams_; | |||||
set<uint32_t> hcom_streams_; | |||||
RuntimeParam runtime_param_; | RuntimeParam runtime_param_; | ||||
static std::mutex tvm_bin_mutex_; | |||||
std::set<std::string> tvm_bin_kernel_; | |||||
static mutex tvm_bin_mutex_; | |||||
set<string> tvm_bin_kernel_; | |||||
std::map<std::string, uint32_t> used_tbe_handle_map_; | |||||
map<string, uint32_t> used_tbe_handle_map_; | |||||
// for profiling task and graph info | // for profiling task and graph info | ||||
std::vector<TaskDescInfo> task_desc_info_; | |||||
vector<TaskDescInfo> task_desc_info_; | |||||
int64_t maxDumpOpNum_; | int64_t maxDumpOpNum_; | ||||
// for data dump | // for data dump | ||||
DataDumper data_dumper_; | DataDumper data_dumper_; | ||||
uint64_t iterator_count_; | uint64_t iterator_count_; | ||||
bool is_l1_fusion_enable_; | bool is_l1_fusion_enable_; | ||||
std::map<OpDescPtr, void *> saved_task_addrs_; | |||||
map<OpDescPtr, void *> saved_task_addrs_; | |||||
void *l1_fusion_addr_ = nullptr; | void *l1_fusion_addr_ = nullptr; | ||||
bool known_node_ = false; | bool known_node_ = false; | ||||
@@ -976,14 +986,14 @@ class DavinciModel { | |||||
void *hybrid_addrs_ = nullptr; | void *hybrid_addrs_ = nullptr; | ||||
uint32_t total_hybrid_args_size_ = 0; | uint32_t total_hybrid_args_size_ = 0; | ||||
int64_t total_fixed_addr_size_ = 0; | int64_t total_fixed_addr_size_ = 0; | ||||
std::map<const void *, void *> knonw_input_data_info_; | |||||
std::map<const void *, void *> knonw_output_data_info_; | |||||
map<const void *, void *> known_input_data_info_; | |||||
map<const void *, void *> known_output_data_info_; | |||||
vector<void *> total_io_addrs_; | vector<void *> total_io_addrs_; | ||||
vector<void *> orig_total_io_addrs_; | vector<void *> orig_total_io_addrs_; | ||||
bool base_addr_not_changed_ = false; | bool base_addr_not_changed_ = false; | ||||
vector<vector<int64_t>> batch_info_; | vector<vector<int64_t>> batch_info_; | ||||
std::vector<std::vector<int64_t>> combined_batch_info_; | |||||
vector<vector<int64_t>> combined_batch_info_; | |||||
vector<string> user_designate_shape_order_; | vector<string> user_designate_shape_order_; | ||||
int32_t dynamic_type_ = 0; | int32_t dynamic_type_ = 0; | ||||
bool is_dynamic_ = false; | bool is_dynamic_ = false; | ||||
@@ -991,35 +1001,47 @@ class DavinciModel { | |||||
vector<uint64_t> batch_size_; | vector<uint64_t> batch_size_; | ||||
// key: input tensor name, generally rts op; | // key: input tensor name, generally rts op; | ||||
// value: the fixed addr of input anchor, same as the peer output anchor addr of the peer op | // value: the fixed addr of input anchor, same as the peer output anchor addr of the peer op | ||||
std::map<string, int64_t> tensor_name_to_fixed_addr_size_; | |||||
map<string, int64_t> tensor_name_to_fixed_addr_size_; | |||||
// key: input tensor name, generally rts op; value: the peer output anchor of the peer op | // key: input tensor name, generally rts op; value: the peer output anchor of the peer op | ||||
std::map<string, int64_t> tensor_name_to_peer_output_index_; | |||||
map<string, int64_t> tensor_name_to_peer_output_index_; | |||||
// if model is first execute | // if model is first execute | ||||
bool is_first_execute_; | bool is_first_execute_; | ||||
// for op debug | // for op debug | ||||
std::mutex debug_reg_mutex_; | |||||
mutex debug_reg_mutex_; | |||||
bool is_op_debug_reg_ = false; | bool is_op_debug_reg_ = false; | ||||
void *op_debug_addr_ = nullptr; | void *op_debug_addr_ = nullptr; | ||||
void *p2p_debug_addr_ = nullptr; | void *p2p_debug_addr_ = nullptr; | ||||
bool is_new_model_desc_{false}; | bool is_new_model_desc_{false}; | ||||
bool is_online_infer_dynamic_ = false; | bool is_online_infer_dynamic_ = false; | ||||
bool is_getnext_sink_dynamic_ = false; | bool is_getnext_sink_dynamic_ = false; | ||||
std::vector<int64_t> cur_dynamic_dims_; | |||||
vector<int64_t> cur_dynamic_dims_; | |||||
void *netoutput_last_input_addr_ = nullptr; | void *netoutput_last_input_addr_ = nullptr; | ||||
int64_t netoutput_last_input_size_ = 0; | int64_t netoutput_last_input_size_ = 0; | ||||
size_t shape_of_cur_dynamic_dims_ = 0; | size_t shape_of_cur_dynamic_dims_ = 0; | ||||
// key: input_index: input is merge node; value: each gear info and each output size | // key: input_index: input is merge node; value: each gear info and each output size | ||||
std::map<size_t, std::map<vector<int64_t>, int64_t>> merge_nodes_gear_and_real_out_size_info_; | |||||
map<size_t, map<vector<int64_t>, int64_t>> merge_nodes_gear_and_real_out_size_info_; | |||||
// key: input_index: input is merge node; value: each gear info and each output shape | // key: input_index: input is merge node; value: each gear info and each output shape | ||||
std::map<size_t, std::map<vector<int64_t>, vector<int64_t>>> merge_nodes_gear_and_real_out_shape_info_; | |||||
std::vector<std::vector<int64_t>> all_gears_info_; | |||||
map<size_t, map<vector<int64_t>, vector<int64_t>>> merge_nodes_gear_and_real_out_shape_info_; | |||||
vector<vector<int64_t>> all_gears_info_; | |||||
std::multimap<uint32_t, uint32_t> op_id_map_; | |||||
std::vector<ProfileInfo> profile_list_; | |||||
multimap<uint32_t, uint32_t> op_id_map_; | |||||
vector<ProfileInfo> profile_list_; | |||||
// For super kernel. | // For super kernel. | ||||
SuperKernelTaskInfo skt_info_; | SuperKernelTaskInfo skt_info_; | ||||
bool is_dynamic_aipp_ = false; | |||||
vector<string> dynamic_output_shape_info_; | |||||
vector<vector<void *>> input_addrs_list_; | |||||
vector<vector<void *>> output_addrs_list_; | |||||
vector<int64_t> output_buffer_size_; | |||||
vector<vector<int64_t>> output_shape_info_; | |||||
vector<InputOutputDescInfo> output_descs_; | |||||
vector<uint32_t> output_formats_; | |||||
}; | }; | ||||
} // namespace ge | } // namespace ge | ||||
#endif // GE_GRAPH_LOAD_NEW_MODEL_MANAGER_DAVINCI_MODEL_H_ | #endif // GE_GRAPH_LOAD_NEW_MODEL_MANAGER_DAVINCI_MODEL_H_ |
@@ -565,6 +565,7 @@ set(DISTINCT_GRAPH_LOAD_TEST_FILES | |||||
"graph/load/end_graph_task_unittest.cc" | "graph/load/end_graph_task_unittest.cc" | ||||
"graph/load/new_model_manager_event_manager_unittest.cc" | "graph/load/new_model_manager_event_manager_unittest.cc" | ||||
#"graph/load/output_net_output_unittest.cc" | #"graph/load/output_net_output_unittest.cc" | ||||
"graph/load/davinci_model_unittest.cc" | |||||
"graph/load/tbe_handle_store_unittest.cc" | "graph/load/tbe_handle_store_unittest.cc" | ||||
"graph/load/hccl_task_info_unittest.cc" | "graph/load/hccl_task_info_unittest.cc" | ||||
"graph/load/kernel_ex_task_info_unittest.cc" | "graph/load/kernel_ex_task_info_unittest.cc" | ||||
@@ -0,0 +1,285 @@ | |||||
/** | |||||
* Copyright 2019-2020 Huawei Technologies Co., Ltd | |||||
* | |||||
* Licensed under the Apache License, Version 2.0 (the "License"); | |||||
* you may not use this file except in compliance with the License. | |||||
* You may obtain a copy of the License at | |||||
* | |||||
* http://www.apache.org/licenses/LICENSE-2.0 | |||||
* | |||||
* Unless required by applicable law or agreed to in writing, software | |||||
* distributed under the License is distributed on an "AS IS" BASIS, | |||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
* See the License for the specific language governing permissions and | |||||
* limitations under the License. | |||||
*/ | |||||
#include <gtest/gtest.h> | |||||
#define private public | |||||
#define protected public | |||||
#include "graph/utils/graph_utils.h" | |||||
#include "common/profiling/profiling_manager.h" | |||||
#include "graph/load/new_model_manager/davinci_model.h" | |||||
using namespace std; | |||||
namespace ge { | |||||
extern OpDescPtr CreateOpDesc(string name, string type); | |||||
class UtestDavinciModel : public testing::Test { | |||||
protected: | |||||
void SetUp() {} | |||||
void TearDown() {} | |||||
}; | |||||
// End-to-end load test: assign a GeModel (graph + task defs) to a DavinciModel
// and verify Init() builds one input, one output and two tasks.
// NOTE: the graph->AddNode() call order fixes each op's op_index (0..3), and the
// task defs below reference ops by that index — do not reorder the AddNode calls.
TEST_F(UtestDavinciModel, init_success) {
  DavinciModel model(0, nullptr);
  ComputeGraphPtr graph = make_shared<ComputeGraph>("default");
  // Enable load profiling for this test; restored at the end.
  ProfilingManager::Instance().is_load_profiling_ = true;

  // Model-level attributes: memory pool size and stream count.
  GeModelPtr ge_model = make_shared<GeModel>();
  ge_model->SetGraph(GraphUtils::CreateGraphFromComputeGraph(graph));
  AttrUtils::SetInt(ge_model, ATTR_MODEL_MEMORY_SIZE, 5120000);
  AttrUtils::SetInt(ge_model, ATTR_MODEL_STREAM_NUM, 1);

  shared_ptr<domi::ModelTaskDef> model_task_def = make_shared<domi::ModelTaskDef>();
  ge_model->SetModelTaskDef(model_task_def);

  // Shared 512-byte NCHW float tensor descriptor used by every op below.
  GeTensorDesc tensor(GeShape(), FORMAT_NCHW, DT_FLOAT);
  TensorUtils::SetSize(tensor, 512);

  // Graph topology (by offset wiring): data -> square -> memcpy -> output.
  OpDescPtr op_input = CreateOpDesc("data", DATA);
  op_input->AddInputDesc(tensor);
  op_input->AddOutputDesc(tensor);
  op_input->SetInputOffset({1024});
  op_input->SetOutputOffset({1024});
  NodePtr node_input = graph->AddNode(op_input);    // op_index = 0

  OpDescPtr op_kernel = CreateOpDesc("square", "Square");
  op_kernel->AddInputDesc(tensor);
  op_kernel->AddOutputDesc(tensor);
  op_kernel->SetInputOffset({1024});
  op_kernel->SetOutputOffset({1024});
  NodePtr node_kernel = graph->AddNode(op_kernel);  // op_index = 1

  OpDescPtr op_memcpy = CreateOpDesc("memcpy", MEMCPYASYNC);
  op_memcpy->AddInputDesc(tensor);
  op_memcpy->AddOutputDesc(tensor);
  op_memcpy->SetInputOffset({1024});
  op_memcpy->SetOutputOffset({5120});
  NodePtr node_memcpy = graph->AddNode(op_memcpy);  // op_index = 2

  OpDescPtr op_output = CreateOpDesc("output", NETOUTPUT);
  op_output->AddInputDesc(tensor);
  op_output->SetInputOffset({5120});
  op_output->SetSrcName( { "memcpy" } );
  op_output->SetSrcIndex( { 0 } );
  NodePtr node_output = graph->AddNode(op_output);  // op_index = 3

  // Task 1: TE kernel task bound to the "square" op (op_index 1).
  domi::TaskDef *task_def1 = model_task_def->add_task();
  task_def1->set_stream_id(0);
  task_def1->set_type(RT_MODEL_TASK_KERNEL);
  domi::KernelDef *kernel_def = task_def1->mutable_kernel();
  kernel_def->set_stub_func("stub_func");
  kernel_def->set_args_size(64);
  string args(64, '1');
  kernel_def->set_args(args.data(), 64);
  domi::KernelContext *context = kernel_def->mutable_context();
  context->set_op_index(1);
  context->set_kernel_type(2);    // ccKernelType::TE
  uint16_t args_offset[9] = {0};
  context->set_args_offset(args_offset, 9 * sizeof(uint16_t));

  // Task 2: async memcpy task bound to the "memcpy" op (op_index 2);
  // src/dst match the offsets wired on the op above.
  domi::TaskDef *task_def2 = model_task_def->add_task();
  task_def2->set_stream_id(0);
  task_def2->set_type(RT_MODEL_TASK_MEMCPY_ASYNC);
  domi::MemcpyAsyncDef *memcpy_async = task_def2->mutable_memcpy_async();
  memcpy_async->set_src(1024);
  memcpy_async->set_dst(5120);
  memcpy_async->set_dst_max(512);
  memcpy_async->set_count(1);
  memcpy_async->set_kind(RT_MEMCPY_DEVICE_TO_DEVICE);
  memcpy_async->set_op_index(2);

  EXPECT_EQ(model.Assign(ge_model), SUCCESS);
  EXPECT_EQ(model.Init(), SUCCESS);

  // One DATA op and one NETOUTPUT op => one entry in each address list;
  // two task defs => two runtime tasks.
  EXPECT_EQ(model.input_addrs_list_.size(), 1);
  EXPECT_EQ(model.output_addrs_list_.size(), 1);
  EXPECT_EQ(model.task_list_.size(), 2);

  ProfilingManager::Instance().is_load_profiling_ = false;
}
// InitNodes on a minimal two-node graph (DATA feeding NETOUTPUT via src-name
// wiring): both ops must be registered and each address list gains one entry.
TEST_F(UtestDavinciModel, init_data_op) {
  DavinciModel model(0, nullptr);
  model.ge_model_ = make_shared<GeModel>();
  model.runtime_param_.mem_base = (uint8_t *)0x08000000;
  model.runtime_param_.mem_size = 5120000;

  ComputeGraphPtr compute_graph = make_shared<ComputeGraph>("default");

  // 512-byte NCHW float descriptor reused for every anchor below.
  GeTensorDesc tensor_desc(GeShape(), FORMAT_NCHW, DT_FLOAT);
  TensorUtils::SetSize(tensor_desc, 512);

  // DATA op: output lives at offset 5120 inside the model memory pool.
  OpDescPtr data_desc = CreateOpDesc("data", DATA);
  data_desc->AddInputDesc(tensor_desc);
  data_desc->AddOutputDesc(tensor_desc);
  data_desc->SetInputOffset({1024});
  data_desc->SetOutputOffset({5120});
  NodePtr data_node = compute_graph->AddNode(data_desc);

  // NETOUTPUT op: wired back to the DATA op through src name/index.
  OpDescPtr out_desc = CreateOpDesc("output", NETOUTPUT);
  out_desc->AddInputDesc(tensor_desc);
  out_desc->SetInputOffset({1024});
  out_desc->SetSrcName({"data"});
  out_desc->SetSrcIndex({0});
  NodePtr out_node = compute_graph->AddNode(out_desc);

  EXPECT_EQ(model.InitNodes(compute_graph), SUCCESS);

  EXPECT_EQ(model.input_addrs_list_.size(), 1);
  EXPECT_EQ(model.output_addrs_list_.size(), 1);
  EXPECT_EQ(model.op_list_.size(), 2);
}
// InitDataOp called with a null root graph (subgraph case): the DATA node must
// be skipped — no addresses recorded, index untouched, index map left empty.
TEST_F(UtestDavinciModel, init_data_op_subgraph) {
  DavinciModel model(0, nullptr);
  model.runtime_param_.mem_base = (uint8_t *)0x08000000;
  model.runtime_param_.mem_size = 5120000;

  ComputeGraphPtr sub_graph = make_shared<ComputeGraph>("default");

  // Minimal DATA node; tensor size is irrelevant here since the op is skipped.
  OpDescPtr data_desc = CreateOpDesc("data", DATA);
  GeTensorDesc tensor_desc(GeShape(), FORMAT_NCHW, DT_FLOAT);
  data_desc->AddInputDesc(tensor_desc);
  data_desc->AddOutputDesc(tensor_desc);
  data_desc->SetInputOffset({1024});
  data_desc->SetOutputOffset({5120});
  NodePtr data_node = sub_graph->AddNode(data_desc);

  uint32_t index = 0;
  map<uint32_t, OpDescPtr> index_to_data;
  EXPECT_EQ(model.InitDataOp(nullptr, data_node, index, index_to_data), SUCCESS);

  EXPECT_EQ(model.input_addrs_list_.size(), 0);
  EXPECT_EQ(model.output_addrs_list_.size(), 0);
  EXPECT_EQ(index, 0);
  EXPECT_TRUE(index_to_data.empty());
}
// InitNetOutput called with a null root graph (subgraph case): the NETOUTPUT
// node must be skipped — no addresses recorded and the output list stays empty.
TEST_F(UtestDavinciModel, init_netoutput_op_subgraph) {
  DavinciModel model(0, nullptr);
  model.runtime_param_.mem_base = (uint8_t *)0x08000000;
  model.runtime_param_.mem_size = 5120000;

  ComputeGraphPtr sub_graph = make_shared<ComputeGraph>("default");

  // Minimal NETOUTPUT node wired to a (nonexistent) "data" producer.
  OpDescPtr out_desc = CreateOpDesc("output", NETOUTPUT);
  GeTensorDesc tensor_desc(GeShape(), FORMAT_NCHW, DT_FLOAT);
  out_desc->AddInputDesc(tensor_desc);
  out_desc->SetInputOffset({1024});
  out_desc->SetSrcName({"data"});
  out_desc->SetSrcIndex({0});
  NodePtr out_node = sub_graph->AddNode(out_desc);

  std::vector<OpDescPtr> collected_outputs;
  EXPECT_EQ(model.InitNetOutput(nullptr, out_node, collected_outputs), SUCCESS);

  EXPECT_EQ(model.input_addrs_list_.size(), 0);
  EXPECT_EQ(model.output_addrs_list_.size(), 0);
  EXPECT_TRUE(collected_outputs.empty());
}
// Same model layout as init_success, but loaded as a known-shape "known node"
// model (SetKnownNode(true)); additionally exercises UpdateArgs on both tasks,
// the output-shape/desc query paths, and UpdateKnownNodeArgs with caller-owned
// I/O addresses.
// NOTE: the graph->AddNode() call order fixes each op's op_index (0..3), and the
// task defs below reference ops by that index — do not reorder the AddNode calls.
TEST_F(UtestDavinciModel, init_unknown) {
  DavinciModel model(0, nullptr);
  model.SetKnownNode(true);
  ComputeGraphPtr graph = make_shared<ComputeGraph>("default");

  // Model-level attributes: memory pool size and stream count.
  GeModelPtr ge_model = make_shared<GeModel>();
  ge_model->SetGraph(GraphUtils::CreateGraphFromComputeGraph(graph));
  AttrUtils::SetInt(ge_model, ATTR_MODEL_MEMORY_SIZE, 5120000);
  AttrUtils::SetInt(ge_model, ATTR_MODEL_STREAM_NUM, 1);

  shared_ptr<domi::ModelTaskDef> model_task_def = make_shared<domi::ModelTaskDef>();
  ge_model->SetModelTaskDef(model_task_def);

  // Shared 512-byte NCHW float tensor descriptor used by every op below.
  GeTensorDesc tensor(GeShape(), FORMAT_NCHW, DT_FLOAT);
  TensorUtils::SetSize(tensor, 512);

  // Graph topology (by offset wiring): data -> square -> memcpy -> output.
  OpDescPtr op_input = CreateOpDesc("data", DATA);
  op_input->AddInputDesc(tensor);
  op_input->AddOutputDesc(tensor);
  op_input->SetInputOffset({1024});
  op_input->SetOutputOffset({1024});
  NodePtr node_input = graph->AddNode(op_input);    // op_index = 0

  OpDescPtr op_kernel = CreateOpDesc("square", "Square");
  op_kernel->AddInputDesc(tensor);
  op_kernel->AddOutputDesc(tensor);
  op_kernel->SetInputOffset({1024});
  op_kernel->SetOutputOffset({1024});
  NodePtr node_kernel = graph->AddNode(op_kernel);  // op_index = 1

  OpDescPtr op_memcpy = CreateOpDesc("memcpy", MEMCPYASYNC);
  op_memcpy->AddInputDesc(tensor);
  op_memcpy->AddOutputDesc(tensor);
  op_memcpy->SetInputOffset({1024});
  op_memcpy->SetOutputOffset({5120});
  NodePtr node_memcpy = graph->AddNode(op_memcpy);  // op_index = 2

  OpDescPtr op_output = CreateOpDesc("output", NETOUTPUT);
  op_output->AddInputDesc(tensor);
  op_output->SetInputOffset({5120});
  op_output->SetSrcName( { "memcpy" } );
  op_output->SetSrcIndex( { 0 } );
  NodePtr node_output = graph->AddNode(op_output);  // op_index = 3

  // Task 1: TE kernel task bound to the "square" op (op_index 1).
  domi::TaskDef *task_def1 = model_task_def->add_task();
  task_def1->set_stream_id(0);
  task_def1->set_type(RT_MODEL_TASK_KERNEL);
  domi::KernelDef *kernel_def = task_def1->mutable_kernel();
  kernel_def->set_stub_func("stub_func");
  kernel_def->set_args_size(64);
  string args(64, '1');
  kernel_def->set_args(args.data(), 64);
  domi::KernelContext *context = kernel_def->mutable_context();
  context->set_op_index(1);
  context->set_kernel_type(2);    // ccKernelType::TE
  uint16_t args_offset[9] = {0};
  context->set_args_offset(args_offset, 9 * sizeof(uint16_t));

  // Task 2: async memcpy task bound to the "memcpy" op (op_index 2);
  // src/dst match the offsets wired on the op above.
  domi::TaskDef *task_def2 = model_task_def->add_task();
  task_def2->set_stream_id(0);
  task_def2->set_type(RT_MODEL_TASK_MEMCPY_ASYNC);
  domi::MemcpyAsyncDef *memcpy_async = task_def2->mutable_memcpy_async();
  memcpy_async->set_src(1024);
  memcpy_async->set_dst(5120);
  memcpy_async->set_dst_max(512);
  memcpy_async->set_count(1);
  memcpy_async->set_kind(RT_MEMCPY_DEVICE_TO_DEVICE);
  memcpy_async->set_op_index(2);

  EXPECT_EQ(model.Assign(ge_model), SUCCESS);
  EXPECT_EQ(model.Init(), SUCCESS);

  // One DATA op and one NETOUTPUT op => one entry in each address list;
  // two task defs => two runtime tasks.
  EXPECT_EQ(model.input_addrs_list_.size(), 1);
  EXPECT_EQ(model.output_addrs_list_.size(), 1);
  EXPECT_EQ(model.task_list_.size(), 2);

  // Known-node path: per-task args refresh must succeed for both tasks.
  EXPECT_EQ(model.task_list_[0]->UpdateArgs(), SUCCESS);
  EXPECT_EQ(model.task_list_[1]->UpdateArgs(), SUCCESS);

  // Query paths exercised for coverage; only the desc query result is checked.
  vector<string> out_shape_info;
  model.GetModelAttr(out_shape_info);

  vector<InputOutputDescInfo> input_descs;
  vector<InputOutputDescInfo> output_descs;
  EXPECT_EQ(model.GetInputOutputDescInfo(input_descs, output_descs), SUCCESS);

  // Known-node args update with caller-provided host addresses.
  int32_t virtual_addr = 0;
  const vector<void *> inputs = { &virtual_addr };
  const vector<void *> outputs = { &virtual_addr };
  EXPECT_EQ(model.UpdateKnownNodeArgs(inputs, outputs), SUCCESS);
}
} // namespace ge |