From bac7bcfc09933b1a5ca41bd837138025023b129e Mon Sep 17 00:00:00 2001
From: lwx897429
Date: Fri, 15 Jan 2021 10:29:25 +0800
Subject: [PATCH] Optional output does not allocate memory

---
 ge/graph/build/memory/block_mem_assigner.cc   |  7 +++++
 .../load/new_model_manager/model_utils.cc     | 20 +++++++++-----
 .../node_executor/aicore/aicore_op_task.cc    | 26 ++++++++++++++++++-
 .../node_executor/aicore/aicore_op_task.h     |  1 +
 ge/hybrid/node_executor/task_context.cc       |  8 ++++++
 metadef                                       |  2 +-
 parser                                        |  2 +-
 7 files changed, 57 insertions(+), 9 deletions(-)

diff --git a/ge/graph/build/memory/block_mem_assigner.cc b/ge/graph/build/memory/block_mem_assigner.cc
index 76e7efbe..a523ce3f 100755
--- a/ge/graph/build/memory/block_mem_assigner.cc
+++ b/ge/graph/build/memory/block_mem_assigner.cc
@@ -24,6 +24,7 @@
 #include "graph/buffer.h"
 #include "graph/ge_attr_value.h"
 #include "graph/ge_context.h"
+#include "graph/types.h"
 #include "graph/node.h"
 #include "graph/utils/graph_utils.h"
 #include "graph/utils/node_utils.h"
@@ -1401,6 +1402,7 @@ Status BlockMemAssigner::AssignOutputMemoryWithReuse(const NodePtr &node, vector
     if (output_op_desc != nullptr) {
       GE_IF_BOOL_EXEC(ge::TensorUtils::GetSize(*output_op_desc, size) != SUCCESS, GELOGI("Get size failed"));
     }
+
     // fusion: other type's size not means malloc HBM memory
     bool l1_flag = has_mem_type_attr && memorys_type[i] == RT_MEMORY_L1;
     if (l1_flag) {
@@ -1408,6 +1410,11 @@ Status BlockMemAssigner::AssignOutputMemoryWithReuse(const NodePtr &node, vector
              op_desc->GetName().c_str(), op_desc->GetOutputNameByIndex(i).c_str(), memorys_type[i]);
       size = 0;
     }
+
+    int32_t calc_type = 0;
+    bool ret = ge::AttrUtils::GetInt(output_op_desc, ATTR_NAME_MEMORY_SIZE_CALC_TYPE, calc_type);
+    GE_IF_BOOL_EXEC((ret && (calc_type == static_cast<int32_t>(ge::MemorySizeCalcType::ALWAYS_EMPTY))), size = 0;);
+
     std::string peer_name;
     uint32_t peer_input_index = 0;
     bool out_node_set_continuous_input = false;
diff --git a/ge/graph/load/new_model_manager/model_utils.cc b/ge/graph/load/new_model_manager/model_utils.cc
index d9a9f3ca..3c141f06 100755
--- a/ge/graph/load/new_model_manager/model_utils.cc
+++ b/ge/graph/load/new_model_manager/model_utils.cc
@@ -20,6 +20,7 @@
 #include "common/op/ge_op_utils.h"
 #include "graph/utils/tensor_utils.h"
 #include "graph/manager/graph_var_manager.h"
+#include "graph/types.h"
 
 #define VALIDATE_MEM_RANGE(OP, SIZE, OFFSET) \
   do { \
@@ -340,7 +341,7 @@ vector<void *> ModelUtils::GetInputDataAddrs(const RuntimeParam &model_param, Co
                     GELOGI("[IMAS]GetInputDataAddrs graph_%u type[V] name[%s] input[%lu] memaddr[%p]",
                            model_param.graph_id, op_desc->GetName().c_str(), i, variable_addr);
                     continue);
-    
+
     int64_t mem_type;
     bool tensor_has_mem_type = ge::AttrUtils::GetInt(tensor_desc, ATTR_NAME_TENSOR_MEM_TYPE, mem_type);
     // feature maps
@@ -424,6 +425,18 @@ vector<void *> ModelUtils::GetOutputDataAddrs(const RuntimeParam &model_param, C
     return v_output_data_addr;
   }
   for (size_t i = 0; i < outputs_size; ++i) {
+    const GeTensorDescPtr tensor_desc = op_desc->MutableOutputDesc(i);
+    if (tensor_desc == nullptr) {
+      GELOGW("Op: %s, Index: %zu, Tensor Desc is null", op_desc->GetName().c_str(), i);
+      continue;
+    }
+
+    int32_t calc_type = 0;
+    bool ret = ge::AttrUtils::GetInt(tensor_desc, ATTR_NAME_MEMORY_SIZE_CALC_TYPE, calc_type);
+    if (ret && (calc_type == static_cast<int32_t>(ge::MemorySizeCalcType::ALWAYS_EMPTY))) {
+      GELOGD("%s is an optional output, the address don't need to be saved.", tensor_desc->GetName().c_str());
+      continue;
+    }
     GE_IF_BOOL_EXEC(model_param.var_size != 0 && ge::VarManager::Instance(session_id)->IsVarAddr(v_output_offset[i]),
                     uint8_t *variable_addr = nullptr;
                     GE_CHK_STATUS_EXEC(GetVarAddr(model_param, op_desc, v_output_offset[i], variable_addr), return {});
@@ -431,11 +444,6 @@ vector<void *> ModelUtils::GetOutputDataAddrs(const RuntimeParam &model_param, C
                     GELOGI("[IMAS]GetOutputDataAddrs graph_%u type[V] name[%s] output[%zu] memaddr[%p]",
                            model_param.graph_id, op_desc->GetName().c_str(), i, variable_addr);
                     continue);
-    const GeTensorDescPtr tensor_desc = op_desc->MutableOutputDesc(i);
-    if (tensor_desc == nullptr) {
-      GELOGW("Op: %s, Index: %zu, Tensor Desc is null", op_desc->GetName().c_str(), i);
-      continue;
-    }
 
     int64_t mem_type;
     bool tensor_has_mem_type = ge::AttrUtils::GetInt(tensor_desc, ATTR_NAME_TENSOR_MEM_TYPE, mem_type);
diff --git a/ge/hybrid/node_executor/aicore/aicore_op_task.cc b/ge/hybrid/node_executor/aicore/aicore_op_task.cc
index 80ea579b..f61caf19 100644
--- a/ge/hybrid/node_executor/aicore/aicore_op_task.cc
+++ b/ge/hybrid/node_executor/aicore/aicore_op_task.cc
@@ -20,6 +20,7 @@
 #include "hybrid/executor/hybrid_execution_context.h"
 #include "hybrid/node_executor/aicore/aicore_task_builder.h"
 #include "graph/load/new_model_manager/tbe_handle_store.h"
+#include "graph/types.h"
 
 using optiling::OpRunInfo;
 
@@ -34,6 +35,23 @@ constexpr char const *kAttrAtomicOpParamSize = "atomic_op_para_size";
 Status AiCoreOpTask::Init(const OpDesc &op_desc, const domi::TaskDef &task_def) {
   GE_CHK_STATUS_RET_NOLOG(InitWithTaskDef(op_desc, task_def));
   GE_CHK_STATUS_RET_NOLOG(InitTilingInfo(op_desc));
+
+  GE_CHECK_LE(op_desc.GetOutputsSize(), static_cast<size_t>(INT_MAX));
+  int outputs_size = static_cast<int>(op_desc.GetOutputsSize());
+
+  for (int i = 0; i < outputs_size; ++i) {
+    const GeTensorDescPtr tensor_desc = op_desc.MutableOutputDesc(i);
+    if (tensor_desc == nullptr) {
+      GELOGW("Op: %s, Index: %d, Tensor Desc is null", op_desc.GetName().c_str(), i);
+      continue;
+    }
+
+    int32_t calc_type = 0;
+    bool ret = ge::AttrUtils::GetInt(tensor_desc, ATTR_NAME_MEMORY_SIZE_CALC_TYPE, calc_type);
+    if (ret && (calc_type == static_cast<int32_t>(ge::MemorySizeCalcType::ALWAYS_EMPTY))) {
+      output_indices_to_skip_.push_back(i);
+    }
+  }
   return SUCCESS;
 }
 
@@ -221,7 +239,8 @@ Status AiCoreOpTask::CalcTilingInfo(const NodePtr &node, OpRunInfo &tiling_info)
 }
 
 Status AiCoreOpTask::UpdateArgs(TaskContext &task_context) {
-  size_t expected_arg_count = task_context.NumInputs() + task_context.NumOutputs() + task_context.NumWorkspaces();
+  size_t expected_arg_count = task_context.NumInputs() + task_context.NumOutputs() + task_context.NumWorkspaces()
+                              - output_indices_to_skip_.size();
   if (tiling_buffer_ != nullptr) {
     ++expected_arg_count;
   }
@@ -244,6 +263,11 @@ Status AiCoreOpTask::UpdateArgs(TaskContext &task_context) {
   for (int i = 0; i < task_context.NumOutputs(); ++i) {
     const auto output = task_context.GetOutput(i);
     GE_CHECK_NOTNULL(output);
+    if (find(output_indices_to_skip_.begin(), output_indices_to_skip_.end(), i) != output_indices_to_skip_.end()) {
+      GELOGD("Node:%s output[%d] is an optional, the address don't need to be saved.",
+             task_context.GetNodeName(), i);
+      continue;
+    }
     arg_base_[index++] = reinterpret_cast<uintptr_t>(output->GetData());
   }
 
diff --git a/ge/hybrid/node_executor/aicore/aicore_op_task.h b/ge/hybrid/node_executor/aicore/aicore_op_task.h
index dd15c608..3f350531 100755
--- a/ge/hybrid/node_executor/aicore/aicore_op_task.h
+++ b/ge/hybrid/node_executor/aicore/aicore_op_task.h
@@ -72,6 +72,7 @@ class AiCoreOpTask {
   uint32_t args_size_ = 0;
   uint32_t block_dim_ = 1;
   bool clear_atomic_ = true;
+  std::vector<int> output_indices_to_skip_;
 };
 
 class AtomicAddrCleanOpTask : public AiCoreOpTask {
diff --git a/ge/hybrid/node_executor/task_context.cc b/ge/hybrid/node_executor/task_context.cc
index 8b7c623f..e89ad874 100644
--- a/ge/hybrid/node_executor/task_context.cc
+++ b/ge/hybrid/node_executor/task_context.cc
@@ -18,6 +18,7 @@
 #include "framework/common/ge_inner_error_codes.h"
 #include "framework/common/debug/log.h"
 #include "graph/utils/tensor_utils.h"
+#include "graph/types.h"
 #include "graph/debug/ge_attr_define.h"
 #include "hybrid/executor/hybrid_execution_context.h"
 #include "hybrid/executor/subgraph_executor.h"
@@ -213,6 +214,13 @@ Status TaskContext::AllocateOutput(int index,
     return SUCCESS;
   }
 
+  int32_t calc_type = 0;
+  bool ret = ge::AttrUtils::GetInt(tensor_desc, ATTR_NAME_MEMORY_SIZE_CALC_TYPE, calc_type);
+  if (ret && (calc_type == static_cast<int32_t>(ge::MemorySizeCalcType::ALWAYS_EMPTY))) {
+    outputs_start_[index] = TensorValue();
+    return SUCCESS;
+  }
+
   auto it = node_item_->ref_outputs.find(index);
   if (it != node_item_->ref_outputs.end()) {
     auto &ref_node = it->second;
diff --git a/metadef b/metadef
index b00c50c2..88d053a5 160000
--- a/metadef
+++ b/metadef
@@ -1 +1 @@
-Subproject commit b00c50c2a8c2ce06929b27f7b74185a950737ec8
+Subproject commit 88d053a5f94c40ff21620cef50b87075d5054292
diff --git a/parser b/parser
index f0109a2c..6904ba94 160000
--- a/parser
+++ b/parser
@@ -1 +1 @@
-Subproject commit f0109a2c70981d74932bb38bb56722caff3323a5
+Subproject commit 6904ba9488658afc30076d299183fc8875045f49