From: @li-lei0106 Reviewed-by: Signed-off-by: | tags/v1.2.0
@@ -24,6 +24,7 @@ | |||||
#include "graph/buffer.h" | #include "graph/buffer.h" | ||||
#include "graph/ge_attr_value.h" | #include "graph/ge_attr_value.h" | ||||
#include "graph/ge_context.h" | #include "graph/ge_context.h" | ||||
#include "graph/types.h" | |||||
#include "graph/node.h" | #include "graph/node.h" | ||||
#include "graph/utils/graph_utils.h" | #include "graph/utils/graph_utils.h" | ||||
#include "graph/utils/node_utils.h" | #include "graph/utils/node_utils.h" | ||||
@@ -1401,6 +1402,7 @@ Status BlockMemAssigner::AssignOutputMemoryWithReuse(const NodePtr &node, vector | |||||
if (output_op_desc != nullptr) { | if (output_op_desc != nullptr) { | ||||
GE_IF_BOOL_EXEC(ge::TensorUtils::GetSize(*output_op_desc, size) != SUCCESS, GELOGI("Get size failed")); | GE_IF_BOOL_EXEC(ge::TensorUtils::GetSize(*output_op_desc, size) != SUCCESS, GELOGI("Get size failed")); | ||||
} | } | ||||
// fusion: other type's size not means malloc HBM memory | // fusion: other type's size not means malloc HBM memory | ||||
bool l1_flag = has_mem_type_attr && memorys_type[i] == RT_MEMORY_L1; | bool l1_flag = has_mem_type_attr && memorys_type[i] == RT_MEMORY_L1; | ||||
if (l1_flag) { | if (l1_flag) { | ||||
@@ -1408,6 +1410,11 @@ Status BlockMemAssigner::AssignOutputMemoryWithReuse(const NodePtr &node, vector | |||||
op_desc->GetName().c_str(), op_desc->GetOutputNameByIndex(i).c_str(), memorys_type[i]); | op_desc->GetName().c_str(), op_desc->GetOutputNameByIndex(i).c_str(), memorys_type[i]); | ||||
size = 0; | size = 0; | ||||
} | } | ||||
int32_t calc_type = 0; | |||||
bool ret = ge::AttrUtils::GetInt(output_op_desc, ATTR_NAME_MEMORY_SIZE_CALC_TYPE, calc_type); | |||||
GE_IF_BOOL_EXEC((ret && (calc_type == static_cast<int32_t>(ge::MemorySizeCalcType::ALWAYS_EMPTY))), size = 0;); | |||||
std::string peer_name; | std::string peer_name; | ||||
uint32_t peer_input_index = 0; | uint32_t peer_input_index = 0; | ||||
bool out_node_set_continuous_input = false; | bool out_node_set_continuous_input = false; | ||||
@@ -20,6 +20,7 @@ | |||||
#include "common/op/ge_op_utils.h" | #include "common/op/ge_op_utils.h" | ||||
#include "graph/utils/tensor_utils.h" | #include "graph/utils/tensor_utils.h" | ||||
#include "graph/manager/graph_var_manager.h" | #include "graph/manager/graph_var_manager.h" | ||||
#include "graph/types.h" | |||||
#define VALIDATE_MEM_RANGE(OP, SIZE, OFFSET) \ | #define VALIDATE_MEM_RANGE(OP, SIZE, OFFSET) \ | ||||
do { \ | do { \ | ||||
@@ -340,7 +341,7 @@ vector<void *> ModelUtils::GetInputDataAddrs(const RuntimeParam &model_param, Co | |||||
GELOGI("[IMAS]GetInputDataAddrs graph_%u type[V] name[%s] input[%lu] memaddr[%p]", | GELOGI("[IMAS]GetInputDataAddrs graph_%u type[V] name[%s] input[%lu] memaddr[%p]", | ||||
model_param.graph_id, op_desc->GetName().c_str(), i, variable_addr); | model_param.graph_id, op_desc->GetName().c_str(), i, variable_addr); | ||||
continue); | continue); | ||||
int64_t mem_type; | int64_t mem_type; | ||||
bool tensor_has_mem_type = ge::AttrUtils::GetInt(tensor_desc, ATTR_NAME_TENSOR_MEM_TYPE, mem_type); | bool tensor_has_mem_type = ge::AttrUtils::GetInt(tensor_desc, ATTR_NAME_TENSOR_MEM_TYPE, mem_type); | ||||
// feature maps | // feature maps | ||||
@@ -424,6 +425,18 @@ vector<void *> ModelUtils::GetOutputDataAddrs(const RuntimeParam &model_param, C | |||||
return v_output_data_addr; | return v_output_data_addr; | ||||
} | } | ||||
for (size_t i = 0; i < outputs_size; ++i) { | for (size_t i = 0; i < outputs_size; ++i) { | ||||
const GeTensorDescPtr tensor_desc = op_desc->MutableOutputDesc(i); | |||||
if (tensor_desc == nullptr) { | |||||
GELOGW("Op: %s, Index: %zu, Tensor Desc is null", op_desc->GetName().c_str(), i); | |||||
continue; | |||||
} | |||||
int32_t calc_type = 0; | |||||
bool ret = ge::AttrUtils::GetInt(tensor_desc, ATTR_NAME_MEMORY_SIZE_CALC_TYPE, calc_type); | |||||
if (ret && (calc_type == static_cast<int32_t>(ge::MemorySizeCalcType::ALWAYS_EMPTY))) { | |||||
GELOGD("%s is an optional output, the address don't need to be saved.", tensor_desc->GetName().c_str()); | |||||
continue; | |||||
} | |||||
GE_IF_BOOL_EXEC(model_param.var_size != 0 && ge::VarManager::Instance(session_id)->IsVarAddr(v_output_offset[i]), | GE_IF_BOOL_EXEC(model_param.var_size != 0 && ge::VarManager::Instance(session_id)->IsVarAddr(v_output_offset[i]), | ||||
uint8_t *variable_addr = nullptr; | uint8_t *variable_addr = nullptr; | ||||
GE_CHK_STATUS_EXEC(GetVarAddr(model_param, op_desc, v_output_offset[i], variable_addr), return {}); | GE_CHK_STATUS_EXEC(GetVarAddr(model_param, op_desc, v_output_offset[i], variable_addr), return {}); | ||||
@@ -431,11 +444,6 @@ vector<void *> ModelUtils::GetOutputDataAddrs(const RuntimeParam &model_param, C | |||||
GELOGI("[IMAS]GetOutputDataAddrs graph_%u type[V] name[%s] output[%zu] memaddr[%p]", | GELOGI("[IMAS]GetOutputDataAddrs graph_%u type[V] name[%s] output[%zu] memaddr[%p]", | ||||
model_param.graph_id, op_desc->GetName().c_str(), i, variable_addr); | model_param.graph_id, op_desc->GetName().c_str(), i, variable_addr); | ||||
continue); | continue); | ||||
const GeTensorDescPtr tensor_desc = op_desc->MutableOutputDesc(i); | |||||
if (tensor_desc == nullptr) { | |||||
GELOGW("Op: %s, Index: %zu, Tensor Desc is null", op_desc->GetName().c_str(), i); | |||||
continue; | |||||
} | |||||
int64_t mem_type; | int64_t mem_type; | ||||
bool tensor_has_mem_type = ge::AttrUtils::GetInt(tensor_desc, ATTR_NAME_TENSOR_MEM_TYPE, mem_type); | bool tensor_has_mem_type = ge::AttrUtils::GetInt(tensor_desc, ATTR_NAME_TENSOR_MEM_TYPE, mem_type); | ||||
@@ -20,6 +20,7 @@ | |||||
#include "hybrid/executor/hybrid_execution_context.h" | #include "hybrid/executor/hybrid_execution_context.h" | ||||
#include "hybrid/node_executor/aicore/aicore_task_builder.h" | #include "hybrid/node_executor/aicore/aicore_task_builder.h" | ||||
#include "graph/load/new_model_manager/tbe_handle_store.h" | #include "graph/load/new_model_manager/tbe_handle_store.h" | ||||
#include "graph/types.h" | |||||
using optiling::OpRunInfo; | using optiling::OpRunInfo; | ||||
@@ -34,6 +35,23 @@ constexpr char const *kAttrAtomicOpParamSize = "atomic_op_para_size"; | |||||
Status AiCoreOpTask::Init(const OpDesc &op_desc, const domi::TaskDef &task_def) { | Status AiCoreOpTask::Init(const OpDesc &op_desc, const domi::TaskDef &task_def) { | ||||
GE_CHK_STATUS_RET_NOLOG(InitWithTaskDef(op_desc, task_def)); | GE_CHK_STATUS_RET_NOLOG(InitWithTaskDef(op_desc, task_def)); | ||||
GE_CHK_STATUS_RET_NOLOG(InitTilingInfo(op_desc)); | GE_CHK_STATUS_RET_NOLOG(InitTilingInfo(op_desc)); | ||||
GE_CHECK_LE(op_desc.GetOutputsSize(), static_cast<size_t>(INT_MAX)); | |||||
int outputs_size = static_cast<int>(op_desc.GetOutputsSize()); | |||||
for (int i = 0; i < outputs_size; ++i) { | |||||
const GeTensorDescPtr tensor_desc = op_desc.MutableOutputDesc(i); | |||||
if (tensor_desc == nullptr) { | |||||
GELOGW("Op: %s, Index: %d, Tensor Desc is null", op_desc.GetName().c_str(), i); | |||||
continue; | |||||
} | |||||
int32_t calc_type = 0; | |||||
bool ret = ge::AttrUtils::GetInt(tensor_desc, ATTR_NAME_MEMORY_SIZE_CALC_TYPE, calc_type); | |||||
if (ret && (calc_type == static_cast<int32_t>(ge::MemorySizeCalcType::ALWAYS_EMPTY))) { | |||||
output_indices_to_skip_.push_back(i); | |||||
} | |||||
} | |||||
return SUCCESS; | return SUCCESS; | ||||
} | } | ||||
@@ -221,7 +239,8 @@ Status AiCoreOpTask::CalcTilingInfo(const NodePtr &node, OpRunInfo &tiling_info) | |||||
} | } | ||||
Status AiCoreOpTask::UpdateArgs(TaskContext &task_context) { | Status AiCoreOpTask::UpdateArgs(TaskContext &task_context) { | ||||
size_t expected_arg_count = task_context.NumInputs() + task_context.NumOutputs() + task_context.NumWorkspaces(); | |||||
size_t expected_arg_count = task_context.NumInputs() + task_context.NumOutputs() + task_context.NumWorkspaces() | |||||
- output_indices_to_skip_.size(); | |||||
if (tiling_buffer_ != nullptr) { | if (tiling_buffer_ != nullptr) { | ||||
++expected_arg_count; | ++expected_arg_count; | ||||
} | } | ||||
@@ -244,6 +263,11 @@ Status AiCoreOpTask::UpdateArgs(TaskContext &task_context) { | |||||
for (int i = 0; i < task_context.NumOutputs(); ++i) { | for (int i = 0; i < task_context.NumOutputs(); ++i) { | ||||
const auto output = task_context.GetOutput(i); | const auto output = task_context.GetOutput(i); | ||||
GE_CHECK_NOTNULL(output); | GE_CHECK_NOTNULL(output); | ||||
if (find(output_indices_to_skip_.begin(), output_indices_to_skip_.end(), i) != output_indices_to_skip_.end()) { | |||||
GELOGD("Node:%s output[%d] is an optional, the address don't need to be saved.", | |||||
task_context.GetNodeName(), i); | |||||
continue; | |||||
} | |||||
arg_base_[index++] = reinterpret_cast<uintptr_t>(output->GetData()); | arg_base_[index++] = reinterpret_cast<uintptr_t>(output->GetData()); | ||||
} | } | ||||
@@ -72,6 +72,7 @@ class AiCoreOpTask { | |||||
uint32_t args_size_ = 0; | uint32_t args_size_ = 0; | ||||
uint32_t block_dim_ = 1; | uint32_t block_dim_ = 1; | ||||
bool clear_atomic_ = true; | bool clear_atomic_ = true; | ||||
std::vector<int> output_indices_to_skip_; | |||||
}; | }; | ||||
class AtomicAddrCleanOpTask : public AiCoreOpTask { | class AtomicAddrCleanOpTask : public AiCoreOpTask { | ||||
@@ -18,6 +18,7 @@ | |||||
#include "framework/common/ge_inner_error_codes.h" | #include "framework/common/ge_inner_error_codes.h" | ||||
#include "framework/common/debug/log.h" | #include "framework/common/debug/log.h" | ||||
#include "graph/utils/tensor_utils.h" | #include "graph/utils/tensor_utils.h" | ||||
#include "graph/types.h" | |||||
#include "graph/debug/ge_attr_define.h" | #include "graph/debug/ge_attr_define.h" | ||||
#include "hybrid/executor/hybrid_execution_context.h" | #include "hybrid/executor/hybrid_execution_context.h" | ||||
#include "hybrid/executor/subgraph_executor.h" | #include "hybrid/executor/subgraph_executor.h" | ||||
@@ -213,6 +214,13 @@ Status TaskContext::AllocateOutput(int index, | |||||
return SUCCESS; | return SUCCESS; | ||||
} | } | ||||
int32_t calc_type = 0; | |||||
bool ret = ge::AttrUtils::GetInt(tensor_desc, ATTR_NAME_MEMORY_SIZE_CALC_TYPE, calc_type); | |||||
if (ret && (calc_type == static_cast<int32_t>(ge::MemorySizeCalcType::ALWAYS_EMPTY))) { | |||||
outputs_start_[index] = TensorValue(); | |||||
return SUCCESS; | |||||
} | |||||
auto it = node_item_->ref_outputs.find(index); | auto it = node_item_->ref_outputs.find(index); | ||||
if (it != node_item_->ref_outputs.end()) { | if (it != node_item_->ref_outputs.end()) { | ||||
auto &ref_node = it->second; | auto &ref_node = it->second; | ||||
@@ -1 +1 @@ | |||||
Subproject commit b00c50c2a8c2ce06929b27f7b74185a950737ec8 | |||||
Subproject commit 88d053a5f94c40ff21620cef50b87075d5054292 |
@@ -1 +1 @@ | |||||
Subproject commit f0109a2c70981d74932bb38bb56722caff3323a5 | |||||
Subproject commit 6904ba9488658afc30076d299183fc8875045f49 |