diff --git a/ge/graph/load/model_manager/model_utils.cc b/ge/graph/load/model_manager/model_utils.cc index a35a4669..c15c6872 100755 --- a/ge/graph/load/model_manager/model_utils.cc +++ b/ge/graph/load/model_manager/model_utils.cc @@ -22,16 +22,25 @@ #include "graph/manager/graph_var_manager.h" #include "graph/types.h" #include "graph/build/memory/block_mem_assigner.h" - -#define VALIDATE_MEM_RANGE(OP, SIZE, OFFSET) \ - do { \ - if (SIZE <= static_cast(OFFSET)) { \ - REPORT_INNER_ERROR("E19999", "Node:%s(%s) offset:%ld out of range size:%lu, check invalid", \ - OP->GetName().c_str(), OP->GetType().c_str(), OFFSET, SIZE); \ - GELOGE(OUT_OF_MEMORY, "[Check][Param]Node: %s, memory out of range[%lu: %ld]", \ - OP->GetName().c_str(), SIZE, OFFSET); \ - return {}; \ - } \ +#include "common/math/math_util.h" + +#define VALIDATE_MEM_RANGE(OP, TOTAL_SIZE, OFFSET, SIZE) \ + do { \ + if (ge::CheckInt64AddOverflow((OFFSET), (SIZE)) != SUCCESS) { \ + GELOGE(PARAM_INVALID, "Int64 %ld and %ld addition can result in overflow!", \ + static_cast(OFFSET), static_cast(SIZE)); \ + return {}; \ + } \ + int64_t range = (OFFSET) + (SIZE); \ + if ((TOTAL_SIZE) < static_cast(range)) { \ + REPORT_INNER_ERROR("E19999", \ + "Node:%s(%s) memory out of range, offset:%ld, size:%ld, exceed total size:%lu.", \ + OP->GetName().c_str(), OP->GetType().c_str(), (OFFSET), (SIZE), (TOTAL_SIZE)); \ + GELOGE(OUT_OF_MEMORY, \ + "[Check][Param]Node:%s(%s) memory out of range, offset:%ld, size:%ld, exceed total size:%lu.", \ + OP->GetName().c_str(), OP->GetType().c_str(), (OFFSET), (SIZE), (TOTAL_SIZE)); \ + return {}; \ + } \ } while (0) namespace ge { @@ -321,19 +330,21 @@ vector ModelUtils::GetInputDataAddrs(const RuntimeParam &model_param, Co const GeTensorDescPtr tensor_desc = op_desc->MutableInputDesc(static_cast(i)); GE_IF_BOOL_EXEC(tensor_desc == nullptr, GELOGD("Op: %s, Index: %zu, has no input", op_desc->GetName().c_str(), i); continue;) + int64_t tensor_size = 0; + 
GE_CHK_STATUS_EXEC(TensorUtils::GetSize(*tensor_desc, tensor_size), return {}); if ((i < v_is_input_const.size()) && v_is_input_const[i]) { - // TBE: add weights address to input - int64_t tensor_size = 0; - GE_CHK_STATUS(TensorUtils::GetSize(*tensor_desc, tensor_size)); - if (tensor_size) { - int64_t data_offset = 0; - GE_CHK_STATUS(TensorUtils::GetDataOffset(*tensor_desc, data_offset)); - VALIDATE_MEM_RANGE(op_desc, model_param.weight_size, data_offset); - uint8_t *weight_addr = model_param.weight_base + data_offset; - v_input_data_addr.push_back(weight_addr); - GELOGI("[IMAS]GetInputDataAddrs graph_%u type[C] name[%s] input[%zu] memaddr[%p]", model_param.graph_id, - op_desc->GetName().c_str(), i, weight_addr); - } + // Add weights address to input + int64_t data_offset = 0; + GE_CHK_STATUS(TensorUtils::GetDataOffset(*tensor_desc, data_offset)); + int64_t weight_size = 0; + // GetTensorSizeInBytes is used here because the weight is allocated based on the size of + // TensorData in function AdjustConstWeightSize, and that size is zero when the tensor is empty. 
+ GE_CHK_STATUS(TensorUtils::GetTensorSizeInBytes(*tensor_desc, weight_size)); + VALIDATE_MEM_RANGE(op_desc, model_param.weight_size, data_offset, weight_size); + uint8_t *weight_addr = model_param.weight_base + data_offset; + v_input_data_addr.push_back(weight_addr); + GELOGI("[IMAS]GetInputDataAddrs graph_%u type[C] name[%s] input[%zu] memaddr[%p]", model_param.graph_id, + op_desc->GetName().c_str(), i, weight_addr); non_const_index++; continue; } @@ -346,7 +357,8 @@ vector ModelUtils::GetInputDataAddrs(const RuntimeParam &model_param, Co (void)ge::AttrUtils::GetInt(op_desc->MutableInputDesc(i), ATTR_NAME_INNER_OFFSET, inner_offset); GE_IF_BOOL_EXEC(model_param.var_size != 0 && ge::VarManager::Instance(session_id)->IsVarAddr(input_offset - inner_offset), uint8_t *variable_addr = nullptr; - GE_CHK_STATUS_EXEC(GetVarAddr(model_param, op_desc, input_offset - inner_offset, variable_addr), return {}); + GE_CHK_STATUS_EXEC(GetVarAddr(model_param, op_desc, input_offset - inner_offset, + tensor_size + inner_offset, variable_addr), return {}); variable_addr += inner_offset; v_input_data_addr.push_back(variable_addr); GELOGI("[IMAS]GetInputDataAddrs graph_%u type[V] name[%s] input[%lu] memaddr[%p]", @@ -361,9 +373,8 @@ vector ModelUtils::GetInputDataAddrs(const RuntimeParam &model_param, Co mem_addr = reinterpret_cast(static_cast(input_offset)); v_input_data_addr.push_back(mem_addr); } else if (has_mem_type_attr && v_memory_type[i] == RT_MEMORY_TS_4G) { - int64_t tensor_size = 0; - GE_CHK_STATUS_EXEC(TensorUtils::GetSize(*tensor_desc, tensor_size), return {}); - VALIDATE_MEM_RANGE(op_desc, model_param.mem_size, input_offset); + // The input size and the peer output size may not be consecutive; therefore, tensor_size is not checked here. 
+ VALIDATE_MEM_RANGE(op_desc, model_param.mem_size, input_offset, static_cast(0)); mem_addr = model_param.ts_mem_mall->Acquire(input_offset, static_cast(tensor_size)); v_input_data_addr.push_back(mem_addr); } else if (tensor_has_mem_type && mem_type == RT_MEMORY_P2P_DDR) { @@ -373,7 +384,8 @@ vector ModelUtils::GetInputDataAddrs(const RuntimeParam &model_param, Co op_desc->GetName().c_str(), i, p2p_mem_addr); continue; } else { - VALIDATE_MEM_RANGE(op_desc, model_param.mem_size, input_offset); + // The input size and the peer output size may not be consecutive; therefore, tensor_size is not checked here. NOTE(review): a size of 0 lets input_offset == mem_size pass the range check (the replaced check rejected it) - confirm this is intended. + VALIDATE_MEM_RANGE(op_desc, model_param.mem_size, input_offset, static_cast(0)); mem_addr = model_param.mem_base + input_offset; v_input_data_addr.push_back(mem_addr); } @@ -390,7 +402,7 @@ vector ModelUtils::GetInputDataAddrs(const RuntimeParam &model_param, Co /// @return Status /// Status ModelUtils::GetVarAddr(const RuntimeParam &model_param, const ConstOpDescPtr &op_desc, int64_t offset, - uint8_t *&var_addr) { + int64_t tensor_size, uint8_t *&var_addr) { rtMemType_t mem_type = ge::VarManager::Instance(model_param.session_id)->GetVarMemType(offset); switch (mem_type) { case RT_MEMORY_RDMA_HBM: @@ -402,7 +414,7 @@ Status ModelUtils::GetVarAddr(const RuntimeParam &model_param, const ConstOpDesc var_addr = reinterpret_cast(static_cast(offset)); break; case RT_MEMORY_HBM: - VALIDATE_MEM_RANGE(op_desc, model_param.var_size, offset - model_param.logic_var_base); + VALIDATE_MEM_RANGE(op_desc, model_param.var_size, offset - model_param.logic_var_base, tensor_size); var_addr = model_param.var_base + offset - model_param.logic_var_base; break; default: @@ -456,9 +468,12 @@ vector ModelUtils::GetOutputDataAddrs(const RuntimeParam &model_param, C } int64_t inner_offset = 0; (void)ge::AttrUtils::GetInt(op_desc->MutableOutputDesc(i), ATTR_NAME_INNER_OFFSET, inner_offset); + int64_t tensor_size = 0; + GE_CHK_STATUS_EXEC(TensorUtils::GetSize(*tensor_desc, tensor_size), 
return {}); GE_IF_BOOL_EXEC(model_param.var_size != 0 && ge::VarManager::Instance(session_id)->IsVarAddr(v_output_offset[i] - inner_offset), uint8_t *variable_addr = nullptr; - GE_CHK_STATUS_EXEC(GetVarAddr(model_param, op_desc, v_output_offset[i] - inner_offset, variable_addr), return {}); + GE_CHK_STATUS_EXEC(GetVarAddr(model_param, op_desc, v_output_offset[i] - inner_offset, + tensor_size + inner_offset, variable_addr), return {}); variable_addr += inner_offset; v_output_data_addr.push_back(variable_addr); GELOGI("[IMAS]GetOutputDataAddrs graph_%u type[V] name[%s] output[%zu] memaddr[%p]", @@ -473,11 +488,7 @@ vector ModelUtils::GetOutputDataAddrs(const RuntimeParam &model_param, C mem_addr = reinterpret_cast(static_cast(v_output_offset[i])); v_output_data_addr.push_back(mem_addr); } else if (has_mem_type_attr && v_memory_type[i] == RT_MEMORY_TS_4G) { - const GeTensorDescPtr tensor_desc = op_desc->MutableOutputDesc(i); - GE_CHECK_NOTNULL_EXEC(tensor_desc, return {}); - int64_t tensor_size = 0; - GE_CHK_STATUS_EXEC(TensorUtils::GetSize(*tensor_desc, tensor_size), return {}); - VALIDATE_MEM_RANGE(op_desc, model_param.mem_size, v_output_offset[i]); + VALIDATE_MEM_RANGE(op_desc, model_param.mem_size, v_output_offset[i], tensor_size); mem_addr = model_param.ts_mem_mall->Acquire(v_output_offset[i], static_cast(tensor_size)); v_output_data_addr.push_back(mem_addr); } else if (tensor_has_mem_type && mem_type == RT_MEMORY_P2P_DDR) { @@ -487,7 +498,7 @@ vector ModelUtils::GetOutputDataAddrs(const RuntimeParam &model_param, C op_desc->GetName().c_str(), i, p2p_mem_addr); continue; } else { - VALIDATE_MEM_RANGE(op_desc, model_param.mem_size, v_output_offset[i]); + VALIDATE_MEM_RANGE(op_desc, model_param.mem_size, v_output_offset[i], tensor_size); mem_addr = static_cast(model_param.mem_base + v_output_offset[i]); v_output_data_addr.push_back(mem_addr); } @@ -560,7 +571,7 @@ vector ModelUtils::GetWorkspaceDataAddrs(const RuntimeParam &model_param 
GELOGI("[IMAS]GetWorkspaceDataAddrs graph_%u type[F] name[%s] workspace[%zu] offset[%ld] bytes[%ld] Null addr", model_param.graph_id, op_desc->GetName().c_str(), i, v_workspace_offset[i], v_workspace_bytes[i]); } else { - VALIDATE_MEM_RANGE(op_desc, model_param.mem_size, v_workspace_offset[i]); + VALIDATE_MEM_RANGE(op_desc, model_param.mem_size, v_workspace_offset[i], v_workspace_bytes[i]); uint8_t *mem_addr = nullptr; bool session_scope_memory = (has_workspace_no_reuse_scope) && (i < workspace_no_reuse_scope.size()); if (session_scope_memory) { diff --git a/ge/graph/load/model_manager/model_utils.h b/ge/graph/load/model_manager/model_utils.h index 26f8d700..8ce1b060 100755 --- a/ge/graph/load/model_manager/model_utils.h +++ b/ge/graph/load/model_manager/model_utils.h @@ -115,7 +115,7 @@ class ModelUtils { /// @return Status /// static Status GetVarAddr(const RuntimeParam &model_param, const ConstOpDescPtr &op_desc, int64_t offset, - uint8_t *&var_addr); + int64_t tensor_size, uint8_t *&var_addr); }; } // namespace ge diff --git a/tests/ut/ge/graph/load/model_utils_unittest.cc b/tests/ut/ge/graph/load/model_utils_unittest.cc index 630a75aa..33903616 100644 --- a/tests/ut/ge/graph/load/model_utils_unittest.cc +++ b/tests/ut/ge/graph/load/model_utils_unittest.cc @@ -19,6 +19,8 @@ #define private public #include "graph/load/model_manager/model_utils.h" #include "graph/manager/graph_var_manager.h" +#include "graph/utils/tensor_utils.h" +#include "graph/debug/ge_attr_define.h" using namespace std; @@ -28,6 +30,33 @@ class UtestModelUtils : public testing::Test { void TearDown() {} }; +static NodePtr CreateNode(ComputeGraph &graph, const string &name, const string &type, int in_num, int out_num) { + OpDescPtr op_desc = std::make_shared(name, type); + op_desc->SetStreamId(0); + static int32_t index = 0; + op_desc->SetId(index++); + + GeTensorDesc tensor(GeShape(), FORMAT_ND, DT_FLOAT); + TensorUtils::SetSize(tensor, 64); + vector input_offset; + for (int i = 0; i < 
in_num; i++) { + op_desc->AddInputDesc(tensor); + input_offset.emplace_back(i * 64); + } + op_desc->SetInputOffset(input_offset); + + vector output_offset; + for (int i = 0; i < out_num; i++) { + op_desc->AddOutputDesc(tensor); + output_offset.emplace_back(in_num * 64 + i * 64); + } + op_desc->SetOutputOffset(output_offset); + + op_desc->SetWorkspace({}); + op_desc->SetWorkspaceBytes({}); + return graph.AddNode(op_desc); +} + // test ModelUtils::GetVarAddr TEST_F(UtestModelUtils, get_var_addr_hbm) { uint8_t test = 2; @@ -44,7 +73,7 @@ TEST_F(UtestModelUtils, get_var_addr_hbm) { VarManager::Instance(runtime_param.session_id)->var_resource_->var_offset_map_[offset] = RT_MEMORY_HBM; std::shared_ptr op_desc = std::make_shared("test", "test"); uint8_t *var_addr = nullptr; - EXPECT_EQ(ModelUtils::GetVarAddr(runtime_param, op_desc, offset, var_addr), SUCCESS); + EXPECT_EQ(ModelUtils::GetVarAddr(runtime_param, op_desc, offset, 0, var_addr), SUCCESS); EXPECT_EQ(runtime_param.var_base + offset - runtime_param.logic_var_base, var_addr); VarManager::Instance(runtime_param.session_id)->Destory(); } @@ -63,7 +92,7 @@ TEST_F(UtestModelUtils, get_var_addr_rdma_hbm) { VarManager::Instance(runtime_param.session_id)->var_resource_->var_offset_map_[offset] = RT_MEMORY_RDMA_HBM; std::shared_ptr op_desc = std::make_shared("test", "test"); uint8_t *var_addr = nullptr; - EXPECT_EQ(ModelUtils::GetVarAddr(runtime_param, op_desc, offset, var_addr), SUCCESS); + EXPECT_EQ(ModelUtils::GetVarAddr(runtime_param, op_desc, offset, 0, var_addr), SUCCESS); EXPECT_EQ(reinterpret_cast(offset), var_addr); VarManager::Instance(runtime_param.session_id)->Destory(); } @@ -82,7 +111,43 @@ TEST_F(UtestModelUtils, get_var_addr_rdma_hbm_negative_offset) { VarManager::Instance(runtime_param.session_id)->var_resource_->var_offset_map_[offset] = RT_MEMORY_RDMA_HBM; std::shared_ptr op_desc = std::make_shared("test", "test"); uint8_t *var_addr = nullptr; - EXPECT_NE(ModelUtils::GetVarAddr(runtime_param, op_desc, 
offset, var_addr), SUCCESS); + EXPECT_NE(ModelUtils::GetVarAddr(runtime_param, op_desc, offset, 0, var_addr), SUCCESS); VarManager::Instance(runtime_param.session_id)->Destory(); } + +TEST_F(UtestModelUtils, test_GetInputDataAddrs_input_const) { + RuntimeParam runtime_param; + uint8_t weight_base_addr = 0; + runtime_param.session_id = 0; + runtime_param.weight_base = &weight_base_addr; + runtime_param.weight_size = 64; + + ComputeGraphPtr graph = std::make_shared("test"); + NodePtr add_node = CreateNode(*graph, "add", ADD, 2, 1); + auto op_desc = add_node->GetOpDesc(); + EXPECT_NE(op_desc, nullptr); + + vector is_input_const = {true, true}; + op_desc->SetIsInputConst(is_input_const); + { + auto tensor_desc = op_desc->MutableInputDesc(0); + EXPECT_NE(tensor_desc, nullptr); + TensorUtils::SetSize(*tensor_desc, 64); + tensor_desc->SetShape(GeShape({1, 1})); + tensor_desc->SetOriginShape(GeShape({1, 1})); + TensorUtils::SetDataOffset(*tensor_desc, 0); + } + { + auto tensor_desc = op_desc->MutableInputDesc(1); + EXPECT_NE(tensor_desc, nullptr); + TensorUtils::SetSize(*tensor_desc, 32); + tensor_desc->SetShape(GeShape({1, 0})); + tensor_desc->SetOriginShape(GeShape({1, 0})); + TensorUtils::SetDataOffset(*tensor_desc, 64); + } + vector input_data_addr = ModelUtils::GetInputDataAddrs(runtime_param, op_desc); + EXPECT_EQ(input_data_addr.size(), 2); + EXPECT_EQ(input_data_addr.at(0), static_cast(&weight_base_addr + 0)); + EXPECT_EQ(input_data_addr.at(1), static_cast(&weight_base_addr + 64)); +} } // namespace ge