Browse Source

!1997 assign graph memory max size and variable memory max size adaptively

Merge pull request !1997 from lichun/master
tags/v1.5.1
i-robot Gitee 3 years ago
parent
commit
4e8b86f37b
5 changed files with 114 additions and 11 deletions
  1. +41
    -11
      ge/graph/manager/graph_var_manager.cc
  2. +3
    -0
      ge/graph/manager/graph_var_manager.h
  3. +6
    -0
      tests/depends/runtime/src/runtime_stub.cc
  4. +1
    -0
      tests/ut/ge/CMakeLists.txt
  5. +63
    -0
      tests/ut/ge/graph/manager/graph_var_manager_unittest.cc

+ 41
- 11
ge/graph/manager/graph_var_manager.cc View File

@@ -20,6 +20,7 @@
#include "graph/manager/graph_mem_manager.h" #include "graph/manager/graph_mem_manager.h"
#include "graph/manager/trans_var_data_utils.h" #include "graph/manager/trans_var_data_utils.h"
#include "graph/utils/type_utils.h" #include "graph/utils/type_utils.h"
#include "graph/ge_context.h"


using std::map; using std::map;
using std::string; using std::string;
@@ -767,25 +768,52 @@ Status VarManager::GetChangedGraphId(const std::string &var_name, uint32_t &grap
return var_resource_->GetChangedGraphId(var_name, graph_id); return var_resource_->GetChangedGraphId(var_name, graph_id);
} }


// Queries the total HBM size reported by the runtime for the device configured in the GE context.
//
// The function calls rtSetDevice, reads the memory info with
// rtMemGetInfoEx(RT_MEMORYINFO_HBM, ...) and then calls rtDeviceReset.
//
// @param total_mem_size [out] receives the total size written by rtMemGetInfoEx.
// @return SUCCESS when all three runtime calls succeed, RT_FAILED otherwise.
Status VarManager::GetTotalMemorySize(size_t &total_mem_size) {
  // Read the device id once so that set and reset are guaranteed to address the same device.
  const auto device_id = GetContext().DeviceId();

  const rtError_t set_ret = rtSetDevice(device_id);
  if (set_ret != RT_ERROR_NONE) {
    REPORT_CALL_ERROR("E19999", "Call rtSetDevice failed, device_id:%u, ret:0x%X",
                      device_id, set_ret);
    GELOGE(RT_FAILED, "[Call][RtSetDevice] failed, device_id:%u, ret:0x%X", device_id, set_ret);
    return RT_FAILED;
  }

  size_t free_mem = 0;  // Required by the runtime API; the free size is not used here.
  const rtError_t query_ret = rtMemGetInfoEx(RT_MEMORYINFO_HBM, &free_mem, &total_mem_size);
  if (query_ret != RT_ERROR_NONE) {
    REPORT_CALL_ERROR("E19999", "Call rtMemGetInfoEx failed, ret:0x%X", query_ret);
    GELOGE(RT_FAILED, "[Call][RtMemGetInfoEx] failed, ret:0x%X", query_ret);
    // Do not return yet: the successful rtSetDevice above must still be paired with rtDeviceReset.
  }

  const rtError_t reset_ret = rtDeviceReset(device_id);
  if (reset_ret != RT_ERROR_NONE) {
    REPORT_CALL_ERROR("E19999", "Call rtDeviceReset failed, device_id:%u, ret:0x%X",
                      device_id, reset_ret);
    GELOGE(RT_FAILED, "[Call][RtDeviceReset] failed, device_id:%u, ret:0x%X", device_id, reset_ret);
    return RT_FAILED;
  }

  return (query_ret == RT_ERROR_NONE) ? SUCCESS : RT_FAILED;
}

Status VarManager::SetMemoryMallocSize(const map<string, string> &options) { Status VarManager::SetMemoryMallocSize(const map<string, string> &options) {
auto it = options.find(GRAPH_MEMORY_MAX_SIZE);
if (it == options.end()) {
graph_mem_max_size_ = kGraphMemoryManagerMallocMaxSize;
} else {
string graph_memory_manager_malloc_max_size = it->second;
size_t total_mem_size = 0;
GE_CHK_STATUS_RET_NOLOG(VarManager::GetTotalMemorySize(total_mem_size));
GEEVENT("Total memory size is %zu", total_mem_size);

graph_mem_max_size_ = floor(total_mem_size * kGraphMemoryManagerMallocRatio);
var_mem_max_size_ = floor(total_mem_size * kVarMemoryManagerMallocRatio);

auto it1 = options.find(GRAPH_MEMORY_MAX_SIZE);
if (it1 != options.end()) {
string graph_memory_manager_malloc_max_size = it1->second;
ge::Status ret = ParseMemoryMallocSize(graph_memory_manager_malloc_max_size, graph_mem_max_size_); ge::Status ret = ParseMemoryMallocSize(graph_memory_manager_malloc_max_size, graph_mem_max_size_);
if (ret != SUCCESS) { if (ret != SUCCESS) {
GELOGE(ge::GE_GRAPH_OPTIONS_INVALID, "[Call][ParseMemoryMallocSize] failed, session id:%lu.", session_id_); GELOGE(ge::GE_GRAPH_OPTIONS_INVALID, "[Call][ParseMemoryMallocSize] failed, session id:%lu.", session_id_);
return ge::GE_GRAPH_OPTIONS_INVALID; return ge::GE_GRAPH_OPTIONS_INVALID;
} }
GELOGI("The max size for graph mem is set to %zu", graph_mem_max_size_);
} }


it = options.find(VARIABLE_MEMORY_MAX_SIZE);
if (it == options.end()) {
var_mem_max_size_ = kMemoryVarManagerMallocSize;
} else {
string memory_var_manager_malloc_size = it->second;
auto it2 = options.find(VARIABLE_MEMORY_MAX_SIZE);
if (it2 != options.end()) {
string memory_var_manager_malloc_size = it2->second;
ge::Status ret = ParseMemoryMallocSize(memory_var_manager_malloc_size, var_mem_max_size_); ge::Status ret = ParseMemoryMallocSize(memory_var_manager_malloc_size, var_mem_max_size_);
if (ret != SUCCESS) { if (ret != SUCCESS) {
GELOGE(ge::GE_GRAPH_OPTIONS_INVALID, "[Call][ParseMemoryMallocSize] failed, session id:%lu.", session_id_); GELOGE(ge::GE_GRAPH_OPTIONS_INVALID, "[Call][ParseMemoryMallocSize] failed, session id:%lu.", session_id_);
@@ -793,6 +821,8 @@ Status VarManager::SetMemoryMallocSize(const map<string, string> &options) {
} }
} }


GEEVENT("The graph_mem_max_size is %zu and the var_mem_max_size is %zu", graph_mem_max_size_, var_mem_max_size_);

var_mem_logic_base_ = graph_mem_max_size_ + kGraphMemoryBuffer; var_mem_logic_base_ = graph_mem_max_size_ + kGraphMemoryBuffer;
if (var_mem_logic_base_ > kMaxMemorySize) { if (var_mem_logic_base_ > kMaxMemorySize) {
REPORT_INNER_ERROR("E19999", "var_login_base:%zu can not exeed limit:%zu, session_id:%lu, check invalid", REPORT_INNER_ERROR("E19999", "var_login_base:%zu can not exeed limit:%zu, session_id:%lu, check invalid",


+ 3
- 0
ge/graph/manager/graph_var_manager.h View File

@@ -43,6 +43,8 @@ const size_t kMaxMemorySize = 256UL * 1024UL * 1024UL * 1024UL;
const char kEnvGeuseStaticMemory[] = "GE_USE_STATIC_MEMORY"; const char kEnvGeuseStaticMemory[] = "GE_USE_STATIC_MEMORY";
const uint64_t kSessionMemAlignSize = 512; const uint64_t kSessionMemAlignSize = 512;
const size_t kSessionMemAlignUnit = 2; const size_t kSessionMemAlignUnit = 2;
// Ratio of the total memory size used as the default graph memory max size:
// VarManager::SetMemoryMallocSize computes floor(total_mem_size * ratio) before applying user options.
const double kGraphMemoryManagerMallocRatio = 26.0 / 32.0;
// Ratio of the total memory size used as the default variable memory max size, applied the same way.
// Together with the graph ratio this adds up to 31/32 of the total memory size.
const double kVarMemoryManagerMallocRatio = 5.0 / 32.0;


enum MemStatus { enum MemStatus {
NORMAL = 0, NORMAL = 0,
@@ -301,6 +303,7 @@ class FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY VarManager {
mutable std::recursive_mutex mutex_; mutable std::recursive_mutex mutex_;


Status ParseMemoryMallocSize(std::string &memory_size, size_t &my_size); Status ParseMemoryMallocSize(std::string &memory_size, size_t &my_size);
Status GetTotalMemorySize(size_t &total_mem_size);
}; };


class VarManagerPool { class VarManagerPool {


+ 6
- 0
tests/depends/runtime/src/runtime_stub.cc View File

@@ -193,6 +193,12 @@ rtError_t rtMemGetInfo(size_t *free, size_t *total) {
return RT_ERROR_NONE; return RT_ERROR_NONE;
} }


// Test stub for the runtime memory query: ignores memInfoType and always
// reports 512 MiB free out of 1 GiB total.
rtError_t rtMemGetInfoEx(rtMemInfoType_t memInfoType, size_t *free, size_t *total) {
  const size_t mebibyte = 1024UL * 1024UL;
  *free = 512UL * mebibyte;
  *total = 1024UL * mebibyte;
  return RT_ERROR_NONE;
}

rtError_t rtMemAllocManaged(void **ptr, uint64_t size, uint32_t flag) { return RT_ERROR_NONE; } rtError_t rtMemAllocManaged(void **ptr, uint64_t size, uint32_t flag) { return RT_ERROR_NONE; }


rtError_t rtMemFreeManaged(void *ptr) { return RT_ERROR_NONE; } rtError_t rtMemFreeManaged(void *ptr) { return RT_ERROR_NONE; }


+ 1
- 0
tests/ut/ge/CMakeLists.txt View File

@@ -690,6 +690,7 @@ set(MULTI_PARTS_TEST_FILES
"graph/manager/run_graph_unittest.cc" "graph/manager/run_graph_unittest.cc"
"graph/partition/dynamic_shape_partition_unittest.cc" "graph/partition/dynamic_shape_partition_unittest.cc"
"graph/manager/graph_manager_unittest.cc" "graph/manager/graph_manager_unittest.cc"
"graph/manager/graph_var_manager_unittest.cc"
"graph/optimize/mem_rw_conflict_optimize_unittest.cc" "graph/optimize/mem_rw_conflict_optimize_unittest.cc"
"graph/optimize/graph_optimize_unittest.cc" "graph/optimize/graph_optimize_unittest.cc"
"session/omg_omg_unittest.cc" "session/omg_omg_unittest.cc"


+ 63
- 0
tests/ut/ge/graph/manager/graph_var_manager_unittest.cc View File

@@ -0,0 +1,63 @@
/**
* Copyright 2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include <gtest/gtest.h>
#include <memory>

#define protected public
#define private public
#include "graph/manager/graph_var_manager.h"
#include "graph/ge_context.h"
#undef protected
#undef private

namespace ge {
class UtestGraphVarManagerTest : public testing::Test {
protected:
void SetUp() {}
void TearDown() {}
};

// GetTotalMemorySize must succeed and return the 1 GiB total reported by the runtime stub.
TEST_F(UtestGraphVarManagerTest, test_get_total_memory_size) {
  const size_t expected_total = 1024UL * 1024UL * 1024UL;

  size_t total_mem_size = 0;
  const Status ret = VarManager::Instance(0)->GetTotalMemorySize(total_mem_size);

  EXPECT_EQ(ret, SUCCESS);
  EXPECT_EQ(total_mem_size, expected_total);
}

// Without any size option both limits are derived from the total memory size:
// 26/32 of it for graph memory and 5/32 of it for variable memory.
TEST_F(UtestGraphVarManagerTest, test_set_memory_malloc_size_no_related_option) {
  // Expected values are computed with exact integer arithmetic (1 GiB is divisible by 32),
  // so size_t is compared with size_t instead of with a floating-point value.
  const size_t total_mem_size = 1024UL * 1024UL * 1024UL;
  const size_t expected_graph_mem = total_mem_size / 32UL * 26UL;
  const size_t expected_var_mem = total_mem_size / 32UL * 5UL;

  const map<string, string> options{};
  const auto var_manager = VarManager::Instance(0);

  // The sizes are only meaningful when the call itself succeeded.
  ASSERT_EQ(var_manager->SetMemoryMallocSize(options), SUCCESS);
  EXPECT_EQ(var_manager->graph_mem_max_size_, expected_graph_mem);
  EXPECT_EQ(var_manager->var_mem_max_size_, expected_var_mem);
}

// A user supplied "ge.graphMemoryMaxSize" overrides the graph limit only;
// the variable limit keeps its default of 5/32 of the total memory size.
TEST_F(UtestGraphVarManagerTest, test_set_memory_malloc_size_with_user_specify_graph_mem_max_size) {
  // Expected values are computed with exact integer arithmetic so that size_t is compared with size_t.
  const size_t total_mem_size = 1024UL * 1024UL * 1024UL;
  const size_t expected_graph_mem = total_mem_size / 2UL;  // 536870912, the value passed in the option
  const size_t expected_var_mem = total_mem_size / 32UL * 5UL;

  const map<string, string> options{{"ge.graphMemoryMaxSize", "536870912"}};
  const auto var_manager = VarManager::Instance(0);

  // The sizes are only meaningful when the call itself succeeded.
  ASSERT_EQ(var_manager->SetMemoryMallocSize(options), SUCCESS);
  EXPECT_EQ(var_manager->graph_mem_max_size_, expected_graph_mem);
  EXPECT_EQ(var_manager->var_mem_max_size_, expected_var_mem);
}

// A user supplied "ge.variableMemoryMaxSize" overrides the variable limit only;
// the graph limit keeps its default of 26/32 of the total memory size.
TEST_F(UtestGraphVarManagerTest, test_set_memory_malloc_size_with_user_specify_var_mem_max_size) {
  // Expected values are computed with exact integer arithmetic so that size_t is compared with size_t.
  const size_t total_mem_size = 1024UL * 1024UL * 1024UL;
  const size_t expected_graph_mem = total_mem_size / 32UL * 26UL;
  const size_t expected_var_mem = total_mem_size / 2UL;  // 536870912, the value passed in the option

  const map<string, string> options{{"ge.variableMemoryMaxSize", "536870912"}};
  const auto var_manager = VarManager::Instance(0);

  // The sizes are only meaningful when the call itself succeeded.
  ASSERT_EQ(var_manager->SetMemoryMallocSize(options), SUCCESS);
  EXPECT_EQ(var_manager->graph_mem_max_size_, expected_graph_mem);
  EXPECT_EQ(var_manager->var_mem_max_size_, expected_var_mem);
}
} // namespace ge

Loading…
Cancel
Save