From 5bf4d4c4245b801b754efd46c3c865f8659260f8 Mon Sep 17 00:00:00 2001 From: lichun Date: Wed, 14 Jul 2021 15:38:58 +0800 Subject: [PATCH 1/4] assign graph memory max size and variable memory max size adaptively --- ge/graph/manager/graph_var_manager.cc | 47 +++++++++++++++++++---- ge/graph/manager/graph_var_manager.h | 3 ++ tests/depends/runtime/src/runtime_stub.cc | 6 +++ 3 files changed, 49 insertions(+), 7 deletions(-) diff --git a/ge/graph/manager/graph_var_manager.cc b/ge/graph/manager/graph_var_manager.cc index 89a4e45b..5138a0f5 100755 --- a/ge/graph/manager/graph_var_manager.cc +++ b/ge/graph/manager/graph_var_manager.cc @@ -20,6 +20,7 @@ #include "graph/manager/graph_mem_manager.h" #include "graph/manager/trans_var_data_utils.h" #include "graph/utils/type_utils.h" +#include "graph/ge_context.h" using std::map; using std::string; @@ -767,24 +768,54 @@ Status VarManager::GetChangedGraphId(const std::string &var_name, uint32_t &grap return var_resource_->GetChangedGraphId(var_name, graph_id); } +Status VarManager::GetTotalMemorySize(size_t &total_mem_size) { + rtError_t rt_ret = rtSetDevice(GetContext().DeviceId()); + if (rt_ret != RT_ERROR_NONE) { + REPORT_CALL_ERROR("E19999", "Call rtSetDevice failed, device_id:%u, ret:0x%X", + GetContext().DeviceId(), rt_ret); + GELOGE(RT_FAILED, "[Call][RtSetDevice] failed, device_id:%u, ret:0x%X", GetContext().DeviceId(), rt_ret); + return RT_FAILED; + } + size_t free_mem = 0; + rt_ret = rtMemGetInfoEx(RT_MEMORYINFO_HBM, &free_mem, &total_mem_size); + if (rt_ret != RT_ERROR_NONE) { + REPORT_CALL_ERROR("E19999", "Call rtMemGetInfo failed, ret:0x%X", rt_ret); + GELOGE(RT_FAILED, "[Call][RtMemGetInfo] failed, ret:0x%X", rt_ret); + return RT_FAILED; + } + rt_ret = rtDeviceReset(GetContext().DeviceId()); + if (rt_ret != RT_ERROR_NONE) { + REPORT_CALL_ERROR("E19999", "Call rtDeviceReset failed, device_id:%u, ret:0x%X", + GetContext().DeviceId(), rt_ret); + GELOGE(RT_FAILED, "[Call][RtDeviceReset] failed, device_id:%u, ret:0x%X", GetContext().DeviceId(), rt_ret); + return RT_FAILED; + } + return SUCCESS; +} + Status VarManager::SetMemoryMallocSize(const map &options) { + size_t total_mem_size = 0; + Status ret = VarManager::GetTotalMemorySize(total_mem_size); + if (ret != SUCCESS) { + return ret; + } + GEEVENT("Total memory size is %zu", total_mem_size); + + graph_mem_max_size_ = floor(total_mem_size * kGraphMemoryManagerMallocRatio); + var_mem_max_size_ = floor(total_mem_size * kVarMemoryManagerMallocRatio); + auto it = options.find(GRAPH_MEMORY_MAX_SIZE); - if (it == options.end()) { - graph_mem_max_size_ = kGraphMemoryManagerMallocMaxSize; - } else { + if (it != options.end()) { string graph_memory_manager_malloc_max_size = it->second; ge::Status ret = ParseMemoryMallocSize(graph_memory_manager_malloc_max_size, graph_mem_max_size_); if (ret != SUCCESS) { GELOGE(ge::GE_GRAPH_OPTIONS_INVALID, "[Call][ParseMemoryMallocSize] failed, session id:%lu.", session_id_); return ge::GE_GRAPH_OPTIONS_INVALID; } - GELOGI("The max size for graph mem is set to %zu", graph_mem_max_size_); } it = options.find(VARIABLE_MEMORY_MAX_SIZE); - if (it == options.end()) { - var_mem_max_size_ = kMemoryVarManagerMallocSize; - } else { + if (it != options.end()) { string memory_var_manager_malloc_size = it->second; ge::Status ret = ParseMemoryMallocSize(memory_var_manager_malloc_size, var_mem_max_size_); if (ret != SUCCESS) { @@ -793,6 +824,8 @@ Status VarManager::SetMemoryMallocSize(const map &options) { } } + GEEVENT("The graph_mem_max_size is %zu and the var_mem_max_size is %zu", graph_mem_max_size_, var_mem_max_size_); + var_mem_logic_base_ = graph_mem_max_size_ + kGraphMemoryBuffer; if (var_mem_logic_base_ > kMaxMemorySize) { REPORT_INNER_ERROR("E19999", "var_login_base:%zu can not exeed limit:%zu, session_id:%lu, check invalid", diff --git a/ge/graph/manager/graph_var_manager.h b/ge/graph/manager/graph_var_manager.h index f2b68e79..a1b45959 100755 --- a/ge/graph/manager/graph_var_manager.h +++ b/ge/graph/manager/graph_var_manager.h @@ -43,6 +43,8 @@ const size_t kMaxMemorySize = 256UL * 1024UL * 1024UL * 1024UL; const char kEnvGeuseStaticMemory[] = "GE_USE_STATIC_MEMORY"; const uint64_t kSessionMemAlignSize = 512; const size_t kSessionMemAlignUnit = 2; +const double kGraphMemoryManagerMallocRatio = 26.0 / 32.0; +const double kVarMemoryManagerMallocRatio = 5.0 / 32.0; enum MemStatus { NORMAL = 0, @@ -301,6 +303,7 @@ class FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY VarManager { mutable std::recursive_mutex mutex_; Status ParseMemoryMallocSize(std::string &memory_size, size_t &my_size); + Status GetTotalMemorySize(size_t &total_mem_size); }; class VarManagerPool { diff --git a/tests/depends/runtime/src/runtime_stub.cc b/tests/depends/runtime/src/runtime_stub.cc index 0c9e2c27..a8f7e59a 100644 --- a/tests/depends/runtime/src/runtime_stub.cc +++ b/tests/depends/runtime/src/runtime_stub.cc @@ -193,6 +193,12 @@ rtError_t rtMemGetInfo(size_t *free, size_t *total) { return RT_ERROR_NONE; } +rtError_t rtMemGetInfoEx(rtMemInfoType_t memInfoType, size_t *free, size_t *total) { + *free = 512UL * 1024UL * 1024UL; + *total = 1024UL * 1024UL * 1024UL; + return RT_ERROR_NONE; +} + rtError_t rtMemAllocManaged(void **ptr, uint64_t size, uint32_t flag) { return RT_ERROR_NONE; } rtError_t rtMemFreeManaged(void *ptr) { return RT_ERROR_NONE; } From fec3176277626c8e80c69a2d76ed36f08cd7ae43 Mon Sep 17 00:00:00 2001 From: lichun Date: Wed, 14 Jul 2021 16:31:09 +0800 Subject: [PATCH 2/4] assign graph memory max size and variable memory max size adaptively --- tests/ut/ge/CMakeLists.txt | 1 + .../manager/graph_var_manager_unittest.cc | 63 +++++++++++++++++++ 2 files changed, 64 insertions(+) create mode 100644 tests/ut/ge/graph/manager/graph_var_manager_unittest.cc diff --git a/tests/ut/ge/CMakeLists.txt b/tests/ut/ge/CMakeLists.txt index 856d9d43..773a2686 100755 --- a/tests/ut/ge/CMakeLists.txt +++ b/tests/ut/ge/CMakeLists.txt @@ -690,6 +690,7 @@ set(MULTI_PARTS_TEST_FILES "graph/manager/run_graph_unittest.cc" "graph/partition/dynamic_shape_partition_unittest.cc" "graph/manager/graph_manager_unittest.cc" + "graph/manager/graph_var_manager_unittest.cc" "graph/optimize/mem_rw_conflict_optimize_unittest.cc" "graph/optimize/graph_optimize_unittest.cc" "session/omg_omg_unittest.cc" diff --git a/tests/ut/ge/graph/manager/graph_var_manager_unittest.cc b/tests/ut/ge/graph/manager/graph_var_manager_unittest.cc new file mode 100644 index 00000000..3eda6c47 --- /dev/null +++ b/tests/ut/ge/graph/manager/graph_var_manager_unittest.cc @@ -0,0 +1,63 @@ +/** + * Copyright 2021 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#include +#include + +#define protected public +#define private public +#include "graph/manager/graph_var_manager.h" +#include "graph/ge_context.h" +#undef protected +#undef private + +namespace ge { +class UtestGraphVarManagerTest : public testing::Test { + protected: + void SetUp() {} + void TearDown() {} +}; + +TEST_F(UtestGraphVarManagerTest, test_get_total_memory_size) { + size_t total_mem_size = 0; + Status ret = VarManager::Instance(0)->GetTotalMemorySize(total_mem_size); + EXPECT_EQ(total_mem_size, 1024UL * 1024UL * 1024UL); + EXPECT_EQ(ret, SUCCESS); +} + +TEST_F(UtestGraphVarManagerTest, test_set_memory_malloc_size_no_related_option) { + const map options{}; + Status ret = VarManager::Instance(0)->SetMemoryMallocSize(options); + EXPECT_EQ(VarManager::Instance(0)->graph_mem_max_size_, floor(1024UL * 1024UL * 1024UL * (26.0f / 32.0f))); + EXPECT_EQ(VarManager::Instance(0)->var_mem_max_size_, floor(1024UL * 1024UL * 1024UL * (5.0f / 32.0f))); + EXPECT_EQ(ret, SUCCESS); +} + +TEST_F(UtestGraphVarManagerTest, test_set_memory_malloc_size_with_user_specify_graph_mem_max_size) { + const map options{{"ge.graphMemoryMaxSize", 1024UL * 1024UL * 1024UL / 2}}; + Status ret = VarManager::Instance(0)->SetMemoryMallocSize(options); + EXPECT_EQ(VarManager::Instance(0)->graph_mem_max_size_, floor(1024UL * 1024UL * 1024UL / 2)); + EXPECT_EQ(VarManager::Instance(0)->var_mem_max_size_, floor(1024UL * 1024UL * 1024UL * (5.0f / 32.0f))); + EXPECT_EQ(ret, SUCCESS); +} + +TEST_F(UtestGraphVarManagerTest, test_set_memory_malloc_size_with_user_specify_var_mem_max_size) { + const map options{{"ge.variableMemoryMaxSize", 1024UL * 1024UL * 1024UL / 2}}; + Status ret = VarManager::Instance(0)->SetMemoryMallocSize(options); + EXPECT_EQ(VarManager::Instance(0)->graph_mem_max_size_, floor(1024UL * 1024UL * 1024UL * (26.0f / 32.0f))); + EXPECT_EQ(VarManager::Instance(0)->var_mem_max_size_, floor(1024UL * 1024UL * 1024UL / 2)); + EXPECT_EQ(ret, SUCCESS); +} +} // namespace ge From 3fd107039848df8f7e6a403beee4cf73ca30ee1c Mon Sep 17 00:00:00 2001 From: lichun Date: Wed, 14 Jul 2021 16:59:49 +0800 Subject: [PATCH 3/4] assign graph memory max size and variable memory max size adaptively --- tests/ut/ge/graph/manager/graph_var_manager_unittest.cc | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/ut/ge/graph/manager/graph_var_manager_unittest.cc b/tests/ut/ge/graph/manager/graph_var_manager_unittest.cc index 3eda6c47..c20e786d 100644 --- a/tests/ut/ge/graph/manager/graph_var_manager_unittest.cc +++ b/tests/ut/ge/graph/manager/graph_var_manager_unittest.cc @@ -46,7 +46,7 @@ TEST_F(UtestGraphVarManagerTest, test_set_memory_malloc_size_no_related_option) } TEST_F(UtestGraphVarManagerTest, test_set_memory_malloc_size_with_user_specify_graph_mem_max_size) { - const map options{{"ge.graphMemoryMaxSize", 1024UL * 1024UL * 1024UL / 2}}; + const map options{{"ge.graphMemoryMaxSize", "536870912"}}; Status ret = VarManager::Instance(0)->SetMemoryMallocSize(options); EXPECT_EQ(VarManager::Instance(0)->graph_mem_max_size_, floor(1024UL * 1024UL * 1024UL / 2)); EXPECT_EQ(VarManager::Instance(0)->var_mem_max_size_, floor(1024UL * 1024UL * 1024UL * (5.0f / 32.0f))); @@ -54,7 +54,7 @@ TEST_F(UtestGraphVarManagerTest, test_set_memory_malloc_size_with_user_specify_g } TEST_F(UtestGraphVarManagerTest, test_set_memory_malloc_size_with_user_specify_var_mem_max_size) { - const map options{{"ge.variableMemoryMaxSize", 1024UL * 1024UL * 1024UL / 2}}; + const map options{{"ge.variableMemoryMaxSize", "536870912"}}; Status ret = VarManager::Instance(0)->SetMemoryMallocSize(options); EXPECT_EQ(VarManager::Instance(0)->graph_mem_max_size_, floor(1024UL * 1024UL * 1024UL * (26.0f / 32.0f))); EXPECT_EQ(VarManager::Instance(0)->var_mem_max_size_, floor(1024UL * 1024UL * 1024UL / 2)); From 45be54175221c42c1bcb7e46b230fc79c8aa2d02 Mon Sep 17 00:00:00 2001 From: lichun Date: Wed, 14 Jul 2021 19:59:55 +0800 Subject: [PATCH 4/4] assign graph memory max size and variable memory max size adaptively --- ge/graph/manager/graph_var_manager.cc | 17 +++++++---------- 1 file changed, 7 insertions(+), 10 deletions(-) diff --git a/ge/graph/manager/graph_var_manager.cc b/ge/graph/manager/graph_var_manager.cc index 5138a0f5..d0669254 100755 --- a/ge/graph/manager/graph_var_manager.cc +++ b/ge/graph/manager/graph_var_manager.cc @@ -795,18 +795,15 @@ Status VarManager::GetTotalMemorySize(size_t &total_mem_size) { Status VarManager::SetMemoryMallocSize(const map &options) { size_t total_mem_size = 0; - Status ret = VarManager::GetTotalMemorySize(total_mem_size); - if (ret != SUCCESS) { - return ret; - } + GE_CHK_STATUS_RET_NOLOG(VarManager::GetTotalMemorySize(total_mem_size)); GEEVENT("Total memory size is %zu", total_mem_size); graph_mem_max_size_ = floor(total_mem_size * kGraphMemoryManagerMallocRatio); var_mem_max_size_ = floor(total_mem_size * kVarMemoryManagerMallocRatio); - auto it = options.find(GRAPH_MEMORY_MAX_SIZE); - if (it != options.end()) { - string graph_memory_manager_malloc_max_size = it->second; + auto it1 = options.find(GRAPH_MEMORY_MAX_SIZE); + if (it1 != options.end()) { + string graph_memory_manager_malloc_max_size = it1->second; ge::Status ret = ParseMemoryMallocSize(graph_memory_manager_malloc_max_size, graph_mem_max_size_); if (ret != SUCCESS) { GELOGE(ge::GE_GRAPH_OPTIONS_INVALID, "[Call][ParseMemoryMallocSize] failed, session id:%lu.", session_id_); @@ -814,9 +811,9 @@ Status VarManager::SetMemoryMallocSize(const map &options) { } } - it = options.find(VARIABLE_MEMORY_MAX_SIZE); - if (it != options.end()) { - string memory_var_manager_malloc_size = it->second; + auto it2 = options.find(VARIABLE_MEMORY_MAX_SIZE); + if (it2 != options.end()) { + string memory_var_manager_malloc_size = it2->second; ge::Status ret = ParseMemoryMallocSize(memory_var_manager_malloc_size, var_mem_max_size_); if (ret != SUCCESS) { GELOGE(ge::GE_GRAPH_OPTIONS_INVALID, "[Call][ParseMemoryMallocSize] failed, session id:%lu.", session_id_);