Browse Source

!1735 Shared-memory optimization for Wide&Deep

From: @isaacxr
Reviewed-by: @xchu42,@wqtshg
Signed-off-by: @wqtshg
tags/v1.3.0
mindspore-ci-bot Gitee 3 years ago
parent
commit
778ff7ac9f
8 changed files with 160 additions and 22 deletions
  1. +2
    -0
      ge/graph/load/model_manager/davinci_model.cc
  2. +7
    -8
      ge/graph/manager/host_mem_manager.cc
  3. +1
    -1
      ge/graph/manager/host_mem_manager.h
  4. +9
    -2
      ge/graph/manager/memory_api.cc
  5. +15
    -11
      ge/hybrid/model/hybrid_model_builder.cc
  6. +2
    -0
      tests/ut/ge/CMakeLists.txt
  7. +68
    -0
      tests/ut/ge/graph/manager/memory_api_unittest.cc
  8. +56
    -0
      tests/ut/ge/hybrid/model/hybrid_model_builder_unittest.cc

+ 2
- 0
ge/graph/load/model_manager/davinci_model.cc View File

@@ -3727,6 +3727,8 @@ Status DavinciModel::InitTbeHandle(const OpDescPtr &op_desc) {
binary.magic = RT_DEV_BINARY_MAGIC_ELF;
} else if (json_string == "RT_DEV_BINARY_MAGIC_ELF_AIVEC") {
binary.magic = RT_DEV_BINARY_MAGIC_ELF_AIVEC;
} else if (json_string == "RT_DEV_BINARY_MAGIC_ELF_AICUBE") {
binary.magic = RT_DEV_BINARY_MAGIC_ELF_AICUBE;
} else {
REPORT_INNER_ERROR("E19999", "Attr:%s value:%s in op:%s(%s), model_id:%u, check invalid",
TVM_ATTR_NAME_MAGIC.c_str(), json_string.c_str(),


+ 7
- 8
ge/graph/manager/host_mem_manager.cc View File

@@ -107,16 +107,15 @@ Status HostMemManager::MallocSharedMemory(SharedMemInfo &mem_info) {
return SUCCESS;
}

Status HostMemManager::QueryVarMemInfo(const string &op_name, uint64_t &base_addr, uint64_t &data_size) {
bool HostMemManager::QueryVarMemInfo(const string &op_name, SharedMemInfo &mem_info) {
std::lock_guard<std::recursive_mutex> lock(mutex_);
if (var_memory_base_map_.find(op_name) == var_memory_base_map_.end()) {
REPORT_INNER_ERROR("E19999", "MemInfo.op_name:%s can't find in var_memory_base_map_", op_name.c_str());
GELOGE(INTERNAL_ERROR, "[Check][Param] Find host base base_addr failed, node name:%s!", op_name.c_str());
return INTERNAL_ERROR;
auto it = var_memory_base_map_.find(op_name);
if (it == var_memory_base_map_.end()) {
GELOGW("Host memory for node [%s] not found.", op_name.c_str());
return false;
}
base_addr = static_cast<uint64_t>(reinterpret_cast<uintptr_t>(var_memory_base_map_[op_name].device_address));
data_size = var_memory_base_map_[op_name].mem_size;
return SUCCESS;
mem_info = it->second;
return true;
}

string HostMemManager::OpNameToShmName(const string &op_name) {


+ 1
- 1
ge/graph/manager/host_mem_manager.h View File

@@ -66,7 +66,7 @@ class HostMemManager {
Status Initialize();
void Finalize() noexcept;
Status MallocSharedMemory(SharedMemInfo &mem_nfo);
Status QueryVarMemInfo(const string &op_name, uint64_t &base_addr, uint64_t &data_size);
bool QueryVarMemInfo(const string &op_name, SharedMemInfo &mem_info);

private:
static string OpNameToShmName(const string &op_name);


+ 9
- 2
ge/graph/manager/memory_api.cc View File

@@ -110,7 +110,14 @@ Status MallocSharedMemory(const TensorInfo &tensor_info, uint64_t &dev_addr, uin
}

// Resolves the host-side base address and byte size of a host variable's
// shared memory.
//
// NOTE(review): the rendered diff hunk interleaved old and new lines without
// +/- markers; this is the reconstructed post-commit version.
//
// @param var_name   variable (node) name to look up
// @param base_addr  out: host aligned pointer of the shared block, as uint64_t
// @param var_size   out: size of the shared block in bytes
// @return SUCCESS when the variable is registered, FAILED otherwise
Status GetVarBaseAddrAndSize(const string &var_name, uint64_t &base_addr, uint64_t &var_size) {
  GELOGD("GetVarBaseAddrAndSize in, var name:[%s]", var_name.c_str());
  SharedMemInfo mem_info;
  if (!HostMemManager::Instance().QueryVarMemInfo(var_name, mem_info)) {
    GELOGE(FAILED, "Get addr and size failed, name:[%s]", var_name.c_str());
    return FAILED;
  }
  // Report the host aligned pointer (not the device address): after this
  // commit callers consume the host mapping of the shared memory.
  // Assumes a successful query implies host_aligned_ptr is set — TODO confirm
  // MallocSharedMemory always populates it before registration.
  base_addr = static_cast<uint64_t>(reinterpret_cast<uintptr_t>(mem_info.host_aligned_ptr->Get()));
  var_size = mem_info.mem_size;
  return SUCCESS;
}
} // namespace ge

+ 15
- 11
ge/hybrid/model/hybrid_model_builder.cc View File

@@ -1074,21 +1074,25 @@ Status HybridModelBuilder::InitVariableTensors() {
GELOGE(INTERNAL_ERROR, "[Calculate][TensorMemSize] failed, node name:%s", it.first.c_str());
return INTERNAL_ERROR;
}
SharedMemInfo mem_info(it.first, tensor_size);
if (HostMemManager::Instance().MallocSharedMemory(mem_info) != SUCCESS) {
GELOGE(GE_GRAPH_MALLOC_FAILED, "[Malloc][SharedMemory] failed, Host variable [%s].", it.first.c_str());
return GE_GRAPH_MALLOC_FAILED;
}
if (MemManager::Instance().HostMemInstance(RT_MEMORY_HBM).Malloc(mem_info.host_aligned_ptr,
tensor_size) == nullptr) {
GELOGE(MEMALLOC_FAILED, "[Malloc][HostMem] for an existed GeTensor failed, Host variable [%s].",
it.first.c_str());

// Host variable will be assigned to allocated shared memory first.
SharedMemInfo mem_info;
void *mem_addr = nullptr;
if (HostMemManager::Instance().QueryVarMemInfo(it.first, mem_info)) {
mem_addr = const_cast<void *>(MemManager::Instance().HostMemInstance(RT_MEMORY_HBM)
.Malloc(mem_info.host_aligned_ptr, tensor_size));
} else {
mem_addr = MemManager::Instance().HostMemInstance(RT_MEMORY_HBM).Malloc(tensor_size);
}

if (mem_addr == nullptr) {
REPORT_INNER_ERROR("E19999", "[Malloc][HostMem] for variable [%s] failed.", it.first.c_str());
GELOGE(MEMALLOC_FAILED, "[Malloc][HostMem] for variable [%s] failed.", it.first.c_str());
return MEMALLOC_FAILED;
}
GELOGD("Host variable [%s] malloc success, size=%ld.", it.first.c_str(), tensor_size);

std::unique_ptr<TensorValue> tensor(new (std::nothrow) TensorValue(mem_info.host_aligned_ptr->MutableGet(),
tensor_size));
std::unique_ptr<TensorValue> tensor(new (std::nothrow) TensorValue(mem_addr, tensor_size));
GE_CHECK_NOTNULL(tensor);
hybrid_model_.variable_tensors_.emplace(it.first, std::move(tensor));
}


+ 2
- 0
tests/ut/ge/CMakeLists.txt View File

@@ -157,6 +157,7 @@ set(COMMON_SRC_FILES
"${GE_CODE_DIR}/ge/graph/load/model_manager/model_manager.cc"
"${GE_CODE_DIR}/ge/common/profiling/profiling_manager.cc"
"${GE_CODE_DIR}/ge/graph/manager/host_mem_manager.cc"
"${GE_CODE_DIR}/ge/graph/manager/memory_api.cc"
"${GE_CODE_DIR}/ge/session/inner_session.cc"
"${GE_CODE_DIR}/ge/graph/manager/util/rt_context_util.cc"
"${GE_CODE_DIR}/ge/graph/execute/graph_execute.cc"
@@ -799,6 +800,7 @@ set(MULTI_PARTS_TEST_FILES
"graph/manager/hcom_util_unittest.cc"
"graph/manager/graph_caching_allocator_unittest.cc"
"graph/manager/host_mem_allocator_unittest.cc"
"graph/manager/memory_api_unittest.cc"
"graph/manager/session_scope_mem_allocator_unittest.cc"
"graph/manager/run_graph_unittest.cc"
"graph/partition/dynamic_shape_partition_unittest.cc"


+ 68
- 0
tests/ut/ge/graph/manager/memory_api_unittest.cc View File

@@ -0,0 +1,68 @@
/**
* Copyright 2019-2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include <gtest/gtest.h>
#include <memory>
#define protected public
#define private public
#include "graph/manager/host_mem_manager.h"
#include "inc/framework/memory/memory_api.h"
#undef protected
#undef private
#include "metadef/inc/graph/aligned_ptr.h"
using namespace std;
using namespace testing;
using namespace ge;
// Fixture for the host-memory query API tests; no per-test state is needed,
// so setup and teardown are intentionally empty.
class UtestMemoryApiTest : public testing::Test {
 protected:
  void SetUp() {}
  void TearDown() {}
};
// Registers a fake shared-memory record, then checks that
// GetVarBaseAddrAndSize resolves BOTH the size and the host base address.
// (The original test only asserted var_size; base_addr — the value whose
// semantics this commit changes from device address to host aligned pointer —
// went unverified.)
TEST_F(UtestMemoryApiTest, query_mem_info_success) {
  string var_name = "host_params";
  SharedMemInfo info;
  uint8_t tmp(0);
  info.device_address = &tmp;
  std::shared_ptr<AlignedPtr> aligned_ptr = std::make_shared<AlignedPtr>(100, 16);
  info.host_aligned_ptr = aligned_ptr;
  info.fd = 0;
  info.mem_size = 100;
  info.op_name = var_name;
  HostMemManager::Instance().var_memory_base_map_[var_name] = info;

  uint64_t base_addr = 0;
  uint64_t var_size = 0;
  Status ret = GetVarBaseAddrAndSize(var_name, base_addr, var_size);
  EXPECT_EQ(ret, SUCCESS);
  EXPECT_EQ(var_size, 100);
  // Must report the host aligned pointer, not the device address.
  EXPECT_EQ(base_addr, static_cast<uint64_t>(reinterpret_cast<uintptr_t>(aligned_ptr->Get())));
  HostMemManager::Instance().var_memory_base_map_.clear();
}
// Querying a variable that was never registered must not succeed.
TEST_F(UtestMemoryApiTest, query_mem_info_failed) {
  uint64_t base_addr = 0;
  uint64_t var_size = 0;
  const string var_name = "host_params";

  Status ret = GetVarBaseAddrAndSize(var_name, base_addr, var_size);
  EXPECT_NE(ret, SUCCESS);
}

+ 56
- 0
tests/ut/ge/hybrid/model/hybrid_model_builder_unittest.cc View File

@@ -23,6 +23,7 @@
#define protected public
#include "hybrid/model/hybrid_model_builder.h"
#include "hybrid/node_executor/node_executor.h"
#include "graph/manager/host_mem_manager.h"

#include "graph/utils/tensor_utils.h"
#include "graph/utils/graph_utils.h"
@@ -263,4 +264,59 @@ TEST_F(UtestHybridModelBuilder, init_constant_op_host_) {
EXPECT_EQ(hybrid_model_builder.InitConstantOps(), SUCCESS);
EXPECT_EQ(hybrid_model_builder.hybrid_model_.variable_tensors_.size(), 2);
}

// With no shared-memory record registered, InitVariableTensors should fall
// back to a plain host allocation for the host variable and still succeed.
TEST_F(UtestHybridModelBuilder, init_host_var_with_host_mem) {
  ComputeGraphPtr graph = std::make_shared<ComputeGraph>("test");
  GeRootModelPtr ge_root_model = make_shared<GeRootModel>(graph);
  HybridModel hybrid_model(ge_root_model);
  HybridModelBuilder hybrid_model_builder(hybrid_model);

  // One host VARIABLE node carrying a 512-byte output tensor.
  OpDescPtr op_desc = std::make_shared<OpDesc>("host_params", VARIABLE);
  GeTensorDesc tensor_desc(GeShape(), FORMAT_NHWC, DT_FLOAT);
  TensorUtils::SetSize(tensor_desc, 512);
  op_desc->AddOutputDesc(tensor_desc);
  auto host_var = graph->AddNode(op_desc);
  hybrid_model.host_variable_nodes_.emplace("host_params", host_var);

  // Force host placement so the builder takes the host-variable path.
  std::map<std::string, string> options;
  options["ge.exec.placement"] = "HOST";
  GetThreadLocalContext().SetGraphOption(options);

  EXPECT_EQ(hybrid_model_builder.InitVariableTensors(), SUCCESS);
  EXPECT_EQ(hybrid_model_builder.hybrid_model_.variable_tensors_.size(), 1);
}

// When a shared-memory record IS registered for the host variable,
// InitVariableTensors should reuse the shared memory's host aligned buffer
// instead of allocating fresh host memory.
// Fixes in review: mem_size was 100 while both the aligned buffer and the
// tensor are 512 bytes — made consistent (the test outcome does not depend on
// mem_size, but the inconsistent fixture was misleading); stray blank lines
// removed.
TEST_F(UtestHybridModelBuilder, init_host_var_with_host_shared_mem) {
  ComputeGraphPtr graph = std::make_shared<ComputeGraph>("test");
  GeRootModelPtr ge_root_model = make_shared<GeRootModel>(graph);
  HybridModel hybrid_model(ge_root_model);
  HybridModelBuilder hybrid_model_builder(hybrid_model);

  // One host VARIABLE node carrying a 512-byte output tensor.
  OpDescPtr op_desc = std::make_shared<OpDesc>("host_params", VARIABLE);
  GeTensorDesc tensor_desc(GeShape(), FORMAT_NHWC, DT_FLOAT);
  TensorUtils::SetSize(tensor_desc, 512);
  op_desc->AddOutputDesc(tensor_desc);
  auto host_var = graph->AddNode(op_desc);
  hybrid_model.host_variable_nodes_.emplace("host_params", host_var);

  // Force host placement so the builder takes the host-variable path.
  std::map<std::string, string> options;
  options["ge.exec.placement"] = "HOST";
  GetThreadLocalContext().SetGraphOption(options);

  // Pre-register a 512-byte shared-memory record for the variable.
  SharedMemInfo info;
  uint8_t tmp(0);
  info.device_address = &tmp;
  std::shared_ptr<AlignedPtr> aligned_ptr = std::make_shared<AlignedPtr>(512, 16);
  info.host_aligned_ptr = aligned_ptr;
  info.fd = 0;
  info.mem_size = 512;
  info.op_name = "host_params";
  HostMemManager::Instance().var_memory_base_map_["host_params"] = info;

  EXPECT_EQ(hybrid_model_builder.InitVariableTensors(), SUCCESS);
  EXPECT_EQ(hybrid_model_builder.hybrid_model_.variable_tensors_.size(), 1);
  HostMemManager::Instance().var_memory_base_map_.clear();
}
} // namespace ge

Loading…
Cancel
Save