| @@ -3725,6 +3725,8 @@ Status DavinciModel::InitTbeHandle(const OpDescPtr &op_desc) { | |||||
| binary.magic = RT_DEV_BINARY_MAGIC_ELF; | binary.magic = RT_DEV_BINARY_MAGIC_ELF; | ||||
| } else if (json_string == "RT_DEV_BINARY_MAGIC_ELF_AIVEC") { | } else if (json_string == "RT_DEV_BINARY_MAGIC_ELF_AIVEC") { | ||||
| binary.magic = RT_DEV_BINARY_MAGIC_ELF_AIVEC; | binary.magic = RT_DEV_BINARY_MAGIC_ELF_AIVEC; | ||||
| } else if (json_string == "RT_DEV_BINARY_MAGIC_ELF_AICUBE") { | |||||
| binary.magic = RT_DEV_BINARY_MAGIC_ELF_AICUBE; | |||||
| } else { | } else { | ||||
| REPORT_INNER_ERROR("E19999", "Attr:%s value:%s in op:%s(%s), model_id:%u, check invalid", | REPORT_INNER_ERROR("E19999", "Attr:%s value:%s in op:%s(%s), model_id:%u, check invalid", | ||||
| TVM_ATTR_NAME_MAGIC.c_str(), json_string.c_str(), | TVM_ATTR_NAME_MAGIC.c_str(), json_string.c_str(), | ||||
| @@ -107,16 +107,15 @@ Status HostMemManager::MallocSharedMemory(SharedMemInfo &mem_info) { | |||||
| return SUCCESS; | return SUCCESS; | ||||
| } | } | ||||
| Status HostMemManager::QueryVarMemInfo(const string &op_name, uint64_t &base_addr, uint64_t &data_size) { | |||||
| bool HostMemManager::QueryVarMemInfo(const string &op_name, SharedMemInfo &mem_info) { | |||||
| std::lock_guard<std::recursive_mutex> lock(mutex_); | std::lock_guard<std::recursive_mutex> lock(mutex_); | ||||
| if (var_memory_base_map_.find(op_name) == var_memory_base_map_.end()) { | |||||
| REPORT_INNER_ERROR("E19999", "MemInfo.op_name:%s can't find in var_memory_base_map_", op_name.c_str()); | |||||
| GELOGE(INTERNAL_ERROR, "[Check][Param] Find host base base_addr failed, node name:%s!", op_name.c_str()); | |||||
| return INTERNAL_ERROR; | |||||
| auto it = var_memory_base_map_.find(op_name); | |||||
| if (it == var_memory_base_map_.end()) { | |||||
| GELOGW("Host memory for node [%s] not found.", op_name.c_str()); | |||||
| return false; | |||||
| } | } | ||||
| base_addr = static_cast<uint64_t>(reinterpret_cast<uintptr_t>(var_memory_base_map_[op_name].device_address)); | |||||
| data_size = var_memory_base_map_[op_name].mem_size; | |||||
| return SUCCESS; | |||||
| mem_info = it->second; | |||||
| return true; | |||||
| } | } | ||||
| string HostMemManager::OpNameToShmName(const string &op_name) { | string HostMemManager::OpNameToShmName(const string &op_name) { | ||||
| @@ -66,7 +66,7 @@ class HostMemManager { | |||||
| Status Initialize(); | Status Initialize(); | ||||
| void Finalize() noexcept; | void Finalize() noexcept; | ||||
| Status MallocSharedMemory(SharedMemInfo &mem_nfo); | Status MallocSharedMemory(SharedMemInfo &mem_nfo); | ||||
| Status QueryVarMemInfo(const string &op_name, uint64_t &base_addr, uint64_t &data_size); | |||||
| bool QueryVarMemInfo(const string &op_name, SharedMemInfo &mem_info); | |||||
| private: | private: | ||||
| static string OpNameToShmName(const string &op_name); | static string OpNameToShmName(const string &op_name); | ||||
| @@ -110,7 +110,14 @@ Status MallocSharedMemory(const TensorInfo &tensor_info, uint64_t &dev_addr, uin | |||||
| } | } | ||||
| Status GetVarBaseAddrAndSize(const string &var_name, uint64_t &base_addr, uint64_t &var_size) { | Status GetVarBaseAddrAndSize(const string &var_name, uint64_t &base_addr, uint64_t &var_size) { | ||||
| GELOGD("GetVarBaseAddrAndSize in"); | |||||
| return HostMemManager::Instance().QueryVarMemInfo(var_name, base_addr, var_size); | |||||
| GELOGD("GetVarBaseAddrAndSize in, var name:[%s]", var_name.c_str()); | |||||
| SharedMemInfo mem_info; | |||||
| if (!HostMemManager::Instance().QueryVarMemInfo(var_name, mem_info)) { | |||||
| GELOGE(FAILED, "Get addr and size failed, name:[%s]", var_name.c_str()); | |||||
| return FAILED; | |||||
| } | |||||
| base_addr = static_cast<uint64_t>(reinterpret_cast<uintptr_t>(mem_info.host_aligned_ptr->Get())); | |||||
| var_size = mem_info.mem_size; | |||||
| return SUCCESS; | |||||
| } | } | ||||
| } // namespace ge | } // namespace ge | ||||
| @@ -1074,21 +1074,25 @@ Status HybridModelBuilder::InitVariableTensors() { | |||||
| GELOGE(INTERNAL_ERROR, "[Calculate][TensorMemSize] failed, node name:%s", it.first.c_str()); | GELOGE(INTERNAL_ERROR, "[Calculate][TensorMemSize] failed, node name:%s", it.first.c_str()); | ||||
| return INTERNAL_ERROR; | return INTERNAL_ERROR; | ||||
| } | } | ||||
| SharedMemInfo mem_info(it.first, tensor_size); | |||||
| if (HostMemManager::Instance().MallocSharedMemory(mem_info) != SUCCESS) { | |||||
| GELOGE(GE_GRAPH_MALLOC_FAILED, "[Malloc][SharedMemory] failed, Host variable [%s].", it.first.c_str()); | |||||
| return GE_GRAPH_MALLOC_FAILED; | |||||
| } | |||||
| if (MemManager::Instance().HostMemInstance(RT_MEMORY_HBM).Malloc(mem_info.host_aligned_ptr, | |||||
| tensor_size) == nullptr) { | |||||
| GELOGE(MEMALLOC_FAILED, "[Malloc][HostMem] for an existed GeTensor failed, Host variable [%s].", | |||||
| it.first.c_str()); | |||||
| // Host variable will be assigned to allocated shared memory first. | |||||
| SharedMemInfo mem_info; | |||||
| void *mem_addr = nullptr; | |||||
| if (HostMemManager::Instance().QueryVarMemInfo(it.first, mem_info)) { | |||||
| mem_addr = const_cast<void *>(MemManager::Instance().HostMemInstance(RT_MEMORY_HBM) | |||||
| .Malloc(mem_info.host_aligned_ptr, tensor_size)); | |||||
| } else { | |||||
| mem_addr = MemManager::Instance().HostMemInstance(RT_MEMORY_HBM).Malloc(tensor_size); | |||||
| } | |||||
| if (mem_addr == nullptr) { | |||||
| REPORT_INNER_ERROR("E19999", "[Malloc][HostMem] for variable [%s] failed.", it.first.c_str()); | |||||
| GELOGE(MEMALLOC_FAILED, "[Malloc][HostMem] for variable [%s] failed.", it.first.c_str()); | |||||
| return MEMALLOC_FAILED; | return MEMALLOC_FAILED; | ||||
| } | } | ||||
| GELOGD("Host variable [%s] malloc success, size=%ld.", it.first.c_str(), tensor_size); | GELOGD("Host variable [%s] malloc success, size=%ld.", it.first.c_str(), tensor_size); | ||||
| std::unique_ptr<TensorValue> tensor(new (std::nothrow) TensorValue(mem_info.host_aligned_ptr->MutableGet(), | |||||
| tensor_size)); | |||||
| std::unique_ptr<TensorValue> tensor(new (std::nothrow) TensorValue(mem_addr, tensor_size)); | |||||
| GE_CHECK_NOTNULL(tensor); | GE_CHECK_NOTNULL(tensor); | ||||
| hybrid_model_.variable_tensors_.emplace(it.first, std::move(tensor)); | hybrid_model_.variable_tensors_.emplace(it.first, std::move(tensor)); | ||||
| } | } | ||||
| @@ -157,6 +157,7 @@ set(COMMON_SRC_FILES | |||||
| "${GE_CODE_DIR}/ge/graph/load/model_manager/model_manager.cc" | "${GE_CODE_DIR}/ge/graph/load/model_manager/model_manager.cc" | ||||
| "${GE_CODE_DIR}/ge/common/profiling/profiling_manager.cc" | "${GE_CODE_DIR}/ge/common/profiling/profiling_manager.cc" | ||||
| "${GE_CODE_DIR}/ge/graph/manager/host_mem_manager.cc" | "${GE_CODE_DIR}/ge/graph/manager/host_mem_manager.cc" | ||||
| "${GE_CODE_DIR}/ge/graph/manager/memory_api.cc" | |||||
| "${GE_CODE_DIR}/ge/session/inner_session.cc" | "${GE_CODE_DIR}/ge/session/inner_session.cc" | ||||
| "${GE_CODE_DIR}/ge/graph/manager/util/rt_context_util.cc" | "${GE_CODE_DIR}/ge/graph/manager/util/rt_context_util.cc" | ||||
| "${GE_CODE_DIR}/ge/graph/execute/graph_execute.cc" | "${GE_CODE_DIR}/ge/graph/execute/graph_execute.cc" | ||||
| @@ -798,6 +799,7 @@ set(MULTI_PARTS_TEST_FILES | |||||
| "graph/manager/hcom_util_unittest.cc" | "graph/manager/hcom_util_unittest.cc" | ||||
| "graph/manager/graph_caching_allocator_unittest.cc" | "graph/manager/graph_caching_allocator_unittest.cc" | ||||
| "graph/manager/host_mem_allocator_unittest.cc" | "graph/manager/host_mem_allocator_unittest.cc" | ||||
| "graph/manager/memory_api_unittest.cc" | |||||
| "graph/manager/session_scope_mem_allocator_unittest.cc" | "graph/manager/session_scope_mem_allocator_unittest.cc" | ||||
| "graph/manager/run_graph_unittest.cc" | "graph/manager/run_graph_unittest.cc" | ||||
| "graph/partition/dynamic_shape_partition_unittest.cc" | "graph/partition/dynamic_shape_partition_unittest.cc" | ||||
| @@ -0,0 +1,68 @@ | |||||
| /** | |||||
| * Copyright 2019-2020 Huawei Technologies Co., Ltd | |||||
| * | |||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||||
| * you may not use this file except in compliance with the License. | |||||
| * You may obtain a copy of the License at | |||||
| * | |||||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||||
| * | |||||
| * Unless required by applicable law or agreed to in writing, software | |||||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| * See the License for the specific language governing permissions and | |||||
| * limitations under the License. | |||||
| */ | |||||
| #include <gtest/gtest.h> | |||||
| #include <memory> | |||||
| #define protected public | |||||
| #define private public | |||||
| #include "graph/manager/host_mem_manager.h" | |||||
| #include "inc/framework/memory/memory_api.h" | |||||
| #undef protected | |||||
| #undef private | |||||
| #include "metadef/inc/graph/aligned_ptr.h" | |||||
| using namespace std; | |||||
| using namespace testing; | |||||
| using namespace ge; | |||||
| class UtestMemoryApiTest : public testing::Test { | |||||
| protected: | |||||
| void SetUp() {} | |||||
| void TearDown() {} | |||||
| }; | |||||
| TEST_F(UtestMemoryApiTest, query_mem_info_success) { | |||||
| string var_name = "host_params"; | |||||
| SharedMemInfo info; | |||||
| uint8_t tmp(0); | |||||
| info.device_address = &tmp; | |||||
| std::shared_ptr<AlignedPtr> aligned_ptr = std::make_shared<AlignedPtr>(100, 16); | |||||
| info.host_aligned_ptr = aligned_ptr; | |||||
| info.fd=0; | |||||
| info.mem_size = 100; | |||||
| info.op_name = var_name; | |||||
| HostMemManager::Instance().var_memory_base_map_[var_name] = info; | |||||
| uint64_t base_addr; | |||||
| uint64_t var_size; | |||||
| Status ret = GetVarBaseAddrAndSize(var_name, base_addr, var_size); | |||||
| EXPECT_EQ(ret, SUCCESS); | |||||
| EXPECT_EQ(var_size, 100); | |||||
| HostMemManager::Instance().var_memory_base_map_.clear(); | |||||
| } | |||||
| TEST_F(UtestMemoryApiTest, query_mem_info_failed) { | |||||
| string var_name = "host_params"; | |||||
| uint64_t base_addr; | |||||
| uint64_t var_size; | |||||
| Status ret = GetVarBaseAddrAndSize(var_name, base_addr, var_size); | |||||
| EXPECT_NE(ret, SUCCESS); | |||||
| } | |||||
| @@ -23,6 +23,7 @@ | |||||
| #define protected public | #define protected public | ||||
| #include "hybrid/model/hybrid_model_builder.h" | #include "hybrid/model/hybrid_model_builder.h" | ||||
| #include "hybrid/node_executor/node_executor.h" | #include "hybrid/node_executor/node_executor.h" | ||||
| #include "graph/manager/host_mem_manager.h" | |||||
| #include "graph/utils/tensor_utils.h" | #include "graph/utils/tensor_utils.h" | ||||
| #include "graph/utils/graph_utils.h" | #include "graph/utils/graph_utils.h" | ||||
| @@ -260,4 +261,59 @@ TEST_F(UtestHybridModelBuilder, init_constant_op_host_) { | |||||
| EXPECT_EQ(hybrid_model_builder.InitConstantOps(), SUCCESS); | EXPECT_EQ(hybrid_model_builder.InitConstantOps(), SUCCESS); | ||||
| EXPECT_EQ(hybrid_model_builder.hybrid_model_.variable_tensors_.size(), 2); | EXPECT_EQ(hybrid_model_builder.hybrid_model_.variable_tensors_.size(), 2); | ||||
| } | } | ||||
| TEST_F(UtestHybridModelBuilder, init_host_var_with_host_mem) { | |||||
| ComputeGraphPtr graph = std::make_shared<ComputeGraph>("test"); | |||||
| GeRootModelPtr ge_root_model = make_shared<GeRootModel>(graph); | |||||
| HybridModel hybrid_model(ge_root_model); | |||||
| HybridModelBuilder hybrid_model_builder(hybrid_model); | |||||
| OpDescPtr op_desc = std::make_shared<OpDesc>("host_params", VARIABLE); | |||||
| GeTensorDesc tensor_desc(GeShape(),FORMAT_NHWC,DT_FLOAT); | |||||
| TensorUtils::SetSize(tensor_desc, 512); | |||||
| op_desc->AddOutputDesc(tensor_desc); | |||||
| auto host_var = graph->AddNode(op_desc); | |||||
| hybrid_model.host_variable_nodes_.emplace("host_params", host_var); | |||||
| std::map<std::string, string> options; | |||||
| options["ge.exec.placement"] = "HOST"; | |||||
| GetThreadLocalContext().SetGraphOption(options); | |||||
| EXPECT_EQ(hybrid_model_builder.InitVariableTensors(), SUCCESS); | |||||
| EXPECT_EQ(hybrid_model_builder.hybrid_model_.variable_tensors_.size(), 1); | |||||
| } | |||||
| TEST_F(UtestHybridModelBuilder, init_host_var_with_host_shared_mem) { | |||||
| ComputeGraphPtr graph = std::make_shared<ComputeGraph>("test"); | |||||
| GeRootModelPtr ge_root_model = make_shared<GeRootModel>(graph); | |||||
| HybridModel hybrid_model(ge_root_model); | |||||
| HybridModelBuilder hybrid_model_builder(hybrid_model); | |||||
| OpDescPtr op_desc = std::make_shared<OpDesc>("host_params", VARIABLE); | |||||
| GeTensorDesc tensor_desc(GeShape(),FORMAT_NHWC,DT_FLOAT); | |||||
| TensorUtils::SetSize(tensor_desc, 512); | |||||
| op_desc->AddOutputDesc(tensor_desc); | |||||
| auto host_var = graph->AddNode(op_desc); | |||||
| hybrid_model.host_variable_nodes_.emplace("host_params", host_var); | |||||
| std::map<std::string, string> options; | |||||
| options["ge.exec.placement"] = "HOST"; | |||||
| GetThreadLocalContext().SetGraphOption(options); | |||||
| SharedMemInfo info; | |||||
| uint8_t tmp(0); | |||||
| info.device_address = &tmp; | |||||
| std::shared_ptr<AlignedPtr> aligned_ptr = std::make_shared<AlignedPtr>(512, 16); | |||||
| info.host_aligned_ptr = aligned_ptr; | |||||
| info.fd=0; | |||||
| info.mem_size = 100; | |||||
| info.op_name = "host_params"; | |||||
| HostMemManager::Instance().var_memory_base_map_["host_params"] = info; | |||||
| EXPECT_EQ(hybrid_model_builder.InitVariableTensors(), SUCCESS); | |||||
| EXPECT_EQ(hybrid_model_builder.hybrid_model_.variable_tensors_.size(), 1); | |||||
| HostMemManager::Instance().var_memory_base_map_.clear(); | |||||
| } | |||||
| } // namespace ge | } // namespace ge | ||||