diff --git a/ge/graph/load/model_manager/davinci_model.cc b/ge/graph/load/model_manager/davinci_model.cc
index f8b61216..782f6866 100755
--- a/ge/graph/load/model_manager/davinci_model.cc
+++ b/ge/graph/load/model_manager/davinci_model.cc
@@ -3727,6 +3727,8 @@ Status DavinciModel::InitTbeHandle(const OpDescPtr &op_desc) {
     binary.magic = RT_DEV_BINARY_MAGIC_ELF;
   } else if (json_string == "RT_DEV_BINARY_MAGIC_ELF_AIVEC") {
     binary.magic = RT_DEV_BINARY_MAGIC_ELF_AIVEC;
+  } else if (json_string == "RT_DEV_BINARY_MAGIC_ELF_AICUBE") {
+    binary.magic = RT_DEV_BINARY_MAGIC_ELF_AICUBE;
   } else {
     REPORT_INNER_ERROR("E19999", "Attr:%s value:%s in op:%s(%s), model_id:%u, check invalid",
                        TVM_ATTR_NAME_MAGIC.c_str(), json_string.c_str(),
diff --git a/ge/graph/manager/host_mem_manager.cc b/ge/graph/manager/host_mem_manager.cc
index 63265bf4..5e65bac0 100644
--- a/ge/graph/manager/host_mem_manager.cc
+++ b/ge/graph/manager/host_mem_manager.cc
@@ -107,16 +107,15 @@ Status HostMemManager::MallocSharedMemory(SharedMemInfo &mem_info) {
   return SUCCESS;
 }
 
-Status HostMemManager::QueryVarMemInfo(const string &op_name, uint64_t &base_addr, uint64_t &data_size) {
+bool HostMemManager::QueryVarMemInfo(const string &op_name, SharedMemInfo &mem_info) {
   std::lock_guard<std::mutex> lock(mutex_);
-  if (var_memory_base_map_.find(op_name) == var_memory_base_map_.end()) {
-    REPORT_INNER_ERROR("E19999", "MemInfo.op_name:%s can't find in var_memory_base_map_", op_name.c_str());
-    GELOGE(INTERNAL_ERROR, "[Check][Param] Find host base base_addr failed, node name:%s!", op_name.c_str());
-    return INTERNAL_ERROR;
+  auto it = var_memory_base_map_.find(op_name);
+  if (it == var_memory_base_map_.end()) {
+    GELOGW("Host memory for node [%s] not found.", op_name.c_str());
+    return false;
   }
-  base_addr = static_cast<uint64_t>(reinterpret_cast<uintptr_t>(var_memory_base_map_[op_name].device_address));
-  data_size = var_memory_base_map_[op_name].mem_size;
-  return SUCCESS;
+  mem_info = it->second;
+  return true;
 }
 
 string HostMemManager::OpNameToShmName(const string &op_name) {
diff --git a/ge/graph/manager/host_mem_manager.h b/ge/graph/manager/host_mem_manager.h
index be3237c3..84d5aebe 100644
--- a/ge/graph/manager/host_mem_manager.h
+++ b/ge/graph/manager/host_mem_manager.h
@@ -66,7 +66,7 @@ class HostMemManager {
   Status Initialize();
   void Finalize() noexcept;
   Status MallocSharedMemory(SharedMemInfo &mem_nfo);
-  Status QueryVarMemInfo(const string &op_name, uint64_t &base_addr, uint64_t &data_size);
+  bool QueryVarMemInfo(const string &op_name, SharedMemInfo &mem_info);
 
  private:
   static string OpNameToShmName(const string &op_name);
diff --git a/ge/graph/manager/memory_api.cc b/ge/graph/manager/memory_api.cc
index 8ea0594b..777e2d59 100644
--- a/ge/graph/manager/memory_api.cc
+++ b/ge/graph/manager/memory_api.cc
@@ -110,7 +110,14 @@ Status MallocSharedMemory(const TensorInfo &tensor_info, uint64_t &dev_addr, uin
 }
 
 Status GetVarBaseAddrAndSize(const string &var_name, uint64_t &base_addr, uint64_t &var_size) {
-  GELOGD("GetVarBaseAddrAndSize in");
-  return HostMemManager::Instance().QueryVarMemInfo(var_name, base_addr, var_size);
+  GELOGD("GetVarBaseAddrAndSize in, var name:[%s]", var_name.c_str());
+  SharedMemInfo mem_info;
+  if (!HostMemManager::Instance().QueryVarMemInfo(var_name, mem_info)) {
+    GELOGE(FAILED, "Get addr and size failed, name:[%s]", var_name.c_str());
+    return FAILED;
+  }
+  base_addr = static_cast<uint64_t>(reinterpret_cast<uintptr_t>(mem_info.host_aligned_ptr->Get()));
+  var_size = mem_info.mem_size;
+  return SUCCESS;
 }
 }  // namespace ge
diff --git a/ge/hybrid/model/hybrid_model_builder.cc b/ge/hybrid/model/hybrid_model_builder.cc
index b00b8ec8..906dddae 100755
--- a/ge/hybrid/model/hybrid_model_builder.cc
+++ b/ge/hybrid/model/hybrid_model_builder.cc
@@ -1074,21 +1074,25 @@ Status HybridModelBuilder::InitVariableTensors() {
       GELOGE(INTERNAL_ERROR, "[Calculate][TensorMemSize] failed, node name:%s", it.first.c_str());
       return INTERNAL_ERROR;
     }
-    SharedMemInfo mem_info(it.first, tensor_size);
-    if (HostMemManager::Instance().MallocSharedMemory(mem_info) != SUCCESS) {
-      GELOGE(GE_GRAPH_MALLOC_FAILED, "[Malloc][SharedMemory] failed, Host variable [%s].", it.first.c_str());
-      return GE_GRAPH_MALLOC_FAILED;
-    }
-    if (MemManager::Instance().HostMemInstance(RT_MEMORY_HBM).Malloc(mem_info.host_aligned_ptr,
-                                                                     tensor_size) == nullptr) {
-      GELOGE(MEMALLOC_FAILED, "[Malloc][HostMem] for an existed GeTensor failed, Host variable [%s].",
-             it.first.c_str());
+
+    // Host variable will be assigned to allocated shared memory first.
+    SharedMemInfo mem_info;
+    void *mem_addr = nullptr;
+    if (HostMemManager::Instance().QueryVarMemInfo(it.first, mem_info)) {
+      mem_addr = const_cast<void *>(MemManager::Instance().HostMemInstance(RT_MEMORY_HBM)
+                                        .Malloc(mem_info.host_aligned_ptr, tensor_size));
+    } else {
+      mem_addr = MemManager::Instance().HostMemInstance(RT_MEMORY_HBM).Malloc(tensor_size);
+    }
+
+    if (mem_addr == nullptr) {
+      REPORT_INNER_ERROR("E19999", "[Malloc][HostMem] for variable [%s] failed.", it.first.c_str());
+      GELOGE(MEMALLOC_FAILED, "[Malloc][HostMem] for variable [%s] failed.", it.first.c_str());
       return MEMALLOC_FAILED;
     }
     GELOGD("Host variable [%s] malloc success, size=%ld.", it.first.c_str(), tensor_size);
-    std::unique_ptr<TensorValue> tensor(new (std::nothrow) TensorValue(mem_info.host_aligned_ptr->MutableGet(),
-                                                                       tensor_size));
+    std::unique_ptr<TensorValue> tensor(new (std::nothrow) TensorValue(mem_addr, tensor_size));
     GE_CHECK_NOTNULL(tensor);
     hybrid_model_.variable_tensors_.emplace(it.first, std::move(tensor));
   }
 
diff --git a/tests/ut/ge/CMakeLists.txt b/tests/ut/ge/CMakeLists.txt
index 9ca694d6..5bff0f98 100755
--- a/tests/ut/ge/CMakeLists.txt
+++ b/tests/ut/ge/CMakeLists.txt
@@ -157,6 +157,7 @@ set(COMMON_SRC_FILES
     "${GE_CODE_DIR}/ge/graph/load/model_manager/model_manager.cc"
     "${GE_CODE_DIR}/ge/common/profiling/profiling_manager.cc"
     "${GE_CODE_DIR}/ge/graph/manager/host_mem_manager.cc"
+    "${GE_CODE_DIR}/ge/graph/manager/memory_api.cc"
     "${GE_CODE_DIR}/ge/session/inner_session.cc"
    "${GE_CODE_DIR}/ge/graph/manager/util/rt_context_util.cc"
    "${GE_CODE_DIR}/ge/graph/execute/graph_execute.cc"
@@ -799,6 +800,7 @@ set(MULTI_PARTS_TEST_FILES
    "graph/manager/hcom_util_unittest.cc"
    "graph/manager/graph_caching_allocator_unittest.cc"
    "graph/manager/host_mem_allocator_unittest.cc"
+    "graph/manager/memory_api_unittest.cc"
    "graph/manager/session_scope_mem_allocator_unittest.cc"
    "graph/manager/run_graph_unittest.cc"
    "graph/partition/dynamic_shape_partition_unittest.cc"
diff --git a/tests/ut/ge/graph/manager/memory_api_unittest.cc b/tests/ut/ge/graph/manager/memory_api_unittest.cc
new file mode 100644
index 00000000..0799adb9
--- /dev/null
+++ b/tests/ut/ge/graph/manager/memory_api_unittest.cc
@@ -0,0 +1,68 @@
+/**
+ * Copyright 2019-2020 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <gtest/gtest.h>
+#include <memory>
+
+
+
+#define protected public
+#define private public
+#include "graph/manager/host_mem_manager.h"
+#include "inc/framework/memory/memory_api.h"
+#undef protected
+#undef private
+#include "metadef/inc/graph/aligned_ptr.h"
+
+using namespace std;
+using namespace testing;
+using namespace ge;
+
+class UtestMemoryApiTest : public testing::Test {
+ protected:
+  void SetUp() {}
+
+  void TearDown() {}
+};
+
+TEST_F(UtestMemoryApiTest, query_mem_info_success) {
+string var_name = "host_params";
+SharedMemInfo info;
+uint8_t tmp(0);
+info.device_address = &tmp;
+
+std::shared_ptr<AlignedPtr> aligned_ptr = std::make_shared<AlignedPtr>(100, 16);
+
+info.host_aligned_ptr = aligned_ptr;
+info.fd=0;
+info.mem_size = 100;
+info.op_name = var_name;
+HostMemManager::Instance().var_memory_base_map_[var_name] = info;
+uint64_t base_addr;
+uint64_t var_size;
+Status ret = GetVarBaseAddrAndSize(var_name, base_addr, var_size);
+EXPECT_EQ(ret, SUCCESS);
+EXPECT_EQ(var_size, 100);
+HostMemManager::Instance().var_memory_base_map_.clear();
+}
+
+TEST_F(UtestMemoryApiTest, query_mem_info_failed) {
+string var_name = "host_params";
+uint64_t base_addr;
+uint64_t var_size;
+Status ret = GetVarBaseAddrAndSize(var_name, base_addr, var_size);
+EXPECT_NE(ret, SUCCESS);
+}
diff --git a/tests/ut/ge/hybrid/model/hybrid_model_builder_unittest.cc b/tests/ut/ge/hybrid/model/hybrid_model_builder_unittest.cc
index 1037c764..85264082 100644
--- a/tests/ut/ge/hybrid/model/hybrid_model_builder_unittest.cc
+++ b/tests/ut/ge/hybrid/model/hybrid_model_builder_unittest.cc
@@ -23,6 +23,7 @@
 #define protected public
 #include "hybrid/model/hybrid_model_builder.h"
 #include "hybrid/node_executor/node_executor.h"
+#include "graph/manager/host_mem_manager.h"
 #include "graph/utils/tensor_utils.h"
 #include "graph/utils/graph_utils.h"
 
@@ -263,4 +264,59 @@ TEST_F(UtestHybridModelBuilder, init_constant_op_host_) {
 EXPECT_EQ(hybrid_model_builder.InitConstantOps(), SUCCESS);
 EXPECT_EQ(hybrid_model_builder.hybrid_model_.variable_tensors_.size(), 2);
 }
+
+TEST_F(UtestHybridModelBuilder, init_host_var_with_host_mem) {
+ComputeGraphPtr graph = std::make_shared<ComputeGraph>("test");
+GeRootModelPtr ge_root_model = make_shared<GeRootModel>(graph);
+HybridModel hybrid_model(ge_root_model);
+HybridModelBuilder hybrid_model_builder(hybrid_model);
+
+OpDescPtr op_desc = std::make_shared<OpDesc>("host_params", VARIABLE);
+GeTensorDesc tensor_desc(GeShape(),FORMAT_NHWC,DT_FLOAT);
+TensorUtils::SetSize(tensor_desc, 512);
+op_desc->AddOutputDesc(tensor_desc);
+auto host_var = graph->AddNode(op_desc);
+
+hybrid_model.host_variable_nodes_.emplace("host_params", host_var);
+std::map<std::string, std::string> options;
+options["ge.exec.placement"] = "HOST";
+GetThreadLocalContext().SetGraphOption(options);
+
+EXPECT_EQ(hybrid_model_builder.InitVariableTensors(), SUCCESS);
+EXPECT_EQ(hybrid_model_builder.hybrid_model_.variable_tensors_.size(), 1);
+}
+
+TEST_F(UtestHybridModelBuilder, init_host_var_with_host_shared_mem) {
+ComputeGraphPtr graph = std::make_shared<ComputeGraph>("test");
+GeRootModelPtr ge_root_model = make_shared<GeRootModel>(graph);
+HybridModel hybrid_model(ge_root_model);
+HybridModelBuilder hybrid_model_builder(hybrid_model);
+
+OpDescPtr op_desc = std::make_shared<OpDesc>("host_params", VARIABLE);
+GeTensorDesc tensor_desc(GeShape(),FORMAT_NHWC,DT_FLOAT);
+TensorUtils::SetSize(tensor_desc, 512);
+op_desc->AddOutputDesc(tensor_desc);
+auto host_var = graph->AddNode(op_desc);
+
+hybrid_model.host_variable_nodes_.emplace("host_params", host_var);
+std::map<std::string, std::string> options;
+options["ge.exec.placement"] = "HOST";
+GetThreadLocalContext().SetGraphOption(options);
+
+SharedMemInfo info;
+uint8_t tmp(0);
+info.device_address = &tmp;
+std::shared_ptr<AlignedPtr> aligned_ptr = std::make_shared<AlignedPtr>(512, 16);
+info.host_aligned_ptr = aligned_ptr;
+info.fd=0;
+info.mem_size = 100;
+info.op_name = "host_params";
+HostMemManager::Instance().var_memory_base_map_["host_params"] = info;
+
+
+
+EXPECT_EQ(hybrid_model_builder.InitVariableTensors(), SUCCESS);
+EXPECT_EQ(hybrid_model_builder.hybrid_model_.variable_tensors_.size(), 1);
+HostMemManager::Instance().var_memory_base_map_.clear();
+}
 }  // namespace ge