From: @chen_yemeng Reviewed-by: Signed-off-by:tags/v1.2.0
| @@ -125,6 +125,7 @@ set(TRAIN_SRC_LIST | |||||
| "graph/manager/graph_var_manager.cc" | "graph/manager/graph_var_manager.cc" | ||||
| "graph/manager/host_mem_manager.cc" | "graph/manager/host_mem_manager.cc" | ||||
| "graph/manager/rdma_pool_allocator.cc" | "graph/manager/rdma_pool_allocator.cc" | ||||
| $<$<NOT:$<STREQUAL:${ENABLE_OPEN_SRC},True>>:graph/manager/host_mem_allocator.cc> | |||||
| "graph/manager/memory_api.cc" | "graph/manager/memory_api.cc" | ||||
| "graph/manager/model_manager/event_manager.cc" | "graph/manager/model_manager/event_manager.cc" | ||||
| "graph/manager/trans_var_data_utils.cc" | "graph/manager/trans_var_data_utils.cc" | ||||
| @@ -166,6 +167,7 @@ set(TRAIN_SRC_LIST | |||||
| "graph/passes/hccl_group_pass.cc" | "graph/passes/hccl_group_pass.cc" | ||||
| "graph/passes/enter_pass.cc" | "graph/passes/enter_pass.cc" | ||||
| "graph/passes/assign_pass.cc" | "graph/passes/assign_pass.cc" | ||||
| $<$<NOT:$<STREQUAL:${ENABLE_OPEN_SRC},True>>:graph/passes/inplace_support_check_pass.cc> | |||||
| "graph/passes/flow_ctrl_pass.cc" | "graph/passes/flow_ctrl_pass.cc" | ||||
| "graph/passes/global_step_insert_pass.cc" | "graph/passes/global_step_insert_pass.cc" | ||||
| "host_kernels/transpose_kernel.cc" | "host_kernels/transpose_kernel.cc" | ||||
| @@ -401,6 +403,7 @@ set(INFER_SRC_LIST | |||||
| "graph/manager/graph_var_manager.cc" | "graph/manager/graph_var_manager.cc" | ||||
| "graph/manager/host_mem_manager.cc" | "graph/manager/host_mem_manager.cc" | ||||
| "graph/manager/rdma_pool_allocator.cc" | "graph/manager/rdma_pool_allocator.cc" | ||||
| $<$<NOT:$<STREQUAL:${ENABLE_OPEN_SRC},True>>:graph/manager/host_mem_allocator.cc> | |||||
| "graph/manager/graph_mem_allocator.cc" | "graph/manager/graph_mem_allocator.cc" | ||||
| "graph/manager/graph_caching_allocator.cc" | "graph/manager/graph_caching_allocator.cc" | ||||
| "model/ge_model.cc" | "model/ge_model.cc" | ||||
| @@ -522,6 +525,7 @@ set(INFER_SRC_LIST | |||||
| "graph/passes/for_pass.cc" | "graph/passes/for_pass.cc" | ||||
| "graph/passes/enter_pass.cc" | "graph/passes/enter_pass.cc" | ||||
| "graph/passes/assign_pass.cc" | "graph/passes/assign_pass.cc" | ||||
| $<$<NOT:$<STREQUAL:${ENABLE_OPEN_SRC},True>>:graph/passes/inplace_support_check_pass.cc> | |||||
| "graph/passes/addn_pass.cc" | "graph/passes/addn_pass.cc" | ||||
| "graph/passes/common_subexpression_elimination_pass.cc" | "graph/passes/common_subexpression_elimination_pass.cc" | ||||
| "graph/passes/remove_same_const_pass.cc" | "graph/passes/remove_same_const_pass.cc" | ||||
| @@ -620,6 +624,7 @@ target_compile_definitions(ge_runner PRIVATE | |||||
| FMK_SUPPORT_DUMP | FMK_SUPPORT_DUMP | ||||
| DAVINCI_CLOUD | DAVINCI_CLOUD | ||||
| google=ascend_private | google=ascend_private | ||||
| $<$<STREQUAL:${ENABLE_OPEN_SRC},True>:ONLY_COMPILE_OPEN_SRC> | |||||
| ) | ) | ||||
| target_compile_options(ge_runner PRIVATE | target_compile_options(ge_runner PRIVATE | ||||
| @@ -687,6 +692,7 @@ target_compile_definitions(ge_compiler PRIVATE | |||||
| FMK_HOST_INFER | FMK_HOST_INFER | ||||
| COMPILE_OMG_PACKAGE | COMPILE_OMG_PACKAGE | ||||
| google=ascend_private | google=ascend_private | ||||
| $<$<STREQUAL:${ENABLE_OPEN_SRC},True>:ONLY_COMPILE_OPEN_SRC> | |||||
| ) | ) | ||||
| target_compile_options(ge_compiler PRIVATE | target_compile_options(ge_compiler PRIVATE | ||||
| @@ -28,6 +28,7 @@ set(SRC_LIST | |||||
| "../graph/manager/trans_var_data_utils.cc" | "../graph/manager/trans_var_data_utils.cc" | ||||
| "../graph/manager/util/debug.cc" | "../graph/manager/util/debug.cc" | ||||
| "../graph/manager/rdma_pool_allocator.cc" | "../graph/manager/rdma_pool_allocator.cc" | ||||
| $<$<NOT:$<STREQUAL:${ENABLE_OPEN_SRC},True>>:../graph/manager/host_mem_allocator.cc> | |||||
| "../hybrid/node_executor/aicpu/aicpu_ext_info.cc" | "../hybrid/node_executor/aicpu/aicpu_ext_info.cc" | ||||
| "../model/ge_model.cc" | "../model/ge_model.cc" | ||||
| "../model/ge_root_model.cc" | "../model/ge_root_model.cc" | ||||
| @@ -174,6 +175,7 @@ target_compile_definitions(ge_executor PRIVATE | |||||
| $<IF:$<STREQUAL:${TARGET_SYSTEM_NAME},Windows>,OS_TYPE=WIN,OS_TYPE=0> | $<IF:$<STREQUAL:${TARGET_SYSTEM_NAME},Windows>,OS_TYPE=WIN,OS_TYPE=0> | ||||
| $<$<STREQUAL:${TARGET_SYSTEM_NAME},Windows>:SECUREC_USING_STD_SECURE_LIB=0 NOMINMAX> | $<$<STREQUAL:${TARGET_SYSTEM_NAME},Windows>:SECUREC_USING_STD_SECURE_LIB=0 NOMINMAX> | ||||
| LOG_CPP | LOG_CPP | ||||
| $<$<STREQUAL:${ENABLE_OPEN_SRC},True>:ONLY_COMPILE_OPEN_SRC> | |||||
| ) | ) | ||||
| target_include_directories(ge_executor PRIVATE | target_include_directories(ge_executor PRIVATE | ||||
| @@ -216,6 +218,7 @@ target_compile_definitions(ge_executor_shared PRIVATE | |||||
| PROTOBUF_INLINE_NOT_IN_HEADERS=0 | PROTOBUF_INLINE_NOT_IN_HEADERS=0 | ||||
| DAVINCI_SUPPORT_PROFILING | DAVINCI_SUPPORT_PROFILING | ||||
| google=ascend_private | google=ascend_private | ||||
| $<$<STREQUAL:${ENABLE_OPEN_SRC},True>:ONLY_COMPILE_OPEN_SRC> | |||||
| ) | ) | ||||
| target_include_directories(ge_executor_shared PRIVATE | target_include_directories(ge_executor_shared PRIVATE | ||||
| @@ -15,6 +15,7 @@ local_ge_executor_src_files := \ | |||||
| ../graph/manager/graph_manager_utils.cc \ | ../graph/manager/graph_manager_utils.cc \ | ||||
| ../graph/manager/graph_var_manager.cc \ | ../graph/manager/graph_var_manager.cc \ | ||||
| ../graph/manager/rdma_pool_allocator.cc \ | ../graph/manager/rdma_pool_allocator.cc \ | ||||
| ../graph/manager/host_mem_allocator.cc \ | |||||
| ../graph/manager/graph_mem_allocator.cc \ | ../graph/manager/graph_mem_allocator.cc \ | ||||
| ../graph/manager/graph_caching_allocator.cc \ | ../graph/manager/graph_caching_allocator.cc \ | ||||
| ../graph/manager/trans_var_data_utils.cc \ | ../graph/manager/trans_var_data_utils.cc \ | ||||
| @@ -64,6 +64,7 @@ GRAPH_MANAGER_LOCAL_SRC_FILES := \ | |||||
| graph/manager/graph_var_manager.cc \ | graph/manager/graph_var_manager.cc \ | ||||
| graph/manager/host_mem_manager.cc \ | graph/manager/host_mem_manager.cc \ | ||||
| graph/manager/rdma_pool_allocator.cc \ | graph/manager/rdma_pool_allocator.cc \ | ||||
| graph/manager/host_mem_allocator.cc \ | |||||
| graph/manager/graph_mem_allocator.cc \ | graph/manager/graph_mem_allocator.cc \ | ||||
| graph/manager/graph_caching_allocator.cc \ | graph/manager/graph_caching_allocator.cc \ | ||||
| @@ -196,6 +197,7 @@ OMG_HOST_SRC_FILES := \ | |||||
| graph/passes/for_pass.cc \ | graph/passes/for_pass.cc \ | ||||
| graph/passes/enter_pass.cc \ | graph/passes/enter_pass.cc \ | ||||
| graph/passes/assign_pass.cc \ | graph/passes/assign_pass.cc \ | ||||
| graph/passes/inplace_support_check_pass.cc \ | |||||
| graph/passes/addn_pass.cc \ | graph/passes/addn_pass.cc \ | ||||
| graph/passes/common_subexpression_elimination_pass.cc \ | graph/passes/common_subexpression_elimination_pass.cc \ | ||||
| graph/passes/transop_symmetry_elimination_pass.cc \ | graph/passes/transop_symmetry_elimination_pass.cc \ | ||||
| @@ -26,6 +26,31 @@ | |||||
| #include "common/math/math_util.h" | #include "common/math/math_util.h" | ||||
| namespace { | namespace { | ||||
| #ifndef ONLY_COMPILE_OPEN_SRC | |||||
// Emits one `case (DTYPE)` arm that binds output `i` of `op_desc` into `named_outputs`.
// The expansion site must provide these names: need_create_flag, data_num, out_desc, op_desc, i,
// outputs and named_outputs.
//   - need_create_flag set:   a GeTensor sized data_num * sizeof(TYPE) is created from out_desc, its data
//                             type and shape are copied from out_desc, and it is appended to outputs.
//   - need_create_flag unset: the already existing outputs[i] is reused.
// The tensor is then wrapped with TensorAdapter::AsTensor and stored under the output's name; an empty
// output name is reported through GE_RETURN_WITH_LOG_IF_TRUE.
// `size` is uint64_t, so it is logged with %llu after an explicit cast to unsigned long long.
#define CREATE_OUTPUT_CASE(DTYPE, TYPE) \
  case (DTYPE): { \
    GeTensorPtr ge_tensor = nullptr; \
    if (need_create_flag) { \
      uint64_t size = data_num * sizeof(TYPE); \
      ge_tensor = MakeShared<GeTensor>(out_desc, size); \
      GE_CHECK_NOTNULL(ge_tensor); \
      GELOGD("node:%s allocate output %zu success, size=%llu", op_desc->GetName().c_str(), i, \
             static_cast<unsigned long long>(size)); \
      ge_tensor->MutableTensorDesc().SetDataType(out_desc.GetDataType()); \
      ge_tensor->MutableTensorDesc().SetShape(out_desc.GetShape()); \
      outputs.emplace_back(ge_tensor); \
    } else { \
      ge_tensor = outputs[i]; \
      GE_CHECK_NOTNULL(ge_tensor); \
      GELOGD("node:%s existed output %zu", op_desc->GetName().c_str(), i); \
    } \
    auto tensor = TensorAdapter::AsTensor(*ge_tensor); \
    auto tensor_name = op_desc->GetOutputNameByIndex(i); \
    GE_RETURN_WITH_LOG_IF_TRUE(tensor_name.empty(), "Failed to get output name. node = %s, index = %zu", \
                               op_desc->GetName().c_str(), i); \
    named_outputs.emplace(tensor_name, tensor); \
    break; \
  }
| #else | |||||
| #define CREATE_OUTPUT_CASE(DTYPE, TYPE) \ | #define CREATE_OUTPUT_CASE(DTYPE, TYPE) \ | ||||
| case (DTYPE): { \ | case (DTYPE): { \ | ||||
| GeTensorPtr ge_tensor = nullptr; \ | GeTensorPtr ge_tensor = nullptr; \ | ||||
| @@ -61,6 +86,7 @@ namespace { | |||||
| named_outputs.emplace(tensor_name, tensor); \ | named_outputs.emplace(tensor_name, tensor); \ | ||||
| break; \ | break; \ | ||||
| } | } | ||||
| #endif | |||||
| } | } | ||||
| namespace ge { | namespace ge { | ||||
| @@ -94,6 +94,7 @@ LIBGE_LOCAL_SRC_FILES := \ | |||||
| graph/manager/graph_var_manager.cc \ | graph/manager/graph_var_manager.cc \ | ||||
| graph/manager/host_mem_manager.cc \ | graph/manager/host_mem_manager.cc \ | ||||
| graph/manager/rdma_pool_allocator.cc \ | graph/manager/rdma_pool_allocator.cc \ | ||||
| graph/manager/host_mem_allocator.cc \ | |||||
| graph/manager/memory_api.cc \ | graph/manager/memory_api.cc \ | ||||
| graph/manager/model_manager/event_manager.cc \ | graph/manager/model_manager/event_manager.cc \ | ||||
| graph/manager/trans_var_data_utils.cc \ | graph/manager/trans_var_data_utils.cc \ | ||||
| @@ -135,6 +136,7 @@ LIBGE_LOCAL_SRC_FILES := \ | |||||
| graph/passes/hccl_group_pass.cc \ | graph/passes/hccl_group_pass.cc \ | ||||
| graph/passes/enter_pass.cc \ | graph/passes/enter_pass.cc \ | ||||
| graph/passes/assign_pass.cc \ | graph/passes/assign_pass.cc \ | ||||
| graph/passes/inplace_support_check_pass.cc \ | |||||
| graph/passes/flow_ctrl_pass.cc \ | graph/passes/flow_ctrl_pass.cc \ | ||||
| graph/passes/global_step_insert_pass.cc \ | graph/passes/global_step_insert_pass.cc \ | ||||
| host_kernels/transpose_kernel.cc \ | host_kernels/transpose_kernel.cc \ | ||||
| @@ -38,6 +38,10 @@ | |||||
| #include "graph/partition/stage_partition.h" | #include "graph/partition/stage_partition.h" | ||||
| #include "graph/passes/addn_pass.h" | #include "graph/passes/addn_pass.h" | ||||
| #include "graph/passes/bitcast_pass.h" | #include "graph/passes/bitcast_pass.h" | ||||
| #ifndef ONLY_COMPILE_OPEN_SRC | |||||
| #include "graph/passes/assign_pass.h" | |||||
| #include "graph/passes/inplace_support_check_pass.h" | |||||
| #endif | |||||
| #include "graph/passes/atomic_addr_clean_pass.h" | #include "graph/passes/atomic_addr_clean_pass.h" | ||||
| #include "graph/passes/attach_stream_label_pass.h" | #include "graph/passes/attach_stream_label_pass.h" | ||||
| #include "graph/passes/cast_remove_pass.h" | #include "graph/passes/cast_remove_pass.h" | ||||
| @@ -2239,10 +2243,20 @@ Status GraphManager::OptimizeStage2(ge::ComputeGraphPtr &compute_graph) { | |||||
| ReshapeRemovePass reshape_remove_pass; | ReshapeRemovePass reshape_remove_pass; | ||||
| CondRemovePass condition_remove_pass; | CondRemovePass condition_remove_pass; | ||||
| BitcastPass bitcast_pass; | BitcastPass bitcast_pass; | ||||
| #ifndef ONLY_COMPILE_OPEN_SRC | |||||
| AssignPass assign_pass; | |||||
| InplaceSupportCheckPass inplace_support_check_pass; | |||||
| #endif | |||||
| names_to_passes.emplace_back("ConstantFoldingPass", &constant_folding_pass); | names_to_passes.emplace_back("ConstantFoldingPass", &constant_folding_pass); | ||||
| names_to_passes.emplace_back("ReshapeRemovePass", &reshape_remove_pass); | names_to_passes.emplace_back("ReshapeRemovePass", &reshape_remove_pass); | ||||
| names_to_passes.emplace_back("CondRemovePass", &condition_remove_pass); | names_to_passes.emplace_back("CondRemovePass", &condition_remove_pass); | ||||
| names_to_passes.emplace_back("BitcastPass", &bitcast_pass); | names_to_passes.emplace_back("BitcastPass", &bitcast_pass); | ||||
| #ifndef ONLY_COMPILE_OPEN_SRC | |||||
| if (GetContext().GetHostExecFlag()) { | |||||
| names_to_passes.emplace_back("AssignPass", &assign_pass); | |||||
| names_to_passes.emplace_back("InplaceSupportCheckPass", &inplace_support_check_pass); | |||||
| } | |||||
| #endif | |||||
| GE_TIMESTAMP_START(names_to_passes); | GE_TIMESTAMP_START(names_to_passes); | ||||
| ret = GEPass(compute_graph).Run(names_to_passes); | ret = GEPass(compute_graph).Run(names_to_passes); | ||||
| GE_TIMESTAMP_END(names_to_passes, "OptimizeStage2::MergedGraphNameToPasses"); | GE_TIMESTAMP_END(names_to_passes, "OptimizeStage2::MergedGraphNameToPasses"); | ||||
| @@ -19,7 +19,9 @@ | |||||
| #include <string> | #include <string> | ||||
| #include "graph/manager/graph_caching_allocator.h" | #include "graph/manager/graph_caching_allocator.h" | ||||
| #include "graph/manager/rdma_pool_allocator.h" | #include "graph/manager/rdma_pool_allocator.h" | ||||
| #ifndef ONLY_COMPILE_OPEN_SRC | |||||
| #include "graph/manager/host_mem_allocator.h" | |||||
| #endif | |||||
| namespace ge { | namespace ge { | ||||
| void MemoryAllocator::Initialize(uint32_t device_id) { | void MemoryAllocator::Initialize(uint32_t device_id) { | ||||
| GELOGI("MemoryAllocator::Initialize"); | GELOGI("MemoryAllocator::Initialize"); | ||||
| @@ -190,6 +192,12 @@ Status MemManager::Initialize(const std::vector<rtMemType_t> &memory_type) { | |||||
| GELOGE(ge::INTERNAL_ERROR, "Create RdmaAllocator failed."); | GELOGE(ge::INTERNAL_ERROR, "Create RdmaAllocator failed."); | ||||
| return ge::INTERNAL_ERROR; | return ge::INTERNAL_ERROR; | ||||
| } | } | ||||
| #ifndef ONLY_COMPILE_OPEN_SRC | |||||
| if (InitAllocator(memory_type, host_allocator_map_) != SUCCESS) { | |||||
| GELOGE(ge::INTERNAL_ERROR, "Create HostMemAllocator failed."); | |||||
| return ge::INTERNAL_ERROR; | |||||
| } | |||||
| #endif | |||||
| return SUCCESS; | return SUCCESS; | ||||
| } | } | ||||
| @@ -211,6 +219,9 @@ void MemManager::Finalize() noexcept { | |||||
| // caching and rdma allocator use memory allocator, so finalize them first | // caching and rdma allocator use memory allocator, so finalize them first | ||||
| FinalizeAllocatorMap(caching_allocator_map_); | FinalizeAllocatorMap(caching_allocator_map_); | ||||
| FinalizeAllocatorMap(rdma_allocator_map_); | FinalizeAllocatorMap(rdma_allocator_map_); | ||||
| #ifndef ONLY_COMPILE_OPEN_SRC | |||||
| FinalizeAllocatorMap(host_allocator_map_); | |||||
| #endif | |||||
| FinalizeAllocatorMap(memory_allocator_map_); | FinalizeAllocatorMap(memory_allocator_map_); | ||||
| } | } | ||||
| @@ -239,4 +250,9 @@ CachingAllocator &MemManager::CachingInstance(rtMemType_t memory_type) { | |||||
| RdmaPoolAllocator &MemManager::RdmaPoolInstance(rtMemType_t memory_type) { | RdmaPoolAllocator &MemManager::RdmaPoolInstance(rtMemType_t memory_type) { | ||||
| return Instance().GetAllocator(memory_type, rdma_allocator_map_); | return Instance().GetAllocator(memory_type, rdma_allocator_map_); | ||||
| } | } | ||||
| #ifndef ONLY_COMPILE_OPEN_SRC | |||||
| HostMemAllocator &MemManager::HostMemInstance(rtMemType_t memory_type) { | |||||
| return Instance().GetAllocator(memory_type, host_allocator_map_); | |||||
| } | |||||
| #endif | |||||
| } // namespace ge | } // namespace ge | ||||
| @@ -139,7 +139,9 @@ class MemoryAllocator { | |||||
| using MemoryAllocatorPtr = std::shared_ptr<MemoryAllocator>; | using MemoryAllocatorPtr = std::shared_ptr<MemoryAllocator>; | ||||
| class CachingAllocator; | class CachingAllocator; | ||||
| class RdmaPoolAllocator; | class RdmaPoolAllocator; | ||||
| #ifndef ONLY_COMPILE_OPEN_SRC | |||||
| class HostMemAllocator; | |||||
| #endif | |||||
| class MemManager { | class MemManager { | ||||
| public: | public: | ||||
| MemManager(); | MemManager(); | ||||
| @@ -148,6 +150,9 @@ class MemManager { | |||||
| static MemoryAllocator *Instance(rtMemType_t memory_type); | static MemoryAllocator *Instance(rtMemType_t memory_type); | ||||
| CachingAllocator &CachingInstance(rtMemType_t memory_type); | CachingAllocator &CachingInstance(rtMemType_t memory_type); | ||||
| RdmaPoolAllocator &RdmaPoolInstance(rtMemType_t memory_type); | RdmaPoolAllocator &RdmaPoolInstance(rtMemType_t memory_type); | ||||
| #ifndef ONLY_COMPILE_OPEN_SRC | |||||
| HostMemAllocator &HostMemInstance(rtMemType_t memory_type); | |||||
| #endif | |||||
| MemManager(const MemManager &) = delete; | MemManager(const MemManager &) = delete; | ||||
| MemManager &operator=(const MemManager &) = delete; | MemManager &operator=(const MemManager &) = delete; | ||||
| /// | /// | ||||
| @@ -235,6 +240,9 @@ class MemManager { | |||||
| std::map<rtMemType_t, MemoryAllocator *> memory_allocator_map_; | std::map<rtMemType_t, MemoryAllocator *> memory_allocator_map_; | ||||
| std::map<rtMemType_t, CachingAllocator *> caching_allocator_map_; | std::map<rtMemType_t, CachingAllocator *> caching_allocator_map_; | ||||
| std::map<rtMemType_t, RdmaPoolAllocator *> rdma_allocator_map_; | std::map<rtMemType_t, RdmaPoolAllocator *> rdma_allocator_map_; | ||||
| #ifndef ONLY_COMPILE_OPEN_SRC | |||||
| std::map<rtMemType_t, HostMemAllocator *> host_allocator_map_; | |||||
| #endif | |||||
| std::recursive_mutex allocator_mutex_; | std::recursive_mutex allocator_mutex_; | ||||
| }; | }; | ||||
| } // namespace ge | } // namespace ge | ||||
| @@ -0,0 +1,69 @@ | |||||
| /** | |||||
| * Copyright 2019-2020 Huawei Technologies Co., Ltd | |||||
| * | |||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||||
| * you may not use this file except in compliance with the License. | |||||
| * You may obtain a copy of the License at | |||||
| * | |||||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||||
| * | |||||
| * Unless required by applicable law or agreed to in writing, software | |||||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| * See the License for the specific language governing permissions and | |||||
| * limitations under the License. | |||||
| */ | |||||
| #include "graph/manager/host_mem_allocator.h" | |||||
| #include "framework/common/debug/ge_log.h" | |||||
| #include "common/ge/ge_util.h" | |||||
| namespace ge { | |||||
| const void *HostMemAllocator::Malloc(const std::shared_ptr<AlignedPtr> &aligned_ptr, size_t size) { | |||||
| if (aligned_ptr == nullptr) { | |||||
| GELOGW("Insert a null aligned_ptr"); | |||||
| return nullptr; | |||||
| } | |||||
| GELOGD("allocate existed host memory succ, size=%zu", size); | |||||
| allocated_blocks_[aligned_ptr->Get()] = { size, aligned_ptr }; | |||||
| return aligned_ptr->Get(); | |||||
| } | |||||
| uint8_t *HostMemAllocator::Malloc(size_t size) { | |||||
| GELOGD("start to malloc host memory, size=%zu", size); | |||||
| std::lock_guard<std::mutex> lock(mutex_); | |||||
| std::shared_ptr<AlignedPtr> aligned_ptr = MakeShared<AlignedPtr>(size); | |||||
| if (aligned_ptr == nullptr) { | |||||
| GELOGE(INTERNAL_ERROR, "make shared_ptr for AlignedPtr failed"); | |||||
| return nullptr; | |||||
| } | |||||
| allocated_blocks_[aligned_ptr->Get()] = { size, aligned_ptr }; | |||||
| GELOGD("allocate host memory succ, size=%zu", size); | |||||
| return aligned_ptr->MutableGet(); | |||||
| } | |||||
| Status HostMemAllocator::Free(const void *memory_addr) { | |||||
| if (memory_addr == nullptr) { | |||||
| GELOGE(GE_GRAPH_FREE_FAILED, "Invalid memory pointer"); | |||||
| return GE_GRAPH_FREE_FAILED; | |||||
| } | |||||
| std::lock_guard<std::mutex> lock(mutex_); | |||||
| auto it = allocated_blocks_.find(memory_addr); | |||||
| if (it == allocated_blocks_.end()) { | |||||
| GELOGE(PARAM_INVALID, "Invalid memory pointer"); | |||||
| return PARAM_INVALID; | |||||
| } | |||||
| it->second.second.reset(); | |||||
| allocated_blocks_.erase(it); | |||||
| return SUCCESS; | |||||
| } | |||||
| void HostMemAllocator::Clear() { | |||||
| for (auto &block : allocated_blocks_) { | |||||
| block.second.second.reset(); | |||||
| } | |||||
| allocated_blocks_.clear(); | |||||
| } | |||||
| } // namespace ge | |||||
| @@ -0,0 +1,58 @@ | |||||
| /** | |||||
| * Copyright 2019-2020 Huawei Technologies Co., Ltd | |||||
| * | |||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||||
| * you may not use this file except in compliance with the License. | |||||
| * You may obtain a copy of the License at | |||||
| * | |||||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||||
| * | |||||
| * Unless required by applicable law or agreed to in writing, software | |||||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| * See the License for the specific language governing permissions and | |||||
| * limitations under the License. | |||||
| */ | |||||
| #ifndef GE_GRAPH_MANAGER_HOST_MEM_ALLOCATOR_H_ | |||||
| #define GE_GRAPH_MANAGER_HOST_MEM_ALLOCATOR_H_ | |||||
| #include <mutex> | |||||
| #include <unordered_map> | |||||
| #include "framework/common/ge_inner_error_codes.h" | |||||
| #include "graph/aligned_ptr.h" | |||||
| #include "runtime/mem.h" | |||||
| namespace ge { | |||||
| class HostMemAllocator { | |||||
| public: | |||||
| explicit HostMemAllocator(rtMemType_t memory_type) : memory_type_(memory_type) {} | |||||
| ~HostMemAllocator() = default; | |||||
| HostMemAllocator(const HostMemAllocator &) = delete; | |||||
| HostMemAllocator &operator=(const HostMemAllocator &) = delete; | |||||
| Status Initialize() { | |||||
| Clear(); | |||||
| return SUCCESS; | |||||
| } | |||||
| void Finalize() { Clear(); } | |||||
| const void *Malloc(const std::shared_ptr<AlignedPtr>& aligned_ptr, size_t size); | |||||
| uint8_t *Malloc(size_t size); | |||||
| Status Free(const void *memory_addr); | |||||
| std::pair<size_t, std::shared_ptr<AlignedPtr>> GetAlignedPtr(const void *addr) { return allocated_blocks_[addr]; } | |||||
| private: | |||||
| void Clear(); | |||||
| rtMemType_t memory_type_; | |||||
| std::unordered_map<const void *, std::pair<size_t, std::shared_ptr<AlignedPtr>>> allocated_blocks_; | |||||
| // lock around all operations | |||||
| mutable std::mutex mutex_; | |||||
| }; | |||||
| } // namespace ge | |||||
| #endif // GE_GRAPH_MANAGER_HOST_MEM_ALLOCATOR_H_ | |||||
| @@ -43,16 +43,30 @@ Status SharedMemAllocator::Allocate(SharedMemInfo &mem_info) { | |||||
| return GE_GRAPH_MEMORY_ALLOC_FAILED; | return GE_GRAPH_MEMORY_ALLOC_FAILED; | ||||
| } | } | ||||
| mem_info.fd = output_para.fd; | mem_info.fd = output_para.fd; | ||||
| #ifndef ONLY_COMPILE_OPEN_SRC | |||||
| mem_info.host_aligned_ptr = AlignedPtr::BuildFromAllocFunc(mem_info.mem_size, | |||||
| [&output_para](std::unique_ptr<uint8_t[], deleter> &ptr) { | |||||
| ptr.reset(reinterpret_cast<uint8_t *>(output_para.ptr)); | |||||
| }, | |||||
| [](uint8_t *ptr) { | |||||
| ptr = nullptr; | |||||
| }, 0); | |||||
| #else | |||||
| mem_info.host_address = reinterpret_cast<uint8_t *>(output_para.ptr); | mem_info.host_address = reinterpret_cast<uint8_t *>(output_para.ptr); | ||||
| #endif | |||||
| mem_info.device_address = reinterpret_cast<uint8_t *>(output_para.devPtr); | mem_info.device_address = reinterpret_cast<uint8_t *>(output_para.devPtr); | ||||
| return SUCCESS; | return SUCCESS; | ||||
| } | } | ||||
| Status SharedMemAllocator::DeAllocate(SharedMemInfo &mem_info) { | Status SharedMemAllocator::DeAllocate(SharedMemInfo &mem_info) { | ||||
| GELOGD("SharedMemAllocator::DeAllocate"); | GELOGD("SharedMemAllocator::DeAllocate"); | ||||
| #ifndef ONLY_COMPILE_OPEN_SRC | |||||
| rtFreeHostSharedMemoryIn free_para = {mem_info.shm_name.c_str(), mem_info.mem_size, mem_info.fd, | |||||
| mem_info.host_aligned_ptr->MutableGet(), mem_info.device_address}; | |||||
| #else | |||||
| rtFreeHostSharedMemoryIn free_para = {mem_info.shm_name.c_str(), mem_info.mem_size, mem_info.fd, | rtFreeHostSharedMemoryIn free_para = {mem_info.shm_name.c_str(), mem_info.mem_size, mem_info.fd, | ||||
| mem_info.host_address, mem_info.device_address}; | mem_info.host_address, mem_info.device_address}; | ||||
| #endif | |||||
| rtError_t rt_ret = rtFreeHostSharedMemory(&free_para); | rtError_t rt_ret = rtFreeHostSharedMemory(&free_para); | ||||
| if (rt_ret != RT_ERROR_NONE) { | if (rt_ret != RT_ERROR_NONE) { | ||||
| GELOGE(RT_FAILED, "Call rt api(rtFreeHostSharedMemory) failed, ret: 0x%X.", rt_ret); | GELOGE(RT_FAILED, "Call rt api(rtFreeHostSharedMemory) failed, ret: 0x%X.", rt_ret); | ||||
| @@ -42,7 +42,11 @@ struct SharedMemInfo { | |||||
| uint64_t mem_size = 0; | uint64_t mem_size = 0; | ||||
| int fd = 0; | int fd = 0; | ||||
| uint8_t *device_address = nullptr; | uint8_t *device_address = nullptr; | ||||
| #ifndef ONLY_COMPILE_OPEN_SRC | |||||
| std::shared_ptr<AlignedPtr> host_aligned_ptr = nullptr; | |||||
| #else | |||||
| uint8_t *host_address = nullptr; | uint8_t *host_address = nullptr; | ||||
| #endif | |||||
| SharedMemInfo() = default; | SharedMemInfo() = default; | ||||
| SharedMemInfo(string name, uint64_t size) : op_name(std::move(name)), mem_size(size) {} | SharedMemInfo(string name, uint64_t size) : op_name(std::move(name)), mem_size(size) {} | ||||
| }; | }; | ||||
| @@ -9,25 +9,143 @@ | |||||
| * | * | ||||
| * Unless required by applicable law or agreed to in writing, software | * Unless required by applicable law or agreed to in writing, software | ||||
| * distributed under the License is distributed on an "AS IS" BASIS, | * distributed under the License is distributed on an "AS IS" BASIS, | ||||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| * See the License for the specific language governing permissions and | * See the License for the specific language governing permissions and | ||||
| * limitations under the License. | * limitations under the License. | ||||
| */ | */ | ||||
| #include "graph/passes/assign_pass.h" | #include "graph/passes/assign_pass.h" | ||||
| #include "framework/common/debug/ge_log.h" | |||||
| #include "framework/common/debug/log.h" | #include "framework/common/debug/log.h" | ||||
| #include "graph/utils/graph_utils.h" | #include "graph/utils/graph_utils.h" | ||||
| #include "graph/debug/ge_attr_define.h" | #include "graph/debug/ge_attr_define.h" | ||||
| namespace { | namespace { | ||||
| const uint32_t kValidInputNodeOutputNum = 1; | |||||
| const int32_t kAssignRefInputIndex = 0; | |||||
| const int32_t kAssignValueInputIndex = 1; | |||||
| constexpr uint32_t kValidInputNodeOutputNum = 1; | |||||
| constexpr int32_t kAssignRefInputIndex = 0; | |||||
| constexpr int32_t kAssignValueInputIndex = 1; | |||||
| static const std::set<std::string> kNoTaskNodeTypes = { ge::DATA, ge::ANN_DATA, ge::AIPPDATA, | |||||
| ge::CONSTANT, ge::CONSTANTOP, | |||||
| ge::VARIABLE, ge::VARIABLEV2 }; | |||||
| } | } | ||||
| namespace ge { | namespace ge { | ||||
| #ifndef ONLY_COMPILE_OPEN_SRC | |||||
| Status AssignPass::Run(NodePtr &node) { | |||||
| GELOGD("AssignPass running"); | |||||
| if (TransformAttr(node) != SUCCESS) { | |||||
| GELOGE(FAILED, "Transform assign_var_name attr failed, node=%s", node->GetName().c_str()); | |||||
| return FAILED; | |||||
| } | |||||
| if (node->GetType() == ASSIGN) { | |||||
| if (OptimizedAssignNode(node) != SUCCESS) { | |||||
| GELOGE(FAILED, "Optimize for assign_node %s failed", node->GetName().c_str()); | |||||
| return FAILED; | |||||
| } | |||||
| } | |||||
| GELOGD("AssignPass success"); | |||||
| return SUCCESS; | |||||
| } | |||||
| /// | |||||
| /// @brief Optimize for assign_node | |||||
| /// @param [in] assign_node | |||||
| /// @return Status | |||||
| /// | |||||
| Status AssignPass::OptimizedAssignNode(NodePtr &assign_node) { | |||||
| const auto &ref_in_anchor = assign_node->GetInDataAnchor(kAssignRefInputIndex); | |||||
| const auto &value_in_anchor = assign_node->GetInDataAnchor(kAssignValueInputIndex); | |||||
| if ((ref_in_anchor == nullptr) || (value_in_anchor == nullptr)) { | |||||
| GELOGE(FAILED, "In data anchor is null, node:%s", assign_node->GetName().c_str()); | |||||
| return FAILED; | |||||
| } | |||||
| const auto &ref_peer_anchor = ref_in_anchor->GetPeerOutAnchor(); | |||||
| const auto &value_peer_anchor = value_in_anchor->GetPeerOutAnchor(); | |||||
| if ((ref_peer_anchor == nullptr) || (value_peer_anchor == nullptr)) { | |||||
| GELOGE(FAILED, "Peer data anchor is null, node:%s", assign_node->GetName().c_str()); | |||||
| return FAILED; | |||||
| } | |||||
| if (IsCondMatch(assign_node, ref_peer_anchor, value_peer_anchor)) { | |||||
| /// | |||||
| /// variable not-const not-const | |||||
| /// \ / | | |||||
| /// \ / | | |||||
| /// Assign ----> variable | |||||
| /// | | | |||||
| /// | | | |||||
| /// node node | |||||
| /// | |||||
| GELOGD("Optimization for assign_node %s start", assign_node->GetName().c_str()); | |||||
| if (IsolateAndDeleteNode(assign_node, {kAssignRefInputIndex}) != SUCCESS) { | |||||
| GELOGE(FAILED, "Isolate and delete assign_node %s failed.", assign_node->GetName().c_str()); | |||||
| return FAILED; | |||||
| } | |||||
| const auto &ref_input = ref_peer_anchor->GetOwnerNode()->GetOpDesc(); | |||||
| const auto &value_input = value_peer_anchor->GetOwnerNode()->GetOpDesc(); | |||||
| if ((ref_input == nullptr) || (value_input == nullptr)) { | |||||
| GELOGE(FAILED, "value input is null"); | |||||
| return FAILED; | |||||
| } | |||||
| // variable has and only has one input | |||||
| if (ref_input->UpdateInputDesc(0, value_input->GetOutputDesc(value_peer_anchor->GetIdx())) != GRAPH_SUCCESS) { | |||||
| GELOGE(FAILED, "Update input_desc for variable %s failed.", ref_input->GetName().c_str()); | |||||
| return FAILED; | |||||
| } | |||||
| if (GraphUtils::AddEdge(value_peer_anchor, ref_peer_anchor->GetOwnerNode()->GetInDataAnchor(0)) != GRAPH_SUCCESS) { | |||||
| GELOGE(FAILED, "Add data edge %s->%s failed", value_input->GetName().c_str(), ref_input->GetName().c_str()); | |||||
| return FAILED; | |||||
| } | |||||
| GELOGD("add attr ASSIGN_VAR_NAME on node %s, var_name=%s", | |||||
| value_input->GetName().c_str(), ref_input->GetName().c_str()); | |||||
| if (!AttrUtils::SetStr(value_input->MutableOutputDesc(value_peer_anchor->GetIdx()), ASSIGN_VAR_NAME, | |||||
| ref_input->GetName())) { | |||||
| GELOGE(FAILED, "Set attr ASSIGN_VAR_NAME failed."); | |||||
| return FAILED; | |||||
| } | |||||
| auto value_node = value_peer_anchor->GetOwnerNode(); | |||||
| AddRePassNode(value_node); | |||||
| } | |||||
| return SUCCESS; | |||||
| } | |||||
| /// | |||||
| /// @brief Transform assign_var_name attr: for every output desc of node that carries both | |||||
| ///        INPLACE_SUPPORT_INPUT_INDEX and ASSIGN_VAR_NAME, copy ASSIGN_VAR_NAME onto the output | |||||
| ///        desc of the peer node feeding that input, and schedule the peer node for re-pass | |||||
| /// @param [in] node | |||||
| /// @return Status SUCCESS on completion, FAILED if the attr cannot be set, or the status | |||||
| ///         produced by GE_CHECK_NOTNULL when a required pointer is null | |||||
| /// | |||||
| Status AssignPass::TransformAttr(NodePtr &node) { | |||||
| // Guard the node itself before touching its op desc. | |||||
| GE_CHECK_NOTNULL(node); | |||||
| GE_CHECK_NOTNULL(node->GetOpDesc()); | |||||
| for (const auto &output_desc : node->GetOpDesc()->GetAllOutputsDesc()) { | |||||
| int32_t inplace_input_idx = -1; | |||||
| std::string assign_var_name; | |||||
| // Only outputs that carry both attrs are transformed; all others are left untouched. | |||||
| if (AttrUtils::GetInt(output_desc, INPLACE_SUPPORT_INPUT_INDEX, inplace_input_idx) && | |||||
| AttrUtils::GetStr(output_desc, ASSIGN_VAR_NAME, assign_var_name)) { | |||||
| GELOGD("Transform attr ASSIGN_VAR_NAME on node %s, assign_var_name=%s, inplace_input_idx=%d, ", | |||||
| node->GetName().c_str(), assign_var_name.c_str(), inplace_input_idx); | |||||
| const auto &in_data_anchor = node->GetInDataAnchor(inplace_input_idx); | |||||
| GE_CHECK_NOTNULL(in_data_anchor); | |||||
| const auto &peer_data_anchor = in_data_anchor->GetPeerOutAnchor(); | |||||
| GE_CHECK_NOTNULL(peer_data_anchor); | |||||
| auto in_node = peer_data_anchor->GetOwnerNode(); | |||||
| // The owner node is dereferenced below, so it must be checked as well as its op desc. | |||||
| GE_CHECK_NOTNULL(in_node); | |||||
| GE_CHECK_NOTNULL(in_node->GetOpDesc()); | |||||
| GELOGD("add attr ASSIGN_VAR_NAME on node %s, var_name=%s", in_node->GetName().c_str(), assign_var_name.c_str()); | |||||
| if (!AttrUtils::SetStr(in_node->GetOpDesc()->MutableOutputDesc(peer_data_anchor->GetIdx()), | |||||
| ASSIGN_VAR_NAME, assign_var_name)) { | |||||
| GELOGE(FAILED, "Set attr ASSIGN_VAR_NAME failed."); | |||||
| return FAILED; | |||||
| } | |||||
| // The producer's output desc changed, so it is queued to be visited again. | |||||
| AddRePassNode(in_node); | |||||
| } | |||||
| } | |||||
| return SUCCESS; | |||||
| } | |||||
| #else | |||||
| Status AssignPass::Run(NodePtr &node) { | Status AssignPass::Run(NodePtr &node) { | ||||
| GELOGD("AssignPass running"); | GELOGD("AssignPass running"); | ||||
| if (node->GetType() != ASSIGN) { | if (node->GetType() != ASSIGN) { | ||||
| @@ -91,7 +209,7 @@ Status AssignPass::Run(NodePtr &node) { | |||||
| GELOGD("AssignPass success"); | GELOGD("AssignPass success"); | ||||
| return SUCCESS; | return SUCCESS; | ||||
| } | } | ||||
| #endif | |||||
| /// | /// | ||||
| /// @brief Check if need optimize for assign_node | /// @brief Check if need optimize for assign_node | ||||
| /// @param [in] assign_node | /// @param [in] assign_node | ||||
| @@ -105,9 +223,8 @@ bool AssignPass::IsCondMatch(const NodePtr &node, const OutDataAnchorPtr &ref_pe | |||||
| node->GetName().c_str(), ref_peer_anchor->GetOwnerNode()->GetName().c_str(), | node->GetName().c_str(), ref_peer_anchor->GetOwnerNode()->GetName().c_str(), | ||||
| value_peer_anchor->GetOwnerNode()->GetName().c_str()); | value_peer_anchor->GetOwnerNode()->GetName().c_str()); | ||||
| const std::string &value_type = value_peer_anchor->GetOwnerNode()->GetType(); | |||||
| if ((value_type == CONSTANTOP) || (value_type == CONSTANT)) { | |||||
| GELOGD("value input is const"); | |||||
| if (kNoTaskNodeTypes.count(value_peer_anchor->GetOwnerNode()->GetType()) > 0) { | |||||
| GELOGD("value input is not calculate node"); | |||||
| return false; | return false; | ||||
| } | } | ||||
| @@ -25,6 +25,21 @@ class AssignPass : public BaseNodePass { | |||||
| Status Run(NodePtr &node) override; | Status Run(NodePtr &node) override; | ||||
| private: | private: | ||||
| #ifndef ONLY_COMPILE_OPEN_SRC | |||||
| /// | |||||
| /// @brief Optimize for assign_node | |||||
| /// @param [in] assign_node | |||||
| /// @return Status | |||||
| /// | |||||
| Status OptimizedAssignNode(NodePtr &assign_node); | |||||
| /// | |||||
| /// @brief Transform assign_var_name attr | |||||
| /// @param [in] node | |||||
| /// @return Status | |||||
| /// | |||||
| Status TransformAttr(NodePtr &node); | |||||
| #endif | |||||
| /// | /// | ||||
| /// @brief Check if need optimize for assign_node | /// @brief Check if need optimize for assign_node | ||||
| /// @param [in] assign_node | /// @param [in] assign_node | ||||
| @@ -19,13 +19,7 @@ | |||||
| #include <map> | #include <map> | ||||
| #include <memory> | #include <memory> | ||||
| #include <string> | #include <string> | ||||
| #include <utility> | |||||
| #include <vector> | #include <vector> | ||||
| #include "common/ge/ge_util.h" | |||||
| #include "framework/common/debug/ge_log.h" | |||||
| #include "framework/common/ge_inner_error_codes.h" | |||||
| #include "graph/debug/ge_attr_define.h" | |||||
| #include "graph/utils/op_desc_utils.h" | #include "graph/utils/op_desc_utils.h" | ||||
| #include "graph/utils/type_utils.h" | #include "graph/utils/type_utils.h" | ||||
| @@ -121,11 +115,21 @@ void ConstantFuseSamePass::GetFuseConstNodes(ComputeGraphPtr &graph, | |||||
| TypeUtils::DataTypeToSerialString(data_type).c_str()); | TypeUtils::DataTypeToSerialString(data_type).c_str()); | ||||
| continue; | continue; | ||||
| } | } | ||||
| #ifndef ONLY_COMPILE_OPEN_SRC | |||||
| if ((type_size != 0) && (weight->MutableData().GetAlignedPtr() == nullptr)) { | |||||
| GELOGW("aligned_ptr is null while size is not 0"); | |||||
| continue; | |||||
| } | |||||
| #endif | |||||
| ++insert_const_nums; | ++insert_const_nums; | ||||
| SameConstKey map_key; | SameConstKey map_key; | ||||
| map_key.data_size = type_size; | map_key.data_size = type_size; | ||||
| #ifndef ONLY_COMPILE_OPEN_SRC | |||||
| map_key.aligned_ptr = weight->MutableData().GetAlignedPtr(); | |||||
| #else | |||||
| map_key.data = weight->GetData().GetData(); | map_key.data = weight->GetData().GetData(); | ||||
| #endif | |||||
| map_key.data_type = data_type; | map_key.data_type = data_type; | ||||
| map_key.format = output_tensor->GetFormat(); | map_key.format = output_tensor->GetFormat(); | ||||
| map_key.shape = output_tensor->GetShape().GetDims(); | map_key.shape = output_tensor->GetShape().GetDims(); | ||||
| @@ -21,14 +21,20 @@ | |||||
| #include <set> | #include <set> | ||||
| #include <utility> | #include <utility> | ||||
| #include <vector> | #include <vector> | ||||
| #ifndef ONLY_COMPILE_OPEN_SRC | |||||
| #include "graph/aligned_ptr.h" | |||||
| #endif | |||||
| #include "graph/types.h" | #include "graph/types.h" | ||||
| #include "inc/graph_pass.h" | #include "inc/graph_pass.h" | ||||
| namespace ge { | namespace ge { | ||||
| struct SameConstKey { | struct SameConstKey { | ||||
| int data_size; | int data_size; | ||||
| #ifndef ONLY_COMPILE_OPEN_SRC | |||||
| std::shared_ptr<AlignedPtr> aligned_ptr; | |||||
| #else | |||||
| const uint8_t *data; | const uint8_t *data; | ||||
| #endif | |||||
| DataType data_type; | DataType data_type; | ||||
| Format format; | Format format; | ||||
| std::vector<int64_t> shape; | std::vector<int64_t> shape; | ||||
| @@ -38,10 +44,19 @@ struct SameConstKey { | |||||
| if (data_size != key.data_size) { | if (data_size != key.data_size) { | ||||
| return data_size < key.data_size; | return data_size < key.data_size; | ||||
| } | } | ||||
| #ifndef ONLY_COMPILE_OPEN_SRC | |||||
| if (data_size != 0) { | |||||
| int ret = memcmp(aligned_ptr->Get(), key.aligned_ptr->Get(), data_size); | |||||
| if (ret != 0) { | |||||
| return ret < 0; | |||||
| } | |||||
| } | |||||
| #else | |||||
| int ret = memcmp(data, key.data, data_size); | int ret = memcmp(data, key.data, data_size); | ||||
| if (ret != 0) { | if (ret != 0) { | ||||
| return ret < 0; | return ret < 0; | ||||
| } | } | ||||
| #endif | |||||
| if (data_type != key.data_type) { | if (data_type != key.data_type) { | ||||
| return data_type < key.data_type; | return data_type < key.data_type; | ||||
| } | } | ||||
| @@ -0,0 +1,83 @@ | |||||
| /** | |||||
| * Copyright 2020 Huawei Technologies Co., Ltd | |||||
| * | |||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||||
| * you may not use this file except in compliance with the License. | |||||
| * You may obtain a copy of the License at | |||||
| * | |||||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||||
| * | |||||
| * Unless required by applicable law or agreed to in writing, software | |||||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| * See the License for the specific language governing permissions and | |||||
| * limitations under the License. | |||||
| */ | |||||
| #include "graph/passes/inplace_support_check_pass.h" | |||||
| #include "framework/common/debug/log.h" | |||||
| #include "graph/utils/graph_utils.h" | |||||
| #include "graph/debug/ge_attr_define.h" | |||||
| namespace { | |||||
| // Index of the output desc that InplaceSupportCheckPass reads and tags. | |||||
| constexpr uint32_t kInplaceSupportOutputIndex = 0; | |||||
| // Expected count, compared against the node's output data anchor count and against the number of consumers of a peer output anchor. | |||||
| constexpr uint32_t kInplaceSupportOutputNum = 1; | |||||
| // Producer node types whose outputs are skipped when looking for an in-place input. | |||||
| static const std::set<std::string> kSrcNodeTypes = { ge::DATA, ge::ANN_DATA, ge::AIPPDATA, | |||||
| ge::CONSTANT, ge::CONSTANTOP, | |||||
| ge::VARIABLE, ge::VARIABLEV2 }; | |||||
| } | |||||
| namespace ge { | |||||
| /// | |||||
| /// @brief For a node with exactly one output data anchor, find the first input whose producer is not | |||||
| ///        a source-type node, whose producer output has exactly one consumer, and whose data type and | |||||
| ///        dims equal those of the output; record that input index as INPLACE_SUPPORT_INPUT_INDEX | |||||
| ///        on the output desc and schedule the node for re-pass | |||||
| /// @param [in] node | |||||
| /// @return Status SUCCESS (also when nothing is tagged), FAILED if the attr cannot be set, or the | |||||
| ///         status produced by GE_CHECK_NOTNULL when a required pointer is null | |||||
| /// | |||||
| Status InplaceSupportCheckPass::Run(NodePtr &node) { | |||||
| GELOGD("InplaceSupportCheckPass running"); | |||||
| // Guard the node before the first dereference. | |||||
| GE_CHECK_NOTNULL(node); | |||||
| if (node->GetAllOutDataAnchorsSize() != kInplaceSupportOutputNum) { | |||||
| GELOGD("output num of node %s is not %u, skip InplaceSupportCheckPass", | |||||
| node->GetName().c_str(), kInplaceSupportOutputNum); | |||||
| return SUCCESS; | |||||
| } | |||||
| const auto op_desc = node->GetOpDesc(); | |||||
| GE_CHECK_NOTNULL(op_desc); | |||||
| // Held by value: the tensor desc these are read from is a temporary, so no reference into it is kept. | |||||
| const DataType output_type = op_desc->GetOutputDesc(kInplaceSupportOutputIndex).GetDataType(); | |||||
| const GeShape output_shape = op_desc->GetOutputDesc(kInplaceSupportOutputIndex).GetShape(); | |||||
| GELOGD("process InplaceSupportCheckPass on node %s", node->GetName().c_str()); | |||||
| for (const auto &in_data_anchor : node->GetAllInDataAnchors()) { | |||||
| const auto &peer_data_anchor = in_data_anchor->GetPeerOutAnchor(); | |||||
| if (peer_data_anchor == nullptr) { | |||||
| continue; | |||||
| } | |||||
| auto in_node = peer_data_anchor->GetOwnerNode(); | |||||
| // The owner node is dereferenced right below, so reject a null one explicitly. | |||||
| GE_CHECK_NOTNULL(in_node); | |||||
| if (kSrcNodeTypes.count(in_node->GetType()) > 0) { | |||||
| GELOGD("meet src_node %s", in_node->GetName().c_str()); | |||||
| continue; | |||||
| } | |||||
| if (peer_data_anchor->GetPeerInDataNodesSize() != kInplaceSupportOutputNum) { | |||||
| GELOGD("peer_data_anchor links with multi in_data_anchors"); | |||||
| continue; | |||||
| } | |||||
| int32_t inplace_input_idx = in_data_anchor->GetIdx(); | |||||
| const DataType input_type = op_desc->GetInputDesc(inplace_input_idx).GetDataType(); | |||||
| const GeShape input_shape = op_desc->GetInputDesc(inplace_input_idx).GetShape(); | |||||
| if (input_type != output_type) { | |||||
| // Enum values are cast so the variadic arguments match the %u conversions. | |||||
| GELOGW("DataType mismatch, in_idx=%d, input_type=%u, output_type=%u", inplace_input_idx, | |||||
| static_cast<uint32_t>(input_type), static_cast<uint32_t>(output_type)); | |||||
| continue; | |||||
| } | |||||
| if (input_shape.GetDims() != output_shape.GetDims()) { | |||||
| GELOGW("Shape mismatch, in_idx=%d, input_shape=[%s], output_shape=[%s]", | |||||
| inplace_input_idx, input_shape.ToString().c_str(), output_shape.ToString().c_str()); | |||||
| continue; | |||||
| } | |||||
| GELOGD("add attr INPLACE_SUPPORT_INPUT_INDEX on node %s, input_idx=%d", node->GetName().c_str(), inplace_input_idx); | |||||
| if (!AttrUtils::SetInt(op_desc->MutableOutputDesc(kInplaceSupportOutputIndex), | |||||
| INPLACE_SUPPORT_INPUT_INDEX, inplace_input_idx)) { | |||||
| GELOGE(FAILED, "Set attr INPLACE_SUPPORT_INPUT_INDEX on node %s failed.", node->GetName().c_str()); | |||||
| return FAILED; | |||||
| } | |||||
| AddRePassNode(node); | |||||
| // Only the first matching input is recorded. | |||||
| break; | |||||
| } | |||||
| GELOGD("InplaceSupportCheckPass success"); | |||||
| return SUCCESS; | |||||
| } | |||||
| } // namespace ge | |||||
| @@ -0,0 +1,28 @@ | |||||
| /** | |||||
| * Copyright 2020 Huawei Technologies Co., Ltd | |||||
| * | |||||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||||
| * you may not use this file except in compliance with the License. | |||||
| * You may obtain a copy of the License at | |||||
| * | |||||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||||
| * | |||||
| * Unless required by applicable law or agreed to in writing, software | |||||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| * See the License for the specific language governing permissions and | |||||
| * limitations under the License. | |||||
| */ | |||||
| #ifndef GE_GRAPH_PASSES_INPLACE_SUPPORT_CHECK_PASS_H_ | |||||
| #define GE_GRAPH_PASSES_INPLACE_SUPPORT_CHECK_PASS_H_ | |||||
| #include "graph/passes/base_pass.h" | |||||
| namespace ge { | |||||
| /// | |||||
| /// @brief Node pass that records, on a single-output node's output desc, the index of an input | |||||
| ///        whose data type and dims match the output (attr INPLACE_SUPPORT_INPUT_INDEX) | |||||
| /// | |||||
| class InplaceSupportCheckPass : public BaseNodePass { | |||||
| public: | |||||
| /// | |||||
| /// @brief Run the check on one node | |||||
| /// @param [in] node | |||||
| /// @return Status | |||||
| /// | |||||
| Status Run(NodePtr &node) override; | |||||
| }; | |||||
| } // namespace ge | |||||
| #endif // GE_GRAPH_PASSES_INPLACE_SUPPORT_CHECK_PASS_H_ | |||||
| @@ -598,7 +598,7 @@ Status SwitchToStreamSwitchPass::AddConstNode(const ComputeGraphPtr &graph, cons | |||||
| /// | /// | ||||
| Status SwitchToStreamSwitchPass::ModifySwitchInCtlEdges(const NodePtr &switch_node, const NodePtr &cast_node, | Status SwitchToStreamSwitchPass::ModifySwitchInCtlEdges(const NodePtr &switch_node, const NodePtr &cast_node, | ||||
| const std::set<NodePtr> &same_cond_switch) { | const std::set<NodePtr> &same_cond_switch) { | ||||
| GELOGD("ModifySwitchInCtlEdges: switch_node=%s, active_node=%s", switch_node->GetName().c_str(), | |||||
| GELOGD("ModifySwitchInCtlEdges: switch_node=%s, cast_node=%s", switch_node->GetName().c_str(), | |||||
| cast_node->GetName().c_str()); | cast_node->GetName().c_str()); | ||||
| std::string orig_switch_name = switch_node->GetName(); | std::string orig_switch_name = switch_node->GetName(); | ||||
| OpDescPtr switch_desc = switch_node->GetOpDesc(); | OpDescPtr switch_desc = switch_node->GetOpDesc(); | ||||
| @@ -19,7 +19,6 @@ | |||||
| #include <set> | #include <set> | ||||
| #include <string> | #include <string> | ||||
| #include "common/formats/format_transfers/format_transfer_fractal_nz.h" | #include "common/formats/format_transfers/format_transfer_fractal_nz.h" | ||||
| #include "common/formats/format_transfers/format_transfer_fractal_z.h" | |||||
| #include "common/formats/format_transfers/format_transfer_nchw_nc1hwc0.h" | #include "common/formats/format_transfers/format_transfer_nchw_nc1hwc0.h" | ||||
| #include "common/formats/format_transfers/format_transfer_nhwc_nc1hwc0.h" | #include "common/formats/format_transfers/format_transfer_nhwc_nc1hwc0.h" | ||||
| #include "common/formats/format_transfers/format_transfer_transpose.h" | #include "common/formats/format_transfers/format_transfer_transpose.h" | ||||
| @@ -38,7 +37,9 @@ | |||||
| #include "graph/passes/addn_pass.h" | #include "graph/passes/addn_pass.h" | ||||
| #include "graph/passes/aicpu_constant_folding_pass.h" | #include "graph/passes/aicpu_constant_folding_pass.h" | ||||
| #include "graph/passes/assert_pass.h" | #include "graph/passes/assert_pass.h" | ||||
| #ifdef ONLY_COMPILE_OPEN_SRC | |||||
| #include "graph/passes/assign_pass.h" | #include "graph/passes/assign_pass.h" | ||||
| #endif | |||||
| #include "graph/passes/common_subexpression_elimination_pass.h" | #include "graph/passes/common_subexpression_elimination_pass.h" | ||||
| #include "graph/passes/cond_pass.h" | #include "graph/passes/cond_pass.h" | ||||
| #include "graph/passes/cond_remove_pass.h" | #include "graph/passes/cond_remove_pass.h" | ||||
| @@ -1699,7 +1700,9 @@ Status GraphPrepare::PrepareOptimize() { | |||||
| VarIsInitializedOpPass var_is_initialized_pass; | VarIsInitializedOpPass var_is_initialized_pass; | ||||
| ParallelConcatStartOpPass parallel_concat_start_op_pass; | ParallelConcatStartOpPass parallel_concat_start_op_pass; | ||||
| IdentityPass identity_pass(false); | IdentityPass identity_pass(false); | ||||
| #ifdef ONLY_COMPILE_OPEN_SRC | |||||
| AssignPass assign_pass; | AssignPass assign_pass; | ||||
| #endif | |||||
| SnapshotPass snapshot_pass; | SnapshotPass snapshot_pass; | ||||
| if (!options_.train_graph_flag) { | if (!options_.train_graph_flag) { | ||||
| names_to_passes.emplace_back("DropOutPass", &dropout_pass); | names_to_passes.emplace_back("DropOutPass", &dropout_pass); | ||||
| @@ -1714,9 +1717,11 @@ Status GraphPrepare::PrepareOptimize() { | |||||
| names_to_passes.emplace_back("VarIsInitializedOpPass", &var_is_initialized_pass); | names_to_passes.emplace_back("VarIsInitializedOpPass", &var_is_initialized_pass); | ||||
| names_to_passes.emplace_back("ParallelConcatStartOpPass", ¶llel_concat_start_op_pass); | names_to_passes.emplace_back("ParallelConcatStartOpPass", ¶llel_concat_start_op_pass); | ||||
| names_to_passes.emplace_back("IdentityPass", &identity_pass); | names_to_passes.emplace_back("IdentityPass", &identity_pass); | ||||
| #ifdef ONLY_COMPILE_OPEN_SRC | |||||
| if (GetContext().GetHostExecFlag()) { | if (GetContext().GetHostExecFlag()) { | ||||
| names_to_passes.emplace_back("AssignPass", &assign_pass); | names_to_passes.emplace_back("AssignPass", &assign_pass); | ||||
| } | } | ||||
| #endif | |||||
| GE_TIMESTAMP_START(names_to_passes); | GE_TIMESTAMP_START(names_to_passes); | ||||
| ret = ge_passes.Run(names_to_passes); | ret = ge_passes.Run(names_to_passes); | ||||
| GE_TIMESTAMP_END(names_to_passes, "GraphPrepare::NamesToPasses"); | GE_TIMESTAMP_END(names_to_passes, "GraphPrepare::NamesToPasses"); | ||||
| @@ -20,6 +20,9 @@ | |||||
| #include "graph/manager/graph_caching_allocator.h" | #include "graph/manager/graph_caching_allocator.h" | ||||
| #include "graph/manager/graph_mem_allocator.h" | #include "graph/manager/graph_mem_allocator.h" | ||||
| #include "graph/manager/rdma_pool_allocator.h" | #include "graph/manager/rdma_pool_allocator.h" | ||||
| #ifndef ONLY_COMPILE_OPEN_SRC | |||||
| #include "graph/manager/host_mem_allocator.h" | |||||
| #endif | |||||
| namespace ge { | namespace ge { | ||||
| namespace hybrid { | namespace hybrid { | ||||
| @@ -64,7 +67,11 @@ void *NpuMemoryAllocator::Allocate(std::size_t size, AllocationAttr *attr) { | |||||
| if (mem_type == RDMA_HBM) { | if (mem_type == RDMA_HBM) { | ||||
| buffer = MemManager::Instance().RdmaPoolInstance(RT_MEMORY_HBM).Malloc(allocate_size, device_id_); | buffer = MemManager::Instance().RdmaPoolInstance(RT_MEMORY_HBM).Malloc(allocate_size, device_id_); | ||||
| } else if (mem_type == HOST_DDR) { | } else if (mem_type == HOST_DDR) { | ||||
| #ifndef ONLY_COMPILE_OPEN_SRC | |||||
| buffer = MemManager::Instance().HostMemInstance(RT_MEMORY_HBM).Malloc(allocate_size); | |||||
| #else | |||||
| buffer = malloc(allocate_size); | buffer = malloc(allocate_size); | ||||
| #endif | |||||
| } else { | } else { | ||||
| if (allocate_size > kMaxHbmMemorySize) { | if (allocate_size > kMaxHbmMemorySize) { | ||||
| GELOGE(PARAM_INVALID, "Invalid HBM memory size: %zu", allocate_size); | GELOGE(PARAM_INVALID, "Invalid HBM memory size: %zu", allocate_size); | ||||
| @@ -101,7 +108,11 @@ void NpuMemoryAllocator::Deallocate(void *data, MemStorageType mem_type) { | |||||
| if (mem_type == RDMA_HBM) { | if (mem_type == RDMA_HBM) { | ||||
| MemManager::Instance().RdmaPoolInstance(RT_MEMORY_HBM).Free(reinterpret_cast<uint8_t *>(data), device_id_); | MemManager::Instance().RdmaPoolInstance(RT_MEMORY_HBM).Free(reinterpret_cast<uint8_t *>(data), device_id_); | ||||
| } else if (mem_type == HOST_DDR) { | } else if (mem_type == HOST_DDR) { | ||||
| #ifndef ONLY_COMPILE_OPEN_SRC | |||||
| MemManager::Instance().HostMemInstance(RT_MEMORY_HBM).Free(data); | |||||
| #else | |||||
| free(data); | free(data); | ||||
| #endif | |||||
| } else { | } else { | ||||
| MemManager::Instance().CachingInstance(RT_MEMORY_HBM).Free(reinterpret_cast<uint8_t *>(data), device_id_); | MemManager::Instance().CachingInstance(RT_MEMORY_HBM).Free(reinterpret_cast<uint8_t *>(data), device_id_); | ||||
| } | } | ||||
| @@ -25,11 +25,13 @@ | |||||
| #include "graph/manager/graph_var_manager.h" | #include "graph/manager/graph_var_manager.h" | ||||
| #include "graph/manager/host_mem_manager.h" | #include "graph/manager/host_mem_manager.h" | ||||
| #include "graph/manager/trans_var_data_utils.h" | #include "graph/manager/trans_var_data_utils.h" | ||||
| #ifndef ONLY_COMPILE_OPEN_SRC | |||||
| #include "graph/manager/graph_mem_allocator.h" | |||||
| #include "graph/manager/host_mem_allocator.h" | |||||
| #endif | |||||
| #include "graph/utils/graph_utils.h" | #include "graph/utils/graph_utils.h" | ||||
| #include "hybrid/common/npu_memory_allocator.h" | #include "hybrid/common/npu_memory_allocator.h" | ||||
| #include "hybrid/node_executor/node_executor.h" | #include "hybrid/node_executor/node_executor.h" | ||||
| #include "framework/common/debug/ge_log.h" | |||||
| #include "graph/utils/attr_utils.h" | |||||
| namespace ge { | namespace ge { | ||||
| namespace hybrid { | namespace hybrid { | ||||
| @@ -852,9 +854,24 @@ Status HybridModelBuilder::InitConstantOps() { | |||||
| std::unique_ptr<TensorValue> var_tensor; | std::unique_ptr<TensorValue> var_tensor; | ||||
| if (GetContext().GetHostExecFlag()) { | if (GetContext().GetHostExecFlag()) { | ||||
| #ifndef ONLY_COMPILE_OPEN_SRC | |||||
| GE_CHECK_NOTNULL(ge_tensor); | |||||
| // Address for eigen kernel should be aligned with 16 bytes | |||||
| // Tensors return by api GetWeights share data with proto, whose addr is not confirmed to be aligned | |||||
| GeTensor aligned_tensor = ge_tensor->Clone(); | |||||
| GELOGD("Init tensor with host constant %s size = %zu", var_name.c_str(), aligned_tensor.MutableData().GetSize()); | |||||
| if (MemManager::Instance().HostMemInstance(RT_MEMORY_HBM).Malloc(aligned_tensor.GetAlignedPtr(), | |||||
| aligned_tensor.GetData().size()) == nullptr) { | |||||
| GELOGE(MEMALLOC_FAILED, "Malloc host memory for an existed GeTensor failed."); | |||||
| return MEMALLOC_FAILED; | |||||
| } | |||||
| var_tensor.reset(new(std::nothrow)TensorValue(aligned_tensor.MutableData().data(), | |||||
| aligned_tensor.GetData().size())); | |||||
| #else | |||||
| auto buffer = ge_tensor->MutableData(); | auto buffer = ge_tensor->MutableData(); | ||||
| GELOGD("Init tensor with host constant. size = %zu", buffer.GetSize()); | GELOGD("Init tensor with host constant. size = %zu", buffer.GetSize()); | ||||
| var_tensor.reset(new(std::nothrow)TensorValue(buffer.GetData(), buffer.GetSize())); | var_tensor.reset(new(std::nothrow)TensorValue(buffer.GetData(), buffer.GetSize())); | ||||
| #endif | |||||
| } else { | } else { | ||||
| GE_CHK_STATUS_RET_NOLOG(VarNodeToTensor(var_node, var_tensor)); | GE_CHK_STATUS_RET_NOLOG(VarNodeToTensor(var_node, var_tensor)); | ||||
| GELOGD("Init const op tensor. name = %s, size = %ld", var_name.c_str(), var_tensor->GetSize()); | GELOGD("Init const op tensor. name = %s, size = %ld", var_name.c_str(), var_tensor->GetSize()); | ||||
| @@ -909,9 +926,21 @@ Status HybridModelBuilder::InitVariableTensors() { | |||||
| GELOGE(GE_GRAPH_MALLOC_FAILED, "Host variable [%s] malloc failed.", it.first.c_str()); | GELOGE(GE_GRAPH_MALLOC_FAILED, "Host variable [%s] malloc failed.", it.first.c_str()); | ||||
| return GE_GRAPH_MALLOC_FAILED; | return GE_GRAPH_MALLOC_FAILED; | ||||
| } | } | ||||
| #ifndef ONLY_COMPILE_OPEN_SRC | |||||
| if (MemManager::Instance().HostMemInstance(RT_MEMORY_HBM).Malloc(mem_info.host_aligned_ptr, | |||||
| tensor_size) == nullptr) { | |||||
| GELOGE(MEMALLOC_FAILED, "Malloc host memory for an existed GeTensor failed."); | |||||
| return MEMALLOC_FAILED; | |||||
| } | |||||
| GELOGD("Host variable [%s] malloc success, size=%lld.", it.first.c_str(), tensor_size); | |||||
| std::unique_ptr<TensorValue> tensor(new (std::nothrow) TensorValue(mem_info.host_aligned_ptr->MutableGet(), | |||||
| tensor_size)); | |||||
| #else | |||||
| GELOGD("Host variable [%s] malloc success.", it.first.c_str()); | GELOGD("Host variable [%s] malloc success.", it.first.c_str()); | ||||
| std::unique_ptr<TensorValue> tensor(new (std::nothrow) TensorValue(mem_info.host_address, tensor_size)); | std::unique_ptr<TensorValue> tensor(new (std::nothrow) TensorValue(mem_info.host_address, tensor_size)); | ||||
| #endif | |||||
| GE_CHECK_NOTNULL(tensor); | GE_CHECK_NOTNULL(tensor); | ||||
| hybrid_model_.variable_tensors_.emplace(it.first, std::move(tensor)); | hybrid_model_.variable_tensors_.emplace(it.first, std::move(tensor)); | ||||
| } | } | ||||
| @@ -18,6 +18,10 @@ | |||||
| #include "hybrid/node_executor/host_cpu/kernel_factory.h" | #include "hybrid/node_executor/host_cpu/kernel_factory.h" | ||||
| #include "graph/passes/folding_pass.h" | #include "graph/passes/folding_pass.h" | ||||
| #include "hybrid/model/hybrid_model.h" | #include "hybrid/model/hybrid_model.h" | ||||
| #ifndef ONLY_COMPILE_OPEN_SRC | |||||
| #include "graph/manager/graph_mem_allocator.h" | |||||
| #include "graph/manager/host_mem_allocator.h" | |||||
| #endif | |||||
| #include "ge_local_engine/engine/host_cpu_engine.h" | #include "ge_local_engine/engine/host_cpu_engine.h" | ||||
| namespace ge { | namespace ge { | ||||
| @@ -50,15 +54,23 @@ Status CpuKernelNodeTask::Execute(TaskContext &context) { | |||||
| auto input_desc_ptr = context.GetInputDesc(i); | auto input_desc_ptr = context.GetInputDesc(i); | ||||
| GE_CHECK_NOTNULL(input_desc_ptr); | GE_CHECK_NOTNULL(input_desc_ptr); | ||||
| const auto &input_desc = *input_desc_ptr; | const auto &input_desc = *input_desc_ptr; | ||||
| #ifndef ONLY_COMPILE_OPEN_SRC | |||||
| auto tensor = context.GetInput(i); | |||||
| GE_CHECK_NOTNULL(tensor); | |||||
| auto item = MemManager::Instance().HostMemInstance(RT_MEMORY_HBM).GetAlignedPtr(tensor->GetData()); | |||||
| GE_CHECK_NOTNULL(item.second); | |||||
| auto in_tensor = MakeShared<GeTensor>(input_desc, item.second, item.first); | |||||
| #else | |||||
| GE_CHECK_NOTNULL(context.GetInput(i)); | GE_CHECK_NOTNULL(context.GetInput(i)); | ||||
| auto in_tensor = MakeShared<GeTensor>(input_desc, | auto in_tensor = MakeShared<GeTensor>(input_desc, | ||||
| reinterpret_cast<const uint8_t *>(context.GetInput(i)->GetData()), | reinterpret_cast<const uint8_t *>(context.GetInput(i)->GetData()), | ||||
| context.GetInput(i)->GetSize()); | context.GetInput(i)->GetSize()); | ||||
| #endif | |||||
| GE_CHECK_NOTNULL(in_tensor); | GE_CHECK_NOTNULL(in_tensor); | ||||
| in_tensor->MutableTensorDesc().SetDataType(input_desc.GetDataType()); | in_tensor->MutableTensorDesc().SetDataType(input_desc.GetDataType()); | ||||
| in_tensor->MutableTensorDesc().SetShape(input_desc.GetShape()); | in_tensor->MutableTensorDesc().SetShape(input_desc.GetShape()); | ||||
| inputs.emplace_back(in_tensor); | inputs.emplace_back(in_tensor); | ||||
| GELOGI("node:%s allocate input %d, size=%zu", op_desc->GetName().c_str(), i, in_tensor->GetData().size()); | |||||
| GELOGD("node:%s allocate input %d, size=%zu", op_desc->GetName().c_str(), i, in_tensor->GetData().size()); | |||||
| } | } | ||||
| std::vector<GeTensorPtr> outputs; | std::vector<GeTensorPtr> outputs; | ||||
| @@ -72,14 +84,20 @@ Status CpuKernelNodeTask::Execute(TaskContext &context) { | |||||
| } | } | ||||
| auto tensor = context.GetOutput(i); | auto tensor = context.GetOutput(i); | ||||
| GE_CHECK_NOTNULL(tensor); | GE_CHECK_NOTNULL(tensor); | ||||
| #ifndef ONLY_COMPILE_OPEN_SRC | |||||
| auto item = MemManager::Instance().HostMemInstance(RT_MEMORY_HBM).GetAlignedPtr(tensor->GetData()); | |||||
| GE_CHECK_NOTNULL(item.second); | |||||
| auto out_tensor = MakeShared<GeTensor>(output_desc, item.second, item.first); | |||||
| #else | |||||
| auto out_tensor = MakeShared<GeTensor>(output_desc, | auto out_tensor = MakeShared<GeTensor>(output_desc, | ||||
| reinterpret_cast<const uint8_t *>(tensor->GetData()), | reinterpret_cast<const uint8_t *>(tensor->GetData()), | ||||
| tensor->GetSize()); | tensor->GetSize()); | ||||
| #endif | |||||
| GE_CHECK_NOTNULL(out_tensor); | GE_CHECK_NOTNULL(out_tensor); | ||||
| out_tensor->MutableTensorDesc().SetDataType(output_desc.GetDataType()); | out_tensor->MutableTensorDesc().SetDataType(output_desc.GetDataType()); | ||||
| out_tensor->MutableTensorDesc().SetShape(output_desc.GetShape()); | out_tensor->MutableTensorDesc().SetShape(output_desc.GetShape()); | ||||
| outputs.emplace_back(out_tensor); | outputs.emplace_back(out_tensor); | ||||
| GELOGI("node:%s allocate output %d, size=%zu", op_desc->GetName().c_str(), i, out_tensor->GetData().size()); | |||||
| GELOGD("node:%s allocate output %d, size=%zu", op_desc->GetName().c_str(), i, out_tensor->GetData().size()); | |||||
| } | } | ||||
| return HostCpuEngine::GetInstance().Run(node_, inputs, outputs); | return HostCpuEngine::GetInstance().Run(node_, inputs, outputs); | ||||
| @@ -704,6 +704,7 @@ add_library(ge_ut_common STATIC ${COMMON_SRC_FILES} ${PROTO_SRCS} ${PROTO_HDRS}) | |||||
| target_compile_definitions(ge_ut_common PRIVATE | target_compile_definitions(ge_ut_common PRIVATE | ||||
| google=ascend_private | google=ascend_private | ||||
| $<$<STREQUAL:${ENABLE_OPEN_SRC},True>:ONLY_COMPILE_OPEN_SRC> | |||||
| ) | ) | ||||
| target_link_libraries(ge_ut_common PRIVATE | target_link_libraries(ge_ut_common PRIVATE | ||||
| @@ -718,6 +719,7 @@ add_library(ge_ut_common_format STATIC ${COMMON_SRC_FILES} ${COMMON_FORMAT_SRC_F | |||||
| target_compile_definitions(ge_ut_common_format PRIVATE | target_compile_definitions(ge_ut_common_format PRIVATE | ||||
| google=ascend_private | google=ascend_private | ||||
| $<$<STREQUAL:${ENABLE_OPEN_SRC},True>:ONLY_COMPILE_OPEN_SRC> | |||||
| ) | ) | ||||
| target_link_libraries(ge_ut_common_format PRIVATE | target_link_libraries(ge_ut_common_format PRIVATE | ||||
| @@ -774,6 +776,7 @@ add_library(ge_load_common STATIC ${GRAPH_LOAD_COMMON_SRC_FILES} ${PROTO_SRCS} $ | |||||
| target_compile_definitions(ge_load_common PRIVATE | target_compile_definitions(ge_load_common PRIVATE | ||||
| google=ascend_private | google=ascend_private | ||||
| $<$<STREQUAL:${ENABLE_OPEN_SRC},True>:ONLY_COMPILE_OPEN_SRC> | |||||
| ) | ) | ||||
| target_link_libraries(ge_load_common PRIVATE | target_link_libraries(ge_load_common PRIVATE | ||||
| @@ -788,6 +791,7 @@ add_library(ge_execute_common STATIC ${GRAPH_EXECUTE_COMMON_SRC_FILES} ${PROTO_S | |||||
| target_compile_definitions(ge_execute_common PRIVATE | target_compile_definitions(ge_execute_common PRIVATE | ||||
| google=ascend_private | google=ascend_private | ||||
| $<$<STREQUAL:${ENABLE_OPEN_SRC},True>:ONLY_COMPILE_OPEN_SRC> | |||||
| ) | ) | ||||
| target_link_libraries(ge_execute_common PRIVATE | target_link_libraries(ge_execute_common PRIVATE | ||||