Browse Source

回退 'Pull Request !689 : GeTensor aligned addr & zero copy support'

tags/v1.2.0
王涛 Gitee 3 years ago
parent
commit
4207a54092
25 changed files with 26 additions and 578 deletions
  1. +0
    -6
      ge/CMakeLists.txt
  2. +0
    -3
      ge/executor/CMakeLists.txt
  3. +0
    -1
      ge/executor/module.mk
  4. +0
    -2
      ge/ge_inference.mk
  5. +0
    -26
      ge/ge_local_engine/engine/host_cpu_engine.cc
  6. +0
    -2
      ge/ge_runner.mk
  7. +0
    -14
      ge/graph/manager/graph_manager.cc
  8. +1
    -17
      ge/graph/manager/graph_mem_allocator.cc
  9. +1
    -9
      ge/graph/manager/graph_mem_allocator.h
  10. +0
    -69
      ge/graph/manager/host_mem_allocator.cc
  11. +0
    -58
      ge/graph/manager/host_mem_allocator.h
  12. +1
    -15
      ge/graph/manager/host_mem_manager.cc
  13. +0
    -4
      ge/graph/manager/host_mem_manager.h
  14. +10
    -127
      ge/graph/passes/assign_pass.cc
  15. +0
    -15
      ge/graph/passes/assign_pass.h
  16. +6
    -10
      ge/graph/passes/constant_fuse_same_pass.cc
  17. +1
    -16
      ge/graph/passes/constant_fuse_same_pass.h
  18. +0
    -83
      ge/graph/passes/inplace_support_check_pass.cc
  19. +0
    -28
      ge/graph/passes/inplace_support_check_pass.h
  20. +1
    -1
      ge/graph/passes/switch_to_stream_switch_pass.cc
  21. +1
    -6
      ge/graph/preprocess/graph_preprocess.cc
  22. +0
    -11
      ge/hybrid/common/npu_memory_allocator.cc
  23. +2
    -31
      ge/hybrid/model/hybrid_model_builder.cc
  24. +2
    -20
      ge/hybrid/node_executor/host_cpu/host_cpu_node_executor.cc
  25. +0
    -4
      tests/ut/ge/CMakeLists.txt

+ 0
- 6
ge/CMakeLists.txt View File

@@ -125,7 +125,6 @@ set(TRAIN_SRC_LIST
"graph/manager/graph_var_manager.cc" "graph/manager/graph_var_manager.cc"
"graph/manager/host_mem_manager.cc" "graph/manager/host_mem_manager.cc"
"graph/manager/rdma_pool_allocator.cc" "graph/manager/rdma_pool_allocator.cc"
$<$<NOT:$<STREQUAL:${ENABLE_OPEN_SRC},True>>:graph/manager/host_mem_allocator.cc>
"graph/manager/memory_api.cc" "graph/manager/memory_api.cc"
"graph/manager/model_manager/event_manager.cc" "graph/manager/model_manager/event_manager.cc"
"graph/manager/trans_var_data_utils.cc" "graph/manager/trans_var_data_utils.cc"
@@ -167,7 +166,6 @@ set(TRAIN_SRC_LIST
"graph/passes/hccl_group_pass.cc" "graph/passes/hccl_group_pass.cc"
"graph/passes/enter_pass.cc" "graph/passes/enter_pass.cc"
"graph/passes/assign_pass.cc" "graph/passes/assign_pass.cc"
$<$<NOT:$<STREQUAL:${ENABLE_OPEN_SRC},True>>:graph/passes/inplace_support_check_pass.cc>
"graph/passes/flow_ctrl_pass.cc" "graph/passes/flow_ctrl_pass.cc"
"graph/passes/global_step_insert_pass.cc" "graph/passes/global_step_insert_pass.cc"
"host_kernels/transpose_kernel.cc" "host_kernels/transpose_kernel.cc"
@@ -403,7 +401,6 @@ set(INFER_SRC_LIST
"graph/manager/graph_var_manager.cc" "graph/manager/graph_var_manager.cc"
"graph/manager/host_mem_manager.cc" "graph/manager/host_mem_manager.cc"
"graph/manager/rdma_pool_allocator.cc" "graph/manager/rdma_pool_allocator.cc"
$<$<NOT:$<STREQUAL:${ENABLE_OPEN_SRC},True>>:graph/manager/host_mem_allocator.cc>
"graph/manager/graph_mem_allocator.cc" "graph/manager/graph_mem_allocator.cc"
"graph/manager/graph_caching_allocator.cc" "graph/manager/graph_caching_allocator.cc"
"model/ge_model.cc" "model/ge_model.cc"
@@ -525,7 +522,6 @@ set(INFER_SRC_LIST
"graph/passes/for_pass.cc" "graph/passes/for_pass.cc"
"graph/passes/enter_pass.cc" "graph/passes/enter_pass.cc"
"graph/passes/assign_pass.cc" "graph/passes/assign_pass.cc"
$<$<NOT:$<STREQUAL:${ENABLE_OPEN_SRC},True>>:graph/passes/inplace_support_check_pass.cc>
"graph/passes/addn_pass.cc" "graph/passes/addn_pass.cc"
"graph/passes/common_subexpression_elimination_pass.cc" "graph/passes/common_subexpression_elimination_pass.cc"
"graph/passes/remove_same_const_pass.cc" "graph/passes/remove_same_const_pass.cc"
@@ -624,7 +620,6 @@ target_compile_definitions(ge_runner PRIVATE
FMK_SUPPORT_DUMP FMK_SUPPORT_DUMP
DAVINCI_CLOUD DAVINCI_CLOUD
google=ascend_private google=ascend_private
$<$<STREQUAL:${ENABLE_OPEN_SRC},True>:ONLY_COMPILE_OPEN_SRC>
) )


target_compile_options(ge_runner PRIVATE target_compile_options(ge_runner PRIVATE
@@ -692,7 +687,6 @@ target_compile_definitions(ge_compiler PRIVATE
FMK_HOST_INFER FMK_HOST_INFER
COMPILE_OMG_PACKAGE COMPILE_OMG_PACKAGE
google=ascend_private google=ascend_private
$<$<STREQUAL:${ENABLE_OPEN_SRC},True>:ONLY_COMPILE_OPEN_SRC>
) )


target_compile_options(ge_compiler PRIVATE target_compile_options(ge_compiler PRIVATE


+ 0
- 3
ge/executor/CMakeLists.txt View File

@@ -28,7 +28,6 @@ set(SRC_LIST
"../graph/manager/trans_var_data_utils.cc" "../graph/manager/trans_var_data_utils.cc"
"../graph/manager/util/debug.cc" "../graph/manager/util/debug.cc"
"../graph/manager/rdma_pool_allocator.cc" "../graph/manager/rdma_pool_allocator.cc"
$<$<NOT:$<STREQUAL:${ENABLE_OPEN_SRC},True>>:../graph/manager/host_mem_allocator.cc>
"../hybrid/node_executor/aicpu/aicpu_ext_info.cc" "../hybrid/node_executor/aicpu/aicpu_ext_info.cc"
"../model/ge_model.cc" "../model/ge_model.cc"
"../model/ge_root_model.cc" "../model/ge_root_model.cc"
@@ -175,7 +174,6 @@ target_compile_definitions(ge_executor PRIVATE
$<IF:$<STREQUAL:${TARGET_SYSTEM_NAME},Windows>,OS_TYPE=WIN,OS_TYPE=0> $<IF:$<STREQUAL:${TARGET_SYSTEM_NAME},Windows>,OS_TYPE=WIN,OS_TYPE=0>
$<$<STREQUAL:${TARGET_SYSTEM_NAME},Windows>:SECUREC_USING_STD_SECURE_LIB=0 NOMINMAX> $<$<STREQUAL:${TARGET_SYSTEM_NAME},Windows>:SECUREC_USING_STD_SECURE_LIB=0 NOMINMAX>
LOG_CPP LOG_CPP
$<$<STREQUAL:${ENABLE_OPEN_SRC},True>:ONLY_COMPILE_OPEN_SRC>
) )


target_include_directories(ge_executor PRIVATE target_include_directories(ge_executor PRIVATE
@@ -218,7 +216,6 @@ target_compile_definitions(ge_executor_shared PRIVATE
PROTOBUF_INLINE_NOT_IN_HEADERS=0 PROTOBUF_INLINE_NOT_IN_HEADERS=0
DAVINCI_SUPPORT_PROFILING DAVINCI_SUPPORT_PROFILING
google=ascend_private google=ascend_private
$<$<STREQUAL:${ENABLE_OPEN_SRC},True>:ONLY_COMPILE_OPEN_SRC>
) )


target_include_directories(ge_executor_shared PRIVATE target_include_directories(ge_executor_shared PRIVATE


+ 0
- 1
ge/executor/module.mk View File

@@ -15,7 +15,6 @@ local_ge_executor_src_files := \
../graph/manager/graph_manager_utils.cc \ ../graph/manager/graph_manager_utils.cc \
../graph/manager/graph_var_manager.cc \ ../graph/manager/graph_var_manager.cc \
../graph/manager/rdma_pool_allocator.cc \ ../graph/manager/rdma_pool_allocator.cc \
../graph/manager/host_mem_allocator.cc \
../graph/manager/graph_mem_allocator.cc \ ../graph/manager/graph_mem_allocator.cc \
../graph/manager/graph_caching_allocator.cc \ ../graph/manager/graph_caching_allocator.cc \
../graph/manager/trans_var_data_utils.cc \ ../graph/manager/trans_var_data_utils.cc \


+ 0
- 2
ge/ge_inference.mk View File

@@ -64,7 +64,6 @@ GRAPH_MANAGER_LOCAL_SRC_FILES := \
graph/manager/graph_var_manager.cc \ graph/manager/graph_var_manager.cc \
graph/manager/host_mem_manager.cc \ graph/manager/host_mem_manager.cc \
graph/manager/rdma_pool_allocator.cc \ graph/manager/rdma_pool_allocator.cc \
graph/manager/host_mem_allocator.cc \
graph/manager/graph_mem_allocator.cc \ graph/manager/graph_mem_allocator.cc \
graph/manager/graph_caching_allocator.cc \ graph/manager/graph_caching_allocator.cc \


@@ -197,7 +196,6 @@ OMG_HOST_SRC_FILES := \
graph/passes/for_pass.cc \ graph/passes/for_pass.cc \
graph/passes/enter_pass.cc \ graph/passes/enter_pass.cc \
graph/passes/assign_pass.cc \ graph/passes/assign_pass.cc \
graph/passes/inplace_support_check_pass.cc \
graph/passes/addn_pass.cc \ graph/passes/addn_pass.cc \
graph/passes/common_subexpression_elimination_pass.cc \ graph/passes/common_subexpression_elimination_pass.cc \
graph/passes/transop_symmetry_elimination_pass.cc \ graph/passes/transop_symmetry_elimination_pass.cc \


+ 0
- 26
ge/ge_local_engine/engine/host_cpu_engine.cc View File

@@ -26,31 +26,6 @@
#include "common/math/math_util.h" #include "common/math/math_util.h"


namespace { namespace {
#ifndef ONLY_COMPILE_OPEN_SRC
/*
 * Expands to one `case (DTYPE)` label of a switch statement at the expansion site.
 *
 * The expansion relies on these names being in scope where the macro is used:
 * need_create_flag, data_num, out_desc, op_desc, i, outputs and named_outputs.
 *
 *  - When need_create_flag is set, a GeTensor of data_num * sizeof(TYPE) bytes is created
 *    from out_desc, its data type and shape are copied from out_desc, and it is appended
 *    to outputs.
 *  - Otherwise the existing outputs[i] is reused.
 *
 * In both cases the tensor is wrapped with TensorAdapter::AsTensor and stored in
 * named_outputs under the output name of index i; an empty output name aborts through
 * GE_RETURN_WITH_LOG_IF_TRUE.
 *
 * `size` is a uint64_t, so it is logged with %llu through an explicit cast to
 * unsigned long long instead of the signed %lld specifier.
 */
#define CREATE_OUTPUT_CASE(DTYPE, TYPE) \
  case (DTYPE): { \
    GeTensorPtr ge_tensor = nullptr; \
    if (need_create_flag) { \
      uint64_t size = data_num * sizeof(TYPE); \
      ge_tensor = MakeShared<GeTensor>(out_desc, size); \
      GE_CHECK_NOTNULL(ge_tensor); \
      GELOGD("node:%s allocate output %zu success, size=%llu", op_desc->GetName().c_str(), i, \
             static_cast<unsigned long long>(size)); \
      ge_tensor->MutableTensorDesc().SetDataType(out_desc.GetDataType()); \
      ge_tensor->MutableTensorDesc().SetShape(out_desc.GetShape()); \
      outputs.emplace_back(ge_tensor); \
    } else { \
      ge_tensor = outputs[i]; \
      GE_CHECK_NOTNULL(ge_tensor); \
      GELOGD("node:%s existed output %zu", op_desc->GetName().c_str(), i); \
    } \
    auto tensor = TensorAdapter::AsTensor(*ge_tensor); \
    auto tensor_name = op_desc->GetOutputNameByIndex(i); \
    GE_RETURN_WITH_LOG_IF_TRUE(tensor_name.empty(), "Failed to get output name. node = %s, index = %zu", \
                               op_desc->GetName().c_str(), i); \
    named_outputs.emplace(tensor_name, tensor); \
    break; \
  }
#else
#define CREATE_OUTPUT_CASE(DTYPE, TYPE) \ #define CREATE_OUTPUT_CASE(DTYPE, TYPE) \
case (DTYPE): { \ case (DTYPE): { \
GeTensorPtr ge_tensor = nullptr; \ GeTensorPtr ge_tensor = nullptr; \
@@ -86,7 +61,6 @@ namespace {
named_outputs.emplace(tensor_name, tensor); \ named_outputs.emplace(tensor_name, tensor); \
break; \ break; \
} }
#endif
} }


namespace ge { namespace ge {


+ 0
- 2
ge/ge_runner.mk View File

@@ -94,7 +94,6 @@ LIBGE_LOCAL_SRC_FILES := \
graph/manager/graph_var_manager.cc \ graph/manager/graph_var_manager.cc \
graph/manager/host_mem_manager.cc \ graph/manager/host_mem_manager.cc \
graph/manager/rdma_pool_allocator.cc \ graph/manager/rdma_pool_allocator.cc \
graph/manager/host_mem_allocator.cc \
graph/manager/memory_api.cc \ graph/manager/memory_api.cc \
graph/manager/model_manager/event_manager.cc \ graph/manager/model_manager/event_manager.cc \
graph/manager/trans_var_data_utils.cc \ graph/manager/trans_var_data_utils.cc \
@@ -136,7 +135,6 @@ LIBGE_LOCAL_SRC_FILES := \
graph/passes/hccl_group_pass.cc \ graph/passes/hccl_group_pass.cc \
graph/passes/enter_pass.cc \ graph/passes/enter_pass.cc \
graph/passes/assign_pass.cc \ graph/passes/assign_pass.cc \
graph/passes/inplace_support_check_pass.cc \
graph/passes/flow_ctrl_pass.cc \ graph/passes/flow_ctrl_pass.cc \
graph/passes/global_step_insert_pass.cc \ graph/passes/global_step_insert_pass.cc \
host_kernels/transpose_kernel.cc \ host_kernels/transpose_kernel.cc \


+ 0
- 14
ge/graph/manager/graph_manager.cc View File

@@ -38,10 +38,6 @@
#include "graph/partition/stage_partition.h" #include "graph/partition/stage_partition.h"
#include "graph/passes/addn_pass.h" #include "graph/passes/addn_pass.h"
#include "graph/passes/bitcast_pass.h" #include "graph/passes/bitcast_pass.h"
#ifndef ONLY_COMPILE_OPEN_SRC
#include "graph/passes/assign_pass.h"
#include "graph/passes/inplace_support_check_pass.h"
#endif
#include "graph/passes/atomic_addr_clean_pass.h" #include "graph/passes/atomic_addr_clean_pass.h"
#include "graph/passes/attach_stream_label_pass.h" #include "graph/passes/attach_stream_label_pass.h"
#include "graph/passes/cast_remove_pass.h" #include "graph/passes/cast_remove_pass.h"
@@ -2251,20 +2247,10 @@ Status GraphManager::OptimizeStage2(ge::ComputeGraphPtr &compute_graph) {
ReshapeRemovePass reshape_remove_pass; ReshapeRemovePass reshape_remove_pass;
CondRemovePass condition_remove_pass; CondRemovePass condition_remove_pass;
BitcastPass bitcast_pass; BitcastPass bitcast_pass;
#ifndef ONLY_COMPILE_OPEN_SRC
AssignPass assign_pass;
InplaceSupportCheckPass inplace_support_check_pass;
#endif
names_to_passes.emplace_back("ConstantFoldingPass", &constant_folding_pass); names_to_passes.emplace_back("ConstantFoldingPass", &constant_folding_pass);
names_to_passes.emplace_back("ReshapeRemovePass", &reshape_remove_pass); names_to_passes.emplace_back("ReshapeRemovePass", &reshape_remove_pass);
names_to_passes.emplace_back("CondRemovePass", &condition_remove_pass); names_to_passes.emplace_back("CondRemovePass", &condition_remove_pass);
names_to_passes.emplace_back("BitcastPass", &bitcast_pass); names_to_passes.emplace_back("BitcastPass", &bitcast_pass);
#ifndef ONLY_COMPILE_OPEN_SRC
if (GetContext().GetHostExecFlag()) {
names_to_passes.emplace_back("AssignPass", &assign_pass);
names_to_passes.emplace_back("InplaceSupportCheckPass", &inplace_support_check_pass);
}
#endif
GE_TIMESTAMP_START(names_to_passes); GE_TIMESTAMP_START(names_to_passes);
ret = GEPass(compute_graph).Run(names_to_passes); ret = GEPass(compute_graph).Run(names_to_passes);
GE_TIMESTAMP_END(names_to_passes, "OptimizeStage2::MergedGraphNameToPasses"); GE_TIMESTAMP_END(names_to_passes, "OptimizeStage2::MergedGraphNameToPasses");


+ 1
- 17
ge/graph/manager/graph_mem_allocator.cc View File

@@ -19,9 +19,7 @@
#include <string> #include <string>
#include "graph/manager/graph_caching_allocator.h" #include "graph/manager/graph_caching_allocator.h"
#include "graph/manager/rdma_pool_allocator.h" #include "graph/manager/rdma_pool_allocator.h"
#ifndef ONLY_COMPILE_OPEN_SRC
#include "graph/manager/host_mem_allocator.h"
#endif

namespace ge { namespace ge {
void MemoryAllocator::Initialize(uint32_t device_id) { void MemoryAllocator::Initialize(uint32_t device_id) {
GELOGI("MemoryAllocator::Initialize"); GELOGI("MemoryAllocator::Initialize");
@@ -192,12 +190,6 @@ Status MemManager::Initialize(const std::vector<rtMemType_t> &memory_type) {
GELOGE(ge::INTERNAL_ERROR, "Create RdmaAllocator failed."); GELOGE(ge::INTERNAL_ERROR, "Create RdmaAllocator failed.");
return ge::INTERNAL_ERROR; return ge::INTERNAL_ERROR;
} }
#ifndef ONLY_COMPILE_OPEN_SRC
if (InitAllocator(memory_type, host_allocator_map_) != SUCCESS) {
GELOGE(ge::INTERNAL_ERROR, "Create HostMemAllocator failed.");
return ge::INTERNAL_ERROR;
}
#endif
return SUCCESS; return SUCCESS;
} }


@@ -219,9 +211,6 @@ void MemManager::Finalize() noexcept {
// caching and rdma allocator use memory allocator, so finalize them first // caching and rdma allocator use memory allocator, so finalize them first
FinalizeAllocatorMap(caching_allocator_map_); FinalizeAllocatorMap(caching_allocator_map_);
FinalizeAllocatorMap(rdma_allocator_map_); FinalizeAllocatorMap(rdma_allocator_map_);
#ifndef ONLY_COMPILE_OPEN_SRC
FinalizeAllocatorMap(host_allocator_map_);
#endif
FinalizeAllocatorMap(memory_allocator_map_); FinalizeAllocatorMap(memory_allocator_map_);
} }


@@ -250,9 +239,4 @@ CachingAllocator &MemManager::CachingInstance(rtMemType_t memory_type) {
RdmaPoolAllocator &MemManager::RdmaPoolInstance(rtMemType_t memory_type) { RdmaPoolAllocator &MemManager::RdmaPoolInstance(rtMemType_t memory_type) {
return Instance().GetAllocator(memory_type, rdma_allocator_map_); return Instance().GetAllocator(memory_type, rdma_allocator_map_);
} }
#ifndef ONLY_COMPILE_OPEN_SRC
HostMemAllocator &MemManager::HostMemInstance(rtMemType_t memory_type) {
return Instance().GetAllocator(memory_type, host_allocator_map_);
}
#endif
} // namespace ge } // namespace ge

+ 1
- 9
ge/graph/manager/graph_mem_allocator.h View File

@@ -139,9 +139,7 @@ class MemoryAllocator {
using MemoryAllocatorPtr = std::shared_ptr<MemoryAllocator>; using MemoryAllocatorPtr = std::shared_ptr<MemoryAllocator>;
class CachingAllocator; class CachingAllocator;
class RdmaPoolAllocator; class RdmaPoolAllocator;
#ifndef ONLY_COMPILE_OPEN_SRC
class HostMemAllocator;
#endif

class MemManager { class MemManager {
public: public:
MemManager(); MemManager();
@@ -150,9 +148,6 @@ class MemManager {
static MemoryAllocator *Instance(rtMemType_t memory_type); static MemoryAllocator *Instance(rtMemType_t memory_type);
CachingAllocator &CachingInstance(rtMemType_t memory_type); CachingAllocator &CachingInstance(rtMemType_t memory_type);
RdmaPoolAllocator &RdmaPoolInstance(rtMemType_t memory_type); RdmaPoolAllocator &RdmaPoolInstance(rtMemType_t memory_type);
#ifndef ONLY_COMPILE_OPEN_SRC
HostMemAllocator &HostMemInstance(rtMemType_t memory_type);
#endif
MemManager(const MemManager &) = delete; MemManager(const MemManager &) = delete;
MemManager &operator=(const MemManager &) = delete; MemManager &operator=(const MemManager &) = delete;
/// ///
@@ -240,9 +235,6 @@ class MemManager {
std::map<rtMemType_t, MemoryAllocator *> memory_allocator_map_; std::map<rtMemType_t, MemoryAllocator *> memory_allocator_map_;
std::map<rtMemType_t, CachingAllocator *> caching_allocator_map_; std::map<rtMemType_t, CachingAllocator *> caching_allocator_map_;
std::map<rtMemType_t, RdmaPoolAllocator *> rdma_allocator_map_; std::map<rtMemType_t, RdmaPoolAllocator *> rdma_allocator_map_;
#ifndef ONLY_COMPILE_OPEN_SRC
std::map<rtMemType_t, HostMemAllocator *> host_allocator_map_;
#endif
std::recursive_mutex allocator_mutex_; std::recursive_mutex allocator_mutex_;
}; };
} // namespace ge } // namespace ge


+ 0
- 69
ge/graph/manager/host_mem_allocator.cc View File

@@ -1,69 +0,0 @@
/**
* Copyright 2019-2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#include "graph/manager/host_mem_allocator.h"
#include "framework/common/debug/ge_log.h"
#include "common/ge/ge_util.h"

namespace ge {
const void *HostMemAllocator::Malloc(const std::shared_ptr<AlignedPtr> &aligned_ptr, size_t size) {
if (aligned_ptr == nullptr) {
GELOGW("Insert a null aligned_ptr");
return nullptr;
}
GELOGD("allocate existed host memory succ, size=%zu", size);
allocated_blocks_[aligned_ptr->Get()] = { size, aligned_ptr };
return aligned_ptr->Get();
}

uint8_t *HostMemAllocator::Malloc(size_t size) {
GELOGD("start to malloc host memory, size=%zu", size);
std::lock_guard<std::mutex> lock(mutex_);
std::shared_ptr<AlignedPtr> aligned_ptr = MakeShared<AlignedPtr>(size);
if (aligned_ptr == nullptr) {
GELOGE(INTERNAL_ERROR, "make shared_ptr for AlignedPtr failed");
return nullptr;
}
allocated_blocks_[aligned_ptr->Get()] = { size, aligned_ptr };
GELOGD("allocate host memory succ, size=%zu", size);
return aligned_ptr->MutableGet();
}

///
/// @brief Drop the bookkeeping entry (and this allocator's reference) for a tracked buffer.
/// @param [in] memory_addr  Address previously returned by one of the Malloc overloads.
/// @return SUCCESS when the entry was removed, GE_GRAPH_FREE_FAILED for a null address,
///         PARAM_INVALID when the address is not tracked.
///
Status HostMemAllocator::Free(const void *memory_addr) {
  if (memory_addr == nullptr) {
    GELOGE(GE_GRAPH_FREE_FAILED, "Invalid memory pointer");
    return GE_GRAPH_FREE_FAILED;
  }

  std::lock_guard<std::mutex> guard(mutex_);
  auto block_iter = allocated_blocks_.find(memory_addr);
  if (block_iter != allocated_blocks_.end()) {
    // Release the shared reference first, then forget the address.
    block_iter->second.second.reset();
    allocated_blocks_.erase(block_iter);
    return SUCCESS;
  }

  GELOGE(PARAM_INVALID, "Invalid memory pointer");
  return PARAM_INVALID;
}

///
/// @brief Release this allocator's reference to every tracked buffer and empty the table.
///
void HostMemAllocator::Clear() {
  for (auto iter = allocated_blocks_.begin(); iter != allocated_blocks_.end(); ++iter) {
    iter->second.second.reset();
  }
  allocated_blocks_.clear();
}
} // namespace ge

+ 0
- 58
ge/graph/manager/host_mem_allocator.h View File

@@ -1,58 +0,0 @@
/**
* Copyright 2019-2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#ifndef GE_GRAPH_MANAGER_HOST_MEM_ALLOCATOR_H_
#define GE_GRAPH_MANAGER_HOST_MEM_ALLOCATOR_H_

#include <mutex>
#include <unordered_map>

#include "framework/common/ge_inner_error_codes.h"
#include "graph/aligned_ptr.h"
#include "runtime/mem.h"

namespace ge {
class HostMemAllocator {
public:
explicit HostMemAllocator(rtMemType_t memory_type) : memory_type_(memory_type) {}
~HostMemAllocator() = default;

HostMemAllocator(const HostMemAllocator &) = delete;
HostMemAllocator &operator=(const HostMemAllocator &) = delete;

Status Initialize() {
Clear();
return SUCCESS;
}
void Finalize() { Clear(); }

const void *Malloc(const std::shared_ptr<AlignedPtr>& aligned_ptr, size_t size);
uint8_t *Malloc(size_t size);
Status Free(const void *memory_addr);

std::pair<size_t, std::shared_ptr<AlignedPtr>> GetAlignedPtr(const void *addr) { return allocated_blocks_[addr]; }

private:
void Clear();

rtMemType_t memory_type_;
std::unordered_map<const void *, std::pair<size_t, std::shared_ptr<AlignedPtr>>> allocated_blocks_;
// lock around all operations
mutable std::mutex mutex_;
};
} // namespace ge

#endif // GE_GRAPH_MANAGER_HOST_MEM_ALLOCATOR_H_

+ 1
- 15
ge/graph/manager/host_mem_manager.cc View File

@@ -43,30 +43,16 @@ Status SharedMemAllocator::Allocate(SharedMemInfo &mem_info) {
return GE_GRAPH_MEMORY_ALLOC_FAILED; return GE_GRAPH_MEMORY_ALLOC_FAILED;
} }
mem_info.fd = output_para.fd; mem_info.fd = output_para.fd;
#ifndef ONLY_COMPILE_OPEN_SRC
mem_info.host_aligned_ptr = AlignedPtr::BuildFromAllocFunc(mem_info.mem_size,
[&output_para](std::unique_ptr<uint8_t[], deleter> &ptr) {
ptr.reset(reinterpret_cast<uint8_t *>(output_para.ptr));
},
[](uint8_t *ptr) {
ptr = nullptr;
}, 0);
#else
mem_info.host_address = reinterpret_cast<uint8_t *>(output_para.ptr); mem_info.host_address = reinterpret_cast<uint8_t *>(output_para.ptr);
#endif
mem_info.device_address = reinterpret_cast<uint8_t *>(output_para.devPtr); mem_info.device_address = reinterpret_cast<uint8_t *>(output_para.devPtr);
return SUCCESS; return SUCCESS;
} }


Status SharedMemAllocator::DeAllocate(SharedMemInfo &mem_info) { Status SharedMemAllocator::DeAllocate(SharedMemInfo &mem_info) {
GELOGD("SharedMemAllocator::DeAllocate"); GELOGD("SharedMemAllocator::DeAllocate");
#ifndef ONLY_COMPILE_OPEN_SRC
rtFreeHostSharedMemoryIn free_para = {mem_info.shm_name.c_str(), mem_info.mem_size, mem_info.fd,
mem_info.host_aligned_ptr->MutableGet(), mem_info.device_address};
#else
rtFreeHostSharedMemoryIn free_para = {mem_info.shm_name.c_str(), mem_info.mem_size, mem_info.fd, rtFreeHostSharedMemoryIn free_para = {mem_info.shm_name.c_str(), mem_info.mem_size, mem_info.fd,
mem_info.host_address, mem_info.device_address}; mem_info.host_address, mem_info.device_address};
#endif

rtError_t rt_ret = rtFreeHostSharedMemory(&free_para); rtError_t rt_ret = rtFreeHostSharedMemory(&free_para);
if (rt_ret != RT_ERROR_NONE) { if (rt_ret != RT_ERROR_NONE) {
GELOGE(RT_FAILED, "Call rt api(rtFreeHostSharedMemory) failed, ret: 0x%X.", rt_ret); GELOGE(RT_FAILED, "Call rt api(rtFreeHostSharedMemory) failed, ret: 0x%X.", rt_ret);


+ 0
- 4
ge/graph/manager/host_mem_manager.h View File

@@ -42,11 +42,7 @@ struct SharedMemInfo {
uint64_t mem_size = 0; uint64_t mem_size = 0;
int fd = 0; int fd = 0;
uint8_t *device_address = nullptr; uint8_t *device_address = nullptr;
#ifndef ONLY_COMPILE_OPEN_SRC
std::shared_ptr<AlignedPtr> host_aligned_ptr = nullptr;
#else
uint8_t *host_address = nullptr; uint8_t *host_address = nullptr;
#endif
SharedMemInfo() = default; SharedMemInfo() = default;
SharedMemInfo(string name, uint64_t size) : op_name(std::move(name)), mem_size(size) {} SharedMemInfo(string name, uint64_t size) : op_name(std::move(name)), mem_size(size) {}
}; };


+ 10
- 127
ge/graph/passes/assign_pass.cc View File

@@ -9,143 +9,25 @@
* *
* Unless required by applicable law or agreed to in writing, software * Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS, * distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.l
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and * See the License for the specific language governing permissions and
* limitations under the License. * limitations under the License.
*/ */


#include "graph/passes/assign_pass.h" #include "graph/passes/assign_pass.h"

#include "framework/common/debug/ge_log.h"
#include "framework/common/debug/log.h" #include "framework/common/debug/log.h"
#include "graph/utils/graph_utils.h" #include "graph/utils/graph_utils.h"
#include "graph/debug/ge_attr_define.h" #include "graph/debug/ge_attr_define.h"


namespace { namespace {
constexpr uint32_t kValidInputNodeOutputNum = 1;
constexpr int32_t kAssignRefInputIndex = 0;
constexpr int32_t kAssignValueInputIndex = 1;
static const std::set<std::string> kNoTaskNodeTypes = { ge::DATA, ge::ANN_DATA, ge::AIPPDATA,
ge::CONSTANT, ge::CONSTANTOP,
ge::VARIABLE, ge::VARIABLEV2 };
const uint32_t kValidInputNodeOutputNum = 1;
const int32_t kAssignRefInputIndex = 0;
const int32_t kAssignValueInputIndex = 1;
} }


namespace ge { namespace ge {
#ifndef ONLY_COMPILE_OPEN_SRC
///
/// @brief Pass entry point: propagate the assign_var_name attribute for every node and,
///        for nodes of type ASSIGN, additionally try to optimise the Assign away.
/// @param [in] node  Node currently visited by the pass.
/// @return SUCCESS, or FAILED when either step reports an error.
///
Status AssignPass::Run(NodePtr &node) {
  GELOGD("AssignPass running");

  const Status transform_status = TransformAttr(node);
  if (transform_status != SUCCESS) {
    GELOGE(FAILED, "Transform assign_var_name attr failed, node=%s", node->GetName().c_str());
    return FAILED;
  }

  // Only Assign nodes are candidates for the rewrite; short-circuit keeps other nodes untouched.
  if ((node->GetType() == ASSIGN) && (OptimizedAssignNode(node) != SUCCESS)) {
    GELOGE(FAILED, "Optimize for assign_node %s failed", node->GetName().c_str());
    return FAILED;
  }

  GELOGD("AssignPass success");
  return SUCCESS;
}

///
/// @brief Optimize for assign_node: when IsCondMatch() accepts the pattern, remove the Assign
///        node and connect the value producer directly to the ref (variable) node.
/// @param [in] assign_node  Node to rewrite; Run() only calls this for nodes of type ASSIGN.
/// @return SUCCESS when the graph was rewritten or the pattern did not match, FAILED on any error.
///
Status AssignPass::OptimizedAssignNode(NodePtr &assign_node) {
// Input kAssignRefInputIndex is the ref being assigned to, kAssignValueInputIndex the value to store.
const auto &ref_in_anchor = assign_node->GetInDataAnchor(kAssignRefInputIndex);
const auto &value_in_anchor = assign_node->GetInDataAnchor(kAssignValueInputIndex);
if ((ref_in_anchor == nullptr) || (value_in_anchor == nullptr)) {
GELOGE(FAILED, "In data anchor is null, node:%s", assign_node->GetName().c_str());
return FAILED;
}
// Both inputs must be connected to a producer.
const auto &ref_peer_anchor = ref_in_anchor->GetPeerOutAnchor();
const auto &value_peer_anchor = value_in_anchor->GetPeerOutAnchor();
if ((ref_peer_anchor == nullptr) || (value_peer_anchor == nullptr)) {
GELOGE(FAILED, "Peer data anchor is null, node:%s", assign_node->GetName().c_str());
return FAILED;
}

if (IsCondMatch(assign_node, ref_peer_anchor, value_peer_anchor)) {
///
///    variable  not-const               not-const
///         \     /                          |
///          \   /                           |
///         Assign           ---->        variable
///           |                              |
///           |                              |
///         node                           node
///
GELOGD("Optimization for assign_node %s start", assign_node->GetName().c_str());
// NOTE(review): presumably the {kAssignRefInputIndex} map re-routes the Assign output to its
// ref input before the node is deleted -- confirm against IsolateAndDeleteNode.
if (IsolateAndDeleteNode(assign_node, {kAssignRefInputIndex}) != SUCCESS) {
GELOGE(FAILED, "Isolate and delete assign_node %s failed.", assign_node->GetName().c_str());
return FAILED;
}

const auto &ref_input = ref_peer_anchor->GetOwnerNode()->GetOpDesc();
const auto &value_input = value_peer_anchor->GetOwnerNode()->GetOpDesc();
// The same message is logged whichever of the two op descs is missing.
if ((ref_input == nullptr) || (value_input == nullptr)) {
GELOGE(FAILED, "value input is null");
return FAILED;
}

// variable has and only has one input
if (ref_input->UpdateInputDesc(0, value_input->GetOutputDesc(value_peer_anchor->GetIdx())) != GRAPH_SUCCESS) {
GELOGE(FAILED, "Update input_desc for variable %s failed.", ref_input->GetName().c_str());
return FAILED;
}
// Feed the value producer into input 0 of the ref node.
if (GraphUtils::AddEdge(value_peer_anchor, ref_peer_anchor->GetOwnerNode()->GetInDataAnchor(0)) != GRAPH_SUCCESS) {
GELOGE(FAILED, "Add data edge %s->%s failed", value_input->GetName().c_str(), ref_input->GetName().c_str());
return FAILED;
}

// Record the ref node's name on the value producer's output desc.
GELOGD("add attr ASSIGN_VAR_NAME on node %s, var_name=%s",
value_input->GetName().c_str(), ref_input->GetName().c_str());
if (!AttrUtils::SetStr(value_input->MutableOutputDesc(value_peer_anchor->GetIdx()), ASSIGN_VAR_NAME,
ref_input->GetName())) {
GELOGE(FAILED, "Set attr ASSIGN_VAR_NAME failed.");
return FAILED;
}
// Queue the value producer for another pass run now that it carries the attribute.
auto value_node = value_peer_anchor->GetOwnerNode();
AddRePassNode(value_node);
}
return SUCCESS;
}

///
/// @brief Transform assign_var_name attr: for every output desc of node that carries both
///        INPLACE_SUPPORT_INPUT_INDEX and ASSIGN_VAR_NAME, copy ASSIGN_VAR_NAME onto the
///        output desc of the producer feeding that in-place input, then queue the producer
///        for another pass run.
/// @param [in] node  Node whose output descs are inspected.
/// @return SUCCESS when all matching outputs were processed, FAILED when the attribute could
///         not be set; a failed GE_CHECK_NOTNULL returns that macro's error status.
///
Status AssignPass::TransformAttr(NodePtr &node) {
  GE_CHECK_NOTNULL(node->GetOpDesc());
  for (const auto &output_desc : node->GetOpDesc()->GetAllOutputsDesc()) {
    int32_t inplace_input_idx = -1;
    std::string assign_var_name;
    if (AttrUtils::GetInt(output_desc, INPLACE_SUPPORT_INPUT_INDEX, inplace_input_idx) &&
        AttrUtils::GetStr(output_desc, ASSIGN_VAR_NAME, assign_var_name)) {
      GELOGD("Transform attr ASSIGN_VAR_NAME on node %s, assign_var_name=%s, inplace_input_idx=%d, ",
             node->GetName().c_str(), assign_var_name.c_str(), inplace_input_idx);
      const auto &in_data_anchor = node->GetInDataAnchor(inplace_input_idx);
      GE_CHECK_NOTNULL(in_data_anchor);
      const auto &peer_data_anchor = in_data_anchor->GetPeerOutAnchor();
      GE_CHECK_NOTNULL(peer_data_anchor);
      auto in_node = peer_data_anchor->GetOwnerNode();
      // Check the owner node itself before it is dereferenced, like every other pointer here.
      GE_CHECK_NOTNULL(in_node);
      GE_CHECK_NOTNULL(in_node->GetOpDesc());
      GELOGD("add attr ASSIGN_VAR_NAME on node %s, var_name=%s", in_node->GetName().c_str(), assign_var_name.c_str());
      if (!AttrUtils::SetStr(in_node->GetOpDesc()->MutableOutputDesc(peer_data_anchor->GetIdx()),
                             ASSIGN_VAR_NAME, assign_var_name)) {
        GELOGE(FAILED, "Set attr ASSIGN_VAR_NAME failed.");
        return FAILED;
      }
      AddRePassNode(in_node);
    }
  }
  return SUCCESS;
}
#else
Status AssignPass::Run(NodePtr &node) { Status AssignPass::Run(NodePtr &node) {
GELOGD("AssignPass running"); GELOGD("AssignPass running");
if (node->GetType() != ASSIGN) { if (node->GetType() != ASSIGN) {
@@ -209,7 +91,7 @@ Status AssignPass::Run(NodePtr &node) {
GELOGD("AssignPass success"); GELOGD("AssignPass success");
return SUCCESS; return SUCCESS;
} }
#endif
/// ///
/// @brief Check if need optimize for assign_node /// @brief Check if need optimize for assign_node
/// @param [in] assign_node /// @param [in] assign_node
@@ -223,8 +105,9 @@ bool AssignPass::IsCondMatch(const NodePtr &node, const OutDataAnchorPtr &ref_pe
node->GetName().c_str(), ref_peer_anchor->GetOwnerNode()->GetName().c_str(), node->GetName().c_str(), ref_peer_anchor->GetOwnerNode()->GetName().c_str(),
value_peer_anchor->GetOwnerNode()->GetName().c_str()); value_peer_anchor->GetOwnerNode()->GetName().c_str());


if (kNoTaskNodeTypes.count(value_peer_anchor->GetOwnerNode()->GetType()) > 0) {
GELOGD("value input is not calculate node");
const std::string &value_type = value_peer_anchor->GetOwnerNode()->GetType();
if ((value_type == CONSTANTOP) || (value_type == CONSTANT)) {
GELOGD("value input is const");
return false; return false;
} }




+ 0
- 15
ge/graph/passes/assign_pass.h View File

@@ -25,21 +25,6 @@ class AssignPass : public BaseNodePass {
Status Run(NodePtr &node) override; Status Run(NodePtr &node) override;


private: private:
#ifndef ONLY_COMPILE_OPEN_SRC
///
/// @brief Optimize for assign_node
/// @param [in] assign_node
/// @return Status
///
Status OptimizedAssignNode(NodePtr &assign_node);

///
/// @brief Transform assign_var_name attr
/// @param [in] node
/// @return Status
///
Status TransformAttr(NodePtr &node);
#endif
/// ///
/// @brief Check if need optimize for assign_node /// @brief Check if need optimize for assign_node
/// @param [in] assign_node /// @param [in] assign_node


+ 6
- 10
ge/graph/passes/constant_fuse_same_pass.cc View File

@@ -19,7 +19,13 @@
#include <map> #include <map>
#include <memory> #include <memory>
#include <string> #include <string>
#include <utility>
#include <vector> #include <vector>

#include "common/ge/ge_util.h"
#include "framework/common/debug/ge_log.h"
#include "framework/common/ge_inner_error_codes.h"
#include "graph/debug/ge_attr_define.h"
#include "graph/utils/op_desc_utils.h" #include "graph/utils/op_desc_utils.h"
#include "graph/utils/type_utils.h" #include "graph/utils/type_utils.h"


@@ -115,21 +121,11 @@ void ConstantFuseSamePass::GetFuseConstNodes(ComputeGraphPtr &graph,
TypeUtils::DataTypeToSerialString(data_type).c_str()); TypeUtils::DataTypeToSerialString(data_type).c_str());
continue; continue;
} }
#ifndef ONLY_COMPILE_OPEN_SRC
if ((type_size != 0) && (weight->MutableData().GetAlignedPtr() == nullptr)) {
GELOGW("aligned_ptr is null while size is not 0");
continue;
}
#endif
++insert_const_nums; ++insert_const_nums;


SameConstKey map_key; SameConstKey map_key;
map_key.data_size = type_size; map_key.data_size = type_size;
#ifndef ONLY_COMPILE_OPEN_SRC
map_key.aligned_ptr = weight->MutableData().GetAlignedPtr();
#else
map_key.data = weight->GetData().GetData(); map_key.data = weight->GetData().GetData();
#endif
map_key.data_type = data_type; map_key.data_type = data_type;
map_key.format = output_tensor->GetFormat(); map_key.format = output_tensor->GetFormat();
map_key.shape = output_tensor->GetShape().GetDims(); map_key.shape = output_tensor->GetShape().GetDims();


+ 1
- 16
ge/graph/passes/constant_fuse_same_pass.h View File

@@ -21,20 +21,14 @@
#include <set> #include <set>
#include <utility> #include <utility>
#include <vector> #include <vector>
#ifndef ONLY_COMPILE_OPEN_SRC
#include "graph/aligned_ptr.h"
#endif

#include "graph/types.h" #include "graph/types.h"
#include "inc/graph_pass.h" #include "inc/graph_pass.h"


namespace ge { namespace ge {
struct SameConstKey { struct SameConstKey {
int data_size; int data_size;
#ifndef ONLY_COMPILE_OPEN_SRC
std::shared_ptr<AlignedPtr> aligned_ptr;
#else
const uint8_t *data; const uint8_t *data;
#endif
DataType data_type; DataType data_type;
Format format; Format format;
std::vector<int64_t> shape; std::vector<int64_t> shape;
@@ -44,19 +38,10 @@ struct SameConstKey {
if (data_size != key.data_size) { if (data_size != key.data_size) {
return data_size < key.data_size; return data_size < key.data_size;
} }
#ifndef ONLY_COMPILE_OPEN_SRC
if (data_size != 0) {
int ret = memcmp(aligned_ptr->Get(), key.aligned_ptr->Get(), data_size);
if (ret != 0) {
return ret < 0;
}
}
#else
int ret = memcmp(data, key.data, data_size); int ret = memcmp(data, key.data, data_size);
if (ret != 0) { if (ret != 0) {
return ret < 0; return ret < 0;
} }
#endif
if (data_type != key.data_type) { if (data_type != key.data_type) {
return data_type < key.data_type; return data_type < key.data_type;
} }


+ 0
- 83
ge/graph/passes/inplace_support_check_pass.cc View File

@@ -1,83 +0,0 @@
/**
* Copyright 2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#include "graph/passes/inplace_support_check_pass.h"
#include "framework/common/debug/log.h"
#include "graph/utils/graph_utils.h"
#include "graph/debug/ge_attr_define.h"

namespace {
// Index of the single output whose tensor desc is inspected and tagged by the pass.
constexpr uint32_t kInplaceSupportOutputIndex = 0;

// Expected count of one: used both for the number of output anchors a node must have
// and for the number of consumers a producer anchor must have.
constexpr uint32_t kInplaceSupportOutputNum = 1;

// Producer node types that the pass skips when looking for an input to reuse in place.
const std::set<std::string> kSrcNodeTypes = {
    ge::DATA,
    ge::ANN_DATA,
    ge::AIPPDATA,
    ge::CONSTANT,
    ge::CONSTANTOP,
    ge::VARIABLE,
    ge::VARIABLEV2,
};
}  // namespace

namespace ge {
///
/// @brief Tag a single-output node with the index of an input that can be reused in place.
///        The first input is chosen whose producer is not a source node (data / const / variable),
///        whose producer anchor feeds exactly one consumer, and whose data type and dims equal
///        those of output 0. The index is stored as INPLACE_SUPPORT_INPUT_INDEX on the output desc
///        and the node is queued for a re-pass.
/// @param [in] node  node to inspect
/// @return SUCCESS when the node was skipped or tagged; FAILED when the attribute cannot be set;
///         the status returned by GE_CHECK_NOTNULL when a required pointer is null
///
Status InplaceSupportCheckPass::Run(NodePtr &node) {
  GELOGD("InplaceSupportCheckPass running");
  // Guard before the first dereference below.
  GE_CHECK_NOTNULL(node);
  if (node->GetAllOutDataAnchorsSize() != kInplaceSupportOutputNum) {
    GELOGD("output num of node %s is not %u, skip InplaceSupportCheckPass",
           node->GetName().c_str(), kInplaceSupportOutputNum);
    return SUCCESS;
  }

  // Fetch the op desc once instead of re-querying the node for every access.
  const auto op_desc = node->GetOpDesc();
  GE_CHECK_NOTNULL(op_desc);

  // Hold type and shape by value: they are read from a temporary tensor desc, so keeping
  // references to them would depend on the accessors returning by value.
  const DataType output_type = op_desc->GetOutputDesc(kInplaceSupportOutputIndex).GetDataType();
  const GeShape output_shape = op_desc->GetOutputDesc(kInplaceSupportOutputIndex).GetShape();
  GELOGD("process InplaceSupportCheckPass on node %s", node->GetName().c_str());

  for (const auto &in_data_anchor : node->GetAllInDataAnchors()) {
    const auto &peer_data_anchor = in_data_anchor->GetPeerOutAnchor();
    if (peer_data_anchor == nullptr) {
      // Unconnected input, nothing to reuse.
      continue;
    }
    const auto in_node = peer_data_anchor->GetOwnerNode();
    GE_CHECK_NOTNULL(in_node);
    if (kSrcNodeTypes.count(in_node->GetType()) > 0) {
      GELOGD("meet src_node %s", in_node->GetName().c_str());
      continue;
    }
    if (peer_data_anchor->GetPeerInDataNodesSize() != kInplaceSupportOutputNum) {
      // The producer output has other consumers as well.
      GELOGD("peer_data_anchor links with multi in_data_anchors");
      continue;
    }

    const int32_t inplace_input_idx = in_data_anchor->GetIdx();
    const DataType input_type = op_desc->GetInputDesc(inplace_input_idx).GetDataType();
    const GeShape input_shape = op_desc->GetInputDesc(inplace_input_idx).GetShape();
    if (input_type != output_type) {
      // Cast explicitly so the variadic arguments match the %u specifiers.
      GELOGW("DataType mismatch, in_idx=%d, input_type=%u, output_type=%u", inplace_input_idx,
             static_cast<uint32_t>(input_type), static_cast<uint32_t>(output_type));
      continue;
    }
    if (input_shape.GetDims() != output_shape.GetDims()) {
      GELOGW("Shape mismatch, in_idx=%d, input_shape=[%s], output_shape=[%s]",
             inplace_input_idx, input_shape.ToString().c_str(), output_shape.ToString().c_str());
      continue;
    }

    GELOGD("add attr INPLACE_SUPPORT_INPUT_INDEX on node %s, input_idx=%d", node->GetName().c_str(), inplace_input_idx);
    if (!AttrUtils::SetInt(op_desc->MutableOutputDesc(kInplaceSupportOutputIndex),
                           INPLACE_SUPPORT_INPUT_INDEX, inplace_input_idx)) {
      GELOGE(FAILED, "Set attr INPLACE_SUPPORT_INPUT_INDEX on node %s failed.", node->GetName().c_str());
      return FAILED;
    }
    AddRePassNode(node);
    // Only the first matching input is recorded.
    break;
  }

  GELOGD("InplaceSupportCheckPass success");
  return SUCCESS;
}
} // namespace ge

+ 0
- 28
ge/graph/passes/inplace_support_check_pass.h View File

@@ -1,28 +0,0 @@
/**
* Copyright 2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#ifndef GE_GRAPH_PASSES_INPLACE_SUPPORT_CHECK_PASS_H_
#define GE_GRAPH_PASSES_INPLACE_SUPPORT_CHECK_PASS_H_

#include "graph/passes/base_pass.h"

namespace ge {
///
/// @brief Node pass derived from BaseNodePass. Its Run implementation
///        (inplace_support_check_pass.cc) sets INPLACE_SUPPORT_INPUT_INDEX on the
///        output desc of a single-output node when one of its inputs matches the
///        output in data type and dims.
///
class InplaceSupportCheckPass : public BaseNodePass {
public:
///
/// @brief Inspect one node and tag it if an input qualifies for in-place reuse
/// @param [in] node
/// @return Status
///
Status Run(NodePtr &node) override;
};
} // namespace ge
#endif // GE_GRAPH_PASSES_INPLACE_SUPPORT_CHECK_PASS_H_

+ 1
- 1
ge/graph/passes/switch_to_stream_switch_pass.cc View File

@@ -598,7 +598,7 @@ Status SwitchToStreamSwitchPass::AddConstNode(const ComputeGraphPtr &graph, cons
/// ///
Status SwitchToStreamSwitchPass::ModifySwitchInCtlEdges(const NodePtr &switch_node, const NodePtr &cast_node, Status SwitchToStreamSwitchPass::ModifySwitchInCtlEdges(const NodePtr &switch_node, const NodePtr &cast_node,
const std::set<NodePtr> &same_cond_switch) { const std::set<NodePtr> &same_cond_switch) {
GELOGD("ModifySwitchInCtlEdges: switch_node=%s, cast_node=%s", switch_node->GetName().c_str(),
GELOGD("ModifySwitchInCtlEdges: switch_node=%s, active_node=%s", switch_node->GetName().c_str(),
cast_node->GetName().c_str()); cast_node->GetName().c_str());
std::string orig_switch_name = switch_node->GetName(); std::string orig_switch_name = switch_node->GetName();
OpDescPtr switch_desc = switch_node->GetOpDesc(); OpDescPtr switch_desc = switch_node->GetOpDesc();


+ 1
- 6
ge/graph/preprocess/graph_preprocess.cc View File

@@ -19,6 +19,7 @@
#include <set> #include <set>
#include <string> #include <string>
#include "common/formats/format_transfers/format_transfer_fractal_nz.h" #include "common/formats/format_transfers/format_transfer_fractal_nz.h"
#include "common/formats/format_transfers/format_transfer_fractal_z.h"
#include "common/formats/format_transfers/format_transfer_nchw_nc1hwc0.h" #include "common/formats/format_transfers/format_transfer_nchw_nc1hwc0.h"
#include "common/formats/format_transfers/format_transfer_nhwc_nc1hwc0.h" #include "common/formats/format_transfers/format_transfer_nhwc_nc1hwc0.h"
#include "common/formats/format_transfers/format_transfer_transpose.h" #include "common/formats/format_transfers/format_transfer_transpose.h"
@@ -37,9 +38,7 @@
#include "graph/passes/addn_pass.h" #include "graph/passes/addn_pass.h"
#include "graph/passes/aicpu_constant_folding_pass.h" #include "graph/passes/aicpu_constant_folding_pass.h"
#include "graph/passes/assert_pass.h" #include "graph/passes/assert_pass.h"
#ifdef ONLY_COMPILE_OPEN_SRC
#include "graph/passes/assign_pass.h" #include "graph/passes/assign_pass.h"
#endif
#include "graph/passes/common_subexpression_elimination_pass.h" #include "graph/passes/common_subexpression_elimination_pass.h"
#include "graph/passes/cond_pass.h" #include "graph/passes/cond_pass.h"
#include "graph/passes/cond_remove_pass.h" #include "graph/passes/cond_remove_pass.h"
@@ -1700,9 +1699,7 @@ Status GraphPrepare::PrepareOptimize() {
VarIsInitializedOpPass var_is_initialized_pass; VarIsInitializedOpPass var_is_initialized_pass;
ParallelConcatStartOpPass parallel_concat_start_op_pass; ParallelConcatStartOpPass parallel_concat_start_op_pass;
IdentityPass identity_pass(false); IdentityPass identity_pass(false);
#ifdef ONLY_COMPILE_OPEN_SRC
AssignPass assign_pass; AssignPass assign_pass;
#endif
SnapshotPass snapshot_pass; SnapshotPass snapshot_pass;
if (!options_.train_graph_flag) { if (!options_.train_graph_flag) {
names_to_passes.emplace_back("DropOutPass", &dropout_pass); names_to_passes.emplace_back("DropOutPass", &dropout_pass);
@@ -1717,11 +1714,9 @@ Status GraphPrepare::PrepareOptimize() {
names_to_passes.emplace_back("VarIsInitializedOpPass", &var_is_initialized_pass); names_to_passes.emplace_back("VarIsInitializedOpPass", &var_is_initialized_pass);
names_to_passes.emplace_back("ParallelConcatStartOpPass", &parallel_concat_start_op_pass); names_to_passes.emplace_back("ParallelConcatStartOpPass", &parallel_concat_start_op_pass);
names_to_passes.emplace_back("IdentityPass", &identity_pass); names_to_passes.emplace_back("IdentityPass", &identity_pass);
#ifdef ONLY_COMPILE_OPEN_SRC
if (GetContext().GetHostExecFlag()) { if (GetContext().GetHostExecFlag()) {
names_to_passes.emplace_back("AssignPass", &assign_pass); names_to_passes.emplace_back("AssignPass", &assign_pass);
} }
#endif
GE_TIMESTAMP_START(names_to_passes); GE_TIMESTAMP_START(names_to_passes);
ret = ge_passes.Run(names_to_passes); ret = ge_passes.Run(names_to_passes);
GE_TIMESTAMP_END(names_to_passes, "GraphPrepare::NamesToPasses"); GE_TIMESTAMP_END(names_to_passes, "GraphPrepare::NamesToPasses");


+ 0
- 11
ge/hybrid/common/npu_memory_allocator.cc View File

@@ -20,9 +20,6 @@
#include "graph/manager/graph_caching_allocator.h" #include "graph/manager/graph_caching_allocator.h"
#include "graph/manager/graph_mem_allocator.h" #include "graph/manager/graph_mem_allocator.h"
#include "graph/manager/rdma_pool_allocator.h" #include "graph/manager/rdma_pool_allocator.h"
#ifndef ONLY_COMPILE_OPEN_SRC
#include "graph/manager/host_mem_allocator.h"
#endif


namespace ge { namespace ge {
namespace hybrid { namespace hybrid {
@@ -67,11 +64,7 @@ void *NpuMemoryAllocator::Allocate(std::size_t size, AllocationAttr *attr) {
if (mem_type == RDMA_HBM) { if (mem_type == RDMA_HBM) {
buffer = MemManager::Instance().RdmaPoolInstance(RT_MEMORY_HBM).Malloc(allocate_size, device_id_); buffer = MemManager::Instance().RdmaPoolInstance(RT_MEMORY_HBM).Malloc(allocate_size, device_id_);
} else if (mem_type == HOST_DDR) { } else if (mem_type == HOST_DDR) {
#ifndef ONLY_COMPILE_OPEN_SRC
buffer = MemManager::Instance().HostMemInstance(RT_MEMORY_HBM).Malloc(allocate_size);
#else
buffer = malloc(allocate_size); buffer = malloc(allocate_size);
#endif
} else { } else {
if (allocate_size > kMaxHbmMemorySize) { if (allocate_size > kMaxHbmMemorySize) {
GELOGE(PARAM_INVALID, "Invalid HBM memory size: %zu", allocate_size); GELOGE(PARAM_INVALID, "Invalid HBM memory size: %zu", allocate_size);
@@ -108,11 +101,7 @@ void NpuMemoryAllocator::Deallocate(void *data, MemStorageType mem_type) {
if (mem_type == RDMA_HBM) { if (mem_type == RDMA_HBM) {
MemManager::Instance().RdmaPoolInstance(RT_MEMORY_HBM).Free(reinterpret_cast<uint8_t *>(data), device_id_); MemManager::Instance().RdmaPoolInstance(RT_MEMORY_HBM).Free(reinterpret_cast<uint8_t *>(data), device_id_);
} else if (mem_type == HOST_DDR) { } else if (mem_type == HOST_DDR) {
#ifndef ONLY_COMPILE_OPEN_SRC
MemManager::Instance().HostMemInstance(RT_MEMORY_HBM).Free(data);
#else
free(data); free(data);
#endif
} else { } else {
MemManager::Instance().CachingInstance(RT_MEMORY_HBM).Free(reinterpret_cast<uint8_t *>(data), device_id_); MemManager::Instance().CachingInstance(RT_MEMORY_HBM).Free(reinterpret_cast<uint8_t *>(data), device_id_);
} }


+ 2
- 31
ge/hybrid/model/hybrid_model_builder.cc View File

@@ -25,13 +25,11 @@
#include "graph/manager/graph_var_manager.h" #include "graph/manager/graph_var_manager.h"
#include "graph/manager/host_mem_manager.h" #include "graph/manager/host_mem_manager.h"
#include "graph/manager/trans_var_data_utils.h" #include "graph/manager/trans_var_data_utils.h"
#ifndef ONLY_COMPILE_OPEN_SRC
#include "graph/manager/graph_mem_allocator.h"
#include "graph/manager/host_mem_allocator.h"
#endif
#include "graph/utils/graph_utils.h" #include "graph/utils/graph_utils.h"
#include "hybrid/common/npu_memory_allocator.h" #include "hybrid/common/npu_memory_allocator.h"
#include "hybrid/node_executor/node_executor.h" #include "hybrid/node_executor/node_executor.h"
#include "framework/common/debug/ge_log.h"
#include "graph/utils/attr_utils.h"


namespace ge { namespace ge {
namespace hybrid { namespace hybrid {
@@ -854,24 +852,9 @@ Status HybridModelBuilder::InitConstantOps() {


std::unique_ptr<TensorValue> var_tensor; std::unique_ptr<TensorValue> var_tensor;
if (GetContext().GetHostExecFlag()) { if (GetContext().GetHostExecFlag()) {
#ifndef ONLY_COMPILE_OPEN_SRC
GE_CHECK_NOTNULL(ge_tensor);
// Address for eigen kernel should be aligned with 16 bytes
// Tensors return by api GetWeights share data with proto, whose addr is not confirmed to be aligned
GeTensor aligned_tensor = ge_tensor->Clone();
GELOGD("Init tensor with host constant %s size = %zu", var_name.c_str(), aligned_tensor.MutableData().GetSize());
if (MemManager::Instance().HostMemInstance(RT_MEMORY_HBM).Malloc(aligned_tensor.GetAlignedPtr(),
aligned_tensor.GetData().size()) == nullptr) {
GELOGE(MEMALLOC_FAILED, "Malloc host memory for an existed GeTensor failed.");
return MEMALLOC_FAILED;
}
var_tensor.reset(new(std::nothrow)TensorValue(aligned_tensor.MutableData().data(),
aligned_tensor.GetData().size()));
#else
auto buffer = ge_tensor->MutableData(); auto buffer = ge_tensor->MutableData();
GELOGD("Init tensor with host constant. size = %zu", buffer.GetSize()); GELOGD("Init tensor with host constant. size = %zu", buffer.GetSize());
var_tensor.reset(new(std::nothrow)TensorValue(buffer.GetData(), buffer.GetSize())); var_tensor.reset(new(std::nothrow)TensorValue(buffer.GetData(), buffer.GetSize()));
#endif
} else { } else {
GE_CHK_STATUS_RET_NOLOG(VarNodeToTensor(var_node, var_tensor)); GE_CHK_STATUS_RET_NOLOG(VarNodeToTensor(var_node, var_tensor));
GELOGD("Init const op tensor. name = %s, size = %ld", var_name.c_str(), var_tensor->GetSize()); GELOGD("Init const op tensor. name = %s, size = %ld", var_name.c_str(), var_tensor->GetSize());
@@ -926,21 +909,9 @@ Status HybridModelBuilder::InitVariableTensors() {
GELOGE(GE_GRAPH_MALLOC_FAILED, "Host variable [%s] malloc failed.", it.first.c_str()); GELOGE(GE_GRAPH_MALLOC_FAILED, "Host variable [%s] malloc failed.", it.first.c_str());
return GE_GRAPH_MALLOC_FAILED; return GE_GRAPH_MALLOC_FAILED;
} }
#ifndef ONLY_COMPILE_OPEN_SRC
if (MemManager::Instance().HostMemInstance(RT_MEMORY_HBM).Malloc(mem_info.host_aligned_ptr,
tensor_size) == nullptr) {
GELOGE(MEMALLOC_FAILED, "Malloc host memory for an existed GeTensor failed.");
return MEMALLOC_FAILED;
}
GELOGD("Host variable [%s] malloc success, size=%lld.", it.first.c_str(), tensor_size);

std::unique_ptr<TensorValue> tensor(new (std::nothrow) TensorValue(mem_info.host_aligned_ptr->MutableGet(),
tensor_size));
#else
GELOGD("Host variable [%s] malloc success.", it.first.c_str()); GELOGD("Host variable [%s] malloc success.", it.first.c_str());


std::unique_ptr<TensorValue> tensor(new (std::nothrow) TensorValue(mem_info.host_address, tensor_size)); std::unique_ptr<TensorValue> tensor(new (std::nothrow) TensorValue(mem_info.host_address, tensor_size));
#endif
GE_CHECK_NOTNULL(tensor); GE_CHECK_NOTNULL(tensor);
hybrid_model_.variable_tensors_.emplace(it.first, std::move(tensor)); hybrid_model_.variable_tensors_.emplace(it.first, std::move(tensor));
} }


+ 2
- 20
ge/hybrid/node_executor/host_cpu/host_cpu_node_executor.cc View File

@@ -18,10 +18,6 @@
#include "hybrid/node_executor/host_cpu/kernel_factory.h" #include "hybrid/node_executor/host_cpu/kernel_factory.h"
#include "graph/passes/folding_pass.h" #include "graph/passes/folding_pass.h"
#include "hybrid/model/hybrid_model.h" #include "hybrid/model/hybrid_model.h"
#ifndef ONLY_COMPILE_OPEN_SRC
#include "graph/manager/graph_mem_allocator.h"
#include "graph/manager/host_mem_allocator.h"
#endif
#include "ge_local_engine/engine/host_cpu_engine.h" #include "ge_local_engine/engine/host_cpu_engine.h"


namespace ge { namespace ge {
@@ -54,23 +50,15 @@ Status CpuKernelNodeTask::Execute(TaskContext &context) {
auto input_desc_ptr = context.GetInputDesc(i); auto input_desc_ptr = context.GetInputDesc(i);
GE_CHECK_NOTNULL(input_desc_ptr); GE_CHECK_NOTNULL(input_desc_ptr);
const auto &input_desc = *input_desc_ptr; const auto &input_desc = *input_desc_ptr;
#ifndef ONLY_COMPILE_OPEN_SRC
auto tensor = context.GetInput(i);
GE_CHECK_NOTNULL(tensor);
auto item = MemManager::Instance().HostMemInstance(RT_MEMORY_HBM).GetAlignedPtr(tensor->GetData());
GE_CHECK_NOTNULL(item.second);
auto in_tensor = MakeShared<GeTensor>(input_desc, item.second, item.first);
#else
GE_CHECK_NOTNULL(context.GetInput(i)); GE_CHECK_NOTNULL(context.GetInput(i));
auto in_tensor = MakeShared<GeTensor>(input_desc, auto in_tensor = MakeShared<GeTensor>(input_desc,
reinterpret_cast<const uint8_t *>(context.GetInput(i)->GetData()), reinterpret_cast<const uint8_t *>(context.GetInput(i)->GetData()),
context.GetInput(i)->GetSize()); context.GetInput(i)->GetSize());
#endif
GE_CHECK_NOTNULL(in_tensor); GE_CHECK_NOTNULL(in_tensor);
in_tensor->MutableTensorDesc().SetDataType(input_desc.GetDataType()); in_tensor->MutableTensorDesc().SetDataType(input_desc.GetDataType());
in_tensor->MutableTensorDesc().SetShape(input_desc.GetShape()); in_tensor->MutableTensorDesc().SetShape(input_desc.GetShape());
inputs.emplace_back(in_tensor); inputs.emplace_back(in_tensor);
GELOGD("node:%s allocate input %d, size=%zu", op_desc->GetName().c_str(), i, in_tensor->GetData().size());
GELOGI("node:%s allocate input %d, size=%zu", op_desc->GetName().c_str(), i, in_tensor->GetData().size());
} }


std::vector<GeTensorPtr> outputs; std::vector<GeTensorPtr> outputs;
@@ -84,20 +72,14 @@ Status CpuKernelNodeTask::Execute(TaskContext &context) {
} }
auto tensor = context.GetOutput(i); auto tensor = context.GetOutput(i);
GE_CHECK_NOTNULL(tensor); GE_CHECK_NOTNULL(tensor);
#ifndef ONLY_COMPILE_OPEN_SRC
auto item = MemManager::Instance().HostMemInstance(RT_MEMORY_HBM).GetAlignedPtr(tensor->GetData());
GE_CHECK_NOTNULL(item.second);
auto out_tensor = MakeShared<GeTensor>(output_desc, item.second, item.first);
#else
auto out_tensor = MakeShared<GeTensor>(output_desc, auto out_tensor = MakeShared<GeTensor>(output_desc,
reinterpret_cast<const uint8_t *>(tensor->GetData()), reinterpret_cast<const uint8_t *>(tensor->GetData()),
tensor->GetSize()); tensor->GetSize());
#endif
GE_CHECK_NOTNULL(out_tensor); GE_CHECK_NOTNULL(out_tensor);
out_tensor->MutableTensorDesc().SetDataType(output_desc.GetDataType()); out_tensor->MutableTensorDesc().SetDataType(output_desc.GetDataType());
out_tensor->MutableTensorDesc().SetShape(output_desc.GetShape()); out_tensor->MutableTensorDesc().SetShape(output_desc.GetShape());
outputs.emplace_back(out_tensor); outputs.emplace_back(out_tensor);
GELOGD("node:%s allocate output %d, size=%zu", op_desc->GetName().c_str(), i, out_tensor->GetData().size());
GELOGI("node:%s allocate output %d, size=%zu", op_desc->GetName().c_str(), i, out_tensor->GetData().size());
} }


return HostCpuEngine::GetInstance().Run(node_, inputs, outputs); return HostCpuEngine::GetInstance().Run(node_, inputs, outputs);


+ 0
- 4
tests/ut/ge/CMakeLists.txt View File

@@ -704,7 +704,6 @@ add_library(ge_ut_common STATIC ${COMMON_SRC_FILES} ${PROTO_SRCS} ${PROTO_HDRS})


target_compile_definitions(ge_ut_common PRIVATE target_compile_definitions(ge_ut_common PRIVATE
google=ascend_private google=ascend_private
$<$<STREQUAL:${ENABLE_OPEN_SRC},True>:ONLY_COMPILE_OPEN_SRC>
) )


target_link_libraries(ge_ut_common PRIVATE target_link_libraries(ge_ut_common PRIVATE
@@ -719,7 +718,6 @@ add_library(ge_ut_common_format STATIC ${COMMON_SRC_FILES} ${COMMON_FORMAT_SRC_F


target_compile_definitions(ge_ut_common_format PRIVATE target_compile_definitions(ge_ut_common_format PRIVATE
google=ascend_private google=ascend_private
$<$<STREQUAL:${ENABLE_OPEN_SRC},True>:ONLY_COMPILE_OPEN_SRC>
) )


target_link_libraries(ge_ut_common_format PRIVATE target_link_libraries(ge_ut_common_format PRIVATE
@@ -776,7 +774,6 @@ add_library(ge_load_common STATIC ${GRAPH_LOAD_COMMON_SRC_FILES} ${PROTO_SRCS} $


target_compile_definitions(ge_load_common PRIVATE target_compile_definitions(ge_load_common PRIVATE
google=ascend_private google=ascend_private
$<$<STREQUAL:${ENABLE_OPEN_SRC},True>:ONLY_COMPILE_OPEN_SRC>
) )


target_link_libraries(ge_load_common PRIVATE target_link_libraries(ge_load_common PRIVATE
@@ -791,7 +788,6 @@ add_library(ge_execute_common STATIC ${GRAPH_EXECUTE_COMMON_SRC_FILES} ${PROTO_S


target_compile_definitions(ge_execute_common PRIVATE target_compile_definitions(ge_execute_common PRIVATE
google=ascend_private google=ascend_private
$<$<STREQUAL:${ENABLE_OPEN_SRC},True>:ONLY_COMPILE_OPEN_SRC>
) )


target_link_libraries(ge_execute_common PRIVATE target_link_libraries(ge_execute_common PRIVATE


Loading…
Cancel
Save